Source file src/pkg/html/template/transition.go
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package template
6
7 import (
8 "bytes"
9 "strings"
10 )
11
12 // transitionFunc is the array of context transition functions for text nodes.
13 // A transition function takes a context and template text input, and returns
14 // the updated context and the number of bytes consumed from the front of the
15 // input.
16 var transitionFunc = [...]func(context, []byte) (context, int){
17 stateText: tText,
18 stateTag: tTag,
19 stateAttrName: tAttrName,
20 stateAfterName: tAfterName,
21 stateBeforeValue: tBeforeValue,
22 stateHTMLCmt: tHTMLCmt,
23 stateRCDATA: tSpecialTagEnd,
24 stateAttr: tAttr,
25 stateURL: tURL,
26 stateJS: tJS,
27 stateJSDqStr: tJSDelimited,
28 stateJSSqStr: tJSDelimited,
29 stateJSRegexp: tJSDelimited,
30 stateJSBlockCmt: tBlockCmt,
31 stateJSLineCmt: tLineCmt,
32 stateCSS: tCSS,
33 stateCSSDqStr: tCSSStr,
34 stateCSSSqStr: tCSSStr,
35 stateCSSDqURL: tCSSStr,
36 stateCSSSqURL: tCSSStr,
37 stateCSSURL: tCSSStr,
38 stateCSSBlockCmt: tBlockCmt,
39 stateCSSLineCmt: tLineCmt,
40 stateError: tError,
41 }
42
// Byte sequences that open and close an HTML comment.
var (
	commentStart = []byte("<!--")
	commentEnd   = []byte("-->")
)
45
46 // tText is the context transition function for the text state.
47 func tText(c context, s []byte) (context, int) {
48 k := 0
49 for {
50 i := k + bytes.IndexByte(s[k:], '<')
51 if i < k || i+1 == len(s) {
52 return c, len(s)
53 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
54 return context{state: stateHTMLCmt}, i + 4
55 }
56 i++
57 end := false
58 if s[i] == '/' {
59 if i+1 == len(s) {
60 return c, len(s)
61 }
62 end, i = true, i+1
63 }
64 j, e := eatTagName(s, i)
65 if j != i {
66 if end {
67 e = elementNone
68 }
69 // We've found an HTML tag.
70 return context{state: stateTag, element: e}, j
71 }
72 k = j
73 }
74 panic("unreachable")
75 }
76
77 var elementContentType = [...]state{
78 elementNone: stateText,
79 elementScript: stateJS,
80 elementStyle: stateCSS,
81 elementTextarea: stateRCDATA,
82 elementTitle: stateRCDATA,
83 }
84
85 // tTag is the context transition function for the tag state.
86 func tTag(c context, s []byte) (context, int) {
87 // Find the attribute name.
88 i := eatWhiteSpace(s, 0)
89 if i == len(s) {
90 return c, len(s)
91 }
92 if s[i] == '>' {
93 return context{
94 state: elementContentType[c.element],
95 element: c.element,
96 }, i + 1
97 }
98 j, err := eatAttrName(s, i)
99 if err != nil {
100 return context{state: stateError, err: err}, len(s)
101 }
102 state, attr := stateTag, attrNone
103 if i == j {
104 return context{
105 state: stateError,
106 err: errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
107 }, len(s)
108 }
109 switch attrType(string(s[i:j])) {
110 case contentTypeURL:
111 attr = attrURL
112 case contentTypeCSS:
113 attr = attrStyle
114 case contentTypeJS:
115 attr = attrScript
116 }
117 if j == len(s) {
118 state = stateAttrName
119 } else {
120 state = stateAfterName
121 }
122 return context{state: state, element: c.element, attr: attr}, j
123 }
124
125 // tAttrName is the context transition function for stateAttrName.
126 func tAttrName(c context, s []byte) (context, int) {
127 i, err := eatAttrName(s, 0)
128 if err != nil {
129 return context{state: stateError, err: err}, len(s)
130 } else if i != len(s) {
131 c.state = stateAfterName
132 }
133 return c, i
134 }
135
136 // tAfterName is the context transition function for stateAfterName.
137 func tAfterName(c context, s []byte) (context, int) {
138 // Look for the start of the value.
139 i := eatWhiteSpace(s, 0)
140 if i == len(s) {
141 return c, len(s)
142 } else if s[i] != '=' {
143 // Occurs due to tag ending '>', and valueless attribute.
144 c.state = stateTag
145 return c, i
146 }
147 c.state = stateBeforeValue
148 // Consume the "=".
149 return c, i + 1
150 }
151
152 var attrStartStates = [...]state{
153 attrNone: stateAttr,
154 attrScript: stateJS,
155 attrStyle: stateCSS,
156 attrURL: stateURL,
157 }
158
159 // tBeforeValue is the context transition function for stateBeforeValue.
160 func tBeforeValue(c context, s []byte) (context, int) {
161 i := eatWhiteSpace(s, 0)
162 if i == len(s) {
163 return c, len(s)
164 }
165 // Find the attribute delimiter.
166 delim := delimSpaceOrTagEnd
167 switch s[i] {
168 case '\'':
169 delim, i = delimSingleQuote, i+1
170 case '"':
171 delim, i = delimDoubleQuote, i+1
172 }
173 c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
174 return c, i
175 }
176
177 // tHTMLCmt is the context transition function for stateHTMLCmt.
178 func tHTMLCmt(c context, s []byte) (context, int) {
179 if i := bytes.Index(s, commentEnd); i != -1 {
180 return context{}, i + 3
181 }
182 return c, len(s)
183 }
184
185 // specialTagEndMarkers maps element types to the character sequence that
186 // case-insensitively signals the end of the special tag body.
187 var specialTagEndMarkers = [...]string{
188 elementScript: "</script",
189 elementStyle: "</style",
190 elementTextarea: "</textarea",
191 elementTitle: "</title",
192 }
193
194 // tSpecialTagEnd is the context transition function for raw text and RCDATA
195 // element states.
196 func tSpecialTagEnd(c context, s []byte) (context, int) {
197 if c.element != elementNone {
198 if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 {
199 return context{}, i
200 }
201 }
202 return c, len(s)
203 }
204
205 // tAttr is the context transition function for the attribute state.
206 func tAttr(c context, s []byte) (context, int) {
207 return c, len(s)
208 }
209
210 // tURL is the context transition function for the URL state.
211 func tURL(c context, s []byte) (context, int) {
212 if bytes.IndexAny(s, "#?") >= 0 {
213 c.urlPart = urlPartQueryOrFrag
214 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
215 // HTML5 uses "Valid URL potentially surrounded by spaces" for
216 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
217 c.urlPart = urlPartPreQuery
218 }
219 return c, len(s)
220 }
221
222 // tJS is the context transition function for the JS state.
223 func tJS(c context, s []byte) (context, int) {
224 i := bytes.IndexAny(s, `"'/`)
225 if i == -1 {
226 // Entire input is non string, comment, regexp tokens.
227 c.jsCtx = nextJSCtx(s, c.jsCtx)
228 return c, len(s)
229 }
230 c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
231 switch s[i] {
232 case '"':
233 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
234 case '\'':
235 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
236 case '/':
237 switch {
238 case i+1 < len(s) && s[i+1] == '/':
239 c.state, i = stateJSLineCmt, i+1
240 case i+1 < len(s) && s[i+1] == '*':
241 c.state, i = stateJSBlockCmt, i+1
242 case c.jsCtx == jsCtxRegexp:
243 c.state = stateJSRegexp
244 case c.jsCtx == jsCtxDivOp:
245 c.jsCtx = jsCtxRegexp
246 default:
247 return context{
248 state: stateError,
249 err: errorf(ErrSlashAmbig, 0, "'/' could start a division or regexp: %.32q", s[i:]),
250 }, len(s)
251 }
252 default:
253 panic("unreachable")
254 }
255 return c, i + 1
256 }
257
258 // tJSDelimited is the context transition function for the JS string and regexp
259 // states.
260 func tJSDelimited(c context, s []byte) (context, int) {
261 specials := `\"`
262 switch c.state {
263 case stateJSSqStr:
264 specials = `\'`
265 case stateJSRegexp:
266 specials = `\/[]`
267 }
268
269 k, inCharset := 0, false
270 for {
271 i := k + bytes.IndexAny(s[k:], specials)
272 if i < k {
273 break
274 }
275 switch s[i] {
276 case '\\':
277 i++
278 if i == len(s) {
279 return context{
280 state: stateError,
281 err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
282 }, len(s)
283 }
284 case '[':
285 inCharset = true
286 case ']':
287 inCharset = false
288 default:
289 // end delimiter
290 if !inCharset {
291 c.state, c.jsCtx = stateJS, jsCtxDivOp
292 return c, i + 1
293 }
294 }
295 k = i + 1
296 }
297
298 if inCharset {
299 // This can be fixed by making context richer if interpolation
300 // into charsets is desired.
301 return context{
302 state: stateError,
303 err: errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s),
304 }, len(s)
305 }
306
307 return c, len(s)
308 }
309
// blockCommentEnd is the byte sequence that terminates a /*comment*/.
var blockCommentEnd = []byte("*/")
311
312 // tBlockCmt is the context transition function for /*comment*/ states.
313 func tBlockCmt(c context, s []byte) (context, int) {
314 i := bytes.Index(s, blockCommentEnd)
315 if i == -1 {
316 return c, len(s)
317 }
318 switch c.state {
319 case stateJSBlockCmt:
320 c.state = stateJS
321 case stateCSSBlockCmt:
322 c.state = stateCSS
323 default:
324 panic(c.state.String())
325 }
326 return c, i + 2
327 }
328
329 // tLineCmt is the context transition function for //comment states.
330 func tLineCmt(c context, s []byte) (context, int) {
331 var lineTerminators string
332 var endState state
333 switch c.state {
334 case stateJSLineCmt:
335 lineTerminators, endState = "\n\r\u2028\u2029", stateJS
336 case stateCSSLineCmt:
337 lineTerminators, endState = "\n\f\r", stateCSS
338 // Line comments are not part of any published CSS standard but
339 // are supported by the 4 major browsers.
340 // This defines line comments as
341 // LINECOMMENT ::= "//" [^\n\f\d]*
342 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
343 // newlines:
344 // nl ::= #xA | #xD #xA | #xD | #xC
345 default:
346 panic(c.state.String())
347 }
348
349 i := bytes.IndexAny(s, lineTerminators)
350 if i == -1 {
351 return c, len(s)
352 }
353 c.state = endState
354 // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
355 // "However, the LineTerminator at the end of the line is not
356 // considered to be part of the single-line comment; it is
357 // recognized separately by the lexical grammar and becomes part
358 // of the stream of input elements for the syntactic grammar."
359 return c, i
360 }
361
362 // tCSS is the context transition function for the CSS state.
363 func tCSS(c context, s []byte) (context, int) {
364 // CSS quoted strings are almost never used except for:
365 // (1) URLs as in background: "/foo.png"
366 // (2) Multiword font-names as in font-family: "Times New Roman"
367 // (3) List separators in content values as in inline-lists:
368 // <style>
369 // ul.inlineList { list-style: none; padding:0 }
370 // ul.inlineList > li { display: inline }
371 // ul.inlineList > li:before { content: ", " }
372 // ul.inlineList > li:first-child:before { content: "" }
373 // </style>
374 // <ul class=inlineList><li>One<li>Two<li>Three</ul>
375 // (4) Attribute value selectors as in a[href="http://example.com/"]
376 //
377 // We conservatively treat all strings as URLs, but make some
378 // allowances to avoid confusion.
379 //
380 // In (1), our conservative assumption is justified.
381 // In (2), valid font names do not contain ':', '?', or '#', so our
382 // conservative assumption is fine since we will never transition past
383 // urlPartPreQuery.
384 // In (3), our protocol heuristic should not be tripped, and there
385 // should not be non-space content after a '?' or '#', so as long as
386 // we only %-encode RFC 3986 reserved characters we are ok.
387 // In (4), we should URL escape for URL attributes, and for others we
388 // have the attribute name available if our conservative assumption
389 // proves problematic for real code.
390
391 k := 0
392 for {
393 i := k + bytes.IndexAny(s[k:], `("'/`)
394 if i < k {
395 return c, len(s)
396 }
397 switch s[i] {
398 case '(':
399 // Look for url to the left.
400 p := bytes.TrimRight(s[:i], "\t\n\f\r ")
401 if endsWithCSSKeyword(p, "url") {
402 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
403 switch {
404 case j != len(s) && s[j] == '"':
405 c.state, j = stateCSSDqURL, j+1
406 case j != len(s) && s[j] == '\'':
407 c.state, j = stateCSSSqURL, j+1
408 default:
409 c.state = stateCSSURL
410 }
411 return c, j
412 }
413 case '/':
414 if i+1 < len(s) {
415 switch s[i+1] {
416 case '/':
417 c.state = stateCSSLineCmt
418 return c, i + 2
419 case '*':
420 c.state = stateCSSBlockCmt
421 return c, i + 2
422 }
423 }
424 case '"':
425 c.state = stateCSSDqStr
426 return c, i + 1
427 case '\'':
428 c.state = stateCSSSqStr
429 return c, i + 1
430 }
431 k = i + 1
432 }
433 panic("unreachable")
434 }
435
436 // tCSSStr is the context transition function for the CSS string and URL states.
437 func tCSSStr(c context, s []byte) (context, int) {
438 var endAndEsc string
439 switch c.state {
440 case stateCSSDqStr, stateCSSDqURL:
441 endAndEsc = `\"`
442 case stateCSSSqStr, stateCSSSqURL:
443 endAndEsc = `\'`
444 case stateCSSURL:
445 // Unquoted URLs end with a newline or close parenthesis.
446 // The below includes the wc (whitespace character) and nl.
447 endAndEsc = "\\\t\n\f\r )"
448 default:
449 panic(c.state.String())
450 }
451
452 k := 0
453 for {
454 i := k + bytes.IndexAny(s[k:], endAndEsc)
455 if i < k {
456 c, nread := tURL(c, decodeCSS(s[k:]))
457 return c, k + nread
458 }
459 if s[i] == '\\' {
460 i++
461 if i == len(s) {
462 return context{
463 state: stateError,
464 err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s),
465 }, len(s)
466 }
467 } else {
468 c.state = stateCSS
469 return c, i + 1
470 }
471 c, _ = tURL(c, decodeCSS(s[:i+1]))
472 k = i + 1
473 }
474 panic("unreachable")
475 }
476
477 // tError is the context transition function for the error state.
478 func tError(c context, s []byte) (context, int) {
479 return c, len(s)
480 }
481
482 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
483 // It returns an error if s[i:] does not look like it begins with an
484 // attribute name, such as encountering a quote mark without a preceding
485 // equals sign.
486 func eatAttrName(s []byte, i int) (int, *Error) {
487 for j := i; j < len(s); j++ {
488 switch s[j] {
489 case ' ', '\t', '\n', '\f', '\r', '=', '>':
490 return j, nil
491 case '\'', '"', '<':
492 // These result in a parse warning in HTML5 and are
493 // indicative of serious problems if seen in an attr
494 // name in a template.
495 return -1, errorf(ErrBadHTML, 0, "%q in attribute name: %.32q", s[j:j+1], s)
496 default:
497 // No-op.
498 }
499 }
500 return len(s), nil
501 }
502
503 var elementNameMap = map[string]element{
504 "script": elementScript,
505 "style": elementStyle,
506 "textarea": elementTextarea,
507 "title": elementTitle,
508 }
509
// asciiAlpha reports whether c is an ASCII letter, upper or lower case.
func asciiAlpha(c byte) bool {
	// Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z' and leaves lower-case
	// letters alone; no other byte lands in that range.
	folded := c | 0x20
	return 'a' <= folded && folded <= 'z'
}
514
515 // asciiAlphaNum returns whether c is an ASCII letter or digit.
516 func asciiAlphaNum(c byte) bool {
517 return asciiAlpha(c) || '0' <= c && c <= '9'
518 }
519
520 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
521 func eatTagName(s []byte, i int) (int, element) {
522 if i == len(s) || !asciiAlpha(s[i]) {
523 return i, elementNone
524 }
525 j := i + 1
526 for j < len(s) {
527 x := s[j]
528 if asciiAlphaNum(x) {
529 j++
530 continue
531 }
532 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
533 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
534 j += 2
535 continue
536 }
537 break
538 }
539 return j, elementNameMap[strings.ToLower(string(s[i:j]))]
540 }
541
// eatWhiteSpace returns the largest j such that s[i:j] is white space.
// White space here means space, tab, line feed, form feed and carriage
// return. If every byte from i on is white space, it returns len(s).
func eatWhiteSpace(s []byte, i int) int {
	for j := i; j < len(s); j++ {
		if b := s[j]; b != ' ' && b != '\t' && b != '\n' && b != '\f' && b != '\r' {
			return j
		}
	}
	return len(s)
}