// Source file src/pkg/html/template/transition.go
1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "strings" 10 ) 11 12 // transitionFunc is the array of context transition functions for text nodes. 13 // A transition function takes a context and template text input, and returns 14 // the updated context and the number of bytes consumed from the front of the 15 // input. 16 var transitionFunc = [...]func(context, []byte) (context, int){ 17 stateText: tText, 18 stateTag: tTag, 19 stateAttrName: tAttrName, 20 stateAfterName: tAfterName, 21 stateBeforeValue: tBeforeValue, 22 stateHTMLCmt: tHTMLCmt, 23 stateRCDATA: tSpecialTagEnd, 24 stateAttr: tAttr, 25 stateURL: tURL, 26 stateJS: tJS, 27 stateJSDqStr: tJSDelimited, 28 stateJSSqStr: tJSDelimited, 29 stateJSRegexp: tJSDelimited, 30 stateJSBlockCmt: tBlockCmt, 31 stateJSLineCmt: tLineCmt, 32 stateCSS: tCSS, 33 stateCSSDqStr: tCSSStr, 34 stateCSSSqStr: tCSSStr, 35 stateCSSDqURL: tCSSStr, 36 stateCSSSqURL: tCSSStr, 37 stateCSSURL: tCSSStr, 38 stateCSSBlockCmt: tBlockCmt, 39 stateCSSLineCmt: tLineCmt, 40 stateError: tError, 41 } 42 43 var commentStart = []byte("<!--") 44 var commentEnd = []byte("-->") 45 46 // tText is the context transition function for the text state. 47 func tText(c context, s []byte) (context, int) { 48 k := 0 49 for { 50 i := k + bytes.IndexByte(s[k:], '<') 51 if i < k || i+1 == len(s) { 52 return c, len(s) 53 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { 54 return context{state: stateHTMLCmt}, i + 4 55 } 56 i++ 57 end := false 58 if s[i] == '/' { 59 if i+1 == len(s) { 60 return c, len(s) 61 } 62 end, i = true, i+1 63 } 64 j, e := eatTagName(s, i) 65 if j != i { 66 if end { 67 e = elementNone 68 } 69 // We've found an HTML tag. 
70 return context{state: stateTag, element: e}, j 71 } 72 k = j 73 } 74 panic("unreachable") 75 } 76 77 var elementContentType = [...]state{ 78 elementNone: stateText, 79 elementScript: stateJS, 80 elementStyle: stateCSS, 81 elementTextarea: stateRCDATA, 82 elementTitle: stateRCDATA, 83 } 84 85 // tTag is the context transition function for the tag state. 86 func tTag(c context, s []byte) (context, int) { 87 // Find the attribute name. 88 i := eatWhiteSpace(s, 0) 89 if i == len(s) { 90 return c, len(s) 91 } 92 if s[i] == '>' { 93 return context{ 94 state: elementContentType[c.element], 95 element: c.element, 96 }, i + 1 97 } 98 j, err := eatAttrName(s, i) 99 if err != nil { 100 return context{state: stateError, err: err}, len(s) 101 } 102 state, attr := stateTag, attrNone 103 if i == j { 104 return context{ 105 state: stateError, 106 err: errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), 107 }, len(s) 108 } 109 switch attrType(string(s[i:j])) { 110 case contentTypeURL: 111 attr = attrURL 112 case contentTypeCSS: 113 attr = attrStyle 114 case contentTypeJS: 115 attr = attrScript 116 } 117 if j == len(s) { 118 state = stateAttrName 119 } else { 120 state = stateAfterName 121 } 122 return context{state: state, element: c.element, attr: attr}, j 123 } 124 125 // tAttrName is the context transition function for stateAttrName. 126 func tAttrName(c context, s []byte) (context, int) { 127 i, err := eatAttrName(s, 0) 128 if err != nil { 129 return context{state: stateError, err: err}, len(s) 130 } else if i != len(s) { 131 c.state = stateAfterName 132 } 133 return c, i 134 } 135 136 // tAfterName is the context transition function for stateAfterName. 137 func tAfterName(c context, s []byte) (context, int) { 138 // Look for the start of the value. 139 i := eatWhiteSpace(s, 0) 140 if i == len(s) { 141 return c, len(s) 142 } else if s[i] != '=' { 143 // Occurs due to tag ending '>', and valueless attribute. 
144 c.state = stateTag 145 return c, i 146 } 147 c.state = stateBeforeValue 148 // Consume the "=". 149 return c, i + 1 150 } 151 152 var attrStartStates = [...]state{ 153 attrNone: stateAttr, 154 attrScript: stateJS, 155 attrStyle: stateCSS, 156 attrURL: stateURL, 157 } 158 159 // tBeforeValue is the context transition function for stateBeforeValue. 160 func tBeforeValue(c context, s []byte) (context, int) { 161 i := eatWhiteSpace(s, 0) 162 if i == len(s) { 163 return c, len(s) 164 } 165 // Find the attribute delimiter. 166 delim := delimSpaceOrTagEnd 167 switch s[i] { 168 case '\'': 169 delim, i = delimSingleQuote, i+1 170 case '"': 171 delim, i = delimDoubleQuote, i+1 172 } 173 c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone 174 return c, i 175 } 176 177 // tHTMLCmt is the context transition function for stateHTMLCmt. 178 func tHTMLCmt(c context, s []byte) (context, int) { 179 if i := bytes.Index(s, commentEnd); i != -1 { 180 return context{}, i + 3 181 } 182 return c, len(s) 183 } 184 185 // specialTagEndMarkers maps element types to the character sequence that 186 // case-insensitively signals the end of the special tag body. 187 var specialTagEndMarkers = [...]string{ 188 elementScript: "</script", 189 elementStyle: "</style", 190 elementTextarea: "</textarea", 191 elementTitle: "</title", 192 } 193 194 // tSpecialTagEnd is the context transition function for raw text and RCDATA 195 // element states. 196 func tSpecialTagEnd(c context, s []byte) (context, int) { 197 if c.element != elementNone { 198 if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 { 199 return context{}, i 200 } 201 } 202 return c, len(s) 203 } 204 205 // tAttr is the context transition function for the attribute state. 206 func tAttr(c context, s []byte) (context, int) { 207 return c, len(s) 208 } 209 210 // tURL is the context transition function for the URL state. 
211 func tURL(c context, s []byte) (context, int) { 212 if bytes.IndexAny(s, "#?") >= 0 { 213 c.urlPart = urlPartQueryOrFrag 214 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { 215 // HTML5 uses "Valid URL potentially surrounded by spaces" for 216 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1 217 c.urlPart = urlPartPreQuery 218 } 219 return c, len(s) 220 } 221 222 // tJS is the context transition function for the JS state. 223 func tJS(c context, s []byte) (context, int) { 224 i := bytes.IndexAny(s, `"'/`) 225 if i == -1 { 226 // Entire input is non string, comment, regexp tokens. 227 c.jsCtx = nextJSCtx(s, c.jsCtx) 228 return c, len(s) 229 } 230 c.jsCtx = nextJSCtx(s[:i], c.jsCtx) 231 switch s[i] { 232 case '"': 233 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp 234 case '\'': 235 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp 236 case '/': 237 switch { 238 case i+1 < len(s) && s[i+1] == '/': 239 c.state, i = stateJSLineCmt, i+1 240 case i+1 < len(s) && s[i+1] == '*': 241 c.state, i = stateJSBlockCmt, i+1 242 case c.jsCtx == jsCtxRegexp: 243 c.state = stateJSRegexp 244 case c.jsCtx == jsCtxDivOp: 245 c.jsCtx = jsCtxRegexp 246 default: 247 return context{ 248 state: stateError, 249 err: errorf(ErrSlashAmbig, 0, "'/' could start a division or regexp: %.32q", s[i:]), 250 }, len(s) 251 } 252 default: 253 panic("unreachable") 254 } 255 return c, i + 1 256 } 257 258 // tJSDelimited is the context transition function for the JS string and regexp 259 // states. 
260 func tJSDelimited(c context, s []byte) (context, int) { 261 specials := `\"` 262 switch c.state { 263 case stateJSSqStr: 264 specials = `\'` 265 case stateJSRegexp: 266 specials = `\/[]` 267 } 268 269 k, inCharset := 0, false 270 for { 271 i := k + bytes.IndexAny(s[k:], specials) 272 if i < k { 273 break 274 } 275 switch s[i] { 276 case '\\': 277 i++ 278 if i == len(s) { 279 return context{ 280 state: stateError, 281 err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s), 282 }, len(s) 283 } 284 case '[': 285 inCharset = true 286 case ']': 287 inCharset = false 288 default: 289 // end delimiter 290 if !inCharset { 291 c.state, c.jsCtx = stateJS, jsCtxDivOp 292 return c, i + 1 293 } 294 } 295 k = i + 1 296 } 297 298 if inCharset { 299 // This can be fixed by making context richer if interpolation 300 // into charsets is desired. 301 return context{ 302 state: stateError, 303 err: errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s), 304 }, len(s) 305 } 306 307 return c, len(s) 308 } 309 310 var blockCommentEnd = []byte("*/") 311 312 // tBlockCmt is the context transition function for /*comment*/ states. 313 func tBlockCmt(c context, s []byte) (context, int) { 314 i := bytes.Index(s, blockCommentEnd) 315 if i == -1 { 316 return c, len(s) 317 } 318 switch c.state { 319 case stateJSBlockCmt: 320 c.state = stateJS 321 case stateCSSBlockCmt: 322 c.state = stateCSS 323 default: 324 panic(c.state.String()) 325 } 326 return c, i + 2 327 } 328 329 // tLineCmt is the context transition function for //comment states. 330 func tLineCmt(c context, s []byte) (context, int) { 331 var lineTerminators string 332 var endState state 333 switch c.state { 334 case stateJSLineCmt: 335 lineTerminators, endState = "\n\r\u2028\u2029", stateJS 336 case stateCSSLineCmt: 337 lineTerminators, endState = "\n\f\r", stateCSS 338 // Line comments are not part of any published CSS standard but 339 // are supported by the 4 major browsers. 
340 // This defines line comments as 341 // LINECOMMENT ::= "//" [^\n\f\d]* 342 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines 343 // newlines: 344 // nl ::= #xA | #xD #xA | #xD | #xC 345 default: 346 panic(c.state.String()) 347 } 348 349 i := bytes.IndexAny(s, lineTerminators) 350 if i == -1 { 351 return c, len(s) 352 } 353 c.state = endState 354 // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 355 // "However, the LineTerminator at the end of the line is not 356 // considered to be part of the single-line comment; it is 357 // recognized separately by the lexical grammar and becomes part 358 // of the stream of input elements for the syntactic grammar." 359 return c, i 360 } 361 362 // tCSS is the context transition function for the CSS state. 363 func tCSS(c context, s []byte) (context, int) { 364 // CSS quoted strings are almost never used except for: 365 // (1) URLs as in background: "/foo.png" 366 // (2) Multiword font-names as in font-family: "Times New Roman" 367 // (3) List separators in content values as in inline-lists: 368 // <style> 369 // ul.inlineList { list-style: none; padding:0 } 370 // ul.inlineList > li { display: inline } 371 // ul.inlineList > li:before { content: ", " } 372 // ul.inlineList > li:first-child:before { content: "" } 373 // </style> 374 // <ul class=inlineList><li>One<li>Two<li>Three</ul> 375 // (4) Attribute value selectors as in a[href="http://example.com/"] 376 // 377 // We conservatively treat all strings as URLs, but make some 378 // allowances to avoid confusion. 379 // 380 // In (1), our conservative assumption is justified. 381 // In (2), valid font names do not contain ':', '?', or '#', so our 382 // conservative assumption is fine since we will never transition past 383 // urlPartPreQuery. 384 // In (3), our protocol heuristic should not be tripped, and there 385 // should not be non-space content after a '?' 
or '#', so as long as 386 // we only %-encode RFC 3986 reserved characters we are ok. 387 // In (4), we should URL escape for URL attributes, and for others we 388 // have the attribute name available if our conservative assumption 389 // proves problematic for real code. 390 391 k := 0 392 for { 393 i := k + bytes.IndexAny(s[k:], `("'/`) 394 if i < k { 395 return c, len(s) 396 } 397 switch s[i] { 398 case '(': 399 // Look for url to the left. 400 p := bytes.TrimRight(s[:i], "\t\n\f\r ") 401 if endsWithCSSKeyword(p, "url") { 402 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) 403 switch { 404 case j != len(s) && s[j] == '"': 405 c.state, j = stateCSSDqURL, j+1 406 case j != len(s) && s[j] == '\'': 407 c.state, j = stateCSSSqURL, j+1 408 default: 409 c.state = stateCSSURL 410 } 411 return c, j 412 } 413 case '/': 414 if i+1 < len(s) { 415 switch s[i+1] { 416 case '/': 417 c.state = stateCSSLineCmt 418 return c, i + 2 419 case '*': 420 c.state = stateCSSBlockCmt 421 return c, i + 2 422 } 423 } 424 case '"': 425 c.state = stateCSSDqStr 426 return c, i + 1 427 case '\'': 428 c.state = stateCSSSqStr 429 return c, i + 1 430 } 431 k = i + 1 432 } 433 panic("unreachable") 434 } 435 436 // tCSSStr is the context transition function for the CSS string and URL states. 437 func tCSSStr(c context, s []byte) (context, int) { 438 var endAndEsc string 439 switch c.state { 440 case stateCSSDqStr, stateCSSDqURL: 441 endAndEsc = `\"` 442 case stateCSSSqStr, stateCSSSqURL: 443 endAndEsc = `\'` 444 case stateCSSURL: 445 // Unquoted URLs end with a newline or close parenthesis. 446 // The below includes the wc (whitespace character) and nl. 
447 endAndEsc = "\\\t\n\f\r )" 448 default: 449 panic(c.state.String()) 450 } 451 452 k := 0 453 for { 454 i := k + bytes.IndexAny(s[k:], endAndEsc) 455 if i < k { 456 c, nread := tURL(c, decodeCSS(s[k:])) 457 return c, k + nread 458 } 459 if s[i] == '\\' { 460 i++ 461 if i == len(s) { 462 return context{ 463 state: stateError, 464 err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s), 465 }, len(s) 466 } 467 } else { 468 c.state = stateCSS 469 return c, i + 1 470 } 471 c, _ = tURL(c, decodeCSS(s[:i+1])) 472 k = i + 1 473 } 474 panic("unreachable") 475 } 476 477 // tError is the context transition function for the error state. 478 func tError(c context, s []byte) (context, int) { 479 return c, len(s) 480 } 481 482 // eatAttrName returns the largest j such that s[i:j] is an attribute name. 483 // It returns an error if s[i:] does not look like it begins with an 484 // attribute name, such as encountering a quote mark without a preceding 485 // equals sign. 486 func eatAttrName(s []byte, i int) (int, *Error) { 487 for j := i; j < len(s); j++ { 488 switch s[j] { 489 case ' ', '\t', '\n', '\f', '\r', '=', '>': 490 return j, nil 491 case '\'', '"', '<': 492 // These result in a parse warning in HTML5 and are 493 // indicative of serious problems if seen in an attr 494 // name in a template. 495 return -1, errorf(ErrBadHTML, 0, "%q in attribute name: %.32q", s[j:j+1], s) 496 default: 497 // No-op. 498 } 499 } 500 return len(s), nil 501 } 502 503 var elementNameMap = map[string]element{ 504 "script": elementScript, 505 "style": elementStyle, 506 "textarea": elementTextarea, 507 "title": elementTitle, 508 } 509 510 // asciiAlpha returns whether c is an ASCII letter. 511 func asciiAlpha(c byte) bool { 512 return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' 513 } 514 515 // asciiAlphaNum returns whether c is an ASCII letter or digit. 
516 func asciiAlphaNum(c byte) bool { 517 return asciiAlpha(c) || '0' <= c && c <= '9' 518 } 519 520 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. 521 func eatTagName(s []byte, i int) (int, element) { 522 if i == len(s) || !asciiAlpha(s[i]) { 523 return i, elementNone 524 } 525 j := i + 1 526 for j < len(s) { 527 x := s[j] 528 if asciiAlphaNum(x) { 529 j++ 530 continue 531 } 532 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". 533 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { 534 j += 2 535 continue 536 } 537 break 538 } 539 return j, elementNameMap[strings.ToLower(string(s[i:j]))] 540 } 541 542 // eatWhiteSpace returns the largest j such that s[i:j] is white space. 543 func eatWhiteSpace(s []byte, i int) int { 544 for j := i; j < len(s); j++ { 545 switch s[j] { 546 case ' ', '\t', '\n', '\f', '\r': 547 // No-op. 548 default: 549 return j 550 } 551 } 552 return len(s) 553 }