Source file src/pkg/html/template/js.go
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package template
6
7 import (
8 "bytes"
9 "encoding/json"
10 "fmt"
11 "reflect"
12 "strings"
13 "unicode/utf8"
14 )
15
16 // nextJSCtx returns the context that determines whether a slash after the
17 // given run of tokens tokens starts a regular expression instead of a division
18 // operator: / or /=.
19 //
20 // This assumes that the token run does not include any string tokens, comment
21 // tokens, regular expression literal tokens, or division operators.
22 //
23 // This fails on some valid but nonsensical JavaScript programs like
24 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
25 // fail on any known useful programs. It is based on the draft
26 // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
27 // http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
28 func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
29 s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
30 if len(s) == 0 {
31 return preceding
32 }
33
34 // All cases below are in the single-byte UTF-8 group.
35 switch c, n := s[len(s)-1], len(s); c {
36 case '+', '-':
37 // ++ and -- are not regexp preceders, but + and - are whether
38 // they are used as infix or prefix operators.
39 start := n - 1
40 // Count the number of adjacent dashes or pluses.
41 for start > 0 && s[start-1] == c {
42 start--
43 }
44 if (n-start)&1 == 1 {
45 // Reached for trailing minus signs since "---" is the
46 // same as "-- -".
47 return jsCtxRegexp
48 }
49 return jsCtxDivOp
50 case '.':
51 // Handle "42."
52 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
53 return jsCtxDivOp
54 }
55 return jsCtxRegexp
56 // Suffixes for all punctuators from section 7.7 of the language spec
57 // that only end binary operators not handled above.
58 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
59 return jsCtxRegexp
60 // Suffixes for all punctuators from section 7.7 of the language spec
61 // that are prefix operators not handled above.
62 case '!', '~':
63 return jsCtxRegexp
64 // Matches all the punctuators from section 7.7 of the language spec
65 // that are open brackets not handled above.
66 case '(', '[':
67 return jsCtxRegexp
68 // Matches all the punctuators from section 7.7 of the language spec
69 // that precede expression starts.
70 case ':', ';', '{':
71 return jsCtxRegexp
72 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
73 // are handled in the default except for '}' which can precede a
74 // division op as in
75 // ({ valueOf: function () { return 42 } } / 2
76 // which is valid, but, in practice, developers don't divide object
77 // literals, so our heuristic works well for code like
78 // function () { ... } /foo/.test(x) && sideEffect();
79 // The ')' punctuator can precede a regular expression as in
80 // if (b) /foo/.test(x) && ...
81 // but this is much less likely than
82 // (a + b) / c
83 case '}':
84 return jsCtxRegexp
85 default:
86 // Look for an IdentifierName and see if it is a keyword that
87 // can precede a regular expression.
88 j := n
89 for j > 0 && isJSIdentPart(rune(s[j-1])) {
90 j--
91 }
92 if regexpPrecederKeywords[string(s[j:])] {
93 return jsCtxRegexp
94 }
95 }
96 // Otherwise is a punctuator not listed above, or
97 // a string which precedes a div op, or an identifier
98 // which precedes a div op.
99 return jsCtxDivOp
100 }
101
// regexpPrecederKeywords is the set of reserved JS keywords after which a
// regular expression literal may appear in JS source. Keywords in the set
// map to true; any other lookup yields false.
var regexpPrecederKeywords = func() map[string]bool {
	keywords := []string{
		"break",
		"case",
		"continue",
		"delete",
		"do",
		"else",
		"finally",
		"in",
		"instanceof",
		"return",
		"throw",
		"try",
		"typeof",
		"void",
	}
	set := make(map[string]bool)
	for _, kw := range keywords {
		set[kw] = true
	}
	return set
}()
120
// jsonMarshalType is the reflect.Type of the json.Marshaler interface. It is
// used to detect values that provide their own JSON encoding.
var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()

// indirectToJSONMarshaler returns the value, after dereferencing as many times
// as necessary to reach the base type (or a nil pointer) or an implementation
// of json.Marshaler.
//
// An untyped nil argument is returned unchanged.
func indirectToJSONMarshaler(a interface{}) interface{} {
	// reflect.ValueOf(nil) yields the zero Value, on which Type and
	// Interface panic, so untyped nil has to be handled up front.
	if a == nil {
		return nil
	}

	v := reflect.ValueOf(a)
	// Stop at the first level that implements json.Marshaler, at a
	// non-pointer, or at a nil pointer that cannot be dereferenced.
	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
		v = v.Elem()
	}
	return v.Interface()
}
132
133 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
134 // neither side-effects nor free variables outside (NaN, Infinity).
135 func jsValEscaper(args ...interface{}) string {
136 var a interface{}
137 if len(args) == 1 {
138 a = indirectToJSONMarshaler(args[0])
139 switch t := a.(type) {
140 case JS:
141 return string(t)
142 case JSStr:
143 // TODO: normalize quotes.
144 return `"` + string(t) + `"`
145 case json.Marshaler:
146 // Do not treat as a Stringer.
147 case fmt.Stringer:
148 a = t.String()
149 }
150 } else {
151 for i, arg := range args {
152 args[i] = indirectToJSONMarshaler(arg)
153 }
154 a = fmt.Sprint(args...)
155 }
156 // TODO: detect cycles before calling Marshal which loops infinitely on
157 // cyclic data. This may be an unacceptable DoS risk.
158
159 b, err := json.Marshal(a)
160 if err != nil {
161 // Put a space before comment so that if it is flush against
162 // a division operator it is not turned into a line comment:
163 // x/{{y}}
164 // turning into
165 // x//* error marshalling y:
166 // second line of error message */null
167 return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1))
168 }
169
170 // TODO: maybe post-process output to prevent it from containing
171 // "<!--", "-->", "<![CDATA[", "]]>", or "</script"
172 // in case custom marshallers produce output containing those.
173
174 // TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
175 if len(b) == 0 {
176 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
177 // not cause the output `x=y/*z`.
178 return " null "
179 }
180 first, _ := utf8.DecodeRune(b)
181 last, _ := utf8.DecodeLastRune(b)
182 var buf bytes.Buffer
183 // Prevent IdentifierNames and NumericLiterals from running into
184 // keywords: in, instanceof, typeof, void
185 pad := isJSIdentPart(first) || isJSIdentPart(last)
186 if pad {
187 buf.WriteByte(' ')
188 }
189 written := 0
190 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029
191 // so it falls within the subset of JSON which is valid JS.
192 for i := 0; i < len(b); {
193 rune, n := utf8.DecodeRune(b[i:])
194 repl := ""
195 if rune == 0x2028 {
196 repl = `\u2028`
197 } else if rune == 0x2029 {
198 repl = `\u2029`
199 }
200 if repl != "" {
201 buf.Write(b[written:i])
202 buf.WriteString(repl)
203 written = i + n
204 }
205 i += n
206 }
207 if buf.Len() != 0 {
208 buf.Write(b[written:])
209 if pad {
210 buf.WriteByte(' ')
211 }
212 b = buf.Bytes()
213 }
214 return string(b)
215 }
216
217 // jsStrEscaper produces a string that can be included between quotes in
218 // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
219 // or in an HTML5 event handler attribute such as onclick.
220 func jsStrEscaper(args ...interface{}) string {
221 s, t := stringify(args...)
222 if t == contentTypeJSStr {
223 return replace(s, jsStrNormReplacementTable)
224 }
225 return replace(s, jsStrReplacementTable)
226 }
227
228 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
229 // specials so the result is treated literally when included in a regular
230 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
231 // the literal text of {{.X}} followed by the string "bar".
232 func jsRegexpEscaper(args ...interface{}) string {
233 s, _ := stringify(args...)
234 s = replace(s, jsRegexpReplacementTable)
235 if s == "" {
236 // /{{.X}}/ should not produce a line comment when .X == "".
237 return "(?:)"
238 }
239 return s
240 }
241
// replace replaces each rune r of s with replacementTable[r], provided that
// r < len(replacementTable). If replacementTable[r] is the empty string then
// no replacement is made.
// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
// `\u2029`.
//
// If nothing needs replacing, s itself is returned.
func replace(s string, replacementTable []string) string {
	var b bytes.Buffer
	written := 0 // index in s of the first byte not yet copied to b
	for i := 0; i < len(s); {
		// Decode explicitly so the number of bytes consumed is known.
		// An invalid byte decodes as U+FFFD with width 1, whereas
		// utf8.RuneLen(U+FFFD) is 3; using the decoded width keeps the
		// copy offsets correct even if the table replaces U+FFFD.
		r, width := utf8.DecodeRuneInString(s[i:])
		start := i
		i += width

		var repl string
		switch {
		case int(r) < len(replacementTable) && replacementTable[r] != "":
			repl = replacementTable[r]
		case r == '\u2028':
			repl = `\u2028`
		case r == '\u2029':
			repl = `\u2029`
		default:
			continue
		}
		b.WriteString(s[written:start])
		b.WriteString(repl)
		written = i
	}
	if written == 0 {
		// No rune was replaced; avoid copying.
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
272
273 var jsStrReplacementTable = []string{
274 0: `\0`,
275 '\t': `\t`,
276 '\n': `\n`,
277 '\v': `\x0b`, // "\v" == "v" on IE 6.
278 '\f': `\f`,
279 '\r': `\r`,
280 // Encode HTML specials as hex so the output can be embedded
281 // in HTML attributes without further encoding.
282 '"': `\x22`,
283 '&': `\x26`,
284 '\'': `\x27`,
285 '+': `\x2b`,
286 '/': `\/`,
287 '<': `\x3c`,
288 '>': `\x3e`,
289 '\\': `\\`,
290 }
291
292 // jsStrNormReplacementTable is like jsStrReplacementTable but does not
293 // overencode existing escapes since this table has no entry for `\`.
294 var jsStrNormReplacementTable = []string{
295 0: `\0`,
296 '\t': `\t`,
297 '\n': `\n`,
298 '\v': `\x0b`, // "\v" == "v" on IE 6.
299 '\f': `\f`,
300 '\r': `\r`,
301 // Encode HTML specials as hex so the output can be embedded
302 // in HTML attributes without further encoding.
303 '"': `\x22`,
304 '&': `\x26`,
305 '\'': `\x27`,
306 '+': `\x2b`,
307 '/': `\/`,
308 '<': `\x3c`,
309 '>': `\x3e`,
310 }
311
// jsRegexpReplacementTable maps a code point (used as the slice index) to the
// text that replaces it inside a JavaScript regular expression literal. On
// top of the string escapes it backslash-escapes the regular expression
// specials so that they match literally. Code points with an empty entry are
// left as they are.
var jsRegexpReplacementTable = []string{
	// NUL is written as `\x00`, not `\0`: if the next character is a
	// digit, `\0` would be read as part of an octal escape (`\01`).
	0:    `\x00`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\x0b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\x22`,
	'$':  `\$`,
	'&':  `\x26`,
	'\'': `\x27`,
	'(':  `\(`,
	')':  `\)`,
	'*':  `\*`,
	'+':  `\x2b`,
	'-':  `\-`,
	'.':  `\.`,
	'/':  `\/`,
	'<':  `\x3c`,
	'>':  `\x3e`,
	'?':  `\?`,
	'[':  `\[`,
	'\\': `\\`,
	']':  `\]`,
	'^':  `\^`,
	'{':  `\{`,
	'|':  `\|`,
	'}':  `\}`,
}
343
// isJSIdentPart reports whether r can appear inside a JS identifier.
// Only '$', '_', ASCII digits and ASCII letters are recognized: non-Latin
// letters, joiners and combining marks are not handled, but every code point
// that can occur in a numeric literal or a keyword is.
func isJSIdentPart(r rune) bool {
	if r == '$' || r == '_' {
		return true
	}
	isDigit := '0' <= r && r <= '9'
	isUpper := 'A' <= r && r <= 'Z'
	isLower := 'a' <= r && r <= 'z'
	return isDigit || isUpper || isLower
}