Source file src/pkg/strconv/quote.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strconv 6 7 import ( 8 "unicode/utf8" 9 ) 10 11 const lowerhex = "0123456789abcdef" 12 13 func quoteWith(s string, quote byte, ASCIIonly bool) string { 14 var runeTmp [utf8.UTFMax]byte 15 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 16 buf = append(buf, quote) 17 for width := 0; len(s) > 0; s = s[width:] { 18 r := rune(s[0]) 19 width = 1 20 if r >= utf8.RuneSelf { 21 r, width = utf8.DecodeRuneInString(s) 22 } 23 if width == 1 && r == utf8.RuneError { 24 buf = append(buf, `\x`...) 25 buf = append(buf, lowerhex[s[0]>>4]) 26 buf = append(buf, lowerhex[s[0]&0xF]) 27 continue 28 } 29 if r == rune(quote) || r == '\\' { // always backslashed 30 buf = append(buf, '\\') 31 buf = append(buf, byte(r)) 32 continue 33 } 34 if ASCIIonly { 35 if r < utf8.RuneSelf && IsPrint(r) { 36 buf = append(buf, byte(r)) 37 continue 38 } 39 } else if IsPrint(r) { 40 n := utf8.EncodeRune(runeTmp[:], r) 41 buf = append(buf, runeTmp[:n]...) 42 continue 43 } 44 switch r { 45 case '\a': 46 buf = append(buf, `\a`...) 47 case '\b': 48 buf = append(buf, `\b`...) 49 case '\f': 50 buf = append(buf, `\f`...) 51 case '\n': 52 buf = append(buf, `\n`...) 53 case '\r': 54 buf = append(buf, `\r`...) 55 case '\t': 56 buf = append(buf, `\t`...) 57 case '\v': 58 buf = append(buf, `\v`...) 59 default: 60 switch { 61 case r < ' ': 62 buf = append(buf, `\x`...) 63 buf = append(buf, lowerhex[s[0]>>4]) 64 buf = append(buf, lowerhex[s[0]&0xF]) 65 case r > utf8.MaxRune: 66 r = 0xFFFD 67 fallthrough 68 case r < 0x10000: 69 buf = append(buf, `\u`...) 70 for s := 12; s >= 0; s -= 4 { 71 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 72 } 73 default: 74 buf = append(buf, `\U`...) 75 for s := 28; s >= 0; s -= 4 { 76 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 77 } 78 } 79 } 80 } 81 buf = append(buf, quote) 82 return string(buf) 83 84 } 85 86 // Quote returns a double-quoted Go string literal representing s. The 87 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 88 // control characters and non-printable characters as defined by 89 // IsPrint. 90 func Quote(s string) string { 91 return quoteWith(s, '"', false) 92 } 93 94 // AppendQuote appends a double-quoted Go string literal representing s, 95 // as generated by Quote, to dst and returns the extended buffer. 96 func AppendQuote(dst []byte, s string) []byte { 97 return append(dst, Quote(s)...) 98 } 99 100 // QuoteToASCII returns a double-quoted Go string literal representing s. 101 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 102 // non-ASCII characters and non-printable characters as defined by IsPrint. 103 func QuoteToASCII(s string) string { 104 return quoteWith(s, '"', true) 105 } 106 107 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 108 // as generated by QuoteToASCII, to dst and returns the extended buffer. 109 func AppendQuoteToASCII(dst []byte, s string) []byte { 110 return append(dst, QuoteToASCII(s)...) 111 } 112 113 // QuoteRune returns a single-quoted Go character literal representing the 114 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 115 // for control characters and non-printable characters as defined by IsPrint. 116 func QuoteRune(r rune) string { 117 // TODO: avoid the allocation here. 118 return quoteWith(string(r), '\'', false) 119 } 120 121 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 122 // as generated by QuoteRune, to dst and returns the extended buffer. 123 func AppendQuoteRune(dst []byte, r rune) []byte { 124 return append(dst, QuoteRune(r)...) 125 } 126 127 // QuoteRuneToASCII returns a single-quoted Go character literal representing 128 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 129 // \u0100) for non-ASCII characters and non-printable characters as defined 130 // by IsPrint. 131 func QuoteRuneToASCII(r rune) string { 132 // TODO: avoid the allocation here. 133 return quoteWith(string(r), '\'', true) 134 } 135 136 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 137 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 138 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 139 return append(dst, QuoteRuneToASCII(r)...) 140 } 141 142 // CanBackquote returns whether the string s would be 143 // a valid Go string literal if enclosed in backquotes. 144 func CanBackquote(s string) bool { 145 for i := 0; i < len(s); i++ { 146 if (s[i] < ' ' && s[i] != '\t') || s[i] == '`' { 147 return false 148 } 149 } 150 return true 151 } 152 153 func unhex(b byte) (v rune, ok bool) { 154 c := rune(b) 155 switch { 156 case '0' <= c && c <= '9': 157 return c - '0', true 158 case 'a' <= c && c <= 'f': 159 return c - 'a' + 10, true 160 case 'A' <= c && c <= 'F': 161 return c - 'A' + 10, true 162 } 163 return 164 } 165 166 // UnquoteChar decodes the first character or byte in the escaped string 167 // or character literal represented by the string s. 168 // It returns four values: 169 // 170 // 1) value, the decoded Unicode code point or byte value; 171 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; 172 // 3) tail, the remainder of the string after the character; and 173 // 4) an error that will be nil if the character is syntactically valid. 174 // 175 // The second argument, quote, specifies the type of literal being parsed 176 // and therefore which escaped quote character is permitted. 177 // If set to a single quote, it permits the sequence \' and disallows unescaped '. 178 // If set to a double quote, it permits \" and disallows unescaped ". 179 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 180 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 181 // easy cases 182 switch c := s[0]; { 183 case c == quote && (quote == '\'' || quote == '"'): 184 err = ErrSyntax 185 return 186 case c >= utf8.RuneSelf: 187 r, size := utf8.DecodeRuneInString(s) 188 return r, true, s[size:], nil 189 case c != '\\': 190 return rune(s[0]), false, s[1:], nil 191 } 192 193 // hard case: c is backslash 194 if len(s) <= 1 { 195 err = ErrSyntax 196 return 197 } 198 c := s[1] 199 s = s[2:] 200 201 switch c { 202 case 'a': 203 value = '\a' 204 case 'b': 205 value = '\b' 206 case 'f': 207 value = '\f' 208 case 'n': 209 value = '\n' 210 case 'r': 211 value = '\r' 212 case 't': 213 value = '\t' 214 case 'v': 215 value = '\v' 216 case 'x', 'u', 'U': 217 n := 0 218 switch c { 219 case 'x': 220 n = 2 221 case 'u': 222 n = 4 223 case 'U': 224 n = 8 225 } 226 var v rune 227 if len(s) < n { 228 err = ErrSyntax 229 return 230 } 231 for j := 0; j < n; j++ { 232 x, ok := unhex(s[j]) 233 if !ok { 234 err = ErrSyntax 235 return 236 } 237 v = v<<4 | x 238 } 239 s = s[n:] 240 if c == 'x' { 241 // single-byte string, possibly not UTF-8 242 value = v 243 break 244 } 245 if v > utf8.MaxRune { 246 err = ErrSyntax 247 return 248 } 249 value = v 250 multibyte = true 251 case '0', '1', '2', '3', '4', '5', '6', '7': 252 v := rune(c) - '0' 253 if len(s) < 2 { 254 err = ErrSyntax 255 return 256 } 257 for j := 0; j < 2; j++ { // one digit already; two more 258 x := rune(s[j]) - '0' 259 if x < 0 || x > 7 { 260 err = ErrSyntax 261 return 262 } 263 v = (v << 3) | x 264 } 265 s = s[2:] 266 if v > 255 { 267 err = ErrSyntax 268 return 269 } 270 value = v 271 case '\\': 272 value = '\\' 273 case '\'', '"': 274 if c != quote { 275 err = ErrSyntax 276 return 277 } 278 value = rune(c) 279 default: 280 err = ErrSyntax 281 return 282 } 283 tail = s 284 return 285 } 286 287 // Unquote interprets s as a single-quoted, double-quoted, 288 // or backquoted Go string literal, returning the string value 289 // that s quotes. (If s is single-quoted, it would be a Go 290 // character literal; Unquote returns the corresponding 291 // one-character string.) 292 func Unquote(s string) (t string, err error) { 293 n := len(s) 294 if n < 2 { 295 return "", ErrSyntax 296 } 297 quote := s[0] 298 if quote != s[n-1] { 299 return "", ErrSyntax 300 } 301 s = s[1 : n-1] 302 303 if quote == '`' { 304 if contains(s, '`') { 305 return "", ErrSyntax 306 } 307 return s, nil 308 } 309 if quote != '"' && quote != '\'' { 310 return "", ErrSyntax 311 } 312 if contains(s, '\n') { 313 return "", ErrSyntax 314 } 315 316 // Is it trivial? Avoid allocation. 317 if !contains(s, '\\') && !contains(s, quote) { 318 switch quote { 319 case '"': 320 return s, nil 321 case '\'': 322 r, size := utf8.DecodeRuneInString(s) 323 if size == len(s) && (r != utf8.RuneError || size != 1) { 324 return s, nil 325 } 326 } 327 } 328 329 var runeTmp [utf8.UTFMax]byte 330 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 331 for len(s) > 0 { 332 c, multibyte, ss, err := UnquoteChar(s, quote) 333 if err != nil { 334 return "", err 335 } 336 s = ss 337 if c < utf8.RuneSelf || !multibyte { 338 buf = append(buf, byte(c)) 339 } else { 340 n := utf8.EncodeRune(runeTmp[:], c) 341 buf = append(buf, runeTmp[:n]...) 342 } 343 if quote == '\'' && len(s) != 0 { 344 // single-quoted must be single character 345 return "", ErrSyntax 346 } 347 } 348 return string(buf), nil 349 } 350 351 // contains reports whether the string contains the byte c. 352 func contains(s string, c byte) bool { 353 for i := 0; i < len(s); i++ { 354 if s[i] == c { 355 return true 356 } 357 } 358 return false 359 } 360 361 // bsearch16 returns the smallest i such that a[i] >= x. 362 // If there is no such i, bsearch16 returns len(a). 363 func bsearch16(a []uint16, x uint16) int { 364 i, j := 0, len(a) 365 for i < j { 366 h := i + (j-i)/2 367 if a[h] < x { 368 i = h + 1 369 } else { 370 j = h 371 } 372 } 373 return i 374 } 375 376 // bsearch32 returns the smallest i such that a[i] >= x. 377 // If there is no such i, bsearch32 returns len(a). 378 func bsearch32(a []uint32, x uint32) int { 379 i, j := 0, len(a) 380 for i < j { 381 h := i + (j-i)/2 382 if a[h] < x { 383 i = h + 1 384 } else { 385 j = h 386 } 387 } 388 return i 389 } 390 391 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 392 // to give the same answer. It allows this package not to depend on unicode, 393 // and therefore not pull in all the Unicode tables. If the linker were better 394 // at tossing unused tables, we could get rid of this implementation. 395 // That would be nice. 396 397 // IsPrint reports whether the rune is defined as printable by Go, with 398 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 399 // symbols and ASCII space. 400 func IsPrint(r rune) bool { 401 // Fast check for Latin-1 402 if r <= 0xFF { 403 if 0x20 <= r && r <= 0x7E { 404 // All the ASCII is printable from space through DEL-1. 405 return true 406 } 407 if 0xA1 <= r && r <= 0xFF { 408 // Similarly for ¡ through ÿ... 409 return r != 0xAD // ...except for the bizarre soft hyphen. 410 } 411 return false 412 } 413 414 // Same algorithm, either on uint16 or uint32 value. 415 // First, find first i such that isPrint[i] >= x. 416 // This is the index of either the start or end of a pair that might span x. 417 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 418 // If we find x in a range, make sure x is not in isNotPrint list. 419 420 if 0 <= r && r < 1<<16 { 421 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 422 i := bsearch16(isPrint, rr) 423 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 424 return false 425 } 426 j := bsearch16(isNotPrint, rr) 427 return j >= len(isNotPrint) || isNotPrint[j] != rr 428 } 429 430 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 431 i := bsearch32(isPrint, rr) 432 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 433 return false 434 } 435 if r >= 0x20000 { 436 return true 437 } 438 r -= 0x10000 439 j := bsearch16(isNotPrint, uint16(r)) 440 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 441 }