Source file src/pkg/go/doc/comment.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Godoc comment extraction and comment -> HTML formatting. 6 7 package doc 8 9 import ( 10 "io" 11 "regexp" 12 "strings" 13 "text/template" // for HTMLEscape 14 "unicode" 15 "unicode/utf8" 16 ) 17 18 var ( 19 ldquo = []byte("“") 20 rdquo = []byte("”") 21 ) 22 23 // Escape comment text for HTML. If nice is set, 24 // also turn `` into “ and '' into ”. 25 func commentEscape(w io.Writer, text string, nice bool) { 26 last := 0 27 if nice { 28 for i := 0; i < len(text)-1; i++ { 29 ch := text[i] 30 if ch == text[i+1] && (ch == '`' || ch == '\'') { 31 template.HTMLEscape(w, []byte(text[last:i])) 32 last = i + 2 33 switch ch { 34 case '`': 35 w.Write(ldquo) 36 case '\'': 37 w.Write(rdquo) 38 } 39 i++ // loop will add one more 40 } 41 } 42 } 43 template.HTMLEscape(w, []byte(text[last:])) 44 } 45 46 const ( 47 // Regexp for Go identifiers 48 identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this 49 50 // Regexp for URLs 51 protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):` 52 hostPart = `[a-zA-Z0-9_@\-]+` 53 filePart = `[a-zA-Z0-9_?%#~&/\-+=]+` 54 urlRx = protocol + `//` + // http:// 55 hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ 56 filePart + `([:.,]` + filePart + `)*` 57 ) 58 59 var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) 60 61 var ( 62 html_a = []byte(`<a href="`) 63 html_aq = []byte(`">`) 64 html_enda = []byte("</a>") 65 html_i = []byte("<i>") 66 html_endi = []byte("</i>") 67 html_p = []byte("<p>\n") 68 html_endp = []byte("</p>\n") 69 html_pre = []byte("<pre>") 70 html_endpre = []byte("</pre>\n") 71 html_h = []byte(`<h3 id="`) 72 html_hq = []byte(`">`) 73 html_endh = []byte("</h3>\n") 74 ) 75 76 // Emphasize and escape a line of text for HTML. URLs are converted into links; 77 // if the URL also appears in the words map, the link is taken from the map (if 78 // the corresponding map value is the empty string, the URL is not converted 79 // into a link). Go identifiers that appear in the words map are italicized; if 80 // the corresponding map value is not the empty string, it is considered a URL 81 // and the word is converted into a link. If nice is set, the remaining text's 82 // appearance is improved where it makes sense (e.g., `` is turned into “ 83 // and '' into ”). 84 func emphasize(w io.Writer, line string, words map[string]string, nice bool) { 85 for { 86 m := matchRx.FindStringSubmatchIndex(line) 87 if m == nil { 88 break 89 } 90 // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) 91 92 // write text before match 93 commentEscape(w, line[0:m[0]], nice) 94 95 // analyze match 96 match := line[m[0]:m[1]] 97 url := "" 98 italics := false 99 if words != nil { 100 url, italics = words[string(match)] 101 } 102 if m[2] >= 0 { 103 // match against first parenthesized sub-regexp; must be match against urlRx 104 if !italics { 105 // no alternative URL in words list, use match instead 106 url = string(match) 107 } 108 italics = false // don't italicize URLs 109 } 110 111 // write match 112 if len(url) > 0 { 113 w.Write(html_a) 114 template.HTMLEscape(w, []byte(url)) 115 w.Write(html_aq) 116 } 117 if italics { 118 w.Write(html_i) 119 } 120 commentEscape(w, match, nice) 121 if italics { 122 w.Write(html_endi) 123 } 124 if len(url) > 0 { 125 w.Write(html_enda) 126 } 127 128 // advance 129 line = line[m[1]:] 130 } 131 commentEscape(w, line, nice) 132 } 133 134 func indentLen(s string) int { 135 i := 0 136 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 137 i++ 138 } 139 return i 140 } 141 142 func isBlank(s string) bool { 143 return len(s) == 0 || (len(s) == 1 && s[0] == '\n') 144 } 145 146 func commonPrefix(a, b string) string { 147 i := 0 148 for i < len(a) && i < len(b) && a[i] == b[i] { 149 i++ 150 } 151 return a[0:i] 152 } 153 154 func unindent(block []string) { 155 if len(block) == 0 { 156 return 157 } 158 159 // compute maximum common white prefix 160 prefix := block[0][0:indentLen(block[0])] 161 for _, line := range block { 162 if !isBlank(line) { 163 prefix = commonPrefix(prefix, line[0:indentLen(line)]) 164 } 165 } 166 n := len(prefix) 167 168 // remove 169 for i, line := range block { 170 if !isBlank(line) { 171 block[i] = line[n:] 172 } 173 } 174 } 175 176 // heading returns the trimmed line if it passes as a section heading; 177 // otherwise it returns the empty string. 178 func heading(line string) string { 179 line = strings.TrimSpace(line) 180 if len(line) == 0 { 181 return "" 182 } 183 184 // a heading must start with an uppercase letter 185 r, _ := utf8.DecodeRuneInString(line) 186 if !unicode.IsLetter(r) || !unicode.IsUpper(r) { 187 return "" 188 } 189 190 // it must end in a letter or digit: 191 r, _ = utf8.DecodeLastRuneInString(line) 192 if !unicode.IsLetter(r) && !unicode.IsDigit(r) { 193 return "" 194 } 195 196 // exclude lines with illegal characters 197 if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 { 198 return "" 199 } 200 201 // allow "'" for possessive "'s" only 202 for b := line; ; { 203 i := strings.IndexRune(b, '\'') 204 if i < 0 { 205 break 206 } 207 if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { 208 return "" // not followed by "s " 209 } 210 b = b[i+2:] 211 } 212 213 return line 214 } 215 216 type op int 217 218 const ( 219 opPara op = iota 220 opHead 221 opPre 222 ) 223 224 type block struct { 225 op op 226 lines []string 227 } 228 229 var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`) 230 231 func anchorID(line string) string { 232 return nonAlphaNumRx.ReplaceAllString(line, "_") 233 } 234 235 // ToHTML converts comment text to formatted HTML. 236 // The comment was prepared by DocReader, 237 // so it is known not to have leading, trailing blank lines 238 // nor to have trailing spaces at the end of lines. 239 // The comment markers have already been removed. 240 // 241 // Turn each run of multiple \n into </p><p>. 242 // Turn each run of indented lines into a <pre> block without indent. 243 // Enclose headings with header tags. 244 // 245 // URLs in the comment text are converted into links; if the URL also appears 246 // in the words map, the link is taken from the map (if the corresponding map 247 // value is the empty string, the URL is not converted into a link). 248 // 249 // Go identifiers that appear in the words map are italicized; if the corresponding 250 // map value is not the empty string, it is considered a URL and the word is converted 251 // into a link. 252 func ToHTML(w io.Writer, text string, words map[string]string) { 253 for _, b := range blocks(text) { 254 switch b.op { 255 case opPara: 256 w.Write(html_p) 257 for _, line := range b.lines { 258 emphasize(w, line, words, true) 259 } 260 w.Write(html_endp) 261 case opHead: 262 w.Write(html_h) 263 id := "" 264 for _, line := range b.lines { 265 if id == "" { 266 id = anchorID(line) 267 w.Write([]byte(id)) 268 w.Write(html_hq) 269 } 270 commentEscape(w, line, true) 271 } 272 if id == "" { 273 w.Write(html_hq) 274 } 275 w.Write(html_endh) 276 case opPre: 277 w.Write(html_pre) 278 for _, line := range b.lines { 279 emphasize(w, line, nil, false) 280 } 281 w.Write(html_endpre) 282 } 283 } 284 } 285 286 func blocks(text string) []block { 287 var ( 288 out []block 289 para []string 290 291 lastWasBlank = false 292 lastWasHeading = false 293 ) 294 295 close := func() { 296 if para != nil { 297 out = append(out, block{opPara, para}) 298 para = nil 299 } 300 } 301 302 lines := strings.SplitAfter(text, "\n") 303 unindent(lines) 304 for i := 0; i < len(lines); { 305 line := lines[i] 306 if isBlank(line) { 307 // close paragraph 308 close() 309 i++ 310 lastWasBlank = true 311 continue 312 } 313 if indentLen(line) > 0 { 314 // close paragraph 315 close() 316 317 // count indented or blank lines 318 j := i + 1 319 for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { 320 j++ 321 } 322 // but not trailing blank lines 323 for j > i && isBlank(lines[j-1]) { 324 j-- 325 } 326 pre := lines[i:j] 327 i = j 328 329 unindent(pre) 330 331 // put those lines in a pre block 332 out = append(out, block{opPre, pre}) 333 lastWasHeading = false 334 continue 335 } 336 337 if lastWasBlank && !lastWasHeading && i+2 < len(lines) && 338 isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { 339 // current line is non-blank, surrounded by blank lines 340 // and the next non-blank line is not indented: this 341 // might be a heading. 342 if head := heading(line); head != "" { 343 close() 344 out = append(out, block{opHead, []string{head}}) 345 i += 2 346 lastWasHeading = true 347 continue 348 } 349 } 350 351 // open paragraph 352 lastWasBlank = false 353 lastWasHeading = false 354 para = append(para, lines[i]) 355 i++ 356 } 357 close() 358 359 return out 360 } 361 362 // ToText prepares comment text for presentation in textual output. 363 // It wraps paragraphs of text to width or fewer Unicode code points 364 // and then prefixes each line with the indent. In preformatted sections 365 // (such as program text), it prefixes each non-blank line with preIndent. 366 func ToText(w io.Writer, text string, indent, preIndent string, width int) { 367 l := lineWrapper{ 368 out: w, 369 width: width, 370 indent: indent, 371 } 372 for _, b := range blocks(text) { 373 switch b.op { 374 case opPara: 375 // l.write will add leading newline if required 376 for _, line := range b.lines { 377 l.write(line) 378 } 379 l.flush() 380 case opHead: 381 w.Write(nl) 382 for _, line := range b.lines { 383 l.write(line + "\n") 384 } 385 l.flush() 386 case opPre: 387 w.Write(nl) 388 for _, line := range b.lines { 389 if !isBlank(line) { 390 w.Write([]byte(preIndent)) 391 w.Write([]byte(line)) 392 } 393 } 394 } 395 } 396 } 397 398 type lineWrapper struct { 399 out io.Writer 400 printed bool 401 width int 402 indent string 403 n int 404 pendSpace int 405 } 406 407 var nl = []byte("\n") 408 var space = []byte(" ") 409 410 func (l *lineWrapper) write(text string) { 411 if l.n == 0 && l.printed { 412 l.out.Write(nl) // blank line before new paragraph 413 } 414 l.printed = true 415 416 for _, f := range strings.Fields(text) { 417 w := utf8.RuneCountInString(f) 418 // wrap if line is too long 419 if l.n > 0 && l.n+l.pendSpace+w > l.width { 420 l.out.Write(nl) 421 l.n = 0 422 l.pendSpace = 0 423 } 424 if l.n == 0 { 425 l.out.Write([]byte(l.indent)) 426 } 427 l.out.Write(space[:l.pendSpace]) 428 l.out.Write([]byte(f)) 429 l.n += l.pendSpace + w 430 l.pendSpace = 1 431 } 432 } 433 434 func (l *lineWrapper) flush() { 435 if l.n == 0 { 436 return 437 } 438 l.out.Write(nl) 439 l.pendSpace = 0 440 l.n = 0 441 }