src/pkg/go/doc/comment.go - The Go Programming Language

Golang

Source file src/pkg/go/doc/comment.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Godoc comment extraction and comment -> HTML formatting.
     6	
     7	package doc
     8	
     9	import (
    10		"io"
    11		"regexp"
    12		"strings"
    13		"text/template" // for HTMLEscape
    14		"unicode"
    15		"unicode/utf8"
    16	)
    17	
    18	var (
    19		ldquo = []byte("“")
    20		rdquo = []byte("”")
    21	)
    22	
    23	// Escape comment text for HTML. If nice is set,
    24	// also turn `` into “ and '' into ”.
    25	func commentEscape(w io.Writer, text string, nice bool) {
    26		last := 0
    27		if nice {
    28			for i := 0; i < len(text)-1; i++ {
    29				ch := text[i]
    30				if ch == text[i+1] && (ch == '`' || ch == '\'') {
    31					template.HTMLEscape(w, []byte(text[last:i]))
    32					last = i + 2
    33					switch ch {
    34					case '`':
    35						w.Write(ldquo)
    36					case '\'':
    37						w.Write(rdquo)
    38					}
    39					i++ // loop will add one more
    40				}
    41			}
    42		}
    43		template.HTMLEscape(w, []byte(text[last:]))
    44	}
    45	
    46	const (
    47		// Regexp for Go identifiers
    48		identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
    49	
    50		// Regexp for URLs
    51		protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
    52		hostPart = `[a-zA-Z0-9_@\-]+`
    53		filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
    54		urlRx    = protocol + `//` + // http://
    55			hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
    56			filePart + `([:.,]` + filePart + `)*`
    57	)
    58	
    59	var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
    60	
    61	var (
    62		html_a      = []byte(`<a href="`)
    63		html_aq     = []byte(`">`)
    64		html_enda   = []byte("</a>")
    65		html_i      = []byte("<i>")
    66		html_endi   = []byte("</i>")
    67		html_p      = []byte("<p>\n")
    68		html_endp   = []byte("</p>\n")
    69		html_pre    = []byte("<pre>")
    70		html_endpre = []byte("</pre>\n")
    71		html_h      = []byte(`<h3 id="`)
    72		html_hq     = []byte(`">`)
    73		html_endh   = []byte("</h3>\n")
    74	)
    75	
    76	// Emphasize and escape a line of text for HTML. URLs are converted into links;
    77	// if the URL also appears in the words map, the link is taken from the map (if
    78	// the corresponding map value is the empty string, the URL is not converted
    79	// into a link). Go identifiers that appear in the words map are italicized; if
    80	// the corresponding map value is not the empty string, it is considered a URL
    81	// and the word is converted into a link. If nice is set, the remaining text's
    82	// appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
    83	// and '' into &rdquo;).
    84	func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
    85		for {
    86			m := matchRx.FindStringSubmatchIndex(line)
    87			if m == nil {
    88				break
    89			}
    90			// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
    91	
    92			// write text before match
    93			commentEscape(w, line[0:m[0]], nice)
    94	
    95			// analyze match
    96			match := line[m[0]:m[1]]
    97			url := ""
    98			italics := false
    99			if words != nil {
   100				url, italics = words[string(match)]
   101			}
   102			if m[2] >= 0 {
   103				// match against first parenthesized sub-regexp; must be match against urlRx
   104				if !italics {
   105					// no alternative URL in words list, use match instead
   106					url = string(match)
   107				}
   108				italics = false // don't italicize URLs
   109			}
   110	
   111			// write match
   112			if len(url) > 0 {
   113				w.Write(html_a)
   114				template.HTMLEscape(w, []byte(url))
   115				w.Write(html_aq)
   116			}
   117			if italics {
   118				w.Write(html_i)
   119			}
   120			commentEscape(w, match, nice)
   121			if italics {
   122				w.Write(html_endi)
   123			}
   124			if len(url) > 0 {
   125				w.Write(html_enda)
   126			}
   127	
   128			// advance
   129			line = line[m[1]:]
   130		}
   131		commentEscape(w, line, nice)
   132	}
   133	
   134	func indentLen(s string) int {
   135		i := 0
   136		for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   137			i++
   138		}
   139		return i
   140	}
   141	
   142	func isBlank(s string) bool {
   143		return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
   144	}
   145	
   146	func commonPrefix(a, b string) string {
   147		i := 0
   148		for i < len(a) && i < len(b) && a[i] == b[i] {
   149			i++
   150		}
   151		return a[0:i]
   152	}
   153	
   154	func unindent(block []string) {
   155		if len(block) == 0 {
   156			return
   157		}
   158	
   159		// compute maximum common white prefix
   160		prefix := block[0][0:indentLen(block[0])]
   161		for _, line := range block {
   162			if !isBlank(line) {
   163				prefix = commonPrefix(prefix, line[0:indentLen(line)])
   164			}
   165		}
   166		n := len(prefix)
   167	
   168		// remove
   169		for i, line := range block {
   170			if !isBlank(line) {
   171				block[i] = line[n:]
   172			}
   173		}
   174	}
   175	
   176	// heading returns the trimmed line if it passes as a section heading;
   177	// otherwise it returns the empty string. 
   178	func heading(line string) string {
   179		line = strings.TrimSpace(line)
   180		if len(line) == 0 {
   181			return ""
   182		}
   183	
   184		// a heading must start with an uppercase letter
   185		r, _ := utf8.DecodeRuneInString(line)
   186		if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
   187			return ""
   188		}
   189	
   190		// it must end in a letter or digit:
   191		r, _ = utf8.DecodeLastRuneInString(line)
   192		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
   193			return ""
   194		}
   195	
   196		// exclude lines with illegal characters
   197		if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 {
   198			return ""
   199		}
   200	
   201		// allow "'" for possessive "'s" only
   202		for b := line; ; {
   203			i := strings.IndexRune(b, '\'')
   204			if i < 0 {
   205				break
   206			}
   207			if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
   208				return "" // not followed by "s "
   209			}
   210			b = b[i+2:]
   211		}
   212	
   213		return line
   214	}
   215	
   216	type op int
   217	
   218	const (
   219		opPara op = iota
   220		opHead
   221		opPre
   222	)
   223	
   224	type block struct {
   225		op    op
   226		lines []string
   227	}
   228	
   229	var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
   230	
   231	func anchorID(line string) string {
   232		return nonAlphaNumRx.ReplaceAllString(line, "_")
   233	}
   234	
   235	// ToHTML converts comment text to formatted HTML.
   236	// The comment was prepared by DocReader,
   237	// so it is known not to have leading, trailing blank lines
   238	// nor to have trailing spaces at the end of lines.
   239	// The comment markers have already been removed.
   240	//
   241	// Turn each run of multiple \n into </p><p>.
   242	// Turn each run of indented lines into a <pre> block without indent.
   243	// Enclose headings with header tags.
   244	//
   245	// URLs in the comment text are converted into links; if the URL also appears
   246	// in the words map, the link is taken from the map (if the corresponding map
   247	// value is the empty string, the URL is not converted into a link).
   248	//
   249	// Go identifiers that appear in the words map are italicized; if the corresponding
   250	// map value is not the empty string, it is considered a URL and the word is converted
   251	// into a link.
   252	func ToHTML(w io.Writer, text string, words map[string]string) {
   253		for _, b := range blocks(text) {
   254			switch b.op {
   255			case opPara:
   256				w.Write(html_p)
   257				for _, line := range b.lines {
   258					emphasize(w, line, words, true)
   259				}
   260				w.Write(html_endp)
   261			case opHead:
   262				w.Write(html_h)
   263				id := ""
   264				for _, line := range b.lines {
   265					if id == "" {
   266						id = anchorID(line)
   267						w.Write([]byte(id))
   268						w.Write(html_hq)
   269					}
   270					commentEscape(w, line, true)
   271				}
   272				if id == "" {
   273					w.Write(html_hq)
   274				}
   275				w.Write(html_endh)
   276			case opPre:
   277				w.Write(html_pre)
   278				for _, line := range b.lines {
   279					emphasize(w, line, nil, false)
   280				}
   281				w.Write(html_endpre)
   282			}
   283		}
   284	}
   285	
   286	func blocks(text string) []block {
   287		var (
   288			out  []block
   289			para []string
   290	
   291			lastWasBlank   = false
   292			lastWasHeading = false
   293		)
   294	
   295		close := func() {
   296			if para != nil {
   297				out = append(out, block{opPara, para})
   298				para = nil
   299			}
   300		}
   301	
   302		lines := strings.SplitAfter(text, "\n")
   303		unindent(lines)
   304		for i := 0; i < len(lines); {
   305			line := lines[i]
   306			if isBlank(line) {
   307				// close paragraph
   308				close()
   309				i++
   310				lastWasBlank = true
   311				continue
   312			}
   313			if indentLen(line) > 0 {
   314				// close paragraph
   315				close()
   316	
   317				// count indented or blank lines
   318				j := i + 1
   319				for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
   320					j++
   321				}
   322				// but not trailing blank lines
   323				for j > i && isBlank(lines[j-1]) {
   324					j--
   325				}
   326				pre := lines[i:j]
   327				i = j
   328	
   329				unindent(pre)
   330	
   331				// put those lines in a pre block
   332				out = append(out, block{opPre, pre})
   333				lastWasHeading = false
   334				continue
   335			}
   336	
   337			if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
   338				isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
   339				// current line is non-blank, surrounded by blank lines
   340				// and the next non-blank line is not indented: this
   341				// might be a heading.
   342				if head := heading(line); head != "" {
   343					close()
   344					out = append(out, block{opHead, []string{head}})
   345					i += 2
   346					lastWasHeading = true
   347					continue
   348				}
   349			}
   350	
   351			// open paragraph
   352			lastWasBlank = false
   353			lastWasHeading = false
   354			para = append(para, lines[i])
   355			i++
   356		}
   357		close()
   358	
   359		return out
   360	}
   361	
   362	// ToText prepares comment text for presentation in textual output.
   363	// It wraps paragraphs of text to width or fewer Unicode code points
   364	// and then prefixes each line with the indent.  In preformatted sections
   365	// (such as program text), it prefixes each non-blank line with preIndent.
   366	func ToText(w io.Writer, text string, indent, preIndent string, width int) {
   367		l := lineWrapper{
   368			out:    w,
   369			width:  width,
   370			indent: indent,
   371		}
   372		for _, b := range blocks(text) {
   373			switch b.op {
   374			case opPara:
   375				// l.write will add leading newline if required
   376				for _, line := range b.lines {
   377					l.write(line)
   378				}
   379				l.flush()
   380			case opHead:
   381				w.Write(nl)
   382				for _, line := range b.lines {
   383					l.write(line + "\n")
   384				}
   385				l.flush()
   386			case opPre:
   387				w.Write(nl)
   388				for _, line := range b.lines {
   389					if !isBlank(line) {
   390						w.Write([]byte(preIndent))
   391						w.Write([]byte(line))
   392					}
   393				}
   394			}
   395		}
   396	}
   397	
   398	type lineWrapper struct {
   399		out       io.Writer
   400		printed   bool
   401		width     int
   402		indent    string
   403		n         int
   404		pendSpace int
   405	}
   406	
   407	var nl = []byte("\n")
   408	var space = []byte(" ")
   409	
   410	func (l *lineWrapper) write(text string) {
   411		if l.n == 0 && l.printed {
   412			l.out.Write(nl) // blank line before new paragraph
   413		}
   414		l.printed = true
   415	
   416		for _, f := range strings.Fields(text) {
   417			w := utf8.RuneCountInString(f)
   418			// wrap if line is too long
   419			if l.n > 0 && l.n+l.pendSpace+w > l.width {
   420				l.out.Write(nl)
   421				l.n = 0
   422				l.pendSpace = 0
   423			}
   424			if l.n == 0 {
   425				l.out.Write([]byte(l.indent))
   426			}
   427			l.out.Write(space[:l.pendSpace])
   428			l.out.Write([]byte(f))
   429			l.n += l.pendSpace + w
   430			l.pendSpace = 1
   431		}
   432	}
   433	
   434	func (l *lineWrapper) flush() {
   435		if l.n == 0 {
   436			return
   437		}
   438		l.out.Write(nl)
   439		l.pendSpace = 0
   440		l.n = 0
   441	}