src/pkg/text/template/parse/lex.go - The Go Programming Language

Golang

Source file src/pkg/text/template/parse/lex.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package parse
     6	
     7	import (
     8		"fmt"
     9		"strings"
    10		"unicode"
    11		"unicode/utf8"
    12	)
    13	
// item represents a token or text string returned from the scanner.
// The lexer builds one in emit (a slice of the input) and in errorf
// (a formatted error message).
type item struct {
	typ itemType // kind of item; selects how val is rendered by String
	val string   // scanned text, or the error message when typ is itemError
}
    19	
    20	func (i item) String() string {
    21		switch {
    22		case i.typ == itemEOF:
    23			return "EOF"
    24		case i.typ == itemError:
    25			return i.val
    26		case i.typ > itemKeyword:
    27			return fmt.Sprintf("<%s>", i.val)
    28		case len(i.val) > 10:
    29			return fmt.Sprintf("%.10q...", i.val)
    30		}
    31		return fmt.Sprintf("%q", i.val)
    32	}
    33	
    34	// itemType identifies the type of lex items.
    35	type itemType int
    36	
    37	const (
    38		itemError        itemType = iota // error occurred; value is text of error
    39		itemBool                         // boolean constant
    40		itemChar                         // printable ASCII character; grab bag for comma etc.
    41		itemCharConstant                 // character constant
    42		itemComplex                      // complex constant (1+2i); imaginary is just a number
    43		itemColonEquals                  // colon-equals (':=') introducing a declaration
    44		itemEOF
    45		itemField      // alphanumeric identifier, starting with '.', possibly chained ('.x.y')
    46		itemIdentifier // alphanumeric identifier
    47		itemLeftDelim  // left action delimiter
    48		itemNumber     // simple number, including imaginary
    49		itemPipe       // pipe symbol
    50		itemRawString  // raw quoted string (includes quotes)
    51		itemRightDelim // right action delimiter
    52		itemString     // quoted string (includes quotes)
    53		itemText       // plain text
    54		itemVariable   // variable starting with '$', such as '$' or  '$1' or '$hello'.
    55		// Keywords appear after all the rest.
    56		itemKeyword  // used only to delimit the keywords
    57		itemDot      // the cursor, spelled '.'.
    58		itemDefine   // define keyword
    59		itemElse     // else keyword
    60		itemEnd      // end keyword
    61		itemIf       // if keyword
    62		itemRange    // range keyword
    63		itemTemplate // template keyword
    64		itemWith     // with keyword
    65	)
    66	
    67	// Make the types prettyprint.
    68	var itemName = map[itemType]string{
    69		itemError:        "error",
    70		itemBool:         "bool",
    71		itemChar:         "char",
    72		itemCharConstant: "charconst",
    73		itemComplex:      "complex",
    74		itemColonEquals:  ":=",
    75		itemEOF:          "EOF",
    76		itemField:        "field",
    77		itemIdentifier:   "identifier",
    78		itemLeftDelim:    "left delim",
    79		itemNumber:       "number",
    80		itemPipe:         "pipe",
    81		itemRawString:    "raw string",
    82		itemRightDelim:   "right delim",
    83		itemString:       "string",
    84		itemVariable:     "variable",
    85		// keywords
    86		itemDot:      ".",
    87		itemDefine:   "define",
    88		itemElse:     "else",
    89		itemIf:       "if",
    90		itemEnd:      "end",
    91		itemRange:    "range",
    92		itemTemplate: "template",
    93		itemWith:     "with",
    94	}
    95	
    96	func (i itemType) String() string {
    97		s := itemName[i]
    98		if s == "" {
    99			return fmt.Sprintf("item%d", int(i))
   100		}
   101		return s
   102	}
   103	
// key maps the spelling of each keyword (and the lone ".") to its item type.
// lexIdentifier looks scanned words up here; a word that is absent yields the
// zero itemType, which is not greater than itemKeyword.
var key = map[string]itemType{
	".":        itemDot,
	"define":   itemDefine,
	"else":     itemElse,
	"end":      itemEnd,
	"if":       itemIf,
	"range":    itemRange,
	"template": itemTemplate,
	"with":     itemWith,
}
   114	
// eof is the sentinel rune returned by next when the input is exhausted.
const eof = -1
   116	
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn is returned by the state functions once EOF or an error has been emitted.
type stateFn func(*lexer) stateFn
   119	
// lexer holds the state of the scanner. The pending item is always the
// slice input[start:pos]; emit sends it and ignore discards it.
type lexer struct {
	name       string    // the name of the input; used only for error reports.
	input      string    // the string being scanned.
	leftDelim  string    // start of action.
	rightDelim string    // end of action.
	state      stateFn   // the next lexing function to enter.
	pos        int       // current position in the input (byte offset).
	start      int       // start position of this item (byte offset).
	width      int       // width in bytes of last rune read from input; 0 after eof.
	items      chan item // channel of scanned items.
}
   132	
   133	// next returns the next rune in the input.
   134	func (l *lexer) next() (r rune) {
   135		if l.pos >= len(l.input) {
   136			l.width = 0
   137			return eof
   138		}
   139		r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
   140		l.pos += l.width
   141		return r
   142	}
   143	
   144	// peek returns but does not consume the next rune in the input.
   145	func (l *lexer) peek() rune {
   146		r := l.next()
   147		l.backup()
   148		return r
   149	}
   150	
   151	// backup steps back one rune. Can only be called once per call of next.
   152	func (l *lexer) backup() {
   153		l.pos -= l.width
   154	}
   155	
   156	// emit passes an item back to the client.
   157	func (l *lexer) emit(t itemType) {
   158		l.items <- item{t, l.input[l.start:l.pos]}
   159		l.start = l.pos
   160	}
   161	
// ignore skips over the pending input before this point: it moves the
// start of the current item up to the current position without emitting
// anything.
func (l *lexer) ignore() {
	l.start = l.pos
}
   166	
   167	// accept consumes the next rune if it's from the valid set.
   168	func (l *lexer) accept(valid string) bool {
   169		if strings.IndexRune(valid, l.next()) >= 0 {
   170			return true
   171		}
   172		l.backup()
   173		return false
   174	}
   175	
   176	// acceptRun consumes a run of runes from the valid set.
   177	func (l *lexer) acceptRun(valid string) {
   178		for strings.IndexRune(valid, l.next()) >= 0 {
   179		}
   180		l.backup()
   181	}
   182	
   183	// lineNumber reports which line we're on. Doing it this way
   184	// means we don't have to worry about peek double counting.
   185	func (l *lexer) lineNumber() int {
   186		return 1 + strings.Count(l.input[:l.pos], "\n")
   187	}
   188	
   189	// error returns an error token and terminates the scan by passing
   190	// back a nil pointer that will be the next state, terminating l.nextItem.
   191	func (l *lexer) errorf(format string, args ...interface{}) stateFn {
   192		l.items <- item{itemError, fmt.Sprintf(format, args...)}
   193		return nil
   194	}
   195	
   196	// nextItem returns the next item from the input.
   197	func (l *lexer) nextItem() item {
   198		for {
   199			select {
   200			case item := <-l.items:
   201				return item
   202			default:
   203				l.state = l.state(l)
   204			}
   205		}
   206		panic("not reached")
   207	}
   208	
   209	// lex creates a new scanner for the input string.
   210	func lex(name, input, left, right string) *lexer {
   211		if left == "" {
   212			left = leftDelim
   213		}
   214		if right == "" {
   215			right = rightDelim
   216		}
   217		l := &lexer{
   218			name:       name,
   219			input:      input,
   220			leftDelim:  left,
   221			rightDelim: right,
   222			state:      lexText,
   223			items:      make(chan item, 2), // Two items of buffering is sufficient for all state functions
   224		}
   225		return l
   226	}
   227	
   228	// state functions
   229	
// Default action delimiters and the comment markers. lex falls back to
// leftDelim and rightDelim when the caller passes empty strings; the comment
// markers are matched directly against the delimiters by lexLeftDelim and
// lexComment.
const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)
   236	
   237	// lexText scans until an opening action delimiter, "{{".
   238	func lexText(l *lexer) stateFn {
   239		for {
   240			if strings.HasPrefix(l.input[l.pos:], l.leftDelim) {
   241				if l.pos > l.start {
   242					l.emit(itemText)
   243				}
   244				return lexLeftDelim
   245			}
   246			if l.next() == eof {
   247				break
   248			}
   249		}
   250		// Correctly reached EOF.
   251		if l.pos > l.start {
   252			l.emit(itemText)
   253		}
   254		l.emit(itemEOF)
   255		return nil
   256	}
   257	
   258	// lexLeftDelim scans the left delimiter, which is known to be present.
   259	func lexLeftDelim(l *lexer) stateFn {
   260		if strings.HasPrefix(l.input[l.pos:], l.leftDelim+leftComment) {
   261			return lexComment
   262		}
   263		l.pos += len(l.leftDelim)
   264		l.emit(itemLeftDelim)
   265		return lexInsideAction
   266	}
   267	
   268	// lexComment scans a comment. The left comment marker is known to be present.
   269	func lexComment(l *lexer) stateFn {
   270		i := strings.Index(l.input[l.pos:], rightComment+l.rightDelim)
   271		if i < 0 {
   272			return l.errorf("unclosed comment")
   273		}
   274		l.pos += i + len(rightComment) + len(l.rightDelim)
   275		l.ignore()
   276		return lexText
   277	}
   278	
   279	// lexRightDelim scans the right delimiter, which is known to be present.
   280	func lexRightDelim(l *lexer) stateFn {
   281		l.pos += len(l.rightDelim)
   282		l.emit(itemRightDelim)
   283		return lexText
   284	}
   285	
   286	// lexInsideAction scans the elements inside action delimiters.
   287	func lexInsideAction(l *lexer) stateFn {
   288		// Either number, quoted string, or identifier.
   289		// Spaces separate and are ignored.
   290		// Pipe symbols separate and are emitted.
   291		if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
   292			return lexRightDelim
   293		}
   294		switch r := l.next(); {
   295		case r == eof || r == '\n':
   296			return l.errorf("unclosed action")
   297		case isSpace(r):
   298			l.ignore()
   299		case r == ':':
   300			if l.next() != '=' {
   301				return l.errorf("expected :=")
   302			}
   303			l.emit(itemColonEquals)
   304		case r == '|':
   305			l.emit(itemPipe)
   306		case r == '"':
   307			return lexQuote
   308		case r == '`':
   309			return lexRawQuote
   310		case r == '$':
   311			return lexIdentifier
   312		case r == '\'':
   313			return lexChar
   314		case r == '.':
   315			// special look-ahead for ".field" so we don't break l.backup().
   316			if l.pos < len(l.input) {
   317				r := l.input[l.pos]
   318				if r < '0' || '9' < r {
   319					return lexIdentifier // itemDot comes from the keyword table.
   320				}
   321			}
   322			fallthrough // '.' can start a number.
   323		case r == '+' || r == '-' || ('0' <= r && r <= '9'):
   324			l.backup()
   325			return lexNumber
   326		case isAlphaNumeric(r):
   327			l.backup()
   328			return lexIdentifier
   329		case r <= unicode.MaxASCII && unicode.IsPrint(r):
   330			l.emit(itemChar)
   331			return lexInsideAction
   332		default:
   333			return l.errorf("unrecognized character in action: %#U", r)
   334		}
   335		return lexInsideAction
   336	}
   337	
   338	// lexIdentifier scans an alphanumeric or field.
   339	func lexIdentifier(l *lexer) stateFn {
   340	Loop:
   341		for {
   342			switch r := l.next(); {
   343			case isAlphaNumeric(r):
   344				// absorb.
   345			case r == '.' && (l.input[l.start] == '.' || l.input[l.start] == '$'):
   346				// field chaining; absorb into one token.
   347			default:
   348				l.backup()
   349				word := l.input[l.start:l.pos]
   350				if !l.atTerminator() {
   351					return l.errorf("unexpected character %+U", r)
   352				}
   353				switch {
   354				case key[word] > itemKeyword:
   355					l.emit(key[word])
   356				case word[0] == '.':
   357					l.emit(itemField)
   358				case word[0] == '$':
   359					l.emit(itemVariable)
   360				case word == "true", word == "false":
   361					l.emit(itemBool)
   362				default:
   363					l.emit(itemIdentifier)
   364				}
   365				break Loop
   366			}
   367		}
   368		return lexInsideAction
   369	}
   370	
   371	// atTerminator reports whether the input is at valid termination character to
   372	// appear after an identifier. Mostly to catch cases like "$x+2" not being
   373	// acceptable without a space, in case we decide one day to implement
   374	// arithmetic.
   375	func (l *lexer) atTerminator() bool {
   376		r := l.peek()
   377		if isSpace(r) {
   378			return true
   379		}
   380		switch r {
   381		case eof, ',', '|', ':':
   382			return true
   383		}
   384		// Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
   385		// succeed but should fail) but only in extremely rare cases caused by willfully
   386		// bad choice of delimiter.
   387		if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
   388			return true
   389		}
   390		return false
   391	}
   392	
   393	// lexChar scans a character constant. The initial quote is already
   394	// scanned.  Syntax checking is done by the parse.
   395	func lexChar(l *lexer) stateFn {
   396	Loop:
   397		for {
   398			switch l.next() {
   399			case '\\':
   400				if r := l.next(); r != eof && r != '\n' {
   401					break
   402				}
   403				fallthrough
   404			case eof, '\n':
   405				return l.errorf("unterminated character constant")
   406			case '\'':
   407				break Loop
   408			}
   409		}
   410		l.emit(itemCharConstant)
   411		return lexInsideAction
   412	}
   413	
   414	// lexNumber scans a number: decimal, octal, hex, float, or imaginary.  This
   415	// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
   416	// and "089" - but when it's wrong the input is invalid and the parser (via
   417	// strconv) will notice.
   418	func lexNumber(l *lexer) stateFn {
   419		if !l.scanNumber() {
   420			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
   421		}
   422		if sign := l.peek(); sign == '+' || sign == '-' {
   423			// Complex: 1+2i.  No spaces, must end in 'i'.
   424			if !l.scanNumber() || l.input[l.pos-1] != 'i' {
   425				return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
   426			}
   427			l.emit(itemComplex)
   428		} else {
   429			l.emit(itemNumber)
   430		}
   431		return lexInsideAction
   432	}
   433	
   434	func (l *lexer) scanNumber() bool {
   435		// Optional leading sign.
   436		l.accept("+-")
   437		// Is it hex?
   438		digits := "0123456789"
   439		if l.accept("0") && l.accept("xX") {
   440			digits = "0123456789abcdefABCDEF"
   441		}
   442		l.acceptRun(digits)
   443		if l.accept(".") {
   444			l.acceptRun(digits)
   445		}
   446		if l.accept("eE") {
   447			l.accept("+-")
   448			l.acceptRun("0123456789")
   449		}
   450		// Is it imaginary?
   451		l.accept("i")
   452		// Next thing mustn't be alphanumeric.
   453		if isAlphaNumeric(l.peek()) {
   454			l.next()
   455			return false
   456		}
   457		return true
   458	}
   459	
   460	// lexQuote scans a quoted string.
   461	func lexQuote(l *lexer) stateFn {
   462	Loop:
   463		for {
   464			switch l.next() {
   465			case '\\':
   466				if r := l.next(); r != eof && r != '\n' {
   467					break
   468				}
   469				fallthrough
   470			case eof, '\n':
   471				return l.errorf("unterminated quoted string")
   472			case '"':
   473				break Loop
   474			}
   475		}
   476		l.emit(itemString)
   477		return lexInsideAction
   478	}
   479	
   480	// lexRawQuote scans a raw quoted string.
   481	func lexRawQuote(l *lexer) stateFn {
   482	Loop:
   483		for {
   484			switch l.next() {
   485			case eof, '\n':
   486				return l.errorf("unterminated raw quoted string")
   487			case '`':
   488				break Loop
   489			}
   490		}
   491		l.emit(itemRawString)
   492		return lexInsideAction
   493	}
   494	
   495	// isSpace reports whether r is a space character.
   496	func isSpace(r rune) bool {
   497		switch r {
   498		case ' ', '\t', '\n', '\r':
   499			return true
   500		}
   501		return false
   502	}
   503	
   504	// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
   505	func isAlphaNumeric(r rune) bool {
   506		return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
   507	}