src/pkg/text/tabwriter/tabwriter.go - The Go Programming Language

Golang

Source file src/pkg/text/tabwriter/tabwriter.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package tabwriter implements a write filter (tabwriter.Writer) that
     6	// translates tabbed columns in input into properly aligned text.
     7	//
     8	// The package is using the Elastic Tabstops algorithm described at
     9	// http://nickgravgaard.com/elastictabstops/index.html.
    10	//
    11	package tabwriter
    12	
    13	import (
    14		"bytes"
    15		"io"
    16		"unicode/utf8"
    17	)
    18	
    19	// ----------------------------------------------------------------------------
    20	// Filter implementation
    21	
    22	// A cell represents a segment of text terminated by tabs or line breaks.
    23	// The text itself is stored in a separate buffer; cell only describes the
    24	// segment's size in bytes, its width in runes, and whether it's an htab
    25	// ('\t') terminated cell.
    26	//
    27	type cell struct {
    28		size  int  // cell size in bytes
    29		width int  // cell width in runes
    30		htab  bool // true if the cell is terminated by an htab ('\t')
    31	}
    32	
    33	// A Writer is a filter that inserts padding around tab-delimited
    34	// columns in its input to align them in the output.
    35	//
    36	// The Writer treats incoming bytes as UTF-8 encoded text consisting
    37	// of cells terminated by (horizontal or vertical) tabs or line
    38	// breaks (newline or formfeed characters). Cells in adjacent lines
    39	// constitute a column. The Writer inserts padding as needed to
    40	// make all cells in a column have the same width, effectively
    41	// aligning the columns. It assumes that all characters have the
    42	// same width except for tabs for which a tabwidth must be specified.
    43	// Note that cells are tab-terminated, not tab-separated: trailing
    44	// non-tab text at the end of a line does not form a column cell.
    45	//
    46	// The Writer assumes that all Unicode code points have the same width;
    47	// this may not be true in some fonts.
    48	//
    49	// If DiscardEmptyColumns is set, empty columns that are terminated
    50	// entirely by vertical (or "soft") tabs are discarded. Columns
    51	// terminated by horizontal (or "hard") tabs are not affected by
    52	// this flag.
    53	//
    54	// If a Writer is configured to filter HTML, HTML tags and entities
    55	// are passed through. The widths of tags and entities are
    56	// assumed to be zero (tags) and one (entities) for formatting purposes.
    57	//
    58	// A segment of text may be escaped by bracketing it with Escape
    59	// characters. The tabwriter passes escaped text segments through
    60	// unchanged. In particular, it does not interpret any tabs or line
    61	// breaks within the segment. If the StripEscape flag is set, the
    62	// Escape characters are stripped from the output; otherwise they
    63	// are passed through as well. For the purpose of formatting, the
    64	// width of the escaped text is always computed excluding the Escape
    65	// characters.
    66	//
    67	// The formfeed character ('\f') acts like a newline but it also
    68	// terminates all columns in the current line (effectively calling
    69	// Flush). Cells in the next line start new columns. Unless found
    70	// inside an HTML tag or inside an escaped text segment, formfeed
    71	// characters appear as newlines in the output.
    72	//
    73	// The Writer must buffer input internally, because proper spacing
    74	// of one line may depend on the cells in future lines. Clients must
    75	// call Flush when done calling Write.
    76	//
    77	type Writer struct {
    78		// configuration
    79		output   io.Writer
    80		minwidth int
    81		tabwidth int
    82		padding  int
    83		padbytes [8]byte
    84		flags    uint
    85	
    86		// current state
    87		buf     bytes.Buffer // collected text excluding tabs or line breaks
    88		pos     int          // buffer position up to which cell.width of incomplete cell has been computed
    89		cell    cell         // current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
    90		endChar byte         // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
    91		lines   [][]cell     // list of lines; each line is a list of cells
    92		widths  []int        // list of column widths in runes - re-used during formatting
    93	}
    94	
    95	func (b *Writer) addLine() { b.lines = append(b.lines, []cell{}) }
    96	
    97	// Reset the current state.
    98	func (b *Writer) reset() {
    99		b.buf.Reset()
   100		b.pos = 0
   101		b.cell = cell{}
   102		b.endChar = 0
   103		b.lines = b.lines[0:0]
   104		b.widths = b.widths[0:0]
   105		b.addLine()
   106	}
   107	
   108	// Internal representation (current state):
   109	//
   110	// - all text written is appended to buf; tabs and line breaks are stripped away
   111	// - at any given time there is a (possibly empty) incomplete cell at the end
   112	//   (the cell starts after a tab or line break)
   113	// - cell.size is the number of bytes belonging to the cell so far
   114	// - cell.width is text width in runes of that cell from the start of the cell to
   115	//   position pos; html tags and entities are excluded from this width if html
   116	//   filtering is enabled
   117	// - the sizes and widths of processed text are kept in the lines list
   118	//   which contains a list of cells for each line
   119	// - the widths list is a temporary list with current widths used during
   120	//   formatting; it is kept in Writer because it's re-used
   121	//
   122	//                    |<---------- size ---------->|
   123	//                    |                            |
   124	//                    |<- width ->|<- ignored ->|  |
   125	//                    |           |             |  |
   126	// [---processed---tab------------<tag>...</tag>...]
   127	// ^                  ^                         ^
   128	// |                  |                         |
   129	// buf                start of incomplete cell  pos
   130	
   131	// Formatting can be controlled with these flags.
   132	const (
   133		// Ignore html tags and treat entities (starting with '&'
   134		// and ending in ';') as single characters (width = 1).
   135		FilterHTML uint = 1 << iota
   136	
   137		// Strip Escape characters bracketing escaped text segments
   138		// instead of passing them through unchanged with the text.
   139		StripEscape
   140	
   141		// Force right-alignment of cell content.
   142		// Default is left-alignment.
   143		AlignRight
   144	
   145		// Handle empty columns as if they were not present in
   146		// the input in the first place.
   147		DiscardEmptyColumns
   148	
   149		// Always use tabs for indentation columns (i.e., padding of
   150		// leading empty cells on the left) independent of padchar.
   151		TabIndent
   152	
   153		// Print a vertical bar ('|') between columns (after formatting).
   154		// Discarded columns appear as zero-width columns ("||").
   155		Debug
   156	)
   157	
   158	// A Writer must be initialized with a call to Init. The first parameter (output)
   159	// specifies the filter output. The remaining parameters control the formatting:
   160	//
   161	//	minwidth	minimal cell width including any padding
   162	//	tabwidth	width of tab characters (equivalent number of spaces)
   163	//	padding		padding added to a cell before computing its width
   164	//	padchar		ASCII char used for padding
   165	//			if padchar == '\t', the Writer will assume that the
   166	//			width of a '\t' in the formatted output is tabwidth,
   167	//			and cells are left-aligned independent of align_left
   168	//			(for correct-looking results, tabwidth must correspond
   169	//			to the tab width in the viewer displaying the result)
   170	//	flags		formatting control
   171	//
   172	func (b *Writer) Init(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
   173		if minwidth < 0 || tabwidth < 0 || padding < 0 {
   174			panic("negative minwidth, tabwidth, or padding")
   175		}
   176		b.output = output
   177		b.minwidth = minwidth
   178		b.tabwidth = tabwidth
   179		b.padding = padding
   180		for i := range b.padbytes {
   181			b.padbytes[i] = padchar
   182		}
   183		if padchar == '\t' {
   184			// tab padding enforces left-alignment
   185			flags &^= AlignRight
   186		}
   187		b.flags = flags
   188	
   189		b.reset()
   190	
   191		return b
   192	}
   193	
   194	// debugging support (keep code around)
   195	func (b *Writer) dump() {
   196		pos := 0
   197		for i, line := range b.lines {
   198			print("(", i, ") ")
   199			for _, c := range line {
   200				print("[", string(b.buf.Bytes()[pos:pos+c.size]), "]")
   201				pos += c.size
   202			}
   203			print("\n")
   204		}
   205		print("\n")
   206	}
   207	
   208	// local error wrapper so we can distinguish errors we want to return
   209	// as errors from genuine panics (which we don't want to return as errors)
   210	type osError struct {
   211		err error
   212	}
   213	
   214	func (b *Writer) write0(buf []byte) {
   215		n, err := b.output.Write(buf)
   216		if n != len(buf) && err == nil {
   217			err = io.ErrShortWrite
   218		}
   219		if err != nil {
   220			panic(osError{err})
   221		}
   222	}
   223	
   224	func (b *Writer) writeN(src []byte, n int) {
   225		for n > len(src) {
   226			b.write0(src)
   227			n -= len(src)
   228		}
   229		b.write0(src[0:n])
   230	}
   231	
   232	var (
   233		newline = []byte{'\n'}
   234		tabs    = []byte("\t\t\t\t\t\t\t\t")
   235	)
   236	
   237	func (b *Writer) writePadding(textw, cellw int, useTabs bool) {
   238		if b.padbytes[0] == '\t' || useTabs {
   239			// padding is done with tabs
   240			if b.tabwidth == 0 {
   241				return // tabs have no width - can't do any padding
   242			}
   243			// make cellw the smallest multiple of b.tabwidth
   244			cellw = (cellw + b.tabwidth - 1) / b.tabwidth * b.tabwidth
   245			n := cellw - textw // amount of padding
   246			if n < 0 {
   247				panic("internal error")
   248			}
   249			b.writeN(tabs, (n+b.tabwidth-1)/b.tabwidth)
   250			return
   251		}
   252	
   253		// padding is done with non-tab characters
   254		b.writeN(b.padbytes[0:], cellw-textw)
   255	}
   256	
   257	var vbar = []byte{'|'}
   258	
   259	func (b *Writer) writeLines(pos0 int, line0, line1 int) (pos int) {
   260		pos = pos0
   261		for i := line0; i < line1; i++ {
   262			line := b.lines[i]
   263	
   264			// if TabIndent is set, use tabs to pad leading empty cells
   265			useTabs := b.flags&TabIndent != 0
   266	
   267			for j, c := range line {
   268				if j > 0 && b.flags&Debug != 0 {
   269					// indicate column break
   270					b.write0(vbar)
   271				}
   272	
   273				if c.size == 0 {
   274					// empty cell
   275					if j < len(b.widths) {
   276						b.writePadding(c.width, b.widths[j], useTabs)
   277					}
   278				} else {
   279					// non-empty cell
   280					useTabs = false
   281					if b.flags&AlignRight == 0 { // align left
   282						b.write0(b.buf.Bytes()[pos : pos+c.size])
   283						pos += c.size
   284						if j < len(b.widths) {
   285							b.writePadding(c.width, b.widths[j], false)
   286						}
   287					} else { // align right
   288						if j < len(b.widths) {
   289							b.writePadding(c.width, b.widths[j], false)
   290						}
   291						b.write0(b.buf.Bytes()[pos : pos+c.size])
   292						pos += c.size
   293					}
   294				}
   295			}
   296	
   297			if i+1 == len(b.lines) {
   298				// last buffered line - we don't have a newline, so just write
   299				// any outstanding buffered data
   300				b.write0(b.buf.Bytes()[pos : pos+b.cell.size])
   301				pos += b.cell.size
   302			} else {
   303				// not the last line - write newline
   304				b.write0(newline)
   305			}
   306		}
   307		return
   308	}
   309	
   310	// Format the text between line0 and line1 (excluding line1); pos
   311	// is the buffer position corresponding to the beginning of line0.
   312	// Returns the buffer position corresponding to the beginning of
   313	// line1 and an error, if any.
   314	//
   315	func (b *Writer) format(pos0 int, line0, line1 int) (pos int) {
   316		pos = pos0
   317		column := len(b.widths)
   318		for this := line0; this < line1; this++ {
   319			line := b.lines[this]
   320	
   321			if column < len(line)-1 {
   322				// cell exists in this column => this line
   323				// has more cells than the previous line
   324				// (the last cell per line is ignored because cells are
   325				// tab-terminated; the last cell per line describes the
   326				// text before the newline/formfeed and does not belong
   327				// to a column)
   328	
   329				// print unprinted lines until beginning of block
   330				pos = b.writeLines(pos, line0, this)
   331				line0 = this
   332	
   333				// column block begin
   334				width := b.minwidth // minimal column width
   335				discardable := true // true if all cells in this column are empty and "soft"
   336				for ; this < line1; this++ {
   337					line = b.lines[this]
   338					if column < len(line)-1 {
   339						// cell exists in this column
   340						c := line[column]
   341						// update width
   342						if w := c.width + b.padding; w > width {
   343							width = w
   344						}
   345						// update discardable
   346						if c.width > 0 || c.htab {
   347							discardable = false
   348						}
   349					} else {
   350						break
   351					}
   352				}
   353				// column block end
   354	
   355				// discard empty columns if necessary
   356				if discardable && b.flags&DiscardEmptyColumns != 0 {
   357					width = 0
   358				}
   359	
   360				// format and print all columns to the right of this column
   361				// (we know the widths of this column and all columns to the left)
   362				b.widths = append(b.widths, width) // push width
   363				pos = b.format(pos, line0, this)
   364				b.widths = b.widths[0 : len(b.widths)-1] // pop width
   365				line0 = this
   366			}
   367		}
   368	
   369		// print unprinted lines until end
   370		return b.writeLines(pos, line0, line1)
   371	}
   372	
   373	// Append text to current cell.
   374	func (b *Writer) append(text []byte) {
   375		b.buf.Write(text)
   376		b.cell.size += len(text)
   377	}
   378	
   379	// Update the cell width.
   380	func (b *Writer) updateWidth() {
   381		b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos:b.buf.Len()])
   382		b.pos = b.buf.Len()
   383	}
   384	
   385	// To escape a text segment, bracket it with Escape characters.
   386	// For instance, the tab in this string "Ignore this tab: \xff\t\xff"
   387	// does not terminate a cell and constitutes a single character of
   388	// width one for formatting purposes.
   389	//
   390	// The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
   391	//
   392	const Escape = '\xff'
   393	
   394	// Start escaped mode.
   395	func (b *Writer) startEscape(ch byte) {
   396		switch ch {
   397		case Escape:
   398			b.endChar = Escape
   399		case '<':
   400			b.endChar = '>'
   401		case '&':
   402			b.endChar = ';'
   403		}
   404	}
   405	
   406	// Terminate escaped mode. If the escaped text was an HTML tag, its width
   407	// is assumed to be zero for formatting purposes; if it was an HTML entity,
   408	// its width is assumed to be one. In all other cases, the width is the
   409	// unicode width of the text.
   410	//
   411	func (b *Writer) endEscape() {
   412		switch b.endChar {
   413		case Escape:
   414			b.updateWidth()
   415			if b.flags&StripEscape == 0 {
   416				b.cell.width -= 2 // don't count the Escape chars
   417			}
   418		case '>': // tag of zero width
   419		case ';':
   420			b.cell.width++ // entity, count as one rune
   421		}
   422		b.pos = b.buf.Len()
   423		b.endChar = 0
   424	}
   425	
   426	// Terminate the current cell by adding it to the list of cells of the
   427	// current line. Returns the number of cells in that line.
   428	//
   429	func (b *Writer) terminateCell(htab bool) int {
   430		b.cell.htab = htab
   431		line := &b.lines[len(b.lines)-1]
   432		*line = append(*line, b.cell)
   433		b.cell = cell{}
   434		return len(*line)
   435	}
   436	
   437	func handlePanic(err *error) {
   438		if e := recover(); e != nil {
   439			*err = e.(osError).err // re-panics if it's not a local osError
   440		}
   441	}
   442	
   443	// Flush should be called after the last call to Write to ensure
   444	// that any data buffered in the Writer is written to output. Any
   445	// incomplete escape sequence at the end is considered
   446	// complete for formatting purposes.
   447	//
   448	func (b *Writer) Flush() (err error) {
   449		defer b.reset() // even in the presence of errors
   450		defer handlePanic(&err)
   451	
   452		// add current cell if not empty
   453		if b.cell.size > 0 {
   454			if b.endChar != 0 {
   455				// inside escape - terminate it even if incomplete
   456				b.endEscape()
   457			}
   458			b.terminateCell(false)
   459		}
   460	
   461		// format contents of buffer
   462		b.format(0, 0, len(b.lines))
   463	
   464		return
   465	}
   466	
   467	var hbar = []byte("---\n")
   468	
   469	// Write writes buf to the writer b.
   470	// The only errors returned are ones encountered
   471	// while writing to the underlying output stream.
   472	//
   473	func (b *Writer) Write(buf []byte) (n int, err error) {
   474		defer handlePanic(&err)
   475	
   476		// split text into cells
   477		n = 0
   478		for i, ch := range buf {
   479			if b.endChar == 0 {
   480				// outside escape
   481				switch ch {
   482				case '\t', '\v', '\n', '\f':
   483					// end of cell
   484					b.append(buf[n:i])
   485					b.updateWidth()
   486					n = i + 1 // ch consumed
   487					ncells := b.terminateCell(ch == '\t')
   488					if ch == '\n' || ch == '\f' {
   489						// terminate line
   490						b.addLine()
   491						if ch == '\f' || ncells == 1 {
   492							// A '\f' always forces a flush. Otherwise, if the previous
   493							// line has only one cell which does not have an impact on
   494							// the formatting of the following lines (the last cell per
   495							// line is ignored by format()), thus we can flush the
   496							// Writer contents.
   497							if err = b.Flush(); err != nil {
   498								return
   499							}
   500							if ch == '\f' && b.flags&Debug != 0 {
   501								// indicate section break
   502								b.write0(hbar)
   503							}
   504						}
   505					}
   506	
   507				case Escape:
   508					// start of escaped sequence
   509					b.append(buf[n:i])
   510					b.updateWidth()
   511					n = i
   512					if b.flags&StripEscape != 0 {
   513						n++ // strip Escape
   514					}
   515					b.startEscape(Escape)
   516	
   517				case '<', '&':
   518					// possibly an html tag/entity
   519					if b.flags&FilterHTML != 0 {
   520						// begin of tag/entity
   521						b.append(buf[n:i])
   522						b.updateWidth()
   523						n = i
   524						b.startEscape(ch)
   525					}
   526				}
   527	
   528			} else {
   529				// inside escape
   530				if ch == b.endChar {
   531					// end of tag/entity
   532					j := i + 1
   533					if ch == Escape && b.flags&StripEscape != 0 {
   534						j = i // strip Escape
   535					}
   536					b.append(buf[n:j])
   537					n = i + 1 // ch consumed
   538					b.endEscape()
   539				}
   540			}
   541		}
   542	
   543		// append leftover text
   544		b.append(buf[n:])
   545		n = len(buf)
   546		return
   547	}
   548	
   549	// NewWriter allocates and initializes a new tabwriter.Writer.
   550	// The parameters are the same as for the the Init function.
   551	//
   552	func NewWriter(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
   553		return new(Writer).Init(output, minwidth, tabwidth, padding, padchar, flags)
   554	}