src/pkg/unicode/graphic.go - The Go Programming Language

Golang

Source file src/pkg/unicode/graphic.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package unicode
     6	
     7	// Bit masks for each code point under U+0100, for fast lookup.
     8	const (
     9		pC  = 1 << iota // a control character.
    10		pP              // a punctuation character.
    11		pN              // a numeral.
    12		pS              // a symbolic character.
    13		pZ              // a spacing character.
    14		pLu             // an upper-case letter.
    15		pLl             // a lower-case letter.
    16		pp              // a printable character according to Go's definition.
    17		pg  = pp | pZ   // a graphical character according to the Unicode definition.
    18	)
    19	
    20	// GraphicRanges defines the set of graphic characters according to Unicode.
    21	var GraphicRanges = []*RangeTable{
    22		L, M, N, P, S, Zs,
    23	}
    24	
    25	// PrintRanges defines the set of printable characters according to Go.
    26	// ASCII space, U+0020, is handled separately.
    27	var PrintRanges = []*RangeTable{
    28		L, M, N, P, S,
    29	}
    30	
    31	// IsGraphic reports whether the rune is defined as a Graphic by Unicode.
    32	// Such characters include letters, marks, numbers, punctuation, symbols, and
    33	// spaces, from categories L, M, N, P, S, Zs.
    34	func IsGraphic(r rune) bool {
    35		// We convert to uint32 to avoid the extra test for negative,
    36		// and in the index we convert to uint8 to avoid the range check.
    37		if uint32(r) <= MaxLatin1 {
    38			return properties[uint8(r)]&pg != 0
    39		}
    40		return IsOneOf(GraphicRanges, r)
    41	}
    42	
    43	// IsPrint reports whether the rune is defined as printable by Go. Such
    44	// characters include letters, marks, numbers, punctuation, symbols, and the
    45	// ASCII space character, from categories L, M, N, P, S and the ASCII space
    46	// character.  This categorization is the same as IsGraphic except that the
    47	// only spacing character is ASCII space, U+0020.
    48	func IsPrint(r rune) bool {
    49		if uint32(r) <= MaxLatin1 {
    50			return properties[uint8(r)]&pp != 0
    51		}
    52		return IsOneOf(PrintRanges, r)
    53	}
    54	
    55	// IsOneOf reports whether the rune is a member of one of the ranges.
    56	func IsOneOf(set []*RangeTable, r rune) bool {
    57		for _, inside := range set {
    58			if Is(inside, r) {
    59				return true
    60			}
    61		}
    62		return false
    63	}
    64	
    65	// IsControl reports whether the rune is a control character.
    66	// The C (Other) Unicode category includes more code points
    67	// such as surrogates; use Is(C, r) to test for them.
    68	func IsControl(r rune) bool {
    69		if uint32(r) <= MaxLatin1 {
    70			return properties[uint8(r)]&pC != 0
    71		}
    72		// All control characters are < Latin1Max.
    73		return false
    74	}
    75	
    76	// IsLetter reports whether the rune is a letter (category L).
    77	func IsLetter(r rune) bool {
    78		if uint32(r) <= MaxLatin1 {
    79			return properties[uint8(r)]&(pLu|pLl) != 0
    80		}
    81		return Is(Letter, r)
    82	}
    83	
    84	// IsMark reports whether the rune is a mark character (category M).
    85	func IsMark(r rune) bool {
    86		// There are no mark characters in Latin-1.
    87		return Is(Mark, r)
    88	}
    89	
    90	// IsNumber reports whether the rune is a number (category N).
    91	func IsNumber(r rune) bool {
    92		if uint32(r) <= MaxLatin1 {
    93			return properties[uint8(r)]&pN != 0
    94		}
    95		return Is(Number, r)
    96	}
    97	
    98	// IsPunct reports whether the rune is a Unicode punctuation character
    99	// (category P).
   100	func IsPunct(r rune) bool {
   101		if uint32(r) <= MaxLatin1 {
   102			return properties[uint8(r)]&pP != 0
   103		}
   104		return Is(Punct, r)
   105	}
   106	
   107	// IsSpace reports whether the rune is a space character as defined
   108	// by Unicode's White Space property; in the Latin-1 space
   109	// this is
   110	//	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
   111	// Other definitions of spacing characters are set by category
   112	// Z and property Pattern_White_Space.
   113	func IsSpace(r rune) bool {
   114		// This property isn't the same as Z; special-case it.
   115		if uint32(r) <= MaxLatin1 {
   116			switch r {
   117			case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
   118				return true
   119			}
   120			return false
   121		}
   122		return Is(White_Space, r)
   123	}
   124	
   125	// IsSymbol reports whether the rune is a symbolic character.
   126	func IsSymbol(r rune) bool {
   127		if uint32(r) <= MaxLatin1 {
   128			return properties[uint8(r)]&pS != 0
   129		}
   130		return Is(Symbol, r)
   131	}