Source file src/pkg/unicode/graphic.go
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package unicode
6
// Property bit flags recorded for each code point under U+0100, giving the
// Latin-1 range a fast lookup path.
const (
	pC  = 1 << 0 // a control character.
	pP  = 1 << 1 // a punctuation character.
	pN  = 1 << 2 // a numeral.
	pS  = 1 << 3 // a symbolic character.
	pZ  = 1 << 4 // a spacing character.
	pLu = 1 << 5 // an upper-case letter.
	pLl = 1 << 6 // a lower-case letter.
	pp  = 1 << 7 // a printable character according to Go's definition.

	// pg combines the printable and spacing bits.
	pg = pp | pZ // a graphical character according to the Unicode definition.
)
19
20 // GraphicRanges defines the set of graphic characters according to Unicode.
21 var GraphicRanges = []*RangeTable{
22 L, M, N, P, S, Zs,
23 }
24
25 // PrintRanges defines the set of printable characters according to Go.
26 // ASCII space, U+0020, is handled separately.
27 var PrintRanges = []*RangeTable{
28 L, M, N, P, S,
29 }
30
31 // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
32 // Such characters include letters, marks, numbers, punctuation, symbols, and
33 // spaces, from categories L, M, N, P, S, Zs.
34 func IsGraphic(r rune) bool {
35 // We convert to uint32 to avoid the extra test for negative,
36 // and in the index we convert to uint8 to avoid the range check.
37 if uint32(r) <= MaxLatin1 {
38 return properties[uint8(r)]&pg != 0
39 }
40 return IsOneOf(GraphicRanges, r)
41 }
42
43 // IsPrint reports whether the rune is defined as printable by Go. Such
44 // characters include letters, marks, numbers, punctuation, symbols, and the
45 // ASCII space character, from categories L, M, N, P, S and the ASCII space
46 // character. This categorization is the same as IsGraphic except that the
47 // only spacing character is ASCII space, U+0020.
48 func IsPrint(r rune) bool {
49 if uint32(r) <= MaxLatin1 {
50 return properties[uint8(r)]&pp != 0
51 }
52 return IsOneOf(PrintRanges, r)
53 }
54
55 // IsOneOf reports whether the rune is a member of one of the ranges.
56 func IsOneOf(set []*RangeTable, r rune) bool {
57 for _, inside := range set {
58 if Is(inside, r) {
59 return true
60 }
61 }
62 return false
63 }
64
65 // IsControl reports whether the rune is a control character.
66 // The C (Other) Unicode category includes more code points
67 // such as surrogates; use Is(C, r) to test for them.
68 func IsControl(r rune) bool {
69 if uint32(r) <= MaxLatin1 {
70 return properties[uint8(r)]&pC != 0
71 }
72 // All control characters are < Latin1Max.
73 return false
74 }
75
76 // IsLetter reports whether the rune is a letter (category L).
77 func IsLetter(r rune) bool {
78 if uint32(r) <= MaxLatin1 {
79 return properties[uint8(r)]&(pLu|pLl) != 0
80 }
81 return Is(Letter, r)
82 }
83
84 // IsMark reports whether the rune is a mark character (category M).
85 func IsMark(r rune) bool {
86 // There are no mark characters in Latin-1.
87 return Is(Mark, r)
88 }
89
90 // IsNumber reports whether the rune is a number (category N).
91 func IsNumber(r rune) bool {
92 if uint32(r) <= MaxLatin1 {
93 return properties[uint8(r)]&pN != 0
94 }
95 return Is(Number, r)
96 }
97
98 // IsPunct reports whether the rune is a Unicode punctuation character
99 // (category P).
100 func IsPunct(r rune) bool {
101 if uint32(r) <= MaxLatin1 {
102 return properties[uint8(r)]&pP != 0
103 }
104 return Is(Punct, r)
105 }
106
107 // IsSpace reports whether the rune is a space character as defined
108 // by Unicode's White Space property; in the Latin-1 space
109 // this is
110 // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
111 // Other definitions of spacing characters are set by category
112 // Z and property Pattern_White_Space.
113 func IsSpace(r rune) bool {
114 // This property isn't the same as Z; special-case it.
115 if uint32(r) <= MaxLatin1 {
116 switch r {
117 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
118 return true
119 }
120 return false
121 }
122 return Is(White_Space, r)
123 }
124
125 // IsSymbol reports whether the rune is a symbolic character.
126 func IsSymbol(r rune) bool {
127 if uint32(r) <= MaxLatin1 {
128 return properties[uint8(r)]&pS != 0
129 }
130 return Is(Symbol, r)
131 }