Source file src/pkg/unicode/graphic.go
1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package unicode 6 7 // Bit masks for each code point under U+0100, for fast lookup. 8 const ( 9 pC = 1 << iota // a control character. 10 pP // a punctuation character. 11 pN // a numeral. 12 pS // a symbolic character. 13 pZ // a spacing character. 14 pLu // an upper-case letter. 15 pLl // a lower-case letter. 16 pp // a printable character according to Go's definition. 17 pg = pp | pZ // a graphical character according to the Unicode definition. 18 ) 19 20 // GraphicRanges defines the set of graphic characters according to Unicode. 21 var GraphicRanges = []*RangeTable{ 22 L, M, N, P, S, Zs, 23 } 24 25 // PrintRanges defines the set of printable characters according to Go. 26 // ASCII space, U+0020, is handled separately. 27 var PrintRanges = []*RangeTable{ 28 L, M, N, P, S, 29 } 30 31 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. 32 // Such characters include letters, marks, numbers, punctuation, symbols, and 33 // spaces, from categories L, M, N, P, S, Zs. 34 func IsGraphic(r rune) bool { 35 // We convert to uint32 to avoid the extra test for negative, 36 // and in the index we convert to uint8 to avoid the range check. 37 if uint32(r) <= MaxLatin1 { 38 return properties[uint8(r)]&pg != 0 39 } 40 return IsOneOf(GraphicRanges, r) 41 } 42 43 // IsPrint reports whether the rune is defined as printable by Go. Such 44 // characters include letters, marks, numbers, punctuation, symbols, and the 45 // ASCII space character, from categories L, M, N, P, S and the ASCII space 46 // character. This categorization is the same as IsGraphic except that the 47 // only spacing character is ASCII space, U+0020. 48 func IsPrint(r rune) bool { 49 if uint32(r) <= MaxLatin1 { 50 return properties[uint8(r)]&pp != 0 51 } 52 return IsOneOf(PrintRanges, r) 53 } 54 55 // IsOneOf reports whether the rune is a member of one of the ranges. 56 func IsOneOf(set []*RangeTable, r rune) bool { 57 for _, inside := range set { 58 if Is(inside, r) { 59 return true 60 } 61 } 62 return false 63 } 64 65 // IsControl reports whether the rune is a control character. 66 // The C (Other) Unicode category includes more code points 67 // such as surrogates; use Is(C, r) to test for them. 68 func IsControl(r rune) bool { 69 if uint32(r) <= MaxLatin1 { 70 return properties[uint8(r)]&pC != 0 71 } 72 // All control characters are < Latin1Max. 73 return false 74 } 75 76 // IsLetter reports whether the rune is a letter (category L). 77 func IsLetter(r rune) bool { 78 if uint32(r) <= MaxLatin1 { 79 return properties[uint8(r)]&(pLu|pLl) != 0 80 } 81 return Is(Letter, r) 82 } 83 84 // IsMark reports whether the rune is a mark character (category M). 85 func IsMark(r rune) bool { 86 // There are no mark characters in Latin-1. 87 return Is(Mark, r) 88 } 89 90 // IsNumber reports whether the rune is a number (category N). 91 func IsNumber(r rune) bool { 92 if uint32(r) <= MaxLatin1 { 93 return properties[uint8(r)]&pN != 0 94 } 95 return Is(Number, r) 96 } 97 98 // IsPunct reports whether the rune is a Unicode punctuation character 99 // (category P). 100 func IsPunct(r rune) bool { 101 if uint32(r) <= MaxLatin1 { 102 return properties[uint8(r)]&pP != 0 103 } 104 return Is(Punct, r) 105 } 106 107 // IsSpace reports whether the rune is a space character as defined 108 // by Unicode's White Space property; in the Latin-1 space 109 // this is 110 // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP). 111 // Other definitions of spacing characters are set by category 112 // Z and property Pattern_White_Space. 113 func IsSpace(r rune) bool { 114 // This property isn't the same as Z; special-case it. 115 if uint32(r) <= MaxLatin1 { 116 switch r { 117 case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0: 118 return true 119 } 120 return false 121 } 122 return Is(White_Space, r) 123 } 124 125 // IsSymbol reports whether the rune is a symbolic character. 126 func IsSymbol(r rune) bool { 127 if uint32(r) <= MaxLatin1 { 128 return properties[uint8(r)]&pS != 0 129 } 130 return Is(Symbol, r) 131 }