Unicode.cpp
00001 00002 // 00003 // SFML - Simple and Fast Multimedia Library 00004 // Copyright (C) 2007-2009 Laurent Gomila ([email protected]) 00005 // 00006 // This software is provided 'as-is', without any express or implied warranty. 00007 // In no event will the authors be held liable for any damages arising from the use of this software. 00008 // 00009 // Permission is granted to anyone to use this software for any purpose, 00010 // including commercial applications, and to alter it and redistribute it freely, 00011 // subject to the following restrictions: 00012 // 00013 // 1. The origin of this software must not be misrepresented; 00014 // you must not claim that you wrote the original software. 00015 // If you use this software in a product, an acknowledgment 00016 // in the product documentation would be appreciated but is not required. 00017 // 00018 // 2. Altered source versions must be plainly marked as such, 00019 // and must not be misrepresented as being the original software. 00020 // 00021 // 3. This notice may not be removed or altered from any source distribution. 00022 // 00024 00026 // Headers 00028 #include <SFML/System/Unicode.hpp> 00029 #include <stdexcept> 00030 #include <string.h> 00031 00032 00034 // References : 00035 // 00036 // http://www.unicode.org/ 00037 // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c 00038 // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h 00039 // http://people.w3.org/rishida/scripts/uniview/conversion 00040 // 00042 00043 namespace 00044 { 00046 // Generic utility function to compute the number 00047 // of characters in a null-terminated string of any type 00049 template <typename T> 00050 std::size_t StrLen(const T* Str) 00051 { 00052 std::size_t Length = 0; 00053 while (*Str++) Length++; 00054 return Length; 00055 } 00056 00058 // Get the current system locale 00060 std::locale GetCurrentLocale() 00061 { 00062 try 00063 { 00064 return std::locale(""); 00065 } 00066 catch (std::runtime_error&) 00067 { 00068 // It seems some implementations don't know the "" locale 00069 // (Mac OS, MinGW) 00070 00071 return std::locale(); 00072 } 00073 } 00074 } 00075 00076 namespace sf 00077 { 00079 // Static member data 00081 const int Unicode::UTF8TrailingBytes[256] = 00082 { 00083 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00084 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00085 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00086 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00087 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00088 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00089 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00090 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 00091 }; 00092 const Uint32 Unicode::UTF8Offsets[6] = 00093 { 00094 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 00095 }; 00096 const Uint8 Unicode::UTF8FirstBytes[7] = 00097 { 00098 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC 00099 }; 00100 00101 00105 Unicode::Text::Text() 00106 { 00107 // Nothing to do 00108 } 00109 00110 00114 Unicode::Text::Text(const char* Str) 00115 { 00116 if (Str) 00117 { 00118 std::size_t Length = StrLen(Str); 00119 if (Length > 0) 00120 { 00121 myUTF32String.reserve(Length + 1); 00122 Unicode::ANSIToUTF32(Str, Str + Length, std::back_inserter(myUTF32String)); 00123 } 00124 } 00125 } 00126 Unicode::Text::Text(const wchar_t* Str) 00127 { 00128 if (Str) 00129 { 00130 std::size_t Length = StrLen(Str); 00131 if (Length > 0) 00132 { 00133 // See comments below, in Unicode::Text::Text(const std::wstring&) 00134 myUTF32String.reserve(Length + 1); 00135 switch (sizeof(wchar_t)) 00136 { 00137 case 2 : Unicode::UTF16ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0); break; 00138 case 4 : std::copy(Str, Str + Length, std::back_inserter(myUTF32String)); break; 00139 default : break; 00140 } 00141 } 00142 } 00143 } 00144 Unicode::Text::Text(const Uint8* Str) 00145 { 00146 if (Str) 00147 { 00148 std::size_t Length = StrLen(Str); 00149 if (Length > 0) 00150 { 00151 myUTF32String.reserve(Length + 1); 00152 Unicode::UTF8ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0); 00153 } 00154 } 00155 } 00156 Unicode::Text::Text(const Uint16* Str) 00157 { 00158 if (Str) 00159 { 00160 std::size_t Length = StrLen(Str); 00161 if (Length > 0) 00162 { 00163 myUTF32String.reserve(Length+ 1); 00164 Unicode::UTF16ToUTF32(Str, Str + Length, std::back_inserter(myUTF32String), 0); 00165 } 00166 } 00167 } 00168 Unicode::Text::Text(const Uint32* Str) 00169 { 00170 if (Str) 00171 myUTF32String = Str; 00172 } 00173 Unicode::Text::Text(const std::string& Str) 00174 { 00175 myUTF32String.reserve(Str.length() + 1); 00176 Unicode::ANSIToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String)); 00177 } 00178 Unicode::Text::Text(const std::wstring& Str) 00179 { 00180 // This function assumes that 2-byte large wchar_t are encoded in UTF-16 (Windows), and 00181 // 4-byte large wchar_t are encoded using UTF-32 (Unix) 00182 // Is that always true ? (some platforms may use JIS Japanese encoding) 00183 // The macro __STDC_ISO_10646__ should help identifying UTF-32 compliant implementations 00184 00185 myUTF32String.reserve(Str.length() + 1); 00186 00187 // Select the proper function according to the (supposed) wchar_t system encoding 00188 switch (sizeof(wchar_t)) 00189 { 00190 // wchar_t uses UTF-16 -- need a conversion 00191 case 2 : 00192 { 00193 Unicode::UTF16ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0); 00194 break; 00195 } 00196 00197 // wchar_t uses UTF-32 -- direct copy 00198 case 4 : 00199 { 00200 std::copy(Str.begin(), Str.end(), std::back_inserter(myUTF32String)); 00201 break; 00202 } 00203 00204 // This should never happen 00205 default : break; 00206 } 00207 } 00208 Unicode::Text::Text(const Unicode::UTF8String& Str) 00209 { 00210 myUTF32String.reserve(Str.length() + 1); 00211 Unicode::UTF8ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0); 00212 } 00213 Unicode::Text::Text(const Unicode::UTF16String& Str) 00214 { 00215 myUTF32String.reserve(Str.length() + 1); 00216 Unicode::UTF16ToUTF32(Str.begin(), Str.end(), std::back_inserter(myUTF32String), 0); 00217 } 00218 Unicode::Text::Text(const Unicode::UTF32String& Str) 00219 { 00220 myUTF32String = Str; 00221 } 00222 00223 00227 Unicode::Text::operator std::string() const 00228 { 00229 std::string Output; 00230 Output.reserve(myUTF32String.length() + 1); 00231 Unicode::UTF32ToANSI(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0, Unicode::GetDefaultLocale()); 00232 return Output; 00233 } 00234 Unicode::Text::operator std::wstring() const 00235 { 00236 // This function assumes that 2-byte large wchar_t are encoded in UTF-16 (Windows), and 00237 // 4-byte large wchar_t are encoded using UTF-32 (Unix) 00238 // Is that always true ? (some platforms may use JIS Japanese encoding) 00239 // The macro __STDC_ISO_10646__ should help identifying UTF-32 compliant implementations 00240 00241 std::wstring Output; 00242 Output.reserve(myUTF32String.length() + 1); 00243 00244 // Select the proper function according to the (supposed) wchar_t system encoding 00245 switch (sizeof(wchar_t)) 00246 { 00247 // wchar_t uses UTF-16 -- need a conversion 00248 case 2 : 00249 { 00250 UTF32ToUTF16(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0); 00251 break; 00252 } 00253 00254 // wchar_t uses UTF-32 -- direct copy 00255 case 4 : 00256 { 00257 std::copy(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output)); 00258 break; 00259 } 00260 00261 // This should never happen 00262 default : break; 00263 } 00264 return Output; 00265 } 00266 Unicode::Text::operator sf::Unicode::UTF8String() const 00267 { 00268 Unicode::UTF8String Output; 00269 Output.reserve(myUTF32String.length() * 4 + 1); 00270 Unicode::UTF32ToUTF8(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0); 00271 return Output; 00272 } 00273 Unicode::Text::operator sf::Unicode::UTF16String() const 00274 { 00275 Unicode::UTF16String Output; 00276 Output.reserve(myUTF32String.length() * 2 + 1); 00277 Unicode::UTF32ToUTF16(myUTF32String.begin(), myUTF32String.end(), std::back_inserter(Output), 0); 00278 return Output; 00279 } 00280 Unicode::Text::operator const sf::Unicode::UTF32String&() const 00281 { 00282 return myUTF32String; 00283 } 00284 00285 00289 const std::locale& Unicode::GetDefaultLocale() 00290 { 00291 // It seems getting the default locale is a very expensive operation, 00292 // so we only do it once and then store the locale for reuse. 00293 // Warning : this code won't be aware of any change of the default locale during runtime 00294 00295 static std::locale DefaultLocale = GetCurrentLocale(); 00296 00297 return DefaultLocale; 00298 } 00299 00300 } // namespace sf
:: Copyright © 2007-2008 Laurent Gomila, all rights reserved :: Documentation generated by doxygen 1.5.2 ::