Unicode.inl
//
// SFML - Simple and Fast Multimedia Library
// Copyright (C) 2007-2009 Laurent Gomila ([email protected])
//
// This software is provided 'as-is', without any express or implied warranty.
// In no event will the authors be held liable for any damages arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it freely,
// subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented;
//    you must not claim that you wrote the original software.
//    If you use this software in a product, an acknowledgment
//    in the product documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such,
//    and must not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source distribution.
00022 // 00024 00025 00030 template <typename In, typename Out> 00031 inline Out Unicode::UTF32ToANSI(In Begin, In End, Out Output, char Replacement, const std::locale& Locale) 00032 { 00033 #ifdef __MINGW32__ 00034 00035 // MinGW has a almost no support for unicode stuff 00036 // As a consequence, the MinGW version of this function can only use the default locale 00037 // and ignores the one passed as parameter 00038 while (Begin < End) 00039 { 00040 char Char = 0; 00041 if (wctomb(&Char, static_cast<wchar_t>(*Begin++)) >= 0) 00042 *Output++ = Char; 00043 else if (Replacement) 00044 *Output++ = Replacement; 00045 } 00046 00047 #else 00048 00049 // Get the facet of the locale which deals with character conversion 00050 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale); 00051 00052 // Use the facet to convert each character of the input string 00053 while (Begin < End) 00054 *Output++ = Facet.narrow(static_cast<wchar_t>(*Begin++), Replacement); 00055 00056 #endif 00057 00058 return Output; 00059 } 00060 00061 00066 template <typename In, typename Out> 00067 inline Out Unicode::ANSIToUTF32(In Begin, In End, Out Output, const std::locale& Locale) 00068 { 00069 #ifdef __MINGW32__ 00070 00071 // MinGW has a almost no support for unicode stuff 00072 // As a consequence, the MinGW version of this function can only use the default locale 00073 // and ignores the one passed as parameter 00074 while (Begin < End) 00075 { 00076 wchar_t Char = 0; 00077 mbtowc(&Char, &*Begin, 1); 00078 Begin++; 00079 *Output++ = static_cast<Uint32>(Char); 00080 } 00081 00082 #else 00083 00084 // Get the facet of the locale which deals with character conversion 00085 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale); 00086 00087 // Use the facet to convert each character of the input string 00088 while (Begin < End) 00089 *Output++ = static_cast<Uint32>(Facet.widen(*Begin++)); 00090 00091 #endif 00092 00093 return Output; 00094 } 
00095 00096 00101 template <typename In, typename Out> 00102 inline Out Unicode::UTF8ToUTF16(In Begin, In End, Out Output, Uint16 Replacement) 00103 { 00104 while (Begin < End) 00105 { 00106 Uint32 c = 0; 00107 int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)]; 00108 if (Begin + TrailingBytes < End) 00109 { 00110 // First decode the UTF-8 character 00111 switch (TrailingBytes) 00112 { 00113 case 5 : c += *Begin++; c <<= 6; 00114 case 4 : c += *Begin++; c <<= 6; 00115 case 3 : c += *Begin++; c <<= 6; 00116 case 2 : c += *Begin++; c <<= 6; 00117 case 1 : c += *Begin++; c <<= 6; 00118 case 0 : c += *Begin++; 00119 } 00120 c -= UTF8Offsets[TrailingBytes]; 00121 00122 // Then encode it in UTF-16 00123 if (c < 0xFFFF) 00124 { 00125 // Character can be converted directly to 16 bits, just need to check it's in the valid range 00126 if ((c >= 0xD800) && (c <= 0xDFFF)) 00127 { 00128 // Invalid character (this range is reserved) 00129 if (Replacement) 00130 *Output++ = Replacement; 00131 } 00132 else 00133 { 00134 // Valid character directly convertible to 16 bits 00135 *Output++ = static_cast<Uint16>(c); 00136 } 00137 } 00138 else if (c > 0x0010FFFF) 00139 { 00140 // Invalid character (greater than the maximum unicode value) 00141 if (Replacement) 00142 *Output++ = Replacement; 00143 } 00144 else 00145 { 00146 // Character will be converted to 2 UTF-16 elements 00147 c -= 0x0010000; 00148 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800); 00149 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00); 00150 } 00151 } 00152 } 00153 00154 return Output; 00155 } 00156 00157 00162 template <typename In, typename Out> 00163 inline Out Unicode::UTF8ToUTF32(In Begin, In End, Out Output, Uint32 Replacement) 00164 { 00165 while (Begin < End) 00166 { 00167 Uint32 c = 0; 00168 int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)]; 00169 if (Begin + TrailingBytes < End) 00170 { 00171 // First decode the UTF-8 character 00172 switch (TrailingBytes) 00173 { 00174 
case 5 : c += *Begin++; c <<= 6; 00175 case 4 : c += *Begin++; c <<= 6; 00176 case 3 : c += *Begin++; c <<= 6; 00177 case 2 : c += *Begin++; c <<= 6; 00178 case 1 : c += *Begin++; c <<= 6; 00179 case 0 : c += *Begin++; 00180 } 00181 c -= UTF8Offsets[TrailingBytes]; 00182 00183 // Then write it if valid 00184 if ((c < 0xD800) || (c > 0xDFFF)) 00185 { 00186 // Valid UTF-32 character 00187 *Output++ = c; 00188 } 00189 else 00190 { 00191 // Invalid UTF-32 character 00192 if (Replacement) 00193 *Output++ = Replacement; 00194 } 00195 } 00196 } 00197 00198 return Output; 00199 } 00200 00201 00206 template <typename In, typename Out> 00207 inline Out Unicode::UTF16ToUTF8(In Begin, In End, Out Output, Uint8 Replacement) 00208 { 00209 while (Begin < End) 00210 { 00211 Uint32 c = *Begin++; 00212 00213 // If it's a surrogate pair, first convert to a single UTF-32 character 00214 if ((c >= 0xD800) && (c <= 0xDBFF)) 00215 { 00216 if (Begin < End) 00217 { 00218 // The second element is valid : convert the two elements to a UTF-32 character 00219 Uint32 d = *Begin++; 00220 if ((d >= 0xDC00) && (d <= 0xDFFF)) 00221 c = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000); 00222 } 00223 else 00224 { 00225 // Invalid second element 00226 if (Replacement) 00227 *Output++ = Replacement; 00228 } 00229 } 00230 00231 // Then convert to UTF-8 00232 if (c > 0x0010FFFF) 00233 { 00234 // Invalid character (greater than the maximum unicode value) 00235 if (Replacement) 00236 *Output++ = Replacement; 00237 } 00238 else 00239 { 00240 // Valid character 00241 00242 // Get number of bytes to write 00243 int BytesToWrite = 1; 00244 if (c < 0x80) BytesToWrite = 1; 00245 else if (c < 0x800) BytesToWrite = 2; 00246 else if (c < 0x10000) BytesToWrite = 3; 00247 else if (c <= 0x0010FFFF) BytesToWrite = 4; 00248 00249 // Extract bytes to write 00250 Uint8 Bytes[4]; 00251 switch (BytesToWrite) 00252 { 00253 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00254 case 3 : 
Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00255 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00256 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]); 00257 } 00258 00259 // Add them to the output 00260 const Uint8* CurByte = Bytes; 00261 switch (BytesToWrite) 00262 { 00263 case 4 : *Output++ = *CurByte++; 00264 case 3 : *Output++ = *CurByte++; 00265 case 2 : *Output++ = *CurByte++; 00266 case 1 : *Output++ = *CurByte++; 00267 } 00268 } 00269 } 00270 00271 return Output; 00272 } 00273 00274 00279 template <typename In, typename Out> 00280 inline Out Unicode::UTF16ToUTF32(In Begin, In End, Out Output, Uint32 Replacement) 00281 { 00282 while (Begin < End) 00283 { 00284 Uint16 c = *Begin++; 00285 if ((c >= 0xD800) && (c <= 0xDBFF)) 00286 { 00287 // We have a surrogate pair, ie. a character composed of two elements 00288 if (Begin < End) 00289 { 00290 Uint16 d = *Begin++; 00291 if ((d >= 0xDC00) && (d <= 0xDFFF)) 00292 { 00293 // The second element is valid : convert the two elements to a UTF-32 character 00294 *Output++ = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000); 00295 } 00296 else 00297 { 00298 // Invalid second element 00299 if (Replacement) 00300 *Output++ = Replacement; 00301 } 00302 } 00303 } 00304 else if ((c >= 0xDC00) && (c <= 0xDFFF)) 00305 { 00306 // Invalid character 00307 if (Replacement) 00308 *Output++ = Replacement; 00309 } 00310 else 00311 { 00312 // Valid character directly convertible to UTF-32 00313 *Output++ = static_cast<Uint32>(c); 00314 } 00315 } 00316 00317 return Output; 00318 } 00319 00320 00325 template <typename In, typename Out> 00326 inline Out Unicode::UTF32ToUTF8(In Begin, In End, Out Output, Uint8 Replacement) 00327 { 00328 while (Begin < End) 00329 { 00330 Uint32 c = *Begin++; 00331 if (c > 0x0010FFFF) 00332 { 00333 // Invalid character (greater than the maximum unicode value) 00334 if (Replacement) 00335 *Output++ = Replacement; 00336 } 00337 
else 00338 { 00339 // Valid character 00340 00341 // Get number of bytes to write 00342 int BytesToWrite = 1; 00343 if (c < 0x80) BytesToWrite = 1; 00344 else if (c < 0x800) BytesToWrite = 2; 00345 else if (c < 0x10000) BytesToWrite = 3; 00346 else if (c <= 0x0010FFFF) BytesToWrite = 4; 00347 00348 // Extract bytes to write 00349 Uint8 Bytes[4]; 00350 switch (BytesToWrite) 00351 { 00352 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00353 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00354 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00355 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]); 00356 } 00357 00358 // Add them to the output 00359 const Uint8* CurByte = Bytes; 00360 switch (BytesToWrite) 00361 { 00362 case 4 : *Output++ = *CurByte++; 00363 case 3 : *Output++ = *CurByte++; 00364 case 2 : *Output++ = *CurByte++; 00365 case 1 : *Output++ = *CurByte++; 00366 } 00367 } 00368 } 00369 00370 return Output; 00371 } 00372 00373 00378 template <typename In, typename Out> 00379 inline Out Unicode::UTF32ToUTF16(In Begin, In End, Out Output, Uint16 Replacement) 00380 { 00381 while (Begin < End) 00382 { 00383 Uint32 c = *Begin++; 00384 if (c < 0xFFFF) 00385 { 00386 // Character can be converted directly to 16 bits, just need to check it's in the valid range 00387 if ((c >= 0xD800) && (c <= 0xDFFF)) 00388 { 00389 // Invalid character (this range is reserved) 00390 if (Replacement) 00391 *Output++ = Replacement; 00392 } 00393 else 00394 { 00395 // Valid character directly convertible to 16 bits 00396 *Output++ = static_cast<Uint16>(c); 00397 } 00398 } 00399 else if (c > 0x0010FFFF) 00400 { 00401 // Invalid character (greater than the maximum unicode value) 00402 if (Replacement) 00403 *Output++ = Replacement; 00404 } 00405 else 00406 { 00407 // Character will be converted to 2 UTF-16 elements 00408 c -= 0x0010000; 00409 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800); 00410 
*Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00); 00411 } 00412 } 00413 00414 return Output; 00415 } 00416 00417 00421 template <typename In> 00422 inline std::size_t Unicode::GetUTF8Length(In Begin, In End) 00423 { 00424 std::size_t Length = 0; 00425 while (Begin < End) 00426 { 00427 int NbBytes = UTF8TrailingBytes[static_cast<int>(*Begin)]; 00428 if (Begin + NbBytes < End) 00429 ++Length; 00430 00431 Begin += NbBytes + 1; 00432 } 00433 00434 return Length; 00435 } 00436 00437 00441 template <typename In> 00442 inline std::size_t Unicode::GetUTF16Length(In Begin, In End) 00443 { 00444 std::size_t Length = 0; 00445 while (Begin < End) 00446 { 00447 if ((*Begin >= 0xD800) && (*Begin <= 0xDBFF)) 00448 { 00449 ++Begin; 00450 if ((Begin < End) && ((*Begin >= 0xDC00) && (*Begin <= 0xDFFF))) 00451 { 00452 ++Length; 00453 } 00454 } 00455 else 00456 { 00457 ++Length; 00458 } 00459 00460 ++Begin; 00461 } 00462 00463 return Length; 00464 } 00465 00466 00470 template <typename In> 00471 inline std::size_t Unicode::GetUTF32Length(In Begin, In End) 00472 { 00473 return End - Begin; 00474 }
:: Copyright © 2007-2008 Laurent Gomila, all rights reserved :: Documentation generated by doxygen 1.5.2 ::