PageRenderTime 24ms CodeModel.GetById 14ms app.highlight 4ms RepoModel.GetById 1ms app.codeStats 0ms

/extlibs/SFML/include/SFML/System/Utf.inl

https://bitbucket.org/hugoruscitti/pilascpp
C++ Header | 671 lines | 445 code | 124 blank | 102 comment | 67 complexity | 02d108254fa111f7422014451bae706f MD5 | raw file
  1////////////////////////////////////////////////////////////
  2//
  3// SFML - Simple and Fast Multimedia Library
  4// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
  5//
  6// This software is provided 'as-is', without any express or implied warranty.
  7// In no event will the authors be held liable for any damages arising from the use of this software.
  8//
  9// Permission is granted to anyone to use this software for any purpose,
 10// including commercial applications, and to alter it and redistribute it freely,
 11// subject to the following restrictions:
 12//
 13// 1. The origin of this software must not be misrepresented;
 14//    you must not claim that you wrote the original software.
 15//    If you use this software in a product, an acknowledgment
 16//    in the product documentation would be appreciated but is not required.
 17//
 18// 2. Altered source versions must be plainly marked as such,
 19//    and must not be misrepresented as being the original software.
 20//
 21// 3. This notice may not be removed or altered from any source distribution.
 22//
 23////////////////////////////////////////////////////////////
 24
 25
 26////////////////////////////////////////////////////////////
 27template <typename In>
 28In Utf<8>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
 29{
 30    // Some useful precomputed data
 31    static const int trailing[256] =
 32    {
 33        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 34        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 35        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 36        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 37        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 38        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 39        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 40        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
 41    };
 42    static const Uint32 offsets[6] =
 43    {
 44        0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
 45    };
 46
 47    // Decode the character
 48    int trailingBytes = trailing[static_cast<Uint8>(*begin)];
 49    if (begin + trailingBytes < end)
 50    {
 51        output = 0;
 52        switch (trailingBytes)
 53        {
 54            case 5 : output += static_cast<Uint8>(*begin++); output <<= 6;
 55            case 4 : output += static_cast<Uint8>(*begin++); output <<= 6;
 56            case 3 : output += static_cast<Uint8>(*begin++); output <<= 6;
 57            case 2 : output += static_cast<Uint8>(*begin++); output <<= 6;
 58            case 1 : output += static_cast<Uint8>(*begin++); output <<= 6;
 59            case 0 : output += static_cast<Uint8>(*begin++);
 60        }
 61        output -= offsets[trailingBytes];
 62    }
 63    else
 64    {
 65        // Incomplete character
 66        begin = end;
 67        output = replacement;
 68    }
 69
 70    return begin;
 71}
 72
 73
 74////////////////////////////////////////////////////////////
 75template <typename Out>
 76Out Utf<8>::Encode(Uint32 input, Out output, Uint8 replacement)
 77{
 78    // Some useful precomputed data
 79    static const Uint8 firstBytes[7] =
 80    {
 81        0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
 82    };
 83
 84    // Encode the character
 85    if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
 86    {
 87        // Invalid character
 88        if (replacement)
 89            *output++ = replacement;
 90    }
 91    else
 92    {
 93        // Valid character
 94
 95        // Get the number of bytes to write
 96        int bytesToWrite = 1;
 97        if      (input <  0x80)       bytesToWrite = 1;
 98        else if (input <  0x800)      bytesToWrite = 2;
 99        else if (input <  0x10000)    bytesToWrite = 3;
100        else if (input <= 0x0010FFFF) bytesToWrite = 4;
101
102        // Extract the bytes to write
103        Uint8 bytes[4];
104        switch (bytesToWrite)
105        {
106            case 4 : bytes[3] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
107            case 3 : bytes[2] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
108            case 2 : bytes[1] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
109            case 1 : bytes[0] = static_cast<Uint8> (input | firstBytes[bytesToWrite]);
110        }
111
112        // Add them to the output
113        const Uint8* currentByte = bytes;
114        switch (bytesToWrite)
115        {
116            case 4 : *output++ = *currentByte++;
117            case 3 : *output++ = *currentByte++;
118            case 2 : *output++ = *currentByte++;
119            case 1 : *output++ = *currentByte++;
120        }
121    }
122
123    return output;
124}
125
126
127////////////////////////////////////////////////////////////
128template <typename In>
129In Utf<8>::Next(In begin, In end)
130{
131    Uint32 codepoint;
132    return Decode(begin, end, codepoint);
133}
134
135
136////////////////////////////////////////////////////////////
137template <typename In>
138std::size_t Utf<8>::Count(In begin, In end)
139{
140    std::size_t length = 0;
141    while (begin < end)
142    {
143        begin = Next(begin, end);
144        ++length;
145    }
146
147    return length;
148}
149
150
151////////////////////////////////////////////////////////////
152template <typename In, typename Out>
153Out Utf<8>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
154{
155    while (begin < end)
156    {
157        Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
158        output = Encode(codepoint, output);
159    }
160
161    return output;
162}
163
164
165////////////////////////////////////////////////////////////
166template <typename In, typename Out>
167Out Utf<8>::FromWide(In begin, In end, Out output)
168{
169    while (begin < end)
170    {
171        Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
172        output = Encode(codepoint, output);
173    }
174
175    return output;
176}
177
178
179////////////////////////////////////////////////////////////
180template <typename In, typename Out>
181Out Utf<8>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
182{
183    while (begin < end)
184    {
185        Uint32 codepoint;
186        begin = Decode(begin, end, codepoint);
187        output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
188    }
189
190    return output;
191}
192
193
194////////////////////////////////////////////////////////////
195template <typename In, typename Out>
196Out Utf<8>::ToWide(In begin, In end, Out output, wchar_t replacement)
197{
198    while (begin < end)
199    {
200        Uint32 codepoint;
201        begin = Decode(begin, end, codepoint);
202        output = Utf<32>::EncodeWide(codepoint, output, replacement);
203    }
204
205    return output;
206}
207
208
209////////////////////////////////////////////////////////////
210template <typename In, typename Out>
211Out Utf<8>::ToUtf8(In begin, In end, Out output)
212{
213    while (begin < end)
214        *output++ = *begin++;
215
216    return output;
217}
218
219
220////////////////////////////////////////////////////////////
221template <typename In, typename Out>
222Out Utf<8>::ToUtf16(In begin, In end, Out output)
223{
224    while (begin < end)
225    {
226        Uint32 codepoint;
227        begin = Decode(begin, end, codepoint);
228        output = Utf<16>::Encode(codepoint, output);
229    }
230
231    return output;
232}
233
234
235////////////////////////////////////////////////////////////
236template <typename In, typename Out>
237Out Utf<8>::ToUtf32(In begin, In end, Out output)
238{
239    while (begin < end)
240    {
241        Uint32 codepoint;
242        begin = Decode(begin, end, codepoint);
243        *output++ = codepoint;
244    }
245
246    return output;
247}
248
249
250////////////////////////////////////////////////////////////
251template <typename In>
252In Utf<16>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
253{
254    Uint16 first = *begin++;
255
256    // If it's a surrogate pair, first convert to a single UTF-32 character
257    if ((first >= 0xD800) && (first <= 0xDBFF))
258    {
259        if (begin < end)
260        {
261            Uint32 second = *begin++;
262            if ((second >= 0xDC00) && (second <= 0xDFFF))
263            {
264                // The second element is valid: convert the two elements to a UTF-32 character
265                output = static_cast<Uint32>(((first - 0xD800) << 10) + (second - 0xDC00) + 0x0010000);
266            }
267            else
268            {
269                // Invalid character
270                output = replacement;
271            }
272        }
273        else
274        {
275            // Invalid character
276            begin = end;
277            output = replacement;
278        }
279    }
280    else
281    {
282        // We can make a direct copy
283        output = first;
284    }
285
286    return begin;
287}
288
289
290////////////////////////////////////////////////////////////
291template <typename Out>
292Out Utf<16>::Encode(Uint32 input, Out output, Uint16 replacement)
293{
294    if (input < 0xFFFF)
295    {
296        // The character can be copied directly, we just need to check if it's in the valid range
297        if ((input >= 0xD800) && (input <= 0xDFFF))
298        {
299            // Invalid character (this range is reserved)
300            if (replacement)
301                *output++ = replacement;
302        }
303        else
304        {
305            // Valid character directly convertible to a single UTF-16 character
306            *output++ = static_cast<Uint16>(input);
307        }
308    }
309    else if (input > 0x0010FFFF)
310    {
311        // Invalid character (greater than the maximum unicode value)
312        if (replacement)
313            *output++ = replacement;
314    }
315    else
316    {
317        // The input character will be converted to two UTF-16 elements
318        input -= 0x0010000;
319        *output++ = static_cast<Uint16>((input >> 10)     + 0xD800);
320        *output++ = static_cast<Uint16>((input & 0x3FFUL) + 0xDC00);
321    }
322
323    return output;
324}
325
326
327////////////////////////////////////////////////////////////
328template <typename In>
329In Utf<16>::Next(In begin, In end)
330{
331    Uint32 codepoint;
332    return Decode(begin, end, codepoint);
333}
334
335
336////////////////////////////////////////////////////////////
337template <typename In>
338std::size_t Utf<16>::Count(In begin, In end)
339{
340    std::size_t length = 0;
341    while (begin < end)
342    {
343        begin = Next(begin, end);
344        ++length;
345    }
346
347    return length;
348}
349
350
351////////////////////////////////////////////////////////////
352template <typename In, typename Out>
353Out Utf<16>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
354{
355    while (begin < end)
356    {
357        Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
358        output = Encode(codepoint, output);
359    }
360
361    return output;
362}
363
364
365////////////////////////////////////////////////////////////
366template <typename In, typename Out>
367Out Utf<16>::FromWide(In begin, In end, Out output)
368{
369    while (begin < end)
370    {
371        Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
372        output = Encode(codepoint, output);
373    }
374
375    return output;
376}
377
378
379////////////////////////////////////////////////////////////
380template <typename In, typename Out>
381Out Utf<16>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
382{
383    while (begin < end)
384    {
385        Uint32 codepoint;
386        begin = Decode(begin, end, codepoint);
387        output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
388    }
389
390    return output;
391}
392
393
394////////////////////////////////////////////////////////////
395template <typename In, typename Out>
396Out Utf<16>::ToWide(In begin, In end, Out output, wchar_t replacement)
397{
398    while (begin < end)
399    {
400        Uint32 codepoint;
401        begin = Decode(begin, end, codepoint);
402        output = Utf<32>::EncodeWide(codepoint, output, replacement);
403    }
404
405    return output;
406}
407
408
409////////////////////////////////////////////////////////////
410template <typename In, typename Out>
411Out Utf<16>::ToUtf8(In begin, In end, Out output)
412{
413    while (begin < end)
414    {
415        Uint32 codepoint;
416        begin = Decode(begin, end, codepoint);
417        output = Utf<8>::Encode(codepoint, output);
418    }
419
420    return output;
421}
422
423
424////////////////////////////////////////////////////////////
425template <typename In, typename Out>
426Out Utf<16>::ToUtf16(In begin, In end, Out output)
427{
428    while (begin < end)
429        *output++ = *begin++;
430
431    return output;
432}
433
434
435////////////////////////////////////////////////////////////
436template <typename In, typename Out>
437Out Utf<16>::ToUtf32(In begin, In end, Out output)
438{
439    while (begin < end)
440    {
441        Uint32 codepoint;
442        begin = Decode(begin, end, codepoint);
443        *output++ = codepoint;
444    }
445
446    return output;
447}
448
449
450////////////////////////////////////////////////////////////
451template <typename In>
452In Utf<32>::Decode(In begin, In end, Uint32& output, Uint32)
453{
454    output = *begin++;
455    return begin;
456}
457
458
459////////////////////////////////////////////////////////////
460template <typename Out>
461Out Utf<32>::Encode(Uint32 input, Out output, Uint32 replacement)
462{
463    *output++ = input;
464    return output;
465}
466
467
468////////////////////////////////////////////////////////////
469template <typename In>
470In Utf<32>::Next(In begin, In end)
471{
472    return ++begin;
473}
474
475
476////////////////////////////////////////////////////////////
477template <typename In>
478std::size_t Utf<32>::Count(In begin, In end)
479{
480    return begin - end;
481}
482
483
484////////////////////////////////////////////////////////////
485template <typename In, typename Out>
486Out Utf<32>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
487{
488    while (begin < end)
489        *output++ = DecodeAnsi(*begin++, locale);
490
491    return output;
492}
493
494
495////////////////////////////////////////////////////////////
496template <typename In, typename Out>
497Out Utf<32>::FromWide(In begin, In end, Out output)
498{
499    while (begin < end)
500        *output++ = DecodeWide(*begin++);
501
502    return output;
503}
504
505
506////////////////////////////////////////////////////////////
507template <typename In, typename Out>
508Out Utf<32>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
509{
510    while (begin < end)
511        output = EncodeAnsi(*begin++, output, replacement, locale);
512
513    return output;
514}
515
516
517////////////////////////////////////////////////////////////
518template <typename In, typename Out>
519Out Utf<32>::ToWide(In begin, In end, Out output, wchar_t replacement)
520{
521    while (begin < end)
522        output = EncodeWide(*begin++, output, replacement);
523
524    return output;
525}
526
527
528////////////////////////////////////////////////////////////
529template <typename In, typename Out>
530Out Utf<32>::ToUtf8(In begin, In end, Out output)
531{
532    while (begin < end)
533        output = Utf<8>::Encode(*begin++, output);
534
535    return output;
536}
537
538////////////////////////////////////////////////////////////
539template <typename In, typename Out>
540Out Utf<32>::ToUtf16(In begin, In end, Out output)
541{
542    while (begin < end)
543        output = Utf<16>::Encode(*begin++, output);
544
545    return output;
546}
547
548
549////////////////////////////////////////////////////////////
550template <typename In, typename Out>
551Out Utf<32>::ToUtf32(In begin, In end, Out output)
552{
553    while (begin < end)
554        *output++ = *begin++;
555
556    return output;
557}
558
559
560////////////////////////////////////////////////////////////
561template <typename In>
562Uint32 Utf<32>::DecodeAnsi(In input, const std::locale& locale)
563{
564    // On Windows, gcc's standard library (glibc++) has almost
565    // no support for Unicode stuff. As a consequence, in this
566    // context we can only use the default locale and ignore
567    // the one passed as parameter.
568
569    #if defined(SFML_SYSTEM_WINDOWS) &&                       /* if Windows ... */                          \
570       (defined(__GLIBCPP__) || defined (__GLIBCXX__)) &&     /* ... and standard library is glibc++ ... */ \
571      !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
572
573        wchar_t character = 0;
574        mbtowc(&character, &input, 1);
575        return static_cast<Uint32>(character);
576
577    #else
578
579        // Get the facet of the locale which deals with character conversion
580        const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
581
582        // Use the facet to convert each character of the input string
583        return static_cast<Uint32>(facet.widen(input));
584
585    #endif
586}
587
588
589////////////////////////////////////////////////////////////
590template <typename In>
591Uint32 Utf<32>::DecodeWide(In input)
592{
593    // The encoding of wide characters is not well defined and is left to the system;
594    // however we can safely assume that it is UCS-2 on Windows and
595    // UCS-4 on Unix systems.
596    // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4,
597    // and UCS-4 *is* UTF-32).
598
599    return input;
600}
601
602
603////////////////////////////////////////////////////////////
604template <typename Out>
605Out Utf<32>::EncodeAnsi(Uint32 codepoint, Out output, char replacement, const std::locale& locale)
606{
607    // On Windows, gcc's standard library (glibc++) has almost
608    // no support for Unicode stuff. As a consequence, in this
609    // context we can only use the default locale and ignore
610    // the one passed as parameter.
611
612    #if defined(SFML_SYSTEM_WINDOWS) &&                       /* if Windows ... */                          \
613       (defined(__GLIBCPP__) || defined (__GLIBCXX__)) &&     /* ... and standard library is glibc++ ... */ \
614      !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
615
616        char character = 0;
617        if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
618            *output++ = character;
619        else if (replacement)
620            *output++ = replacement;
621
622        return output;
623
624    #else
625
626        // Get the facet of the locale which deals with character conversion
627        const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
628
629        // Use the facet to convert each character of the input string
630        *output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
631
632        return output;
633
634    #endif
635}
636
637
638////////////////////////////////////////////////////////////
639template <typename Out>
640Out Utf<32>::EncodeWide(Uint32 codepoint, Out output, wchar_t replacement)
641{
642    // The encoding of wide characters is not well defined and is left to the system;
643    // however we can safely assume that it is UCS-2 on Windows and
644    // UCS-4 on Unix systems.
645    // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4).
646    // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32).
647
648    switch (sizeof(wchar_t))
649    {
650        case 4:
651        {
652            *output++ = static_cast<wchar_t>(codepoint);
653            break;
654        }
655
656        default:
657        {
658            if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
659            {
660                *output++ = static_cast<wchar_t>(codepoint);
661            }
662            else if (replacement)
663            {
664                *output++ = replacement;
665            }
666            break;
667        }
668    }
669
670    return output;
671}