adding -w option to Yuescript tool.

author: Li Jin <dragon-fly@qq.com> 2022-11-15 17:23:46 +0800
committer: Li Jin <dragon-fly@qq.com> 2022-11-15 17:52:09 +0800
commit: 94f8330613877b3582d32bd11abd83a97b4399ad (patch)
tree: 5359de314be1ebde17f8d1e48632a97d18f9e50f /src/3rdParty/efsw/Utf.inl
parent: 60f8f00a022ac08701792b2897b72d8c99b50f52 (diff)
download: yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.gz
yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.bz2
yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.zip
1 files changed, 576 insertions, 0 deletions
diff --git a/src/3rdParty/efsw/Utf.inl b/src/3rdParty/efsw/Utf.inl
new file mode 100755
index 0000000..7e3e9d6
--- /dev/null
+++ b/src/3rdParty/efsw/Utf.inl
@@ -0,0 +1,576 @@
+// References :
+// http://www.unicode.org/
+// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
+// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h
+// http://people.w3.org/rishida/scripts/uniview/conversion
+////////////////////////////////////////////////////////////
+// Decodes one UTF-8 sequence starting at `begin` and stores the result in `output`.
+// The lead byte selects, through a lookup table, how many bytes follow it. When the
+// whole sequence does not fit before `end`, `output` receives `replacement` and `end`
+// is returned; otherwise the iterator just past the sequence is returned.
+// `begin` is dereferenced unconditionally, so the range must not be empty.
+template <typename In> In Utf<8>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) {
+        // Number of bytes that follow each possible lead byte
+        static const int trailing[256] = {
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
+                2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 };
+        // Constant subtracted from the accumulated bytes, indexed by the trailing-byte count
+        static const Uint32 offsets[6] = { 0x00000000, 0x00003080, 0x000E2080,
+                                           0x03C82080, 0xFA082080, 0x82082080 };
+        const int extraBytes = trailing[static_cast<Uint8>( *begin )];
+        if ( !( begin + extraBytes < end ) ) {
+                // Incomplete character
+                output = replacement;
+                return end;
+        }
+        // Every byte but the last is added and then shifted to make room for the next one
+        Uint32 accumulator = 0;
+        for ( int remaining = extraBytes; remaining > 0; --remaining ) {
+                accumulator += static_cast<Uint8>( *begin++ );
+                accumulator <<= 6;
+        }
+        accumulator += static_cast<Uint8>( *begin++ );
+        output = accumulator - offsets[extraBytes];
+        return begin;
+}
+// Encodes the code point `input` as UTF-8 and writes the bytes through `output`.
+// Values above U+10FFFF and surrogates (U+D800..U+DFFF) have no UTF-8 form: for those a
+// single `replacement` byte is written, or nothing at all when `replacement` is 0.
+// Returns the output iterator past the last byte written.
+template <typename Out> Out Utf<8>::Encode( Uint32 input, Out output, Uint8 replacement ) {
+        // Marker bits of the lead byte, indexed by the total length of the sequence
+        static const Uint8 firstBytes[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+        // Reject the whole surrogate block, low surrogates (0xDC00..0xDFFF) included
+        if ( ( input > 0x0010FFFF ) || ( ( input >= 0xD800 ) && ( input <= 0xDFFF ) ) ) {
+                // Invalid character
+                if ( replacement )
+                        *output++ = replacement;
+                return output;
+        }
+        // Get the number of bytes to write (the range check above caps it at 4)
+        int bytesToWrite = 4;
+        if ( input < 0x80 )
+                bytesToWrite = 1;
+        else if ( input < 0x800 )
+                bytesToWrite = 2;
+        else if ( input < 0x10000 )
+                bytesToWrite = 3;
+        // Fill the sequence back to front: each continuation byte carries 6 bits
+        Uint8 bytes[4];
+        for ( int index = bytesToWrite - 1; index > 0; --index ) {
+                bytes[index] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF );
+                input >>= 6;
+        }
+        bytes[0] = static_cast<Uint8>( input | firstBytes[bytesToWrite] );
+        // Add them to the output
+        for ( int index = 0; index < bytesToWrite; ++index )
+                *output++ = bytes[index];
+        return output;
+}
+// Skips one UTF-8 character: decodes it, throws the value away and returns the
+// iterator that Decode() hands back.
+template <typename In> In Utf<8>::Next( In begin, In end ) {
+        Uint32 discarded;
+        begin = Decode( begin, end, discarded );
+        return begin;
+}
+// Counts the UTF-8 characters in [begin, end) by stepping over them with Next().
+template <typename In> std::size_t Utf<8>::Count( In begin, In end ) {
+        std::size_t characters = 0;
+        for ( ; begin < end; ++characters )
+                begin = Next( begin, end );
+        return characters;
+}
+// Converts the ANSI characters in [begin, end) to UTF-8: each one is turned into a code
+// point by Utf<32>::DecodeAnsi() with `locale`, then encoded into `output`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<8>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
+        for ( ; begin < end; ++begin )
+                output = Encode( Utf<32>::DecodeAnsi( *begin, locale ), output );
+        return output;
+}
+// Converts the wide characters in [begin, end) to UTF-8: each one is turned into a code
+// point by Utf<32>::DecodeWide(), then encoded into `output`.
+// Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<8>::FromWide( In begin, In end, Out output ) {
+        for ( ; begin < end; ++begin )
+                output = Encode( Utf<32>::DecodeWide( *begin ), output );
+        return output;
+}
+// Converts the Latin-1 characters in [begin, end) to UTF-8 and returns the advanced
+// output iterator.
+template <typename In, typename Out> Out Utf<8>::FromLatin1( In begin, In end, Out output ) {
+        // Latin-1 is directly compatible with Unicode encodings,
+        // and can thus be treated as (a sub-range of) UTF-32.
+        // Each element is read as an unsigned byte first: through a signed `char`, the
+        // values 0x80..0xFF would otherwise be sign-extended into code points that
+        // Encode() rejects as out of range.
+        while ( begin < end )
+                output = Encode( static_cast<Uint8>( *begin++ ), output );
+        return output;
+}
+// Converts the UTF-8 text in [begin, end) to ANSI characters: every decoded code point
+// is handed to Utf<32>::EncodeAnsi() together with `replacement` and `locale`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<8>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                output = Utf<32>::EncodeAnsi( decoded, output, replacement, locale );
+        }
+        return output;
+}
+#ifndef EFSW_NO_WIDECHAR
+// Converts the UTF-8 text in [begin, end) to wide characters: every decoded code point
+// is handed to Utf<32>::EncodeWide() together with `replacement`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<8>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                output = Utf<32>::EncodeWide( decoded, output, replacement );
+        }
+        return output;
+}
+#endif
+// Converts the UTF-8 text in [begin, end) to Latin-1. Code points below 256 are copied
+// as a `char`; any other code point is written as `replacement`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<8>::ToLatin1( In begin, In end, Out output, char replacement ) {
+        // Latin-1 matches the first 256 code points, so a decoded value either fits
+        // as-is or cannot be represented at all
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                if ( decoded < 256 )
+                        *output++ = static_cast<char>( decoded );
+                else
+                        *output++ = replacement;
+        }
+        return output;
+}
+// UTF-8 to UTF-8: copies [begin, end) element by element into `output` and returns the
+// advanced output iterator.
+template <typename In, typename Out> Out Utf<8>::toUtf8( In begin, In end, Out output ) {
+        for ( ; begin < end; ++begin ) {
+                *output = *begin;
+                ++output;
+        }
+        return output;
+}
+// Converts the UTF-8 text in [begin, end) to UTF-16: every decoded code point is
+// re-encoded with Utf<16>::Encode(). Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<8>::ToUtf16( In begin, In end, Out output ) {
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                output = Utf<16>::Encode( decoded, output );
+        }
+        return output;
+}
+// Converts the UTF-8 text in [begin, end) to UTF-32: every decoded code point is
+// written to `output` as one element. Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<8>::ToUtf32( In begin, In end, Out output ) {
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                *output = decoded;
+                ++output;
+        }
+        return output;
+}
+// Decodes one UTF-16 character starting at `begin` and stores it in `output`.
+// - A unit outside 0xD800..0xDBFF is copied as-is.
+// - A high surrogate followed by a low surrogate (0xDC00..0xDFFF) is combined into one
+//   code point.
+// - A high surrogate followed by anything else yields `replacement`; the following unit
+//   has been consumed as well by then.
+// - A high surrogate at the very end of the range yields `replacement` and returns `end`.
+// `begin` is dereferenced unconditionally, so the range must not be empty.
+template <typename In> In Utf<16>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) {
+        const Uint16 lead = *begin++;
+        if ( ( lead < 0xD800 ) || ( lead > 0xDBFF ) ) {
+                // Not the first half of a surrogate pair: direct copy
+                output = lead;
+                return begin;
+        }
+        if ( !( begin < end ) ) {
+                // The pair is cut off by the end of the input
+                output = replacement;
+                return end;
+        }
+        const Uint32 trail = *begin++;
+        if ( ( trail < 0xDC00 ) || ( trail > 0xDFFF ) ) {
+                // The second unit is not a low surrogate
+                output = replacement;
+                return begin;
+        }
+        // Both halves are valid: merge them into a single UTF-32 character
+        output = static_cast<Uint32>( ( ( lead - 0xD800 ) << 10 ) + ( trail - 0xDC00 ) + 0x0010000 );
+        return begin;
+}
+// Encodes the code point `input` as UTF-16 and writes the unit(s) through `output`.
+// - U+0000..U+FFFF outside the surrogate block: one unit.
+// - U+10000..U+10FFFF: a high/low surrogate pair.
+// - Surrogates (U+D800..U+DFFF) and values above U+10FFFF: `replacement`, or nothing
+//   when `replacement` is 0.
+// Returns the output iterator past the last unit written.
+template <typename Out> Out Utf<16>::Encode( Uint32 input, Out output, Uint16 replacement ) {
+        // The bound is inclusive: U+FFFF itself fits in one unit and must not reach the
+        // surrogate-pair branch, where subtracting 0x10000 from it would wrap around
+        if ( input <= 0xFFFF ) {
+                // The character can be copied directly, we just need to check if it's in the valid range
+                if ( ( input >= 0xD800 ) && ( input <= 0xDFFF ) ) {
+                        // Invalid character (this range is reserved)
+                        if ( replacement )
+                                *output++ = replacement;
+                } else {
+                        // Valid character directly convertible to a single UTF-16 character
+                        *output++ = static_cast<Uint16>( input );
+                }
+        } else if ( input > 0x0010FFFF ) {
+                // Invalid character (greater than the maximum unicode value)
+                if ( replacement )
+                        *output++ = replacement;
+        } else {
+                // The input character will be converted to two UTF-16 elements
+                input -= 0x0010000;
+                *output++ = static_cast<Uint16>( ( input >> 10 ) + 0xD800 );
+                *output++ = static_cast<Uint16>( ( input & 0x3FFUL ) + 0xDC00 );
+        }
+        return output;
+}
+// Skips one UTF-16 character: decodes it, throws the value away and returns the
+// iterator that Decode() hands back.
+template <typename In> In Utf<16>::Next( In begin, In end ) {
+        Uint32 discarded;
+        begin = Decode( begin, end, discarded );
+        return begin;
+}
+// Counts the UTF-16 characters in [begin, end) by stepping over them with Next().
+template <typename In> std::size_t Utf<16>::Count( In begin, In end ) {
+        std::size_t characters = 0;
+        for ( ; begin < end; ++characters )
+                begin = Next( begin, end );
+        return characters;
+}
+// Converts the ANSI characters in [begin, end) to UTF-16: each one is turned into a code
+// point by Utf<32>::DecodeAnsi() with `locale`, then encoded into `output`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<16>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
+        for ( ; begin < end; ++begin )
+                output = Encode( Utf<32>::DecodeAnsi( *begin, locale ), output );
+        return output;
+}
+// Converts the wide characters in [begin, end) to UTF-16: each one is turned into a code
+// point by Utf<32>::DecodeWide(), then encoded into `output`.
+// Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<16>::FromWide( In begin, In end, Out output ) {
+        for ( ; begin < end; ++begin )
+                output = Encode( Utf<32>::DecodeWide( *begin ), output );
+        return output;
+}
+// Converts the Latin-1 characters in [begin, end) to UTF-16 and returns the advanced
+// output iterator.
+template <typename In, typename Out> Out Utf<16>::FromLatin1( In begin, In end, Out output ) {
+        // Latin-1 is directly compatible with Unicode encodings,
+        // and can thus be treated as (a sub-range of) UTF-32.
+        // Each element is read as an unsigned byte first: through a signed `char`, the
+        // values 0x80..0xFF would otherwise be sign-extended (0xE9 would be stored as
+        // 0xFFE9 in a 16-bit unit).
+        while ( begin < end )
+                *output++ = static_cast<Uint8>( *begin++ );
+        return output;
+}
+// Converts the UTF-16 text in [begin, end) to ANSI characters: every decoded code point
+// is handed to Utf<32>::EncodeAnsi() together with `replacement` and `locale`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<16>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                output = Utf<32>::EncodeAnsi( decoded, output, replacement, locale );
+        }
+        return output;
+}
+#ifndef EFSW_NO_WIDECHAR
+// Converts the UTF-16 text in [begin, end) to wide characters: every decoded code point
+// is handed to Utf<32>::EncodeWide() together with `replacement`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<16>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                output = Utf<32>::EncodeWide( decoded, output, replacement );
+        }
+        return output;
+}
+#endif
+// Converts the UTF-16 units in [begin, end) to Latin-1, one output character per unit.
+// Units below 256 are copied as a `char`; any other unit is written as `replacement`.
+// The units are inspected individually (Decode() is not used here).
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<16>::ToLatin1( In begin, In end, Out output, char replacement ) {
+        // Latin-1 matches the first 256 code points, so a unit either fits as-is or
+        // cannot be represented at all
+        for ( ; begin < end; begin++ ) {
+                if ( *begin < 256 )
+                        *output++ = static_cast<char>( *begin );
+                else
+                        *output++ = replacement;
+        }
+        return output;
+}
+// Converts the UTF-16 text in [begin, end) to UTF-8: every decoded code point is
+// re-encoded with Utf<8>::Encode(). Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<16>::toUtf8( In begin, In end, Out output ) {
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                output = Utf<8>::Encode( decoded, output );
+        }
+        return output;
+}
+// UTF-16 to UTF-16: copies [begin, end) element by element into `output` and returns
+// the advanced output iterator.
+template <typename In, typename Out> Out Utf<16>::ToUtf16( In begin, In end, Out output ) {
+        for ( ; begin < end; ++begin ) {
+                *output = *begin;
+                ++output;
+        }
+        return output;
+}
+// Converts the UTF-16 text in [begin, end) to UTF-32: every decoded code point is
+// written to `output` as one element. Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<16>::ToUtf32( In begin, In end, Out output ) {
+        Uint32 decoded;
+        for ( ; begin < end; ) {
+                begin = Decode( begin, end, decoded );
+                *output = decoded;
+                ++output;
+        }
+        return output;
+}
+// Reads one UTF-32 element into `output` and returns the iterator past it.
+// Neither `end` nor the replacement argument is consulted.
+template <typename In> In Utf<32>::Decode( In begin, In end, Uint32& output, Uint32 ) {
+        output = *begin;
+        ++begin;
+        return begin;
+}
+// Writes `input` to `output` as a single UTF-32 element and returns the advanced
+// iterator. The value is stored unchecked; `replacement` is not used.
+template <typename Out> Out Utf<32>::Encode( Uint32 input, Out output, Uint32 replacement ) {
+        *output = input;
+        ++output;
+        return output;
+}
+// Skips one UTF-32 character, i.e. advances `begin` by one element. `end` is not used.
+template <typename In> In Utf<32>::Next( In begin, In end ) {
+        ++begin;
+        return begin;
+}
+// Returns the number of UTF-32 characters in [begin, end). Every element is exactly one
+// character, so this is the distance between the two iterators.
+template <typename In> std::size_t Utf<32>::Count( In begin, In end ) {
+        // The order matters: `begin - end` is negative for a non-empty range and would
+        // wrap to a huge value once converted to std::size_t
+        return static_cast<std::size_t>( end - begin );
+}
+// Converts the ANSI characters in [begin, end) to UTF-32: each one goes through
+// DecodeAnsi() with `locale` and the result is written as one element.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<32>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
+        for ( ; begin < end; ++begin ) {
+                *output = DecodeAnsi( *begin, locale );
+                ++output;
+        }
+        return output;
+}
+// Converts the wide characters in [begin, end) to UTF-32: each one goes through
+// DecodeWide() and the result is written as one element.
+// Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<32>::FromWide( In begin, In end, Out output ) {
+        for ( ; begin < end; ++begin ) {
+                *output = DecodeWide( *begin );
+                ++output;
+        }
+        return output;
+}
+// Converts the Latin-1 characters in [begin, end) to UTF-32 and returns the advanced
+// output iterator.
+template <typename In, typename Out> Out Utf<32>::FromLatin1( In begin, In end, Out output ) {
+        // Latin-1 is directly compatible with Unicode encodings,
+        // and can thus be treated as (a sub-range of) UTF-32.
+        // Each element is read as an unsigned byte first: through a signed `char`, the
+        // values 0x80..0xFF would otherwise be sign-extended into values far outside
+        // the Latin-1 range.
+        while ( begin < end )
+                *output++ = static_cast<Uint8>( *begin++ );
+        return output;
+}
+// Converts the UTF-32 text in [begin, end) to ANSI characters by passing every element
+// to EncodeAnsi() together with `replacement` and `locale`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<32>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
+        for ( ; begin < end; ++begin )
+                output = EncodeAnsi( *begin, output, replacement, locale );
+        return output;
+}
+#ifndef EFSW_NO_WIDECHAR
+// Converts the UTF-32 text in [begin, end) to wide characters by passing every element
+// to EncodeWide() together with `replacement`.
+// Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<32>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
+        for ( ; begin < end; ++begin )
+                output = EncodeWide( *begin, output, replacement );
+        return output;
+}
+#endif
+// Converts the UTF-32 text in [begin, end) to Latin-1, one output character per element.
+// Elements below 256 are copied as a `char`; any other element is written as
+// `replacement`. Returns the advanced output iterator.
+template <typename In, typename Out>
+Out Utf<32>::ToLatin1( In begin, In end, Out output, char replacement ) {
+        // Latin-1 matches the first 256 code points, so an element either fits as-is or
+        // cannot be represented at all
+        for ( ; begin < end; begin++ ) {
+                if ( *begin < 256 )
+                        *output++ = static_cast<char>( *begin );
+                else
+                        *output++ = replacement;
+        }
+        return output;
+}
+// Converts the UTF-32 text in [begin, end) to UTF-8 by passing every element to
+// Utf<8>::Encode(). Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<32>::toUtf8( In begin, In end, Out output ) {
+        for ( ; begin < end; ++begin )
+                output = Utf<8>::Encode( *begin, output );
+        return output;
+}
+// Converts the UTF-32 text in [begin, end) to UTF-16 by passing every element to
+// Utf<16>::Encode(). Returns the advanced output iterator.
+template <typename In, typename Out> Out Utf<32>::ToUtf16( In begin, In end, Out output ) {
+        for ( ; begin < end; ++begin )
+                output = Utf<16>::Encode( *begin, output );
+        return output;
+}
+// UTF-32 to UTF-32: copies [begin, end) element by element into `output` and returns
+// the advanced output iterator.
+template <typename In, typename Out> Out Utf<32>::ToUtf32( In begin, In end, Out output ) {
+        for ( ; begin < end; ++begin ) {
+                *output = *begin;
+                ++output;
+        }
+        return output;
+}
+// Converts one ANSI character `input` to a code point and returns it.
+// Which implementation is compiled is decided by the preprocessor test below:
+// - Windows with the GNU standard library (and no STLPort): the character goes through
+//   mbtowc() and `locale` is ignored.
+// - Everything else: the character is widened with the std::ctype facet of `locale`
+//   (the wchar_t facet, or the char facet when EFSW_NO_WIDECHAR is defined).
+// NOTE(review): the mbtowc() return value is not checked, so a failed conversion
+// returns the initial 0 — confirm that is the intended result.
+template <typename In> Uint32 Utf<32>::DecodeAnsi( In input, const std::locale& locale ) {
+        // On Windows, gcc's standard library (glibc++) has almost
+        // no support for Unicode stuff. As a consequence, in this
+        // context we can only use the default locale and ignore
+        // the one passed as parameter.
+#if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */                  \
+        ( defined( __GLIBCPP__ ) ||                                                 \
+          defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \
+        !( defined( __SGI_STL_PORT ) ||                                             \
+           defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */
+        wchar_t character = 0;
+        mbtowc( &character, &input, 1 );
+        return static_cast<Uint32>( character );
+#else
+// Get the facet of the locale which deals with character conversion
+#ifndef EFSW_NO_WIDECHAR
+        const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale );
+#else
+        const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale );
+#endif
+        // Use the facet to convert each character of the input string
+        return static_cast<Uint32>( facet.widen( input ) );
+#endif
+}
+// Converts one wide character to a code point by plain value conversion.
+template <typename In> Uint32 Utf<32>::DecodeWide( In input ) {
+        // The wide-character encoding is system-defined; this code assumes UCS-2 on
+        // Windows and UCS-4 on Unix systems. UCS-2 is a subset of UCS-4 and UCS-4 *is*
+        // UTF-32, so in both cases the value can be taken over unchanged.
+        const Uint32 codepoint = input;
+        return codepoint;
+}
+// Converts the code point `codepoint` to one ANSI character written through `output`
+// and returns the advanced output iterator.
+// Which implementation is compiled is decided by the preprocessor test below:
+// - Windows with the GNU standard library (and no STLPort): wctomb() is used and
+//   `locale` is ignored; when wctomb() reports an error, `replacement` is written
+//   instead, unless it is 0, in which case nothing is written.
+// - Everything else: the std::ctype facet of `locale` narrows the character, with
+//   `replacement` passed to narrow() as the fallback value.
+// NOTE(review): wctomb() may store more than one byte for a multibyte encoding while
+// the destination here is a single `char` — confirm this cannot overflow.
+template <typename Out>
+Out Utf<32>::EncodeAnsi( Uint32 codepoint, Out output, char replacement,
+                                                 const std::locale& locale ) {
+        // On Windows, gcc's standard library (glibc++) has almost
+        // no support for Unicode stuff. As a consequence, in this
+        // context we can only use the default locale and ignore
+        // the one passed as parameter.
+#if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */                  \
+        ( defined( __GLIBCPP__ ) ||                                                 \
+          defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \
+        !( defined( __SGI_STL_PORT ) ||                                             \
+           defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */
+        char character = 0;
+        if ( wctomb( &character, static_cast<wchar_t>( codepoint ) ) >= 0 )
+                *output++ = character;
+        else if ( replacement )
+                *output++ = replacement;
+        return output;
+#else
+// Get the facet of the locale which deals with character conversion
+#ifndef EFSW_NO_WIDECHAR
+        const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale );
+#else
+        const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale );
+#endif
+        // Use the facet to convert each character of the input string
+        *output++ = facet.narrow( static_cast<wchar_t>( codepoint ), replacement );
+        return output;
+#endif
+}
+#ifndef EFSW_NO_WIDECHAR
+// Writes `codepoint` as one wide character through `output` and returns the advanced
+// output iterator.
+// A 4-byte wchar_t receives the code point unchanged. Any other size is handled as
+// UCS-2: only non-surrogate code points up to 0xFFFF are copied; anything else becomes
+// `replacement`, or is dropped when `replacement` is 0.
+template <typename Out>
+Out Utf<32>::EncodeWide( Uint32 codepoint, Out output, wchar_t replacement ) {
+        // The wide-character encoding is system-defined; this code assumes UCS-2 on
+        // Windows and UCS-4 on Unix systems (UCS-4 *is* UTF-32, UCS-2 is a subset of it).
+        const bool wideIsUcs4 = ( sizeof( wchar_t ) == 4 );
+        const bool isSurrogate = !( ( codepoint < 0xD800 ) || ( codepoint > 0xDFFF ) );
+        if ( wideIsUcs4 || ( ( codepoint <= 0xFFFF ) && !isSurrogate ) ) {
+                *output++ = static_cast<wchar_t>( codepoint );
+        } else if ( replacement ) {
+                *output++ = replacement;
+        }
+        return output;
+}
+#endif
author	Li Jin <dragon-fly@qq.com>	2022-11-15 17:23:46 +0800
committer	Li Jin <dragon-fly@qq.com>	2022-11-15 17:52:09 +0800
commit	94f8330613877b3582d32bd11abd83a97b4399ad (patch)
tree	5359de314be1ebde17f8d1e48632a97d18f9e50f /src/3rdParty/efsw/Utf.inl
parent	60f8f00a022ac08701792b2897b72d8c99b50f52 (diff)
download	yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.gz yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.tar.bz2 yuescript-94f8330613877b3582d32bd11abd83a97b4399ad.zip

diff --git a/src/3rdParty/efsw/Utf.inl b/src/3rdParty/efsw/Utf.inl new file mode 100755 index 0000000..7e3e9d6 --- /dev/null +++ b/src/3rdParty/efsw/Utf.inl
@@ -0,0 +1,576 @@
	1	// References :
	2	// http://www.unicode.org/
	3	// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
	4	// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h
	5	// http://people.w3.org/rishida/scripts/uniview/conversion
	6	////////////////////////////////////////////////////////////
	7
	8	template <typename In> In Utf<8>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) {
	9	// Some useful precomputed data
	10	static const int trailing[256] = {
	11	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	12	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	13	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	14	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	15	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	16	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	17	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	18	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
	19	2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 };
	20	static const Uint32 offsets[6] = { 0x00000000, 0x00003080, 0x000E2080,
	21	0x03C82080, 0xFA082080, 0x82082080 };
	22
	23	// Decode the character
	24	int trailingBytes = trailing[static_cast<Uint8>( *begin )];
	25	if ( begin + trailingBytes < end ) {
	26	output = 0;
	27	switch ( trailingBytes ) {
	28	case 5:
	29	output += static_cast<Uint8>( *begin++ );
	30	output <<= 6;
	31	case 4:
	32	output += static_cast<Uint8>( *begin++ );
	33	output <<= 6;
	34	case 3:
	35	output += static_cast<Uint8>( *begin++ );
	36	output <<= 6;
	37	case 2:
	38	output += static_cast<Uint8>( *begin++ );
	39	output <<= 6;
	40	case 1:
	41	output += static_cast<Uint8>( *begin++ );
	42	output <<= 6;
	43	case 0:
	44	output += static_cast<Uint8>( *begin++ );
	45	}
	46	output -= offsets[trailingBytes];
	47	} else {
	48	// Incomplete character
	49	begin = end;
	50	output = replacement;
	51	}
	52
	53	return begin;
	54	}
	55
	56	template <typename Out> Out Utf<8>::Encode( Uint32 input, Out output, Uint8 replacement ) {
	57	// Some useful precomputed data
	58	static const Uint8 firstBytes[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
	59
	60	// Encode the character
	61	if ( ( input > 0x0010FFFF ) || ( ( input >= 0xD800 ) && ( input <= 0xDBFF ) ) ) {
	62	// Invalid character
	63	if ( replacement )
	64	*output++ = replacement;
	65	} else {
	66	// Valid character
	67
	68	// Get the number of bytes to write
	69	int bytesToWrite = 1;
	70	if ( input < 0x80 )
	71	bytesToWrite = 1;
	72	else if ( input < 0x800 )
	73	bytesToWrite = 2;
	74	else if ( input < 0x10000 )
	75	bytesToWrite = 3;
	76	else if ( input <= 0x0010FFFF )
	77	bytesToWrite = 4;
	78
	79	// Extract the bytes to write
	80	Uint8 bytes[4];
	81	switch ( bytesToWrite ) {
	82	case 4:
	83	bytes[3] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF );
	84	input >>= 6;
	85	case 3:
	86	bytes[2] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF );
	87	input >>= 6;
	88	case 2:
	89	bytes[1] = static_cast<Uint8>( ( input | 0x80 ) & 0xBF );
	90	input >>= 6;
	91	case 1:
	92	bytes[0] = static_cast<Uint8>( input | firstBytes[bytesToWrite] );
	93	}
	94
	95	// Add them to the output
	96	const Uint8* currentByte = bytes;
	97	switch ( bytesToWrite ) {
	98	case 4:
	99	*output++ = *currentByte++;
	100	case 3:
	101	*output++ = *currentByte++;
	102	case 2:
	103	*output++ = *currentByte++;
	104	case 1:
	105	*output++ = *currentByte++;
	106	}
	107	}
	108
	109	return output;
	110	}
	111
	112	template <typename In> In Utf<8>::Next( In begin, In end ) {
	113	Uint32 codepoint;
	114	return Decode( begin, end, codepoint );
	115	}
	116
	117	template <typename In> std::size_t Utf<8>::Count( In begin, In end ) {
	118	std::size_t length = 0;
	119	while ( begin < end ) {
	120	begin = Next( begin, end );
	121	++length;
	122	}
	123
	124	return length;
	125	}
	126
	127	template <typename In, typename Out>
	128	Out Utf<8>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
	129	while ( begin < end ) {
	130	Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale );
	131	output = Encode( codepoint, output );
	132	}
	133
	134	return output;
	135	}
	136
	137	template <typename In, typename Out> Out Utf<8>::FromWide( In begin, In end, Out output ) {
	138	while ( begin < end ) {
	139	Uint32 codepoint = Utf<32>::DecodeWide( *begin++ );
	140	output = Encode( codepoint, output );
	141	}
	142
	143	return output;
	144	}
	145
	146	template <typename In, typename Out> Out Utf<8>::FromLatin1( In begin, In end, Out output ) {
	147	// Latin-1 is directly compatible with Unicode encodings,
	148	// and can thus be treated as (a sub-range of) UTF-32
	149	while ( begin < end )
	150	output = Encode( *begin++, output );
	151
	152	return output;
	153	}
	154
	155	template <typename In, typename Out>
	156	Out Utf<8>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
	157	while ( begin < end ) {
	158	Uint32 codepoint;
	159	begin = Decode( begin, end, codepoint );
	160	output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale );
	161	}
	162
	163	return output;
	164	}
	165
	166	#ifndef EFSW_NO_WIDECHAR
	167	template <typename In, typename Out>
	168	Out Utf<8>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
	169	while ( begin < end ) {
	170	Uint32 codepoint;
	171	begin = Decode( begin, end, codepoint );
	172	output = Utf<32>::EncodeWide( codepoint, output, replacement );
	173	}
	174
	175	return output;
	176	}
	177	#endif
	178
	179	template <typename In, typename Out>
	180	Out Utf<8>::ToLatin1( In begin, In end, Out output, char replacement ) {
	181	// Latin-1 is directly compatible with Unicode encodings,
	182	// and can thus be treated as (a sub-range of) UTF-32
	183	while ( begin < end ) {
	184	Uint32 codepoint;
	185	begin = Decode( begin, end, codepoint );
	186	*output++ = codepoint < 256 ? static_cast<char>( codepoint ) : replacement;
	187	}
	188
	189	return output;
	190	}
	191
	192	template <typename In, typename Out> Out Utf<8>::toUtf8( In begin, In end, Out output ) {
	193	while ( begin < end )
	194	*output++ = *begin++;
	195
	196	return output;
	197	}
	198
	199	template <typename In, typename Out> Out Utf<8>::ToUtf16( In begin, In end, Out output ) {
	200	while ( begin < end ) {
	201	Uint32 codepoint;
	202	begin = Decode( begin, end, codepoint );
	203	output = Utf<16>::Encode( codepoint, output );
	204	}
	205
	206	return output;
	207	}
	208
	209	template <typename In, typename Out> Out Utf<8>::ToUtf32( In begin, In end, Out output ) {
	210	while ( begin < end ) {
	211	Uint32 codepoint;
	212	begin = Decode( begin, end, codepoint );
	213	*output++ = codepoint;
	214	}
	215
	216	return output;
	217	}
	218
	219	template <typename In> In Utf<16>::Decode( In begin, In end, Uint32& output, Uint32 replacement ) {
	220	Uint16 first = *begin++;
	221
	222	// If it's a surrogate pair, first convert to a single UTF-32 character
	223	if ( ( first >= 0xD800 ) && ( first <= 0xDBFF ) ) {
	224	if ( begin < end ) {
	225	Uint32 second = *begin++;
	226	if ( ( second >= 0xDC00 ) && ( second <= 0xDFFF ) ) {
	227	// The second element is valid: convert the two elements to a UTF-32 character
	228	output = static_cast<Uint32>( ( ( first - 0xD800 ) << 10 ) + ( second - 0xDC00 ) +
	229	0x0010000 );
	230	} else {
	231	// Invalid character
	232	output = replacement;
	233	}
	234	} else {
	235	// Invalid character
	236	begin = end;
	237	output = replacement;
	238	}
	239	} else {
	240	// We can make a direct copy
	241	output = first;
	242	}
	243
	244	return begin;
	245	}
	246
	247	template <typename Out> Out Utf<16>::Encode( Uint32 input, Out output, Uint16 replacement ) {
	248	if ( input < 0xFFFF ) {
	249	// The character can be copied directly, we just need to check if it's in the valid range
	250	if ( ( input >= 0xD800 ) && ( input <= 0xDFFF ) ) {
	251	// Invalid character (this range is reserved)
	252	if ( replacement )
	253	*output++ = replacement;
	254	} else {
	255	// Valid character directly convertible to a single UTF-16 character
	256	*output++ = static_cast<Uint16>( input );
	257	}
	258	} else if ( input > 0x0010FFFF ) {
	259	// Invalid character (greater than the maximum unicode value)
	260	if ( replacement )
	261	*output++ = replacement;
	262	} else {
	263	// The input character will be converted to two UTF-16 elements
	264	input -= 0x0010000;
	265	*output++ = static_cast<Uint16>( ( input >> 10 ) + 0xD800 );
	266	*output++ = static_cast<Uint16>( ( input & 0x3FFUL ) + 0xDC00 );
	267	}
	268
	269	return output;
	270	}
	271
	272	template <typename In> In Utf<16>::Next( In begin, In end ) {
	273	Uint32 codepoint;
	274	return Decode( begin, end, codepoint );
	275	}
	276
	277	template <typename In> std::size_t Utf<16>::Count( In begin, In end ) {
	278	std::size_t length = 0;
	279	while ( begin < end ) {
	280	begin = Next( begin, end );
	281	++length;
	282	}
	283
	284	return length;
	285	}
	286
	287	template <typename In, typename Out>
	288	Out Utf<16>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
	289	while ( begin < end ) {
	290	Uint32 codepoint = Utf<32>::DecodeAnsi( *begin++, locale );
	291	output = Encode( codepoint, output );
	292	}
	293
	294	return output;
	295	}
	296
	297	template <typename In, typename Out> Out Utf<16>::FromWide( In begin, In end, Out output ) {
	298	while ( begin < end ) {
	299	Uint32 codepoint = Utf<32>::DecodeWide( *begin++ );
	300	output = Encode( codepoint, output );
	301	}
	302
	303	return output;
	304	}
	305
	306	template <typename In, typename Out> Out Utf<16>::FromLatin1( In begin, In end, Out output ) {
	307	// Latin-1 is directly compatible with Unicode encodings,
	308	// and can thus be treated as (a sub-range of) UTF-32
	309	while ( begin < end )
	310	output++ = begin++;
	311
	312	return output;
	313	}
	314
	315	template <typename In, typename Out>
	316	Out Utf<16>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
	317	while ( begin < end ) {
	318	Uint32 codepoint;
	319	begin = Decode( begin, end, codepoint );
	320	output = Utf<32>::EncodeAnsi( codepoint, output, replacement, locale );
	321	}
	322
	323	return output;
	324	}
	325
	326	#ifndef EFSW_NO_WIDECHAR
	327	template <typename In, typename Out>
	328	Out Utf<16>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
	329	while ( begin < end ) {
	330	Uint32 codepoint;
	331	begin = Decode( begin, end, codepoint );
	332	output = Utf<32>::EncodeWide( codepoint, output, replacement );
	333	}
	334
	335	return output;
	336	}
	337	#endif
	338
	339	template <typename In, typename Out>
	340	Out Utf<16>::ToLatin1( In begin, In end, Out output, char replacement ) {
	341	// Latin-1 is directly compatible with Unicode encodings,
	342	// and can thus be treated as (a sub-range of) UTF-32
	343	while ( begin < end ) {
	344	output++ = begin < 256 ? static_cast<char>( *begin ) : replacement;
	345	begin++;
	346	}
	347
	348	return output;
	349	}
	350
	351	template <typename In, typename Out> Out Utf<16>::toUtf8( In begin, In end, Out output ) {
	352	while ( begin < end ) {
	353	Uint32 codepoint;
	354	begin = Decode( begin, end, codepoint );
	355	output = Utf<8>::Encode( codepoint, output );
	356	}
	357
	358	return output;
	359	}
	360
	361	template <typename In, typename Out> Out Utf<16>::ToUtf16( In begin, In end, Out output ) {
	362	while ( begin < end )
	363	output++ = begin++;
	364
	365	return output;
	366	}
	367
	368	template <typename In, typename Out> Out Utf<16>::ToUtf32( In begin, In end, Out output ) {
	369	while ( begin < end ) {
	370	Uint32 codepoint;
	371	begin = Decode( begin, end, codepoint );
	372	*output++ = codepoint;
	373	}
	374
	375	return output;
	376	}
	377
	378	template <typename In> In Utf<32>::Decode( In begin, In end, Uint32& output, Uint32 ) {
	379	output = *begin++;
	380	return begin;
	381	}
	382
	383	template <typename Out> Out Utf<32>::Encode( Uint32 input, Out output, Uint32 replacement ) {
	384	*output++ = input;
	385	return output;
	386	}
	387
	388	template <typename In> In Utf<32>::Next( In begin, In end ) {
	389	return ++begin;
	390	}
	391
	392	template <typename In> std::size_t Utf<32>::Count( In begin, In end ) {
	393	return begin - end;
	394	}
	395
	396	template <typename In, typename Out>
	397	Out Utf<32>::FromAnsi( In begin, In end, Out output, const std::locale& locale ) {
	398	while ( begin < end )
	399	output++ = DecodeAnsi( begin++, locale );
	400
	401	return output;
	402	}
	403
	404	template <typename In, typename Out> Out Utf<32>::FromWide( In begin, In end, Out output ) {
	405	while ( begin < end )
	406	output++ = DecodeWide( begin++ );
	407
	408	return output;
	409	}
	410
	411	template <typename In, typename Out> Out Utf<32>::FromLatin1( In begin, In end, Out output ) {
	412	// Latin-1 is directly compatible with Unicode encodings,
	413	// and can thus be treated as (a sub-range of) UTF-32
	414	while ( begin < end )
	415	output++ = begin++;
	416
	417	return output;
	418	}
	419
	420	template <typename In, typename Out>
	421	Out Utf<32>::ToAnsi( In begin, In end, Out output, char replacement, const std::locale& locale ) {
	422	while ( begin < end )
	423	output = EncodeAnsi( *begin++, output, replacement, locale );
	424
	425	return output;
	426	}
	427
	428	#ifndef EFSW_NO_WIDECHAR
	429	template <typename In, typename Out>
	430	Out Utf<32>::ToWide( In begin, In end, Out output, wchar_t replacement ) {
	431	while ( begin < end )
	432	output = EncodeWide( *begin++, output, replacement );
	433
	434	return output;
	435	}
	436	#endif
	437
	438	template <typename In, typename Out>
	439	Out Utf<32>::ToLatin1( In begin, In end, Out output, char replacement ) {
	440	// Latin-1 is directly compatible with Unicode encodings,
	441	// and can thus be treated as (a sub-range of) UTF-32
	442	while ( begin < end ) {
	443	output++ = begin < 256 ? static_cast<char>( *begin ) : replacement;
	444	begin++;
	445	}
	446
	447	return output;
	448	}
	449
	450	template <typename In, typename Out> Out Utf<32>::toUtf8( In begin, In end, Out output ) {
	451	while ( begin < end )
	452	output = Utf<8>::Encode( *begin++, output );
	453
	454	return output;
	455	}
	456
	457	template <typename In, typename Out> Out Utf<32>::ToUtf16( In begin, In end, Out output ) {
	458	while ( begin < end )
	459	output = Utf<16>::Encode( *begin++, output );
	460
	461	return output;
	462	}
	463
	464	template <typename In, typename Out> Out Utf<32>::ToUtf32( In begin, In end, Out output ) {
	465	while ( begin < end )
	466	output++ = begin++;
	467
	468	return output;
	469	}
	470
	471	template <typename In> Uint32 Utf<32>::DecodeAnsi( In input, const std::locale& locale ) {
	472	// On Windows, gcc's standard library (glibc++) has almost
	473	// no support for Unicode stuff. As a consequence, in this
	474	// context we can only use the default locale and ignore
	475	// the one passed as parameter.
	476
	477	#if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \
	478	( defined( __GLIBCPP__ ) \|\| \
	479	defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \
	480	!( defined( __SGI_STL_PORT ) \|\| \
	481	defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */
	482
	483	wchar_t character = 0;
	484	mbtowc( &character, &input, 1 );
	485	return static_cast<Uint32>( character );
	486
	487	#else
	488	// Get the facet of the locale which deals with character conversion
	489	#ifndef EFSW_NO_WIDECHAR
	490	const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale );
	491	#else
	492	const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale );
	493	#endif
	494
	495	// Use the facet to convert each character of the input string
	496	return static_cast<Uint32>( facet.widen( input ) );
	497
	498	#endif
	499	}
	500
	501	template <typename In> Uint32 Utf<32>::DecodeWide( In input ) {
	502	// The encoding of wide characters is not well defined and is left to the system;
	503	// however we can safely assume that it is UCS-2 on Windows and
	504	// UCS-4 on Unix systems.
	505	// In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4,
	506	// and UCS-4 is UTF-32).
	507
	508	return input;
	509	}
	510
	511	template <typename Out>
	512	Out Utf<32>::EncodeAnsi( Uint32 codepoint, Out output, char replacement,
	513	const std::locale& locale ) {
	514	// On Windows, gcc's standard library (glibc++) has almost
	515	// no support for Unicode stuff. As a consequence, in this
	516	// context we can only use the default locale and ignore
	517	// the one passed as parameter.
	518
	519	#if EFSW_PLATFORM == EFSW_PLATFORM_WIN && /* if Windows ... */ \
	520	( defined( __GLIBCPP__ ) \|\| \
	521	defined( __GLIBCXX__ ) ) && /* ... and standard library is glibc++ ... */ \
	522	!( defined( __SGI_STL_PORT ) \|\| \
	523	defined( _STLPORT_VERSION ) ) /* ... and STLPort is not used on top of it */
	524
	525	char character = 0;
	526	if ( wctomb( &character, static_cast<wchar_t>( codepoint ) ) >= 0 )
	527	*output++ = character;
	528	else if ( replacement )
	529	*output++ = replacement;
	530
	531	return output;
	532
	533	#else
	534	// Get the facet of the locale which deals with character conversion
	535	#ifndef EFSW_NO_WIDECHAR
	536	const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>( locale );
	537	#else
	538	const std::ctype<char>& facet = std::use_facet<std::ctype<char>>( locale );
	539	#endif
	540
	541	// Use the facet to convert each character of the input string
	542	*output++ = facet.narrow( static_cast<wchar_t>( codepoint ), replacement );
	543
	544	return output;
	545
	546	#endif
	547	}
	548
	549	#ifndef EFSW_NO_WIDECHAR
	550	template <typename Out>
	551	Out Utf<32>::EncodeWide( Uint32 codepoint, Out output, wchar_t replacement ) {
	552	// The encoding of wide characters is not well defined and is left to the system;
	553	// however we can safely assume that it is UCS-2 on Windows and
	554	// UCS-4 on Unix systems.
	555	// For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4).
	556	// For UCS-4 we can do a direct copy (UCS-4 is UTF-32).
	557
	558	switch ( sizeof( wchar_t ) ) {
	559	case 4: {
	560	*output++ = static_cast<wchar_t>( codepoint );
	561	break;
	562	}
	563
	564	default: {
	565	if ( ( codepoint <= 0xFFFF ) && ( ( codepoint < 0xD800 ) \|\| ( codepoint > 0xDFFF ) ) ) {
	566	*output++ = static_cast<wchar_t>( codepoint );
	567	} else if ( replacement ) {
	568	*output++ = replacement;
	569	}
	570	break;
	571	}
	572	}
	573
	574	return output;
	575	}
	576	#endif