diff options
Diffstat (limited to 'MoonParser/pegtl/contrib/unescape.hpp')
-rw-r--r-- | MoonParser/pegtl/contrib/unescape.hpp | 203 |
1 files changed, 0 insertions, 203 deletions
diff --git a/MoonParser/pegtl/contrib/unescape.hpp b/MoonParser/pegtl/contrib/unescape.hpp deleted file mode 100644 index 2a7c53f..0000000 --- a/MoonParser/pegtl/contrib/unescape.hpp +++ /dev/null | |||
@@ -1,203 +0,0 @@ | |||
1 | // Copyright (c) 2014-2017 Dr. Colin Hirsch and Daniel Frey | ||
2 | // Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ | ||
3 | |||
4 | #ifndef TAOCPP_PEGTL_INCLUDE_CONTRIB_UNESCAPE_HPP | ||
5 | #define TAOCPP_PEGTL_INCLUDE_CONTRIB_UNESCAPE_HPP | ||
6 | |||
7 | #include <cassert> | ||
8 | #include <string> | ||
9 | |||
10 | #include "../ascii.hpp" | ||
11 | #include "../config.hpp" | ||
12 | #include "../parse_error.hpp" | ||
13 | |||
14 | namespace tao | ||
15 | { | ||
16 | namespace TAOCPP_PEGTL_NAMESPACE | ||
17 | { | ||
18 | namespace unescape | ||
19 | { | ||
// Minimal state object for the unescape actions in this header: every
// action appends its result to the `unescaped` member of the state it is
// handed.
struct state
{
   std::string unescaped;
};
24 | |||
25 | // Utility functions for the unescape actions. | ||
26 | |||
// Appends the UTF-8 encoding of the code point `utf32` to `string`.
//
// Returns true when the code point was encoded and appended. Returns false,
// leaving `string` untouched, when `utf32` is not a Unicode scalar value,
// i.e. when it is a UTF-16 surrogate (0xd800..0xdfff) or larger than 0x10ffff.
inline bool utf8_append_utf32( std::string& string, const unsigned utf32 )
{
   if( utf32 <= 0x7f ) {
      // One byte: 0xxxxxxx
      string += char( utf32 & 0xff );
      return true;
   }
   if( utf32 <= 0x7ff ) {
      // Two bytes: 110xxxxx 10xxxxxx
      const char tmp[] = { char( ( ( utf32 & 0x7c0 ) >> 6 ) | 0xc0 ),
                           char( ( ( utf32 & 0x03f ) ) | 0x80 ) };
      string.append( tmp, sizeof( tmp ) );
      return true;
   }
   if( utf32 <= 0xffff ) {
      if( ( 0xd800 <= utf32 ) && ( utf32 <= 0xdfff ) ) {
         // Surrogates only exist to form UTF-16 pairs; encoding one on its
         // own would produce an ill-formed UTF-8 sequence.
         return false;
      }
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      const char tmp[] = { char( ( ( utf32 & 0xf000 ) >> 12 ) | 0xe0 ),
                           char( ( ( utf32 & 0x0fc0 ) >> 6 ) | 0x80 ),
                           char( ( ( utf32 & 0x003f ) ) | 0x80 ) };
      string.append( tmp, sizeof( tmp ) );
      return true;
   }
   if( utf32 <= 0x10ffff ) {
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      const char tmp[] = { char( ( ( utf32 & 0x1c0000 ) >> 18 ) | 0xf0 ),
                           char( ( ( utf32 & 0x03f000 ) >> 12 ) | 0x80 ),
                           char( ( ( utf32 & 0x000fc0 ) >> 6 ) | 0x80 ),
                           char( ( ( utf32 & 0x00003f ) ) | 0x80 ) };
      string.append( tmp, sizeof( tmp ) );
      return true;
   }
   // Beyond the last Unicode code point.
   return false;
}
56 | |||
// Converts one hexadecimal digit to its numeric value (0..15), returned as I.
//
// This function MUST only be called for characters matching
// tao::TAOCPP_PEGTL_NAMESPACE::ascii::xdigit, i.e. 0-9, a-f or A-F; for any
// other character it throws std::runtime_error.
template< typename I >
I unhex_char( const char c )
{
   if( ( '0' <= c ) && ( c <= '9' ) ) {
      return I( c - '0' );
   }
   if( ( 'a' <= c ) && ( c <= 'f' ) ) {
      return I( c - 'a' + 10 );
   }
   if( ( 'A' <= c ) && ( c <= 'F' ) ) {
      return I( c - 'A' + 10 );
   }
   throw std::runtime_error( "invalid character in unhex" );  // LCOV_EXCL_LINE
}
90 | |||
91 | template< typename I > | ||
92 | I unhex_string( const char* begin, const char* const end ) | ||
93 | { | ||
94 | I r = 0; | ||
95 | while( begin != end ) { | ||
96 | r <<= 4; | ||
97 | r += unhex_char< I >( *begin++ ); | ||
98 | } | ||
99 | return r; | ||
100 | } | ||
101 | |||
102 | // Actions for common unescape situations. | ||
103 | |||
// Action that copies the matched input unchanged to the end of st.unescaped.
struct append_all
{
   template< typename Input, typename State >
   static void apply( const Input& in, State& st )
   {
      auto& target = st.unescaped;
      target.append( in.begin(), in.size() );
   }
};
112 | |||
113 | // This action MUST be called for a character matching T which MUST be tao::TAOCPP_PEGTL_NAMESPACE::one< ... >. | ||
114 | template< typename T, char... Rs > | ||
115 | struct unescape_c | ||
116 | { | ||
117 | template< typename Input, typename State > | ||
118 | static void apply( const Input& in, State& st ) | ||
119 | { | ||
120 | assert( in.size() == 1 ); | ||
121 | st.unescaped += apply_one( *in.begin(), static_cast< const T* >( nullptr ) ); | ||
122 | } | ||
123 | |||
124 | template< char... Qs > | ||
125 | static char apply_one( const char c, const one< Qs... >* ) | ||
126 | { | ||
127 | static_assert( sizeof...( Qs ) == sizeof...( Rs ), "size mismatch between escaped characters and their mappings" ); | ||
128 | return apply_two( c, { Qs... }, { Rs... } ); | ||
129 | } | ||
130 | |||
131 | static char apply_two( const char c, const std::initializer_list< char >& q, const std::initializer_list< char >& r ) | ||
132 | { | ||
133 | for( std::size_t i = 0; i < q.size(); ++i ) { | ||
134 | if( *( q.begin() + i ) == c ) { | ||
135 | return *( r.begin() + i ); | ||
136 | } | ||
137 | } | ||
138 | throw std::runtime_error( "invalid character in unescape" ); // LCOV_EXCL_LINE | ||
139 | } | ||
140 | }; | ||
141 | |||
142 | // See src/example/pegtl/unescape.cpp for why the following two actions | ||
143 | // skip the first input character. They also MUST be called | ||
144 | // with non-empty matched inputs! | ||
145 | |||
146 | struct unescape_u | ||
147 | { | ||
148 | template< typename Input, typename State > | ||
149 | static void apply( const Input& in, State& st ) | ||
150 | { | ||
151 | assert( !in.empty() ); // First character MUST be present, usually 'u' or 'U'. | ||
152 | if( !utf8_append_utf32( st.unescaped, unhex_string< unsigned >( in.begin() + 1, in.end() ) ) ) { | ||
153 | throw parse_error( "invalid escaped unicode code point", in ); | ||
154 | } | ||
155 | } | ||
156 | }; | ||
157 | |||
158 | struct unescape_x | ||
159 | { | ||
160 | template< typename Input, typename State > | ||
161 | static void apply( const Input& in, State& st ) | ||
162 | { | ||
163 | assert( !in.empty() ); // First character MUST be present, usually 'x'. | ||
164 | st.unescaped += unhex_string< char >( in.begin() + 1, in.end() ); | ||
165 | } | ||
166 | }; | ||
167 | |||
168 | // The unescape_j action is similar to unescape_u, however unlike | ||
169 | // unescape_u it | ||
170 | // (a) assumes exactly 4 hexdigits per escape sequence, | ||
171 | // (b) accepts multiple consecutive escaped 16-bit values. | ||
172 | // When applied to more than one escape sequence, unescape_j | ||
173 | // translates UTF-16 surrogate pairs in the input into a single | ||
174 | // UTF-8 sequence in st.unescaped, as required for JSON by RFC 7159. | ||
175 | |||
176 | struct unescape_j | ||
177 | { | ||
178 | template< typename Input, typename State > | ||
179 | static void apply( const Input& in, State& st ) | ||
180 | { | ||
181 | assert( ( ( in.size() + 1 ) % 6 ) == 0 ); // Expects multiple "\\u1234", starting with the first "u". | ||
182 | for( const char* b = in.begin() + 1; b < in.end(); b += 6 ) { | ||
183 | const auto c = unhex_string< unsigned >( b, b + 4 ); | ||
184 | if( ( 0xd800 <= c ) && ( c <= 0xdbff ) && ( b + 6 < in.end() ) ) { | ||
185 | const auto d = unhex_string< unsigned >( b + 6, b + 10 ); | ||
186 | if( ( 0xdc00 <= d ) && ( d <= 0xdfff ) ) { | ||
187 | b += 6; | ||
188 | utf8_append_utf32( st.unescaped, ( ( ( c & 0x03ff ) << 10 ) | ( d & 0x03ff ) ) + 0x10000 ); | ||
189 | continue; | ||
190 | } | ||
191 | } | ||
192 | utf8_append_utf32( st.unescaped, c ); | ||
193 | } | ||
194 | } | ||
195 | }; | ||
196 | |||
197 | } // namespace unescape | ||
198 | |||
199 | } // namespace TAOCPP_PEGTL_NAMESPACE | ||
200 | |||
201 | } // namespace tao | ||
202 | |||
203 | #endif | ||