aboutsummaryrefslogtreecommitdiff
path: root/MoonParser/pegtl/contrib/unescape.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'MoonParser/pegtl/contrib/unescape.hpp')
-rw-r--r--MoonParser/pegtl/contrib/unescape.hpp203
1 files changed, 0 insertions, 203 deletions
diff --git a/MoonParser/pegtl/contrib/unescape.hpp b/MoonParser/pegtl/contrib/unescape.hpp
deleted file mode 100644
index 2a7c53f..0000000
--- a/MoonParser/pegtl/contrib/unescape.hpp
+++ /dev/null
@@ -1,203 +0,0 @@
1// Copyright (c) 2014-2017 Dr. Colin Hirsch and Daniel Frey
2// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/
3
4#ifndef TAOCPP_PEGTL_INCLUDE_CONTRIB_UNESCAPE_HPP
5#define TAOCPP_PEGTL_INCLUDE_CONTRIB_UNESCAPE_HPP
6
7#include <cassert>
8#include <string>
9
10#include "../ascii.hpp"
11#include "../config.hpp"
12#include "../parse_error.hpp"
13
14namespace tao
15{
16 namespace TAOCPP_PEGTL_NAMESPACE
17 {
18 namespace unescape
19 {
// Minimal state object usable with the unescape actions in this file:
// every action appends its output to the `unescaped` member.
struct state
{
   // Accumulates the unescaped result as the actions are applied.
   std::string unescaped;
};
24
25 // Utility functions for the unescape actions.
26
// Appends the UTF-8 encoding of the code point utf32 to string.
//
// Returns true when the code point was encoded and appended.
// Returns false, leaving string unchanged, when utf32 is not a valid
// Unicode scalar value, i.e. when it is larger than 0x10ffff or when it
// lies in the UTF-16 surrogate range 0xd800-0xdfff (encoding a surrogate
// would produce ill-formed UTF-8). Callers MUST check the result.
inline bool utf8_append_utf32( std::string& string, const unsigned utf32 )
{
   if( utf32 <= 0x7f ) {
      // One byte: 0xxxxxxx
      string += char( utf32 & 0xff );
      return true;
   }
   if( utf32 <= 0x7ff ) {
      // Two bytes: 110xxxxx 10xxxxxx
      const char tmp[] = { char( ( ( utf32 & 0x7c0 ) >> 6 ) | 0xc0 ),
                           char( ( ( utf32 & 0x03f ) ) | 0x80 ) };
      string.append( tmp, sizeof( tmp ) );
      return true;
   }
   if( utf32 <= 0xffff ) {
      if( ( 0xd800 <= utf32 ) && ( utf32 <= 0xdfff ) ) {
         // UTF-16 surrogates are not scalar values and have no UTF-8 form.
         return false;
      }
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      const char tmp[] = { char( ( ( utf32 & 0xf000 ) >> 12 ) | 0xe0 ),
                           char( ( ( utf32 & 0x0fc0 ) >> 6 ) | 0x80 ),
                           char( ( ( utf32 & 0x003f ) ) | 0x80 ) };
      string.append( tmp, sizeof( tmp ) );
      return true;
   }
   if( utf32 <= 0x10ffff ) {
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      const char tmp[] = { char( ( ( utf32 & 0x1c0000 ) >> 18 ) | 0xf0 ),
                           char( ( ( utf32 & 0x03f000 ) >> 12 ) | 0x80 ),
                           char( ( ( utf32 & 0x000fc0 ) >> 6 ) | 0x80 ),
                           char( ( ( utf32 & 0x00003f ) ) | 0x80 ) };
      string.append( tmp, sizeof( tmp ) );
      return true;
   }
   // Beyond the last Unicode code point.
   return false;
}
56
// Converts a single hexadecimal digit to its numeric value, returned as I.
// This function MUST only be called for characters matching
// tao::TAOCPP_PEGTL_NAMESPACE::ascii::xdigit; any other character
// results in a std::runtime_error.
template< typename I >
I unhex_char( const char c )
{
   if( ( '0' <= c ) && ( c <= '9' ) ) {
      return I( c - '0' );
   }
   if( ( 'a' <= c ) && ( c <= 'f' ) ) {
      return I( c - 'a' + 10 );
   }
   if( ( 'A' <= c ) && ( c <= 'F' ) ) {
      return I( c - 'A' + 10 );
   }
   throw std::runtime_error( "invalid character in unhex" );  // LCOV_EXCL_LINE
}
90
91 template< typename I >
92 I unhex_string( const char* begin, const char* const end )
93 {
94 I r = 0;
95 while( begin != end ) {
96 r <<= 4;
97 r += unhex_char< I >( *begin++ );
98 }
99 return r;
100 }
101
102 // Actions for common unescape situations.
103
// Action that copies the matched input verbatim to st.unescaped.
struct append_all
{
   template< typename Input, typename State >
   static void apply( const Input& in, State& st )
   {
      const auto first = in.begin();
      const auto count = in.size();
      st.unescaped.append( first, count );
   }
};
112
113 // This action MUST be called for a character matching T which MUST be tao::TAOCPP_PEGTL_NAMESPACE::one< ... >.
114 template< typename T, char... Rs >
115 struct unescape_c
116 {
117 template< typename Input, typename State >
118 static void apply( const Input& in, State& st )
119 {
120 assert( in.size() == 1 );
121 st.unescaped += apply_one( *in.begin(), static_cast< const T* >( nullptr ) );
122 }
123
124 template< char... Qs >
125 static char apply_one( const char c, const one< Qs... >* )
126 {
127 static_assert( sizeof...( Qs ) == sizeof...( Rs ), "size mismatch between escaped characters and their mappings" );
128 return apply_two( c, { Qs... }, { Rs... } );
129 }
130
131 static char apply_two( const char c, const std::initializer_list< char >& q, const std::initializer_list< char >& r )
132 {
133 for( std::size_t i = 0; i < q.size(); ++i ) {
134 if( *( q.begin() + i ) == c ) {
135 return *( r.begin() + i );
136 }
137 }
138 throw std::runtime_error( "invalid character in unescape" ); // LCOV_EXCL_LINE
139 }
140 };
141
142 // See src/example/pegtl/unescape.cpp for why the following two actions
143 // skip the first input character. They also MUST be called
144 // with non-empty matched inputs!
145
146 struct unescape_u
147 {
148 template< typename Input, typename State >
149 static void apply( const Input& in, State& st )
150 {
151 assert( !in.empty() ); // First character MUST be present, usually 'u' or 'U'.
152 if( !utf8_append_utf32( st.unescaped, unhex_string< unsigned >( in.begin() + 1, in.end() ) ) ) {
153 throw parse_error( "invalid escaped unicode code point", in );
154 }
155 }
156 };
157
158 struct unescape_x
159 {
160 template< typename Input, typename State >
161 static void apply( const Input& in, State& st )
162 {
163 assert( !in.empty() ); // First character MUST be present, usually 'x'.
164 st.unescaped += unhex_string< char >( in.begin() + 1, in.end() );
165 }
166 };
167
168 // The unescape_j action is similar to unescape_u, however unlike
169 // unescape_u it
170 // (a) assumes exactly 4 hexdigits per escape sequence,
171 // (b) accepts multiple consecutive escaped 16-bit values.
172 // When applied to more than one escape sequence, unescape_j
173 // translates UTF-16 surrogate pairs in the input into a single
174 // UTF-8 sequence in st.unescaped, as required for JSON by RFC 7159.
175
176 struct unescape_j
177 {
178 template< typename Input, typename State >
179 static void apply( const Input& in, State& st )
180 {
181 assert( ( ( in.size() + 1 ) % 6 ) == 0 ); // Expects multiple "\\u1234", starting with the first "u".
182 for( const char* b = in.begin() + 1; b < in.end(); b += 6 ) {
183 const auto c = unhex_string< unsigned >( b, b + 4 );
184 if( ( 0xd800 <= c ) && ( c <= 0xdbff ) && ( b + 6 < in.end() ) ) {
185 const auto d = unhex_string< unsigned >( b + 6, b + 10 );
186 if( ( 0xdc00 <= d ) && ( d <= 0xdfff ) ) {
187 b += 6;
188 utf8_append_utf32( st.unescaped, ( ( ( c & 0x03ff ) << 10 ) | ( d & 0x03ff ) ) + 0x10000 );
189 continue;
190 }
191 }
192 utf8_append_utf32( st.unescaped, c );
193 }
194 }
195 };
196
197 } // namespace unescape
198
199 } // namespace TAOCPP_PEGTL_NAMESPACE
200
201} // namespace tao
202
203#endif