// path: root/MoonParser/pegtl/internal/peek_utf8.hpp
// blob: 9ea0a0e14931f04cc03db15b5804e5c63aa3864a (plain)
// Copyright (c) 2014-2017 Dr. Colin Hirsch and Daniel Frey
// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/

#ifndef TAOCPP_PEGTL_INCLUDE_INTERNAL_PEEK_UTF8_HPP
#define TAOCPP_PEGTL_INCLUDE_INTERNAL_PEEK_UTF8_HPP

#include "../config.hpp"

#include "input_pair.hpp"

namespace tao
{
   namespace TAOCPP_PEGTL_NAMESPACE
   {
      namespace internal
      {
         // Peek policy that decodes one UTF-8 encoded code point starting at the
         // current position of an input, without consuming anything.
         //
         // peek() returns the decoded code point together with the number of
         // bytes it spans (1 to 4). It returns { 0, 0 } when no well-formed
         // sequence starts at the current position, i.e. when
         //   - the lead byte is a stray continuation byte or an unsupported
         //     lead byte (0xF8 and above),
         //   - fewer bytes are available than the lead byte announces,
         //   - a trailing byte is not of the form 10xxxxxx,
         //   - the encoding is overlong (value representable in fewer bytes),
         //   - the value is a UTF-16 surrogate (U+D800..U+DFFF), or
         //   - the value exceeds U+10FFFF.
         struct peek_utf8
         {
            using data_t = char32_t;
            using pair_t = input_pair< char32_t >;

            // Decodes the code point at the front of `in`.
            //
            // NOTE(review): the first byte is read unconditionally, so callers
            // are presumably expected to have checked that the input is not
            // empty - confirm against the call sites.
            // NOTE(review): in.size( n ) is assumed to report how many bytes
            // are available (possibly after trying to make n available).
            template< typename Input >
            static pair_t peek( Input& in )
            {
               char32_t c0 = in.peek_byte();

               // 0xxxxxxx: single byte, the value is the byte itself.
               if( ( c0 & 0x80 ) == 0 ) {
                  return { c0, 1 };
               }
               // 110xxxxx 10xxxxxx: two byte sequence.
               if( ( c0 & 0xE0 ) == 0xC0 ) {
                  if( in.size( 2 ) >= 2 ) {
                     const char32_t c1 = in.peek_byte( 1 );
                     if( ( c1 & 0xC0 ) == 0x80 ) {
                        c0 &= 0x1F;
                        c0 <<= 6;
                        c0 |= ( c1 & 0x3F );
                        // Values below 0x80 fit in one byte: overlong, reject.
                        if( c0 >= 0x80 ) {
                           return { c0, 2 };
                        }
                     }
                  }
               }
               // 1110xxxx 10xxxxxx 10xxxxxx: three byte sequence.
               else if( ( c0 & 0xF0 ) == 0xE0 ) {
                  if( in.size( 3 ) >= 3 ) {
                     const char32_t c1 = in.peek_byte( 1 );
                     const char32_t c2 = in.peek_byte( 2 );
                     if( ( ( c1 & 0xC0 ) == 0x80 ) && ( ( c2 & 0xC0 ) == 0x80 ) ) {
                        c0 &= 0x0F;
                        c0 <<= 6;
                        c0 |= ( c1 & 0x3F );
                        c0 <<= 6;
                        c0 |= ( c2 & 0x3F );
                        // Values below 0x800 fit in two bytes (overlong), and
                        // U+D800..U+DFFF are UTF-16 surrogates which must never
                        // appear in well-formed UTF-8: reject both.
                        if( c0 >= 0x800 && !( c0 >= 0xD800 && c0 <= 0xDFFF ) ) {
                           return { c0, 3 };
                        }
                     }
                  }
               }
               // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four byte sequence.
               else if( ( c0 & 0xF8 ) == 0xF0 ) {
                  if( in.size( 4 ) >= 4 ) {
                     const char32_t c1 = in.peek_byte( 1 );
                     const char32_t c2 = in.peek_byte( 2 );
                     const char32_t c3 = in.peek_byte( 3 );
                     if( ( ( c1 & 0xC0 ) == 0x80 ) && ( ( c2 & 0xC0 ) == 0x80 ) && ( ( c3 & 0xC0 ) == 0x80 ) ) {
                        c0 &= 0x07;
                        c0 <<= 6;
                        c0 |= ( c1 & 0x3F );
                        c0 <<= 6;
                        c0 |= ( c2 & 0x3F );
                        c0 <<= 6;
                        c0 |= ( c3 & 0x3F );
                        // Values below 0x10000 fit in three bytes (overlong);
                        // values above 0x10FFFF are outside the Unicode range.
                        if( c0 >= 0x10000 && c0 <= 0x10FFFF ) {
                           return { c0, 4 };
                        }
                     }
                  }
               }
               // Anything that reaches this point is not a well-formed sequence.
               return { 0, 0 };
            }
         };

      }  // namespace internal

   }  // namespace TAOCPP_PEGTL_NAMESPACE

}  // namespace tao

#endif