From 05edfcff9b7eca52571b221e614b5cbf84e7d43d Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 10 Apr 2023 13:47:45 -0300 Subject: Documentation Removed '$Id' from all files + updated copyright year + other changes in comments and documentation --- HISTORY | 8 +++++- lpcap.c | 4 --- lpcap.h | 3 --- lpcode.c | 4 --- lpcode.h | 3 --- lpeg.html | 92 +++------------------------------------------------------------ lpprint.c | 4 --- lpprint.h | 4 --- lptree.c | 4 --- lptree.h | 3 --- lptypes.h | 3 +-- lpvm.c | 4 --- lpvm.h | 3 --- re.html | 9 +++---- re.lua | 1 - 15 files changed, 15 insertions(+), 134 deletions(-) diff --git a/HISTORY b/HISTORY index 66a8e14..7fa72e6 100644 --- a/HISTORY +++ b/HISTORY @@ -1,4 +1,10 @@ -HISTORY for LPeg 1.0.2 +HISTORY for LPeg 1.1.0 + +* Changes from version 1.0.2 to 1.1.0 + --------------------------------- + + UTF-8 ranges + + Larger limit for number of rules in a grammar + + bug fixes * Changes from version 1.0.1 to 1.0.2 --------------------------------- diff --git a/lpcap.c b/lpcap.c index b332fde..ec17d23 100644 --- a/lpcap.c +++ b/lpcap.c @@ -1,7 +1,3 @@ -/* -** $Id: lpcap.c $ -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ #include "lua.h" #include "lauxlib.h" diff --git a/lpcap.h b/lpcap.h index dc10d69..10539a0 100644 --- a/lpcap.h +++ b/lpcap.h @@ -1,6 +1,3 @@ -/* -** $Id: lpcap.h $ -*/ #if !defined(lpcap_h) #define lpcap_h diff --git a/lpcode.c b/lpcode.c index 64ad8ac..f2ae754 100644 --- a/lpcode.c +++ b/lpcode.c @@ -1,7 +1,3 @@ -/* -** $Id: lpcode.c $ -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ #include diff --git a/lpcode.h b/lpcode.h index 34ee276..ec5f43f 100644 --- a/lpcode.h +++ b/lpcode.h @@ -1,6 +1,3 @@ -/* -** $Id: lpcode.h $ -*/ #if !defined(lpcode_h) #define lpcode_h diff --git a/lpeg.html b/lpeg.html index f4d8658..f50d327 100644 --- a/lpeg.html +++ b/lpeg.html @@ -10,7 +10,6 @@ -
@@ -664,10 +663,10 @@ LPeg does not specify when (and if) it evaluates its captures. consider the pattern lpeg.P"a" / func / 0. Because the "division" by 0 instructs LPeg to throw away the results from the pattern, -LPeg may or may not call func.) +it is not specified whether LPeg will call func.) Therefore, captures should avoid side effects. Moreover, -most captures cannot affect the way a pattern matches a subject. +captures cannot affect the way a pattern matches a subject. The only exception to this rule is the so-called match-time capture. When a match-time capture matches, @@ -1175,91 +1174,6 @@ local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1) -

UTF-8 and Latin 1

-

-It is not difficult to use LPeg to convert a string from -UTF-8 encoding to Latin 1 (ISO 8859-1): -

- -
--- convert a two-byte UTF-8 sequence to a Latin 1 character
-local function f2 (s)
-  local c1, c2 = string.byte(s, 1, 2)
-  return string.char(c1 * 64 + c2 - 12416)
-end
-
-local utf8 = lpeg.R("\0\127")
-           + lpeg.R("\194\195") * lpeg.R("\128\191") / f2
-
-local decode_pattern = lpeg.Cs(utf8^0) * -1
-
-

-In this code, -the definition of UTF-8 is already restricted to the -Latin 1 range (from 0 to 255). -Any encoding outside this range (as well as any invalid encoding) -will not match that pattern. -

- -

-As the definition of decode_pattern demands that -the pattern matches the whole input (because of the -1 at its end), -any invalid string will simply fail to match, -without any useful information about the problem. -We can improve this situation redefining decode_pattern -as follows: -

-
-local function er (_, i) error("invalid encoding at position " .. i) end
-
-local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er))
-
-

-Now, if the pattern utf8^0 stops -before the end of the string, -an appropriate error function is called. -

- - -

UTF-8 and Unicode

-

-We can extend the previous patterns to handle all Unicode code points. -Of course, -we cannot translate them to Latin 1 or any other one-byte encoding. -Instead, our translation results in a array with the code points -represented as numbers. -The full code is here: -

-
--- decode a two-byte UTF-8 sequence
-local function f2 (s)
-  local c1, c2 = string.byte(s, 1, 2)
-  return c1 * 64 + c2 - 12416
-end
-
--- decode a three-byte UTF-8 sequence
-local function f3 (s)
-  local c1, c2, c3 = string.byte(s, 1, 3)
-  return (c1 * 64 + c2) * 64 + c3 - 925824
-end
-
--- decode a four-byte UTF-8 sequence
-local function f4 (s)
-  local c1, c2, c3, c4 = string.byte(s, 1, 4)
-  return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
-end
-
-local cont = lpeg.R("\128\191")   -- continuation byte
-
-local utf8 = lpeg.R("\0\127") / string.byte
-           + lpeg.R("\194\223") * cont / f2
-           + lpeg.R("\224\239") * cont * cont / f3
-           + lpeg.R("\240\244") * cont * cont * cont / f4
-
-local decode_pattern = lpeg.Ct(utf8^0) * -1
-
- -

Lua's long strings

A long string in Lua starts with the pattern [=*[ @@ -1416,7 +1330,7 @@ the following command is all you need to install LPeg:

License

-Copyright © 2007-2019 Lua.org, PUC-Rio. +Copyright © 2007-2023 Lua.org, PUC-Rio.

Permission is hereby granted, free of charge, diff --git a/lpprint.c b/lpprint.c index 6893bb8..1c1b7b6 100644 --- a/lpprint.c +++ b/lpprint.c @@ -1,7 +1,3 @@ -/* -** $Id: lpprint.c $ -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ #include #include diff --git a/lpprint.h b/lpprint.h index 15ef121..42d7f98 100644 --- a/lpprint.h +++ b/lpprint.h @@ -1,7 +1,3 @@ -/* -** $Id: lpprint.h $ -*/ - #if !defined(lpprint_h) #define lpprint_h diff --git a/lptree.c b/lptree.c index 2318153..4affac9 100644 --- a/lptree.c +++ b/lptree.c @@ -1,7 +1,3 @@ -/* -** $Id: lptree.c $ -** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ #include #include diff --git a/lptree.h b/lptree.h index 892e013..aa331d2 100644 --- a/lptree.h +++ b/lptree.h @@ -1,6 +1,3 @@ -/* -** $Id: lptree.h $ -*/ #if !defined(lptree_h) #define lptree_h diff --git a/lptypes.h b/lptypes.h index ccb4c18..98b9597 100644 --- a/lptypes.h +++ b/lptypes.h @@ -1,7 +1,6 @@ /* -** $Id: lptypes.h $ ** LPeg - PEG pattern matching for Lua -** Copyright 2007-2019, Lua.org & PUC-Rio (see 'lpeg.html' for license) +** Copyright 2007-2023, Lua.org & PUC-Rio (see 'lpeg.html' for license) ** written by Roberto Ierusalimschy */ diff --git a/lpvm.c b/lpvm.c index 72ac1dd..8c001fc 100644 --- a/lpvm.c +++ b/lpvm.c @@ -1,7 +1,3 @@ -/* -** $Id: lpvm.c $ -** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) -*/ #include #include diff --git a/lpvm.h b/lpvm.h index ca625f9..cc79dcd 100644 --- a/lpvm.h +++ b/lpvm.h @@ -1,6 +1,3 @@ -/* -** $Id: lpvm.h $ -*/ #if !defined(lpvm_h) #define lpvm_h diff --git a/re.html b/re.html index 24a582a..ed4ccb1 100644 --- a/re.html +++ b/re.html @@ -10,7 +10,6 @@ -

@@ -95,7 +94,7 @@ equivalent to p / defs[name] equivalent to lpeg.Cmt(p, defs[name]) p ~> name fold capture equivalent to lpeg.Cf(p, defs[name]) -& p and predicate +& p and predicate ! p not predicate p1 p2 concatenation p1 / p2 ordered choice @@ -103,7 +102,7 @@ equivalent to lpeg.Cf(p, defs[name])

Any space appearing in a syntax description can be -replaced by zero or more space characters and Lua-style comments +replaced by zero or more space characters and Lua-style short comments (-- until end of line).

@@ -329,7 +328,7 @@ respecting the indentation:
 p = re.compile[[
   block <- {| {:ident:' '*:} line
-           ((=ident !' ' line) / &(=ident ' ') block)* |}
+           ((=ident !' ' line) / &(=ident ' ') block)* |}
   line <- {[^%nl]*} %nl
 ]]
 
@@ -453,7 +452,7 @@ print(re.match(p, p)) -- a self description must match itself

License

-Copyright © 2008-2015 Lua.org, PUC-Rio. +Copyright © 2008-2023 Lua.org, PUC-Rio.

Permission is hereby granted, free of charge, diff --git a/re.lua b/re.lua index 3bb8af7..22cb8de 100644 --- a/re.lua +++ b/re.lua @@ -1,4 +1,3 @@ --- $Id: re.lua $ -- imported functions and modules local tonumber, type, print, error = tonumber, type, print, error -- cgit v1.2.3-55-g6feb