aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-10 13:47:45 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2023-04-10 13:47:45 -0300
commit 05edfcff9b7eca52571b221e614b5cbf84e7d43d (patch)
tree 37ca0e23753461795eb3ea6df873d725b94ba54e
parent afd7b912611d5777c12d55cb533beee760207949 (diff)
download lpeg-05edfcff9b7eca52571b221e614b5cbf84e7d43d.tar.gz
lpeg-05edfcff9b7eca52571b221e614b5cbf84e7d43d.tar.bz2
lpeg-05edfcff9b7eca52571b221e614b5cbf84e7d43d.zip
Documentation
Removed '$Id' from all files + updated copyright year + other changes in comments and documentation
-rw-r--r-- HISTORY 8
-rw-r--r-- lpcap.c 4
-rw-r--r-- lpcap.h 3
-rw-r--r-- lpcode.c 4
-rw-r--r-- lpcode.h 3
-rw-r--r-- lpeg.html 92
-rw-r--r-- lpprint.c 4
-rw-r--r-- lpprint.h 4
-rw-r--r-- lptree.c 4
-rw-r--r-- lptree.h 3
-rw-r--r-- lptypes.h 3
-rw-r--r-- lpvm.c 4
-rw-r--r-- lpvm.h 3
-rw-r--r-- re.html 9
-rw-r--r-- re.lua 1
15 files changed, 15 insertions, 134 deletions
diff --git a/HISTORY b/HISTORY
index 66a8e14..7fa72e6 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,4 +1,10 @@
1HISTORY for LPeg 1.0.2 1HISTORY for LPeg 1.1.0
2
3* Changes from version 1.0.2 to 1.1.0
4 ---------------------------------
5 + UTF-8 ranges
6 + Larger limit for number of rules in a grammar
7 + bug fixes
2 8
3* Changes from version 1.0.1 to 1.0.2 9* Changes from version 1.0.1 to 1.0.2
4 --------------------------------- 10 ---------------------------------
diff --git a/lpcap.c b/lpcap.c
index b332fde..ec17d23 100644
--- a/lpcap.c
+++ b/lpcap.c
@@ -1,7 +1,3 @@
1/*
2** $Id: lpcap.c $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5 1
6#include "lua.h" 2#include "lua.h"
7#include "lauxlib.h" 3#include "lauxlib.h"
diff --git a/lpcap.h b/lpcap.h
index dc10d69..10539a0 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -1,6 +1,3 @@
1/*
2** $Id: lpcap.h $
3*/
4 1
5#if !defined(lpcap_h) 2#if !defined(lpcap_h)
6#define lpcap_h 3#define lpcap_h
diff --git a/lpcode.c b/lpcode.c
index 64ad8ac..f2ae754 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -1,7 +1,3 @@
1/*
2** $Id: lpcode.c $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5 1
6#include <limits.h> 2#include <limits.h>
7 3
diff --git a/lpcode.h b/lpcode.h
index 34ee276..ec5f43f 100644
--- a/lpcode.h
+++ b/lpcode.h
@@ -1,6 +1,3 @@
1/*
2** $Id: lpcode.h $
3*/
4 1
5#if !defined(lpcode_h) 2#if !defined(lpcode_h)
6#define lpcode_h 3#define lpcode_h
diff --git a/lpeg.html b/lpeg.html
index f4d8658..f50d327 100644
--- a/lpeg.html
+++ b/lpeg.html
@@ -10,7 +10,6 @@
10</head> 10</head>
11<body> 11<body>
12 12
13<!-- $Id: lpeg.html $ -->
14 13
15<div id="container"> 14<div id="container">
16 15
@@ -664,10 +663,10 @@ LPeg does not specify when (and if) it evaluates its captures.
664consider the pattern <code>lpeg.P"a" / func / 0</code>. 663consider the pattern <code>lpeg.P"a" / func / 0</code>.
665Because the "division" by 0 instructs LPeg to throw away the 664Because the "division" by 0 instructs LPeg to throw away the
666results from the pattern, 665results from the pattern,
667LPeg may or may not call <code>func</code>.) 666it is not specified whether LPeg will call <code>func</code>.)
668Therefore, captures should avoid side effects. 667Therefore, captures should avoid side effects.
669Moreover, 668Moreover,
670most captures cannot affect the way a pattern matches a subject. 669captures cannot affect the way a pattern matches a subject.
671The only exception to this rule is the 670The only exception to this rule is the
672so-called <a href="#matchtime"><em>match-time capture</em></a>. 671so-called <a href="#matchtime"><em>match-time capture</em></a>.
673When a match-time capture matches, 672When a match-time capture matches,
@@ -1175,91 +1174,6 @@ local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
1175</pre> 1174</pre>
1176 1175
1177 1176
1178<h3>UTF-8 and Latin 1</h3>
1179<p>
1180It is not difficult to use LPeg to convert a string from
1181UTF-8 encoding to Latin 1 (ISO 8859-1):
1182</p>
1183
1184<pre class="example">
1185-- convert a two-byte UTF-8 sequence to a Latin 1 character
1186local function f2 (s)
1187 local c1, c2 = string.byte(s, 1, 2)
1188 return string.char(c1 * 64 + c2 - 12416)
1189end
1190
1191local utf8 = lpeg.R("\0\127")
1192 + lpeg.R("\194\195") * lpeg.R("\128\191") / f2
1193
1194local decode_pattern = lpeg.Cs(utf8^0) * -1
1195</pre>
1196<p>
1197In this code,
1198the definition of UTF-8 is already restricted to the
1199Latin 1 range (from 0 to 255).
1200Any encoding outside this range (as well as any invalid encoding)
1201will not match that pattern.
1202</p>
1203
1204<p>
1205As the definition of <code>decode_pattern</code> demands that
1206the pattern matches the whole input (because of the -1 at its end),
1207any invalid string will simply fail to match,
1208without any useful information about the problem.
1209We can improve this situation redefining <code>decode_pattern</code>
1210as follows:
1211</p>
1212<pre class="example">
1213local function er (_, i) error("invalid encoding at position " .. i) end
1214
1215local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er))
1216</pre>
1217<p>
1218Now, if the pattern <code>utf8^0</code> stops
1219before the end of the string,
1220an appropriate error function is called.
1221</p>
1222
1223
1224<h3>UTF-8 and Unicode</h3>
1225<p>
1226We can extend the previous patterns to handle all Unicode code points.
1227Of course,
1228we cannot translate them to Latin 1 or any other one-byte encoding.
1229Instead, our translation results in a array with the code points
1230represented as numbers.
1231The full code is here:
1232</p>
1233<pre class="example">
1234-- decode a two-byte UTF-8 sequence
1235local function f2 (s)
1236 local c1, c2 = string.byte(s, 1, 2)
1237 return c1 * 64 + c2 - 12416
1238end
1239
1240-- decode a three-byte UTF-8 sequence
1241local function f3 (s)
1242 local c1, c2, c3 = string.byte(s, 1, 3)
1243 return (c1 * 64 + c2) * 64 + c3 - 925824
1244end
1245
1246-- decode a four-byte UTF-8 sequence
1247local function f4 (s)
1248 local c1, c2, c3, c4 = string.byte(s, 1, 4)
1249 return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
1250end
1251
1252local cont = lpeg.R("\128\191") -- continuation byte
1253
1254local utf8 = lpeg.R("\0\127") / string.byte
1255 + lpeg.R("\194\223") * cont / f2
1256 + lpeg.R("\224\239") * cont * cont / f3
1257 + lpeg.R("\240\244") * cont * cont * cont / f4
1258
1259local decode_pattern = lpeg.Ct(utf8^0) * -1
1260</pre>
1261
1262
1263<h3>Lua's long strings</h3> 1177<h3>Lua's long strings</h3>
1264<p> 1178<p>
1265A long string in Lua starts with the pattern <code>[=*[</code> 1179A long string in Lua starts with the pattern <code>[=*[</code>
@@ -1416,7 +1330,7 @@ the following command is all you need to install LPeg:
1416<h2><a name="license">License</a></h2> 1330<h2><a name="license">License</a></h2>
1417 1331
1418<p> 1332<p>
1419Copyright &copy; 2007-2019 Lua.org, PUC-Rio. 1333Copyright &copy; 2007-2023 Lua.org, PUC-Rio.
1420</p> 1334</p>
1421<p> 1335<p>
1422Permission is hereby granted, free of charge, 1336Permission is hereby granted, free of charge,
diff --git a/lpprint.c b/lpprint.c
index 6893bb8..1c1b7b6 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -1,7 +1,3 @@
1/*
2** $Id: lpprint.c $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5 1
6#include <ctype.h> 2#include <ctype.h>
7#include <limits.h> 3#include <limits.h>
diff --git a/lpprint.h b/lpprint.h
index 15ef121..42d7f98 100644
--- a/lpprint.h
+++ b/lpprint.h
@@ -1,7 +1,3 @@
1/*
2** $Id: lpprint.h $
3*/
4
5 1
6#if !defined(lpprint_h) 2#if !defined(lpprint_h)
7#define lpprint_h 3#define lpprint_h
diff --git a/lptree.c b/lptree.c
index 2318153..4affac9 100644
--- a/lptree.c
+++ b/lptree.c
@@ -1,7 +1,3 @@
1/*
2** $Id: lptree.c $
3** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5 1
6#include <ctype.h> 2#include <ctype.h>
7#include <limits.h> 3#include <limits.h>
diff --git a/lptree.h b/lptree.h
index 892e013..aa331d2 100644
--- a/lptree.h
+++ b/lptree.h
@@ -1,6 +1,3 @@
1/*
2** $Id: lptree.h $
3*/
4 1
5#if !defined(lptree_h) 2#if !defined(lptree_h)
6#define lptree_h 3#define lptree_h
diff --git a/lptypes.h b/lptypes.h
index ccb4c18..98b9597 100644
--- a/lptypes.h
+++ b/lptypes.h
@@ -1,7 +1,6 @@
1/* 1/*
2** $Id: lptypes.h $
3** LPeg - PEG pattern matching for Lua 2** LPeg - PEG pattern matching for Lua
4** Copyright 2007-2019, Lua.org & PUC-Rio (see 'lpeg.html' for license) 3** Copyright 2007-2023, Lua.org & PUC-Rio (see 'lpeg.html' for license)
5** written by Roberto Ierusalimschy 4** written by Roberto Ierusalimschy
6*/ 5*/
7 6
diff --git a/lpvm.c b/lpvm.c
index 72ac1dd..8c001fc 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -1,7 +1,3 @@
1/*
2** $Id: lpvm.c $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5 1
6#include <limits.h> 2#include <limits.h>
7#include <string.h> 3#include <string.h>
diff --git a/lpvm.h b/lpvm.h
index ca625f9..cc79dcd 100644
--- a/lpvm.h
+++ b/lpvm.h
@@ -1,6 +1,3 @@
1/*
2** $Id: lpvm.h $
3*/
4 1
5#if !defined(lpvm_h) 2#if !defined(lpvm_h)
6#define lpvm_h 3#define lpvm_h
diff --git a/re.html b/re.html
index 24a582a..ed4ccb1 100644
--- a/re.html
+++ b/re.html
@@ -10,7 +10,6 @@
10</head> 10</head>
11<body> 11<body>
12 12
13<!-- $Id: re.html $ -->
14 13
15<div id="container"> 14<div id="container">
16 15
@@ -95,7 +94,7 @@ equivalent to <code>p / defs[name]</code></td></tr>
95equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr> 94equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
96<tr><td><code>p ~&gt; name</code></td> <td>fold capture 95<tr><td><code>p ~&gt; name</code></td> <td>fold capture
97equivalent to <code>lpeg.Cf(p, defs[name])</code></td></tr> 96equivalent to <code>lpeg.Cf(p, defs[name])</code></td></tr>
98<tr><td><code>& p</code></td> <td>and predicate</td></tr> 97<tr><td><code>&amp; p</code></td> <td>and predicate</td></tr>
99<tr><td><code>! p</code></td> <td>not predicate</td></tr> 98<tr><td><code>! p</code></td> <td>not predicate</td></tr>
100<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr> 99<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
101<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr> 100<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
@@ -103,7 +102,7 @@ equivalent to <code>lpeg.Cf(p, defs[name])</code></td></tr>
103</tbody></table> 102</tbody></table>
104<p> 103<p>
105Any space appearing in a syntax description can be 104Any space appearing in a syntax description can be
106replaced by zero or more space characters and Lua-style comments 105replaced by zero or more space characters and Lua-style short comments
107(<code>--</code> until end of line). 106(<code>--</code> until end of line).
108</p> 107</p>
109 108
@@ -329,7 +328,7 @@ respecting the indentation:
329<pre class="example"> 328<pre class="example">
330p = re.compile[[ 329p = re.compile[[
331 block &lt;- {| {:ident:' '*:} line 330 block &lt;- {| {:ident:' '*:} line
332 ((=ident !' ' line) / &(=ident ' ') block)* |} 331 ((=ident !' ' line) / &amp;(=ident ' ') block)* |}
333 line &lt;- {[^%nl]*} %nl 332 line &lt;- {[^%nl]*} %nl
334]] 333]]
335</pre> 334</pre>
@@ -453,7 +452,7 @@ print(re.match(p, p)) -- a self description must match itself
453<h2><a name="license">License</a></h2> 452<h2><a name="license">License</a></h2>
454 453
455<p> 454<p>
456Copyright &copy; 2008-2015 Lua.org, PUC-Rio. 455Copyright &copy; 2008-2023 Lua.org, PUC-Rio.
457</p> 456</p>
458<p> 457<p>
459Permission is hereby granted, free of charge, 458Permission is hereby granted, free of charge,
diff --git a/re.lua b/re.lua
index 3bb8af7..22cb8de 100644
--- a/re.lua
+++ b/re.lua
@@ -1,4 +1,3 @@
1-- $Id: re.lua $
2 1
3-- imported functions and modules 2-- imported functions and modules
4local tonumber, type, print, error = tonumber, type, print, error 3local tonumber, type, print, error = tonumber, type, print, error