diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-10 13:47:45 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-04-10 13:47:45 -0300 |
commit | 05edfcff9b7eca52571b221e614b5cbf84e7d43d (patch) | |
tree | 37ca0e23753461795eb3ea6df873d725b94ba54e | |
parent | afd7b912611d5777c12d55cb533beee760207949 (diff) | |
download | lpeg-05edfcff9b7eca52571b221e614b5cbf84e7d43d.tar.gz lpeg-05edfcff9b7eca52571b221e614b5cbf84e7d43d.tar.bz2 lpeg-05edfcff9b7eca52571b221e614b5cbf84e7d43d.zip |
Documentation
Removed '$Id' from all files + updated copyright year + other changes
in comments and documentation
-rw-r--r-- | HISTORY | 8 | ||||
-rw-r--r-- | lpcap.c | 4 | ||||
-rw-r--r-- | lpcap.h | 3 | ||||
-rw-r--r-- | lpcode.c | 4 | ||||
-rw-r--r-- | lpcode.h | 3 | ||||
-rw-r--r-- | lpeg.html | 92 | ||||
-rw-r--r-- | lpprint.c | 4 | ||||
-rw-r--r-- | lpprint.h | 4 | ||||
-rw-r--r-- | lptree.c | 4 | ||||
-rw-r--r-- | lptree.h | 3 | ||||
-rw-r--r-- | lptypes.h | 3 | ||||
-rw-r--r-- | lpvm.c | 4 | ||||
-rw-r--r-- | lpvm.h | 3 | ||||
-rw-r--r-- | re.html | 9 | ||||
-rw-r--r-- | re.lua | 1 |
15 files changed, 15 insertions, 134 deletions
@@ -1,4 +1,10 @@ | |||
1 | HISTORY for LPeg 1.0.2 | 1 | HISTORY for LPeg 1.1.0 |
2 | |||
3 | * Changes from version 1.0.2 to 1.1.0 | ||
4 | --------------------------------- | ||
5 | + UTF-8 ranges | ||
6 | + Larger limit for number of rules in a grammar | ||
7 | + bug fixes | ||
2 | 8 | ||
3 | * Changes from version 1.0.1 to 1.0.2 | 9 | * Changes from version 1.0.1 to 1.0.2 |
4 | --------------------------------- | 10 | --------------------------------- |
@@ -1,7 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lpcap.c $ | ||
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | 1 | ||
6 | #include "lua.h" | 2 | #include "lua.h" |
7 | #include "lauxlib.h" | 3 | #include "lauxlib.h" |
@@ -1,6 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lpcap.h $ | ||
3 | */ | ||
4 | 1 | ||
5 | #if !defined(lpcap_h) | 2 | #if !defined(lpcap_h) |
6 | #define lpcap_h | 3 | #define lpcap_h |
@@ -1,7 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lpcode.c $ | ||
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | 1 | ||
6 | #include <limits.h> | 2 | #include <limits.h> |
7 | 3 | ||
@@ -1,6 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lpcode.h $ | ||
3 | */ | ||
4 | 1 | ||
5 | #if !defined(lpcode_h) | 2 | #if !defined(lpcode_h) |
6 | #define lpcode_h | 3 | #define lpcode_h |
@@ -10,7 +10,6 @@ | |||
10 | </head> | 10 | </head> |
11 | <body> | 11 | <body> |
12 | 12 | ||
13 | <!-- $Id: lpeg.html $ --> | ||
14 | 13 | ||
15 | <div id="container"> | 14 | <div id="container"> |
16 | 15 | ||
@@ -664,10 +663,10 @@ LPeg does not specify when (and if) it evaluates its captures. | |||
664 | consider the pattern <code>lpeg.P"a" / func / 0</code>. | 663 | consider the pattern <code>lpeg.P"a" / func / 0</code>. |
665 | Because the "division" by 0 instructs LPeg to throw away the | 664 | Because the "division" by 0 instructs LPeg to throw away the |
666 | results from the pattern, | 665 | results from the pattern, |
667 | LPeg may or may not call <code>func</code>.) | 666 | it is not specified whether LPeg will call <code>func</code>.) |
668 | Therefore, captures should avoid side effects. | 667 | Therefore, captures should avoid side effects. |
669 | Moreover, | 668 | Moreover, |
670 | most captures cannot affect the way a pattern matches a subject. | 669 | captures cannot affect the way a pattern matches a subject. |
671 | The only exception to this rule is the | 670 | The only exception to this rule is the |
672 | so-called <a href="#matchtime"><em>match-time capture</em></a>. | 671 | so-called <a href="#matchtime"><em>match-time capture</em></a>. |
673 | When a match-time capture matches, | 672 | When a match-time capture matches, |
@@ -1175,91 +1174,6 @@ local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1) | |||
1175 | </pre> | 1174 | </pre> |
1176 | 1175 | ||
1177 | 1176 | ||
1178 | <h3>UTF-8 and Latin 1</h3> | ||
1179 | <p> | ||
1180 | It is not difficult to use LPeg to convert a string from | ||
1181 | UTF-8 encoding to Latin 1 (ISO 8859-1): | ||
1182 | </p> | ||
1183 | |||
1184 | <pre class="example"> | ||
1185 | -- convert a two-byte UTF-8 sequence to a Latin 1 character | ||
1186 | local function f2 (s) | ||
1187 | local c1, c2 = string.byte(s, 1, 2) | ||
1188 | return string.char(c1 * 64 + c2 - 12416) | ||
1189 | end | ||
1190 | |||
1191 | local utf8 = lpeg.R("\0\127") | ||
1192 | + lpeg.R("\194\195") * lpeg.R("\128\191") / f2 | ||
1193 | |||
1194 | local decode_pattern = lpeg.Cs(utf8^0) * -1 | ||
1195 | </pre> | ||
1196 | <p> | ||
1197 | In this code, | ||
1198 | the definition of UTF-8 is already restricted to the | ||
1199 | Latin 1 range (from 0 to 255). | ||
1200 | Any encoding outside this range (as well as any invalid encoding) | ||
1201 | will not match that pattern. | ||
1202 | </p> | ||
1203 | |||
1204 | <p> | ||
1205 | As the definition of <code>decode_pattern</code> demands that | ||
1206 | the pattern matches the whole input (because of the -1 at its end), | ||
1207 | any invalid string will simply fail to match, | ||
1208 | without any useful information about the problem. | ||
1209 | We can improve this situation by redefining <code>decode_pattern</code> | ||
1210 | as follows: | ||
1211 | </p> | ||
1212 | <pre class="example"> | ||
1213 | local function er (_, i) error("invalid encoding at position " .. i) end | ||
1214 | |||
1215 | local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er)) | ||
1216 | </pre> | ||
1217 | <p> | ||
1218 | Now, if the pattern <code>utf8^0</code> stops | ||
1219 | before the end of the string, | ||
1220 | an appropriate error function is called. | ||
1221 | </p> | ||
1222 | |||
1223 | |||
1224 | <h3>UTF-8 and Unicode</h3> | ||
1225 | <p> | ||
1226 | We can extend the previous patterns to handle all Unicode code points. | ||
1227 | Of course, | ||
1228 | we cannot translate them to Latin 1 or any other one-byte encoding. | ||
1229 | Instead, our translation results in an array with the code points | ||
1230 | represented as numbers. | ||
1231 | The full code is here: | ||
1232 | </p> | ||
1233 | <pre class="example"> | ||
1234 | -- decode a two-byte UTF-8 sequence | ||
1235 | local function f2 (s) | ||
1236 | local c1, c2 = string.byte(s, 1, 2) | ||
1237 | return c1 * 64 + c2 - 12416 | ||
1238 | end | ||
1239 | |||
1240 | -- decode a three-byte UTF-8 sequence | ||
1241 | local function f3 (s) | ||
1242 | local c1, c2, c3 = string.byte(s, 1, 3) | ||
1243 | return (c1 * 64 + c2) * 64 + c3 - 925824 | ||
1244 | end | ||
1245 | |||
1246 | -- decode a four-byte UTF-8 sequence | ||
1247 | local function f4 (s) | ||
1248 | local c1, c2, c3, c4 = string.byte(s, 1, 4) | ||
1249 | return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 | ||
1250 | end | ||
1251 | |||
1252 | local cont = lpeg.R("\128\191") -- continuation byte | ||
1253 | |||
1254 | local utf8 = lpeg.R("\0\127") / string.byte | ||
1255 | + lpeg.R("\194\223") * cont / f2 | ||
1256 | + lpeg.R("\224\239") * cont * cont / f3 | ||
1257 | + lpeg.R("\240\244") * cont * cont * cont / f4 | ||
1258 | |||
1259 | local decode_pattern = lpeg.Ct(utf8^0) * -1 | ||
1260 | </pre> | ||
1261 | |||
1262 | |||
1263 | <h3>Lua's long strings</h3> | 1177 | <h3>Lua's long strings</h3> |
1264 | <p> | 1178 | <p> |
1265 | A long string in Lua starts with the pattern <code>[=*[</code> | 1179 | A long string in Lua starts with the pattern <code>[=*[</code> |
@@ -1416,7 +1330,7 @@ the following command is all you need to install LPeg: | |||
1416 | <h2><a name="license">License</a></h2> | 1330 | <h2><a name="license">License</a></h2> |
1417 | 1331 | ||
1418 | <p> | 1332 | <p> |
1419 | Copyright © 2007-2019 Lua.org, PUC-Rio. | 1333 | Copyright © 2007-2023 Lua.org, PUC-Rio. |
1420 | </p> | 1334 | </p> |
1421 | <p> | 1335 | <p> |
1422 | Permission is hereby granted, free of charge, | 1336 | Permission is hereby granted, free of charge, |
@@ -1,7 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lpprint.c $ | ||
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | 1 | ||
6 | #include <ctype.h> | 2 | #include <ctype.h> |
7 | #include <limits.h> | 3 | #include <limits.h> |
@@ -1,7 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lpprint.h $ | ||
3 | */ | ||
4 | |||
5 | 1 | ||
6 | #if !defined(lpprint_h) | 2 | #if !defined(lpprint_h) |
7 | #define lpprint_h | 3 | #define lpprint_h |
@@ -1,7 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lptree.c $ | ||
3 | ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | 1 | ||
6 | #include <ctype.h> | 2 | #include <ctype.h> |
7 | #include <limits.h> | 3 | #include <limits.h> |
@@ -1,6 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lptree.h $ | ||
3 | */ | ||
4 | 1 | ||
5 | #if !defined(lptree_h) | 2 | #if !defined(lptree_h) |
6 | #define lptree_h | 3 | #define lptree_h |
@@ -1,7 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lptypes.h $ | ||
3 | ** LPeg - PEG pattern matching for Lua | 2 | ** LPeg - PEG pattern matching for Lua |
4 | ** Copyright 2007-2019, Lua.org & PUC-Rio (see 'lpeg.html' for license) | 3 | ** Copyright 2007-2023, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
5 | ** written by Roberto Ierusalimschy | 4 | ** written by Roberto Ierusalimschy |
6 | */ | 5 | */ |
7 | 6 | ||
@@ -1,7 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lpvm.c $ | ||
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) | ||
4 | */ | ||
5 | 1 | ||
6 | #include <limits.h> | 2 | #include <limits.h> |
7 | #include <string.h> | 3 | #include <string.h> |
@@ -1,6 +1,3 @@ | |||
1 | /* | ||
2 | ** $Id: lpvm.h $ | ||
3 | */ | ||
4 | 1 | ||
5 | #if !defined(lpvm_h) | 2 | #if !defined(lpvm_h) |
6 | #define lpvm_h | 3 | #define lpvm_h |
@@ -10,7 +10,6 @@ | |||
10 | </head> | 10 | </head> |
11 | <body> | 11 | <body> |
12 | 12 | ||
13 | <!-- $Id: re.html $ --> | ||
14 | 13 | ||
15 | <div id="container"> | 14 | <div id="container"> |
16 | 15 | ||
@@ -95,7 +94,7 @@ equivalent to <code>p / defs[name]</code></td></tr> | |||
95 | equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr> | 94 | equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr> |
96 | <tr><td><code>p ~> name</code></td> <td>fold capture | 95 | <tr><td><code>p ~> name</code></td> <td>fold capture |
97 | equivalent to <code>lpeg.Cf(p, defs[name])</code></td></tr> | 96 | equivalent to <code>lpeg.Cf(p, defs[name])</code></td></tr> |
98 | <tr><td><code>& p</code></td> <td>and predicate</td></tr> | 97 | <tr><td><code>&amp; p</code></td> <td>and predicate</td></tr> |
99 | <tr><td><code>! p</code></td> <td>not predicate</td></tr> | 98 | <tr><td><code>! p</code></td> <td>not predicate</td></tr> |
100 | <tr><td><code>p1 p2</code></td> <td>concatenation</td></tr> | 99 | <tr><td><code>p1 p2</code></td> <td>concatenation</td></tr> |
101 | <tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr> | 100 | <tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr> |
@@ -103,7 +102,7 @@ equivalent to <code>lpeg.Cf(p, defs[name])</code></td></tr> | |||
103 | </tbody></table> | 102 | </tbody></table> |
104 | <p> | 103 | <p> |
105 | Any space appearing in a syntax description can be | 104 | Any space appearing in a syntax description can be |
106 | replaced by zero or more space characters and Lua-style comments | 105 | replaced by zero or more space characters and Lua-style short comments |
107 | (<code>--</code> until end of line). | 106 | (<code>--</code> until end of line). |
108 | </p> | 107 | </p> |
109 | 108 | ||
@@ -329,7 +328,7 @@ respecting the indentation: | |||
329 | <pre class="example"> | 328 | <pre class="example"> |
330 | p = re.compile[[ | 329 | p = re.compile[[ |
331 | block <- {| {:ident:' '*:} line | 330 | block <- {| {:ident:' '*:} line |
332 | ((=ident !' ' line) / &(=ident ' ') block)* |} | 331 | ((=ident !' ' line) / &amp;(=ident ' ') block)* |} |
333 | line <- {[^%nl]*} %nl | 332 | line <- {[^%nl]*} %nl |
334 | ]] | 333 | ]] |
335 | </pre> | 334 | </pre> |
@@ -453,7 +452,7 @@ print(re.match(p, p)) -- a self description must match itself | |||
453 | <h2><a name="license">License</a></h2> | 452 | <h2><a name="license">License</a></h2> |
454 | 453 | ||
455 | <p> | 454 | <p> |
456 | Copyright © 2008-2015 Lua.org, PUC-Rio. | 455 | Copyright © 2008-2023 Lua.org, PUC-Rio. |
457 | </p> | 456 | </p> |
458 | <p> | 457 | <p> |
459 | Permission is hereby granted, free of charge, | 458 | Permission is hereby granted, free of charge, |
@@ -1,4 +1,3 @@ | |||
1 | -- $Id: re.lua $ | ||
2 | 1 | ||
3 | -- imported functions and modules | 2 | -- imported functions and modules |
4 | local tonumber, type, print, error = tonumber, type, print, error | 3 | local tonumber, type, print, error = tonumber, type, print, error |