1 files changed, 3 insertions, 89 deletions
diff --git a/lpeg.html b/lpeg.html
index f4d8658..f50d327 100644
--- a/lpeg.html
+++ b/lpeg.html
@@ -10,7 +10,6 @@
 </head>
 <body>
-<!-- $Id: lpeg.html $ -->
 <div id="container">
        
@@ -664,10 +663,10 @@ LPeg does not specify when (and if) it evaluates its captures.
 consider the pattern <code>lpeg.P"a" / func / 0</code>.
 Because the "division" by 0 instructs LPeg to throw away the
 results from the pattern,
-LPeg may or may not call <code>func</code>.)
+it is not specified whether LPeg will call <code>func</code>.)
 Therefore, captures should avoid side effects.
 Moreover,
-most captures cannot affect the way a pattern matches a subject.
+captures cannot affect the way a pattern matches a subject.
 The only exception to this rule is the
 so-called <a href="#matchtime"><em>match-time capture</em></a>.
 When a match-time capture matches,
@@ -1175,91 +1174,6 @@ local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
 </pre>
-<h3>UTF-8 and Latin 1</h3>
-<p>
-It is not difficult to use LPeg to convert a string from
-UTF-8 encoding to Latin 1 (ISO 8859-1):
-</p>
-<pre class="example">
--- convert a two-byte UTF-8 sequence to a Latin 1 character
-local function f2 (s)
-  local c1, c2 = string.byte(s, 1, 2)
-  return string.char(c1 * 64 + c2 - 12416)
-end
-local utf8 = lpeg.R("\0\127")
-           + lpeg.R("\194\195") * lpeg.R("\128\191") / f2
-local decode_pattern = lpeg.Cs(utf8^0) * -1
-</pre>
-<p>
-In this code,
-the definition of UTF-8 is already restricted to the
-Latin 1 range (from 0 to 255).
-Any encoding outside this range (as well as any invalid encoding)
-will not match that pattern.
-</p>
-<p>
-As the definition of <code>decode_pattern</code> demands that
-the pattern matches the whole input (because of the -1 at its end),
-any invalid string will simply fail to match,
-without any useful information about the problem.
-We can improve this situation redefining <code>decode_pattern</code>
-as follows:
-</p>
-<pre class="example">
-local function er (_, i) error("invalid encoding at position " .. i) end
-local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er))
-</pre>
-<p>
-Now, if the pattern <code>utf8^0</code> stops
-before the end of the string,
-an appropriate error function is called.
-</p>
-<h3>UTF-8 and Unicode</h3>
-<p>
-We can extend the previous patterns to handle all Unicode code points.
-Of course,
-we cannot translate them to Latin 1 or any other one-byte encoding.
-Instead, our translation results in a array with the code points
-represented as numbers.
-The full code is here:
-</p>
-<pre class="example">
--- decode a two-byte UTF-8 sequence
-local function f2 (s)
-  local c1, c2 = string.byte(s, 1, 2)
-  return c1 * 64 + c2 - 12416
-end
--- decode a three-byte UTF-8 sequence
-local function f3 (s)
-  local c1, c2, c3 = string.byte(s, 1, 3)
-  return (c1 * 64 + c2) * 64 + c3 - 925824
-end
--- decode a four-byte UTF-8 sequence
-local function f4 (s)
-  local c1, c2, c3, c4 = string.byte(s, 1, 4)
-  return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
-end
-local cont = lpeg.R("\128\191")   -- continuation byte
-local utf8 = lpeg.R("\0\127") / string.byte
-           + lpeg.R("\194\223") * cont / f2
-           + lpeg.R("\224\239") * cont * cont / f3
-           + lpeg.R("\240\244") * cont * cont * cont / f4
-local decode_pattern = lpeg.Ct(utf8^0) * -1
-</pre>
 <h3>Lua's long strings</h3>
 <p>
 A long string in Lua starts with the pattern <code>[=*[</code>
@@ -1416,7 +1330,7 @@ the following command is all you need to install LPeg:
 <h2><a name="license">License</a></h2>
 <p>
-Copyright &copy; 2007-2019 Lua.org, PUC-Rio.
+Copyright &copy; 2007-2023 Lua.org, PUC-Rio.
 </p>
 <p>
 Permission is hereby granted, free of charge,

diff --git a/lpeg.html b/lpeg.html index f4d8658..f50d327 100644 --- a/lpeg.html +++ b/lpeg.html
@@ -10,7 +10,6 @@
10	</head>	10	</head>
11	<body>	11	<body>
12		12
13	<!-- $Id: lpeg.html $ -->
14		13
15	<div id="container">	14	<div id="container">
16		15
@@ -664,10 +663,10 @@ LPeg does not specify when (and if) it evaluates its captures.
664	consider the pattern <code>lpeg.P"a" / func / 0</code>.	663	consider the pattern <code>lpeg.P"a" / func / 0</code>.
665	Because the "division" by 0 instructs LPeg to throw away the	664	Because the "division" by 0 instructs LPeg to throw away the
666	results from the pattern,	665	results from the pattern,
667	LPeg may or may not call <code>func</code>.)	666	it is not specified whether LPeg will call <code>func</code>.)
668	Therefore, captures should avoid side effects.	667	Therefore, captures should avoid side effects.
669	Moreover,	668	Moreover,
670	most captures cannot affect the way a pattern matches a subject.	669	captures cannot affect the way a pattern matches a subject.
671	The only exception to this rule is the	670	The only exception to this rule is the
672	so-called <a href="#matchtime"><em>match-time capture</em></a>.	671	so-called <a href="#matchtime"><em>match-time capture</em></a>.
673	When a match-time capture matches,	672	When a match-time capture matches,
@@ -1175,91 +1174,6 @@ local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
1175	</pre>	1174	</pre>
1176		1175
1177		1176
1178	<h3>UTF-8 and Latin 1</h3>
1179	<p>
1180	It is not difficult to use LPeg to convert a string from
1181	UTF-8 encoding to Latin 1 (ISO 8859-1):
1182	</p>
1183
1184	<pre class="example">
1185	-- convert a two-byte UTF-8 sequence to a Latin 1 character
1186	local function f2 (s)
1187	local c1, c2 = string.byte(s, 1, 2)
1188	return string.char(c1 * 64 + c2 - 12416)
1189	end
1190
1191	local utf8 = lpeg.R("\0\127")
1192	+ lpeg.R("\194\195") * lpeg.R("\128\191") / f2
1193
1194	local decode_pattern = lpeg.Cs(utf8^0) * -1
1195	</pre>
1196	<p>
1197	In this code,
1198	the definition of UTF-8 is already restricted to the
1199	Latin 1 range (from 0 to 255).
1200	Any encoding outside this range (as well as any invalid encoding)
1201	will not match that pattern.
1202	</p>
1203
1204	<p>
1205	As the definition of <code>decode_pattern</code> demands that
1206	the pattern matches the whole input (because of the -1 at its end),
1207	any invalid string will simply fail to match,
1208	without any useful information about the problem.
1209	We can improve this situation redefining <code>decode_pattern</code>
1210	as follows:
1211	</p>
1212	<pre class="example">
1213	local function er (_, i) error("invalid encoding at position " .. i) end
1214
1215	local decode_pattern = lpeg.Cs(utf8^0) * (-1 + lpeg.P(er))
1216	</pre>
1217	<p>
1218	Now, if the pattern <code>utf8^0</code> stops
1219	before the end of the string,
1220	an appropriate error function is called.
1221	</p>
1222
1223
1224	<h3>UTF-8 and Unicode</h3>
1225	<p>
1226	We can extend the previous patterns to handle all Unicode code points.
1227	Of course,
1228	we cannot translate them to Latin 1 or any other one-byte encoding.
1229	Instead, our translation results in a array with the code points
1230	represented as numbers.
1231	The full code is here:
1232	</p>
1233	<pre class="example">
1234	-- decode a two-byte UTF-8 sequence
1235	local function f2 (s)
1236	local c1, c2 = string.byte(s, 1, 2)
1237	return c1 * 64 + c2 - 12416
1238	end
1239
1240	-- decode a three-byte UTF-8 sequence
1241	local function f3 (s)
1242	local c1, c2, c3 = string.byte(s, 1, 3)
1243	return (c1 * 64 + c2) * 64 + c3 - 925824
1244	end
1245
1246	-- decode a four-byte UTF-8 sequence
1247	local function f4 (s)
1248	local c1, c2, c3, c4 = string.byte(s, 1, 4)
1249	return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
1250	end
1251
1252	local cont = lpeg.R("\128\191") -- continuation byte
1253
1254	local utf8 = lpeg.R("\0\127") / string.byte
1255	+ lpeg.R("\194\223") * cont / f2
1256	+ lpeg.R("\224\239") * cont * cont / f3
1257	+ lpeg.R("\240\244") * cont * cont * cont / f4
1258
1259	local decode_pattern = lpeg.Ct(utf8^0) * -1
1260	</pre>
1261
1262
1263	<h3>Lua's long strings</h3>	1177	<h3>Lua's long strings</h3>
1264	<p>	1178	<p>
1265	A long string in Lua starts with the pattern <code>[=*[</code>	1179	A long string in Lua starts with the pattern <code>[=*[</code>
@@ -1416,7 +1330,7 @@ the following command is all you need to install LPeg:
1416	<h2><a name="license">License</a></h2>	1330	<h2><a name="license">License</a></h2>
1417		1331
1418	<p>	1332	<p>
1419	Copyright © 2007-2019 Lua.org, PUC-Rio.	1333	Copyright © 2007-2023 Lua.org, PUC-Rio.
1420	</p>	1334	</p>
1421	<p>	1335	<p>
1422	Permission is hereby granted, free of charge,	1336	Permission is hereby granted, free of charge,