aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2022-08-23 16:08:53 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2022-08-23 16:08:53 -0300
commit 02060b7a37d88d4e92cf64a008c0651eae432c12 (patch)
tree 6640e82d6dc7fe3f8e1601b490ad33babac6d95e
parent a1f77a234a053da46b06d5d4be00ffb30d3eb45b (diff)
download lua-02060b7a37d88d4e92cf64a008c0651eae432c12.tar.gz
lua-02060b7a37d88d4e92cf64a008c0651eae432c12.tar.bz2
lua-02060b7a37d88d4e92cf64a008c0651eae432c12.zip
Simpler handling of Byte Order Mark (BOM)
-rw-r--r-- lauxlib.c 47
-rw-r--r-- testes/main.lua 35
2 files changed, 56 insertions, 26 deletions
diff --git a/lauxlib.c b/lauxlib.c
index 413d8f97..cba5df9b 100644
--- a/lauxlib.c
+++ b/lauxlib.c
@@ -740,17 +740,18 @@ static int errfile (lua_State *L, const char *what, int fnameindex) {
740} 740}
741 741
742 742
743static int skipBOM (LoadF *lf) { 743/*
744 const char *p = "\xEF\xBB\xBF"; /* UTF-8 BOM mark */ 744** Skip an optional BOM at the start of a stream. If there is an
745 int c; 745** incomplete BOM (the first character is correct but the rest is
746 lf->n = 0; 746** not), returns the first character anyway to force an error
747 do { 747** (as no chunk can start with 0xEF).
748 c = getc(lf->f); 748*/
749 if (c == EOF || c != *(const unsigned char *)p++) return c; 749static int skipBOM (FILE *f) {
750 lf->buff[lf->n++] = c; /* to be read by the parser */ 750 int c = getc(f); /* read first character */
751 } while (*p != '\0'); 751 if (c == 0xEF && getc(f) == 0xBB && getc(f) == 0xBF) /* correct BOM? */
752 lf->n = 0; /* prefix matched; discard it */ 752 return getc(f); /* ignore BOM and return next char */
753 return getc(lf->f); /* return next character */ 753 else /* no (valid) BOM */
754 return c; /* return first character */
754} 755}
755 756
756 757
@@ -761,13 +762,13 @@ static int skipBOM (LoadF *lf) {
761** first "valid" character of the file (after the optional BOM and 762** first "valid" character of the file (after the optional BOM and
762** a first-line comment). 763** a first-line comment).
763*/ 764*/
764static int skipcomment (LoadF *lf, int *cp) { 765static int skipcomment (FILE *f, int *cp) {
765 int c = *cp = skipBOM(lf); 766 int c = *cp = skipBOM(f);
766 if (c == '#') { /* first line is a comment (Unix exec. file)? */ 767 if (c == '#') { /* first line is a comment (Unix exec. file)? */
767 do { /* skip first line */ 768 do { /* skip first line */
768 c = getc(lf->f); 769 c = getc(f);
769 } while (c != EOF && c != '\n'); 770 } while (c != EOF && c != '\n');
770 *cp = getc(lf->f); /* skip end-of-line, if present */ 771 *cp = getc(f); /* next character after comment, if present */
771 return 1; /* there was a comment */ 772 return 1; /* there was a comment */
772 } 773 }
773 else return 0; /* no comment */ 774 else return 0; /* no comment */
@@ -789,12 +790,16 @@ LUALIB_API int luaL_loadfilex (lua_State *L, const char *filename,
789 lf.f = fopen(filename, "r"); 790 lf.f = fopen(filename, "r");
790 if (lf.f == NULL) return errfile(L, "open", fnameindex); 791 if (lf.f == NULL) return errfile(L, "open", fnameindex);
791 } 792 }
792 if (skipcomment(&lf, &c)) /* read initial portion */ 793 lf.n = 0;
793 lf.buff[lf.n++] = '\n'; /* add line to correct line numbers */ 794 if (skipcomment(lf.f, &c)) /* read initial portion */
794 if (c == LUA_SIGNATURE[0] && filename) { /* binary file? */ 795 lf.buff[lf.n++] = '\n'; /* add newline to correct line numbers */
795 lf.f = freopen(filename, "rb", lf.f); /* reopen in binary mode */ 796 if (c == LUA_SIGNATURE[0]) { /* binary file? */
796 if (lf.f == NULL) return errfile(L, "reopen", fnameindex); 797 lf.n = 0; /* remove possible newline */
797 skipcomment(&lf, &c); /* re-read initial portion */ 798 if (filename) { /* "real" file? */
799 lf.f = freopen(filename, "rb", lf.f); /* reopen in binary mode */
800 if (lf.f == NULL) return errfile(L, "reopen", fnameindex);
801 skipcomment(lf.f, &c); /* re-read initial portion */
802 }
798 } 803 }
799 if (c != EOF) 804 if (c != EOF)
800 lf.buff[lf.n++] = c; /* 'c' is the first character of the stream */ 805 lf.buff[lf.n++] = c; /* 'c' is the first character of the stream */
diff --git a/testes/main.lua b/testes/main.lua
index 9def6386..9187420e 100644
--- a/testes/main.lua
+++ b/testes/main.lua
@@ -94,6 +94,33 @@ RUN('echo "print(10)\nprint(2)\n" | lua > %s', out)
94checkout("10\n2\n") 94checkout("10\n2\n")
95 95
96 96
97-- testing BOM
98prepfile("\xEF\xBB\xBF")
99RUN('lua %s > %s', prog, out)
100checkout("")
101
102prepfile("\xEF\xBB\xBFprint(3)")
103RUN('lua %s > %s', prog, out)
104checkout("3\n")
105
106prepfile("\xEF\xBB\xBF# comment!!\nprint(3)")
107RUN('lua %s > %s', prog, out)
108checkout("3\n")
109
110-- bad BOMs
111prepfile("\xEF")
112NoRun("unexpected symbol", 'lua %s > %s', prog, out)
113
114prepfile("\xEF\xBB")
115NoRun("unexpected symbol", 'lua %s > %s', prog, out)
116
117prepfile("\xEFprint(3)")
118NoRun("unexpected symbol", 'lua %s > %s', prog, out)
119
120prepfile("\xEF\xBBprint(3)")
121NoRun("unexpected symbol", 'lua %s > %s', prog, out)
122
123
97-- test option '-' 124-- test option '-'
98RUN('echo "print(arg[1])" | lua - -h > %s', out) 125RUN('echo "print(arg[1])" | lua - -h > %s', out)
99checkout("-h\n") 126checkout("-h\n")
@@ -385,12 +412,10 @@ checkprogout("101\n13\t22\n\n")
385prepfile[[#comment in 1st line without \n at the end]] 412prepfile[[#comment in 1st line without \n at the end]]
386RUN('lua %s', prog) 413RUN('lua %s', prog)
387 414
388prepfile[[#test line number when file starts with comment line 415-- first-line comment with binary file
389debug = require"debug" 416prepfile("#comment\n" .. string.dump(load("print(3)")))
390print(debug.getinfo(1).currentline)
391]]
392RUN('lua %s > %s', prog, out) 417RUN('lua %s > %s', prog, out)
393checkprogout('3\n') 418checkout('3\n')
394 419
395-- close Lua with an open file 420-- close Lua with an open file
396prepfile(string.format([[io.output(%q); io.write('alo')]], out)) 421prepfile(string.format([[io.output(%q); io.write('alo')]], out))