diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2022-08-23 16:08:53 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2022-08-23 16:08:53 -0300 |
commit | 02060b7a37d88d4e92cf64a008c0651eae432c12 (patch) | |
tree | 6640e82d6dc7fe3f8e1601b490ad33babac6d95e | |
parent | a1f77a234a053da46b06d5d4be00ffb30d3eb45b (diff) | |
download | lua-02060b7a37d88d4e92cf64a008c0651eae432c12.tar.gz lua-02060b7a37d88d4e92cf64a008c0651eae432c12.tar.bz2 lua-02060b7a37d88d4e92cf64a008c0651eae432c12.zip |
Simpler handling of Byte Order Mark (BOM)
-rw-r--r-- | lauxlib.c | 47 | ||||
-rw-r--r-- | testes/main.lua | 35 |
2 files changed, 56 insertions, 26 deletions
@@ -740,17 +740,18 @@ static int errfile (lua_State *L, const char *what, int fnameindex) { | |||
740 | } | 740 | } |
741 | 741 | ||
742 | 742 | ||
743 | static int skipBOM (LoadF *lf) { | 743 | /* |
744 | const char *p = "\xEF\xBB\xBF"; /* UTF-8 BOM mark */ | 744 | ** Skip an optional BOM at the start of a stream. If there is an |
745 | int c; | 745 | ** incomplete BOM (the first character is correct but the rest is |
746 | lf->n = 0; | 746 | ** not), returns the first character anyway to force an error |
747 | do { | 747 | ** (as no chunk can start with 0xEF). |
748 | c = getc(lf->f); | 748 | */ |
749 | if (c == EOF || c != *(const unsigned char *)p++) return c; | 749 | static int skipBOM (FILE *f) { |
750 | lf->buff[lf->n++] = c; /* to be read by the parser */ | 750 | int c = getc(f); /* read first character */ |
751 | } while (*p != '\0'); | 751 | if (c == 0xEF && getc(f) == 0xBB && getc(f) == 0xBF) /* correct BOM? */ |
752 | lf->n = 0; /* prefix matched; discard it */ | 752 | return getc(f); /* ignore BOM and return next char */ |
753 | return getc(lf->f); /* return next character */ | 753 | else /* no (valid) BOM */ |
754 | return c; /* return first character */ | ||
754 | } | 755 | } |
755 | 756 | ||
756 | 757 | ||
@@ -761,13 +762,13 @@ static int skipBOM (LoadF *lf) { | |||
761 | ** first "valid" character of the file (after the optional BOM and | 762 | ** first "valid" character of the file (after the optional BOM and |
762 | ** a first-line comment). | 763 | ** a first-line comment). |
763 | */ | 764 | */ |
764 | static int skipcomment (LoadF *lf, int *cp) { | 765 | static int skipcomment (FILE *f, int *cp) { |
765 | int c = *cp = skipBOM(lf); | 766 | int c = *cp = skipBOM(f); |
766 | if (c == '#') { /* first line is a comment (Unix exec. file)? */ | 767 | if (c == '#') { /* first line is a comment (Unix exec. file)? */ |
767 | do { /* skip first line */ | 768 | do { /* skip first line */ |
768 | c = getc(lf->f); | 769 | c = getc(f); |
769 | } while (c != EOF && c != '\n'); | 770 | } while (c != EOF && c != '\n'); |
770 | *cp = getc(lf->f); /* skip end-of-line, if present */ | 771 | *cp = getc(f); /* next character after comment, if present */ |
771 | return 1; /* there was a comment */ | 772 | return 1; /* there was a comment */ |
772 | } | 773 | } |
773 | else return 0; /* no comment */ | 774 | else return 0; /* no comment */ |
@@ -789,12 +790,16 @@ LUALIB_API int luaL_loadfilex (lua_State *L, const char *filename, | |||
789 | lf.f = fopen(filename, "r"); | 790 | lf.f = fopen(filename, "r"); |
790 | if (lf.f == NULL) return errfile(L, "open", fnameindex); | 791 | if (lf.f == NULL) return errfile(L, "open", fnameindex); |
791 | } | 792 | } |
792 | if (skipcomment(&lf, &c)) /* read initial portion */ | 793 | lf.n = 0; |
793 | lf.buff[lf.n++] = '\n'; /* add line to correct line numbers */ | 794 | if (skipcomment(lf.f, &c)) /* read initial portion */ |
794 | if (c == LUA_SIGNATURE[0] && filename) { /* binary file? */ | 795 | lf.buff[lf.n++] = '\n'; /* add newline to correct line numbers */ |
795 | lf.f = freopen(filename, "rb", lf.f); /* reopen in binary mode */ | 796 | if (c == LUA_SIGNATURE[0]) { /* binary file? */ |
796 | if (lf.f == NULL) return errfile(L, "reopen", fnameindex); | 797 | lf.n = 0; /* remove possible newline */ |
797 | skipcomment(&lf, &c); /* re-read initial portion */ | 798 | if (filename) { /* "real" file? */ |
799 | lf.f = freopen(filename, "rb", lf.f); /* reopen in binary mode */ | ||
800 | if (lf.f == NULL) return errfile(L, "reopen", fnameindex); | ||
801 | skipcomment(lf.f, &c); /* re-read initial portion */ | ||
802 | } | ||
798 | } | 803 | } |
799 | if (c != EOF) | 804 | if (c != EOF) |
800 | lf.buff[lf.n++] = c; /* 'c' is the first character of the stream */ | 805 | lf.buff[lf.n++] = c; /* 'c' is the first character of the stream */ |
diff --git a/testes/main.lua b/testes/main.lua index 9def6386..9187420e 100644 --- a/testes/main.lua +++ b/testes/main.lua | |||
@@ -94,6 +94,33 @@ RUN('echo "print(10)\nprint(2)\n" | lua > %s', out) | |||
94 | checkout("10\n2\n") | 94 | checkout("10\n2\n") |
95 | 95 | ||
96 | 96 | ||
97 | -- testing BOM | ||
98 | prepfile("\xEF\xBB\xBF") | ||
99 | RUN('lua %s > %s', prog, out) | ||
100 | checkout("") | ||
101 | |||
102 | prepfile("\xEF\xBB\xBFprint(3)") | ||
103 | RUN('lua %s > %s', prog, out) | ||
104 | checkout("3\n") | ||
105 | |||
106 | prepfile("\xEF\xBB\xBF# comment!!\nprint(3)") | ||
107 | RUN('lua %s > %s', prog, out) | ||
108 | checkout("3\n") | ||
109 | |||
110 | -- bad BOMs | ||
111 | prepfile("\xEF") | ||
112 | NoRun("unexpected symbol", 'lua %s > %s', prog, out) | ||
113 | |||
114 | prepfile("\xEF\xBB") | ||
115 | NoRun("unexpected symbol", 'lua %s > %s', prog, out) | ||
116 | |||
117 | prepfile("\xEFprint(3)") | ||
118 | NoRun("unexpected symbol", 'lua %s > %s', prog, out) | ||
119 | |||
120 | prepfile("\xEF\xBBprint(3)") | ||
121 | NoRun("unexpected symbol", 'lua %s > %s', prog, out) | ||
122 | |||
123 | |||
97 | -- test option '-' | 124 | -- test option '-' |
98 | RUN('echo "print(arg[1])" | lua - -h > %s', out) | 125 | RUN('echo "print(arg[1])" | lua - -h > %s', out) |
99 | checkout("-h\n") | 126 | checkout("-h\n") |
@@ -385,12 +412,10 @@ checkprogout("101\n13\t22\n\n") | |||
385 | prepfile[[#comment in 1st line without \n at the end]] | 412 | prepfile[[#comment in 1st line without \n at the end]] |
386 | RUN('lua %s', prog) | 413 | RUN('lua %s', prog) |
387 | 414 | ||
388 | prepfile[[#test line number when file starts with comment line | 415 | -- first-line comment with binary file |
389 | debug = require"debug" | 416 | prepfile("#comment\n" .. string.dump(load("print(3)"))) |
390 | print(debug.getinfo(1).currentline) | ||
391 | ]] | ||
392 | RUN('lua %s > %s', prog, out) | 417 | RUN('lua %s > %s', prog, out) |
393 | checkprogout('3\n') | 418 | checkout('3\n') |
394 | 419 | ||
395 | -- close Lua with an open file | 420 | -- close Lua with an open file |
396 | prepfile(string.format([[io.output(%q); io.write('alo')]], out)) | 421 | prepfile(string.format([[io.output(%q); io.write('alo')]], out)) |