diff options
| author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2022-08-23 16:08:53 -0300 |
|---|---|---|
| committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2022-08-23 16:08:53 -0300 |
| commit | 02060b7a37d88d4e92cf64a008c0651eae432c12 (patch) | |
| tree | 6640e82d6dc7fe3f8e1601b490ad33babac6d95e | |
| parent | a1f77a234a053da46b06d5d4be00ffb30d3eb45b (diff) | |
| download | lua-02060b7a37d88d4e92cf64a008c0651eae432c12.tar.gz lua-02060b7a37d88d4e92cf64a008c0651eae432c12.tar.bz2 lua-02060b7a37d88d4e92cf64a008c0651eae432c12.zip | |
Simpler handling of Byte Order Mark (BOM)
| -rw-r--r-- | lauxlib.c | 47 | ||||
| -rw-r--r-- | testes/main.lua | 35 |
2 files changed, 56 insertions, 26 deletions
| @@ -740,17 +740,18 @@ static int errfile (lua_State *L, const char *what, int fnameindex) { | |||
| 740 | } | 740 | } |
| 741 | 741 | ||
| 742 | 742 | ||
| 743 | static int skipBOM (LoadF *lf) { | 743 | /* |
| 744 | const char *p = "\xEF\xBB\xBF"; /* UTF-8 BOM mark */ | 744 | ** Skip an optional BOM at the start of a stream. If there is an |
| 745 | int c; | 745 | ** incomplete BOM (the first character is correct but the rest is |
| 746 | lf->n = 0; | 746 | ** not), returns the first character anyway to force an error |
| 747 | do { | 747 | ** (as no chunk can start with 0xEF). |
| 748 | c = getc(lf->f); | 748 | */ |
| 749 | if (c == EOF || c != *(const unsigned char *)p++) return c; | 749 | static int skipBOM (FILE *f) { |
| 750 | lf->buff[lf->n++] = c; /* to be read by the parser */ | 750 | int c = getc(f); /* read first character */ |
| 751 | } while (*p != '\0'); | 751 | if (c == 0xEF && getc(f) == 0xBB && getc(f) == 0xBF) /* correct BOM? */ |
| 752 | lf->n = 0; /* prefix matched; discard it */ | 752 | return getc(f); /* ignore BOM and return next char */ |
| 753 | return getc(lf->f); /* return next character */ | 753 | else /* no (valid) BOM */ |
| 754 | return c; /* return first character */ | ||
| 754 | } | 755 | } |
| 755 | 756 | ||
| 756 | 757 | ||
| @@ -761,13 +762,13 @@ static int skipBOM (LoadF *lf) { | |||
| 761 | ** first "valid" character of the file (after the optional BOM and | 762 | ** first "valid" character of the file (after the optional BOM and |
| 762 | ** a first-line comment). | 763 | ** a first-line comment). |
| 763 | */ | 764 | */ |
| 764 | static int skipcomment (LoadF *lf, int *cp) { | 765 | static int skipcomment (FILE *f, int *cp) { |
| 765 | int c = *cp = skipBOM(lf); | 766 | int c = *cp = skipBOM(f); |
| 766 | if (c == '#') { /* first line is a comment (Unix exec. file)? */ | 767 | if (c == '#') { /* first line is a comment (Unix exec. file)? */ |
| 767 | do { /* skip first line */ | 768 | do { /* skip first line */ |
| 768 | c = getc(lf->f); | 769 | c = getc(f); |
| 769 | } while (c != EOF && c != '\n'); | 770 | } while (c != EOF && c != '\n'); |
| 770 | *cp = getc(lf->f); /* skip end-of-line, if present */ | 771 | *cp = getc(f); /* next character after comment, if present */ |
| 771 | return 1; /* there was a comment */ | 772 | return 1; /* there was a comment */ |
| 772 | } | 773 | } |
| 773 | else return 0; /* no comment */ | 774 | else return 0; /* no comment */ |
| @@ -789,12 +790,16 @@ LUALIB_API int luaL_loadfilex (lua_State *L, const char *filename, | |||
| 789 | lf.f = fopen(filename, "r"); | 790 | lf.f = fopen(filename, "r"); |
| 790 | if (lf.f == NULL) return errfile(L, "open", fnameindex); | 791 | if (lf.f == NULL) return errfile(L, "open", fnameindex); |
| 791 | } | 792 | } |
| 792 | if (skipcomment(&lf, &c)) /* read initial portion */ | 793 | lf.n = 0; |
| 793 | lf.buff[lf.n++] = '\n'; /* add line to correct line numbers */ | 794 | if (skipcomment(lf.f, &c)) /* read initial portion */ |
| 794 | if (c == LUA_SIGNATURE[0] && filename) { /* binary file? */ | 795 | lf.buff[lf.n++] = '\n'; /* add newline to correct line numbers */ |
| 795 | lf.f = freopen(filename, "rb", lf.f); /* reopen in binary mode */ | 796 | if (c == LUA_SIGNATURE[0]) { /* binary file? */ |
| 796 | if (lf.f == NULL) return errfile(L, "reopen", fnameindex); | 797 | lf.n = 0; /* remove possible newline */ |
| 797 | skipcomment(&lf, &c); /* re-read initial portion */ | 798 | if (filename) { /* "real" file? */ |
| 799 | lf.f = freopen(filename, "rb", lf.f); /* reopen in binary mode */ | ||
| 800 | if (lf.f == NULL) return errfile(L, "reopen", fnameindex); | ||
| 801 | skipcomment(lf.f, &c); /* re-read initial portion */ | ||
| 802 | } | ||
| 798 | } | 803 | } |
| 799 | if (c != EOF) | 804 | if (c != EOF) |
| 800 | lf.buff[lf.n++] = c; /* 'c' is the first character of the stream */ | 805 | lf.buff[lf.n++] = c; /* 'c' is the first character of the stream */ |
diff --git a/testes/main.lua b/testes/main.lua
index 9def6386..9187420e 100644
--- a/testes/main.lua
+++ b/testes/main.lua
| @@ -94,6 +94,33 @@ RUN('echo "print(10)\nprint(2)\n" | lua > %s', out) | |||
| 94 | checkout("10\n2\n") | 94 | checkout("10\n2\n") |
| 95 | 95 | ||
| 96 | 96 | ||
| 97 | -- testing BOM | ||
| 98 | prepfile("\xEF\xBB\xBF") | ||
| 99 | RUN('lua %s > %s', prog, out) | ||
| 100 | checkout("") | ||
| 101 | |||
| 102 | prepfile("\xEF\xBB\xBFprint(3)") | ||
| 103 | RUN('lua %s > %s', prog, out) | ||
| 104 | checkout("3\n") | ||
| 105 | |||
| 106 | prepfile("\xEF\xBB\xBF# comment!!\nprint(3)") | ||
| 107 | RUN('lua %s > %s', prog, out) | ||
| 108 | checkout("3\n") | ||
| 109 | |||
| 110 | -- bad BOMs | ||
| 111 | prepfile("\xEF") | ||
| 112 | NoRun("unexpected symbol", 'lua %s > %s', prog, out) | ||
| 113 | |||
| 114 | prepfile("\xEF\xBB") | ||
| 115 | NoRun("unexpected symbol", 'lua %s > %s', prog, out) | ||
| 116 | |||
| 117 | prepfile("\xEFprint(3)") | ||
| 118 | NoRun("unexpected symbol", 'lua %s > %s', prog, out) | ||
| 119 | |||
| 120 | prepfile("\xEF\xBBprint(3)") | ||
| 121 | NoRun("unexpected symbol", 'lua %s > %s', prog, out) | ||
| 122 | |||
| 123 | |||
| 97 | -- test option '-' | 124 | -- test option '-' |
| 98 | RUN('echo "print(arg[1])" | lua - -h > %s', out) | 125 | RUN('echo "print(arg[1])" | lua - -h > %s', out) |
| 99 | checkout("-h\n") | 126 | checkout("-h\n") |
| @@ -385,12 +412,10 @@ checkprogout("101\n13\t22\n\n") | |||
| 385 | prepfile[[#comment in 1st line without \n at the end]] | 412 | prepfile[[#comment in 1st line without \n at the end]] |
| 386 | RUN('lua %s', prog) | 413 | RUN('lua %s', prog) |
| 387 | 414 | ||
| 388 | prepfile[[#test line number when file starts with comment line | 415 | -- first-line comment with binary file |
| 389 | debug = require"debug" | 416 | prepfile("#comment\n" .. string.dump(load("print(3)"))) |
| 390 | print(debug.getinfo(1).currentline) | ||
| 391 | ]] | ||
| 392 | RUN('lua %s > %s', prog, out) | 417 | RUN('lua %s > %s', prog, out) |
| 393 | checkprogout('3\n') | 418 | checkout('3\n') |
| 394 | 419 | ||
| 395 | -- close Lua with an open file | 420 | -- close Lua with an open file |
| 396 | prepfile(string.format([[io.output(%q); io.write('alo')]], out)) | 421 | prepfile(string.format([[io.output(%q); io.write('alo')]], out)) |
