about summary refs log tree commit diff
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2021-09-03 13:14:56 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2021-09-03 13:14:56 -0300
commit 9db4bfed6bb9d5828c99c0f24749eedf54d70cc2 (patch)
tree 5a7bae2573f3e08813680a2506840f2356d18cd8
parent 91673a8ec0ae55e188a790bd2dfdc99246adf20e (diff)
download lua-9db4bfed6bb9d5828c99c0f24749eedf54d70cc2.tar.gz
lua-9db4bfed6bb9d5828c99c0f24749eedf54d70cc2.tar.bz2
lua-9db4bfed6bb9d5828c99c0f24749eedf54d70cc2.zip
Revamp of format validation in 'string.format'
When calling 'sprintf', not all conversion specifiers accept all flags; some combinations are undefined behavior.
-rw-r--r-- lstrlib.c 112
-rw-r--r-- manual/manual.of 6
-rw-r--r-- testes/strings.lua 36
3 files changed, 118 insertions, 36 deletions
diff --git a/lstrlib.c b/lstrlib.c
index 74501f78..e3b8df0f 100644
--- a/lstrlib.c
+++ b/lstrlib.c
@@ -1090,13 +1090,31 @@ static int lua_number2strx (lua_State *L, char *buff, int sz,
1090 1090
1091 1091
1092/* valid flags in a format specification */ 1092/* valid flags in a format specification */
1093#if !defined(L_FMTFLAGS) 1093#if !defined(L_FMTFLAGSF)
1094#define L_FMTFLAGS "-+ #0" 1094
1095/* valid flags for a, A, e, E, f, F, g, and G conversions */
1096#define L_FMTFLAGSF "-+#0 "
1097
1098/* valid flags for o, x, and X conversions */
1099#define L_FMTFLAGSX "-#0"
1100
1101/* valid flags for d and i conversions */
1102#define L_FMTFLAGSI "-+0 "
1103
1104/* valid flags for u conversions */
1105#define L_FMTFLAGSU "-0"
1106
1107/* valid flags for c, p, and s conversions */
1108#define L_FMTFLAGSC "-"
1109
1095#endif 1110#endif
1096 1111
1097 1112
1098/* 1113/*
1099** maximum size of each format specification (such as "%-099.99d") 1114** Maximum size of each format specification (such as "%-099.99d"):
1115** Initial '%', flags (up to 5), width (2), period, precision (2),
1116** length modifier (8), conversion specifier, and final '\0', plus some
1117** extra.
1100*/ 1118*/
1101#define MAX_FORMAT 32 1119#define MAX_FORMAT 32
1102 1120
@@ -1189,25 +1207,53 @@ static void addliteral (lua_State *L, luaL_Buffer *b, int arg) {
1189} 1207}
1190 1208
1191 1209
1192static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { 1210static const char *get2digits (const char *s) {
1193 const char *p = strfrmt; 1211 if (isdigit(uchar(*s))) {
1194 while (*p != '\0' && strchr(L_FMTFLAGS, *p) != NULL) p++; /* skip flags */ 1212 s++;
1195 if ((size_t)(p - strfrmt) >= sizeof(L_FMTFLAGS)/sizeof(char)) 1213 if (isdigit(uchar(*s))) s++; /* (2 digits at most) */
1196 luaL_error(L, "invalid format (repeated flags)"); 1214 }
1197 if (isdigit(uchar(*p))) p++; /* skip width */ 1215 return s;
1198 if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 1216}
1199 if (*p == '.') { 1217
1200 p++; 1218
1201 if (isdigit(uchar(*p))) p++; /* skip precision */ 1219/*
1202 if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ 1220** Check whether a conversion specification is valid. When called,
1221** first character in 'form' must be '%' and last character must
1222** be a valid conversion specifier. 'flags' are the accepted flags;
1223** 'precision' signals whether to accept a precision.
1224*/
1225static void checkformat (lua_State *L, const char *form, const char *flags,
1226 int precision) {
1227 const char *spec = form + 1; /* skip '%' */
1228 spec += strspn(spec, flags); /* skip flags */
1229 if (*spec != '0') { /* a width cannot start with '0' */
1230 spec = get2digits(spec); /* skip width */
1231 if (*spec == '.' && precision) {
1232 spec++;
1233 spec = get2digits(spec); /* skip precision */
1234 }
1203 } 1235 }
1204 if (isdigit(uchar(*p))) 1236 if (!isalpha(uchar(*spec))) /* did not go to the end? */
1205 luaL_error(L, "invalid format (width or precision too long)"); 1237 luaL_error(L, "invalid conversion specification: '%s'", form);
1238}
1239
1240
1241/*
1242** Get a conversion specification and copy it to 'form'.
1243** Return the address of its last character.
1244*/
1245static const char *getformat (lua_State *L, const char *strfrmt,
1246 char *form) {
1247 /* spans flags, width, and precision ('0' is included as a flag) */
1248 size_t len = strspn(strfrmt, L_FMTFLAGSF "123456789.");
1249 len++; /* adds following character (should be the specifier) */
1250 /* still needs space for '%', '\0', plus a length modifier */
1251 if (len >= MAX_FORMAT - 10)
1252 luaL_error(L, "invalid format (too long)");
1206 *(form++) = '%'; 1253 *(form++) = '%';
1207 memcpy(form, strfrmt, ((p - strfrmt) + 1) * sizeof(char)); 1254 memcpy(form, strfrmt, len * sizeof(char));
1208 form += (p - strfrmt) + 1; 1255 *(form + len) = '\0';
1209 *form = '\0'; 1256 return strfrmt + len - 1;
1210 return p;
1211} 1257}
1212 1258
1213 1259
@@ -1230,6 +1276,7 @@ static int str_format (lua_State *L) {
1230 size_t sfl; 1276 size_t sfl;
1231 const char *strfrmt = luaL_checklstring(L, arg, &sfl); 1277 const char *strfrmt = luaL_checklstring(L, arg, &sfl);
1232 const char *strfrmt_end = strfrmt+sfl; 1278 const char *strfrmt_end = strfrmt+sfl;
1279 const char *flags;
1233 luaL_Buffer b; 1280 luaL_Buffer b;
1234 luaL_buffinit(L, &b); 1281 luaL_buffinit(L, &b);
1235 while (strfrmt < strfrmt_end) { 1282 while (strfrmt < strfrmt_end) {
@@ -1239,25 +1286,35 @@ static int str_format (lua_State *L) {
1239 luaL_addchar(&b, *strfrmt++); /* %% */ 1286 luaL_addchar(&b, *strfrmt++); /* %% */
1240 else { /* format item */ 1287 else { /* format item */
1241 char form[MAX_FORMAT]; /* to store the format ('%...') */ 1288 char form[MAX_FORMAT]; /* to store the format ('%...') */
1242 int maxitem = MAX_ITEM; 1289 int maxitem = MAX_ITEM; /* maximum length for the result */
1243 char *buff = luaL_prepbuffsize(&b, maxitem); /* to put formatted item */ 1290 char *buff = luaL_prepbuffsize(&b, maxitem); /* to put result */
1244 int nb = 0; /* number of bytes in added item */ 1291 int nb = 0; /* number of bytes in result */
1245 if (++arg > top) 1292 if (++arg > top)
1246 return luaL_argerror(L, arg, "no value"); 1293 return luaL_argerror(L, arg, "no value");
1247 strfrmt = scanformat(L, strfrmt, form); 1294 strfrmt = getformat(L, strfrmt, form);
1248 switch (*strfrmt++) { 1295 switch (*strfrmt++) {
1249 case 'c': { 1296 case 'c': {
1297 checkformat(L, form, L_FMTFLAGSC, 0);
1250 nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg)); 1298 nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg));
1251 break; 1299 break;
1252 } 1300 }
1253 case 'd': case 'i': 1301 case 'd': case 'i':
1254 case 'o': case 'u': case 'x': case 'X': { 1302 flags = L_FMTFLAGSI;
1303 goto intcase;
1304 case 'u':
1305 flags = L_FMTFLAGSU;
1306 goto intcase;
1307 case 'o': case 'x': case 'X':
1308 flags = L_FMTFLAGSX;
1309 intcase: {
1255 lua_Integer n = luaL_checkinteger(L, arg); 1310 lua_Integer n = luaL_checkinteger(L, arg);
1311 checkformat(L, form, flags, 1);
1256 addlenmod(form, LUA_INTEGER_FRMLEN); 1312 addlenmod(form, LUA_INTEGER_FRMLEN);
1257 nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n); 1313 nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n);
1258 break; 1314 break;
1259 } 1315 }
1260 case 'a': case 'A': 1316 case 'a': case 'A':
1317 checkformat(L, form, L_FMTFLAGSF, 1);
1261 addlenmod(form, LUA_NUMBER_FRMLEN); 1318 addlenmod(form, LUA_NUMBER_FRMLEN);
1262 nb = lua_number2strx(L, buff, maxitem, form, 1319 nb = lua_number2strx(L, buff, maxitem, form,
1263 luaL_checknumber(L, arg)); 1320 luaL_checknumber(L, arg));
@@ -1268,12 +1325,14 @@ static int str_format (lua_State *L) {
1268 /* FALLTHROUGH */ 1325 /* FALLTHROUGH */
1269 case 'e': case 'E': case 'g': case 'G': { 1326 case 'e': case 'E': case 'g': case 'G': {
1270 lua_Number n = luaL_checknumber(L, arg); 1327 lua_Number n = luaL_checknumber(L, arg);
1328 checkformat(L, form, L_FMTFLAGSF, 1);
1271 addlenmod(form, LUA_NUMBER_FRMLEN); 1329 addlenmod(form, LUA_NUMBER_FRMLEN);
1272 nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n); 1330 nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n);
1273 break; 1331 break;
1274 } 1332 }
1275 case 'p': { 1333 case 'p': {
1276 const void *p = lua_topointer(L, arg); 1334 const void *p = lua_topointer(L, arg);
1335 checkformat(L, form, L_FMTFLAGSC, 0);
1277 if (p == NULL) { /* avoid calling 'printf' with argument NULL */ 1336 if (p == NULL) { /* avoid calling 'printf' with argument NULL */
1278 p = "(null)"; /* result */ 1337 p = "(null)"; /* result */
1279 form[strlen(form) - 1] = 's'; /* format it as a string */ 1338 form[strlen(form) - 1] = 's'; /* format it as a string */
@@ -1294,7 +1353,8 @@ static int str_format (lua_State *L) {
1294 luaL_addvalue(&b); /* keep entire string */ 1353 luaL_addvalue(&b); /* keep entire string */
1295 else { 1354 else {
1296 luaL_argcheck(L, l == strlen(s), arg, "string contains zeros"); 1355 luaL_argcheck(L, l == strlen(s), arg, "string contains zeros");
1297 if (!strchr(form, '.') && l >= 100) { 1356 checkformat(L, form, L_FMTFLAGSC, 1);
1357 if (strchr(form, '.') == NULL && l >= 100) {
1298 /* no precision and string is too long to be formatted */ 1358 /* no precision and string is too long to be formatted */
1299 luaL_addvalue(&b); /* keep entire string */ 1359 luaL_addvalue(&b); /* keep entire string */
1300 } 1360 }
diff --git a/manual/manual.of b/manual/manual.of
index 664b5c1e..ea9a0302 100644
--- a/manual/manual.of
+++ b/manual/manual.of
@@ -7078,8 +7078,10 @@ following the description given in its first argument,
7078which must be a string. 7078which must be a string.
7079The format string follows the same rules as the @ANSI{sprintf}. 7079The format string follows the same rules as the @ANSI{sprintf}.
7080The only differences are that the conversion specifiers and modifiers 7080The only differences are that the conversion specifiers and modifiers
7081@T{*}, @id{h}, @id{L}, @id{l}, and @id{n} are not supported 7081@id{F}, @id{n}, @T{*}, @id{h}, @id{L}, and @id{l} are not supported
7082and that there is an extra specifier, @id{q}. 7082and that there is an extra specifier, @id{q}.
7083Both width and precision, when present,
7084are limited to two digits.
7083 7085
7084The specifier @id{q} formats booleans, nil, numbers, and strings 7086The specifier @id{q} formats booleans, nil, numbers, and strings
7085in a way that the result is a valid constant in Lua source code. 7087in a way that the result is a valid constant in Lua source code.
@@ -7099,7 +7101,7 @@ may produce the string:
7099"a string with \"quotes\" and \ 7101"a string with \"quotes\" and \
7100 new line" 7102 new line"
7101} 7103}
7102This specifier does not support modifiers (flags, width, length). 7104This specifier does not support modifiers (flags, width, precision).
7103 7105
7104The conversion specifiers 7106The conversion specifiers
7105@id{A}, @id{a}, @id{E}, @id{e}, @id{f}, 7107@id{A}, @id{a}, @id{E}, @id{e}, @id{f},
diff --git a/testes/strings.lua b/testes/strings.lua
index 61a06a25..184fa651 100644
--- a/testes/strings.lua
+++ b/testes/strings.lua
@@ -202,13 +202,11 @@ assert(string.format("\0%c\0%c%x\0", string.byte("\xe4"), string.byte("b"), 140)
202 "\0\xe4\0b8c\0") 202 "\0\xe4\0b8c\0")
203assert(string.format('') == "") 203assert(string.format('') == "")
204assert(string.format("%c",34)..string.format("%c",48)..string.format("%c",90)..string.format("%c",100) == 204assert(string.format("%c",34)..string.format("%c",48)..string.format("%c",90)..string.format("%c",100) ==
205 string.format("%c%c%c%c", 34, 48, 90, 100)) 205 string.format("%1c%-c%-1c%c", 34, 48, 90, 100))
206assert(string.format("%s\0 is not \0%s", 'not be', 'be') == 'not be\0 is not \0be') 206assert(string.format("%s\0 is not \0%s", 'not be', 'be') == 'not be\0 is not \0be')
207assert(string.format("%%%d %010d", 10, 23) == "%10 0000000023") 207assert(string.format("%%%d %010d", 10, 23) == "%10 0000000023")
208assert(tonumber(string.format("%f", 10.3)) == 10.3) 208assert(tonumber(string.format("%f", 10.3)) == 10.3)
209x = string.format('"%-50s"', 'a') 209assert(string.format('"%-50s"', 'a') == '"a' .. string.rep(' ', 49) .. '"')
210assert(#x == 52)
211assert(string.sub(x, 1, 4) == '"a ')
212 210
213assert(string.format("-%.20s.20s", string.rep("%", 2000)) == 211assert(string.format("-%.20s.20s", string.rep("%", 2000)) ==
214 "-"..string.rep("%", 20)..".20s") 212 "-"..string.rep("%", 20)..".20s")
@@ -237,7 +235,6 @@ end
237 235
238assert(string.format("\0%s\0", "\0\0\1") == "\0\0\0\1\0") 236assert(string.format("\0%s\0", "\0\0\1") == "\0\0\0\1\0")
239checkerror("contains zeros", string.format, "%10s", "\0") 237checkerror("contains zeros", string.format, "%10s", "\0")
240checkerror("cannot have modifiers", string.format, "%10q", "1")
241 238
242-- format x tostring 239-- format x tostring
243assert(string.format("%s %s", nil, true) == "nil true") 240assert(string.format("%s %s", nil, true) == "nil true")
@@ -341,6 +338,21 @@ do print("testing 'format %a %A'")
341end 338end
342 339
343 340
341-- testing some flags (all these results are required by ISO C)
342assert(string.format("%#12o", 10) == " 012")
343assert(string.format("%#10x", 100) == " 0x64")
344assert(string.format("%#-17X", 100) == "0X64 ")
345assert(string.format("%013i", -100) == "-000000000100")
346assert(string.format("%2.5d", -100) == "-00100")
347assert(string.format("%.u", 0) == "")
348assert(string.format("%+#014.0f", 100) == "+000000000100.")
349assert(string.format("% 1.0E", 100) == " 1E+02")
350assert(string.format("%-16c", 97) == "a ")
351assert(string.format("%+.3G", 1.5) == "+1.5")
352assert(string.format("% .1g", 2^10) == " 1e+03")
353assert(string.format("%.0s", "alo") == "")
354assert(string.format("%.s", "alo") == "")
355
344-- errors in format 356-- errors in format
345 357
346local function check (fmt, msg) 358local function check (fmt, msg)
@@ -348,13 +360,21 @@ local function check (fmt, msg)
348end 360end
349 361
350local aux = string.rep('0', 600) 362local aux = string.rep('0', 600)
351check("%100.3d", "too long") 363check("%100.3d", "invalid conversion")
352check("%1"..aux..".3d", "too long") 364check("%1"..aux..".3d", "too long")
353check("%1.100d", "too long") 365check("%1.100d", "invalid conversion")
354check("%10.1"..aux.."004d", "too long") 366check("%10.1"..aux.."004d", "too long")
355check("%t", "invalid conversion") 367check("%t", "invalid conversion")
356check("%"..aux.."d", "repeated flags") 368check("%"..aux.."d", "too long")
357check("%d %d", "no value") 369check("%d %d", "no value")
370check("%010c", "invalid conversion")
371check("%.10c", "invalid conversion")
372check("%0.34s", "invalid conversion")
373check("%#i", "invalid conversion")
374check("%3.1p", "invalid conversion")
375check("%0.s", "invalid conversion")
376check("%10q", "cannot have modifiers")
377check("%F", "invalid conversion") -- useless and not in C89
358 378
359 379
360assert(load("return 1\n--comment without ending EOL")() == 1) 380assert(load("return 1\n--comment without ending EOL")() == 1)