diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2021-09-03 13:14:56 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2021-09-03 13:14:56 -0300 |
commit | 9db4bfed6bb9d5828c99c0f24749eedf54d70cc2 (patch) | |
tree | 5a7bae2573f3e08813680a2506840f2356d18cd8 | |
parent | 91673a8ec0ae55e188a790bd2dfdc99246adf20e (diff) | |
download | lua-9db4bfed6bb9d5828c99c0f24749eedf54d70cc2.tar.gz lua-9db4bfed6bb9d5828c99c0f24749eedf54d70cc2.tar.bz2 lua-9db4bfed6bb9d5828c99c0f24749eedf54d70cc2.zip |
Revamp of format validation in 'string.format'
When calling 'sprintf', not all conversion specifiers accept all
flags; some combinations are undefined behavior.
-rw-r--r-- | lstrlib.c | 112 | ||||
-rw-r--r-- | manual/manual.of | 6 | ||||
-rw-r--r-- | testes/strings.lua | 36 |
3 files changed, 118 insertions, 36 deletions
@@ -1090,13 +1090,31 @@ static int lua_number2strx (lua_State *L, char *buff, int sz, | |||
1090 | 1090 | ||
1091 | 1091 | ||
1092 | /* valid flags in a format specification */ | 1092 | /* valid flags in a format specification */ |
1093 | #if !defined(L_FMTFLAGS) | 1093 | #if !defined(L_FMTFLAGSF) |
1094 | #define L_FMTFLAGS "-+ #0" | 1094 | |
1095 | /* valid flags for a, A, e, E, f, F, g, and G conversions */ | ||
1096 | #define L_FMTFLAGSF "-+#0 " | ||
1097 | |||
1098 | /* valid flags for o, x, and X conversions */ | ||
1099 | #define L_FMTFLAGSX "-#0" | ||
1100 | |||
1101 | /* valid flags for d and i conversions */ | ||
1102 | #define L_FMTFLAGSI "-+0 " | ||
1103 | |||
1104 | /* valid flags for u conversions */ | ||
1105 | #define L_FMTFLAGSU "-0" | ||
1106 | |||
1107 | /* valid flags for c, p, and s conversions */ | ||
1108 | #define L_FMTFLAGSC "-" | ||
1109 | |||
1095 | #endif | 1110 | #endif |
1096 | 1111 | ||
1097 | 1112 | ||
1098 | /* | 1113 | /* |
1099 | ** maximum size of each format specification (such as "%-099.99d") | 1114 | ** Maximum size of each format specification (such as "%-099.99d"): |
1115 | ** Initial '%', flags (up to 5), width (2), period, precision (2), | ||
1116 | ** length modifier (8), conversion specifier, and final '\0', plus some | ||
1117 | ** extra. | ||
1100 | */ | 1118 | */ |
1101 | #define MAX_FORMAT 32 | 1119 | #define MAX_FORMAT 32 |
1102 | 1120 | ||
@@ -1189,25 +1207,53 @@ static void addliteral (lua_State *L, luaL_Buffer *b, int arg) { | |||
1189 | } | 1207 | } |
1190 | 1208 | ||
1191 | 1209 | ||
1192 | static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { | 1210 | static const char *get2digits (const char *s) { |
1193 | const char *p = strfrmt; | 1211 | if (isdigit(uchar(*s))) { |
1194 | while (*p != '\0' && strchr(L_FMTFLAGS, *p) != NULL) p++; /* skip flags */ | 1212 | s++; |
1195 | if ((size_t)(p - strfrmt) >= sizeof(L_FMTFLAGS)/sizeof(char)) | 1213 | if (isdigit(uchar(*s))) s++; /* (2 digits at most) */ |
1196 | luaL_error(L, "invalid format (repeated flags)"); | 1214 | } |
1197 | if (isdigit(uchar(*p))) p++; /* skip width */ | 1215 | return s; |
1198 | if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ | 1216 | } |
1199 | if (*p == '.') { | 1217 | |
1200 | p++; | 1218 | |
1201 | if (isdigit(uchar(*p))) p++; /* skip precision */ | 1219 | /* |
1199 | if (*p == '.') { | 1220 | ** Check whether a conversion specification is valid. When called, |
1221 | ** first character in 'form' must be '%' and last character must | ||
1222 | ** be a valid conversion specifier. 'flags' are the accepted flags; | ||
1223 | ** 'precision' signals whether to accept a precision. | ||
1224 | */ | ||
1225 | static void checkformat (lua_State *L, const char *form, const char *flags, | ||
1226 | int precision) { | ||
1227 | const char *spec = form + 1; /* skip '%' */ | ||
1228 | spec += strspn(spec, flags); /* skip flags */ | ||
1229 | if (*spec != '0') { /* a width cannot start with '0' */ | ||
1230 | spec = get2digits(spec); /* skip width */ | ||
1231 | if (*spec == '.' && precision) { | ||
1232 | spec++; | ||
1233 | spec = get2digits(spec); /* skip precision */ | ||
1234 | } | ||
1203 | } | 1235 | } |
1204 | if (isdigit(uchar(*p))) | 1236 | if (!isalpha(uchar(*spec))) /* did not go to the end? */ |
1205 | luaL_error(L, "invalid format (width or precision too long)"); | 1237 | luaL_error(L, "invalid conversion specification: '%s'", form); |
1238 | } | ||
1239 | |||
1240 | |||
1241 | /* | ||
1242 | ** Get a conversion specification and copy it to 'form'. | ||
1243 | ** Return the address of its last character. | ||
1244 | */ | ||
1245 | static const char *getformat (lua_State *L, const char *strfrmt, | ||
1246 | char *form) { | ||
1247 | /* spans flags, width, and precision ('0' is included as a flag) */ | ||
1248 | size_t len = strspn(strfrmt, L_FMTFLAGSF "123456789."); | ||
1249 | len++; /* adds following character (should be the specifier) */ | ||
1250 | /* still needs space for '%', '\0', plus a length modifier */ | ||
1251 | if (len >= MAX_FORMAT - 10) | ||
1252 | luaL_error(L, "invalid format (too long)"); | ||
1206 | *(form++) = '%'; | 1253 | *(form++) = '%'; |
1207 | memcpy(form, strfrmt, ((p - strfrmt) + 1) * sizeof(char)); | 1254 | memcpy(form, strfrmt, len * sizeof(char)); |
1208 | form += (p - strfrmt) + 1; | 1255 | *(form + len) = '\0'; |
1209 | *form = '\0'; | 1256 | return strfrmt + len - 1; |
1210 | return p; | ||
1211 | } | 1257 | } |
1212 | 1258 | ||
1213 | 1259 | ||
@@ -1230,6 +1276,7 @@ static int str_format (lua_State *L) { | |||
1230 | size_t sfl; | 1276 | size_t sfl; |
1231 | const char *strfrmt = luaL_checklstring(L, arg, &sfl); | 1277 | const char *strfrmt = luaL_checklstring(L, arg, &sfl); |
1232 | const char *strfrmt_end = strfrmt+sfl; | 1278 | const char *strfrmt_end = strfrmt+sfl; |
1279 | const char *flags; | ||
1233 | luaL_Buffer b; | 1280 | luaL_Buffer b; |
1234 | luaL_buffinit(L, &b); | 1281 | luaL_buffinit(L, &b); |
1235 | while (strfrmt < strfrmt_end) { | 1282 | while (strfrmt < strfrmt_end) { |
@@ -1239,25 +1286,35 @@ static int str_format (lua_State *L) { | |||
1239 | luaL_addchar(&b, *strfrmt++); /* %% */ | 1286 | luaL_addchar(&b, *strfrmt++); /* %% */ |
1240 | else { /* format item */ | 1287 | else { /* format item */ |
1241 | char form[MAX_FORMAT]; /* to store the format ('%...') */ | 1288 | char form[MAX_FORMAT]; /* to store the format ('%...') */ |
1242 | int maxitem = MAX_ITEM; | 1289 | int maxitem = MAX_ITEM; /* maximum length for the result */ |
1243 | char *buff = luaL_prepbuffsize(&b, maxitem); /* to put formatted item */ | 1290 | char *buff = luaL_prepbuffsize(&b, maxitem); /* to put result */ |
1244 | int nb = 0; /* number of bytes in added item */ | 1291 | int nb = 0; /* number of bytes in result */ |
1245 | if (++arg > top) | 1292 | if (++arg > top) |
1246 | return luaL_argerror(L, arg, "no value"); | 1293 | return luaL_argerror(L, arg, "no value"); |
1247 | strfrmt = scanformat(L, strfrmt, form); | 1294 | strfrmt = getformat(L, strfrmt, form); |
1248 | switch (*strfrmt++) { | 1295 | switch (*strfrmt++) { |
1249 | case 'c': { | 1296 | case 'c': { |
1297 | checkformat(L, form, L_FMTFLAGSC, 0); | ||
1250 | nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg)); | 1298 | nb = l_sprintf(buff, maxitem, form, (int)luaL_checkinteger(L, arg)); |
1251 | break; | 1299 | break; |
1252 | } | 1300 | } |
1253 | case 'd': case 'i': | 1301 | case 'd': case 'i': |
1254 | case 'o': case 'u': case 'x': case 'X': { | 1302 | flags = L_FMTFLAGSI; |
1303 | goto intcase; | ||
1304 | case 'u': | ||
1305 | flags = L_FMTFLAGSU; | ||
1306 | goto intcase; | ||
1307 | case 'o': case 'x': case 'X': | ||
1308 | flags = L_FMTFLAGSX; | ||
1309 | intcase: { | ||
1255 | lua_Integer n = luaL_checkinteger(L, arg); | 1310 | lua_Integer n = luaL_checkinteger(L, arg); |
1311 | checkformat(L, form, flags, 1); | ||
1256 | addlenmod(form, LUA_INTEGER_FRMLEN); | 1312 | addlenmod(form, LUA_INTEGER_FRMLEN); |
1257 | nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n); | 1313 | nb = l_sprintf(buff, maxitem, form, (LUAI_UACINT)n); |
1258 | break; | 1314 | break; |
1259 | } | 1315 | } |
1260 | case 'a': case 'A': | 1316 | case 'a': case 'A': |
1317 | checkformat(L, form, L_FMTFLAGSF, 1); | ||
1261 | addlenmod(form, LUA_NUMBER_FRMLEN); | 1318 | addlenmod(form, LUA_NUMBER_FRMLEN); |
1262 | nb = lua_number2strx(L, buff, maxitem, form, | 1319 | nb = lua_number2strx(L, buff, maxitem, form, |
1263 | luaL_checknumber(L, arg)); | 1320 | luaL_checknumber(L, arg)); |
@@ -1268,12 +1325,14 @@ static int str_format (lua_State *L) { | |||
1268 | /* FALLTHROUGH */ | 1325 | /* FALLTHROUGH */ |
1269 | case 'e': case 'E': case 'g': case 'G': { | 1326 | case 'e': case 'E': case 'g': case 'G': { |
1270 | lua_Number n = luaL_checknumber(L, arg); | 1327 | lua_Number n = luaL_checknumber(L, arg); |
1328 | checkformat(L, form, L_FMTFLAGSF, 1); | ||
1271 | addlenmod(form, LUA_NUMBER_FRMLEN); | 1329 | addlenmod(form, LUA_NUMBER_FRMLEN); |
1272 | nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n); | 1330 | nb = l_sprintf(buff, maxitem, form, (LUAI_UACNUMBER)n); |
1273 | break; | 1331 | break; |
1274 | } | 1332 | } |
1275 | case 'p': { | 1333 | case 'p': { |
1276 | const void *p = lua_topointer(L, arg); | 1334 | const void *p = lua_topointer(L, arg); |
1335 | checkformat(L, form, L_FMTFLAGSC, 0); | ||
1277 | if (p == NULL) { /* avoid calling 'printf' with argument NULL */ | 1336 | if (p == NULL) { /* avoid calling 'printf' with argument NULL */ |
1278 | p = "(null)"; /* result */ | 1337 | p = "(null)"; /* result */ |
1279 | form[strlen(form) - 1] = 's'; /* format it as a string */ | 1338 | form[strlen(form) - 1] = 's'; /* format it as a string */ |
@@ -1294,7 +1353,8 @@ static int str_format (lua_State *L) { | |||
1294 | luaL_addvalue(&b); /* keep entire string */ | 1353 | luaL_addvalue(&b); /* keep entire string */ |
1295 | else { | 1354 | else { |
1296 | luaL_argcheck(L, l == strlen(s), arg, "string contains zeros"); | 1355 | luaL_argcheck(L, l == strlen(s), arg, "string contains zeros"); |
1297 | if (!strchr(form, '.') && l >= 100) { | 1356 | checkformat(L, form, L_FMTFLAGSC, 1); |
1357 | if (strchr(form, '.') == NULL && l >= 100) { | ||
1298 | /* no precision and string is too long to be formatted */ | 1358 | /* no precision and string is too long to be formatted */ |
1299 | luaL_addvalue(&b); /* keep entire string */ | 1359 | luaL_addvalue(&b); /* keep entire string */ |
1300 | } | 1360 | } |
diff --git a/manual/manual.of b/manual/manual.of index 664b5c1e..ea9a0302 100644 --- a/manual/manual.of +++ b/manual/manual.of | |||
@@ -7078,8 +7078,10 @@ following the description given in its first argument, | |||
7078 | which must be a string. | 7078 | which must be a string. |
7079 | The format string follows the same rules as the @ANSI{sprintf}. | 7079 | The format string follows the same rules as the @ANSI{sprintf}. |
7080 | The only differences are that the conversion specifiers and modifiers | 7080 | The only differences are that the conversion specifiers and modifiers |
7081 | @T{*}, @id{h}, @id{L}, @id{l}, and @id{n} are not supported | 7081 | @id{F}, @id{n}, @T{*}, @id{h}, @id{L}, and @id{l} are not supported |
7082 | and that there is an extra specifier, @id{q}. | 7082 | and that there is an extra specifier, @id{q}. |
7083 | Both width and precision, when present, | ||
7084 | are limited to two digits. | ||
7083 | 7085 | ||
7084 | The specifier @id{q} formats booleans, nil, numbers, and strings | 7086 | The specifier @id{q} formats booleans, nil, numbers, and strings |
7085 | in a way that the result is a valid constant in Lua source code. | 7087 | in a way that the result is a valid constant in Lua source code. |
@@ -7099,7 +7101,7 @@ may produce the string: | |||
7099 | "a string with \"quotes\" and \ | 7101 | "a string with \"quotes\" and \ |
7100 | new line" | 7102 | new line" |
7101 | } | 7103 | } |
7102 | This specifier does not support modifiers (flags, width, length). | 7104 | This specifier does not support modifiers (flags, width, precision). |
7103 | 7105 | ||
7104 | The conversion specifiers | 7106 | The conversion specifiers |
7105 | @id{A}, @id{a}, @id{E}, @id{e}, @id{f}, | 7107 | @id{A}, @id{a}, @id{E}, @id{e}, @id{f}, |
diff --git a/testes/strings.lua b/testes/strings.lua index 61a06a25..184fa651 100644 --- a/testes/strings.lua +++ b/testes/strings.lua | |||
@@ -202,13 +202,11 @@ assert(string.format("\0%c\0%c%x\0", string.byte("\xe4"), string.byte("b"), 140) | |||
202 | "\0\xe4\0b8c\0") | 202 | "\0\xe4\0b8c\0") |
203 | assert(string.format('') == "") | 203 | assert(string.format('') == "") |
204 | assert(string.format("%c",34)..string.format("%c",48)..string.format("%c",90)..string.format("%c",100) == | 204 | assert(string.format("%c",34)..string.format("%c",48)..string.format("%c",90)..string.format("%c",100) == |
205 | string.format("%c%c%c%c", 34, 48, 90, 100)) | 205 | string.format("%1c%-c%-1c%c", 34, 48, 90, 100)) |
206 | assert(string.format("%s\0 is not \0%s", 'not be', 'be') == 'not be\0 is not \0be') | 206 | assert(string.format("%s\0 is not \0%s", 'not be', 'be') == 'not be\0 is not \0be') |
207 | assert(string.format("%%%d %010d", 10, 23) == "%10 0000000023") | 207 | assert(string.format("%%%d %010d", 10, 23) == "%10 0000000023") |
208 | assert(tonumber(string.format("%f", 10.3)) == 10.3) | 208 | assert(tonumber(string.format("%f", 10.3)) == 10.3) |
209 | x = string.format('"%-50s"', 'a') | 209 | assert(string.format('"%-50s"', 'a') == '"a' .. string.rep(' ', 49) .. '"') |
210 | assert(#x == 52) | ||
211 | assert(string.sub(x, 1, 4) == '"a ') | ||
212 | 210 | ||
213 | assert(string.format("-%.20s.20s", string.rep("%", 2000)) == | 211 | assert(string.format("-%.20s.20s", string.rep("%", 2000)) == |
214 | "-"..string.rep("%", 20)..".20s") | 212 | "-"..string.rep("%", 20)..".20s") |
@@ -237,7 +235,6 @@ end | |||
237 | 235 | ||
238 | assert(string.format("\0%s\0", "\0\0\1") == "\0\0\0\1\0") | 236 | assert(string.format("\0%s\0", "\0\0\1") == "\0\0\0\1\0") |
239 | checkerror("contains zeros", string.format, "%10s", "\0") | 237 | checkerror("contains zeros", string.format, "%10s", "\0") |
240 | checkerror("cannot have modifiers", string.format, "%10q", "1") | ||
241 | 238 | ||
242 | -- format x tostring | 239 | -- format x tostring |
243 | assert(string.format("%s %s", nil, true) == "nil true") | 240 | assert(string.format("%s %s", nil, true) == "nil true") |
@@ -341,6 +338,21 @@ do print("testing 'format %a %A'") | |||
341 | end | 338 | end |
342 | 339 | ||
343 | 340 | ||
341 | -- testing some flags (all these results are required by ISO C) | ||
342 | assert(string.format("%#12o", 10) == " 012") | ||
343 | assert(string.format("%#10x", 100) == " 0x64") | ||
344 | assert(string.format("%#-17X", 100) == "0X64 ") | ||
345 | assert(string.format("%013i", -100) == "-000000000100") | ||
346 | assert(string.format("%2.5d", -100) == "-00100") | ||
347 | assert(string.format("%.u", 0) == "") | ||
348 | assert(string.format("%+#014.0f", 100) == "+000000000100.") | ||
349 | assert(string.format("% 1.0E", 100) == " 1E+02") | ||
350 | assert(string.format("%-16c", 97) == "a ") | ||
351 | assert(string.format("%+.3G", 1.5) == "+1.5") | ||
352 | assert(string.format("% .1g", 2^10) == " 1e+03") | ||
353 | assert(string.format("%.0s", "alo") == "") | ||
354 | assert(string.format("%.s", "alo") == "") | ||
355 | |||
344 | -- errors in format | 356 | -- errors in format |
345 | 357 | ||
346 | local function check (fmt, msg) | 358 | local function check (fmt, msg) |
@@ -348,13 +360,21 @@ local function check (fmt, msg) | |||
348 | end | 360 | end |
349 | 361 | ||
350 | local aux = string.rep('0', 600) | 362 | local aux = string.rep('0', 600) |
351 | check("%100.3d", "too long") | 363 | check("%100.3d", "invalid conversion") |
352 | check("%1"..aux..".3d", "too long") | 364 | check("%1"..aux..".3d", "too long") |
353 | check("%1.100d", "too long") | 365 | check("%1.100d", "invalid conversion") |
354 | check("%10.1"..aux.."004d", "too long") | 366 | check("%10.1"..aux.."004d", "too long") |
355 | check("%t", "invalid conversion") | 367 | check("%t", "invalid conversion") |
356 | check("%"..aux.."d", "repeated flags") | 368 | check("%"..aux.."d", "too long") |
357 | check("%d %d", "no value") | 369 | check("%d %d", "no value") |
370 | check("%010c", "invalid conversion") | ||
371 | check("%.10c", "invalid conversion") | ||
372 | check("%0.34s", "invalid conversion") | ||
373 | check("%#i", "invalid conversion") | ||
374 | check("%3.1p", "invalid conversion") | ||
375 | check("%0.s", "invalid conversion") | ||
376 | check("%10q", "cannot have modifiers") | ||
377 | check("%F", "invalid conversion") -- useless and not in C89 | ||
358 | 378 | ||
359 | 379 | ||
360 | assert(load("return 1\n--comment without ending EOL")() == 1) | 380 | assert(load("return 1\n--comment without ending EOL")() == 1) |