diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2015-10-02 02:41:39 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2015-10-02 02:41:39 +0200 |
commit | 28b00ce6ff8cde91f3e83632e705709b7cd2ab20 (patch) | |
tree | a87a9077c2a26813f87d3403c60296d7c12d6c77 | |
parent | c919d561adaf152d9b8834475539e2366c8aa484 (diff) | |
download | busybox-w32-28b00ce6ff8cde91f3e83632e705709b7cd2ab20.tar.gz busybox-w32-28b00ce6ff8cde91f3e83632e705709b7cd2ab20.tar.bz2 busybox-w32-28b00ce6ff8cde91f3e83632e705709b7cd2ab20.zip |
awk: support "length" form of "length()". Closes 8371
function old new delta
parse_expr 805 848 +43
tokenlist 447 448 +1
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 115 | ||||
-rwxr-xr-x | testsuite/awk.tests | 5 |
2 files changed, 72 insertions, 48 deletions
diff --git a/editors/awk.c b/editors/awk.c index 9c6819ad2..69816464d 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -207,7 +207,7 @@ typedef struct tsplitter_s { | |||
207 | 207 | ||
208 | /* simple token classes */ | 208 | /* simple token classes */ |
209 | /* Order and hex values are very important!!! See next_token() */ | 209 | /* Order and hex values are very important!!! See next_token() */ |
210 | #define TC_SEQSTART 1 /* ( */ | 210 | #define TC_SEQSTART (1 << 0) /* ( */ |
211 | #define TC_SEQTERM (1 << 1) /* ) */ | 211 | #define TC_SEQTERM (1 << 1) /* ) */ |
212 | #define TC_REGEXP (1 << 2) /* /.../ */ | 212 | #define TC_REGEXP (1 << 2) /* /.../ */ |
213 | #define TC_OUTRDR (1 << 3) /* | > >> */ | 213 | #define TC_OUTRDR (1 << 3) /* | > >> */ |
@@ -227,16 +227,22 @@ typedef struct tsplitter_s { | |||
227 | #define TC_WHILE (1 << 17) | 227 | #define TC_WHILE (1 << 17) |
228 | #define TC_ELSE (1 << 18) | 228 | #define TC_ELSE (1 << 18) |
229 | #define TC_BUILTIN (1 << 19) | 229 | #define TC_BUILTIN (1 << 19) |
230 | #define TC_GETLINE (1 << 20) | 230 | /* This costs ~50 bytes of code. |
231 | #define TC_FUNCDECL (1 << 21) /* `function' `func' */ | 231 | * A separate class to support deprecated "length" form. If we don't need that |
232 | #define TC_BEGIN (1 << 22) | 232 | * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH |
233 | #define TC_END (1 << 23) | 233 | * can be merged with TC_BUILTIN: |
234 | #define TC_EOF (1 << 24) | 234 | */ |
235 | #define TC_VARIABLE (1 << 25) | 235 | #define TC_LENGTH (1 << 20) |
236 | #define TC_ARRAY (1 << 26) | 236 | #define TC_GETLINE (1 << 21) |
237 | #define TC_FUNCTION (1 << 27) | 237 | #define TC_FUNCDECL (1 << 22) /* `function' `func' */ |
238 | #define TC_STRING (1 << 28) | 238 | #define TC_BEGIN (1 << 23) |
239 | #define TC_NUMBER (1 << 29) | 239 | #define TC_END (1 << 24) |
240 | #define TC_EOF (1 << 25) | ||
241 | #define TC_VARIABLE (1 << 26) | ||
242 | #define TC_ARRAY (1 << 27) | ||
243 | #define TC_FUNCTION (1 << 28) | ||
244 | #define TC_STRING (1 << 29) | ||
245 | #define TC_NUMBER (1 << 30) | ||
240 | 246 | ||
241 | #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) | 247 | #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) |
242 | 248 | ||
@@ -244,14 +250,16 @@ typedef struct tsplitter_s { | |||
244 | #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) | 250 | #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) |
245 | //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) | 251 | //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) |
246 | #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ | 252 | #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ |
247 | | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER) | 253 | | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ |
254 | | TC_SEQSTART | TC_STRING | TC_NUMBER) | ||
248 | 255 | ||
249 | #define TC_STATEMNT (TC_STATX | TC_WHILE) | 256 | #define TC_STATEMNT (TC_STATX | TC_WHILE) |
250 | #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) | 257 | #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) |
251 | 258 | ||
252 | /* word tokens, cannot mean something else if not expected */ | 259 | /* word tokens, cannot mean something else if not expected */ |
253 | #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \ | 260 | #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \ |
254 | | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END) | 261 | | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ |
262 | | TC_FUNCDECL | TC_BEGIN | TC_END) | ||
255 | 263 | ||
256 | /* discard newlines after these */ | 264 | /* discard newlines after these */ |
257 | #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ | 265 | #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ |
@@ -346,54 +354,54 @@ enum { | |||
346 | #define NTC "\377" /* switch to next token class (tc<<1) */ | 354 | #define NTC "\377" /* switch to next token class (tc<<1) */ |
347 | #define NTCC '\377' | 355 | #define NTCC '\377' |
348 | 356 | ||
349 | #define OC_B OC_BUILTIN | ||
350 | |||
351 | static const char tokenlist[] ALIGN1 = | 357 | static const char tokenlist[] ALIGN1 = |
352 | "\1(" NTC | 358 | "\1(" NTC /* TC_SEQSTART */ |
353 | "\1)" NTC | 359 | "\1)" NTC /* TC_SEQTERM */ |
354 | "\1/" NTC /* REGEXP */ | 360 | "\1/" NTC /* TC_REGEXP */ |
355 | "\2>>" "\1>" "\1|" NTC /* OUTRDR */ | 361 | "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */ |
356 | "\2++" "\2--" NTC /* UOPPOST */ | 362 | "\2++" "\2--" NTC /* TC_UOPPOST */ |
357 | "\2++" "\2--" "\1$" NTC /* UOPPRE1 */ | 363 | "\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */ |
358 | "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */ | 364 | "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */ |
359 | "\2*=" "\2/=" "\2%=" "\2^=" | 365 | "\2*=" "\2/=" "\2%=" "\2^=" |
360 | "\1+" "\1-" "\3**=" "\2**" | 366 | "\1+" "\1-" "\3**=" "\2**" |
361 | "\1/" "\1%" "\1^" "\1*" | 367 | "\1/" "\1%" "\1^" "\1*" |
362 | "\2!=" "\2>=" "\2<=" "\1>" | 368 | "\2!=" "\2>=" "\2<=" "\1>" |
363 | "\1<" "\2!~" "\1~" "\2&&" | 369 | "\1<" "\2!~" "\1~" "\2&&" |
364 | "\2||" "\1?" "\1:" NTC | 370 | "\2||" "\1?" "\1:" NTC |
365 | "\2in" NTC | 371 | "\2in" NTC /* TC_IN */ |
366 | "\1," NTC | 372 | "\1," NTC /* TC_COMMA */ |
367 | "\1|" NTC | 373 | "\1|" NTC /* TC_PIPE */ |
368 | "\1+" "\1-" "\1!" NTC /* UOPPRE2 */ | 374 | "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */ |
369 | "\1]" NTC | 375 | "\1]" NTC /* TC_ARRTERM */ |
370 | "\1{" NTC | 376 | "\1{" NTC /* TC_GRPSTART */ |
371 | "\1}" NTC | 377 | "\1}" NTC /* TC_GRPTERM */ |
372 | "\1;" NTC | 378 | "\1;" NTC /* TC_SEMICOL */ |
373 | "\1\n" NTC | 379 | "\1\n" NTC /* TC_NEWLINE */ |
374 | "\2if" "\2do" "\3for" "\5break" /* STATX */ | 380 | "\2if" "\2do" "\3for" "\5break" /* TC_STATX */ |
375 | "\10continue" "\6delete" "\5print" | 381 | "\10continue" "\6delete" "\5print" |
376 | "\6printf" "\4next" "\10nextfile" | 382 | "\6printf" "\4next" "\10nextfile" |
377 | "\6return" "\4exit" NTC | 383 | "\6return" "\4exit" NTC |
378 | "\5while" NTC | 384 | "\5while" NTC /* TC_WHILE */ |
379 | "\4else" NTC | 385 | "\4else" NTC /* TC_ELSE */ |
380 | 386 | "\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */ | |
381 | "\3and" "\5compl" "\6lshift" "\2or" | ||
382 | "\6rshift" "\3xor" | 387 | "\6rshift" "\3xor" |
383 | "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */ | 388 | "\5close" "\6system" "\6fflush" "\5atan2" |
384 | "\3cos" "\3exp" "\3int" "\3log" | 389 | "\3cos" "\3exp" "\3int" "\3log" |
385 | "\4rand" "\3sin" "\4sqrt" "\5srand" | 390 | "\4rand" "\3sin" "\4sqrt" "\5srand" |
386 | "\6gensub" "\4gsub" "\5index" "\6length" | 391 | "\6gensub" "\4gsub" "\5index" /* "\6length" was here */ |
387 | "\5match" "\5split" "\7sprintf" "\3sub" | 392 | "\5match" "\5split" "\7sprintf" "\3sub" |
388 | "\6substr" "\7systime" "\10strftime" "\6mktime" | 393 | "\6substr" "\7systime" "\10strftime" "\6mktime" |
389 | "\7tolower" "\7toupper" NTC | 394 | "\7tolower" "\7toupper" NTC |
390 | "\7getline" NTC | 395 | "\6length" NTC /* TC_LENGTH */ |
391 | "\4func" "\10function" NTC | 396 | "\7getline" NTC /* TC_GETLINE */ |
392 | "\5BEGIN" NTC | 397 | "\4func" "\10function" NTC /* TC_FUNCDECL */ |
393 | "\3END" | 398 | "\5BEGIN" NTC /* TC_BEGIN */ |
399 | "\3END" /* TC_END */ | ||
394 | /* compiler adds trailing "\0" */ | 400 | /* compiler adds trailing "\0" */ |
395 | ; | 401 | ; |
396 | 402 | ||
403 | #define OC_B OC_BUILTIN | ||
404 | |||
397 | static const uint32_t tokeninfo[] = { | 405 | static const uint32_t tokeninfo[] = { |
398 | 0, | 406 | 0, |
399 | 0, | 407 | 0, |
@@ -408,7 +416,7 @@ static const uint32_t tokeninfo[] = { | |||
408 | OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, | 416 | OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, |
409 | OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), | 417 | OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), |
410 | OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', | 418 | OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', |
411 | OC_IN|SV|P(49), /* in */ | 419 | OC_IN|SV|P(49), /* TC_IN */ |
412 | OC_COMMA|SS|P(80), | 420 | OC_COMMA|SS|P(80), |
413 | OC_PGETLINE|SV|P(37), | 421 | OC_PGETLINE|SV|P(37), |
414 | OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', | 422 | OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', |
@@ -423,20 +431,20 @@ static const uint32_t tokeninfo[] = { | |||
423 | OC_RETURN|Vx, OC_EXIT|Nx, | 431 | OC_RETURN|Vx, OC_EXIT|Nx, |
424 | ST_WHILE, | 432 | ST_WHILE, |
425 | 0, /* else */ | 433 | 0, /* else */ |
426 | |||
427 | OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), | 434 | OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), |
428 | OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), | 435 | OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), |
429 | OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), | 436 | OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), |
430 | OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, | 437 | OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, |
431 | OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, | 438 | OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, |
432 | OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le, | 439 | OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */ |
433 | OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), | 440 | OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), |
434 | OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), | 441 | OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), |
435 | OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), | 442 | OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), |
443 | OC_FBLTIN|Sx|F_le, /* TC_LENGTH */ | ||
436 | OC_GETLINE|SV|P(0), | 444 | OC_GETLINE|SV|P(0), |
437 | 0, 0, | 445 | 0, 0, |
438 | 0, | 446 | 0, |
439 | 0 /* END */ | 447 | 0 /* TC_END */ |
440 | }; | 448 | }; |
441 | 449 | ||
442 | /* internal variable names and their initial values */ | 450 | /* internal variable names and their initial values */ |
@@ -1202,9 +1210,10 @@ static uint32_t next_token(uint32_t expected) | |||
1202 | ltclass = t_tclass; | 1210 | ltclass = t_tclass; |
1203 | 1211 | ||
1204 | /* Are we ready for this? */ | 1212 | /* Are we ready for this? */ |
1205 | if (!(ltclass & expected)) | 1213 | if (!(ltclass & expected)) { |
1206 | syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? | 1214 | syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? |
1207 | EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); | 1215 | EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); |
1216 | } | ||
1208 | 1217 | ||
1209 | return ltclass; | 1218 | return ltclass; |
1210 | #undef concat_inserted | 1219 | #undef concat_inserted |
@@ -1371,6 +1380,16 @@ static node *parse_expr(uint32_t iexp) | |||
1371 | debug_printf_parse("%s: TC_BUILTIN\n", __func__); | 1380 | debug_printf_parse("%s: TC_BUILTIN\n", __func__); |
1372 | cn->l.n = condition(); | 1381 | cn->l.n = condition(); |
1373 | break; | 1382 | break; |
1383 | |||
1384 | case TC_LENGTH: | ||
1385 | debug_printf_parse("%s: TC_LENGTH\n", __func__); | ||
1386 | next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM); | ||
1387 | rollback_token(); | ||
1388 | if (t_tclass & TC_SEQSTART) { | ||
1389 | /* It was a "(" token. Handle just like TC_BUILTIN */ | ||
1390 | cn->l.n = condition(); | ||
1391 | } | ||
1392 | break; | ||
1374 | } | 1393 | } |
1375 | } | 1394 | } |
1376 | } | 1395 | } |
diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 9e6952ffd..adab4ae1e 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests | |||
@@ -281,6 +281,11 @@ testing "awk length(array)" \ | |||
281 | "2\n" \ | 281 | "2\n" \ |
282 | "" "" | 282 | "" "" |
283 | 283 | ||
284 | testing "awk length()" \ | ||
285 | "awk '{print length; print length(); print length(\"qwe\"); print length(99+9)}'" \ | ||
286 | "3\n3\n3\n3\n" \ | ||
287 | "" "qwe" | ||
288 | |||
284 | testing "awk -f and ARGC" \ | 289 | testing "awk -f and ARGC" \ |
285 | "awk -f - input" \ | 290 | "awk -f - input" \ |
286 | "re\n2\n" \ | 291 | "re\n2\n" \ |