about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2015-10-02 02:41:39 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2015-10-02 02:41:39 +0200
commit 28b00ce6ff8cde91f3e83632e705709b7cd2ab20 (patch)
tree a87a9077c2a26813f87d3403c60296d7c12d6c77
parent c919d561adaf152d9b8834475539e2366c8aa484 (diff)
download busybox-w32-28b00ce6ff8cde91f3e83632e705709b7cd2ab20.tar.gz
busybox-w32-28b00ce6ff8cde91f3e83632e705709b7cd2ab20.tar.bz2
busybox-w32-28b00ce6ff8cde91f3e83632e705709b7cd2ab20.zip
awk: support "length" form of "length()". Closes 8371
function                                             old     new   delta
parse_expr                                           805     848     +43
tokenlist                                            447     448      +1

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/awk.c 115
-rwxr-xr-x testsuite/awk.tests 5
2 files changed, 72 insertions, 48 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 9c6819ad2..69816464d 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -207,7 +207,7 @@ typedef struct tsplitter_s {
207 207
208/* simple token classes */ 208/* simple token classes */
209/* Order and hex values are very important!!! See next_token() */ 209/* Order and hex values are very important!!! See next_token() */
210#define TC_SEQSTART 1 /* ( */ 210#define TC_SEQSTART (1 << 0) /* ( */
211#define TC_SEQTERM (1 << 1) /* ) */ 211#define TC_SEQTERM (1 << 1) /* ) */
212#define TC_REGEXP (1 << 2) /* /.../ */ 212#define TC_REGEXP (1 << 2) /* /.../ */
213#define TC_OUTRDR (1 << 3) /* | > >> */ 213#define TC_OUTRDR (1 << 3) /* | > >> */
@@ -227,16 +227,22 @@ typedef struct tsplitter_s {
227#define TC_WHILE (1 << 17) 227#define TC_WHILE (1 << 17)
228#define TC_ELSE (1 << 18) 228#define TC_ELSE (1 << 18)
229#define TC_BUILTIN (1 << 19) 229#define TC_BUILTIN (1 << 19)
230#define TC_GETLINE (1 << 20) 230/* This costs ~50 bytes of code.
231#define TC_FUNCDECL (1 << 21) /* `function' `func' */ 231 * A separate class to support deprecated "length" form. If we don't need that
232#define TC_BEGIN (1 << 22) 232 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
233#define TC_END (1 << 23) 233 * can be merged with TC_BUILTIN:
234#define TC_EOF (1 << 24) 234 */
235#define TC_VARIABLE (1 << 25) 235#define TC_LENGTH (1 << 20)
236#define TC_ARRAY (1 << 26) 236#define TC_GETLINE (1 << 21)
237#define TC_FUNCTION (1 << 27) 237#define TC_FUNCDECL (1 << 22) /* `function' `func' */
238#define TC_STRING (1 << 28) 238#define TC_BEGIN (1 << 23)
239#define TC_NUMBER (1 << 29) 239#define TC_END (1 << 24)
240#define TC_EOF (1 << 25)
241#define TC_VARIABLE (1 << 26)
242#define TC_ARRAY (1 << 27)
243#define TC_FUNCTION (1 << 28)
244#define TC_STRING (1 << 29)
245#define TC_NUMBER (1 << 30)
240 246
241#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) 247#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
242 248
@@ -244,14 +250,16 @@ typedef struct tsplitter_s {
244#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) 250#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
245//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) 251//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
246#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ 252#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
247 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER) 253 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
254 | TC_SEQSTART | TC_STRING | TC_NUMBER)
248 255
249#define TC_STATEMNT (TC_STATX | TC_WHILE) 256#define TC_STATEMNT (TC_STATX | TC_WHILE)
250#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) 257#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
251 258
252/* word tokens, cannot mean something else if not expected */ 259/* word tokens, cannot mean something else if not expected */
253#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \ 260#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
254 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END) 261 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
262 | TC_FUNCDECL | TC_BEGIN | TC_END)
255 263
256/* discard newlines after these */ 264/* discard newlines after these */
257#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ 265#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
@@ -346,54 +354,54 @@ enum {
346#define NTC "\377" /* switch to next token class (tc<<1) */ 354#define NTC "\377" /* switch to next token class (tc<<1) */
347#define NTCC '\377' 355#define NTCC '\377'
348 356
349#define OC_B OC_BUILTIN
350
351static const char tokenlist[] ALIGN1 = 357static const char tokenlist[] ALIGN1 =
352 "\1(" NTC 358 "\1(" NTC /* TC_SEQSTART */
353 "\1)" NTC 359 "\1)" NTC /* TC_SEQTERM */
354 "\1/" NTC /* REGEXP */ 360 "\1/" NTC /* TC_REGEXP */
355 "\2>>" "\1>" "\1|" NTC /* OUTRDR */ 361 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
356 "\2++" "\2--" NTC /* UOPPOST */ 362 "\2++" "\2--" NTC /* TC_UOPPOST */
357 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */ 363 "\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */
358 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */ 364 "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */
359 "\2*=" "\2/=" "\2%=" "\2^=" 365 "\2*=" "\2/=" "\2%=" "\2^="
360 "\1+" "\1-" "\3**=" "\2**" 366 "\1+" "\1-" "\3**=" "\2**"
361 "\1/" "\1%" "\1^" "\1*" 367 "\1/" "\1%" "\1^" "\1*"
362 "\2!=" "\2>=" "\2<=" "\1>" 368 "\2!=" "\2>=" "\2<=" "\1>"
363 "\1<" "\2!~" "\1~" "\2&&" 369 "\1<" "\2!~" "\1~" "\2&&"
364 "\2||" "\1?" "\1:" NTC 370 "\2||" "\1?" "\1:" NTC
365 "\2in" NTC 371 "\2in" NTC /* TC_IN */
366 "\1," NTC 372 "\1," NTC /* TC_COMMA */
367 "\1|" NTC 373 "\1|" NTC /* TC_PIPE */
368 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */ 374 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
369 "\1]" NTC 375 "\1]" NTC /* TC_ARRTERM */
370 "\1{" NTC 376 "\1{" NTC /* TC_GRPSTART */
371 "\1}" NTC 377 "\1}" NTC /* TC_GRPTERM */
372 "\1;" NTC 378 "\1;" NTC /* TC_SEMICOL */
373 "\1\n" NTC 379 "\1\n" NTC /* TC_NEWLINE */
374 "\2if" "\2do" "\3for" "\5break" /* STATX */ 380 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
375 "\10continue" "\6delete" "\5print" 381 "\10continue" "\6delete" "\5print"
376 "\6printf" "\4next" "\10nextfile" 382 "\6printf" "\4next" "\10nextfile"
377 "\6return" "\4exit" NTC 383 "\6return" "\4exit" NTC
378 "\5while" NTC 384 "\5while" NTC /* TC_WHILE */
379 "\4else" NTC 385 "\4else" NTC /* TC_ELSE */
380 386 "\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */
381 "\3and" "\5compl" "\6lshift" "\2or"
382 "\6rshift" "\3xor" 387 "\6rshift" "\3xor"
383 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */ 388 "\5close" "\6system" "\6fflush" "\5atan2"
384 "\3cos" "\3exp" "\3int" "\3log" 389 "\3cos" "\3exp" "\3int" "\3log"
385 "\4rand" "\3sin" "\4sqrt" "\5srand" 390 "\4rand" "\3sin" "\4sqrt" "\5srand"
386 "\6gensub" "\4gsub" "\5index" "\6length" 391 "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
387 "\5match" "\5split" "\7sprintf" "\3sub" 392 "\5match" "\5split" "\7sprintf" "\3sub"
388 "\6substr" "\7systime" "\10strftime" "\6mktime" 393 "\6substr" "\7systime" "\10strftime" "\6mktime"
389 "\7tolower" "\7toupper" NTC 394 "\7tolower" "\7toupper" NTC
390 "\7getline" NTC 395 "\6length" NTC /* TC_LENGTH */
391 "\4func" "\10function" NTC 396 "\7getline" NTC /* TC_GETLINE */
392 "\5BEGIN" NTC 397 "\4func" "\10function" NTC /* TC_FUNCDECL */
393 "\3END" 398 "\5BEGIN" NTC /* TC_BEGIN */
399 "\3END" /* TC_END */
394 /* compiler adds trailing "\0" */ 400 /* compiler adds trailing "\0" */
395 ; 401 ;
396 402
403#define OC_B OC_BUILTIN
404
397static const uint32_t tokeninfo[] = { 405static const uint32_t tokeninfo[] = {
398 0, 406 0,
399 0, 407 0,
@@ -408,7 +416,7 @@ static const uint32_t tokeninfo[] = {
408 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, 416 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
409 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), 417 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
410 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', 418 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
411 OC_IN|SV|P(49), /* in */ 419 OC_IN|SV|P(49), /* TC_IN */
412 OC_COMMA|SS|P(80), 420 OC_COMMA|SS|P(80),
413 OC_PGETLINE|SV|P(37), 421 OC_PGETLINE|SV|P(37),
414 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', 422 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
@@ -423,20 +431,20 @@ static const uint32_t tokeninfo[] = {
423 OC_RETURN|Vx, OC_EXIT|Nx, 431 OC_RETURN|Vx, OC_EXIT|Nx,
424 ST_WHILE, 432 ST_WHILE,
425 0, /* else */ 433 0, /* else */
426
427 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), 434 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
428 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), 435 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
429 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), 436 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
430 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, 437 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
431 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, 438 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
432 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le, 439 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
433 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), 440 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
434 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), 441 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
435 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), 442 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
443 OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
436 OC_GETLINE|SV|P(0), 444 OC_GETLINE|SV|P(0),
437 0, 0, 445 0, 0,
438 0, 446 0,
439 0 /* END */ 447 0 /* TC_END */
440}; 448};
441 449
442/* internal variable names and their initial values */ 450/* internal variable names and their initial values */
@@ -1202,9 +1210,10 @@ static uint32_t next_token(uint32_t expected)
1202 ltclass = t_tclass; 1210 ltclass = t_tclass;
1203 1211
1204 /* Are we ready for this? */ 1212 /* Are we ready for this? */
1205 if (!(ltclass & expected)) 1213 if (!(ltclass & expected)) {
1206 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? 1214 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1207 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); 1215 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1216 }
1208 1217
1209 return ltclass; 1218 return ltclass;
1210#undef concat_inserted 1219#undef concat_inserted
@@ -1371,6 +1380,16 @@ static node *parse_expr(uint32_t iexp)
1371 debug_printf_parse("%s: TC_BUILTIN\n", __func__); 1380 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1372 cn->l.n = condition(); 1381 cn->l.n = condition();
1373 break; 1382 break;
1383
1384 case TC_LENGTH:
1385 debug_printf_parse("%s: TC_LENGTH\n", __func__);
1386 next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM);
1387 rollback_token();
1388 if (t_tclass & TC_SEQSTART) {
1389 /* It was a "(" token. Handle just like TC_BUILTIN */
1390 cn->l.n = condition();
1391 }
1392 break;
1374 } 1393 }
1375 } 1394 }
1376 } 1395 }
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 9e6952ffd..adab4ae1e 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -281,6 +281,11 @@ testing "awk length(array)" \
281 "2\n" \ 281 "2\n" \
282 "" "" 282 "" ""
283 283
284testing "awk length()" \
285 "awk '{print length; print length(); print length(\"qwe\"); print length(99+9)}'" \
286 "3\n3\n3\n3\n" \
287 "" "qwe"
288
284testing "awk -f and ARGC" \ 289testing "awk -f and ARGC" \
285 "awk -f - input" \ 290 "awk -f - input" \
286 "re\n2\n" \ 291 "re\n2\n" \