about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ron Yorston <rmy@pobox.com> 2021-07-05 12:24:55 +0100
committer Ron Yorston <rmy@pobox.com> 2021-07-05 12:24:55 +0100
commit 57261b6b3a6d2955a5abd6a0563fe78ba43abf3f (patch)
tree 3069f9f8bbe05104a8a4339479b3f07d246ff540
parent e1ad66c0b8fd58a7158d40771175a7dab224202d (diff)
parent 08ca313d7edb99687068b93b5d2435b59f3db23a (diff)
download busybox-w32-57261b6b3a6d2955a5abd6a0563fe78ba43abf3f.tar.gz
busybox-w32-57261b6b3a6d2955a5abd6a0563fe78ba43abf3f.tar.bz2
busybox-w32-57261b6b3a6d2955a5abd6a0563fe78ba43abf3f.zip
Merge branch 'busybox' into merge
-rw-r--r-- editors/awk.c 1694
-rw-r--r-- modutils/modprobe.c 3
-rwxr-xr-x testsuite/awk.tests 32
3 files changed, 957 insertions, 772 deletions
diff --git a/editors/awk.c b/editors/awk.c
index 9b9b202db..c88b8e1c4 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -93,7 +93,6 @@ enum {
93}; 93};
94 94
95#define MAXVARFMT 240 95#define MAXVARFMT 240
96#define MINNVBLOCK 64
97 96
98/* variable flags */ 97/* variable flags */
99#define VF_NUMBER 0x0001 /* 1 = primary type is number */ 98#define VF_NUMBER 0x0001 /* 1 = primary type is number */
@@ -103,7 +102,7 @@ enum {
103#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */ 102#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
104#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */ 103#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
105#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */ 104#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
106#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */ 105#define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */
107#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */ 106#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
108#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */ 107#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
109 108
@@ -120,8 +119,8 @@ typedef struct walker_list {
120/* Variable */ 119/* Variable */
121typedef struct var_s { 120typedef struct var_s {
122 unsigned type; /* flags */ 121 unsigned type; /* flags */
123 double number;
124 char *string; 122 char *string;
123 double number;
125 union { 124 union {
126 int aidx; /* func arg idx (for compilation stage) */ 125 int aidx; /* func arg idx (for compilation stage) */
127 struct xhash_s *array; /* array ptr */ 126 struct xhash_s *array; /* array ptr */
@@ -140,6 +139,7 @@ typedef struct chain_s {
140/* Function */ 139/* Function */
141typedef struct func_s { 140typedef struct func_s {
142 unsigned nargs; 141 unsigned nargs;
142 smallint defined;
143 struct chain_s body; 143 struct chain_s body;
144} func; 144} func;
145 145
@@ -179,7 +179,7 @@ typedef struct node_s {
179 struct node_s *n; 179 struct node_s *n;
180 var *v; 180 var *v;
181 int aidx; 181 int aidx;
182 char *new_progname; 182 const char *new_progname;
183 regex_t *re; 183 regex_t *re;
184 } l; 184 } l;
185 union { 185 union {
@@ -192,63 +192,54 @@ typedef struct node_s {
192 } a; 192 } a;
193} node; 193} node;
194 194
195/* Block of temporary variables */
196typedef struct nvblock_s {
197 int size;
198 var *pos;
199 struct nvblock_s *prev;
200 struct nvblock_s *next;
201 var nv[];
202} nvblock;
203
204typedef struct tsplitter_s { 195typedef struct tsplitter_s {
205 node n; 196 node n;
206 regex_t re[2]; 197 regex_t re[2];
207} tsplitter; 198} tsplitter;
208 199
209/* simple token classes */ 200/* simple token classes */
210/* Order and hex values are very important!!! See next_token() */ 201/* order and hex values are very important!!! See next_token() */
211#define TC_SEQSTART (1 << 0) /* ( */ 202#define TC_LPAREN (1 << 0) /* ( */
212#define TC_SEQTERM (1 << 1) /* ) */ 203#define TC_RPAREN (1 << 1) /* ) */
213#define TC_REGEXP (1 << 2) /* /.../ */ 204#define TC_REGEXP (1 << 2) /* /.../ */
214#define TC_OUTRDR (1 << 3) /* | > >> */ 205#define TC_OUTRDR (1 << 3) /* | > >> */
215#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ 206#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
216#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ 207#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
217#define TC_BINOPX (1 << 6) /* two-opnd operator */ 208#define TC_BINOPX (1 << 6) /* two-opnd operator */
218#define TC_IN (1 << 7) 209#define TC_IN (1 << 7) /* 'in' */
219#define TC_COMMA (1 << 8) 210#define TC_COMMA (1 << 8) /* , */
220#define TC_PIPE (1 << 9) /* input redirection pipe */ 211#define TC_PIPE (1 << 9) /* input redirection pipe | */
221#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ 212#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
222#define TC_ARRTERM (1 << 11) /* ] */ 213#define TC_ARRTERM (1 << 11) /* ] */
223#define TC_GRPSTART (1 << 12) /* { */ 214#define TC_LBRACE (1 << 12) /* { */
224#define TC_GRPTERM (1 << 13) /* } */ 215#define TC_RBRACE (1 << 13) /* } */
225#define TC_SEMICOL (1 << 14) 216#define TC_SEMICOL (1 << 14) /* ; */
226#define TC_NEWLINE (1 << 15) 217#define TC_NEWLINE (1 << 15)
227#define TC_STATX (1 << 16) /* ctl statement (for, next...) */ 218#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
228#define TC_WHILE (1 << 17) 219#define TC_WHILE (1 << 17) /* 'while' */
229#define TC_ELSE (1 << 18) 220#define TC_ELSE (1 << 18) /* 'else' */
230#define TC_BUILTIN (1 << 19) 221#define TC_BUILTIN (1 << 19)
231/* This costs ~50 bytes of code. 222/* This costs ~50 bytes of code.
232 * A separate class to support deprecated "length" form. If we don't need that 223 * A separate class to support deprecated "length" form. If we don't need that
233 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH 224 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
234 * can be merged with TC_BUILTIN: 225 * can be merged with TC_BUILTIN:
235 */ 226 */
236#define TC_LENGTH (1 << 20) 227#define TC_LENGTH (1 << 20) /* 'length' */
237#define TC_GETLINE (1 << 21) 228#define TC_GETLINE (1 << 21) /* 'getline' */
238#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ 229#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
239#define TC_BEGIN (1 << 23) 230#define TC_BEGIN (1 << 23) /* 'BEGIN' */
240#define TC_END (1 << 24) 231#define TC_END (1 << 24) /* 'END' */
241#define TC_EOF (1 << 25) 232#define TC_EOF (1 << 25)
242#define TC_VARIABLE (1 << 26) 233#define TC_VARIABLE (1 << 26) /* name */
243#define TC_ARRAY (1 << 27) 234#define TC_ARRAY (1 << 27) /* name[ */
244#define TC_FUNCTION (1 << 28) 235#define TC_FUNCTION (1 << 28) /* name( */
245#define TC_STRING (1 << 29) 236#define TC_STRING (1 << 29) /* "..." */
246#define TC_NUMBER (1 << 30) 237#define TC_NUMBER (1 << 30)
247 238
248#ifndef debug_parse_print_tc 239#ifndef debug_parse_print_tc
249#define debug_parse_print_tc(n) do { \ 240#define debug_parse_print_tc(n) do { \
250if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \ 241if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \
251if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \ 242if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \
252if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \ 243if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \
253if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \ 244if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \
254if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \ 245if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \
@@ -259,8 +250,8 @@ if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \
259if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \ 250if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \
260if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \ 251if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \
261if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \ 252if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \
262if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \ 253if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \
263if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \ 254if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \
264if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \ 255if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \
265if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \ 256if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \
266if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \ 257if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \
@@ -281,39 +272,39 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
281} while (0) 272} while (0)
282#endif 273#endif
283 274
284/* combined token classes */ 275/* combined token classes ("token [class] sets") */
285#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) 276#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
286 277
287#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) 278#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
288//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) 279//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
289#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ 280#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
290 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ 281 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
291 | TC_SEQSTART | TC_STRING | TC_NUMBER) 282 | TC_LPAREN | TC_STRING | TC_NUMBER)
292#define TC_LVALUE (TC_VARIABLE | TC_ARRAY)
293 283
294#define TC_STATEMNT (TC_STATX | TC_WHILE) 284#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
295#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) 285#define TS_STATEMNT (TC_STATX | TC_WHILE)
296 286
297/* word tokens, cannot mean something else if not expected */ 287/* word tokens, cannot mean something else if not expected */
298#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \ 288#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
299 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ 289 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
300 | TC_FUNCDECL | TC_BEGIN | TC_END) 290 | TC_FUNCDECL | TC_BEGIN | TC_END)
301 291
302/* discard newlines after these */ 292/* discard newlines after these */
303#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ 293#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
304 | TC_BINOP | TC_OPTERM) 294 | TC_SEMICOL | TC_NEWLINE)
305 295
306/* what can expression begin with */ 296/* what can expression begin with */
307#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP) 297#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
308/* what can group begin with */ 298/* what can group begin with */
309#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART) 299#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \
300 | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
310 301
311/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */ 302/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
312/* operator is inserted between them */ 303/* operator is inserted between them */
313#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ 304#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
314 | TC_STRING | TC_NUMBER | TC_UOPPOST \ 305 | TC_STRING | TC_NUMBER | TC_UOPPOST \
315 | TC_LENGTH) 306 | TC_LENGTH)
316#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE) 307#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
317 308
318#define OF_RES1 0x010000 309#define OF_RES1 0x010000
319#define OF_RES2 0x020000 310#define OF_RES2 0x020000
@@ -328,7 +319,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
328#define xV OF_RES2 319#define xV OF_RES2
329#define xS (OF_RES2 | OF_STR2) 320#define xS (OF_RES2 | OF_STR2)
330#define Vx OF_RES1 321#define Vx OF_RES1
331#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED) 322#define Rx OF_REQUIRED
332#define VV (OF_RES1 | OF_RES2) 323#define VV (OF_RES1 | OF_RES2)
333#define Nx (OF_RES1 | OF_NUM1) 324#define Nx (OF_RES1 | OF_NUM1)
334#define NV (OF_RES1 | OF_NUM1 | OF_RES2) 325#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
@@ -340,8 +331,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
340#define OPNMASK 0x007F 331#define OPNMASK 0x007F
341 332
342/* operator priority is a highest byte (even: r->l, odd: l->r grouping) 333/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
343 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, 334 * (for builtins it has different meaning)
344 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
345 */ 335 */
346#undef P 336#undef P
347#undef PRIMASK 337#undef PRIMASK
@@ -394,8 +384,8 @@ enum {
394#define NTCC '\377' 384#define NTCC '\377'
395 385
396static const char tokenlist[] ALIGN1 = 386static const char tokenlist[] ALIGN1 =
397 "\1(" NTC /* TC_SEQSTART */ 387 "\1(" NTC /* TC_LPAREN */
398 "\1)" NTC /* TC_SEQTERM */ 388 "\1)" NTC /* TC_RPAREN */
399 "\1/" NTC /* TC_REGEXP */ 389 "\1/" NTC /* TC_REGEXP */
400 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */ 390 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
401 "\2++" "\2--" NTC /* TC_UOPPOST */ 391 "\2++" "\2--" NTC /* TC_UOPPOST */
@@ -412,8 +402,8 @@ static const char tokenlist[] ALIGN1 =
412 "\1|" NTC /* TC_PIPE */ 402 "\1|" NTC /* TC_PIPE */
413 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */ 403 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
414 "\1]" NTC /* TC_ARRTERM */ 404 "\1]" NTC /* TC_ARRTERM */
415 "\1{" NTC /* TC_GRPSTART */ 405 "\1{" NTC /* TC_LBRACE */
416 "\1}" NTC /* TC_GRPTERM */ 406 "\1}" NTC /* TC_RBRACE */
417 "\1;" NTC /* TC_SEMICOL */ 407 "\1;" NTC /* TC_SEMICOL */
418 "\1\n" NTC /* TC_NEWLINE */ 408 "\1\n" NTC /* TC_NEWLINE */
419 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */ 409 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
@@ -439,12 +429,11 @@ static const char tokenlist[] ALIGN1 =
439 /* compiler adds trailing "\0" */ 429 /* compiler adds trailing "\0" */
440 ; 430 ;
441 431
442#define OC_B OC_BUILTIN
443
444static const uint32_t tokeninfo[] ALIGN4 = { 432static const uint32_t tokeninfo[] ALIGN4 = {
445 0, 433 0,
446 0, 434 0,
447 OC_REGEXP, 435#define TI_REGEXP OC_REGEXP
436 TI_REGEXP,
448 xS|'a', xS|'w', xS|'|', 437 xS|'a', xS|'w', xS|'|',
449 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', 438 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
450#define TI_PREINC (OC_UNARY|xV|P(9)|'P') 439#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
@@ -455,12 +444,17 @@ static const uint32_t tokeninfo[] ALIGN4 = {
455 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', 444 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
456 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', 445 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
457 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, 446 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
458#define TI_LESS (OC_COMPARE|VV|P(39)|2) 447#define TI_LESS (OC_COMPARE|VV|P(39)|2)
459 TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), 448 TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
460 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', 449#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
461 OC_IN|SV|P(49), /* TC_IN */ 450#define TI_COLON (OC_COLON|xx|P(67)|':')
462 OC_COMMA|SS|P(80), 451 OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
463 OC_PGETLINE|SV|P(37), 452#define TI_IN (OC_IN|SV|P(49))
453 TI_IN,
454#define TI_COMMA (OC_COMMA|SS|P(80))
455 TI_COMMA,
456#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
457 TI_PGETLINE,
464 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', 458 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
465 0, /* ] */ 459 0, /* ] */
466 0, 460 0,
@@ -468,25 +462,51 @@ static const uint32_t tokeninfo[] ALIGN4 = {
468 0, 462 0,
469 0, /* \n */ 463 0, /* \n */
470 ST_IF, ST_DO, ST_FOR, OC_BREAK, 464 ST_IF, ST_DO, ST_FOR, OC_BREAK,
471 OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, 465#define TI_PRINT OC_PRINT
466 OC_CONTINUE, OC_DELETE|Rx, TI_PRINT,
472 OC_PRINTF, OC_NEXT, OC_NEXTFILE, 467 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
473 OC_RETURN|Vx, OC_EXIT|Nx, 468 OC_RETURN|Vx, OC_EXIT|Nx,
474 ST_WHILE, 469 ST_WHILE,
475 0, /* else */ 470 0, /* else */
476 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), 471// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
477 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), 472// Highest byte bit pattern: nn s3s2s1 v3v2v1
478 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), 473// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
479 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, 474// OC_F's are builtins with zero or one argument.
480 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, 475// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
481 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */ 476// Check for no args is present in builtins' code (not in this table): rand, systime
482 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), 477// Have one _optional_ arg: fflush, srand, length
483 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), 478#define OC_B OC_BUILTIN
484 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), 479#define OC_F OC_FBLTIN
485 OC_FBLTIN|Sx|F_le, /* TC_LENGTH */ 480#define A1 P(0x40) /*one arg*/
486 OC_GETLINE|SV|P(0), 481#define A2 P(0x80) /*two args*/
487 0, 0, 482#define A3 P(0xc0) /*three args*/
488 0, 483#define __v P(1)
489 0 /* TC_END */ 484#define _vv P(3)
485#define __s__v P(9)
486#define __s_vv P(0x0b)
487#define __svvv P(0x0f)
488#define _ss_vv P(0x1b)
489#define _s_vv_ P(0x16)
490#define ss_vv_ P(0x36)
491 OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
492 OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
493 OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2
494 OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log
495 OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand
496 OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
497 OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub
498 OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
499 OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper
500 OC_F|F_le|Sx, // length
501 OC_GETLINE|SV, // getline
502 0, 0, // func function
503 0, // BEGIN
504 0 // END
505#undef A1
506#undef A2
507#undef A3
508#undef OC_B
509#undef OC_F
490}; 510};
491 511
492/* internal variable names and their initial values */ 512/* internal variable names and their initial values */
@@ -527,21 +547,29 @@ struct globals {
527 chain *seq; 547 chain *seq;
528 node *break_ptr, *continue_ptr; 548 node *break_ptr, *continue_ptr;
529 rstream *iF; 549 rstream *iF;
530 xhash *vhash, *ahash, *fdhash, *fnhash; 550 xhash *ahash; /* argument names, used only while parsing function bodies */
551 xhash *fnhash; /* function names, used only in parsing stage */
552 xhash *vhash; /* variables and arrays */
553 //xhash *fdhash; /* file objects, used only in execution stage */
554 //we are reusing ahash as fdhash, via define (see later)
531 const char *g_progname; 555 const char *g_progname;
532 int g_lineno; 556 int g_lineno;
533 int nfields; 557 int nfields;
534 int maxfields; /* used in fsrealloc() only */ 558 int maxfields; /* used in fsrealloc() only */
535 var *Fields; 559 var *Fields;
536 nvblock *g_cb;
537 char *g_pos; 560 char *g_pos;
538 char *g_buf; 561 char g_saved_ch;
539 smallint icase; 562 smallint icase;
540 smallint exiting; 563 smallint exiting;
541 smallint nextrec; 564 smallint nextrec;
542 smallint nextfile; 565 smallint nextfile;
543 smallint is_f0_split; 566 smallint is_f0_split;
544 smallint t_rollback; 567 smallint t_rollback;
568
569 /* former statics from various functions */
570 smallint next_token__concat_inserted;
571 uint32_t next_token__save_tclass;
572 uint32_t next_token__save_info;
545}; 573};
546struct globals2 { 574struct globals2 {
547 uint32_t t_info; /* often used */ 575 uint32_t t_info; /* often used */
@@ -554,32 +582,35 @@ struct globals2 {
554 /* former statics from various functions */ 582 /* former statics from various functions */
555 char *split_f0__fstrings; 583 char *split_f0__fstrings;
556 584
557 uint32_t next_token__save_tclass;
558 uint32_t next_token__save_info;
559 uint32_t next_token__ltclass;
560 smallint next_token__concat_inserted;
561
562 smallint next_input_file__files_happen;
563 rstream next_input_file__rsm; 585 rstream next_input_file__rsm;
586 smallint next_input_file__files_happen;
587
588 smalluint exitcode;
564 589
565 var *evaluate__fnargs;
566 unsigned evaluate__seed; 590 unsigned evaluate__seed;
591 var *evaluate__fnargs;
567 regex_t evaluate__sreg; 592 regex_t evaluate__sreg;
568 593
569 var ptest__v; 594 var ptest__tmpvar;
595 var awk_printf__tmpvar;
596 var as_regex__tmpvar;
597 var exit__tmpvar;
598 var main__tmpvar;
570 599
571 tsplitter exec_builtin__tspl; 600 tsplitter exec_builtin__tspl;
572 601
573 /* biggest and least used members go last */ 602 /* biggest and least used members go last */
574 tsplitter fsplitter, rsplitter; 603 tsplitter fsplitter, rsplitter;
604
605 char g_buf[MAXVARFMT + 1];
575}; 606};
576#define G1 (ptr_to_globals[-1]) 607#define G1 (ptr_to_globals[-1])
577#define G (*(struct globals2 *)ptr_to_globals) 608#define G (*(struct globals2 *)ptr_to_globals)
578/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */ 609/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
579/*char G1size[sizeof(G1)]; - 0x74 */ 610//char G1size[sizeof(G1)]; // 0x70
580/*char Gsize[sizeof(G)]; - 0x1c4 */ 611//char Gsize[sizeof(G)]; // 0x2f8
581/* Trying to keep most of members accessible with short offsets: */ 612/* Trying to keep most of members accessible with short offsets: */
582/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ 613//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
583#define t_double (G1.t_double ) 614#define t_double (G1.t_double )
584#define beginseq (G1.beginseq ) 615#define beginseq (G1.beginseq )
585#define mainseq (G1.mainseq ) 616#define mainseq (G1.mainseq )
@@ -588,18 +619,20 @@ struct globals2 {
588#define break_ptr (G1.break_ptr ) 619#define break_ptr (G1.break_ptr )
589#define continue_ptr (G1.continue_ptr) 620#define continue_ptr (G1.continue_ptr)
590#define iF (G1.iF ) 621#define iF (G1.iF )
591#define vhash (G1.vhash )
592#define ahash (G1.ahash ) 622#define ahash (G1.ahash )
593#define fdhash (G1.fdhash )
594#define fnhash (G1.fnhash ) 623#define fnhash (G1.fnhash )
624#define vhash (G1.vhash )
625#define fdhash ahash
626//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing,
627// and ends up empty after parsing phase. Thus, we can simply reuse it
628// for fdhash in execution stage.
595#define g_progname (G1.g_progname ) 629#define g_progname (G1.g_progname )
596#define g_lineno (G1.g_lineno ) 630#define g_lineno (G1.g_lineno )
597#define nfields (G1.nfields ) 631#define nfields (G1.nfields )
598#define maxfields (G1.maxfields ) 632#define maxfields (G1.maxfields )
599#define Fields (G1.Fields ) 633#define Fields (G1.Fields )
600#define g_cb (G1.g_cb )
601#define g_pos (G1.g_pos ) 634#define g_pos (G1.g_pos )
602#define g_buf (G1.g_buf ) 635#define g_saved_ch (G1.g_saved_ch )
603#define icase (G1.icase ) 636#define icase (G1.icase )
604#define exiting (G1.exiting ) 637#define exiting (G1.exiting )
605#define nextrec (G1.nextrec ) 638#define nextrec (G1.nextrec )
@@ -613,25 +646,13 @@ struct globals2 {
613#define intvar (G.intvar ) 646#define intvar (G.intvar )
614#define fsplitter (G.fsplitter ) 647#define fsplitter (G.fsplitter )
615#define rsplitter (G.rsplitter ) 648#define rsplitter (G.rsplitter )
649#define g_buf (G.g_buf )
616#define INIT_G() do { \ 650#define INIT_G() do { \
617 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ 651 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
618 G.next_token__ltclass = TC_OPTERM; \ 652 t_tclass = TC_NEWLINE; \
619 G.evaluate__seed = 1; \ 653 G.evaluate__seed = 1; \
620} while (0) 654} while (0)
621 655
622
623/* function prototypes */
624static void handle_special(var *);
625static node *parse_expr(uint32_t);
626static void chain_group(void);
627static var *evaluate(node *, var *);
628static rstream *next_input_file(void);
629static int fmt_num(char *, int, const char *, double, int);
630static int awk_exit(int) NORETURN;
631
632/* ---- error handling ---- */
633
634static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
635static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; 656static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
636static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; 657static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
637static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; 658static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
@@ -643,10 +664,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
643static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; 664static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
644static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field"; 665static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
645 666
646static void zero_out_var(var *vp) 667static int awk_exit(void) NORETURN;
647{
648 memset(vp, 0, sizeof(*vp));
649}
650 668
651static void syntax_error(const char *message) NORETURN; 669static void syntax_error(const char *message) NORETURN;
652static void syntax_error(const char *message) 670static void syntax_error(const char *message)
@@ -677,12 +695,40 @@ static xhash *hash_init(void)
677 return newhash; 695 return newhash;
678} 696}
679 697
698static void hash_clear(xhash *hash)
699{
700 unsigned i;
701 hash_item *hi, *thi;
702
703 for (i = 0; i < hash->csize; i++) {
704 hi = hash->items[i];
705 while (hi) {
706 thi = hi;
707 hi = hi->next;
708//FIXME: this assumes that it's a hash of *variables*:
709 free(thi->data.v.string);
710 free(thi);
711 }
712 hash->items[i] = NULL;
713 }
714 hash->glen = hash->nel = 0;
715}
716
717#if 0 //UNUSED
718static void hash_free(xhash *hash)
719{
720 hash_clear(hash);
721 free(hash->items);
722 free(hash);
723}
724#endif
725
680/* find item in hash, return ptr to data, NULL if not found */ 726/* find item in hash, return ptr to data, NULL if not found */
681static void *hash_search(xhash *hash, const char *name) 727static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
682{ 728{
683 hash_item *hi; 729 hash_item *hi;
684 730
685 hi = hash->items[hashidx(name) % hash->csize]; 731 hi = hash->items[idx % hash->csize];
686 while (hi) { 732 while (hi) {
687 if (strcmp(hi->name, name) == 0) 733 if (strcmp(hi->name, name) == 0)
688 return &hi->data; 734 return &hi->data;
@@ -691,6 +737,11 @@ static void *hash_search(xhash *hash, const char *name)
691 return NULL; 737 return NULL;
692} 738}
693 739
740static void *hash_search(xhash *hash, const char *name)
741{
742 return hash_search3(hash, name, hashidx(name));
743}
744
694/* grow hash if it becomes too big */ 745/* grow hash if it becomes too big */
695static void hash_rebuild(xhash *hash) 746static void hash_rebuild(xhash *hash)
696{ 747{
@@ -726,16 +777,17 @@ static void *hash_find(xhash *hash, const char *name)
726 unsigned idx; 777 unsigned idx;
727 int l; 778 int l;
728 779
729 hi = hash_search(hash, name); 780 idx = hashidx(name);
781 hi = hash_search3(hash, name, idx);
730 if (!hi) { 782 if (!hi) {
731 if (++hash->nel / hash->csize > 10) 783 if (++hash->nel > hash->csize * 8)
732 hash_rebuild(hash); 784 hash_rebuild(hash);
733 785
734 l = strlen(name) + 1; 786 l = strlen(name) + 1;
735 hi = xzalloc(sizeof(*hi) + l); 787 hi = xzalloc(sizeof(*hi) + l);
736 strcpy(hi->name, name); 788 strcpy(hi->name, name);
737 789
738 idx = hashidx(name) % hash->csize; 790 idx = idx % hash->csize;
739 hi->next = hash->items[idx]; 791 hi->next = hash->items[idx];
740 hash->items[idx] = hi; 792 hash->items[idx] = hi;
741 hash->glen += l; 793 hash->glen += l;
@@ -770,7 +822,7 @@ static void hash_remove(xhash *hash, const char *name)
770 822
771static char *skip_spaces(char *p) 823static char *skip_spaces(char *p)
772{ 824{
773 while (1) { 825 for (;;) {
774 if (*p == '\\' && p[1] == '\n') { 826 if (*p == '\\' && p[1] == '\n') {
775 p++; 827 p++;
776 t_lineno++; 828 t_lineno++;
@@ -790,8 +842,10 @@ static char *skip_spaces(char *p)
790static char *nextword(char **s) 842static char *nextword(char **s)
791{ 843{
792 char *p = *s; 844 char *p = *s;
793 while (*(*s)++ != '\0') 845 char *q = p;
846 while (*q++ != '\0')
794 continue; 847 continue;
848 *s = q;
795 return p; 849 return p;
796} 850}
797 851
@@ -854,10 +908,29 @@ static double my_strtod(char **pp)
854 908
855/* -------- working with variables (set/get/copy/etc) -------- */ 909/* -------- working with variables (set/get/copy/etc) -------- */
856 910
857static xhash *iamarray(var *v) 911static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
858{ 912{
859 var *a = v; 913 int r = 0;
914 char c;
915 const char *s = format;
860 916
917 if (int_as_int && n == (long long)n) {
918 r = snprintf(b, size, "%lld", (long long)n);
919 } else {
920 do { c = *s; } while (c && *++s);
921 if (strchr("diouxX", c)) {
922 r = snprintf(b, size, format, (int)n);
923 } else if (strchr("eEfFgGaA", c)) {
924 r = snprintf(b, size, format, n);
925 } else {
926 syntax_error(EMSG_INV_FMT);
927 }
928 }
929 return r;
930}
931
932static xhash *iamarray(var *a)
933{
861 while (a->type & VF_CHILD) 934 while (a->type & VF_CHILD)
862 a = a->x.parent; 935 a = a->x.parent;
863 936
@@ -868,23 +941,7 @@ static xhash *iamarray(var *v)
868 return a->x.array; 941 return a->x.array;
869} 942}
870 943
871static void clear_array(xhash *array) 944#define clear_array(array) hash_clear(array)
872{
873 unsigned i;
874 hash_item *hi, *thi;
875
876 for (i = 0; i < array->csize; i++) {
877 hi = array->items[i];
878 while (hi) {
879 thi = hi;
880 hi = hi->next;
881 free(thi->data.v.string);
882 free(thi);
883 }
884 array->items[i] = NULL;
885 }
886 array->glen = array->nel = 0;
887}
888 945
889/* clear a variable */ 946/* clear a variable */
890static var *clrvar(var *v) 947static var *clrvar(var *v)
@@ -898,6 +955,8 @@ static var *clrvar(var *v)
898 return v; 955 return v;
899} 956}
900 957
958static void handle_special(var *);
959
901/* assign string value to variable */ 960/* assign string value to variable */
902static var *setvar_p(var *v, char *value) 961static var *setvar_p(var *v, char *value)
903{ 962{
@@ -963,6 +1022,7 @@ static double getvar_i(var *v)
963 v->number = my_strtod(&s); 1022 v->number = my_strtod(&s);
964 debug_printf_eval("%f (s:'%s')\n", v->number, s); 1023 debug_printf_eval("%f (s:'%s')\n", v->number, s);
965 if (v->type & VF_USER) { 1024 if (v->type & VF_USER) {
1025//TODO: skip_spaces() also skips backslash+newline, is it intended here?
966 s = skip_spaces(s); 1026 s = skip_spaces(s);
967 if (*s != '\0') 1027 if (*s != '\0')
968 v->type &= ~VF_USER; 1028 v->type &= ~VF_USER;
@@ -1024,94 +1084,24 @@ static int istrue(var *v)
1024 return (v->string && v->string[0]); 1084 return (v->string && v->string[0]);
1025} 1085}
1026 1086
1027/* temporary variables allocator. Last allocated should be first freed */
1028static var *nvalloc(int n)
1029{
1030 nvblock *pb = NULL;
1031 var *v, *r;
1032 int size;
1033
1034 while (g_cb) {
1035 pb = g_cb;
1036 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
1037 break;
1038 g_cb = g_cb->next;
1039 }
1040
1041 if (!g_cb) {
1042 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
1043 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
1044 g_cb->size = size;
1045 g_cb->pos = g_cb->nv;
1046 g_cb->prev = pb;
1047 /*g_cb->next = NULL; - xzalloc did it */
1048 if (pb)
1049 pb->next = g_cb;
1050 }
1051
1052 v = r = g_cb->pos;
1053 g_cb->pos += n;
1054
1055 while (v < g_cb->pos) {
1056 v->type = 0;
1057 v->string = NULL;
1058 v++;
1059 }
1060
1061 return r;
1062}
1063
1064static void nvfree(var *v)
1065{
1066 var *p;
1067
1068 if (v < g_cb->nv || v >= g_cb->pos)
1069 syntax_error(EMSG_INTERNAL_ERROR);
1070
1071 for (p = v; p < g_cb->pos; p++) {
1072 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1073 clear_array(iamarray(p));
1074 free(p->x.array->items);
1075 free(p->x.array);
1076 }
1077 if (p->type & VF_WALK) {
1078 walker_list *n;
1079 walker_list *w = p->x.walker;
1080 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1081 p->x.walker = NULL;
1082 while (w) {
1083 n = w->prev;
1084 debug_printf_walker(" free(%p)\n", w);
1085 free(w);
1086 w = n;
1087 }
1088 }
1089 clrvar(p);
1090 }
1091
1092 g_cb->pos = v;
1093 while (g_cb->prev && g_cb->pos == g_cb->nv) {
1094 g_cb = g_cb->prev;
1095 }
1096}
1097
1098/* ------- awk program text parsing ------- */ 1087/* ------- awk program text parsing ------- */
1099 1088
1100/* Parse next token pointed by global pos, place results into global ttt. 1089/* Parse next token pointed by global pos, place results into global t_XYZ variables.
1101 * If token isn't expected, give away. Return token class 1090 * If token isn't expected, print error message and die.
1091 * Return token class (also store it in t_tclass).
1102 */ 1092 */
1103static uint32_t next_token(uint32_t expected) 1093static uint32_t next_token(uint32_t expected)
1104{ 1094{
1105#define concat_inserted (G.next_token__concat_inserted) 1095#define concat_inserted (G1.next_token__concat_inserted)
1106#define save_tclass (G.next_token__save_tclass) 1096#define save_tclass (G1.next_token__save_tclass)
1107#define save_info (G.next_token__save_info) 1097#define save_info (G1.next_token__save_info)
1108/* Initialized to TC_OPTERM: */
1109#define ltclass (G.next_token__ltclass)
1110 1098
1111 char *p, *s; 1099 char *p;
1112 const char *tl; 1100 const char *tl;
1113 uint32_t tc;
1114 const uint32_t *ti; 1101 const uint32_t *ti;
1102 uint32_t tc, last_token_class;
1103
1104 last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */
1115 1105
1116 debug_printf_parse("%s() expected(%x):", __func__, expected); 1106 debug_printf_parse("%s() expected(%x):", __func__, expected);
1117 debug_parse_print_tc(expected); 1107 debug_parse_print_tc(expected);
@@ -1127,6 +1117,10 @@ static uint32_t next_token(uint32_t expected)
1127 t_info = save_info; 1117 t_info = save_info;
1128 } else { 1118 } else {
1129 p = g_pos; 1119 p = g_pos;
1120 if (g_saved_ch != '\0') {
1121 *p = g_saved_ch;
1122 g_saved_ch = '\0';
1123 }
1130 readnext: 1124 readnext:
1131 p = skip_spaces(p); 1125 p = skip_spaces(p);
1132 g_lineno = t_lineno; 1126 g_lineno = t_lineno;
@@ -1134,15 +1128,12 @@ static uint32_t next_token(uint32_t expected)
1134 while (*p != '\n' && *p != '\0') 1128 while (*p != '\n' && *p != '\0')
1135 p++; 1129 p++;
1136 1130
1137 if (*p == '\n')
1138 t_lineno++;
1139
1140 if (*p == '\0') { 1131 if (*p == '\0') {
1141 tc = TC_EOF; 1132 tc = TC_EOF;
1142 debug_printf_parse("%s: token found: TC_EOF\n", __func__); 1133 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1143 } else if (*p == '\"') { 1134 } else if (*p == '\"') {
1144 /* it's a string */ 1135 /* it's a string */
1145 t_string = s = ++p; 1136 char *s = t_string = ++p;
1146 while (*p != '\"') { 1137 while (*p != '\"') {
1147 char *pp; 1138 char *pp;
1148 if (*p == '\0' || *p == '\n') 1139 if (*p == '\0' || *p == '\n')
@@ -1157,7 +1148,7 @@ static uint32_t next_token(uint32_t expected)
1157 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); 1148 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1158 } else if ((expected & TC_REGEXP) && *p == '/') { 1149 } else if ((expected & TC_REGEXP) && *p == '/') {
1159 /* it's regexp */ 1150 /* it's regexp */
1160 t_string = s = ++p; 1151 char *s = t_string = ++p;
1161 while (*p != '/') { 1152 while (*p != '/') {
1162 if (*p == '\0' || *p == '\n') 1153 if (*p == '\0' || *p == '\n')
1163 syntax_error(EMSG_UNEXP_EOS); 1154 syntax_error(EMSG_UNEXP_EOS);
@@ -1188,6 +1179,11 @@ static uint32_t next_token(uint32_t expected)
1188 tc = TC_NUMBER; 1179 tc = TC_NUMBER;
1189 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); 1180 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1190 } else { 1181 } else {
1182 char *end_of_name;
1183
1184 if (*p == '\n')
1185 t_lineno++;
1186
1191 /* search for something known */ 1187 /* search for something known */
1192 tl = tokenlist; 1188 tl = tokenlist;
1193 tc = 0x00000001; 1189 tc = 0x00000001;
@@ -1202,9 +1198,9 @@ static uint32_t next_token(uint32_t expected)
1202 * token matches, 1198 * token matches,
1203 * and it's not a longer word, 1199 * and it's not a longer word,
1204 */ 1200 */
1205 if ((tc & (expected | TC_WORD | TC_NEWLINE)) 1201 if ((tc & (expected | TS_WORD | TC_NEWLINE))
1206 && strncmp(p, tl, l) == 0 1202 && strncmp(p, tl, l) == 0
1207 && !((tc & TC_WORD) && isalnum_(p[l])) 1203 && !((tc & TS_WORD) && isalnum_(p[l]))
1208 ) { 1204 ) {
1209 /* then this is what we are looking for */ 1205 /* then this is what we are looking for */
1210 t_info = *ti; 1206 t_info = *ti;
@@ -1221,71 +1217,94 @@ static uint32_t next_token(uint32_t expected)
1221 if (!isalnum_(*p)) 1217 if (!isalnum_(*p))
1222 syntax_error(EMSG_UNEXP_TOKEN); /* no */ 1218 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1223 /* yes */ 1219 /* yes */
1224/* "move name one char back" trick: we need a byte for NUL terminator */ 1220 t_string = p;
1225/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */ 1221 while (isalnum_(*p))
1226 t_string = --p; 1222 p++;
1227 while (isalnum_(*++p)) { 1223 end_of_name = p;
1228 p[-1] = *p; 1224
1229 } 1225 if (last_token_class == TC_FUNCDECL)
1230 p[-1] = '\0'; 1226 /* eat space in "function FUNC (...) {...}" declaration */
1231 tc = TC_VARIABLE;
1232 /* also consume whitespace between functionname and bracket */
1233 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1234 p = skip_spaces(p); 1227 p = skip_spaces(p);
1228 else if (expected & TC_ARRAY) {
1229 /* eat space between array name and [ */
1230 char *s = skip_spaces(p);
1231 if (*s == '[') /* array ref, not just a name? */
1232 p = s;
1233 }
1234 /* else: do NOT consume whitespace after variable name!
1235 * gawk allows definition "function FUNC (p) {...}" - note space,
1236 * but disallows the call "FUNC (p)" because it isn't one -
1237 * expression "v (a)" should NOT be parsed as TC_FUNCTION:
1238 * it is a valid concatenation if "v" is a variable,
1239 * not a function name (and type of name is not known at parse time).
1240 */
1241
1235 if (*p == '(') { 1242 if (*p == '(') {
1243 p++;
1236 tc = TC_FUNCTION; 1244 tc = TC_FUNCTION;
1237 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); 1245 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1246 } else if (*p == '[') {
1247 p++;
1248 tc = TC_ARRAY;
1249 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1238 } else { 1250 } else {
1239 if (*p == '[') { 1251 tc = TC_VARIABLE;
1240 p++; 1252 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1241 tc = TC_ARRAY; 1253 if (end_of_name == p) {
1242 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); 1254 /* there is no space for trailing NUL in t_string!
1243 } else 1255 * We need to save the char we are going to NUL.
1244 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); 1256 * (we'll use it in future call to next_token())
1257 */
1258 g_saved_ch = *end_of_name;
1259// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
1260// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
1261// '.' and analyze it later: we also have to *store it back* in next
1262// next_token(), in order to give my_strtod() the undamaged ".2" string.
1263 }
1245 } 1264 }
1265 *end_of_name = '\0'; /* terminate t_string */
1246 } 1266 }
1247 token_found: 1267 token_found:
1248 g_pos = p; 1268 g_pos = p;
1249 1269
1250 /* skipping newlines in some cases */ 1270 /* skipping newlines in some cases */
1251 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE)) 1271 if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
1252 goto readnext; 1272 goto readnext;
1253 1273
1254 /* insert concatenation operator when needed */ 1274 /* insert concatenation operator when needed */
1255 debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__, 1275 debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
1256 (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP)); 1276 (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
1257 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) 1277 !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
1258 && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */ 1278 if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
1279 && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
1259 ) { 1280 ) {
1260 concat_inserted = TRUE; 1281 concat_inserted = TRUE;
1261 save_tclass = tc; 1282 save_tclass = tc;
1262 save_info = t_info; 1283 save_info = t_info;
1263 tc = TC_BINOP; 1284 tc = TC_BINOPX;
1264 t_info = OC_CONCAT | SS | P(35); 1285 t_info = OC_CONCAT | SS | P(35);
1265 } 1286 }
1266 1287
1267 debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass);
1268 t_tclass = tc; 1288 t_tclass = tc;
1289 debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
1269 } 1290 }
1270 ltclass = t_tclass;
1271
1272 /* Are we ready for this? */ 1291 /* Are we ready for this? */
1273 if (!(ltclass & expected)) { 1292 if (!(t_tclass & expected)) {
1274 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? 1293 syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
1275 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); 1294 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1276 } 1295 }
1277 1296
1278 debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double); 1297 debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
1279 debug_parse_print_tc(ltclass); 1298 debug_parse_print_tc(t_tclass);
1280 debug_printf_parse("\n"); 1299 debug_printf_parse("\n");
1281 return ltclass; 1300
1301 return t_tclass;
1282#undef concat_inserted 1302#undef concat_inserted
1283#undef save_tclass 1303#undef save_tclass
1284#undef save_info 1304#undef save_info
1285#undef ltclass
1286} 1305}
1287 1306
1288static void rollback_token(void) 1307static ALWAYS_INLINE void rollback_token(void)
1289{ 1308{
1290 t_rollback = TRUE; 1309 t_rollback = TRUE;
1291} 1310}
@@ -1302,17 +1321,19 @@ static node *new_node(uint32_t info)
1302 1321
1303static void mk_re_node(const char *s, node *n, regex_t *re) 1322static void mk_re_node(const char *s, node *n, regex_t *re)
1304{ 1323{
1305 n->info = OC_REGEXP; 1324 n->info = TI_REGEXP;
1306 n->l.re = re; 1325 n->l.re = re;
1307 n->r.ire = re + 1; 1326 n->r.ire = re + 1;
1308 xregcomp(re, s, REG_EXTENDED); 1327 xregcomp(re, s, REG_EXTENDED);
1309 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); 1328 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1310} 1329}
1311 1330
1312static node *condition(void) 1331static node *parse_expr(uint32_t);
1332
1333static node *parse_lrparen_list(void)
1313{ 1334{
1314 next_token(TC_SEQSTART); 1335 next_token(TC_LPAREN);
1315 return parse_expr(TC_SEQTERM); 1336 return parse_expr(TC_RPAREN);
1316} 1337}
1317 1338
1318/* parse expression terminated by given argument, return ptr 1339/* parse expression terminated by given argument, return ptr
@@ -1322,7 +1343,7 @@ static node *parse_expr(uint32_t term_tc)
1322 node sn; 1343 node sn;
1323 node *cn = &sn; 1344 node *cn = &sn;
1324 node *vn, *glptr; 1345 node *vn, *glptr;
1325 uint32_t tc, xtc; 1346 uint32_t tc, expected_tc;
1326 var *v; 1347 var *v;
1327 1348
1328 debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); 1349 debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
@@ -1331,145 +1352,157 @@ static node *parse_expr(uint32_t term_tc)
1331 1352
1332 sn.info = PRIMASK; 1353 sn.info = PRIMASK;
1333 sn.r.n = sn.a.n = glptr = NULL; 1354 sn.r.n = sn.a.n = glptr = NULL;
1334 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc; 1355 expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
1335 1356
1336 while (!((tc = next_token(xtc)) & term_tc)) { 1357 while (!((tc = next_token(expected_tc)) & term_tc)) {
1337 1358
1338 if (glptr && (t_info == TI_LESS)) { 1359 if (glptr && (t_info == TI_LESS)) {
1339 /* input redirection (<) attached to glptr node */ 1360 /* input redirection (<) attached to glptr node */
1340 debug_printf_parse("%s: input redir\n", __func__); 1361 debug_printf_parse("%s: input redir\n", __func__);
1341 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); 1362 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1342 cn->a.n = glptr; 1363 cn->a.n = glptr;
1343 xtc = TC_OPERAND | TC_UOPPRE; 1364 expected_tc = TS_OPERAND | TS_UOPPRE;
1344 glptr = NULL; 1365 glptr = NULL;
1345 1366 continue;
1346 } else if (tc & (TC_BINOP | TC_UOPPOST)) { 1367 }
1347 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); 1368 if (tc & (TS_BINOP | TC_UOPPOST)) {
1369 debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
1348 /* for binary and postfix-unary operators, jump back over 1370 /* for binary and postfix-unary operators, jump back over
1349 * previous operators with higher priority */ 1371 * previous operators with higher priority */
1350 vn = cn; 1372 vn = cn;
1351 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) 1373 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1352 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) 1374 || ((t_info == vn->info) && t_info == TI_COLON)
1353 ) { 1375 ) {
1354 vn = vn->a.n; 1376 vn = vn->a.n;
1355 if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); 1377 if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
1356 } 1378 }
1357 if ((t_info & OPCLSMASK) == OC_TERNARY) 1379 if (t_info == TI_TERNARY)
1380//TODO: why?
1358 t_info += P(6); 1381 t_info += P(6);
1359 cn = vn->a.n->r.n = new_node(t_info); 1382 cn = vn->a.n->r.n = new_node(t_info);
1360 cn->a.n = vn->a.n; 1383 cn->a.n = vn->a.n;
1361 if (tc & TC_BINOP) { 1384 if (tc & TS_BINOP) {
1362 cn->l.n = vn; 1385 cn->l.n = vn;
1363 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; 1386//FIXME: this is the place to detect and reject assignments to non-lvalues.
1364 if ((t_info & OPCLSMASK) == OC_PGETLINE) { 1387//Currently we allow "assignments" to consts and temporaries, nonsense like this:
1388// awk 'BEGIN { "qwe" = 1 }'
1389// awk 'BEGIN { 7 *= 7 }'
1390// awk 'BEGIN { length("qwe") = 1 }'
1391// awk 'BEGIN { (1+1) += 3 }'
1392 expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
1393 if (t_info == TI_PGETLINE) {
1365 /* it's a pipe */ 1394 /* it's a pipe */
1366 next_token(TC_GETLINE); 1395 next_token(TC_GETLINE);
1367 /* give maximum priority to this pipe */ 1396 /* give maximum priority to this pipe */
1368 cn->info &= ~PRIMASK; 1397 cn->info &= ~PRIMASK;
1369 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; 1398 expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1370 } 1399 }
1371 } else { 1400 } else {
1372 cn->r.n = vn; 1401 cn->r.n = vn;
1373 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; 1402 expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1374 } 1403 }
1375 vn->a.n = cn; 1404 vn->a.n = cn;
1405 continue;
1406 }
1376 1407
1377 } else { 1408 debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
1378 debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); 1409 /* for operands and prefix-unary operators, attach them
1379 /* for operands and prefix-unary operators, attach them 1410 * to last node */
1380 * to last node */ 1411 vn = cn;
1381 vn = cn; 1412 cn = vn->r.n = new_node(t_info);
1382 cn = vn->r.n = new_node(t_info); 1413 cn->a.n = vn;
1383 cn->a.n = vn;
1384 1414
1385 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; 1415 expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
1386 if (t_info == TI_PREINC || t_info == TI_PREDEC) 1416 if (t_info == TI_PREINC || t_info == TI_PREDEC)
1387 xtc = TC_LVALUE | TC_UOPPRE1; 1417 expected_tc = TS_LVALUE | TC_UOPPRE1;
1388 if (tc & (TC_OPERAND | TC_REGEXP)) {
1389 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1390 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc;
1391 /* one should be very careful with switch on tclass -
1392 * only simple tclasses should be used! */
1393 switch (tc) {
1394 case TC_VARIABLE:
1395 case TC_ARRAY:
1396 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1397 cn->info = OC_VAR;
1398 v = hash_search(ahash, t_string);
1399 if (v != NULL) {
1400 cn->info = OC_FNARG;
1401 cn->l.aidx = v->x.aidx;
1402 } else {
1403 cn->l.v = newvar(t_string);
1404 }
1405 if (tc & TC_ARRAY) {
1406 cn->info |= xS;
1407 cn->r.n = parse_expr(TC_ARRTERM);
1408 }
1409 break;
1410 1418
1411 case TC_NUMBER: 1419 if (!(tc & (TS_OPERAND | TC_REGEXP)))
1412 case TC_STRING: 1420 continue;
1413 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1414 cn->info = OC_VAR;
1415 v = cn->l.v = xzalloc(sizeof(var));
1416 if (tc & TC_NUMBER)
1417 setvar_i(v, t_double);
1418 else {
1419 setvar_s(v, t_string);
1420 xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */
1421 }
1422 break;
1423 1421
1424 case TC_REGEXP: 1422 debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
1425 debug_printf_parse("%s: TC_REGEXP\n", __func__); 1423 expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
1426 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); 1424 /* one should be very careful with switch on tclass -
1427 break; 1425 * only simple tclasses should be used (TC_xyz, not TS_xyz) */
1426 switch (tc) {
1427 case TC_VARIABLE:
1428 case TC_ARRAY:
1429 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1430 cn->info = OC_VAR;
1431 v = hash_search(ahash, t_string);
1432 if (v != NULL) {
1433 cn->info = OC_FNARG;
1434 cn->l.aidx = v->x.aidx;
1435 } else {
1436 cn->l.v = newvar(t_string);
1437 }
1438 if (tc & TC_ARRAY) {
1439 cn->info |= xS;
1440 cn->r.n = parse_expr(TC_ARRTERM);
1441 }
1442 break;
1428 1443
1429 case TC_FUNCTION: 1444 case TC_NUMBER:
1430 debug_printf_parse("%s: TC_FUNCTION\n", __func__); 1445 case TC_STRING:
1431 cn->info = OC_FUNC; 1446 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1432 cn->r.f = newfunc(t_string); 1447 cn->info = OC_VAR;
1433 cn->l.n = condition(); 1448 v = cn->l.v = xzalloc(sizeof(var));
1434 break; 1449 if (tc & TC_NUMBER)
1450 setvar_i(v, t_double);
1451 else {
1452 setvar_s(v, t_string);
1453 expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
1454 }
1455 break;
1435 1456
1436 case TC_SEQSTART: 1457 case TC_REGEXP:
1437 debug_printf_parse("%s: TC_SEQSTART\n", __func__); 1458 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1438 cn = vn->r.n = parse_expr(TC_SEQTERM); 1459 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1439 if (!cn) 1460 break;
1440 syntax_error("Empty sequence");
1441 cn->a.n = vn;
1442 break;
1443 1461
1444 case TC_GETLINE: 1462 case TC_FUNCTION:
1445 debug_printf_parse("%s: TC_GETLINE\n", __func__); 1463 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1446 glptr = cn; 1464 cn->info = OC_FUNC;
1447 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; 1465 cn->r.f = newfunc(t_string);
1448 break; 1466 cn->l.n = parse_expr(TC_RPAREN);
1467 break;
1449 1468
1450 case TC_BUILTIN: 1469 case TC_LPAREN:
1451 debug_printf_parse("%s: TC_BUILTIN\n", __func__); 1470 debug_printf_parse("%s: TC_LPAREN\n", __func__);
1452 cn->l.n = condition(); 1471 cn = vn->r.n = parse_expr(TC_RPAREN);
1453 break; 1472 if (!cn)
1473 syntax_error("Empty sequence");
1474 cn->a.n = vn;
1475 break;
1454 1476
1455 case TC_LENGTH: 1477 case TC_GETLINE:
1456 debug_printf_parse("%s: TC_LENGTH\n", __func__); 1478 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1457 next_token(TC_SEQSTART /* length(...) */ 1479 glptr = cn;
1458 | TC_OPTERM /* length; (or newline)*/ 1480 expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
1459 | TC_GRPTERM /* length } */ 1481 break;
1460 | TC_BINOPX /* length <op> NUM */ 1482
1461 | TC_COMMA /* print length, 1 */ 1483 case TC_BUILTIN:
1462 ); 1484 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1463 rollback_token(); 1485 cn->l.n = parse_lrparen_list();
1464 if (t_tclass & TC_SEQSTART) { 1486 break;
1465 /* It was a "(" token. Handle just like TC_BUILTIN */ 1487
1466 cn->l.n = condition(); 1488 case TC_LENGTH:
1467 } 1489 debug_printf_parse("%s: TC_LENGTH\n", __func__);
1468 break; 1490 tc = next_token(TC_LPAREN /* length(...) */
1469 } 1491 | TC_SEMICOL /* length; */
1492 | TC_NEWLINE /* length<newline> */
1493 | TC_RBRACE /* length } */
1494 | TC_BINOPX /* length <op> NUM */
1495 | TC_COMMA /* print length, 1 */
1496 );
1497 if (tc != TC_LPAREN)
1498 rollback_token();
1499 else {
1500 /* It was a "(" token. Handle just like TC_BUILTIN */
1501 cn->l.n = parse_expr(TC_RPAREN);
1470 } 1502 }
1503 break;
1471 } 1504 }
1472 } 1505 } /* while() */
1473 1506
1474 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); 1507 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1475 return sn.r.n; 1508 return sn.r.n;
@@ -1486,7 +1519,7 @@ static node *chain_node(uint32_t info)
1486 if (seq->programname != g_progname) { 1519 if (seq->programname != g_progname) {
1487 seq->programname = g_progname; 1520 seq->programname = g_progname;
1488 n = chain_node(OC_NEWSOURCE); 1521 n = chain_node(OC_NEWSOURCE);
1489 n->l.new_progname = xstrdup(g_progname); 1522 n->l.new_progname = g_progname;
1490 } 1523 }
1491 1524
1492 n = seq->last; 1525 n = seq->last;
@@ -1502,14 +1535,16 @@ static void chain_expr(uint32_t info)
1502 1535
1503 n = chain_node(info); 1536 n = chain_node(info);
1504 1537
1505 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM); 1538 n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1506 if ((info & OF_REQUIRED) && !n->l.n) 1539 if ((info & OF_REQUIRED) && !n->l.n)
1507 syntax_error(EMSG_TOO_FEW_ARGS); 1540 syntax_error(EMSG_TOO_FEW_ARGS);
1508 1541
1509 if (t_tclass & TC_GRPTERM) 1542 if (t_tclass & TC_RBRACE)
1510 rollback_token(); 1543 rollback_token();
1511} 1544}
1512 1545
1546static void chain_group(void);
1547
1513static node *chain_loop(node *nn) 1548static node *chain_loop(node *nn)
1514{ 1549{
1515 node *n, *n2, *save_brk, *save_cont; 1550 node *n, *n2, *save_brk, *save_cont;
@@ -1533,209 +1568,276 @@ static node *chain_loop(node *nn)
1533 return n; 1568 return n;
1534} 1569}
1535 1570
1571static void chain_until_rbrace(void)
1572{
1573 uint32_t tc;
1574 while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1575 debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1576 if (tc == TC_NEWLINE)
1577 continue;
1578 rollback_token();
1579 chain_group();
1580 }
1581 debug_printf_parse("%s: TC_RBRACE\n", __func__);
1582}
1583
1536/* parse group and attach it to chain */ 1584/* parse group and attach it to chain */
1537static void chain_group(void) 1585static void chain_group(void)
1538{ 1586{
1539 uint32_t c; 1587 uint32_t tc;
1540 node *n, *n2, *n3; 1588 node *n, *n2, *n3;
1541 1589
1542 do { 1590 do {
1543 c = next_token(TC_GRPSEQ); 1591 tc = next_token(TS_GRPSEQ);
1544 } while (c & TC_NEWLINE); 1592 } while (tc == TC_NEWLINE);
1545 1593
1546 if (c & TC_GRPSTART) { 1594 if (tc == TC_LBRACE) {
1547 debug_printf_parse("%s: TC_GRPSTART\n", __func__); 1595 debug_printf_parse("%s: TC_LBRACE\n", __func__);
1548 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { 1596 chain_until_rbrace();
1549 debug_printf_parse("%s: !TC_GRPTERM\n", __func__); 1597 return;
1550 if (t_tclass & TC_NEWLINE) 1598 }
1551 continue; 1599 if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) {
1552 rollback_token(); 1600 debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__);
1553 chain_group();
1554 }
1555 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1556 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1557 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1558 rollback_token(); 1601 rollback_token();
1559 chain_expr(OC_EXEC | Vx); 1602 chain_expr(OC_EXEC | Vx);
1560 } else { 1603 return;
1561 /* TC_STATEMNT */ 1604 }
1562 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__); 1605
1563 switch (t_info & OPCLSMASK) { 1606 /* TS_STATEMNT */
1564 case ST_IF: 1607 debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
1565 debug_printf_parse("%s: ST_IF\n", __func__); 1608 switch (t_info & OPCLSMASK) {
1566 n = chain_node(OC_BR | Vx); 1609 case ST_IF:
1567 n->l.n = condition(); 1610 debug_printf_parse("%s: ST_IF\n", __func__);
1611 n = chain_node(OC_BR | Vx);
1612 n->l.n = parse_lrparen_list();
1613 chain_group();
1614 n2 = chain_node(OC_EXEC);
1615 n->r.n = seq->last;
1616 if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
1568 chain_group(); 1617 chain_group();
1569 n2 = chain_node(OC_EXEC); 1618 n2->a.n = seq->last;
1570 n->r.n = seq->last; 1619 } else {
1571 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { 1620 rollback_token();
1572 chain_group(); 1621 }
1573 n2->a.n = seq->last; 1622 break;
1574 } else {
1575 rollback_token();
1576 }
1577 break;
1578 1623
1579 case ST_WHILE: 1624 case ST_WHILE:
1580 debug_printf_parse("%s: ST_WHILE\n", __func__); 1625 debug_printf_parse("%s: ST_WHILE\n", __func__);
1581 n2 = condition(); 1626 n2 = parse_lrparen_list();
1582 n = chain_loop(NULL); 1627 n = chain_loop(NULL);
1583 n->l.n = n2; 1628 n->l.n = n2;
1584 break; 1629 break;
1585 1630
1586 case ST_DO: 1631 case ST_DO:
1587 debug_printf_parse("%s: ST_DO\n", __func__); 1632 debug_printf_parse("%s: ST_DO\n", __func__);
1588 n2 = chain_node(OC_EXEC); 1633 n2 = chain_node(OC_EXEC);
1589 n = chain_loop(NULL); 1634 n = chain_loop(NULL);
1590 n2->a.n = n->a.n; 1635 n2->a.n = n->a.n;
1591 next_token(TC_WHILE); 1636 next_token(TC_WHILE);
1592 n->l.n = condition(); 1637 n->l.n = parse_lrparen_list();
1593 break; 1638 break;
1594 1639
1595 case ST_FOR: 1640 case ST_FOR:
1596 debug_printf_parse("%s: ST_FOR\n", __func__); 1641 debug_printf_parse("%s: ST_FOR\n", __func__);
1597 next_token(TC_SEQSTART); 1642 next_token(TC_LPAREN);
1598 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); 1643 n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
1599 if (t_tclass & TC_SEQTERM) { /* for-in */ 1644 if (t_tclass & TC_RPAREN) { /* for-in */
1600 if (!n2 || (n2->info & OPCLSMASK) != OC_IN) 1645 if (!n2 || n2->info != TI_IN)
1601 syntax_error(EMSG_UNEXP_TOKEN); 1646 syntax_error(EMSG_UNEXP_TOKEN);
1602 n = chain_node(OC_WALKINIT | VV); 1647 n = chain_node(OC_WALKINIT | VV);
1603 n->l.n = n2->l.n; 1648 n->l.n = n2->l.n;
1604 n->r.n = n2->r.n; 1649 n->r.n = n2->r.n;
1605 n = chain_loop(NULL); 1650 n = chain_loop(NULL);
1606 n->info = OC_WALKNEXT | Vx; 1651 n->info = OC_WALKNEXT | Vx;
1607 n->l.n = n2->l.n; 1652 n->l.n = n2->l.n;
1608 } else { /* for (;;) */ 1653 } else { /* for (;;) */
1609 n = chain_node(OC_EXEC | Vx); 1654 n = chain_node(OC_EXEC | Vx);
1610 n->l.n = n2; 1655 n->l.n = n2;
1611 n2 = parse_expr(TC_SEMICOL); 1656 n2 = parse_expr(TC_SEMICOL);
1612 n3 = parse_expr(TC_SEQTERM); 1657 n3 = parse_expr(TC_RPAREN);
1613 n = chain_loop(n3); 1658 n = chain_loop(n3);
1614 n->l.n = n2; 1659 n->l.n = n2;
1615 if (!n2) 1660 if (!n2)
1616 n->info = OC_EXEC; 1661 n->info = OC_EXEC;
1617 } 1662 }
1618 break; 1663 break;
1619 1664
1620 case OC_PRINT: 1665 case OC_PRINT:
1621 case OC_PRINTF: 1666 case OC_PRINTF:
1622 debug_printf_parse("%s: OC_PRINT[F]\n", __func__); 1667 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1623 n = chain_node(t_info); 1668 n = chain_node(t_info);
1624 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); 1669 n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
1625 if (t_tclass & TC_OUTRDR) { 1670 if (t_tclass & TC_OUTRDR) {
1626 n->info |= t_info; 1671 n->info |= t_info;
1627 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM); 1672 n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1628 } 1673 }
1629 if (t_tclass & TC_GRPTERM) 1674 if (t_tclass & TC_RBRACE)
1630 rollback_token(); 1675 rollback_token();
1631 break; 1676 break;
1632 1677
1633 case OC_BREAK: 1678 case OC_BREAK:
1634 debug_printf_parse("%s: OC_BREAK\n", __func__); 1679 debug_printf_parse("%s: OC_BREAK\n", __func__);
1635 n = chain_node(OC_EXEC); 1680 n = chain_node(OC_EXEC);
1636 n->a.n = break_ptr; 1681 n->a.n = break_ptr;
1637 chain_expr(t_info); 1682//TODO: if break_ptr is NULL, syntax error (not in the loop)?
1638 break; 1683 chain_expr(t_info);
1684 break;
1639 1685
1640 case OC_CONTINUE: 1686 case OC_CONTINUE:
1641 debug_printf_parse("%s: OC_CONTINUE\n", __func__); 1687 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1642 n = chain_node(OC_EXEC); 1688 n = chain_node(OC_EXEC);
1643 n->a.n = continue_ptr; 1689 n->a.n = continue_ptr;
1644 chain_expr(t_info); 1690//TODO: if continue_ptr is NULL, syntax error (not in the loop)?
1645 break; 1691 chain_expr(t_info);
1692 break;
1646 1693
1647 /* delete, next, nextfile, return, exit */ 1694 /* delete, next, nextfile, return, exit */
1648 default: 1695 default:
1649 debug_printf_parse("%s: default\n", __func__); 1696 debug_printf_parse("%s: default\n", __func__);
1650 chain_expr(t_info); 1697 chain_expr(t_info);
1651 }
1652 } 1698 }
1653} 1699}
1654 1700
1655static void parse_program(char *p) 1701static void parse_program(char *p)
1656{ 1702{
1657 uint32_t tclass;
1658 node *cn;
1659 func *f;
1660 var *v;
1661
1662 debug_printf_parse("%s()\n", __func__); 1703 debug_printf_parse("%s()\n", __func__);
1663 1704
1664 g_pos = p; 1705 g_pos = p;
1665 t_lineno = 1; 1706 t_lineno = 1;
1666 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | 1707 for (;;) {
1667 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { 1708 uint32_t tclass;
1709
1710 tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
1711 TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
1668 1712
1669 if (tclass & TC_OPTERM) { 1713 if (tclass == TC_EOF) {
1670 debug_printf_parse("%s: TC_OPTERM\n", __func__); 1714 debug_printf_parse("%s: TC_EOF\n", __func__);
1715 break;
1716 }
1717 if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
1718 debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
1719//NB: gawk allows many newlines, but does not allow more than one semicolon:
1720// BEGIN {...}<newline>;<newline>;
1721//would complain "each rule must have a pattern or an action part".
1722//Same message for
1723// ; BEGIN {...}
1671 continue; 1724 continue;
1672 } 1725 }
1673 1726 if (tclass == TC_BEGIN) {
1674 seq = &mainseq;
1675 if (tclass & TC_BEGIN) {
1676 debug_printf_parse("%s: TC_BEGIN\n", __func__); 1727 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1677 seq = &beginseq; 1728 seq = &beginseq;
1678 chain_group(); 1729 /* ensure there is no newline between BEGIN and { */
1679 } else if (tclass & TC_END) { 1730 next_token(TC_LBRACE);
1731 chain_until_rbrace();
1732 continue;
1733 }
1734 if (tclass == TC_END) {
1680 debug_printf_parse("%s: TC_END\n", __func__); 1735 debug_printf_parse("%s: TC_END\n", __func__);
1681 seq = &endseq; 1736 seq = &endseq;
1682 chain_group(); 1737 /* ensure there is no newline between END and { */
1683 } else if (tclass & TC_FUNCDECL) { 1738 next_token(TC_LBRACE);
1739 chain_until_rbrace();
1740 continue;
1741 }
1742 if (tclass == TC_FUNCDECL) {
1743 func *f;
1744
1684 debug_printf_parse("%s: TC_FUNCDECL\n", __func__); 1745 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1685 next_token(TC_FUNCTION); 1746 next_token(TC_FUNCTION);
1686 g_pos++;
1687 f = newfunc(t_string); 1747 f = newfunc(t_string);
1688 f->body.first = NULL; 1748 if (f->defined)
1689 f->nargs = 0; 1749 syntax_error("Duplicate function");
1690 /* Match func arg list: a comma sep list of >= 0 args, and a close paren */ 1750 f->defined = 1;
1691 while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) { 1751 //f->body.first = NULL; - already is
1692 /* Either an empty arg list, or trailing comma from prev iter 1752 //f->nargs = 0; - already is
1693 * must be followed by an arg */ 1753 /* func arg list: comma sep list of args, and a close paren */
1694 if (f->nargs == 0 && t_tclass == TC_SEQTERM) 1754 for (;;) {
1695 break; 1755 var *v;
1696 1756 if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
1697 /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */ 1757 if (f->nargs == 0)
1698 if (t_tclass != TC_VARIABLE) 1758 break; /* func() is ok */
1759 /* func(a,) is not ok */
1699 syntax_error(EMSG_UNEXP_TOKEN); 1760 syntax_error(EMSG_UNEXP_TOKEN);
1700 1761 }
1701 v = findvar(ahash, t_string); 1762 v = findvar(ahash, t_string);
1702 v->x.aidx = f->nargs++; 1763 v->x.aidx = f->nargs++;
1703
1704 /* Arg followed either by end of arg list or 1 comma */ 1764 /* Arg followed either by end of arg list or 1 comma */
1705 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) 1765 if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
1706 break; 1766 break;
1707 if (t_tclass != TC_COMMA) 1767 /* it was a comma, we ate it */
1708 syntax_error(EMSG_UNEXP_TOKEN);
1709 } 1768 }
1710 seq = &f->body; 1769 seq = &f->body;
1711 chain_group(); 1770 /* ensure there is { after "func F(...)" - but newlines are allowed */
1712 clear_array(ahash); 1771 while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
1713 } else if (tclass & TC_OPSEQ) { 1772 continue;
1714 debug_printf_parse("%s: TC_OPSEQ\n", __func__); 1773 chain_until_rbrace();
1774 hash_clear(ahash);
1775 continue;
1776 }
1777 seq = &mainseq;
1778 if (tclass & TS_OPSEQ) {
1779 node *cn;
1780
1781 debug_printf_parse("%s: TS_OPSEQ\n", __func__);
1715 rollback_token(); 1782 rollback_token();
1716 cn = chain_node(OC_TEST); 1783 cn = chain_node(OC_TEST);
1717 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); 1784 cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
1718 if (t_tclass & TC_GRPSTART) { 1785 if (t_tclass == TC_LBRACE) {
1719 debug_printf_parse("%s: TC_GRPSTART\n", __func__); 1786 debug_printf_parse("%s: TC_LBRACE\n", __func__);
1720 rollback_token(); 1787 rollback_token();
1721 chain_group(); 1788 chain_group();
1722 } else { 1789 } else {
1723 debug_printf_parse("%s: !TC_GRPSTART\n", __func__); 1790 /* no action, assume default "{ print }" */
1791 debug_printf_parse("%s: !TC_LBRACE\n", __func__);
1724 chain_node(OC_PRINT); 1792 chain_node(OC_PRINT);
1725 } 1793 }
1726 cn->r.n = mainseq.last; 1794 cn->r.n = mainseq.last;
1727 } else /* if (tclass & TC_GRPSTART) */ { 1795 continue;
1728 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1729 rollback_token();
1730 chain_group();
1731 } 1796 }
1797 /* tclass == TC_LBRACE */
1798 debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1799 chain_until_rbrace();
1732 } 1800 }
1733 debug_printf_parse("%s: TC_EOF\n", __func__);
1734} 1801}
1735 1802
1736 1803
1737/* -------- program execution part -------- */ 1804/* -------- program execution part -------- */
1738 1805
1806/* temporary variables allocator */
1807static var *nvalloc(int sz)
1808{
1809 return xzalloc(sz * sizeof(var));
1810}
1811
1812static void nvfree(var *v, int sz)
1813{
1814 var *p = v;
1815
1816 while (--sz >= 0) {
1817 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1818 clear_array(iamarray(p));
1819 free(p->x.array->items);
1820 free(p->x.array);
1821 }
1822 if (p->type & VF_WALK) {
1823 walker_list *n;
1824 walker_list *w = p->x.walker;
1825 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1826 p->x.walker = NULL;
1827 while (w) {
1828 n = w->prev;
1829 debug_printf_walker(" free(%p)\n", w);
1830 free(w);
1831 w = n;
1832 }
1833 }
1834 clrvar(p);
1835 p++;
1836 }
1837
1838 free(v);
1839}
1840
1739static node *mk_splitter(const char *s, tsplitter *spl) 1841static node *mk_splitter(const char *s, tsplitter *spl)
1740{ 1842{
1741 regex_t *re, *ire; 1843 regex_t *re, *ire;
@@ -1744,7 +1846,7 @@ static node *mk_splitter(const char *s, tsplitter *spl)
1744 re = &spl->re[0]; 1846 re = &spl->re[0];
1745 ire = &spl->re[1]; 1847 ire = &spl->re[1];
1746 n = &spl->n; 1848 n = &spl->n;
1747 if ((n->info & OPCLSMASK) == OC_REGEXP) { 1849 if (n->info == TI_REGEXP) {
1748 regfree(re); 1850 regfree(re);
1749 regfree(ire); // TODO: nuke ire, use re+1? 1851 regfree(ire); // TODO: nuke ire, use re+1?
1750 } 1852 }
@@ -1757,21 +1859,28 @@ static node *mk_splitter(const char *s, tsplitter *spl)
1757 return n; 1859 return n;
1758} 1860}
1759 1861
1760/* use node as a regular expression. Supplied with node ptr and regex_t 1862static var *evaluate(node *, var *);
1863
1864/* Use node as a regular expression. Supplied with node ptr and regex_t
1761 * storage space. Return ptr to regex (if result points to preg, it should 1865 * storage space. Return ptr to regex (if result points to preg, it should
1762 * be later regfree'd manually 1866 * be later regfree'd manually).
1763 */ 1867 */
1764static regex_t *as_regex(node *op, regex_t *preg) 1868static regex_t *as_regex(node *op, regex_t *preg)
1765{ 1869{
1766 int cflags; 1870 int cflags;
1767 var *v;
1768 const char *s; 1871 const char *s;
1769 1872
1770 if ((op->info & OPCLSMASK) == OC_REGEXP) { 1873 if (op->info == TI_REGEXP) {
1771 return icase ? op->r.ire : op->l.re; 1874 return icase ? op->r.ire : op->l.re;
1772 } 1875 }
1773 v = nvalloc(1); 1876
1774 s = getvar_s(evaluate(op, v)); 1877 //tmpvar = nvalloc(1);
1878#define TMPVAR (&G.as_regex__tmpvar)
1879 // We use a single "static" tmpvar (instead of on-stack or malloced one)
1880 // to decrease memory consumption in deeply-recursive awk programs.
1881 // The rule to work safely is to never call evaluate() while our static
1882 // TMPVAR's value is still needed.
1883 s = getvar_s(evaluate(op, TMPVAR));
1775 1884
1776 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED; 1885 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1777 /* Testcase where REG_EXTENDED fails (unpaired '{'): 1886 /* Testcase where REG_EXTENDED fails (unpaired '{'):
@@ -1783,7 +1892,8 @@ static regex_t *as_regex(node *op, regex_t *preg)
1783 cflags &= ~REG_EXTENDED; 1892 cflags &= ~REG_EXTENDED;
1784 xregcomp(preg, s, cflags); 1893 xregcomp(preg, s, cflags);
1785 } 1894 }
1786 nvfree(v); 1895 //nvfree(tmpvar, 1);
1896#undef TMPVAR
1787 return preg; 1897 return preg;
1788} 1898}
1789 1899
@@ -1870,13 +1980,13 @@ static int awk_split(const char *s, node *spl, char **slist)
1870 c[2] = '\n'; 1980 c[2] = '\n';
1871 1981
1872 n = 0; 1982 n = 0;
1873 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ 1983 if (spl->info == TI_REGEXP) { /* regex split */
1874 if (!*s) 1984 if (!*s)
1875 return n; /* "": zero fields */ 1985 return n; /* "": zero fields */
1876 n++; /* at least one field will be there */ 1986 n++; /* at least one field will be there */
1877 do { 1987 do {
1878 int l; 1988 int l;
1879 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... 1989 regmatch_t pmatch[1];
1880 1990
1881 l = strcspn(s, c+2); /* len till next NUL or \n */ 1991 l = strcspn(s, c+2); /* len till next NUL or \n */
1882 if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0 1992 if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
@@ -2037,7 +2147,7 @@ static node *nextarg(node **pn)
2037 node *n; 2147 node *n;
2038 2148
2039 n = *pn; 2149 n = *pn;
2040 if (n && (n->info & OPCLSMASK) == OC_COMMA) { 2150 if (n && n->info == TI_COMMA) {
2041 *pn = n->r.n; 2151 *pn = n->r.n;
2042 n = n->l.n; 2152 n = n->l.n;
2043 } else { 2153 } else {
@@ -2068,8 +2178,7 @@ static void hashwalk_init(var *v, xhash *array)
2068 for (i = 0; i < array->csize; i++) { 2178 for (i = 0; i < array->csize; i++) {
2069 hi = array->items[i]; 2179 hi = array->items[i];
2070 while (hi) { 2180 while (hi) {
2071 strcpy(w->end, hi->name); 2181 w->end = stpcpy(w->end, hi->name) + 1;
2072 nextword(&w->end);
2073 hi = hi->next; 2182 hi = hi->next;
2074 } 2183 }
2075 } 2184 }
@@ -2095,8 +2204,11 @@ static int hashwalk_next(var *v)
2095/* evaluate node, return 1 when result is true, 0 otherwise */ 2204/* evaluate node, return 1 when result is true, 0 otherwise */
2096static int ptest(node *pattern) 2205static int ptest(node *pattern)
2097{ 2206{
2098 /* ptest__v is "static": to save stack space? */ 2207 // We use a single "static" tmpvar (instead of on-stack or malloced one)
2099 return istrue(evaluate(pattern, &G.ptest__v)); 2208 // to decrease memory consumption in deeply-recursive awk programs.
2209 // The rule to work safely is to never call evaluate() while our static
2210 // TMPVAR's value is still needed.
2211 return istrue(evaluate(pattern, &G.ptest__tmpvar));
2100} 2212}
2101 2213
2102#if ENABLE_PLATFORM_MINGW32 2214#if ENABLE_PLATFORM_MINGW32
@@ -2118,7 +2230,7 @@ static ssize_t FAST_FUNC safe_read_strip_cr(int fd, void *buf, size_t count)
2118static int awk_getline(rstream *rsm, var *v) 2230static int awk_getline(rstream *rsm, var *v)
2119{ 2231{
2120 char *b; 2232 char *b;
2121 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... 2233 regmatch_t pmatch[1];
2122 int size, a, p, pp = 0; 2234 int size, a, p, pp = 0;
2123 int fd, so, eo, r, rp; 2235 int fd, so, eo, r, rp;
2124 char c, *m, *s; 2236 char c, *m, *s;
@@ -2144,7 +2256,7 @@ static int awk_getline(rstream *rsm, var *v)
2144 so = eo = p; 2256 so = eo = p;
2145 r = 1; 2257 r = 1;
2146 if (p > 0) { 2258 if (p > 0) {
2147 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) { 2259 if (rsplitter.n.info == TI_REGEXP) {
2148 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re, 2260 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
2149 b, 1, pmatch, 0) == 0) { 2261 b, 1, pmatch, 0) == 0) {
2150 so = pmatch[0].rm_so; 2262 so = pmatch[0].rm_so;
@@ -2216,27 +2328,6 @@ static int awk_getline(rstream *rsm, var *v)
2216 return r; 2328 return r;
2217} 2329}
2218 2330
2219static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2220{
2221 int r = 0;
2222 char c;
2223 const char *s = format;
2224
2225 if (int_as_int && n == (long long)n) {
2226 r = snprintf(b, size, "%"LL_FMT"d", (long long)n);
2227 } else {
2228 do { c = *s; } while (c && *++s);
2229 if (strchr("diouxX", c)) {
2230 r = snprintf(b, size, format, (int)n);
2231 } else if (strchr("eEfgG", c)) {
2232 r = snprintf(b, size, format, n);
2233 } else {
2234 syntax_error(EMSG_INV_FMT);
2235 }
2236 }
2237 return r;
2238}
2239
2240/* formatted output into an allocated buffer, return ptr to buffer */ 2331/* formatted output into an allocated buffer, return ptr to buffer */
2241#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS 2332#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2242# define awk_printf(a, b) awk_printf(a) 2333# define awk_printf(a, b) awk_printf(a)
@@ -2248,10 +2339,18 @@ static char *awk_printf(node *n, int *len)
2248 const char *s1; 2339 const char *s1;
2249 int i, j, incr, bsize; 2340 int i, j, incr, bsize;
2250 char c, c1; 2341 char c, c1;
2251 var *v, *arg; 2342 var *arg;
2252 2343
2253 v = nvalloc(1); 2344 //tmpvar = nvalloc(1);
2254 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); 2345#define TMPVAR (&G.awk_printf__tmpvar)
2346 // We use a single "static" tmpvar (instead of on-stack or malloced one)
2347 // to decrease memory consumption in deeply-recursive awk programs.
2348 // The rule to work safely is to never call evaluate() while our static
2349 // TMPVAR's value is still needed.
2350 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
2351 // ^^^^^^^^^ here we immediately strdup() the value, so the later call
2352 // to evaluate() potentially recursing into another awk_printf() can't
2353 // mangle the value.
2255 2354
2256 i = 0; 2355 i = 0;
2257 while (*f) { 2356 while (*f) {
@@ -2271,7 +2370,7 @@ static char *awk_printf(node *n, int *len)
2271 f++; 2370 f++;
2272 c1 = *f; 2371 c1 = *f;
2273 *f = '\0'; 2372 *f = '\0';
2274 arg = evaluate(nextarg(&n), v); 2373 arg = evaluate(nextarg(&n), TMPVAR);
2275 2374
2276 j = i; 2375 j = i;
2277 if (c == 'c' || !c) { 2376 if (c == 'c' || !c) {
@@ -2292,7 +2391,9 @@ static char *awk_printf(node *n, int *len)
2292 } 2391 }
2293 2392
2294 free(fmt); 2393 free(fmt);
2295 nvfree(v); 2394 //nvfree(tmpvar, 1);
2395#undef TMPVAR
2396
2296 b = xrealloc(b, i + 1); 2397 b = xrealloc(b, i + 1);
2297 b[i] = '\0'; 2398 b[i] = '\0';
2298#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS 2399#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
@@ -2428,23 +2529,48 @@ static NOINLINE int do_mktime(const char *ds)
2428 return mktime(&then); 2529 return mktime(&then);
2429} 2530}
2430 2531
2532/* Reduce stack usage in exec_builtin() by keeping match() code separate */
2533static NOINLINE var *do_match(node *an1, const char *as0)
2534{
2535 regmatch_t pmatch[1];
2536 regex_t sreg, *re;
2537 int n, start, len;
2538
2539 re = as_regex(an1, &sreg);
2540 n = regexec(re, as0, 1, pmatch, 0);
2541 if (re == &sreg)
2542 regfree(re);
2543 start = 0;
2544 len = -1;
2545 if (n == 0) {
2546 start = pmatch[0].rm_so + 1;
2547 len = pmatch[0].rm_eo - pmatch[0].rm_so;
2548 }
2549 setvar_i(newvar("RLENGTH"), len);
2550 return setvar_i(newvar("RSTART"), start);
2551}
2552
2553/* Reduce stack usage in evaluate() by keeping builtins' code separate */
2431static NOINLINE var *exec_builtin(node *op, var *res) 2554static NOINLINE var *exec_builtin(node *op, var *res)
2432{ 2555{
2433#define tspl (G.exec_builtin__tspl) 2556#define tspl (G.exec_builtin__tspl)
2434 2557
2435 var *tv; 2558 var *tmpvars;
2436 node *an[4]; 2559 node *an[4];
2437 var *av[4]; 2560 var *av[4];
2438 const char *as[4]; 2561 const char *as[4];
2439 regmatch_t pmatch[2];
2440 regex_t sreg, *re;
2441 node *spl; 2562 node *spl;
2442 uint32_t isr, info; 2563 uint32_t isr, info;
2443 int nargs; 2564 int nargs;
2444 time_t tt; 2565 time_t tt;
2445 int i, l, ll, n; 2566 int i, l, ll, n;
2446 2567
2447 tv = nvalloc(4); 2568 tmpvars = nvalloc(4);
2569#define TMPVAR0 (tmpvars)
2570#define TMPVAR1 (tmpvars + 1)
2571#define TMPVAR2 (tmpvars + 2)
2572#define TMPVAR3 (tmpvars + 3)
2573#define TMPVAR(i) (tmpvars + (i))
2448 isr = info = op->info; 2574 isr = info = op->info;
2449 op = op->l.n; 2575 op = op->l.n;
2450 2576
@@ -2452,7 +2578,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2452 for (i = 0; i < 4 && op; i++) { 2578 for (i = 0; i < 4 && op; i++) {
2453 an[i] = nextarg(&op); 2579 an[i] = nextarg(&op);
2454 if (isr & 0x09000000) 2580 if (isr & 0x09000000)
2455 av[i] = evaluate(an[i], &tv[i]); 2581 av[i] = evaluate(an[i], TMPVAR(i));
2456 if (isr & 0x08000000) 2582 if (isr & 0x08000000)
2457 as[i] = getvar_s(av[i]); 2583 as[i] = getvar_s(av[i]);
2458 isr >>= 1; 2584 isr >>= 1;
@@ -2476,8 +2602,8 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2476 char *s, *s1; 2602 char *s, *s1;
2477 2603
2478 if (nargs > 2) { 2604 if (nargs > 2) {
2479 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? 2605 spl = (an[2]->info == TI_REGEXP) ? an[2]
2480 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); 2606 : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
2481 } else { 2607 } else {
2482 spl = &fsplitter.n; 2608 spl = &fsplitter.n;
2483 } 2609 }
@@ -2591,20 +2717,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2591 break; 2717 break;
2592 2718
2593 case B_ma: 2719 case B_ma:
2594 re = as_regex(an[1], &sreg); 2720 res = do_match(an[1], as[0]);
2595 n = regexec(re, as[0], 1, pmatch, 0);
2596 if (n == 0) {
2597 pmatch[0].rm_so++;
2598 pmatch[0].rm_eo++;
2599 } else {
2600 pmatch[0].rm_so = 0;
2601 pmatch[0].rm_eo = -1;
2602 }
2603 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2604 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2605 setvar_i(res, pmatch[0].rm_so);
2606 if (re == &sreg)
2607 regfree(re);
2608 break; 2721 break;
2609 2722
2610 case B_ge: 2723 case B_ge:
@@ -2620,14 +2733,79 @@ static NOINLINE var *exec_builtin(node *op, var *res)
2620 break; 2733 break;
2621 } 2734 }
2622 2735
2623 nvfree(tv); 2736 nvfree(tmpvars, 4);
2737#undef TMPVAR0
2738#undef TMPVAR1
2739#undef TMPVAR2
2740#undef TMPVAR3
2741#undef TMPVAR
2742
2624 return res; 2743 return res;
2625#undef tspl 2744#undef tspl
2626} 2745}
2627 2746
2747/* if expr looks like "var=value", perform assignment and return 1,
2748 * otherwise return 0 */
2749static int is_assignment(const char *expr)
2750{
2751 char *exprc, *val;
2752
2753 val = (char*)endofname(expr);
2754 if (val == (char*)expr || *val != '=') {
2755 return FALSE;
2756 }
2757
2758 exprc = xstrdup(expr);
2759 val = exprc + (val - expr);
2760 *val++ = '\0';
2761
2762 unescape_string_in_place(val);
2763 setvar_u(newvar(exprc), val);
2764 free(exprc);
2765 return TRUE;
2766}
2767
2768/* switch to next input file */
2769static rstream *next_input_file(void)
2770{
2771#define rsm (G.next_input_file__rsm)
2772#define files_happen (G.next_input_file__files_happen)
2773
2774 const char *fname, *ind;
2775
2776 if (rsm.F)
2777 fclose(rsm.F);
2778 rsm.F = NULL;
2779 rsm.pos = rsm.adv = 0;
2780
2781 for (;;) {
2782 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2783 if (files_happen)
2784 return NULL;
2785 fname = "-";
2786 rsm.F = stdin;
2787 break;
2788 }
2789 ind = getvar_s(incvar(intvar[ARGIND]));
2790 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2791 if (fname && *fname && !is_assignment(fname)) {
2792 rsm.F = xfopen_stdin(fname);
2793 break;
2794 }
2795 }
2796
2797 files_happen = TRUE;
2798 setvar_s(intvar[FILENAME], fname);
2799 return &rsm;
2800#undef rsm
2801#undef files_happen
2802}
2803
2628/* 2804/*
2629 * Evaluate node - the heart of the program. Supplied with subtree 2805 * Evaluate node - the heart of the program. Supplied with subtree
2630 * and place where to store result. returns ptr to result. 2806 * and "res" variable to assign the result to if we evaluate an expression.
2807 * If node refers to e.g. a variable or a field, no assignment happens.
2808 * Return ptr to the result (which may or may not be the "res" variable!)
2631 */ 2809 */
2632#define XC(n) ((n) >> 8) 2810#define XC(n) ((n) >> 8)
2633 2811
@@ -2639,14 +2817,16 @@ static var *evaluate(node *op, var *res)
2639#define seed (G.evaluate__seed) 2817#define seed (G.evaluate__seed)
2640#define sreg (G.evaluate__sreg) 2818#define sreg (G.evaluate__sreg)
2641 2819
2642 var *v1; 2820 var *tmpvars;
2643 2821
2644 if (!op) 2822 if (!op)
2645 return setvar_s(res, NULL); 2823 return setvar_s(res, NULL);
2646 2824
2647 debug_printf_eval("entered %s()\n", __func__); 2825 debug_printf_eval("entered %s()\n", __func__);
2648 2826
2649 v1 = nvalloc(2); 2827 tmpvars = nvalloc(2);
2828#define TMPVAR0 (tmpvars)
2829#define TMPVAR1 (tmpvars + 1)
2650 2830
2651 while (op) { 2831 while (op) {
2652 struct { 2832 struct {
@@ -2668,42 +2848,19 @@ static var *evaluate(node *op, var *res)
2668 op1 = op->l.n; 2848 op1 = op->l.n;
2669 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); 2849 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2670 2850
2671 /* "delete" is special: 2851 /* execute inevitable things */
2672 * "delete array[var--]" must evaluate index expr only once, 2852 if (opinfo & OF_RES1) {
2673 * must not evaluate it in "execute inevitable things" part. 2853 if ((opinfo & OF_REQUIRED) && !op1)
2674 */ 2854 syntax_error(EMSG_TOO_FEW_ARGS);
2675 if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) { 2855 L.v = evaluate(op1, TMPVAR0);
2676 uint32_t info = op1->info & OPCLSMASK; 2856 if (opinfo & OF_STR1) {
2677 var *v; 2857 L.s = getvar_s(L.v);
2678 2858 debug_printf_eval("L.s:'%s'\n", L.s);
2679 debug_printf_eval("DELETE\n");
2680 if (info == OC_VAR) {
2681 v = op1->l.v;
2682 } else if (info == OC_FNARG) {
2683 v = &fnargs[op1->l.aidx];
2684 } else {
2685 syntax_error(EMSG_NOT_ARRAY);
2686 } 2859 }
2687 if (op1->r.n) { /* array ref? */ 2860 if (opinfo & OF_NUM1) {
2688 const char *s; 2861 L_d = getvar_i(L.v);
2689 s = getvar_s(evaluate(op1->r.n, v1)); 2862 debug_printf_eval("L_d:%f\n", L_d);
2690 hash_remove(iamarray(v), s);
2691 } else {
2692 clear_array(iamarray(v));
2693 } 2863 }
2694 goto next;
2695 }
2696
2697 /* execute inevitable things */
2698 if (opinfo & OF_RES1)
2699 L.v = evaluate(op1, v1);
2700 if (opinfo & OF_STR1) {
2701 L.s = getvar_s(L.v);
2702 debug_printf_eval("L.s:'%s'\n", L.s);
2703 }
2704 if (opinfo & OF_NUM1) {
2705 L_d = getvar_i(L.v);
2706 debug_printf_eval("L_d:%f\n", L_d);
2707 } 2864 }
2708 /* NB: Must get string/numeric values of L (done above) 2865 /* NB: Must get string/numeric values of L (done above)
2709 * _before_ evaluate()'ing R.v: if both L and R are $NNNs, 2866 * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
@@ -2713,13 +2870,13 @@ static var *evaluate(node *op, var *res)
2713 * (Seen trying to evaluate "$444 $44444") 2870 * (Seen trying to evaluate "$444 $44444")
2714 */ 2871 */
2715 if (opinfo & OF_RES2) { 2872 if (opinfo & OF_RES2) {
2716 R.v = evaluate(op->r.n, v1+1); 2873 R.v = evaluate(op->r.n, TMPVAR1);
2717 //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? 2874 //TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
2718 //L.v = NULL; 2875 //L.v = NULL;
2719 } 2876 if (opinfo & OF_STR2) {
2720 if (opinfo & OF_STR2) { 2877 R.s = getvar_s(R.v);
2721 R.s = getvar_s(R.v); 2878 debug_printf_eval("R.s:'%s'\n", R.s);
2722 debug_printf_eval("R.s:'%s'\n", R.s); 2879 }
2723 } 2880 }
2724 2881
2725 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); 2882 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
@@ -2730,7 +2887,7 @@ static var *evaluate(node *op, var *res)
2730 /* test pattern */ 2887 /* test pattern */
2731 case XC( OC_TEST ): 2888 case XC( OC_TEST ):
2732 debug_printf_eval("TEST\n"); 2889 debug_printf_eval("TEST\n");
2733 if ((op1->info & OPCLSMASK) == OC_COMMA) { 2890 if (op1->info == TI_COMMA) {
2734 /* it's range pattern */ 2891 /* it's range pattern */
2735 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { 2892 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2736 op->info |= OF_CHECKED; 2893 op->info |= OF_CHECKED;
@@ -2791,12 +2948,12 @@ static var *evaluate(node *op, var *res)
2791 F = rsm->F; 2948 F = rsm->F;
2792 } 2949 }
2793 2950
2794 if ((opinfo & OPCLSMASK) == OC_PRINT) { 2951 if (opinfo == TI_PRINT) {
2795 if (!op1) { 2952 if (!op1) {
2796 fputs(getvar_s(intvar[F0]), F); 2953 fputs(getvar_s(intvar[F0]), F);
2797 } else { 2954 } else {
2798 while (op1) { 2955 for (;;) {
2799 var *v = evaluate(nextarg(&op1), v1); 2956 var *v = evaluate(nextarg(&op1), TMPVAR0);
2800 if (v->type & VF_NUMBER) { 2957 if (v->type & VF_NUMBER) {
2801 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), 2958 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2802 getvar_i(v), TRUE); 2959 getvar_i(v), TRUE);
@@ -2804,14 +2961,13 @@ static var *evaluate(node *op, var *res)
2804 } else { 2961 } else {
2805 fputs(getvar_s(v), F); 2962 fputs(getvar_s(v), F);
2806 } 2963 }
2807 2964 if (!op1)
2808 if (op1) 2965 break;
2809 fputs(getvar_s(intvar[OFS]), F); 2966 fputs(getvar_s(intvar[OFS]), F);
2810 } 2967 }
2811 } 2968 }
2812 fputs(getvar_s(intvar[ORS]), F); 2969 fputs(getvar_s(intvar[ORS]), F);
2813 2970 } else { /* PRINTF */
2814 } else { /* OC_PRINTF */
2815 char *s = awk_printf(op1, &len); 2971 char *s = awk_printf(op1, &len);
2816#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS 2972#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2817 fwrite(s, len, 1, F); 2973 fwrite(s, len, 1, F);
@@ -2824,7 +2980,31 @@ static var *evaluate(node *op, var *res)
2824 break; 2980 break;
2825 } 2981 }
2826 2982
2827 /* case XC( OC_DELETE ): - moved to happen before arg evaluation */ 2983 case XC( OC_DELETE ):
2984 debug_printf_eval("DELETE\n");
2985 {
2986 /* "delete" is special:
2987 * "delete array[var--]" must evaluate index expr only once.
2988 */
2989 uint32_t info = op1->info & OPCLSMASK;
2990 var *v;
2991
2992 if (info == OC_VAR) {
2993 v = op1->l.v;
2994 } else if (info == OC_FNARG) {
2995 v = &fnargs[op1->l.aidx];
2996 } else {
2997 syntax_error(EMSG_NOT_ARRAY);
2998 }
2999 if (op1->r.n) { /* array ref? */
3000 const char *s;
3001 s = getvar_s(evaluate(op1->r.n, TMPVAR0));
3002 hash_remove(iamarray(v), s);
3003 } else {
3004 clear_array(iamarray(v));
3005 }
3006 break;
3007 }
2828 3008
2829 case XC( OC_NEWSOURCE ): 3009 case XC( OC_NEWSOURCE ):
2830 debug_printf_eval("NEWSOURCE\n"); 3010 debug_printf_eval("NEWSOURCE\n");
@@ -2849,7 +3029,9 @@ static var *evaluate(node *op, var *res)
2849 3029
2850 case XC( OC_EXIT ): 3030 case XC( OC_EXIT ):
2851 debug_printf_eval("EXIT\n"); 3031 debug_printf_eval("EXIT\n");
2852 awk_exit(L_d); 3032 if (op1)
3033 G.exitcode = (int)L_d;
3034 awk_exit();
2853 3035
2854 /* -- recursive node type -- */ 3036 /* -- recursive node type -- */
2855 3037
@@ -2894,51 +3076,64 @@ static var *evaluate(node *op, var *res)
2894 case XC( OC_MOVE ): 3076 case XC( OC_MOVE ):
2895 debug_printf_eval("MOVE\n"); 3077 debug_printf_eval("MOVE\n");
2896 /* if source is a temporary string, jusk relink it to dest */ 3078 /* if source is a temporary string, jusk relink it to dest */
2897//Disabled: if R.v is numeric but happens to have cached R.v->string, 3079 if (R.v == TMPVAR1
2898//then L.v ends up being a string, which is wrong 3080 && !(R.v->type & VF_NUMBER)
2899// if (R.v == v1+1 && R.v->string) { 3081 /* Why check !NUMBER? if R.v is a number but has cached R.v->string,
2900// res = setvar_p(L.v, R.v->string); 3082 * L.v ends up a string, which is wrong */
2901// R.v->string = NULL; 3083 /*&& R.v->string - always not NULL (right?) */
2902// } else { 3084 ) {
3085 res = setvar_p(L.v, R.v->string); /* avoids strdup */
3086 R.v->string = NULL;
3087 } else {
2903 res = copyvar(L.v, R.v); 3088 res = copyvar(L.v, R.v);
2904// } 3089 }
2905 break; 3090 break;
2906 3091
2907 case XC( OC_TERNARY ): 3092 case XC( OC_TERNARY ):
2908 debug_printf_eval("TERNARY\n"); 3093 debug_printf_eval("TERNARY\n");
2909 if ((op->r.n->info & OPCLSMASK) != OC_COLON) 3094 if (op->r.n->info != TI_COLON)
2910 syntax_error(EMSG_POSSIBLE_ERROR); 3095 syntax_error(EMSG_POSSIBLE_ERROR);
2911 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res); 3096 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2912 break; 3097 break;
2913 3098
2914 case XC( OC_FUNC ): { 3099 case XC( OC_FUNC ): {
2915 var *vbeg, *v; 3100 var *argvars, *sv_fnargs;
2916 const char *sv_progname; 3101 const char *sv_progname;
3102 int nargs, i;
3103
2917 debug_printf_eval("FUNC\n"); 3104 debug_printf_eval("FUNC\n");
2918 3105
2919 /* The body might be empty, still has to eval the args */ 3106 if (!op->r.f->defined)
2920 if (!op->r.n->info && !op->r.f->body.first)
2921 syntax_error(EMSG_UNDEF_FUNC); 3107 syntax_error(EMSG_UNDEF_FUNC);
2922 3108
2923 vbeg = v = nvalloc(op->r.f->nargs + 1); 3109 /* The body might be empty, still has to eval the args */
3110 nargs = op->r.f->nargs;
3111 argvars = nvalloc(nargs);
3112 i = 0;
2924 while (op1) { 3113 while (op1) {
2925 var *arg = evaluate(nextarg(&op1), v1); 3114 var *arg = evaluate(nextarg(&op1), TMPVAR0);
2926 copyvar(v, arg); 3115 if (i == nargs) {
2927 v->type |= VF_CHILD; 3116 /* call with more arguments than function takes.
2928 v->x.parent = arg; 3117 * (gawk warns: "warning: function 'f' called with more arguments than declared").
2929 if (++v - vbeg >= op->r.f->nargs) 3118 * They are still evaluated, but discarded: */
2930 break; 3119 clrvar(arg);
3120 continue;
3121 }
3122 copyvar(&argvars[i], arg);
3123 argvars[i].type |= VF_CHILD;
3124 argvars[i].x.parent = arg;
3125 i++;
2931 } 3126 }
2932 3127
2933 v = fnargs; 3128 sv_fnargs = fnargs;
2934 fnargs = vbeg;
2935 sv_progname = g_progname; 3129 sv_progname = g_progname;
2936 3130
3131 fnargs = argvars;
2937 res = evaluate(op->r.f->body.first, res); 3132 res = evaluate(op->r.f->body.first, res);
3133 nvfree(argvars, nargs);
2938 3134
2939 g_progname = sv_progname; 3135 g_progname = sv_progname;
2940 nvfree(fnargs); 3136 fnargs = sv_fnargs;
2941 fnargs = v;
2942 3137
2943 break; 3138 break;
2944 } 3139 }
@@ -2954,7 +3149,7 @@ static var *evaluate(node *op, var *res)
2954 if (op1) { 3149 if (op1) {
2955 rsm = newfile(L.s); 3150 rsm = newfile(L.s);
2956 if (!rsm->F) { 3151 if (!rsm->F) {
2957 if ((opinfo & OPCLSMASK) == OC_PGETLINE) { 3152 if (opinfo == TI_PGETLINE) {
2958 rsm->F = popen(L.s, "r"); 3153 rsm->F = popen(L.s, "r");
2959 rsm->is_pipe = TRUE; 3154 rsm->is_pipe = TRUE;
2960 } else { 3155 } else {
@@ -2990,15 +3185,32 @@ static var *evaluate(node *op, var *res)
2990 double R_d = R_d; /* for compiler */ 3185 double R_d = R_d; /* for compiler */
2991 debug_printf_eval("FBLTIN\n"); 3186 debug_printf_eval("FBLTIN\n");
2992 3187
3188 if (op1 && op1->info == TI_COMMA)
3189 /* Simple builtins take one arg maximum */
3190 syntax_error("Too many arguments");
3191
2993 switch (opn) { 3192 switch (opn) {
2994 case F_in: 3193 case F_in:
2995 R_d = (long long)L_d; 3194 R_d = (long long)L_d;
2996 break; 3195 break;
2997 3196
2998 case F_rn: 3197 case F_rn: /*rand*/
2999 R_d = (double)rand() / (double)RAND_MAX; 3198 if (op1)
3199 syntax_error("Too many arguments");
3200 {
3201#if RAND_MAX >= 0x7fffffff
3202 uint32_t u = ((uint32_t)rand() << 16) ^ rand();
3203 uint64_t v = ((uint64_t)rand() << 32) | u;
3204 /* the above shift+or is optimized out on 32-bit arches */
3205# if RAND_MAX > 0x7fffffff
3206 v &= 0x7fffffffffffffffULL;
3207# endif
3208 R_d = (double)v / 0x8000000000000000ULL;
3209#else
3210# error Not implemented for this value of RAND_MAX
3211#endif
3000 break; 3212 break;
3001 3213 }
3002 case F_co: 3214 case F_co:
3003 if (ENABLE_FEATURE_AWK_LIBM) { 3215 if (ENABLE_FEATURE_AWK_LIBM) {
3004 R_d = cos(L_d); 3216 R_d = cos(L_d);
@@ -3038,7 +3250,9 @@ static var *evaluate(node *op, var *res)
3038 srand(seed); 3250 srand(seed);
3039 break; 3251 break;
3040 3252
3041 case F_ti: 3253 case F_ti: /*systime*/
3254 if (op1)
3255 syntax_error("Too many arguments");
3042 R_d = time(NULL); 3256 R_d = time(NULL);
3043 break; 3257 break;
3044 3258
@@ -3077,7 +3291,7 @@ static var *evaluate(node *op, var *res)
3077 rstream *rsm; 3291 rstream *rsm;
3078 int err = 0; 3292 int err = 0;
3079 rsm = (rstream *)hash_search(fdhash, L.s); 3293 rsm = (rstream *)hash_search(fdhash, L.s);
3080 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm); 3294 debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
3081 if (rsm) { 3295 if (rsm) {
3082 debug_printf_eval("OC_FBLTIN F_cl " 3296 debug_printf_eval("OC_FBLTIN F_cl "
3083 "rsm->is_pipe:%d, ->F:%p\n", 3297 "rsm->is_pipe:%d, ->F:%p\n",
@@ -3088,6 +3302,11 @@ static var *evaluate(node *op, var *res)
3088 */ 3302 */
3089 if (rsm->F) 3303 if (rsm->F)
3090 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); 3304 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
3305//TODO: fix this case:
3306// $ awk 'BEGIN { print close(""); print ERRNO }'
3307// -1
3308// close of redirection that was never opened
3309// (we print 0, 0)
3091 free(rsm->buffer); 3310 free(rsm->buffer);
3092 hash_remove(fdhash, L.s); 3311 hash_remove(fdhash, L.s);
3093 } 3312 }
@@ -3166,7 +3385,7 @@ static var *evaluate(node *op, var *res)
3166 case XC( OC_COMMA ): { 3385 case XC( OC_COMMA ): {
3167 const char *sep = ""; 3386 const char *sep = "";
3168 debug_printf_eval("COMMA\n"); 3387 debug_printf_eval("COMMA\n");
3169 if ((opinfo & OPCLSMASK) == OC_COMMA) 3388 if (opinfo == TI_COMMA)
3170 sep = getvar_s(intvar[SUBSEP]); 3389 sep = getvar_s(intvar[SUBSEP]);
3171 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); 3390 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
3172 break; 3391 break;
@@ -3251,7 +3470,7 @@ static var *evaluate(node *op, var *res)
3251 default: 3470 default:
3252 syntax_error(EMSG_POSSIBLE_ERROR); 3471 syntax_error(EMSG_POSSIBLE_ERROR);
3253 } /* switch */ 3472 } /* switch */
3254 next: 3473
3255 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) 3474 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3256 op = op->a.n; 3475 op = op->a.n;
3257 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS) 3476 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
@@ -3260,7 +3479,10 @@ static var *evaluate(node *op, var *res)
3260 break; 3479 break;
3261 } /* while (op) */ 3480 } /* while (op) */
3262 3481
3263 nvfree(v1); 3482 nvfree(tmpvars, 2);
3483#undef TMPVAR0
3484#undef TMPVAR1
3485
3264 debug_printf_eval("returning from %s(): %p\n", __func__, res); 3486 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3265 return res; 3487 return res;
3266#undef fnargs 3488#undef fnargs
@@ -3271,16 +3493,14 @@ static var *evaluate(node *op, var *res)
3271 3493
3272/* -------- main & co. -------- */ 3494/* -------- main & co. -------- */
3273 3495
3274static int awk_exit(int r) 3496static int awk_exit(void)
3275{ 3497{
3276 unsigned i; 3498 unsigned i;
3277 3499
3278 if (!exiting) { 3500 if (!exiting) {
3279 var tv;
3280 exiting = TRUE; 3501 exiting = TRUE;
3281 nextrec = FALSE; 3502 nextrec = FALSE;
3282 zero_out_var(&tv); 3503 evaluate(endseq.first, &G.exit__tmpvar);
3283 evaluate(endseq.first, &tv);
3284 } 3504 }
3285 3505
3286 /* waiting for children */ 3506 /* waiting for children */
@@ -3294,65 +3514,7 @@ static int awk_exit(int r)
3294 } 3514 }
3295 } 3515 }
3296 3516
3297 exit(r); 3517 exit(G.exitcode);
3298}
3299
3300/* if expr looks like "var=value", perform assignment and return 1,
3301 * otherwise return 0 */
3302static int is_assignment(const char *expr)
3303{
3304 char *exprc, *val;
3305
3306 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3307 return FALSE;
3308 }
3309
3310 exprc = xstrdup(expr);
3311 val = exprc + (val - expr);
3312 *val++ = '\0';
3313
3314 unescape_string_in_place(val);
3315 setvar_u(newvar(exprc), val);
3316 free(exprc);
3317 return TRUE;
3318}
3319
3320/* switch to next input file */
3321static rstream *next_input_file(void)
3322{
3323#define rsm (G.next_input_file__rsm)
3324#define files_happen (G.next_input_file__files_happen)
3325
3326 FILE *F;
3327 const char *fname, *ind;
3328
3329 if (rsm.F)
3330 fclose(rsm.F);
3331 rsm.F = NULL;
3332 rsm.pos = rsm.adv = 0;
3333
3334 for (;;) {
3335 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3336 if (files_happen)
3337 return NULL;
3338 fname = "-";
3339 F = stdin;
3340 break;
3341 }
3342 ind = getvar_s(incvar(intvar[ARGIND]));
3343 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3344 if (fname && *fname && !is_assignment(fname)) {
3345 F = xfopen_stdin(fname);
3346 break;
3347 }
3348 }
3349
3350 files_happen = TRUE;
3351 setvar_s(intvar[FILENAME], fname);
3352 rsm.F = F;
3353 return &rsm;
3354#undef rsm
3355#undef files_happen
3356} 3518}
3357 3519
3358int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 3520int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
@@ -3366,7 +3528,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3366 llist_t *list_e = NULL; 3528 llist_t *list_e = NULL;
3367#endif 3529#endif
3368 int i; 3530 int i;
3369 var tv;
3370 3531
3371 INIT_G(); 3532 INIT_G();
3372 3533
@@ -3375,15 +3536,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3375 if (ENABLE_LOCALE_SUPPORT) 3536 if (ENABLE_LOCALE_SUPPORT)
3376 setlocale(LC_NUMERIC, "C"); 3537 setlocale(LC_NUMERIC, "C");
3377 3538
3378 /* allocate global buffer */
3379 g_buf = xmalloc(MAXVARFMT + 1);
3380
3381 vhash = hash_init();
3382 ahash = hash_init();
3383 fdhash = hash_init();
3384 fnhash = hash_init();
3385
3386 /* initialize variables */ 3539 /* initialize variables */
3540 vhash = hash_init();
3387 { 3541 {
3388 char *vnames = (char *)vNames; /* cheat */ 3542 char *vnames = (char *)vNames; /* cheat */
3389 char *vvalues = (char *)vValues; 3543 char *vvalues = (char *)vValues;
@@ -3405,10 +3559,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3405 handle_special(intvar[FS]); 3559 handle_special(intvar[FS]);
3406 handle_special(intvar[RS]); 3560 handle_special(intvar[RS]);
3407 3561
3408 newfile("/dev/stdin")->F = stdin;
3409 newfile("/dev/stdout")->F = stdout;
3410 newfile("/dev/stderr")->F = stderr;
3411
3412 /* Huh, people report that sometimes environ is NULL. Oh well. */ 3562 /* Huh, people report that sometimes environ is NULL. Oh well. */
3413 if (environ) { 3563 if (environ) {
3414 char **envp; 3564 char **envp;
@@ -3438,46 +3588,44 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3438 if (!is_assignment(llist_pop(&list_v))) 3588 if (!is_assignment(llist_pop(&list_v)))
3439 bb_show_usage(); 3589 bb_show_usage();
3440 } 3590 }
3591
3592 /* Parse all supplied programs */
3593 fnhash = hash_init();
3594 ahash = hash_init();
3441 while (list_f) { 3595 while (list_f) {
3442 int fd; 3596 int fd;
3443 char *s; 3597 char *s;
3444 3598
3445 g_progname = llist_pop(&list_f); 3599 g_progname = llist_pop(&list_f);
3446 fd = xopen_stdin(g_progname); 3600 fd = xopen_stdin(g_progname);
3447 /* 1st byte is reserved for "move name one char back" trick in next_token */ 3601 s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
3448 i = 1;
3449 s = NULL;
3450 for (;;) {
3451 int sz;
3452 s = xrealloc(s, i + 1000);
3453 sz = safe_read(fd, s + i, 1000);
3454 if (sz <= 0)
3455 break;
3456 i += sz;
3457 }
3458 s = xrealloc(s, i + 1); /* trim unused 999 bytes */
3459 s[i] = '\0';
3460 close(fd); 3602 close(fd);
3461 parse_program(s + 1); 3603 parse_program(s);
3462 free(s); 3604 free(s);
3463 } 3605 }
3464 g_progname = "cmd. line"; 3606 g_progname = "cmd. line";
3465#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS 3607#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3466 while (list_e) { 3608 while (list_e) {
3467 /* NB: "move name one char back" trick in next_token
3468 * can use argv[i][-1] here.
3469 */
3470 parse_program(llist_pop(&list_e)); 3609 parse_program(llist_pop(&list_e));
3471 } 3610 }
3472#endif 3611#endif
3612//FIXME: preserve order of -e and -f
3613//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
3473 if (!(opt & (OPT_f | OPT_e))) { 3614 if (!(opt & (OPT_f | OPT_e))) {
3474 if (!*argv) 3615 if (!*argv)
3475 bb_show_usage(); 3616 bb_show_usage();
3476 /* NB: "move name one char back" trick in next_token
3477 * can use argv[i][-1] here.
3478 */
3479 parse_program(*argv++); 3617 parse_program(*argv++);
3480 } 3618 }
3619 /* Free unused parse structures */
3620 //hash_free(fnhash); // ~250 bytes when empty, used only for function names
3621 //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
3622 // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
3623 free(fnhash->items);
3624 free(fnhash);
3625 fnhash = NULL; // debug
3626 //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
3627
3628 /* Parsing done, on to executing */
3481 3629
3482 /* fill in ARGV array */ 3630 /* fill in ARGV array */
3483 setari_u(intvar[ARGV], 0, "awk"); 3631 setari_u(intvar[ARGV], 0, "awk");
@@ -3486,10 +3634,14 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3486 setari_u(intvar[ARGV], ++i, *argv++); 3634 setari_u(intvar[ARGV], ++i, *argv++);
3487 setvar_i(intvar[ARGC], i + 1); 3635 setvar_i(intvar[ARGC], i + 1);
3488 3636
3489 zero_out_var(&tv); 3637 //fdhash = ahash; // done via define
3490 evaluate(beginseq.first, &tv); 3638 newfile("/dev/stdin")->F = stdin;
3639 newfile("/dev/stdout")->F = stdout;
3640 newfile("/dev/stderr")->F = stderr;
3641
3642 evaluate(beginseq.first, &G.main__tmpvar);
3491 if (!mainseq.first && !endseq.first) 3643 if (!mainseq.first && !endseq.first)
3492 awk_exit(EXIT_SUCCESS); 3644 awk_exit();
3493 3645
3494 /* input file could already be opened in BEGIN block */ 3646 /* input file could already be opened in BEGIN block */
3495 if (!iF) 3647 if (!iF)
@@ -3504,7 +3656,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3504 nextrec = FALSE; 3656 nextrec = FALSE;
3505 incvar(intvar[NR]); 3657 incvar(intvar[NR]);
3506 incvar(intvar[FNR]); 3658 incvar(intvar[FNR]);
3507 evaluate(mainseq.first, &tv); 3659 evaluate(mainseq.first, &G.main__tmpvar);
3508 3660
3509 if (nextfile) 3661 if (nextfile)
3510 break; 3662 break;
@@ -3516,6 +3668,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
3516 iF = next_input_file(); 3668 iF = next_input_file();
3517 } 3669 }
3518 3670
3519 awk_exit(EXIT_SUCCESS); 3671 awk_exit();
3520 /*return 0;*/ 3672 /*return 0;*/
3521} 3673}
diff --git a/modutils/modprobe.c b/modutils/modprobe.c
index c334186b8..235706fd5 100644
--- a/modutils/modprobe.c
+++ b/modutils/modprobe.c
@@ -629,8 +629,9 @@ int modprobe_main(int argc UNUSED_PARAM, char **argv)
629 config_close(parser); 629 config_close(parser);
630 630
631 parser = config_open2("modules.builtin", fopen_for_read); 631 parser = config_open2("modules.builtin", fopen_for_read);
632 /* this file contains lines like "kernel/fs/binfmt_script.ko" */
632 while (config_read(parser, &s, 1, 1, "# \t", PARSE_NORMAL)) 633 while (config_read(parser, &s, 1, 1, "# \t", PARSE_NORMAL))
633 get_or_add_modentry(s)->flags |= MODULE_FLAG_BUILTIN; 634 get_or_add_modentry(bb_basename(s))->flags |= MODULE_FLAG_BUILTIN;
634 config_close(parser); 635 config_close(parser);
635 } 636 }
636 637
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index cf9b722dc..770d8ffce 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -45,6 +45,16 @@ testing "awk handles empty function f(arg){}" \
45 "" "" 45 "" ""
46 46
47prg=' 47prg='
48function empty_fun(){}
49END {empty_fun()
50 print "Ok"
51}'
52testing "awk handles empty function f(){}" \
53 "awk '$prg'" \
54 "Ok\n" \
55 "" ""
56
57prg='
48function outer_fun() { 58function outer_fun() {
49 return 1 59 return 1
50} 60}
@@ -71,6 +81,23 @@ testing "awk properly handles undefined function" \
71 "L1\n\nawk: cmd. line:5: Call to undefined function\n" \ 81 "L1\n\nawk: cmd. line:5: Call to undefined function\n" \
72 "" "" 82 "" ""
73 83
84prg='
85BEGIN {
86 v=1
87 a=2
88 print v (a)
89}'
90testing "awk 'v (a)' is not a function call, it is a concatenation" \
91 "awk '$prg' 2>&1" \
92 "12\n" \
93 "" ""
94
95prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}'
96testing "awk unused function args are evaluated" \
97 "awk '$prg' 2>&1" \
98 "G\nG\nF\n" \
99 "" ""
100
74 101
75optional DESKTOP 102optional DESKTOP
76testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" 103testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n"
@@ -418,4 +445,9 @@ testing 'awk $NF is empty' \
418 '' \ 445 '' \
419 'a=====123=' 446 'a=====123='
420 447
448testing "awk exit N propagates through END's exit" \
449 "awk 'BEGIN { exit 42 } END { exit }'; echo \$?" \
450 "42\n" \
451 '' ''
452
421exit $FAILCOUNT 453exit $FAILCOUNT