diff options
author | Ron Yorston <rmy@pobox.com> | 2021-07-05 12:24:55 +0100 |
---|---|---|
committer | Ron Yorston <rmy@pobox.com> | 2021-07-05 12:24:55 +0100 |
commit | 57261b6b3a6d2955a5abd6a0563fe78ba43abf3f (patch) | |
tree | 3069f9f8bbe05104a8a4339479b3f07d246ff540 | |
parent | e1ad66c0b8fd58a7158d40771175a7dab224202d (diff) | |
parent | 08ca313d7edb99687068b93b5d2435b59f3db23a (diff) | |
download | busybox-w32-57261b6b3a6d2955a5abd6a0563fe78ba43abf3f.tar.gz busybox-w32-57261b6b3a6d2955a5abd6a0563fe78ba43abf3f.tar.bz2 busybox-w32-57261b6b3a6d2955a5abd6a0563fe78ba43abf3f.zip |
Merge branch 'busybox' into merge
-rw-r--r-- | editors/awk.c | 1694 | ||||
-rw-r--r-- | modutils/modprobe.c | 3 | ||||
-rwxr-xr-x | testsuite/awk.tests | 32 |
3 files changed, 957 insertions, 772 deletions
diff --git a/editors/awk.c b/editors/awk.c index 9b9b202db..c88b8e1c4 100644 --- a/editors/awk.c +++ b/editors/awk.c | |||
@@ -93,7 +93,6 @@ enum { | |||
93 | }; | 93 | }; |
94 | 94 | ||
95 | #define MAXVARFMT 240 | 95 | #define MAXVARFMT 240 |
96 | #define MINNVBLOCK 64 | ||
97 | 96 | ||
98 | /* variable flags */ | 97 | /* variable flags */ |
99 | #define VF_NUMBER 0x0001 /* 1 = primary type is number */ | 98 | #define VF_NUMBER 0x0001 /* 1 = primary type is number */ |
@@ -103,7 +102,7 @@ enum { | |||
103 | #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */ | 102 | #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */ |
104 | #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */ | 103 | #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */ |
105 | #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */ | 104 | #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */ |
106 | #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */ | 105 | #define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */ |
107 | #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */ | 106 | #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */ |
108 | #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */ | 107 | #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */ |
109 | 108 | ||
@@ -120,8 +119,8 @@ typedef struct walker_list { | |||
120 | /* Variable */ | 119 | /* Variable */ |
121 | typedef struct var_s { | 120 | typedef struct var_s { |
122 | unsigned type; /* flags */ | 121 | unsigned type; /* flags */ |
123 | double number; | ||
124 | char *string; | 122 | char *string; |
123 | double number; | ||
125 | union { | 124 | union { |
126 | int aidx; /* func arg idx (for compilation stage) */ | 125 | int aidx; /* func arg idx (for compilation stage) */ |
127 | struct xhash_s *array; /* array ptr */ | 126 | struct xhash_s *array; /* array ptr */ |
@@ -140,6 +139,7 @@ typedef struct chain_s { | |||
140 | /* Function */ | 139 | /* Function */ |
141 | typedef struct func_s { | 140 | typedef struct func_s { |
142 | unsigned nargs; | 141 | unsigned nargs; |
142 | smallint defined; | ||
143 | struct chain_s body; | 143 | struct chain_s body; |
144 | } func; | 144 | } func; |
145 | 145 | ||
@@ -179,7 +179,7 @@ typedef struct node_s { | |||
179 | struct node_s *n; | 179 | struct node_s *n; |
180 | var *v; | 180 | var *v; |
181 | int aidx; | 181 | int aidx; |
182 | char *new_progname; | 182 | const char *new_progname; |
183 | regex_t *re; | 183 | regex_t *re; |
184 | } l; | 184 | } l; |
185 | union { | 185 | union { |
@@ -192,63 +192,54 @@ typedef struct node_s { | |||
192 | } a; | 192 | } a; |
193 | } node; | 193 | } node; |
194 | 194 | ||
195 | /* Block of temporary variables */ | ||
196 | typedef struct nvblock_s { | ||
197 | int size; | ||
198 | var *pos; | ||
199 | struct nvblock_s *prev; | ||
200 | struct nvblock_s *next; | ||
201 | var nv[]; | ||
202 | } nvblock; | ||
203 | |||
204 | typedef struct tsplitter_s { | 195 | typedef struct tsplitter_s { |
205 | node n; | 196 | node n; |
206 | regex_t re[2]; | 197 | regex_t re[2]; |
207 | } tsplitter; | 198 | } tsplitter; |
208 | 199 | ||
209 | /* simple token classes */ | 200 | /* simple token classes */ |
210 | /* Order and hex values are very important!!! See next_token() */ | 201 | /* order and hex values are very important!!! See next_token() */ |
211 | #define TC_SEQSTART (1 << 0) /* ( */ | 202 | #define TC_LPAREN (1 << 0) /* ( */ |
212 | #define TC_SEQTERM (1 << 1) /* ) */ | 203 | #define TC_RPAREN (1 << 1) /* ) */ |
213 | #define TC_REGEXP (1 << 2) /* /.../ */ | 204 | #define TC_REGEXP (1 << 2) /* /.../ */ |
214 | #define TC_OUTRDR (1 << 3) /* | > >> */ | 205 | #define TC_OUTRDR (1 << 3) /* | > >> */ |
215 | #define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ | 206 | #define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */ |
216 | #define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ | 207 | #define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */ |
217 | #define TC_BINOPX (1 << 6) /* two-opnd operator */ | 208 | #define TC_BINOPX (1 << 6) /* two-opnd operator */ |
218 | #define TC_IN (1 << 7) | 209 | #define TC_IN (1 << 7) /* 'in' */ |
219 | #define TC_COMMA (1 << 8) | 210 | #define TC_COMMA (1 << 8) /* , */ |
220 | #define TC_PIPE (1 << 9) /* input redirection pipe */ | 211 | #define TC_PIPE (1 << 9) /* input redirection pipe | */ |
221 | #define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ | 212 | #define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */ |
222 | #define TC_ARRTERM (1 << 11) /* ] */ | 213 | #define TC_ARRTERM (1 << 11) /* ] */ |
223 | #define TC_GRPSTART (1 << 12) /* { */ | 214 | #define TC_LBRACE (1 << 12) /* { */ |
224 | #define TC_GRPTERM (1 << 13) /* } */ | 215 | #define TC_RBRACE (1 << 13) /* } */ |
225 | #define TC_SEMICOL (1 << 14) | 216 | #define TC_SEMICOL (1 << 14) /* ; */ |
226 | #define TC_NEWLINE (1 << 15) | 217 | #define TC_NEWLINE (1 << 15) |
227 | #define TC_STATX (1 << 16) /* ctl statement (for, next...) */ | 218 | #define TC_STATX (1 << 16) /* ctl statement (for, next...) */ |
228 | #define TC_WHILE (1 << 17) | 219 | #define TC_WHILE (1 << 17) /* 'while' */ |
229 | #define TC_ELSE (1 << 18) | 220 | #define TC_ELSE (1 << 18) /* 'else' */ |
230 | #define TC_BUILTIN (1 << 19) | 221 | #define TC_BUILTIN (1 << 19) |
231 | /* This costs ~50 bytes of code. | 222 | /* This costs ~50 bytes of code. |
232 | * A separate class to support deprecated "length" form. If we don't need that | 223 | * A separate class to support deprecated "length" form. If we don't need that |
233 | * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH | 224 | * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH |
234 | * can be merged with TC_BUILTIN: | 225 | * can be merged with TC_BUILTIN: |
235 | */ | 226 | */ |
236 | #define TC_LENGTH (1 << 20) | 227 | #define TC_LENGTH (1 << 20) /* 'length' */ |
237 | #define TC_GETLINE (1 << 21) | 228 | #define TC_GETLINE (1 << 21) /* 'getline' */ |
238 | #define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ | 229 | #define TC_FUNCDECL (1 << 22) /* 'function' 'func' */ |
239 | #define TC_BEGIN (1 << 23) | 230 | #define TC_BEGIN (1 << 23) /* 'BEGIN' */ |
240 | #define TC_END (1 << 24) | 231 | #define TC_END (1 << 24) /* 'END' */ |
241 | #define TC_EOF (1 << 25) | 232 | #define TC_EOF (1 << 25) |
242 | #define TC_VARIABLE (1 << 26) | 233 | #define TC_VARIABLE (1 << 26) /* name */ |
243 | #define TC_ARRAY (1 << 27) | 234 | #define TC_ARRAY (1 << 27) /* name[ */ |
244 | #define TC_FUNCTION (1 << 28) | 235 | #define TC_FUNCTION (1 << 28) /* name( */ |
245 | #define TC_STRING (1 << 29) | 236 | #define TC_STRING (1 << 29) /* "..." */ |
246 | #define TC_NUMBER (1 << 30) | 237 | #define TC_NUMBER (1 << 30) |
247 | 238 | ||
248 | #ifndef debug_parse_print_tc | 239 | #ifndef debug_parse_print_tc |
249 | #define debug_parse_print_tc(n) do { \ | 240 | #define debug_parse_print_tc(n) do { \ |
250 | if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \ | 241 | if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \ |
251 | if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \ | 242 | if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \ |
252 | if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \ | 243 | if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \ |
253 | if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \ | 244 | if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \ |
254 | if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \ | 245 | if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \ |
@@ -259,8 +250,8 @@ if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \ | |||
259 | if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \ | 250 | if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \ |
260 | if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \ | 251 | if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \ |
261 | if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \ | 252 | if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \ |
262 | if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \ | 253 | if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \ |
263 | if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \ | 254 | if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \ |
264 | if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \ | 255 | if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \ |
265 | if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \ | 256 | if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \ |
266 | if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \ | 257 | if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \ |
@@ -281,39 +272,39 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ | |||
281 | } while (0) | 272 | } while (0) |
282 | #endif | 273 | #endif |
283 | 274 | ||
284 | /* combined token classes */ | 275 | /* combined token classes ("token [class] sets") */ |
285 | #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) | 276 | #define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) |
286 | 277 | ||
287 | #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) | 278 | #define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) |
288 | //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) | 279 | //#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST) |
289 | #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ | 280 | #define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ |
290 | | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ | 281 | | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ |
291 | | TC_SEQSTART | TC_STRING | TC_NUMBER) | 282 | | TC_LPAREN | TC_STRING | TC_NUMBER) |
292 | #define TC_LVALUE (TC_VARIABLE | TC_ARRAY) | ||
293 | 283 | ||
294 | #define TC_STATEMNT (TC_STATX | TC_WHILE) | 284 | #define TS_LVALUE (TC_VARIABLE | TC_ARRAY) |
295 | #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) | 285 | #define TS_STATEMNT (TC_STATX | TC_WHILE) |
296 | 286 | ||
297 | /* word tokens, cannot mean something else if not expected */ | 287 | /* word tokens, cannot mean something else if not expected */ |
298 | #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \ | 288 | #define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \ |
299 | | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ | 289 | | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ |
300 | | TC_FUNCDECL | TC_BEGIN | TC_END) | 290 | | TC_FUNCDECL | TC_BEGIN | TC_END) |
301 | 291 | ||
302 | /* discard newlines after these */ | 292 | /* discard newlines after these */ |
303 | #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ | 293 | #define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \ |
304 | | TC_BINOP | TC_OPTERM) | 294 | | TC_SEMICOL | TC_NEWLINE) |
305 | 295 | ||
306 | /* what can expression begin with */ | 296 | /* what can expression begin with */ |
307 | #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP) | 297 | #define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP) |
308 | /* what can group begin with */ | 298 | /* what can group begin with */ |
309 | #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART) | 299 | #define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \ |
300 | | TC_SEMICOL | TC_NEWLINE | TC_LBRACE) | ||
310 | 301 | ||
311 | /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */ | 302 | /* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */ |
312 | /* operator is inserted between them */ | 303 | /* operator is inserted between them */ |
313 | #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \ | 304 | #define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \ |
314 | | TC_STRING | TC_NUMBER | TC_UOPPOST \ | 305 | | TC_STRING | TC_NUMBER | TC_UOPPOST \ |
315 | | TC_LENGTH) | 306 | | TC_LENGTH) |
316 | #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE) | 307 | #define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE) |
317 | 308 | ||
318 | #define OF_RES1 0x010000 | 309 | #define OF_RES1 0x010000 |
319 | #define OF_RES2 0x020000 | 310 | #define OF_RES2 0x020000 |
@@ -328,7 +319,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ | |||
328 | #define xV OF_RES2 | 319 | #define xV OF_RES2 |
329 | #define xS (OF_RES2 | OF_STR2) | 320 | #define xS (OF_RES2 | OF_STR2) |
330 | #define Vx OF_RES1 | 321 | #define Vx OF_RES1 |
331 | #define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED) | 322 | #define Rx OF_REQUIRED |
332 | #define VV (OF_RES1 | OF_RES2) | 323 | #define VV (OF_RES1 | OF_RES2) |
333 | #define Nx (OF_RES1 | OF_NUM1) | 324 | #define Nx (OF_RES1 | OF_NUM1) |
334 | #define NV (OF_RES1 | OF_NUM1 | OF_RES2) | 325 | #define NV (OF_RES1 | OF_NUM1 | OF_RES2) |
@@ -340,8 +331,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \ | |||
340 | #define OPNMASK 0x007F | 331 | #define OPNMASK 0x007F |
341 | 332 | ||
342 | /* operator priority is a highest byte (even: r->l, odd: l->r grouping) | 333 | /* operator priority is a highest byte (even: r->l, odd: l->r grouping) |
343 | * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, | 334 | * (for builtins it has different meaning) |
344 | * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string | ||
345 | */ | 335 | */ |
346 | #undef P | 336 | #undef P |
347 | #undef PRIMASK | 337 | #undef PRIMASK |
@@ -394,8 +384,8 @@ enum { | |||
394 | #define NTCC '\377' | 384 | #define NTCC '\377' |
395 | 385 | ||
396 | static const char tokenlist[] ALIGN1 = | 386 | static const char tokenlist[] ALIGN1 = |
397 | "\1(" NTC /* TC_SEQSTART */ | 387 | "\1(" NTC /* TC_LPAREN */ |
398 | "\1)" NTC /* TC_SEQTERM */ | 388 | "\1)" NTC /* TC_RPAREN */ |
399 | "\1/" NTC /* TC_REGEXP */ | 389 | "\1/" NTC /* TC_REGEXP */ |
400 | "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */ | 390 | "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */ |
401 | "\2++" "\2--" NTC /* TC_UOPPOST */ | 391 | "\2++" "\2--" NTC /* TC_UOPPOST */ |
@@ -412,8 +402,8 @@ static const char tokenlist[] ALIGN1 = | |||
412 | "\1|" NTC /* TC_PIPE */ | 402 | "\1|" NTC /* TC_PIPE */ |
413 | "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */ | 403 | "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */ |
414 | "\1]" NTC /* TC_ARRTERM */ | 404 | "\1]" NTC /* TC_ARRTERM */ |
415 | "\1{" NTC /* TC_GRPSTART */ | 405 | "\1{" NTC /* TC_LBRACE */ |
416 | "\1}" NTC /* TC_GRPTERM */ | 406 | "\1}" NTC /* TC_RBRACE */ |
417 | "\1;" NTC /* TC_SEMICOL */ | 407 | "\1;" NTC /* TC_SEMICOL */ |
418 | "\1\n" NTC /* TC_NEWLINE */ | 408 | "\1\n" NTC /* TC_NEWLINE */ |
419 | "\2if" "\2do" "\3for" "\5break" /* TC_STATX */ | 409 | "\2if" "\2do" "\3for" "\5break" /* TC_STATX */ |
@@ -439,12 +429,11 @@ static const char tokenlist[] ALIGN1 = | |||
439 | /* compiler adds trailing "\0" */ | 429 | /* compiler adds trailing "\0" */ |
440 | ; | 430 | ; |
441 | 431 | ||
442 | #define OC_B OC_BUILTIN | ||
443 | |||
444 | static const uint32_t tokeninfo[] ALIGN4 = { | 432 | static const uint32_t tokeninfo[] ALIGN4 = { |
445 | 0, | 433 | 0, |
446 | 0, | 434 | 0, |
447 | OC_REGEXP, | 435 | #define TI_REGEXP OC_REGEXP |
436 | TI_REGEXP, | ||
448 | xS|'a', xS|'w', xS|'|', | 437 | xS|'a', xS|'w', xS|'|', |
449 | OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', | 438 | OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', |
450 | #define TI_PREINC (OC_UNARY|xV|P(9)|'P') | 439 | #define TI_PREINC (OC_UNARY|xV|P(9)|'P') |
@@ -455,12 +444,17 @@ static const uint32_t tokeninfo[] ALIGN4 = { | |||
455 | OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', | 444 | OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', |
456 | OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', | 445 | OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', |
457 | OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, | 446 | OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, |
458 | #define TI_LESS (OC_COMPARE|VV|P(39)|2) | 447 | #define TI_LESS (OC_COMPARE|VV|P(39)|2) |
459 | TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), | 448 | TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), |
460 | OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', | 449 | #define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?') |
461 | OC_IN|SV|P(49), /* TC_IN */ | 450 | #define TI_COLON (OC_COLON|xx|P(67)|':') |
462 | OC_COMMA|SS|P(80), | 451 | OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON, |
463 | OC_PGETLINE|SV|P(37), | 452 | #define TI_IN (OC_IN|SV|P(49)) |
453 | TI_IN, | ||
454 | #define TI_COMMA (OC_COMMA|SS|P(80)) | ||
455 | TI_COMMA, | ||
456 | #define TI_PGETLINE (OC_PGETLINE|SV|P(37)) | ||
457 | TI_PGETLINE, | ||
464 | OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', | 458 | OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', |
465 | 0, /* ] */ | 459 | 0, /* ] */ |
466 | 0, | 460 | 0, |
@@ -468,25 +462,51 @@ static const uint32_t tokeninfo[] ALIGN4 = { | |||
468 | 0, | 462 | 0, |
469 | 0, /* \n */ | 463 | 0, /* \n */ |
470 | ST_IF, ST_DO, ST_FOR, OC_BREAK, | 464 | ST_IF, ST_DO, ST_FOR, OC_BREAK, |
471 | OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, | 465 | #define TI_PRINT OC_PRINT |
466 | OC_CONTINUE, OC_DELETE|Rx, TI_PRINT, | ||
472 | OC_PRINTF, OC_NEXT, OC_NEXTFILE, | 467 | OC_PRINTF, OC_NEXT, OC_NEXTFILE, |
473 | OC_RETURN|Vx, OC_EXIT|Nx, | 468 | OC_RETURN|Vx, OC_EXIT|Nx, |
474 | ST_WHILE, | 469 | ST_WHILE, |
475 | 0, /* else */ | 470 | 0, /* else */ |
476 | OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), | 471 | // OC_B's are builtins with enforced minimum number of arguments (two upper bits). |
477 | OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), | 472 | // Highest byte bit pattern: nn s3s2s1 v3v2v1 |
478 | OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), | 473 | // nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var |
479 | OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, | 474 | // OC_F's are builtins with zero or one argument. |
480 | OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, | 475 | // |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt |
481 | OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */ | 476 | // Check for no args is present in builtins' code (not in this table): rand, systime |
482 | OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), | 477 | // Have one _optional_ arg: fflush, srand, length |
483 | OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), | 478 | #define OC_B OC_BUILTIN |
484 | OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), | 479 | #define OC_F OC_FBLTIN |
485 | OC_FBLTIN|Sx|F_le, /* TC_LENGTH */ | 480 | #define A1 P(0x40) /*one arg*/ |
486 | OC_GETLINE|SV|P(0), | 481 | #define A2 P(0x80) /*two args*/ |
487 | 0, 0, | 482 | #define A3 P(0xc0) /*three args*/ |
488 | 0, | 483 | #define __v P(1) |
489 | 0 /* TC_END */ | 484 | #define _vv P(3) |
485 | #define __s__v P(9) | ||
486 | #define __s_vv P(0x0b) | ||
487 | #define __svvv P(0x0f) | ||
488 | #define _ss_vv P(0x1b) | ||
489 | #define _s_vv_ P(0x16) | ||
490 | #define ss_vv_ P(0x36) | ||
491 | OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or | ||
492 | OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor | ||
493 | OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2 | ||
494 | OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log | ||
495 | OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand | ||
496 | OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/ | ||
497 | OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub | ||
498 | OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime | ||
499 | OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper | ||
500 | OC_F|F_le|Sx, // length | ||
501 | OC_GETLINE|SV, // getline | ||
502 | 0, 0, // func function | ||
503 | 0, // BEGIN | ||
504 | 0 // END | ||
505 | #undef A1 | ||
506 | #undef A2 | ||
507 | #undef A3 | ||
508 | #undef OC_B | ||
509 | #undef OC_F | ||
490 | }; | 510 | }; |
491 | 511 | ||
492 | /* internal variable names and their initial values */ | 512 | /* internal variable names and their initial values */ |
@@ -527,21 +547,29 @@ struct globals { | |||
527 | chain *seq; | 547 | chain *seq; |
528 | node *break_ptr, *continue_ptr; | 548 | node *break_ptr, *continue_ptr; |
529 | rstream *iF; | 549 | rstream *iF; |
530 | xhash *vhash, *ahash, *fdhash, *fnhash; | 550 | xhash *ahash; /* argument names, used only while parsing function bodies */ |
551 | xhash *fnhash; /* function names, used only in parsing stage */ | ||
552 | xhash *vhash; /* variables and arrays */ | ||
553 | //xhash *fdhash; /* file objects, used only in execution stage */ | ||
554 | //we are reusing ahash as fdhash, via define (see later) | ||
531 | const char *g_progname; | 555 | const char *g_progname; |
532 | int g_lineno; | 556 | int g_lineno; |
533 | int nfields; | 557 | int nfields; |
534 | int maxfields; /* used in fsrealloc() only */ | 558 | int maxfields; /* used in fsrealloc() only */ |
535 | var *Fields; | 559 | var *Fields; |
536 | nvblock *g_cb; | ||
537 | char *g_pos; | 560 | char *g_pos; |
538 | char *g_buf; | 561 | char g_saved_ch; |
539 | smallint icase; | 562 | smallint icase; |
540 | smallint exiting; | 563 | smallint exiting; |
541 | smallint nextrec; | 564 | smallint nextrec; |
542 | smallint nextfile; | 565 | smallint nextfile; |
543 | smallint is_f0_split; | 566 | smallint is_f0_split; |
544 | smallint t_rollback; | 567 | smallint t_rollback; |
568 | |||
569 | /* former statics from various functions */ | ||
570 | smallint next_token__concat_inserted; | ||
571 | uint32_t next_token__save_tclass; | ||
572 | uint32_t next_token__save_info; | ||
545 | }; | 573 | }; |
546 | struct globals2 { | 574 | struct globals2 { |
547 | uint32_t t_info; /* often used */ | 575 | uint32_t t_info; /* often used */ |
@@ -554,32 +582,35 @@ struct globals2 { | |||
554 | /* former statics from various functions */ | 582 | /* former statics from various functions */ |
555 | char *split_f0__fstrings; | 583 | char *split_f0__fstrings; |
556 | 584 | ||
557 | uint32_t next_token__save_tclass; | ||
558 | uint32_t next_token__save_info; | ||
559 | uint32_t next_token__ltclass; | ||
560 | smallint next_token__concat_inserted; | ||
561 | |||
562 | smallint next_input_file__files_happen; | ||
563 | rstream next_input_file__rsm; | 585 | rstream next_input_file__rsm; |
586 | smallint next_input_file__files_happen; | ||
587 | |||
588 | smalluint exitcode; | ||
564 | 589 | ||
565 | var *evaluate__fnargs; | ||
566 | unsigned evaluate__seed; | 590 | unsigned evaluate__seed; |
591 | var *evaluate__fnargs; | ||
567 | regex_t evaluate__sreg; | 592 | regex_t evaluate__sreg; |
568 | 593 | ||
569 | var ptest__v; | 594 | var ptest__tmpvar; |
595 | var awk_printf__tmpvar; | ||
596 | var as_regex__tmpvar; | ||
597 | var exit__tmpvar; | ||
598 | var main__tmpvar; | ||
570 | 599 | ||
571 | tsplitter exec_builtin__tspl; | 600 | tsplitter exec_builtin__tspl; |
572 | 601 | ||
573 | /* biggest and least used members go last */ | 602 | /* biggest and least used members go last */ |
574 | tsplitter fsplitter, rsplitter; | 603 | tsplitter fsplitter, rsplitter; |
604 | |||
605 | char g_buf[MAXVARFMT + 1]; | ||
575 | }; | 606 | }; |
576 | #define G1 (ptr_to_globals[-1]) | 607 | #define G1 (ptr_to_globals[-1]) |
577 | #define G (*(struct globals2 *)ptr_to_globals) | 608 | #define G (*(struct globals2 *)ptr_to_globals) |
578 | /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */ | 609 | /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */ |
579 | /*char G1size[sizeof(G1)]; - 0x74 */ | 610 | //char G1size[sizeof(G1)]; // 0x70 |
580 | /*char Gsize[sizeof(G)]; - 0x1c4 */ | 611 | //char Gsize[sizeof(G)]; // 0x2f8 |
581 | /* Trying to keep most of members accessible with short offsets: */ | 612 | /* Trying to keep most of members accessible with short offsets: */ |
582 | /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ | 613 | //char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c |
583 | #define t_double (G1.t_double ) | 614 | #define t_double (G1.t_double ) |
584 | #define beginseq (G1.beginseq ) | 615 | #define beginseq (G1.beginseq ) |
585 | #define mainseq (G1.mainseq ) | 616 | #define mainseq (G1.mainseq ) |
@@ -588,18 +619,20 @@ struct globals2 { | |||
588 | #define break_ptr (G1.break_ptr ) | 619 | #define break_ptr (G1.break_ptr ) |
589 | #define continue_ptr (G1.continue_ptr) | 620 | #define continue_ptr (G1.continue_ptr) |
590 | #define iF (G1.iF ) | 621 | #define iF (G1.iF ) |
591 | #define vhash (G1.vhash ) | ||
592 | #define ahash (G1.ahash ) | 622 | #define ahash (G1.ahash ) |
593 | #define fdhash (G1.fdhash ) | ||
594 | #define fnhash (G1.fnhash ) | 623 | #define fnhash (G1.fnhash ) |
624 | #define vhash (G1.vhash ) | ||
625 | #define fdhash ahash | ||
626 | //^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing, | ||
627 | // and ends up empty after parsing phase. Thus, we can simply reuse it | ||
628 | // for fdhash in execution stage. | ||
595 | #define g_progname (G1.g_progname ) | 629 | #define g_progname (G1.g_progname ) |
596 | #define g_lineno (G1.g_lineno ) | 630 | #define g_lineno (G1.g_lineno ) |
597 | #define nfields (G1.nfields ) | 631 | #define nfields (G1.nfields ) |
598 | #define maxfields (G1.maxfields ) | 632 | #define maxfields (G1.maxfields ) |
599 | #define Fields (G1.Fields ) | 633 | #define Fields (G1.Fields ) |
600 | #define g_cb (G1.g_cb ) | ||
601 | #define g_pos (G1.g_pos ) | 634 | #define g_pos (G1.g_pos ) |
602 | #define g_buf (G1.g_buf ) | 635 | #define g_saved_ch (G1.g_saved_ch ) |
603 | #define icase (G1.icase ) | 636 | #define icase (G1.icase ) |
604 | #define exiting (G1.exiting ) | 637 | #define exiting (G1.exiting ) |
605 | #define nextrec (G1.nextrec ) | 638 | #define nextrec (G1.nextrec ) |
@@ -613,25 +646,13 @@ struct globals2 { | |||
613 | #define intvar (G.intvar ) | 646 | #define intvar (G.intvar ) |
614 | #define fsplitter (G.fsplitter ) | 647 | #define fsplitter (G.fsplitter ) |
615 | #define rsplitter (G.rsplitter ) | 648 | #define rsplitter (G.rsplitter ) |
649 | #define g_buf (G.g_buf ) | ||
616 | #define INIT_G() do { \ | 650 | #define INIT_G() do { \ |
617 | SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ | 651 | SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ |
618 | G.next_token__ltclass = TC_OPTERM; \ | 652 | t_tclass = TC_NEWLINE; \ |
619 | G.evaluate__seed = 1; \ | 653 | G.evaluate__seed = 1; \ |
620 | } while (0) | 654 | } while (0) |
621 | 655 | ||
622 | |||
623 | /* function prototypes */ | ||
624 | static void handle_special(var *); | ||
625 | static node *parse_expr(uint32_t); | ||
626 | static void chain_group(void); | ||
627 | static var *evaluate(node *, var *); | ||
628 | static rstream *next_input_file(void); | ||
629 | static int fmt_num(char *, int, const char *, double, int); | ||
630 | static int awk_exit(int) NORETURN; | ||
631 | |||
632 | /* ---- error handling ---- */ | ||
633 | |||
634 | static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error"; | ||
635 | static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; | 656 | static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; |
636 | static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; | 657 | static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; |
637 | static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; | 658 | static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; |
@@ -643,10 +664,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; | |||
643 | static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; | 664 | static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; |
644 | static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field"; | 665 | static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field"; |
645 | 666 | ||
646 | static void zero_out_var(var *vp) | 667 | static int awk_exit(void) NORETURN; |
647 | { | ||
648 | memset(vp, 0, sizeof(*vp)); | ||
649 | } | ||
650 | 668 | ||
651 | static void syntax_error(const char *message) NORETURN; | 669 | static void syntax_error(const char *message) NORETURN; |
652 | static void syntax_error(const char *message) | 670 | static void syntax_error(const char *message) |
@@ -677,12 +695,40 @@ static xhash *hash_init(void) | |||
677 | return newhash; | 695 | return newhash; |
678 | } | 696 | } |
679 | 697 | ||
698 | static void hash_clear(xhash *hash) | ||
699 | { | ||
700 | unsigned i; | ||
701 | hash_item *hi, *thi; | ||
702 | |||
703 | for (i = 0; i < hash->csize; i++) { | ||
704 | hi = hash->items[i]; | ||
705 | while (hi) { | ||
706 | thi = hi; | ||
707 | hi = hi->next; | ||
708 | //FIXME: this assumes that it's a hash of *variables*: | ||
709 | free(thi->data.v.string); | ||
710 | free(thi); | ||
711 | } | ||
712 | hash->items[i] = NULL; | ||
713 | } | ||
714 | hash->glen = hash->nel = 0; | ||
715 | } | ||
716 | |||
717 | #if 0 //UNUSED | ||
718 | static void hash_free(xhash *hash) | ||
719 | { | ||
720 | hash_clear(hash); | ||
721 | free(hash->items); | ||
722 | free(hash); | ||
723 | } | ||
724 | #endif | ||
725 | |||
680 | /* find item in hash, return ptr to data, NULL if not found */ | 726 | /* find item in hash, return ptr to data, NULL if not found */ |
681 | static void *hash_search(xhash *hash, const char *name) | 727 | static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx) |
682 | { | 728 | { |
683 | hash_item *hi; | 729 | hash_item *hi; |
684 | 730 | ||
685 | hi = hash->items[hashidx(name) % hash->csize]; | 731 | hi = hash->items[idx % hash->csize]; |
686 | while (hi) { | 732 | while (hi) { |
687 | if (strcmp(hi->name, name) == 0) | 733 | if (strcmp(hi->name, name) == 0) |
688 | return &hi->data; | 734 | return &hi->data; |
@@ -691,6 +737,11 @@ static void *hash_search(xhash *hash, const char *name) | |||
691 | return NULL; | 737 | return NULL; |
692 | } | 738 | } |
693 | 739 | ||
740 | static void *hash_search(xhash *hash, const char *name) | ||
741 | { | ||
742 | return hash_search3(hash, name, hashidx(name)); | ||
743 | } | ||
744 | |||
694 | /* grow hash if it becomes too big */ | 745 | /* grow hash if it becomes too big */ |
695 | static void hash_rebuild(xhash *hash) | 746 | static void hash_rebuild(xhash *hash) |
696 | { | 747 | { |
@@ -726,16 +777,17 @@ static void *hash_find(xhash *hash, const char *name) | |||
726 | unsigned idx; | 777 | unsigned idx; |
727 | int l; | 778 | int l; |
728 | 779 | ||
729 | hi = hash_search(hash, name); | 780 | idx = hashidx(name); |
781 | hi = hash_search3(hash, name, idx); | ||
730 | if (!hi) { | 782 | if (!hi) { |
731 | if (++hash->nel / hash->csize > 10) | 783 | if (++hash->nel > hash->csize * 8) |
732 | hash_rebuild(hash); | 784 | hash_rebuild(hash); |
733 | 785 | ||
734 | l = strlen(name) + 1; | 786 | l = strlen(name) + 1; |
735 | hi = xzalloc(sizeof(*hi) + l); | 787 | hi = xzalloc(sizeof(*hi) + l); |
736 | strcpy(hi->name, name); | 788 | strcpy(hi->name, name); |
737 | 789 | ||
738 | idx = hashidx(name) % hash->csize; | 790 | idx = idx % hash->csize; |
739 | hi->next = hash->items[idx]; | 791 | hi->next = hash->items[idx]; |
740 | hash->items[idx] = hi; | 792 | hash->items[idx] = hi; |
741 | hash->glen += l; | 793 | hash->glen += l; |
@@ -770,7 +822,7 @@ static void hash_remove(xhash *hash, const char *name) | |||
770 | 822 | ||
771 | static char *skip_spaces(char *p) | 823 | static char *skip_spaces(char *p) |
772 | { | 824 | { |
773 | while (1) { | 825 | for (;;) { |
774 | if (*p == '\\' && p[1] == '\n') { | 826 | if (*p == '\\' && p[1] == '\n') { |
775 | p++; | 827 | p++; |
776 | t_lineno++; | 828 | t_lineno++; |
@@ -790,8 +842,10 @@ static char *skip_spaces(char *p) | |||
790 | static char *nextword(char **s) | 842 | static char *nextword(char **s) |
791 | { | 843 | { |
792 | char *p = *s; | 844 | char *p = *s; |
793 | while (*(*s)++ != '\0') | 845 | char *q = p; |
846 | while (*q++ != '\0') | ||
794 | continue; | 847 | continue; |
848 | *s = q; | ||
795 | return p; | 849 | return p; |
796 | } | 850 | } |
797 | 851 | ||
@@ -854,10 +908,29 @@ static double my_strtod(char **pp) | |||
854 | 908 | ||
855 | /* -------- working with variables (set/get/copy/etc) -------- */ | 909 | /* -------- working with variables (set/get/copy/etc) -------- */ |
856 | 910 | ||
857 | static xhash *iamarray(var *v) | 911 | static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) |
858 | { | 912 | { |
859 | var *a = v; | 913 | int r = 0; |
914 | char c; | ||
915 | const char *s = format; | ||
860 | 916 | ||
917 | if (int_as_int && n == (long long)n) { | ||
918 | r = snprintf(b, size, "%lld", (long long)n); | ||
919 | } else { | ||
920 | do { c = *s; } while (c && *++s); | ||
921 | if (strchr("diouxX", c)) { | ||
922 | r = snprintf(b, size, format, (int)n); | ||
923 | } else if (strchr("eEfFgGaA", c)) { | ||
924 | r = snprintf(b, size, format, n); | ||
925 | } else { | ||
926 | syntax_error(EMSG_INV_FMT); | ||
927 | } | ||
928 | } | ||
929 | return r; | ||
930 | } | ||
931 | |||
932 | static xhash *iamarray(var *a) | ||
933 | { | ||
861 | while (a->type & VF_CHILD) | 934 | while (a->type & VF_CHILD) |
862 | a = a->x.parent; | 935 | a = a->x.parent; |
863 | 936 | ||
@@ -868,23 +941,7 @@ static xhash *iamarray(var *v) | |||
868 | return a->x.array; | 941 | return a->x.array; |
869 | } | 942 | } |
870 | 943 | ||
871 | static void clear_array(xhash *array) | 944 | #define clear_array(array) hash_clear(array) |
872 | { | ||
873 | unsigned i; | ||
874 | hash_item *hi, *thi; | ||
875 | |||
876 | for (i = 0; i < array->csize; i++) { | ||
877 | hi = array->items[i]; | ||
878 | while (hi) { | ||
879 | thi = hi; | ||
880 | hi = hi->next; | ||
881 | free(thi->data.v.string); | ||
882 | free(thi); | ||
883 | } | ||
884 | array->items[i] = NULL; | ||
885 | } | ||
886 | array->glen = array->nel = 0; | ||
887 | } | ||
888 | 945 | ||
889 | /* clear a variable */ | 946 | /* clear a variable */ |
890 | static var *clrvar(var *v) | 947 | static var *clrvar(var *v) |
@@ -898,6 +955,8 @@ static var *clrvar(var *v) | |||
898 | return v; | 955 | return v; |
899 | } | 956 | } |
900 | 957 | ||
958 | static void handle_special(var *); | ||
959 | |||
901 | /* assign string value to variable */ | 960 | /* assign string value to variable */ |
902 | static var *setvar_p(var *v, char *value) | 961 | static var *setvar_p(var *v, char *value) |
903 | { | 962 | { |
@@ -963,6 +1022,7 @@ static double getvar_i(var *v) | |||
963 | v->number = my_strtod(&s); | 1022 | v->number = my_strtod(&s); |
964 | debug_printf_eval("%f (s:'%s')\n", v->number, s); | 1023 | debug_printf_eval("%f (s:'%s')\n", v->number, s); |
965 | if (v->type & VF_USER) { | 1024 | if (v->type & VF_USER) { |
1025 | //TODO: skip_spaces() also skips backslash+newline, is it intended here? | ||
966 | s = skip_spaces(s); | 1026 | s = skip_spaces(s); |
967 | if (*s != '\0') | 1027 | if (*s != '\0') |
968 | v->type &= ~VF_USER; | 1028 | v->type &= ~VF_USER; |
@@ -1024,94 +1084,24 @@ static int istrue(var *v) | |||
1024 | return (v->string && v->string[0]); | 1084 | return (v->string && v->string[0]); |
1025 | } | 1085 | } |
1026 | 1086 | ||
1027 | /* temporary variables allocator. Last allocated should be first freed */ | ||
1028 | static var *nvalloc(int n) | ||
1029 | { | ||
1030 | nvblock *pb = NULL; | ||
1031 | var *v, *r; | ||
1032 | int size; | ||
1033 | |||
1034 | while (g_cb) { | ||
1035 | pb = g_cb; | ||
1036 | if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) | ||
1037 | break; | ||
1038 | g_cb = g_cb->next; | ||
1039 | } | ||
1040 | |||
1041 | if (!g_cb) { | ||
1042 | size = (n <= MINNVBLOCK) ? MINNVBLOCK : n; | ||
1043 | g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var)); | ||
1044 | g_cb->size = size; | ||
1045 | g_cb->pos = g_cb->nv; | ||
1046 | g_cb->prev = pb; | ||
1047 | /*g_cb->next = NULL; - xzalloc did it */ | ||
1048 | if (pb) | ||
1049 | pb->next = g_cb; | ||
1050 | } | ||
1051 | |||
1052 | v = r = g_cb->pos; | ||
1053 | g_cb->pos += n; | ||
1054 | |||
1055 | while (v < g_cb->pos) { | ||
1056 | v->type = 0; | ||
1057 | v->string = NULL; | ||
1058 | v++; | ||
1059 | } | ||
1060 | |||
1061 | return r; | ||
1062 | } | ||
1063 | |||
1064 | static void nvfree(var *v) | ||
1065 | { | ||
1066 | var *p; | ||
1067 | |||
1068 | if (v < g_cb->nv || v >= g_cb->pos) | ||
1069 | syntax_error(EMSG_INTERNAL_ERROR); | ||
1070 | |||
1071 | for (p = v; p < g_cb->pos; p++) { | ||
1072 | if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { | ||
1073 | clear_array(iamarray(p)); | ||
1074 | free(p->x.array->items); | ||
1075 | free(p->x.array); | ||
1076 | } | ||
1077 | if (p->type & VF_WALK) { | ||
1078 | walker_list *n; | ||
1079 | walker_list *w = p->x.walker; | ||
1080 | debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); | ||
1081 | p->x.walker = NULL; | ||
1082 | while (w) { | ||
1083 | n = w->prev; | ||
1084 | debug_printf_walker(" free(%p)\n", w); | ||
1085 | free(w); | ||
1086 | w = n; | ||
1087 | } | ||
1088 | } | ||
1089 | clrvar(p); | ||
1090 | } | ||
1091 | |||
1092 | g_cb->pos = v; | ||
1093 | while (g_cb->prev && g_cb->pos == g_cb->nv) { | ||
1094 | g_cb = g_cb->prev; | ||
1095 | } | ||
1096 | } | ||
1097 | |||
1098 | /* ------- awk program text parsing ------- */ | 1087 | /* ------- awk program text parsing ------- */ |
1099 | 1088 | ||
1100 | /* Parse next token pointed by global pos, place results into global ttt. | 1089 | /* Parse next token pointed by global pos, place results into global t_XYZ variables. |
1101 | * If token isn't expected, give away. Return token class | 1090 | * If token isn't expected, print error message and die. |
1091 | * Return token class (also store it in t_tclass). | ||
1102 | */ | 1092 | */ |
1103 | static uint32_t next_token(uint32_t expected) | 1093 | static uint32_t next_token(uint32_t expected) |
1104 | { | 1094 | { |
1105 | #define concat_inserted (G.next_token__concat_inserted) | 1095 | #define concat_inserted (G1.next_token__concat_inserted) |
1106 | #define save_tclass (G.next_token__save_tclass) | 1096 | #define save_tclass (G1.next_token__save_tclass) |
1107 | #define save_info (G.next_token__save_info) | 1097 | #define save_info (G1.next_token__save_info) |
1108 | /* Initialized to TC_OPTERM: */ | ||
1109 | #define ltclass (G.next_token__ltclass) | ||
1110 | 1098 | ||
1111 | char *p, *s; | 1099 | char *p; |
1112 | const char *tl; | 1100 | const char *tl; |
1113 | uint32_t tc; | ||
1114 | const uint32_t *ti; | 1101 | const uint32_t *ti; |
1102 | uint32_t tc, last_token_class; | ||
1103 | |||
1104 | last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */ | ||
1115 | 1105 | ||
1116 | debug_printf_parse("%s() expected(%x):", __func__, expected); | 1106 | debug_printf_parse("%s() expected(%x):", __func__, expected); |
1117 | debug_parse_print_tc(expected); | 1107 | debug_parse_print_tc(expected); |
@@ -1127,6 +1117,10 @@ static uint32_t next_token(uint32_t expected) | |||
1127 | t_info = save_info; | 1117 | t_info = save_info; |
1128 | } else { | 1118 | } else { |
1129 | p = g_pos; | 1119 | p = g_pos; |
1120 | if (g_saved_ch != '\0') { | ||
1121 | *p = g_saved_ch; | ||
1122 | g_saved_ch = '\0'; | ||
1123 | } | ||
1130 | readnext: | 1124 | readnext: |
1131 | p = skip_spaces(p); | 1125 | p = skip_spaces(p); |
1132 | g_lineno = t_lineno; | 1126 | g_lineno = t_lineno; |
@@ -1134,15 +1128,12 @@ static uint32_t next_token(uint32_t expected) | |||
1134 | while (*p != '\n' && *p != '\0') | 1128 | while (*p != '\n' && *p != '\0') |
1135 | p++; | 1129 | p++; |
1136 | 1130 | ||
1137 | if (*p == '\n') | ||
1138 | t_lineno++; | ||
1139 | |||
1140 | if (*p == '\0') { | 1131 | if (*p == '\0') { |
1141 | tc = TC_EOF; | 1132 | tc = TC_EOF; |
1142 | debug_printf_parse("%s: token found: TC_EOF\n", __func__); | 1133 | debug_printf_parse("%s: token found: TC_EOF\n", __func__); |
1143 | } else if (*p == '\"') { | 1134 | } else if (*p == '\"') { |
1144 | /* it's a string */ | 1135 | /* it's a string */ |
1145 | t_string = s = ++p; | 1136 | char *s = t_string = ++p; |
1146 | while (*p != '\"') { | 1137 | while (*p != '\"') { |
1147 | char *pp; | 1138 | char *pp; |
1148 | if (*p == '\0' || *p == '\n') | 1139 | if (*p == '\0' || *p == '\n') |
@@ -1157,7 +1148,7 @@ static uint32_t next_token(uint32_t expected) | |||
1157 | debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); | 1148 | debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); |
1158 | } else if ((expected & TC_REGEXP) && *p == '/') { | 1149 | } else if ((expected & TC_REGEXP) && *p == '/') { |
1159 | /* it's regexp */ | 1150 | /* it's regexp */ |
1160 | t_string = s = ++p; | 1151 | char *s = t_string = ++p; |
1161 | while (*p != '/') { | 1152 | while (*p != '/') { |
1162 | if (*p == '\0' || *p == '\n') | 1153 | if (*p == '\0' || *p == '\n') |
1163 | syntax_error(EMSG_UNEXP_EOS); | 1154 | syntax_error(EMSG_UNEXP_EOS); |
@@ -1188,6 +1179,11 @@ static uint32_t next_token(uint32_t expected) | |||
1188 | tc = TC_NUMBER; | 1179 | tc = TC_NUMBER; |
1189 | debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); | 1180 | debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); |
1190 | } else { | 1181 | } else { |
1182 | char *end_of_name; | ||
1183 | |||
1184 | if (*p == '\n') | ||
1185 | t_lineno++; | ||
1186 | |||
1191 | /* search for something known */ | 1187 | /* search for something known */ |
1192 | tl = tokenlist; | 1188 | tl = tokenlist; |
1193 | tc = 0x00000001; | 1189 | tc = 0x00000001; |
@@ -1202,9 +1198,9 @@ static uint32_t next_token(uint32_t expected) | |||
1202 | * token matches, | 1198 | * token matches, |
1203 | * and it's not a longer word, | 1199 | * and it's not a longer word, |
1204 | */ | 1200 | */ |
1205 | if ((tc & (expected | TC_WORD | TC_NEWLINE)) | 1201 | if ((tc & (expected | TS_WORD | TC_NEWLINE)) |
1206 | && strncmp(p, tl, l) == 0 | 1202 | && strncmp(p, tl, l) == 0 |
1207 | && !((tc & TC_WORD) && isalnum_(p[l])) | 1203 | && !((tc & TS_WORD) && isalnum_(p[l])) |
1208 | ) { | 1204 | ) { |
1209 | /* then this is what we are looking for */ | 1205 | /* then this is what we are looking for */ |
1210 | t_info = *ti; | 1206 | t_info = *ti; |
@@ -1221,71 +1217,94 @@ static uint32_t next_token(uint32_t expected) | |||
1221 | if (!isalnum_(*p)) | 1217 | if (!isalnum_(*p)) |
1222 | syntax_error(EMSG_UNEXP_TOKEN); /* no */ | 1218 | syntax_error(EMSG_UNEXP_TOKEN); /* no */ |
1223 | /* yes */ | 1219 | /* yes */ |
1224 | /* "move name one char back" trick: we need a byte for NUL terminator */ | 1220 | t_string = p; |
1225 | /* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */ | 1221 | while (isalnum_(*p)) |
1226 | t_string = --p; | 1222 | p++; |
1227 | while (isalnum_(*++p)) { | 1223 | end_of_name = p; |
1228 | p[-1] = *p; | 1224 | |
1229 | } | 1225 | if (last_token_class == TC_FUNCDECL) |
1230 | p[-1] = '\0'; | 1226 | /* eat space in "function FUNC (...) {...}" declaration */ |
1231 | tc = TC_VARIABLE; | ||
1232 | /* also consume whitespace between functionname and bracket */ | ||
1233 | if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) | ||
1234 | p = skip_spaces(p); | 1227 | p = skip_spaces(p); |
1228 | else if (expected & TC_ARRAY) { | ||
1229 | /* eat space between array name and [ */ | ||
1230 | char *s = skip_spaces(p); | ||
1231 | if (*s == '[') /* array ref, not just a name? */ | ||
1232 | p = s; | ||
1233 | } | ||
1234 | /* else: do NOT consume whitespace after variable name! | ||
1235 | * gawk allows definition "function FUNC (p) {...}" - note space, | ||
1236 | * but disallows the call "FUNC (p)" because it isn't one - | ||
1237 | * expression "v (a)" should NOT be parsed as TC_FUNCTION: | ||
1238 | * it is a valid concatenation if "v" is a variable, | ||
1239 | * not a function name (and type of name is not known at parse time). | ||
1240 | */ | ||
1241 | |||
1235 | if (*p == '(') { | 1242 | if (*p == '(') { |
1243 | p++; | ||
1236 | tc = TC_FUNCTION; | 1244 | tc = TC_FUNCTION; |
1237 | debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); | 1245 | debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); |
1246 | } else if (*p == '[') { | ||
1247 | p++; | ||
1248 | tc = TC_ARRAY; | ||
1249 | debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); | ||
1238 | } else { | 1250 | } else { |
1239 | if (*p == '[') { | 1251 | tc = TC_VARIABLE; |
1240 | p++; | 1252 | debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); |
1241 | tc = TC_ARRAY; | 1253 | if (end_of_name == p) { |
1242 | debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); | 1254 | /* there is no space for trailing NUL in t_string! |
1243 | } else | 1255 | * We need to save the char we are going to NUL. |
1244 | debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); | 1256 | * (we'll use it in future call to next_token()) |
1257 | */ | ||
1258 | g_saved_ch = *end_of_name; | ||
1259 | // especially pathological example is V="abc"; V.2 - it's V concatenated to .2 | ||
1260 | // (it evaluates to "abc0.2"). Because of this case, we can't simply cache | ||
1261 | // '.' and analyze it later: we also have to *store it back* in next | ||
1262 | // next_token(), in order to give my_strtod() the undamaged ".2" string. | ||
1263 | } | ||
1245 | } | 1264 | } |
1265 | *end_of_name = '\0'; /* terminate t_string */ | ||
1246 | } | 1266 | } |
1247 | token_found: | 1267 | token_found: |
1248 | g_pos = p; | 1268 | g_pos = p; |
1249 | 1269 | ||
1250 | /* skipping newlines in some cases */ | 1270 | /* skipping newlines in some cases */ |
1251 | if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE)) | 1271 | if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE)) |
1252 | goto readnext; | 1272 | goto readnext; |
1253 | 1273 | ||
1254 | /* insert concatenation operator when needed */ | 1274 | /* insert concatenation operator when needed */ |
1255 | debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__, | 1275 | debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__, |
1256 | (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP)); | 1276 | (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP), |
1257 | if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP) | 1277 | !(last_token_class == TC_LENGTH && tc == TC_LPAREN)); |
1258 | && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */ | 1278 | if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP) |
1279 | && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */ | ||
1259 | ) { | 1280 | ) { |
1260 | concat_inserted = TRUE; | 1281 | concat_inserted = TRUE; |
1261 | save_tclass = tc; | 1282 | save_tclass = tc; |
1262 | save_info = t_info; | 1283 | save_info = t_info; |
1263 | tc = TC_BINOP; | 1284 | tc = TC_BINOPX; |
1264 | t_info = OC_CONCAT | SS | P(35); | 1285 | t_info = OC_CONCAT | SS | P(35); |
1265 | } | 1286 | } |
1266 | 1287 | ||
1267 | debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass); | ||
1268 | t_tclass = tc; | 1288 | t_tclass = tc; |
1289 | debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc); | ||
1269 | } | 1290 | } |
1270 | ltclass = t_tclass; | ||
1271 | |||
1272 | /* Are we ready for this? */ | 1291 | /* Are we ready for this? */ |
1273 | if (!(ltclass & expected)) { | 1292 | if (!(t_tclass & expected)) { |
1274 | syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? | 1293 | syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ? |
1275 | EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); | 1294 | EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); |
1276 | } | 1295 | } |
1277 | 1296 | ||
1278 | debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double); | 1297 | debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double); |
1279 | debug_parse_print_tc(ltclass); | 1298 | debug_parse_print_tc(t_tclass); |
1280 | debug_printf_parse("\n"); | 1299 | debug_printf_parse("\n"); |
1281 | return ltclass; | 1300 | |
1301 | return t_tclass; | ||
1282 | #undef concat_inserted | 1302 | #undef concat_inserted |
1283 | #undef save_tclass | 1303 | #undef save_tclass |
1284 | #undef save_info | 1304 | #undef save_info |
1285 | #undef ltclass | ||
1286 | } | 1305 | } |
1287 | 1306 | ||
1288 | static void rollback_token(void) | 1307 | static ALWAYS_INLINE void rollback_token(void) |
1289 | { | 1308 | { |
1290 | t_rollback = TRUE; | 1309 | t_rollback = TRUE; |
1291 | } | 1310 | } |
@@ -1302,17 +1321,19 @@ static node *new_node(uint32_t info) | |||
1302 | 1321 | ||
1303 | static void mk_re_node(const char *s, node *n, regex_t *re) | 1322 | static void mk_re_node(const char *s, node *n, regex_t *re) |
1304 | { | 1323 | { |
1305 | n->info = OC_REGEXP; | 1324 | n->info = TI_REGEXP; |
1306 | n->l.re = re; | 1325 | n->l.re = re; |
1307 | n->r.ire = re + 1; | 1326 | n->r.ire = re + 1; |
1308 | xregcomp(re, s, REG_EXTENDED); | 1327 | xregcomp(re, s, REG_EXTENDED); |
1309 | xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); | 1328 | xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); |
1310 | } | 1329 | } |
1311 | 1330 | ||
1312 | static node *condition(void) | 1331 | static node *parse_expr(uint32_t); |
1332 | |||
1333 | static node *parse_lrparen_list(void) | ||
1313 | { | 1334 | { |
1314 | next_token(TC_SEQSTART); | 1335 | next_token(TC_LPAREN); |
1315 | return parse_expr(TC_SEQTERM); | 1336 | return parse_expr(TC_RPAREN); |
1316 | } | 1337 | } |
1317 | 1338 | ||
1318 | /* parse expression terminated by given argument, return ptr | 1339 | /* parse expression terminated by given argument, return ptr |
@@ -1322,7 +1343,7 @@ static node *parse_expr(uint32_t term_tc) | |||
1322 | node sn; | 1343 | node sn; |
1323 | node *cn = &sn; | 1344 | node *cn = &sn; |
1324 | node *vn, *glptr; | 1345 | node *vn, *glptr; |
1325 | uint32_t tc, xtc; | 1346 | uint32_t tc, expected_tc; |
1326 | var *v; | 1347 | var *v; |
1327 | 1348 | ||
1328 | debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); | 1349 | debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); |
@@ -1331,145 +1352,157 @@ static node *parse_expr(uint32_t term_tc) | |||
1331 | 1352 | ||
1332 | sn.info = PRIMASK; | 1353 | sn.info = PRIMASK; |
1333 | sn.r.n = sn.a.n = glptr = NULL; | 1354 | sn.r.n = sn.a.n = glptr = NULL; |
1334 | xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc; | 1355 | expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; |
1335 | 1356 | ||
1336 | while (!((tc = next_token(xtc)) & term_tc)) { | 1357 | while (!((tc = next_token(expected_tc)) & term_tc)) { |
1337 | 1358 | ||
1338 | if (glptr && (t_info == TI_LESS)) { | 1359 | if (glptr && (t_info == TI_LESS)) { |
1339 | /* input redirection (<) attached to glptr node */ | 1360 | /* input redirection (<) attached to glptr node */ |
1340 | debug_printf_parse("%s: input redir\n", __func__); | 1361 | debug_printf_parse("%s: input redir\n", __func__); |
1341 | cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); | 1362 | cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); |
1342 | cn->a.n = glptr; | 1363 | cn->a.n = glptr; |
1343 | xtc = TC_OPERAND | TC_UOPPRE; | 1364 | expected_tc = TS_OPERAND | TS_UOPPRE; |
1344 | glptr = NULL; | 1365 | glptr = NULL; |
1345 | 1366 | continue; | |
1346 | } else if (tc & (TC_BINOP | TC_UOPPOST)) { | 1367 | } |
1347 | debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); | 1368 | if (tc & (TS_BINOP | TC_UOPPOST)) { |
1369 | debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); | ||
1348 | /* for binary and postfix-unary operators, jump back over | 1370 | /* for binary and postfix-unary operators, jump back over |
1349 | * previous operators with higher priority */ | 1371 | * previous operators with higher priority */ |
1350 | vn = cn; | 1372 | vn = cn; |
1351 | while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) | 1373 | while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) |
1352 | || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) | 1374 | || ((t_info == vn->info) && t_info == TI_COLON) |
1353 | ) { | 1375 | ) { |
1354 | vn = vn->a.n; | 1376 | vn = vn->a.n; |
1355 | if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); | 1377 | if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); |
1356 | } | 1378 | } |
1357 | if ((t_info & OPCLSMASK) == OC_TERNARY) | 1379 | if (t_info == TI_TERNARY) |
1380 | //TODO: why? | ||
1358 | t_info += P(6); | 1381 | t_info += P(6); |
1359 | cn = vn->a.n->r.n = new_node(t_info); | 1382 | cn = vn->a.n->r.n = new_node(t_info); |
1360 | cn->a.n = vn->a.n; | 1383 | cn->a.n = vn->a.n; |
1361 | if (tc & TC_BINOP) { | 1384 | if (tc & TS_BINOP) { |
1362 | cn->l.n = vn; | 1385 | cn->l.n = vn; |
1363 | xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; | 1386 | //FIXME: this is the place to detect and reject assignments to non-lvalues. |
1364 | if ((t_info & OPCLSMASK) == OC_PGETLINE) { | 1387 | //Currently we allow "assignments" to consts and temporaries, nonsense like this: |
1388 | // awk 'BEGIN { "qwe" = 1 }' | ||
1389 | // awk 'BEGIN { 7 *= 7 }' | ||
1390 | // awk 'BEGIN { length("qwe") = 1 }' | ||
1391 | // awk 'BEGIN { (1+1) += 3 }' | ||
1392 | expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; | ||
1393 | if (t_info == TI_PGETLINE) { | ||
1365 | /* it's a pipe */ | 1394 | /* it's a pipe */ |
1366 | next_token(TC_GETLINE); | 1395 | next_token(TC_GETLINE); |
1367 | /* give maximum priority to this pipe */ | 1396 | /* give maximum priority to this pipe */ |
1368 | cn->info &= ~PRIMASK; | 1397 | cn->info &= ~PRIMASK; |
1369 | xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; | 1398 | expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; |
1370 | } | 1399 | } |
1371 | } else { | 1400 | } else { |
1372 | cn->r.n = vn; | 1401 | cn->r.n = vn; |
1373 | xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; | 1402 | expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; |
1374 | } | 1403 | } |
1375 | vn->a.n = cn; | 1404 | vn->a.n = cn; |
1405 | continue; | ||
1406 | } | ||
1376 | 1407 | ||
1377 | } else { | 1408 | debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); |
1378 | debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info); | 1409 | /* for operands and prefix-unary operators, attach them |
1379 | /* for operands and prefix-unary operators, attach them | 1410 | * to last node */ |
1380 | * to last node */ | 1411 | vn = cn; |
1381 | vn = cn; | 1412 | cn = vn->r.n = new_node(t_info); |
1382 | cn = vn->r.n = new_node(t_info); | 1413 | cn->a.n = vn; |
1383 | cn->a.n = vn; | ||
1384 | 1414 | ||
1385 | xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; | 1415 | expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; |
1386 | if (t_info == TI_PREINC || t_info == TI_PREDEC) | 1416 | if (t_info == TI_PREINC || t_info == TI_PREDEC) |
1387 | xtc = TC_LVALUE | TC_UOPPRE1; | 1417 | expected_tc = TS_LVALUE | TC_UOPPRE1; |
1388 | if (tc & (TC_OPERAND | TC_REGEXP)) { | ||
1389 | debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__); | ||
1390 | xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc; | ||
1391 | /* one should be very careful with switch on tclass - | ||
1392 | * only simple tclasses should be used! */ | ||
1393 | switch (tc) { | ||
1394 | case TC_VARIABLE: | ||
1395 | case TC_ARRAY: | ||
1396 | debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); | ||
1397 | cn->info = OC_VAR; | ||
1398 | v = hash_search(ahash, t_string); | ||
1399 | if (v != NULL) { | ||
1400 | cn->info = OC_FNARG; | ||
1401 | cn->l.aidx = v->x.aidx; | ||
1402 | } else { | ||
1403 | cn->l.v = newvar(t_string); | ||
1404 | } | ||
1405 | if (tc & TC_ARRAY) { | ||
1406 | cn->info |= xS; | ||
1407 | cn->r.n = parse_expr(TC_ARRTERM); | ||
1408 | } | ||
1409 | break; | ||
1410 | 1418 | ||
1411 | case TC_NUMBER: | 1419 | if (!(tc & (TS_OPERAND | TC_REGEXP))) |
1412 | case TC_STRING: | 1420 | continue; |
1413 | debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); | ||
1414 | cn->info = OC_VAR; | ||
1415 | v = cn->l.v = xzalloc(sizeof(var)); | ||
1416 | if (tc & TC_NUMBER) | ||
1417 | setvar_i(v, t_double); | ||
1418 | else { | ||
1419 | setvar_s(v, t_string); | ||
1420 | xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */ | ||
1421 | } | ||
1422 | break; | ||
1423 | 1421 | ||
1424 | case TC_REGEXP: | 1422 | debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__); |
1425 | debug_printf_parse("%s: TC_REGEXP\n", __func__); | 1423 | expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc; |
1426 | mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); | 1424 | /* one should be very careful with switch on tclass - |
1427 | break; | 1425 | * only simple tclasses should be used (TC_xyz, not TS_xyz) */ |
1426 | switch (tc) { | ||
1427 | case TC_VARIABLE: | ||
1428 | case TC_ARRAY: | ||
1429 | debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); | ||
1430 | cn->info = OC_VAR; | ||
1431 | v = hash_search(ahash, t_string); | ||
1432 | if (v != NULL) { | ||
1433 | cn->info = OC_FNARG; | ||
1434 | cn->l.aidx = v->x.aidx; | ||
1435 | } else { | ||
1436 | cn->l.v = newvar(t_string); | ||
1437 | } | ||
1438 | if (tc & TC_ARRAY) { | ||
1439 | cn->info |= xS; | ||
1440 | cn->r.n = parse_expr(TC_ARRTERM); | ||
1441 | } | ||
1442 | break; | ||
1428 | 1443 | ||
1429 | case TC_FUNCTION: | 1444 | case TC_NUMBER: |
1430 | debug_printf_parse("%s: TC_FUNCTION\n", __func__); | 1445 | case TC_STRING: |
1431 | cn->info = OC_FUNC; | 1446 | debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); |
1432 | cn->r.f = newfunc(t_string); | 1447 | cn->info = OC_VAR; |
1433 | cn->l.n = condition(); | 1448 | v = cn->l.v = xzalloc(sizeof(var)); |
1434 | break; | 1449 | if (tc & TC_NUMBER) |
1450 | setvar_i(v, t_double); | ||
1451 | else { | ||
1452 | setvar_s(v, t_string); | ||
1453 | expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ | ||
1454 | } | ||
1455 | break; | ||
1435 | 1456 | ||
1436 | case TC_SEQSTART: | 1457 | case TC_REGEXP: |
1437 | debug_printf_parse("%s: TC_SEQSTART\n", __func__); | 1458 | debug_printf_parse("%s: TC_REGEXP\n", __func__); |
1438 | cn = vn->r.n = parse_expr(TC_SEQTERM); | 1459 | mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); |
1439 | if (!cn) | 1460 | break; |
1440 | syntax_error("Empty sequence"); | ||
1441 | cn->a.n = vn; | ||
1442 | break; | ||
1443 | 1461 | ||
1444 | case TC_GETLINE: | 1462 | case TC_FUNCTION: |
1445 | debug_printf_parse("%s: TC_GETLINE\n", __func__); | 1463 | debug_printf_parse("%s: TC_FUNCTION\n", __func__); |
1446 | glptr = cn; | 1464 | cn->info = OC_FUNC; |
1447 | xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc; | 1465 | cn->r.f = newfunc(t_string); |
1448 | break; | 1466 | cn->l.n = parse_expr(TC_RPAREN); |
1467 | break; | ||
1449 | 1468 | ||
1450 | case TC_BUILTIN: | 1469 | case TC_LPAREN: |
1451 | debug_printf_parse("%s: TC_BUILTIN\n", __func__); | 1470 | debug_printf_parse("%s: TC_LPAREN\n", __func__); |
1452 | cn->l.n = condition(); | 1471 | cn = vn->r.n = parse_expr(TC_RPAREN); |
1453 | break; | 1472 | if (!cn) |
1473 | syntax_error("Empty sequence"); | ||
1474 | cn->a.n = vn; | ||
1475 | break; | ||
1454 | 1476 | ||
1455 | case TC_LENGTH: | 1477 | case TC_GETLINE: |
1456 | debug_printf_parse("%s: TC_LENGTH\n", __func__); | 1478 | debug_printf_parse("%s: TC_GETLINE\n", __func__); |
1457 | next_token(TC_SEQSTART /* length(...) */ | 1479 | glptr = cn; |
1458 | | TC_OPTERM /* length; (or newline)*/ | 1480 | expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc; |
1459 | | TC_GRPTERM /* length } */ | 1481 | break; |
1460 | | TC_BINOPX /* length <op> NUM */ | 1482 | |
1461 | | TC_COMMA /* print length, 1 */ | 1483 | case TC_BUILTIN: |
1462 | ); | 1484 | debug_printf_parse("%s: TC_BUILTIN\n", __func__); |
1463 | rollback_token(); | 1485 | cn->l.n = parse_lrparen_list(); |
1464 | if (t_tclass & TC_SEQSTART) { | 1486 | break; |
1465 | /* It was a "(" token. Handle just like TC_BUILTIN */ | 1487 | |
1466 | cn->l.n = condition(); | 1488 | case TC_LENGTH: |
1467 | } | 1489 | debug_printf_parse("%s: TC_LENGTH\n", __func__); |
1468 | break; | 1490 | tc = next_token(TC_LPAREN /* length(...) */ |
1469 | } | 1491 | | TC_SEMICOL /* length; */ |
1492 | | TC_NEWLINE /* length<newline> */ | ||
1493 | | TC_RBRACE /* length } */ | ||
1494 | | TC_BINOPX /* length <op> NUM */ | ||
1495 | | TC_COMMA /* print length, 1 */ | ||
1496 | ); | ||
1497 | if (tc != TC_LPAREN) | ||
1498 | rollback_token(); | ||
1499 | else { | ||
1500 | /* It was a "(" token. Handle just like TC_BUILTIN */ | ||
1501 | cn->l.n = parse_expr(TC_RPAREN); | ||
1470 | } | 1502 | } |
1503 | break; | ||
1471 | } | 1504 | } |
1472 | } | 1505 | } /* while() */ |
1473 | 1506 | ||
1474 | debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); | 1507 | debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); |
1475 | return sn.r.n; | 1508 | return sn.r.n; |
@@ -1486,7 +1519,7 @@ static node *chain_node(uint32_t info) | |||
1486 | if (seq->programname != g_progname) { | 1519 | if (seq->programname != g_progname) { |
1487 | seq->programname = g_progname; | 1520 | seq->programname = g_progname; |
1488 | n = chain_node(OC_NEWSOURCE); | 1521 | n = chain_node(OC_NEWSOURCE); |
1489 | n->l.new_progname = xstrdup(g_progname); | 1522 | n->l.new_progname = g_progname; |
1490 | } | 1523 | } |
1491 | 1524 | ||
1492 | n = seq->last; | 1525 | n = seq->last; |
@@ -1502,14 +1535,16 @@ static void chain_expr(uint32_t info) | |||
1502 | 1535 | ||
1503 | n = chain_node(info); | 1536 | n = chain_node(info); |
1504 | 1537 | ||
1505 | n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM); | 1538 | n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE); |
1506 | if ((info & OF_REQUIRED) && !n->l.n) | 1539 | if ((info & OF_REQUIRED) && !n->l.n) |
1507 | syntax_error(EMSG_TOO_FEW_ARGS); | 1540 | syntax_error(EMSG_TOO_FEW_ARGS); |
1508 | 1541 | ||
1509 | if (t_tclass & TC_GRPTERM) | 1542 | if (t_tclass & TC_RBRACE) |
1510 | rollback_token(); | 1543 | rollback_token(); |
1511 | } | 1544 | } |
1512 | 1545 | ||
1546 | static void chain_group(void); | ||
1547 | |||
1513 | static node *chain_loop(node *nn) | 1548 | static node *chain_loop(node *nn) |
1514 | { | 1549 | { |
1515 | node *n, *n2, *save_brk, *save_cont; | 1550 | node *n, *n2, *save_brk, *save_cont; |
@@ -1533,209 +1568,276 @@ static node *chain_loop(node *nn) | |||
1533 | return n; | 1568 | return n; |
1534 | } | 1569 | } |
1535 | 1570 | ||
1571 | static void chain_until_rbrace(void) | ||
1572 | { | ||
1573 | uint32_t tc; | ||
1574 | while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { | ||
1575 | debug_printf_parse("%s: !TC_RBRACE\n", __func__); | ||
1576 | if (tc == TC_NEWLINE) | ||
1577 | continue; | ||
1578 | rollback_token(); | ||
1579 | chain_group(); | ||
1580 | } | ||
1581 | debug_printf_parse("%s: TC_RBRACE\n", __func__); | ||
1582 | } | ||
1583 | |||
1536 | /* parse group and attach it to chain */ | 1584 | /* parse group and attach it to chain */ |
1537 | static void chain_group(void) | 1585 | static void chain_group(void) |
1538 | { | 1586 | { |
1539 | uint32_t c; | 1587 | uint32_t tc; |
1540 | node *n, *n2, *n3; | 1588 | node *n, *n2, *n3; |
1541 | 1589 | ||
1542 | do { | 1590 | do { |
1543 | c = next_token(TC_GRPSEQ); | 1591 | tc = next_token(TS_GRPSEQ); |
1544 | } while (c & TC_NEWLINE); | 1592 | } while (tc == TC_NEWLINE); |
1545 | 1593 | ||
1546 | if (c & TC_GRPSTART) { | 1594 | if (tc == TC_LBRACE) { |
1547 | debug_printf_parse("%s: TC_GRPSTART\n", __func__); | 1595 | debug_printf_parse("%s: TC_LBRACE\n", __func__); |
1548 | while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { | 1596 | chain_until_rbrace(); |
1549 | debug_printf_parse("%s: !TC_GRPTERM\n", __func__); | 1597 | return; |
1550 | if (t_tclass & TC_NEWLINE) | 1598 | } |
1551 | continue; | 1599 | if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) { |
1552 | rollback_token(); | 1600 | debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__); |
1553 | chain_group(); | ||
1554 | } | ||
1555 | debug_printf_parse("%s: TC_GRPTERM\n", __func__); | ||
1556 | } else if (c & (TC_OPSEQ | TC_OPTERM)) { | ||
1557 | debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__); | ||
1558 | rollback_token(); | 1601 | rollback_token(); |
1559 | chain_expr(OC_EXEC | Vx); | 1602 | chain_expr(OC_EXEC | Vx); |
1560 | } else { | 1603 | return; |
1561 | /* TC_STATEMNT */ | 1604 | } |
1562 | debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__); | 1605 | |
1563 | switch (t_info & OPCLSMASK) { | 1606 | /* TS_STATEMNT */ |
1564 | case ST_IF: | 1607 | debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__); |
1565 | debug_printf_parse("%s: ST_IF\n", __func__); | 1608 | switch (t_info & OPCLSMASK) { |
1566 | n = chain_node(OC_BR | Vx); | 1609 | case ST_IF: |
1567 | n->l.n = condition(); | 1610 | debug_printf_parse("%s: ST_IF\n", __func__); |
1611 | n = chain_node(OC_BR | Vx); | ||
1612 | n->l.n = parse_lrparen_list(); | ||
1613 | chain_group(); | ||
1614 | n2 = chain_node(OC_EXEC); | ||
1615 | n->r.n = seq->last; | ||
1616 | if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) { | ||
1568 | chain_group(); | 1617 | chain_group(); |
1569 | n2 = chain_node(OC_EXEC); | 1618 | n2->a.n = seq->last; |
1570 | n->r.n = seq->last; | 1619 | } else { |
1571 | if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) { | 1620 | rollback_token(); |
1572 | chain_group(); | 1621 | } |
1573 | n2->a.n = seq->last; | 1622 | break; |
1574 | } else { | ||
1575 | rollback_token(); | ||
1576 | } | ||
1577 | break; | ||
1578 | 1623 | ||
1579 | case ST_WHILE: | 1624 | case ST_WHILE: |
1580 | debug_printf_parse("%s: ST_WHILE\n", __func__); | 1625 | debug_printf_parse("%s: ST_WHILE\n", __func__); |
1581 | n2 = condition(); | 1626 | n2 = parse_lrparen_list(); |
1582 | n = chain_loop(NULL); | 1627 | n = chain_loop(NULL); |
1583 | n->l.n = n2; | 1628 | n->l.n = n2; |
1584 | break; | 1629 | break; |
1585 | 1630 | ||
1586 | case ST_DO: | 1631 | case ST_DO: |
1587 | debug_printf_parse("%s: ST_DO\n", __func__); | 1632 | debug_printf_parse("%s: ST_DO\n", __func__); |
1588 | n2 = chain_node(OC_EXEC); | 1633 | n2 = chain_node(OC_EXEC); |
1589 | n = chain_loop(NULL); | 1634 | n = chain_loop(NULL); |
1590 | n2->a.n = n->a.n; | 1635 | n2->a.n = n->a.n; |
1591 | next_token(TC_WHILE); | 1636 | next_token(TC_WHILE); |
1592 | n->l.n = condition(); | 1637 | n->l.n = parse_lrparen_list(); |
1593 | break; | 1638 | break; |
1594 | 1639 | ||
1595 | case ST_FOR: | 1640 | case ST_FOR: |
1596 | debug_printf_parse("%s: ST_FOR\n", __func__); | 1641 | debug_printf_parse("%s: ST_FOR\n", __func__); |
1597 | next_token(TC_SEQSTART); | 1642 | next_token(TC_LPAREN); |
1598 | n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); | 1643 | n2 = parse_expr(TC_SEMICOL | TC_RPAREN); |
1599 | if (t_tclass & TC_SEQTERM) { /* for-in */ | 1644 | if (t_tclass & TC_RPAREN) { /* for-in */ |
1600 | if (!n2 || (n2->info & OPCLSMASK) != OC_IN) | 1645 | if (!n2 || n2->info != TI_IN) |
1601 | syntax_error(EMSG_UNEXP_TOKEN); | 1646 | syntax_error(EMSG_UNEXP_TOKEN); |
1602 | n = chain_node(OC_WALKINIT | VV); | 1647 | n = chain_node(OC_WALKINIT | VV); |
1603 | n->l.n = n2->l.n; | 1648 | n->l.n = n2->l.n; |
1604 | n->r.n = n2->r.n; | 1649 | n->r.n = n2->r.n; |
1605 | n = chain_loop(NULL); | 1650 | n = chain_loop(NULL); |
1606 | n->info = OC_WALKNEXT | Vx; | 1651 | n->info = OC_WALKNEXT | Vx; |
1607 | n->l.n = n2->l.n; | 1652 | n->l.n = n2->l.n; |
1608 | } else { /* for (;;) */ | 1653 | } else { /* for (;;) */ |
1609 | n = chain_node(OC_EXEC | Vx); | 1654 | n = chain_node(OC_EXEC | Vx); |
1610 | n->l.n = n2; | 1655 | n->l.n = n2; |
1611 | n2 = parse_expr(TC_SEMICOL); | 1656 | n2 = parse_expr(TC_SEMICOL); |
1612 | n3 = parse_expr(TC_SEQTERM); | 1657 | n3 = parse_expr(TC_RPAREN); |
1613 | n = chain_loop(n3); | 1658 | n = chain_loop(n3); |
1614 | n->l.n = n2; | 1659 | n->l.n = n2; |
1615 | if (!n2) | 1660 | if (!n2) |
1616 | n->info = OC_EXEC; | 1661 | n->info = OC_EXEC; |
1617 | } | 1662 | } |
1618 | break; | 1663 | break; |
1619 | 1664 | ||
1620 | case OC_PRINT: | 1665 | case OC_PRINT: |
1621 | case OC_PRINTF: | 1666 | case OC_PRINTF: |
1622 | debug_printf_parse("%s: OC_PRINT[F]\n", __func__); | 1667 | debug_printf_parse("%s: OC_PRINT[F]\n", __func__); |
1623 | n = chain_node(t_info); | 1668 | n = chain_node(t_info); |
1624 | n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); | 1669 | n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE); |
1625 | if (t_tclass & TC_OUTRDR) { | 1670 | if (t_tclass & TC_OUTRDR) { |
1626 | n->info |= t_info; | 1671 | n->info |= t_info; |
1627 | n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM); | 1672 | n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE); |
1628 | } | 1673 | } |
1629 | if (t_tclass & TC_GRPTERM) | 1674 | if (t_tclass & TC_RBRACE) |
1630 | rollback_token(); | 1675 | rollback_token(); |
1631 | break; | 1676 | break; |
1632 | 1677 | ||
1633 | case OC_BREAK: | 1678 | case OC_BREAK: |
1634 | debug_printf_parse("%s: OC_BREAK\n", __func__); | 1679 | debug_printf_parse("%s: OC_BREAK\n", __func__); |
1635 | n = chain_node(OC_EXEC); | 1680 | n = chain_node(OC_EXEC); |
1636 | n->a.n = break_ptr; | 1681 | n->a.n = break_ptr; |
1637 | chain_expr(t_info); | 1682 | //TODO: if break_ptr is NULL, syntax error (not in the loop)? |
1638 | break; | 1683 | chain_expr(t_info); |
1684 | break; | ||
1639 | 1685 | ||
1640 | case OC_CONTINUE: | 1686 | case OC_CONTINUE: |
1641 | debug_printf_parse("%s: OC_CONTINUE\n", __func__); | 1687 | debug_printf_parse("%s: OC_CONTINUE\n", __func__); |
1642 | n = chain_node(OC_EXEC); | 1688 | n = chain_node(OC_EXEC); |
1643 | n->a.n = continue_ptr; | 1689 | n->a.n = continue_ptr; |
1644 | chain_expr(t_info); | 1690 | //TODO: if continue_ptr is NULL, syntax error (not in the loop)? |
1645 | break; | 1691 | chain_expr(t_info); |
1692 | break; | ||
1646 | 1693 | ||
1647 | /* delete, next, nextfile, return, exit */ | 1694 | /* delete, next, nextfile, return, exit */ |
1648 | default: | 1695 | default: |
1649 | debug_printf_parse("%s: default\n", __func__); | 1696 | debug_printf_parse("%s: default\n", __func__); |
1650 | chain_expr(t_info); | 1697 | chain_expr(t_info); |
1651 | } | ||
1652 | } | 1698 | } |
1653 | } | 1699 | } |
1654 | 1700 | ||
1655 | static void parse_program(char *p) | 1701 | static void parse_program(char *p) |
1656 | { | 1702 | { |
1657 | uint32_t tclass; | ||
1658 | node *cn; | ||
1659 | func *f; | ||
1660 | var *v; | ||
1661 | |||
1662 | debug_printf_parse("%s()\n", __func__); | 1703 | debug_printf_parse("%s()\n", __func__); |
1663 | 1704 | ||
1664 | g_pos = p; | 1705 | g_pos = p; |
1665 | t_lineno = 1; | 1706 | t_lineno = 1; |
1666 | while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | | 1707 | for (;;) { |
1667 | TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { | 1708 | uint32_t tclass; |
1709 | |||
1710 | tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | | ||
1711 | TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL); | ||
1668 | 1712 | ||
1669 | if (tclass & TC_OPTERM) { | 1713 | if (tclass == TC_EOF) { |
1670 | debug_printf_parse("%s: TC_OPTERM\n", __func__); | 1714 | debug_printf_parse("%s: TC_EOF\n", __func__); |
1715 | break; | ||
1716 | } | ||
1717 | if (tclass & (TC_SEMICOL | TC_NEWLINE)) { | ||
1718 | debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__); | ||
1719 | //NB: gawk allows many newlines, but does not allow more than one semicolon: | ||
1720 | // BEGIN {...}<newline>;<newline>; | ||
1721 | //would complain "each rule must have a pattern or an action part". | ||
1722 | //Same message for | ||
1723 | // ; BEGIN {...} | ||
1671 | continue; | 1724 | continue; |
1672 | } | 1725 | } |
1673 | 1726 | if (tclass == TC_BEGIN) { | |
1674 | seq = &mainseq; | ||
1675 | if (tclass & TC_BEGIN) { | ||
1676 | debug_printf_parse("%s: TC_BEGIN\n", __func__); | 1727 | debug_printf_parse("%s: TC_BEGIN\n", __func__); |
1677 | seq = &beginseq; | 1728 | seq = &beginseq; |
1678 | chain_group(); | 1729 | /* ensure there is no newline between BEGIN and { */ |
1679 | } else if (tclass & TC_END) { | 1730 | next_token(TC_LBRACE); |
1731 | chain_until_rbrace(); | ||
1732 | continue; | ||
1733 | } | ||
1734 | if (tclass == TC_END) { | ||
1680 | debug_printf_parse("%s: TC_END\n", __func__); | 1735 | debug_printf_parse("%s: TC_END\n", __func__); |
1681 | seq = &endseq; | 1736 | seq = &endseq; |
1682 | chain_group(); | 1737 | /* ensure there is no newline between END and { */ |
1683 | } else if (tclass & TC_FUNCDECL) { | 1738 | next_token(TC_LBRACE); |
1739 | chain_until_rbrace(); | ||
1740 | continue; | ||
1741 | } | ||
1742 | if (tclass == TC_FUNCDECL) { | ||
1743 | func *f; | ||
1744 | |||
1684 | debug_printf_parse("%s: TC_FUNCDECL\n", __func__); | 1745 | debug_printf_parse("%s: TC_FUNCDECL\n", __func__); |
1685 | next_token(TC_FUNCTION); | 1746 | next_token(TC_FUNCTION); |
1686 | g_pos++; | ||
1687 | f = newfunc(t_string); | 1747 | f = newfunc(t_string); |
1688 | f->body.first = NULL; | 1748 | if (f->defined) |
1689 | f->nargs = 0; | 1749 | syntax_error("Duplicate function"); |
1690 | /* Match func arg list: a comma sep list of >= 0 args, and a close paren */ | 1750 | f->defined = 1; |
1691 | while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) { | 1751 | //f->body.first = NULL; - already is |
1692 | /* Either an empty arg list, or trailing comma from prev iter | 1752 | //f->nargs = 0; - already is |
1693 | * must be followed by an arg */ | 1753 | /* func arg list: comma sep list of args, and a close paren */ |
1694 | if (f->nargs == 0 && t_tclass == TC_SEQTERM) | 1754 | for (;;) { |
1695 | break; | 1755 | var *v; |
1696 | 1756 | if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { | |
1697 | /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */ | 1757 | if (f->nargs == 0) |
1698 | if (t_tclass != TC_VARIABLE) | 1758 | break; /* func() is ok */ |
1759 | /* func(a,) is not ok */ | ||
1699 | syntax_error(EMSG_UNEXP_TOKEN); | 1760 | syntax_error(EMSG_UNEXP_TOKEN); |
1700 | 1761 | } | |
1701 | v = findvar(ahash, t_string); | 1762 | v = findvar(ahash, t_string); |
1702 | v->x.aidx = f->nargs++; | 1763 | v->x.aidx = f->nargs++; |
1703 | |||
1704 | /* Arg followed either by end of arg list or 1 comma */ | 1764 | /* Arg followed either by end of arg list or 1 comma */ |
1705 | if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) | 1765 | if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN) |
1706 | break; | 1766 | break; |
1707 | if (t_tclass != TC_COMMA) | 1767 | /* it was a comma, we ate it */ |
1708 | syntax_error(EMSG_UNEXP_TOKEN); | ||
1709 | } | 1768 | } |
1710 | seq = &f->body; | 1769 | seq = &f->body; |
1711 | chain_group(); | 1770 | /* ensure there is { after "func F(...)" - but newlines are allowed */ |
1712 | clear_array(ahash); | 1771 | while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) |
1713 | } else if (tclass & TC_OPSEQ) { | 1772 | continue; |
1714 | debug_printf_parse("%s: TC_OPSEQ\n", __func__); | 1773 | chain_until_rbrace(); |
1774 | hash_clear(ahash); | ||
1775 | continue; | ||
1776 | } | ||
1777 | seq = &mainseq; | ||
1778 | if (tclass & TS_OPSEQ) { | ||
1779 | node *cn; | ||
1780 | |||
1781 | debug_printf_parse("%s: TS_OPSEQ\n", __func__); | ||
1715 | rollback_token(); | 1782 | rollback_token(); |
1716 | cn = chain_node(OC_TEST); | 1783 | cn = chain_node(OC_TEST); |
1717 | cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); | 1784 | cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE); |
1718 | if (t_tclass & TC_GRPSTART) { | 1785 | if (t_tclass == TC_LBRACE) { |
1719 | debug_printf_parse("%s: TC_GRPSTART\n", __func__); | 1786 | debug_printf_parse("%s: TC_LBRACE\n", __func__); |
1720 | rollback_token(); | 1787 | rollback_token(); |
1721 | chain_group(); | 1788 | chain_group(); |
1722 | } else { | 1789 | } else { |
1723 | debug_printf_parse("%s: !TC_GRPSTART\n", __func__); | 1790 | /* no action, assume default "{ print }" */ |
1791 | debug_printf_parse("%s: !TC_LBRACE\n", __func__); | ||
1724 | chain_node(OC_PRINT); | 1792 | chain_node(OC_PRINT); |
1725 | } | 1793 | } |
1726 | cn->r.n = mainseq.last; | 1794 | cn->r.n = mainseq.last; |
1727 | } else /* if (tclass & TC_GRPSTART) */ { | 1795 | continue; |
1728 | debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__); | ||
1729 | rollback_token(); | ||
1730 | chain_group(); | ||
1731 | } | 1796 | } |
1797 | /* tclass == TC_LBRACE */ | ||
1798 | debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); | ||
1799 | chain_until_rbrace(); | ||
1732 | } | 1800 | } |
1733 | debug_printf_parse("%s: TC_EOF\n", __func__); | ||
1734 | } | 1801 | } |
1735 | 1802 | ||
1736 | 1803 | ||
1737 | /* -------- program execution part -------- */ | 1804 | /* -------- program execution part -------- */ |
1738 | 1805 | ||
1806 | /* temporary variables allocator */ | ||
1807 | static var *nvalloc(int sz) | ||
1808 | { | ||
1809 | return xzalloc(sz * sizeof(var)); | ||
1810 | } | ||
1811 | |||
1812 | static void nvfree(var *v, int sz) | ||
1813 | { | ||
1814 | var *p = v; | ||
1815 | |||
1816 | while (--sz >= 0) { | ||
1817 | if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { | ||
1818 | clear_array(iamarray(p)); | ||
1819 | free(p->x.array->items); | ||
1820 | free(p->x.array); | ||
1821 | } | ||
1822 | if (p->type & VF_WALK) { | ||
1823 | walker_list *n; | ||
1824 | walker_list *w = p->x.walker; | ||
1825 | debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); | ||
1826 | p->x.walker = NULL; | ||
1827 | while (w) { | ||
1828 | n = w->prev; | ||
1829 | debug_printf_walker(" free(%p)\n", w); | ||
1830 | free(w); | ||
1831 | w = n; | ||
1832 | } | ||
1833 | } | ||
1834 | clrvar(p); | ||
1835 | p++; | ||
1836 | } | ||
1837 | |||
1838 | free(v); | ||
1839 | } | ||
1840 | |||
1739 | static node *mk_splitter(const char *s, tsplitter *spl) | 1841 | static node *mk_splitter(const char *s, tsplitter *spl) |
1740 | { | 1842 | { |
1741 | regex_t *re, *ire; | 1843 | regex_t *re, *ire; |
@@ -1744,7 +1846,7 @@ static node *mk_splitter(const char *s, tsplitter *spl) | |||
1744 | re = &spl->re[0]; | 1846 | re = &spl->re[0]; |
1745 | ire = &spl->re[1]; | 1847 | ire = &spl->re[1]; |
1746 | n = &spl->n; | 1848 | n = &spl->n; |
1747 | if ((n->info & OPCLSMASK) == OC_REGEXP) { | 1849 | if (n->info == TI_REGEXP) { |
1748 | regfree(re); | 1850 | regfree(re); |
1749 | regfree(ire); // TODO: nuke ire, use re+1? | 1851 | regfree(ire); // TODO: nuke ire, use re+1? |
1750 | } | 1852 | } |
@@ -1757,21 +1859,28 @@ static node *mk_splitter(const char *s, tsplitter *spl) | |||
1757 | return n; | 1859 | return n; |
1758 | } | 1860 | } |
1759 | 1861 | ||
1760 | /* use node as a regular expression. Supplied with node ptr and regex_t | 1862 | static var *evaluate(node *, var *); |
1863 | |||
1864 | /* Use node as a regular expression. Supplied with node ptr and regex_t | ||
1761 | * storage space. Return ptr to regex (if result points to preg, it should | 1865 | * storage space. Return ptr to regex (if result points to preg, it should |
1762 | * be later regfree'd manually | 1866 | * be later regfree'd manually). |
1763 | */ | 1867 | */ |
1764 | static regex_t *as_regex(node *op, regex_t *preg) | 1868 | static regex_t *as_regex(node *op, regex_t *preg) |
1765 | { | 1869 | { |
1766 | int cflags; | 1870 | int cflags; |
1767 | var *v; | ||
1768 | const char *s; | 1871 | const char *s; |
1769 | 1872 | ||
1770 | if ((op->info & OPCLSMASK) == OC_REGEXP) { | 1873 | if (op->info == TI_REGEXP) { |
1771 | return icase ? op->r.ire : op->l.re; | 1874 | return icase ? op->r.ire : op->l.re; |
1772 | } | 1875 | } |
1773 | v = nvalloc(1); | 1876 | |
1774 | s = getvar_s(evaluate(op, v)); | 1877 | //tmpvar = nvalloc(1); |
1878 | #define TMPVAR (&G.as_regex__tmpvar) | ||
1879 | // We use a single "static" tmpvar (instead of on-stack or malloced one) | ||
1880 | // to decrease memory consumption in deeply-recursive awk programs. | ||
1881 | // The rule to work safely is to never call evaluate() while our static | ||
1882 | // TMPVAR's value is still needed. | ||
1883 | s = getvar_s(evaluate(op, TMPVAR)); | ||
1775 | 1884 | ||
1776 | cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED; | 1885 | cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED; |
1777 | /* Testcase where REG_EXTENDED fails (unpaired '{'): | 1886 | /* Testcase where REG_EXTENDED fails (unpaired '{'): |
@@ -1783,7 +1892,8 @@ static regex_t *as_regex(node *op, regex_t *preg) | |||
1783 | cflags &= ~REG_EXTENDED; | 1892 | cflags &= ~REG_EXTENDED; |
1784 | xregcomp(preg, s, cflags); | 1893 | xregcomp(preg, s, cflags); |
1785 | } | 1894 | } |
1786 | nvfree(v); | 1895 | //nvfree(tmpvar, 1); |
1896 | #undef TMPVAR | ||
1787 | return preg; | 1897 | return preg; |
1788 | } | 1898 | } |
1789 | 1899 | ||
@@ -1870,13 +1980,13 @@ static int awk_split(const char *s, node *spl, char **slist) | |||
1870 | c[2] = '\n'; | 1980 | c[2] = '\n'; |
1871 | 1981 | ||
1872 | n = 0; | 1982 | n = 0; |
1873 | if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ | 1983 | if (spl->info == TI_REGEXP) { /* regex split */ |
1874 | if (!*s) | 1984 | if (!*s) |
1875 | return n; /* "": zero fields */ | 1985 | return n; /* "": zero fields */ |
1876 | n++; /* at least one field will be there */ | 1986 | n++; /* at least one field will be there */ |
1877 | do { | 1987 | do { |
1878 | int l; | 1988 | int l; |
1879 | regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... | 1989 | regmatch_t pmatch[1]; |
1880 | 1990 | ||
1881 | l = strcspn(s, c+2); /* len till next NUL or \n */ | 1991 | l = strcspn(s, c+2); /* len till next NUL or \n */ |
1882 | if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0 | 1992 | if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0 |
@@ -2037,7 +2147,7 @@ static node *nextarg(node **pn) | |||
2037 | node *n; | 2147 | node *n; |
2038 | 2148 | ||
2039 | n = *pn; | 2149 | n = *pn; |
2040 | if (n && (n->info & OPCLSMASK) == OC_COMMA) { | 2150 | if (n && n->info == TI_COMMA) { |
2041 | *pn = n->r.n; | 2151 | *pn = n->r.n; |
2042 | n = n->l.n; | 2152 | n = n->l.n; |
2043 | } else { | 2153 | } else { |
@@ -2068,8 +2178,7 @@ static void hashwalk_init(var *v, xhash *array) | |||
2068 | for (i = 0; i < array->csize; i++) { | 2178 | for (i = 0; i < array->csize; i++) { |
2069 | hi = array->items[i]; | 2179 | hi = array->items[i]; |
2070 | while (hi) { | 2180 | while (hi) { |
2071 | strcpy(w->end, hi->name); | 2181 | w->end = stpcpy(w->end, hi->name) + 1; |
2072 | nextword(&w->end); | ||
2073 | hi = hi->next; | 2182 | hi = hi->next; |
2074 | } | 2183 | } |
2075 | } | 2184 | } |
@@ -2095,8 +2204,11 @@ static int hashwalk_next(var *v) | |||
2095 | /* evaluate node, return 1 when result is true, 0 otherwise */ | 2204 | /* evaluate node, return 1 when result is true, 0 otherwise */ |
2096 | static int ptest(node *pattern) | 2205 | static int ptest(node *pattern) |
2097 | { | 2206 | { |
2098 | /* ptest__v is "static": to save stack space? */ | 2207 | // We use a single "static" tmpvar (instead of on-stack or malloced one) |
2099 | return istrue(evaluate(pattern, &G.ptest__v)); | 2208 | // to decrease memory consumption in deeply-recursive awk programs. |
2209 | // The rule to work safely is to never call evaluate() while our static | ||
2210 | // TMPVAR's value is still needed. | ||
2211 | return istrue(evaluate(pattern, &G.ptest__tmpvar)); | ||
2100 | } | 2212 | } |
2101 | 2213 | ||
2102 | #if ENABLE_PLATFORM_MINGW32 | 2214 | #if ENABLE_PLATFORM_MINGW32 |
@@ -2118,7 +2230,7 @@ static ssize_t FAST_FUNC safe_read_strip_cr(int fd, void *buf, size_t count) | |||
2118 | static int awk_getline(rstream *rsm, var *v) | 2230 | static int awk_getline(rstream *rsm, var *v) |
2119 | { | 2231 | { |
2120 | char *b; | 2232 | char *b; |
2121 | regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... | 2233 | regmatch_t pmatch[1]; |
2122 | int size, a, p, pp = 0; | 2234 | int size, a, p, pp = 0; |
2123 | int fd, so, eo, r, rp; | 2235 | int fd, so, eo, r, rp; |
2124 | char c, *m, *s; | 2236 | char c, *m, *s; |
@@ -2144,7 +2256,7 @@ static int awk_getline(rstream *rsm, var *v) | |||
2144 | so = eo = p; | 2256 | so = eo = p; |
2145 | r = 1; | 2257 | r = 1; |
2146 | if (p > 0) { | 2258 | if (p > 0) { |
2147 | if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) { | 2259 | if (rsplitter.n.info == TI_REGEXP) { |
2148 | if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re, | 2260 | if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re, |
2149 | b, 1, pmatch, 0) == 0) { | 2261 | b, 1, pmatch, 0) == 0) { |
2150 | so = pmatch[0].rm_so; | 2262 | so = pmatch[0].rm_so; |
@@ -2216,27 +2328,6 @@ static int awk_getline(rstream *rsm, var *v) | |||
2216 | return r; | 2328 | return r; |
2217 | } | 2329 | } |
2218 | 2330 | ||
2219 | static int fmt_num(char *b, int size, const char *format, double n, int int_as_int) | ||
2220 | { | ||
2221 | int r = 0; | ||
2222 | char c; | ||
2223 | const char *s = format; | ||
2224 | |||
2225 | if (int_as_int && n == (long long)n) { | ||
2226 | r = snprintf(b, size, "%"LL_FMT"d", (long long)n); | ||
2227 | } else { | ||
2228 | do { c = *s; } while (c && *++s); | ||
2229 | if (strchr("diouxX", c)) { | ||
2230 | r = snprintf(b, size, format, (int)n); | ||
2231 | } else if (strchr("eEfgG", c)) { | ||
2232 | r = snprintf(b, size, format, n); | ||
2233 | } else { | ||
2234 | syntax_error(EMSG_INV_FMT); | ||
2235 | } | ||
2236 | } | ||
2237 | return r; | ||
2238 | } | ||
2239 | |||
2240 | /* formatted output into an allocated buffer, return ptr to buffer */ | 2331 | /* formatted output into an allocated buffer, return ptr to buffer */ |
2241 | #if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS | 2332 | #if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS |
2242 | # define awk_printf(a, b) awk_printf(a) | 2333 | # define awk_printf(a, b) awk_printf(a) |
@@ -2248,10 +2339,18 @@ static char *awk_printf(node *n, int *len) | |||
2248 | const char *s1; | 2339 | const char *s1; |
2249 | int i, j, incr, bsize; | 2340 | int i, j, incr, bsize; |
2250 | char c, c1; | 2341 | char c, c1; |
2251 | var *v, *arg; | 2342 | var *arg; |
2252 | 2343 | ||
2253 | v = nvalloc(1); | 2344 | //tmpvar = nvalloc(1); |
2254 | fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v))); | 2345 | #define TMPVAR (&G.awk_printf__tmpvar) |
2346 | // We use a single "static" tmpvar (instead of on-stack or malloced one) | ||
2347 | // to decrease memory consumption in deeply-recursive awk programs. | ||
2348 | // The rule to work safely is to never call evaluate() while our static | ||
2349 | // TMPVAR's value is still needed. | ||
2350 | fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR))); | ||
2351 | // ^^^^^^^^^ here we immediately strdup() the value, so the later call | ||
2352 | // to evaluate() potentially recursing into another awk_printf() can't | ||
2353 | // mangle the value. | ||
2255 | 2354 | ||
2256 | i = 0; | 2355 | i = 0; |
2257 | while (*f) { | 2356 | while (*f) { |
@@ -2271,7 +2370,7 @@ static char *awk_printf(node *n, int *len) | |||
2271 | f++; | 2370 | f++; |
2272 | c1 = *f; | 2371 | c1 = *f; |
2273 | *f = '\0'; | 2372 | *f = '\0'; |
2274 | arg = evaluate(nextarg(&n), v); | 2373 | arg = evaluate(nextarg(&n), TMPVAR); |
2275 | 2374 | ||
2276 | j = i; | 2375 | j = i; |
2277 | if (c == 'c' || !c) { | 2376 | if (c == 'c' || !c) { |
@@ -2292,7 +2391,9 @@ static char *awk_printf(node *n, int *len) | |||
2292 | } | 2391 | } |
2293 | 2392 | ||
2294 | free(fmt); | 2393 | free(fmt); |
2295 | nvfree(v); | 2394 | //nvfree(tmpvar, 1); |
2395 | #undef TMPVAR | ||
2396 | |||
2296 | b = xrealloc(b, i + 1); | 2397 | b = xrealloc(b, i + 1); |
2297 | b[i] = '\0'; | 2398 | b[i] = '\0'; |
2298 | #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS | 2399 | #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS |
@@ -2428,23 +2529,48 @@ static NOINLINE int do_mktime(const char *ds) | |||
2428 | return mktime(&then); | 2529 | return mktime(&then); |
2429 | } | 2530 | } |
2430 | 2531 | ||
2532 | /* Reduce stack usage in exec_builtin() by keeping match() code separate */ | ||
2533 | static NOINLINE var *do_match(node *an1, const char *as0) | ||
2534 | { | ||
2535 | regmatch_t pmatch[1]; | ||
2536 | regex_t sreg, *re; | ||
2537 | int n, start, len; | ||
2538 | |||
2539 | re = as_regex(an1, &sreg); | ||
2540 | n = regexec(re, as0, 1, pmatch, 0); | ||
2541 | if (re == &sreg) | ||
2542 | regfree(re); | ||
2543 | start = 0; | ||
2544 | len = -1; | ||
2545 | if (n == 0) { | ||
2546 | start = pmatch[0].rm_so + 1; | ||
2547 | len = pmatch[0].rm_eo - pmatch[0].rm_so; | ||
2548 | } | ||
2549 | setvar_i(newvar("RLENGTH"), len); | ||
2550 | return setvar_i(newvar("RSTART"), start); | ||
2551 | } | ||
2552 | |||
2553 | /* Reduce stack usage in evaluate() by keeping builtins' code separate */ | ||
2431 | static NOINLINE var *exec_builtin(node *op, var *res) | 2554 | static NOINLINE var *exec_builtin(node *op, var *res) |
2432 | { | 2555 | { |
2433 | #define tspl (G.exec_builtin__tspl) | 2556 | #define tspl (G.exec_builtin__tspl) |
2434 | 2557 | ||
2435 | var *tv; | 2558 | var *tmpvars; |
2436 | node *an[4]; | 2559 | node *an[4]; |
2437 | var *av[4]; | 2560 | var *av[4]; |
2438 | const char *as[4]; | 2561 | const char *as[4]; |
2439 | regmatch_t pmatch[2]; | ||
2440 | regex_t sreg, *re; | ||
2441 | node *spl; | 2562 | node *spl; |
2442 | uint32_t isr, info; | 2563 | uint32_t isr, info; |
2443 | int nargs; | 2564 | int nargs; |
2444 | time_t tt; | 2565 | time_t tt; |
2445 | int i, l, ll, n; | 2566 | int i, l, ll, n; |
2446 | 2567 | ||
2447 | tv = nvalloc(4); | 2568 | tmpvars = nvalloc(4); |
2569 | #define TMPVAR0 (tmpvars) | ||
2570 | #define TMPVAR1 (tmpvars + 1) | ||
2571 | #define TMPVAR2 (tmpvars + 2) | ||
2572 | #define TMPVAR3 (tmpvars + 3) | ||
2573 | #define TMPVAR(i) (tmpvars + (i)) | ||
2448 | isr = info = op->info; | 2574 | isr = info = op->info; |
2449 | op = op->l.n; | 2575 | op = op->l.n; |
2450 | 2576 | ||
@@ -2452,7 +2578,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) | |||
2452 | for (i = 0; i < 4 && op; i++) { | 2578 | for (i = 0; i < 4 && op; i++) { |
2453 | an[i] = nextarg(&op); | 2579 | an[i] = nextarg(&op); |
2454 | if (isr & 0x09000000) | 2580 | if (isr & 0x09000000) |
2455 | av[i] = evaluate(an[i], &tv[i]); | 2581 | av[i] = evaluate(an[i], TMPVAR(i)); |
2456 | if (isr & 0x08000000) | 2582 | if (isr & 0x08000000) |
2457 | as[i] = getvar_s(av[i]); | 2583 | as[i] = getvar_s(av[i]); |
2458 | isr >>= 1; | 2584 | isr >>= 1; |
@@ -2476,8 +2602,8 @@ static NOINLINE var *exec_builtin(node *op, var *res) | |||
2476 | char *s, *s1; | 2602 | char *s, *s1; |
2477 | 2603 | ||
2478 | if (nargs > 2) { | 2604 | if (nargs > 2) { |
2479 | spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? | 2605 | spl = (an[2]->info == TI_REGEXP) ? an[2] |
2480 | an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); | 2606 | : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl); |
2481 | } else { | 2607 | } else { |
2482 | spl = &fsplitter.n; | 2608 | spl = &fsplitter.n; |
2483 | } | 2609 | } |
@@ -2591,20 +2717,7 @@ static NOINLINE var *exec_builtin(node *op, var *res) | |||
2591 | break; | 2717 | break; |
2592 | 2718 | ||
2593 | case B_ma: | 2719 | case B_ma: |
2594 | re = as_regex(an[1], &sreg); | 2720 | res = do_match(an[1], as[0]); |
2595 | n = regexec(re, as[0], 1, pmatch, 0); | ||
2596 | if (n == 0) { | ||
2597 | pmatch[0].rm_so++; | ||
2598 | pmatch[0].rm_eo++; | ||
2599 | } else { | ||
2600 | pmatch[0].rm_so = 0; | ||
2601 | pmatch[0].rm_eo = -1; | ||
2602 | } | ||
2603 | setvar_i(newvar("RSTART"), pmatch[0].rm_so); | ||
2604 | setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); | ||
2605 | setvar_i(res, pmatch[0].rm_so); | ||
2606 | if (re == &sreg) | ||
2607 | regfree(re); | ||
2608 | break; | 2721 | break; |
2609 | 2722 | ||
2610 | case B_ge: | 2723 | case B_ge: |
@@ -2620,14 +2733,79 @@ static NOINLINE var *exec_builtin(node *op, var *res) | |||
2620 | break; | 2733 | break; |
2621 | } | 2734 | } |
2622 | 2735 | ||
2623 | nvfree(tv); | 2736 | nvfree(tmpvars, 4); |
2737 | #undef TMPVAR0 | ||
2738 | #undef TMPVAR1 | ||
2739 | #undef TMPVAR2 | ||
2740 | #undef TMPVAR3 | ||
2741 | #undef TMPVAR | ||
2742 | |||
2624 | return res; | 2743 | return res; |
2625 | #undef tspl | 2744 | #undef tspl |
2626 | } | 2745 | } |
2627 | 2746 | ||
2747 | /* if expr looks like "var=value", perform assignment and return 1, | ||
2748 | * otherwise return 0 */ | ||
2749 | static int is_assignment(const char *expr) | ||
2750 | { | ||
2751 | char *exprc, *val; | ||
2752 | |||
2753 | val = (char*)endofname(expr); | ||
2754 | if (val == (char*)expr || *val != '=') { | ||
2755 | return FALSE; | ||
2756 | } | ||
2757 | |||
2758 | exprc = xstrdup(expr); | ||
2759 | val = exprc + (val - expr); | ||
2760 | *val++ = '\0'; | ||
2761 | |||
2762 | unescape_string_in_place(val); | ||
2763 | setvar_u(newvar(exprc), val); | ||
2764 | free(exprc); | ||
2765 | return TRUE; | ||
2766 | } | ||
2767 | |||
2768 | /* switch to next input file */ | ||
2769 | static rstream *next_input_file(void) | ||
2770 | { | ||
2771 | #define rsm (G.next_input_file__rsm) | ||
2772 | #define files_happen (G.next_input_file__files_happen) | ||
2773 | |||
2774 | const char *fname, *ind; | ||
2775 | |||
2776 | if (rsm.F) | ||
2777 | fclose(rsm.F); | ||
2778 | rsm.F = NULL; | ||
2779 | rsm.pos = rsm.adv = 0; | ||
2780 | |||
2781 | for (;;) { | ||
2782 | if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { | ||
2783 | if (files_happen) | ||
2784 | return NULL; | ||
2785 | fname = "-"; | ||
2786 | rsm.F = stdin; | ||
2787 | break; | ||
2788 | } | ||
2789 | ind = getvar_s(incvar(intvar[ARGIND])); | ||
2790 | fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); | ||
2791 | if (fname && *fname && !is_assignment(fname)) { | ||
2792 | rsm.F = xfopen_stdin(fname); | ||
2793 | break; | ||
2794 | } | ||
2795 | } | ||
2796 | |||
2797 | files_happen = TRUE; | ||
2798 | setvar_s(intvar[FILENAME], fname); | ||
2799 | return &rsm; | ||
2800 | #undef rsm | ||
2801 | #undef files_happen | ||
2802 | } | ||
2803 | |||
2628 | /* | 2804 | /* |
2629 | * Evaluate node - the heart of the program. Supplied with subtree | 2805 | * Evaluate node - the heart of the program. Supplied with subtree |
2630 | * and place where to store result. returns ptr to result. | 2806 | * and "res" variable to assign the result to if we evaluate an expression. |
2807 | * If node refers to e.g. a variable or a field, no assignment happens. | ||
2808 | * Return ptr to the result (which may or may not be the "res" variable!) | ||
2631 | */ | 2809 | */ |
2632 | #define XC(n) ((n) >> 8) | 2810 | #define XC(n) ((n) >> 8) |
2633 | 2811 | ||
@@ -2639,14 +2817,16 @@ static var *evaluate(node *op, var *res) | |||
2639 | #define seed (G.evaluate__seed) | 2817 | #define seed (G.evaluate__seed) |
2640 | #define sreg (G.evaluate__sreg) | 2818 | #define sreg (G.evaluate__sreg) |
2641 | 2819 | ||
2642 | var *v1; | 2820 | var *tmpvars; |
2643 | 2821 | ||
2644 | if (!op) | 2822 | if (!op) |
2645 | return setvar_s(res, NULL); | 2823 | return setvar_s(res, NULL); |
2646 | 2824 | ||
2647 | debug_printf_eval("entered %s()\n", __func__); | 2825 | debug_printf_eval("entered %s()\n", __func__); |
2648 | 2826 | ||
2649 | v1 = nvalloc(2); | 2827 | tmpvars = nvalloc(2); |
2828 | #define TMPVAR0 (tmpvars) | ||
2829 | #define TMPVAR1 (tmpvars + 1) | ||
2650 | 2830 | ||
2651 | while (op) { | 2831 | while (op) { |
2652 | struct { | 2832 | struct { |
@@ -2668,42 +2848,19 @@ static var *evaluate(node *op, var *res) | |||
2668 | op1 = op->l.n; | 2848 | op1 = op->l.n; |
2669 | debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); | 2849 | debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); |
2670 | 2850 | ||
2671 | /* "delete" is special: | 2851 | /* execute inevitable things */ |
2672 | * "delete array[var--]" must evaluate index expr only once, | 2852 | if (opinfo & OF_RES1) { |
2673 | * must not evaluate it in "execute inevitable things" part. | 2853 | if ((opinfo & OF_REQUIRED) && !op1) |
2674 | */ | 2854 | syntax_error(EMSG_TOO_FEW_ARGS); |
2675 | if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) { | 2855 | L.v = evaluate(op1, TMPVAR0); |
2676 | uint32_t info = op1->info & OPCLSMASK; | 2856 | if (opinfo & OF_STR1) { |
2677 | var *v; | 2857 | L.s = getvar_s(L.v); |
2678 | 2858 | debug_printf_eval("L.s:'%s'\n", L.s); | |
2679 | debug_printf_eval("DELETE\n"); | ||
2680 | if (info == OC_VAR) { | ||
2681 | v = op1->l.v; | ||
2682 | } else if (info == OC_FNARG) { | ||
2683 | v = &fnargs[op1->l.aidx]; | ||
2684 | } else { | ||
2685 | syntax_error(EMSG_NOT_ARRAY); | ||
2686 | } | 2859 | } |
2687 | if (op1->r.n) { /* array ref? */ | 2860 | if (opinfo & OF_NUM1) { |
2688 | const char *s; | 2861 | L_d = getvar_i(L.v); |
2689 | s = getvar_s(evaluate(op1->r.n, v1)); | 2862 | debug_printf_eval("L_d:%f\n", L_d); |
2690 | hash_remove(iamarray(v), s); | ||
2691 | } else { | ||
2692 | clear_array(iamarray(v)); | ||
2693 | } | 2863 | } |
2694 | goto next; | ||
2695 | } | ||
2696 | |||
2697 | /* execute inevitable things */ | ||
2698 | if (opinfo & OF_RES1) | ||
2699 | L.v = evaluate(op1, v1); | ||
2700 | if (opinfo & OF_STR1) { | ||
2701 | L.s = getvar_s(L.v); | ||
2702 | debug_printf_eval("L.s:'%s'\n", L.s); | ||
2703 | } | ||
2704 | if (opinfo & OF_NUM1) { | ||
2705 | L_d = getvar_i(L.v); | ||
2706 | debug_printf_eval("L_d:%f\n", L_d); | ||
2707 | } | 2864 | } |
2708 | /* NB: Must get string/numeric values of L (done above) | 2865 | /* NB: Must get string/numeric values of L (done above) |
2709 | * _before_ evaluate()'ing R.v: if both L and R are $NNNs, | 2866 | * _before_ evaluate()'ing R.v: if both L and R are $NNNs, |
@@ -2713,13 +2870,13 @@ static var *evaluate(node *op, var *res) | |||
2713 | * (Seen trying to evaluate "$444 $44444") | 2870 | * (Seen trying to evaluate "$444 $44444") |
2714 | */ | 2871 | */ |
2715 | if (opinfo & OF_RES2) { | 2872 | if (opinfo & OF_RES2) { |
2716 | R.v = evaluate(op->r.n, v1+1); | 2873 | R.v = evaluate(op->r.n, TMPVAR1); |
2717 | //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? | 2874 | //TODO: L.v may be invalid now, set L.v to NULL to catch bugs? |
2718 | //L.v = NULL; | 2875 | //L.v = NULL; |
2719 | } | 2876 | if (opinfo & OF_STR2) { |
2720 | if (opinfo & OF_STR2) { | 2877 | R.s = getvar_s(R.v); |
2721 | R.s = getvar_s(R.v); | 2878 | debug_printf_eval("R.s:'%s'\n", R.s); |
2722 | debug_printf_eval("R.s:'%s'\n", R.s); | 2879 | } |
2723 | } | 2880 | } |
2724 | 2881 | ||
2725 | debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); | 2882 | debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); |
@@ -2730,7 +2887,7 @@ static var *evaluate(node *op, var *res) | |||
2730 | /* test pattern */ | 2887 | /* test pattern */ |
2731 | case XC( OC_TEST ): | 2888 | case XC( OC_TEST ): |
2732 | debug_printf_eval("TEST\n"); | 2889 | debug_printf_eval("TEST\n"); |
2733 | if ((op1->info & OPCLSMASK) == OC_COMMA) { | 2890 | if (op1->info == TI_COMMA) { |
2734 | /* it's range pattern */ | 2891 | /* it's range pattern */ |
2735 | if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { | 2892 | if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) { |
2736 | op->info |= OF_CHECKED; | 2893 | op->info |= OF_CHECKED; |
@@ -2791,12 +2948,12 @@ static var *evaluate(node *op, var *res) | |||
2791 | F = rsm->F; | 2948 | F = rsm->F; |
2792 | } | 2949 | } |
2793 | 2950 | ||
2794 | if ((opinfo & OPCLSMASK) == OC_PRINT) { | 2951 | if (opinfo == TI_PRINT) { |
2795 | if (!op1) { | 2952 | if (!op1) { |
2796 | fputs(getvar_s(intvar[F0]), F); | 2953 | fputs(getvar_s(intvar[F0]), F); |
2797 | } else { | 2954 | } else { |
2798 | while (op1) { | 2955 | for (;;) { |
2799 | var *v = evaluate(nextarg(&op1), v1); | 2956 | var *v = evaluate(nextarg(&op1), TMPVAR0); |
2800 | if (v->type & VF_NUMBER) { | 2957 | if (v->type & VF_NUMBER) { |
2801 | fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), | 2958 | fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), |
2802 | getvar_i(v), TRUE); | 2959 | getvar_i(v), TRUE); |
@@ -2804,14 +2961,13 @@ static var *evaluate(node *op, var *res) | |||
2804 | } else { | 2961 | } else { |
2805 | fputs(getvar_s(v), F); | 2962 | fputs(getvar_s(v), F); |
2806 | } | 2963 | } |
2807 | 2964 | if (!op1) | |
2808 | if (op1) | 2965 | break; |
2809 | fputs(getvar_s(intvar[OFS]), F); | 2966 | fputs(getvar_s(intvar[OFS]), F); |
2810 | } | 2967 | } |
2811 | } | 2968 | } |
2812 | fputs(getvar_s(intvar[ORS]), F); | 2969 | fputs(getvar_s(intvar[ORS]), F); |
2813 | 2970 | } else { /* PRINTF */ | |
2814 | } else { /* OC_PRINTF */ | ||
2815 | char *s = awk_printf(op1, &len); | 2971 | char *s = awk_printf(op1, &len); |
2816 | #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS | 2972 | #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS |
2817 | fwrite(s, len, 1, F); | 2973 | fwrite(s, len, 1, F); |
@@ -2824,7 +2980,31 @@ static var *evaluate(node *op, var *res) | |||
2824 | break; | 2980 | break; |
2825 | } | 2981 | } |
2826 | 2982 | ||
2827 | /* case XC( OC_DELETE ): - moved to happen before arg evaluation */ | 2983 | case XC( OC_DELETE ): |
2984 | debug_printf_eval("DELETE\n"); | ||
2985 | { | ||
2986 | /* "delete" is special: | ||
2987 | * "delete array[var--]" must evaluate index expr only once. | ||
2988 | */ | ||
2989 | uint32_t info = op1->info & OPCLSMASK; | ||
2990 | var *v; | ||
2991 | |||
2992 | if (info == OC_VAR) { | ||
2993 | v = op1->l.v; | ||
2994 | } else if (info == OC_FNARG) { | ||
2995 | v = &fnargs[op1->l.aidx]; | ||
2996 | } else { | ||
2997 | syntax_error(EMSG_NOT_ARRAY); | ||
2998 | } | ||
2999 | if (op1->r.n) { /* array ref? */ | ||
3000 | const char *s; | ||
3001 | s = getvar_s(evaluate(op1->r.n, TMPVAR0)); | ||
3002 | hash_remove(iamarray(v), s); | ||
3003 | } else { | ||
3004 | clear_array(iamarray(v)); | ||
3005 | } | ||
3006 | break; | ||
3007 | } | ||
2828 | 3008 | ||
2829 | case XC( OC_NEWSOURCE ): | 3009 | case XC( OC_NEWSOURCE ): |
2830 | debug_printf_eval("NEWSOURCE\n"); | 3010 | debug_printf_eval("NEWSOURCE\n"); |
@@ -2849,7 +3029,9 @@ static var *evaluate(node *op, var *res) | |||
2849 | 3029 | ||
2850 | case XC( OC_EXIT ): | 3030 | case XC( OC_EXIT ): |
2851 | debug_printf_eval("EXIT\n"); | 3031 | debug_printf_eval("EXIT\n"); |
2852 | awk_exit(L_d); | 3032 | if (op1) |
3033 | G.exitcode = (int)L_d; | ||
3034 | awk_exit(); | ||
2853 | 3035 | ||
2854 | /* -- recursive node type -- */ | 3036 | /* -- recursive node type -- */ |
2855 | 3037 | ||
@@ -2894,51 +3076,64 @@ static var *evaluate(node *op, var *res) | |||
2894 | case XC( OC_MOVE ): | 3076 | case XC( OC_MOVE ): |
2895 | debug_printf_eval("MOVE\n"); | 3077 | debug_printf_eval("MOVE\n"); |
2896 | /* if source is a temporary string, jusk relink it to dest */ | 3078 | /* if source is a temporary string, jusk relink it to dest */ |
2897 | //Disabled: if R.v is numeric but happens to have cached R.v->string, | 3079 | if (R.v == TMPVAR1 |
2898 | //then L.v ends up being a string, which is wrong | 3080 | && !(R.v->type & VF_NUMBER) |
2899 | // if (R.v == v1+1 && R.v->string) { | 3081 | /* Why check !NUMBER? if R.v is a number but has cached R.v->string, |
2900 | // res = setvar_p(L.v, R.v->string); | 3082 | * L.v ends up a string, which is wrong */ |
2901 | // R.v->string = NULL; | 3083 | /*&& R.v->string - always not NULL (right?) */ |
2902 | // } else { | 3084 | ) { |
3085 | res = setvar_p(L.v, R.v->string); /* avoids strdup */ | ||
3086 | R.v->string = NULL; | ||
3087 | } else { | ||
2903 | res = copyvar(L.v, R.v); | 3088 | res = copyvar(L.v, R.v); |
2904 | // } | 3089 | } |
2905 | break; | 3090 | break; |
2906 | 3091 | ||
2907 | case XC( OC_TERNARY ): | 3092 | case XC( OC_TERNARY ): |
2908 | debug_printf_eval("TERNARY\n"); | 3093 | debug_printf_eval("TERNARY\n"); |
2909 | if ((op->r.n->info & OPCLSMASK) != OC_COLON) | 3094 | if (op->r.n->info != TI_COLON) |
2910 | syntax_error(EMSG_POSSIBLE_ERROR); | 3095 | syntax_error(EMSG_POSSIBLE_ERROR); |
2911 | res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res); | 3096 | res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res); |
2912 | break; | 3097 | break; |
2913 | 3098 | ||
2914 | case XC( OC_FUNC ): { | 3099 | case XC( OC_FUNC ): { |
2915 | var *vbeg, *v; | 3100 | var *argvars, *sv_fnargs; |
2916 | const char *sv_progname; | 3101 | const char *sv_progname; |
3102 | int nargs, i; | ||
3103 | |||
2917 | debug_printf_eval("FUNC\n"); | 3104 | debug_printf_eval("FUNC\n"); |
2918 | 3105 | ||
2919 | /* The body might be empty, still has to eval the args */ | 3106 | if (!op->r.f->defined) |
2920 | if (!op->r.n->info && !op->r.f->body.first) | ||
2921 | syntax_error(EMSG_UNDEF_FUNC); | 3107 | syntax_error(EMSG_UNDEF_FUNC); |
2922 | 3108 | ||
2923 | vbeg = v = nvalloc(op->r.f->nargs + 1); | 3109 | /* The body might be empty, still has to eval the args */ |
3110 | nargs = op->r.f->nargs; | ||
3111 | argvars = nvalloc(nargs); | ||
3112 | i = 0; | ||
2924 | while (op1) { | 3113 | while (op1) { |
2925 | var *arg = evaluate(nextarg(&op1), v1); | 3114 | var *arg = evaluate(nextarg(&op1), TMPVAR0); |
2926 | copyvar(v, arg); | 3115 | if (i == nargs) { |
2927 | v->type |= VF_CHILD; | 3116 | /* call with more arguments than function takes. |
2928 | v->x.parent = arg; | 3117 | * (gawk warns: "warning: function 'f' called with more arguments than declared"). |
2929 | if (++v - vbeg >= op->r.f->nargs) | 3118 | * They are still evaluated, but discarded: */ |
2930 | break; | 3119 | clrvar(arg); |
3120 | continue; | ||
3121 | } | ||
3122 | copyvar(&argvars[i], arg); | ||
3123 | argvars[i].type |= VF_CHILD; | ||
3124 | argvars[i].x.parent = arg; | ||
3125 | i++; | ||
2931 | } | 3126 | } |
2932 | 3127 | ||
2933 | v = fnargs; | 3128 | sv_fnargs = fnargs; |
2934 | fnargs = vbeg; | ||
2935 | sv_progname = g_progname; | 3129 | sv_progname = g_progname; |
2936 | 3130 | ||
3131 | fnargs = argvars; | ||
2937 | res = evaluate(op->r.f->body.first, res); | 3132 | res = evaluate(op->r.f->body.first, res); |
3133 | nvfree(argvars, nargs); | ||
2938 | 3134 | ||
2939 | g_progname = sv_progname; | 3135 | g_progname = sv_progname; |
2940 | nvfree(fnargs); | 3136 | fnargs = sv_fnargs; |
2941 | fnargs = v; | ||
2942 | 3137 | ||
2943 | break; | 3138 | break; |
2944 | } | 3139 | } |
@@ -2954,7 +3149,7 @@ static var *evaluate(node *op, var *res) | |||
2954 | if (op1) { | 3149 | if (op1) { |
2955 | rsm = newfile(L.s); | 3150 | rsm = newfile(L.s); |
2956 | if (!rsm->F) { | 3151 | if (!rsm->F) { |
2957 | if ((opinfo & OPCLSMASK) == OC_PGETLINE) { | 3152 | if (opinfo == TI_PGETLINE) { |
2958 | rsm->F = popen(L.s, "r"); | 3153 | rsm->F = popen(L.s, "r"); |
2959 | rsm->is_pipe = TRUE; | 3154 | rsm->is_pipe = TRUE; |
2960 | } else { | 3155 | } else { |
@@ -2990,15 +3185,32 @@ static var *evaluate(node *op, var *res) | |||
2990 | double R_d = R_d; /* for compiler */ | 3185 | double R_d = R_d; /* for compiler */ |
2991 | debug_printf_eval("FBLTIN\n"); | 3186 | debug_printf_eval("FBLTIN\n"); |
2992 | 3187 | ||
3188 | if (op1 && op1->info == TI_COMMA) | ||
3189 | /* Simple builtins take one arg maximum */ | ||
3190 | syntax_error("Too many arguments"); | ||
3191 | |||
2993 | switch (opn) { | 3192 | switch (opn) { |
2994 | case F_in: | 3193 | case F_in: |
2995 | R_d = (long long)L_d; | 3194 | R_d = (long long)L_d; |
2996 | break; | 3195 | break; |
2997 | 3196 | ||
2998 | case F_rn: | 3197 | case F_rn: /*rand*/ |
2999 | R_d = (double)rand() / (double)RAND_MAX; | 3198 | if (op1) |
3199 | syntax_error("Too many arguments"); | ||
3200 | { | ||
3201 | #if RAND_MAX >= 0x7fffffff | ||
3202 | uint32_t u = ((uint32_t)rand() << 16) ^ rand(); | ||
3203 | uint64_t v = ((uint64_t)rand() << 32) | u; | ||
3204 | /* the above shift+or is optimized out on 32-bit arches */ | ||
3205 | # if RAND_MAX > 0x7fffffff | ||
3206 | v &= 0x7fffffffffffffffULL; | ||
3207 | # endif | ||
3208 | R_d = (double)v / 0x8000000000000000ULL; | ||
3209 | #else | ||
3210 | # error Not implemented for this value of RAND_MAX | ||
3211 | #endif | ||
3000 | break; | 3212 | break; |
3001 | 3213 | } | |
3002 | case F_co: | 3214 | case F_co: |
3003 | if (ENABLE_FEATURE_AWK_LIBM) { | 3215 | if (ENABLE_FEATURE_AWK_LIBM) { |
3004 | R_d = cos(L_d); | 3216 | R_d = cos(L_d); |
@@ -3038,7 +3250,9 @@ static var *evaluate(node *op, var *res) | |||
3038 | srand(seed); | 3250 | srand(seed); |
3039 | break; | 3251 | break; |
3040 | 3252 | ||
3041 | case F_ti: | 3253 | case F_ti: /*systime*/ |
3254 | if (op1) | ||
3255 | syntax_error("Too many arguments"); | ||
3042 | R_d = time(NULL); | 3256 | R_d = time(NULL); |
3043 | break; | 3257 | break; |
3044 | 3258 | ||
@@ -3077,7 +3291,7 @@ static var *evaluate(node *op, var *res) | |||
3077 | rstream *rsm; | 3291 | rstream *rsm; |
3078 | int err = 0; | 3292 | int err = 0; |
3079 | rsm = (rstream *)hash_search(fdhash, L.s); | 3293 | rsm = (rstream *)hash_search(fdhash, L.s); |
3080 | debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm); | 3294 | debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm); |
3081 | if (rsm) { | 3295 | if (rsm) { |
3082 | debug_printf_eval("OC_FBLTIN F_cl " | 3296 | debug_printf_eval("OC_FBLTIN F_cl " |
3083 | "rsm->is_pipe:%d, ->F:%p\n", | 3297 | "rsm->is_pipe:%d, ->F:%p\n", |
@@ -3088,6 +3302,11 @@ static var *evaluate(node *op, var *res) | |||
3088 | */ | 3302 | */ |
3089 | if (rsm->F) | 3303 | if (rsm->F) |
3090 | err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); | 3304 | err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); |
3305 | //TODO: fix this case: | ||
3306 | // $ awk 'BEGIN { print close(""); print ERRNO }' | ||
3307 | // -1 | ||
3308 | // close of redirection that was never opened | ||
3309 | // (we print 0, 0) | ||
3091 | free(rsm->buffer); | 3310 | free(rsm->buffer); |
3092 | hash_remove(fdhash, L.s); | 3311 | hash_remove(fdhash, L.s); |
3093 | } | 3312 | } |
@@ -3166,7 +3385,7 @@ static var *evaluate(node *op, var *res) | |||
3166 | case XC( OC_COMMA ): { | 3385 | case XC( OC_COMMA ): { |
3167 | const char *sep = ""; | 3386 | const char *sep = ""; |
3168 | debug_printf_eval("COMMA\n"); | 3387 | debug_printf_eval("COMMA\n"); |
3169 | if ((opinfo & OPCLSMASK) == OC_COMMA) | 3388 | if (opinfo == TI_COMMA) |
3170 | sep = getvar_s(intvar[SUBSEP]); | 3389 | sep = getvar_s(intvar[SUBSEP]); |
3171 | setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); | 3390 | setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); |
3172 | break; | 3391 | break; |
@@ -3251,7 +3470,7 @@ static var *evaluate(node *op, var *res) | |||
3251 | default: | 3470 | default: |
3252 | syntax_error(EMSG_POSSIBLE_ERROR); | 3471 | syntax_error(EMSG_POSSIBLE_ERROR); |
3253 | } /* switch */ | 3472 | } /* switch */ |
3254 | next: | 3473 | |
3255 | if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) | 3474 | if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) |
3256 | op = op->a.n; | 3475 | op = op->a.n; |
3257 | if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS) | 3476 | if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS) |
@@ -3260,7 +3479,10 @@ static var *evaluate(node *op, var *res) | |||
3260 | break; | 3479 | break; |
3261 | } /* while (op) */ | 3480 | } /* while (op) */ |
3262 | 3481 | ||
3263 | nvfree(v1); | 3482 | nvfree(tmpvars, 2); |
3483 | #undef TMPVAR0 | ||
3484 | #undef TMPVAR1 | ||
3485 | |||
3264 | debug_printf_eval("returning from %s(): %p\n", __func__, res); | 3486 | debug_printf_eval("returning from %s(): %p\n", __func__, res); |
3265 | return res; | 3487 | return res; |
3266 | #undef fnargs | 3488 | #undef fnargs |
@@ -3271,16 +3493,14 @@ static var *evaluate(node *op, var *res) | |||
3271 | 3493 | ||
3272 | /* -------- main & co. -------- */ | 3494 | /* -------- main & co. -------- */ |
3273 | 3495 | ||
3274 | static int awk_exit(int r) | 3496 | static int awk_exit(void) |
3275 | { | 3497 | { |
3276 | unsigned i; | 3498 | unsigned i; |
3277 | 3499 | ||
3278 | if (!exiting) { | 3500 | if (!exiting) { |
3279 | var tv; | ||
3280 | exiting = TRUE; | 3501 | exiting = TRUE; |
3281 | nextrec = FALSE; | 3502 | nextrec = FALSE; |
3282 | zero_out_var(&tv); | 3503 | evaluate(endseq.first, &G.exit__tmpvar); |
3283 | evaluate(endseq.first, &tv); | ||
3284 | } | 3504 | } |
3285 | 3505 | ||
3286 | /* waiting for children */ | 3506 | /* waiting for children */ |
@@ -3294,65 +3514,7 @@ static int awk_exit(int r) | |||
3294 | } | 3514 | } |
3295 | } | 3515 | } |
3296 | 3516 | ||
3297 | exit(r); | 3517 | exit(G.exitcode); |
3298 | } | ||
3299 | |||
3300 | /* if expr looks like "var=value", perform assignment and return 1, | ||
3301 | * otherwise return 0 */ | ||
3302 | static int is_assignment(const char *expr) | ||
3303 | { | ||
3304 | char *exprc, *val; | ||
3305 | |||
3306 | if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { | ||
3307 | return FALSE; | ||
3308 | } | ||
3309 | |||
3310 | exprc = xstrdup(expr); | ||
3311 | val = exprc + (val - expr); | ||
3312 | *val++ = '\0'; | ||
3313 | |||
3314 | unescape_string_in_place(val); | ||
3315 | setvar_u(newvar(exprc), val); | ||
3316 | free(exprc); | ||
3317 | return TRUE; | ||
3318 | } | ||
3319 | |||
3320 | /* switch to next input file */ | ||
3321 | static rstream *next_input_file(void) | ||
3322 | { | ||
3323 | #define rsm (G.next_input_file__rsm) | ||
3324 | #define files_happen (G.next_input_file__files_happen) | ||
3325 | |||
3326 | FILE *F; | ||
3327 | const char *fname, *ind; | ||
3328 | |||
3329 | if (rsm.F) | ||
3330 | fclose(rsm.F); | ||
3331 | rsm.F = NULL; | ||
3332 | rsm.pos = rsm.adv = 0; | ||
3333 | |||
3334 | for (;;) { | ||
3335 | if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { | ||
3336 | if (files_happen) | ||
3337 | return NULL; | ||
3338 | fname = "-"; | ||
3339 | F = stdin; | ||
3340 | break; | ||
3341 | } | ||
3342 | ind = getvar_s(incvar(intvar[ARGIND])); | ||
3343 | fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); | ||
3344 | if (fname && *fname && !is_assignment(fname)) { | ||
3345 | F = xfopen_stdin(fname); | ||
3346 | break; | ||
3347 | } | ||
3348 | } | ||
3349 | |||
3350 | files_happen = TRUE; | ||
3351 | setvar_s(intvar[FILENAME], fname); | ||
3352 | rsm.F = F; | ||
3353 | return &rsm; | ||
3354 | #undef rsm | ||
3355 | #undef files_happen | ||
3356 | } | 3518 | } |
3357 | 3519 | ||
3358 | int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; | 3520 | int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
@@ -3366,7 +3528,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3366 | llist_t *list_e = NULL; | 3528 | llist_t *list_e = NULL; |
3367 | #endif | 3529 | #endif |
3368 | int i; | 3530 | int i; |
3369 | var tv; | ||
3370 | 3531 | ||
3371 | INIT_G(); | 3532 | INIT_G(); |
3372 | 3533 | ||
@@ -3375,15 +3536,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3375 | if (ENABLE_LOCALE_SUPPORT) | 3536 | if (ENABLE_LOCALE_SUPPORT) |
3376 | setlocale(LC_NUMERIC, "C"); | 3537 | setlocale(LC_NUMERIC, "C"); |
3377 | 3538 | ||
3378 | /* allocate global buffer */ | ||
3379 | g_buf = xmalloc(MAXVARFMT + 1); | ||
3380 | |||
3381 | vhash = hash_init(); | ||
3382 | ahash = hash_init(); | ||
3383 | fdhash = hash_init(); | ||
3384 | fnhash = hash_init(); | ||
3385 | |||
3386 | /* initialize variables */ | 3539 | /* initialize variables */ |
3540 | vhash = hash_init(); | ||
3387 | { | 3541 | { |
3388 | char *vnames = (char *)vNames; /* cheat */ | 3542 | char *vnames = (char *)vNames; /* cheat */ |
3389 | char *vvalues = (char *)vValues; | 3543 | char *vvalues = (char *)vValues; |
@@ -3405,10 +3559,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3405 | handle_special(intvar[FS]); | 3559 | handle_special(intvar[FS]); |
3406 | handle_special(intvar[RS]); | 3560 | handle_special(intvar[RS]); |
3407 | 3561 | ||
3408 | newfile("/dev/stdin")->F = stdin; | ||
3409 | newfile("/dev/stdout")->F = stdout; | ||
3410 | newfile("/dev/stderr")->F = stderr; | ||
3411 | |||
3412 | /* Huh, people report that sometimes environ is NULL. Oh well. */ | 3562 | /* Huh, people report that sometimes environ is NULL. Oh well. */ |
3413 | if (environ) { | 3563 | if (environ) { |
3414 | char **envp; | 3564 | char **envp; |
@@ -3438,46 +3588,44 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3438 | if (!is_assignment(llist_pop(&list_v))) | 3588 | if (!is_assignment(llist_pop(&list_v))) |
3439 | bb_show_usage(); | 3589 | bb_show_usage(); |
3440 | } | 3590 | } |
3591 | |||
3592 | /* Parse all supplied programs */ | ||
3593 | fnhash = hash_init(); | ||
3594 | ahash = hash_init(); | ||
3441 | while (list_f) { | 3595 | while (list_f) { |
3442 | int fd; | 3596 | int fd; |
3443 | char *s; | 3597 | char *s; |
3444 | 3598 | ||
3445 | g_progname = llist_pop(&list_f); | 3599 | g_progname = llist_pop(&list_f); |
3446 | fd = xopen_stdin(g_progname); | 3600 | fd = xopen_stdin(g_progname); |
3447 | /* 1st byte is reserved for "move name one char back" trick in next_token */ | 3601 | s = xmalloc_read(fd, NULL); /* it's NUL-terminated */ |
3448 | i = 1; | ||
3449 | s = NULL; | ||
3450 | for (;;) { | ||
3451 | int sz; | ||
3452 | s = xrealloc(s, i + 1000); | ||
3453 | sz = safe_read(fd, s + i, 1000); | ||
3454 | if (sz <= 0) | ||
3455 | break; | ||
3456 | i += sz; | ||
3457 | } | ||
3458 | s = xrealloc(s, i + 1); /* trim unused 999 bytes */ | ||
3459 | s[i] = '\0'; | ||
3460 | close(fd); | 3602 | close(fd); |
3461 | parse_program(s + 1); | 3603 | parse_program(s); |
3462 | free(s); | 3604 | free(s); |
3463 | } | 3605 | } |
3464 | g_progname = "cmd. line"; | 3606 | g_progname = "cmd. line"; |
3465 | #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS | 3607 | #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS |
3466 | while (list_e) { | 3608 | while (list_e) { |
3467 | /* NB: "move name one char back" trick in next_token | ||
3468 | * can use argv[i][-1] here. | ||
3469 | */ | ||
3470 | parse_program(llist_pop(&list_e)); | 3609 | parse_program(llist_pop(&list_e)); |
3471 | } | 3610 | } |
3472 | #endif | 3611 | #endif |
3612 | //FIXME: preserve order of -e and -f | ||
3613 | //TODO: implement -i LIBRARY and -E FILE too, they are easy-ish | ||
3473 | if (!(opt & (OPT_f | OPT_e))) { | 3614 | if (!(opt & (OPT_f | OPT_e))) { |
3474 | if (!*argv) | 3615 | if (!*argv) |
3475 | bb_show_usage(); | 3616 | bb_show_usage(); |
3476 | /* NB: "move name one char back" trick in next_token | ||
3477 | * can use argv[i][-1] here. | ||
3478 | */ | ||
3479 | parse_program(*argv++); | 3617 | parse_program(*argv++); |
3480 | } | 3618 | } |
3619 | /* Free unused parse structures */ | ||
3620 | //hash_free(fnhash); // ~250 bytes when empty, used only for function names | ||
3621 | //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs | ||
3622 | // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not). | ||
3623 | free(fnhash->items); | ||
3624 | free(fnhash); | ||
3625 | fnhash = NULL; // debug | ||
3626 | //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing | ||
3627 | |||
3628 | /* Parsing done, on to executing */ | ||
3481 | 3629 | ||
3482 | /* fill in ARGV array */ | 3630 | /* fill in ARGV array */ |
3483 | setari_u(intvar[ARGV], 0, "awk"); | 3631 | setari_u(intvar[ARGV], 0, "awk"); |
@@ -3486,10 +3634,14 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3486 | setari_u(intvar[ARGV], ++i, *argv++); | 3634 | setari_u(intvar[ARGV], ++i, *argv++); |
3487 | setvar_i(intvar[ARGC], i + 1); | 3635 | setvar_i(intvar[ARGC], i + 1); |
3488 | 3636 | ||
3489 | zero_out_var(&tv); | 3637 | //fdhash = ahash; // done via define |
3490 | evaluate(beginseq.first, &tv); | 3638 | newfile("/dev/stdin")->F = stdin; |
3639 | newfile("/dev/stdout")->F = stdout; | ||
3640 | newfile("/dev/stderr")->F = stderr; | ||
3641 | |||
3642 | evaluate(beginseq.first, &G.main__tmpvar); | ||
3491 | if (!mainseq.first && !endseq.first) | 3643 | if (!mainseq.first && !endseq.first) |
3492 | awk_exit(EXIT_SUCCESS); | 3644 | awk_exit(); |
3493 | 3645 | ||
3494 | /* input file could already be opened in BEGIN block */ | 3646 | /* input file could already be opened in BEGIN block */ |
3495 | if (!iF) | 3647 | if (!iF) |
@@ -3504,7 +3656,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3504 | nextrec = FALSE; | 3656 | nextrec = FALSE; |
3505 | incvar(intvar[NR]); | 3657 | incvar(intvar[NR]); |
3506 | incvar(intvar[FNR]); | 3658 | incvar(intvar[FNR]); |
3507 | evaluate(mainseq.first, &tv); | 3659 | evaluate(mainseq.first, &G.main__tmpvar); |
3508 | 3660 | ||
3509 | if (nextfile) | 3661 | if (nextfile) |
3510 | break; | 3662 | break; |
@@ -3516,6 +3668,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv) | |||
3516 | iF = next_input_file(); | 3668 | iF = next_input_file(); |
3517 | } | 3669 | } |
3518 | 3670 | ||
3519 | awk_exit(EXIT_SUCCESS); | 3671 | awk_exit(); |
3520 | /*return 0;*/ | 3672 | /*return 0;*/ |
3521 | } | 3673 | } |
diff --git a/modutils/modprobe.c b/modutils/modprobe.c index c334186b8..235706fd5 100644 --- a/modutils/modprobe.c +++ b/modutils/modprobe.c | |||
@@ -629,8 +629,9 @@ int modprobe_main(int argc UNUSED_PARAM, char **argv) | |||
629 | config_close(parser); | 629 | config_close(parser); |
630 | 630 | ||
631 | parser = config_open2("modules.builtin", fopen_for_read); | 631 | parser = config_open2("modules.builtin", fopen_for_read); |
632 | /* this file contains lines like "kernel/fs/binfmt_script.ko" */ | ||
632 | while (config_read(parser, &s, 1, 1, "# \t", PARSE_NORMAL)) | 633 | while (config_read(parser, &s, 1, 1, "# \t", PARSE_NORMAL)) |
633 | get_or_add_modentry(s)->flags |= MODULE_FLAG_BUILTIN; | 634 | get_or_add_modentry(bb_basename(s))->flags |= MODULE_FLAG_BUILTIN; |
634 | config_close(parser); | 635 | config_close(parser); |
635 | } | 636 | } |
636 | 637 | ||
diff --git a/testsuite/awk.tests b/testsuite/awk.tests index cf9b722dc..770d8ffce 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests | |||
@@ -45,6 +45,16 @@ testing "awk handles empty function f(arg){}" \ | |||
45 | "" "" | 45 | "" "" |
46 | 46 | ||
47 | prg=' | 47 | prg=' |
48 | function empty_fun(){} | ||
49 | END {empty_fun() | ||
50 | print "Ok" | ||
51 | }' | ||
52 | testing "awk handles empty function f(){}" \ | ||
53 | "awk '$prg'" \ | ||
54 | "Ok\n" \ | ||
55 | "" "" | ||
56 | |||
57 | prg=' | ||
48 | function outer_fun() { | 58 | function outer_fun() { |
49 | return 1 | 59 | return 1 |
50 | } | 60 | } |
@@ -71,6 +81,23 @@ testing "awk properly handles undefined function" \ | |||
71 | "L1\n\nawk: cmd. line:5: Call to undefined function\n" \ | 81 | "L1\n\nawk: cmd. line:5: Call to undefined function\n" \ |
72 | "" "" | 82 | "" "" |
73 | 83 | ||
84 | prg=' | ||
85 | BEGIN { | ||
86 | v=1 | ||
87 | a=2 | ||
88 | print v (a) | ||
89 | }' | ||
90 | testing "awk 'v (a)' is not a function call, it is a concatenation" \ | ||
91 | "awk '$prg' 2>&1" \ | ||
92 | "12\n" \ | ||
93 | "" "" | ||
94 | |||
95 | prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}' | ||
96 | testing "awk unused function args are evaluated" \ | ||
97 | "awk '$prg' 2>&1" \ | ||
98 | "G\nG\nF\n" \ | ||
99 | "" "" | ||
100 | |||
74 | 101 | ||
75 | optional DESKTOP | 102 | optional DESKTOP |
76 | testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" | 103 | testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n" |
@@ -418,4 +445,9 @@ testing 'awk $NF is empty' \ | |||
418 | '' \ | 445 | '' \ |
419 | 'a=====123=' | 446 | 'a=====123=' |
420 | 447 | ||
448 | testing "awk exit N propagates through END's exit" \ | ||
449 | "awk 'BEGIN { exit 42 } END { exit }'; echo \$?" \ | ||
450 | "42\n" \ | ||
451 | '' '' | ||
452 | |||
421 | exit $FAILCOUNT | 453 | exit $FAILCOUNT |