1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
17 /* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...) do {} while (0)
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
31 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
32 #define VF_ARRAY 0x0002 /* 1 = it's an array */
34 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
35 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
37 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
42 /* these flags are sticky: they are preserved when the variable's value is changed */
43 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
45 typedef struct walker_list {
48 struct walker_list *prev;
53 typedef struct var_s {
54 unsigned type; /* flags */
58 int aidx; /* func arg idx (for compilation stage) */
59 struct xhash_s *array; /* array ptr */
60 struct var_s *parent; /* for func args, ptr to actual parameter */
61 walker_list *walker; /* list of array elements (for..in) */
65 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
66 typedef struct chain_s {
69 const char *programname;
73 typedef struct func_s {
79 typedef struct rstream_s {
88 typedef struct hash_item_s {
90 struct var_s v; /* variable/array hash */
91 struct rstream_s rs; /* redirect streams hash */
92 struct func_s f; /* functions hash */
94 struct hash_item_s *next; /* next in chain */
95 char name[1]; /* really it's longer */
98 typedef struct xhash_s {
99 unsigned nel; /* number of elements currently stored */
100 unsigned csize; /* current hash size (number of buckets in items[]) */
101 unsigned nprime; /* index into PRIMES[] of the next hash size */
102 unsigned glen; /* total length of item names, each counted with its NUL */
103 struct hash_item_s **items; /* bucket heads; items in a bucket are chained via ->next */
107 typedef struct node_s {
128 /* Block of temporary variables */
129 typedef struct nvblock_s {
132 struct nvblock_s *prev;
133 struct nvblock_s *next;
137 typedef struct tsplitter_s {
142 /* simple token classes */
143 /* Order and hex values are very important!!! See next_token() */
144 #define TC_SEQSTART 1 /* ( */
145 #define TC_SEQTERM (1 << 1) /* ) */
146 #define TC_REGEXP (1 << 2) /* /.../ */
147 #define TC_OUTRDR (1 << 3) /* | > >> */
148 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
149 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
150 #define TC_BINOPX (1 << 6) /* two-opnd operator */
151 #define TC_IN (1 << 7)
152 #define TC_COMMA (1 << 8)
153 #define TC_PIPE (1 << 9) /* input redirection pipe */
154 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
155 #define TC_ARRTERM (1 << 11) /* ] */
156 #define TC_GRPSTART (1 << 12) /* { */
157 #define TC_GRPTERM (1 << 13) /* } */
158 #define TC_SEMICOL (1 << 14)
159 #define TC_NEWLINE (1 << 15)
160 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
161 #define TC_WHILE (1 << 17)
162 #define TC_ELSE (1 << 18)
163 #define TC_BUILTIN (1 << 19)
164 #define TC_GETLINE (1 << 20)
165 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
166 #define TC_BEGIN (1 << 22)
167 #define TC_END (1 << 23)
168 #define TC_EOF (1 << 24)
169 #define TC_VARIABLE (1 << 25)
170 #define TC_ARRAY (1 << 26)
171 #define TC_FUNCTION (1 << 27)
172 #define TC_STRING (1 << 28)
173 #define TC_NUMBER (1 << 29)
175 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
177 /* combined token classes */
178 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
179 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
180 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
181 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
183 #define TC_STATEMNT (TC_STATX | TC_WHILE)
184 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
186 /* word tokens, cannot mean something else if not expected */
187 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
188 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
190 /* discard newlines after these */
191 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
192 | TC_BINOP | TC_OPTERM)
194 /* what can expression begin with */
195 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
196 /* what can group begin with */
197 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
199 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
200 /* operator is inserted between them */
201 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
202 | TC_STRING | TC_NUMBER | TC_UOPPOST)
203 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
205 #define OF_RES1 0x010000
206 #define OF_RES2 0x020000
207 #define OF_STR1 0x040000
208 #define OF_STR2 0x080000
209 #define OF_NUM1 0x100000
210 #define OF_CHECKED 0x200000
212 /* combined operator flags */
215 #define xS (OF_RES2 | OF_STR2)
217 #define VV (OF_RES1 | OF_RES2)
218 #define Nx (OF_RES1 | OF_NUM1)
219 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
220 #define Sx (OF_RES1 | OF_STR1)
221 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
222 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
224 #define OPCLSMASK 0xFF00
225 #define OPNMASK 0x007F
227 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
228 * For builtins that byte has a different meaning, bit by bit: n n s3 s2 s1 v3 v2 v1,
229 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve Nth arg to string
231 #define P(x) (x << 24) /* put priority/arg-spec byte x into bits 24..31. NOTE(review): x is unparenthesized and shifted as int; with 32-bit int, P(0x80) and above shift into the sign bit - confirm whether ((uint32_t)(x) << 24) is wanted */
232 #define PRIMASK 0x7F000000 /* priority bits of an info word, top bit excluded */
233 #define PRIMASK2 0x7E000000 /* like PRIMASK, but also ignores the lowest priority bit */
235 /* Operation classes */
237 #define SHIFT_TIL_THIS 0x0600
238 #define RECUR_FROM_THIS 0x1000
241 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
242 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
244 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
245 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
246 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
248 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
249 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
250 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
251 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
252 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
253 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
254 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
255 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
258 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
262 /* simple builtins */
264 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
265 F_ti, F_le, F_sy, F_ff, F_cl
270 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
272 B_an, B_co, B_ls, B_or, B_rs, B_xo,
275 /* tokens and their corresponding info values */
277 #define NTC "\377" /* switch to next token class (tc<<1) */
280 #define OC_B OC_BUILTIN
282 static const char tokenlist[] ALIGN1 =
285 "\1/" NTC /* REGEXP */
286 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
287 "\2++" "\2--" NTC /* UOPPOST */
288 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
289 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
290 "\2*=" "\2/=" "\2%=" "\2^="
291 "\1+" "\1-" "\3**=" "\2**"
292 "\1/" "\1%" "\1^" "\1*"
293 "\2!=" "\2>=" "\2<=" "\1>"
294 "\1<" "\2!~" "\1~" "\2&&"
295 "\2||" "\1?" "\1:" NTC
299 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
305 "\2if" "\2do" "\3for" "\5break" /* STATX */
306 "\10continue" "\6delete" "\5print"
307 "\6printf" "\4next" "\10nextfile"
308 "\6return" "\4exit" NTC
312 "\3and" "\5compl" "\6lshift" "\2or"
314 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
315 "\3cos" "\3exp" "\3int" "\3log"
316 "\4rand" "\3sin" "\4sqrt" "\5srand"
317 "\6gensub" "\4gsub" "\5index" "\6length"
318 "\5match" "\5split" "\7sprintf" "\3sub"
319 "\6substr" "\7systime" "\10strftime" "\6mktime"
320 "\7tolower" "\7toupper" NTC
322 "\4func" "\10function" NTC
327 static const uint32_t tokeninfo[] = {
331 xS|'a', xS|'w', xS|'|',
332 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
333 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
335 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
336 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
337 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
338 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
339 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
340 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
341 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
342 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
343 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
344 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
345 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
346 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
347 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
348 OC_COLON|xx|P(67)|':',
351 OC_PGETLINE|SV|P(37),
352 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
353 OC_UNARY|xV|P(19)|'!',
359 ST_IF, ST_DO, ST_FOR, OC_BREAK,
360 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
361 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
362 OC_RETURN|Vx, OC_EXIT|Nx,
366 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
367 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
368 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
369 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
370 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
371 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
372 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
373 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
374 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
381 /* internal variable names and their initial values */
382 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
384 CONVFMT, OFMT, FS, OFS,
385 ORS, RS, RT, FILENAME,
386 SUBSEP, F0, ARGIND, ARGC,
387 ARGV, ERRNO, FNR, NR,
388 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
391 static const char vNames[] ALIGN1 =
392 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
393 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
394 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
395 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
396 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
398 static const char vValues[] ALIGN1 =
399 "%.6g\0" "%.6g\0" " \0" " \0"
400 "\n\0" "\n\0" "\0" "\0"
401 "\034\0" "\0" "\377";
403 /* hash_init() starts with FIRST_PRIME buckets; hash_rebuild() then steps through PRIMES[] in order */
404 #define FIRST_PRIME 61
405 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
408 /* Globals. Split in two parts so that first one is addressed
409 * with (mostly short) negative offsets.
410 * NB: it's unsafe to put members of type "double"
411 * into globals2 (gcc may fail to align them).
415 chain beginseq, mainseq, endseq;
417 node *break_ptr, *continue_ptr;
419 xhash *vhash, *ahash, *fdhash, *fnhash;
420 const char *g_progname;
423 int maxfields; /* used in fsrealloc() only */
432 smallint is_f0_split;
435 uint32_t t_info; /* often used */
441 var *intvar[NUM_INTERNAL_VARS]; /* often used */
443 /* former statics from various functions */
444 char *split_f0__fstrings;
446 uint32_t next_token__save_tclass;
447 uint32_t next_token__save_info;
448 uint32_t next_token__ltclass;
449 smallint next_token__concat_inserted;
451 smallint next_input_file__files_happen;
452 rstream next_input_file__rsm;
454 var *evaluate__fnargs;
455 unsigned evaluate__seed;
456 regex_t evaluate__sreg;
460 tsplitter exec_builtin__tspl;
462 /* biggest and least used members go last */
463 tsplitter fsplitter, rsplitter;
465 #define G1 (ptr_to_globals[-1]) /* first part of the globals: sits just below the address ptr_to_globals holds */
466 #define G (*(struct globals2 *)ptr_to_globals) /* second part of the globals: starts at that address */
467 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
468 /*char G1size[sizeof(G1)]; - 0x74 */
469 /*char Gsize[sizeof(G)]; - 0x1c4 */
470 /* Trying to keep most of members accessible with short offsets: */
471 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
472 #define t_double (G1.t_double )
473 #define beginseq (G1.beginseq )
474 #define mainseq (G1.mainseq )
475 #define endseq (G1.endseq )
476 #define seq (G1.seq )
477 #define break_ptr (G1.break_ptr )
478 #define continue_ptr (G1.continue_ptr)
480 #define vhash (G1.vhash )
481 #define ahash (G1.ahash )
482 #define fdhash (G1.fdhash )
483 #define fnhash (G1.fnhash )
484 #define g_progname (G1.g_progname )
485 #define g_lineno (G1.g_lineno )
486 #define nfields (G1.nfields )
487 #define maxfields (G1.maxfields )
488 #define Fields (G1.Fields )
489 #define g_cb (G1.g_cb )
490 #define g_pos (G1.g_pos )
491 #define g_buf (G1.g_buf )
492 #define icase (G1.icase )
493 #define exiting (G1.exiting )
494 #define nextrec (G1.nextrec )
495 #define nextfile (G1.nextfile )
496 #define is_f0_split (G1.is_f0_split )
497 #define t_info (G.t_info )
498 #define t_tclass (G.t_tclass )
499 #define t_string (G.t_string )
500 #define t_lineno (G.t_lineno )
501 #define t_rollback (G.t_rollback )
502 #define intvar (G.intvar )
503 #define fsplitter (G.fsplitter )
504 #define rsplitter (G.rsplitter )
505 #define INIT_G() do { \
506 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
507 G.next_token__ltclass = TC_OPTERM; \
508 G.evaluate__seed = 1; \
512 /* function prototypes */
513 static void handle_special(var *);
514 static node *parse_expr(uint32_t);
515 static void chain_group(void);
516 static var *evaluate(node *, var *);
517 static rstream *next_input_file(void);
518 static int fmt_num(char *, int, const char *, double, int);
519 static int awk_exit(int) NORETURN;
521 /* ---- error handling ---- */
523 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
524 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
525 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
526 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
527 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
528 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
529 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
530 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
531 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
532 #if !ENABLE_FEATURE_AWK_LIBM
533 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
536 static void zero_out_var(var *vp)
538 memset(vp, 0, sizeof(*vp));
541 static void syntax_error(const char *message) NORETURN;
542 static void syntax_error(const char *message)
544 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
547 /* ---- hash stuff ---- */
549 static unsigned hashidx(const char *name)
554 idx = *name++ + (idx << 6) - idx;
558 /* create new hash */
559 static xhash *hash_init(void)
563 newhash = xzalloc(sizeof(*newhash));
564 newhash->csize = FIRST_PRIME;
565 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
570 /* find item in hash, return ptr to data, NULL if not found */
571 static void *hash_search(xhash *hash, const char *name)
575 hi = hash->items[hashidx(name) % hash->csize];
577 if (strcmp(hi->name, name) == 0)
584 /* grow hash if it becomes too big */
585 static void hash_rebuild(xhash *hash)
587 unsigned newsize, i, idx;
588 hash_item **newitems, *hi, *thi;
590 if (hash->nprime == ARRAY_SIZE(PRIMES))
593 newsize = PRIMES[hash->nprime++];
594 newitems = xzalloc(newsize * sizeof(newitems[0]));
596 for (i = 0; i < hash->csize; i++) {
601 idx = hashidx(thi->name) % newsize;
602 thi->next = newitems[idx];
608 hash->csize = newsize;
609 hash->items = newitems;
612 /* find item in hash, add it if necessary. Return ptr to data */
613 static void *hash_find(xhash *hash, const char *name)
619 hi = hash_search(hash, name);
621 if (++hash->nel / hash->csize > 10)
624 l = strlen(name) + 1;
625 hi = xzalloc(sizeof(*hi) + l);
626 strcpy(hi->name, name);
628 idx = hashidx(name) % hash->csize;
629 hi->next = hash->items[idx];
630 hash->items[idx] = hi;
636 #define findvar(hash, name) ((var*) hash_find((hash), (name))) /* find-or-add entry in 'hash', viewed as a var */
637 #define newvar(name) ((var*) hash_find(vhash, (name))) /* find-or-add var entry in vhash */
638 #define newfile(name) ((rstream*)hash_find(fdhash, (name))) /* find-or-add rstream entry in fdhash */
639 #define newfunc(name) ((func*) hash_find(fnhash, (name))) /* find-or-add func entry in fnhash */
641 static void hash_remove(xhash *hash, const char *name)
643 hash_item *hi, **phi;
645 phi = &(hash->items[hashidx(name) % hash->csize]);
648 if (strcmp(hi->name, name) == 0) {
649 hash->glen -= (strlen(name) + 1);
659 /* ------ some useful functions ------ */
661 static void skip_spaces(char **s)
666 if (*p == '\\' && p[1] == '\n') {
669 } else if (*p != ' ' && *p != '\t') {
677 /* returns old *s, advances *s past word and terminating NUL */
678 static char *nextword(char **s)
681 while (*(*s)++ != '\0')
686 static char nextchar(char **s)
693 c = bb_process_escape_sequence((const char**)s);
694 if (c == '\\' && *s == pps)
699 static ALWAYS_INLINE int isalnum_(int c)
701 return (isalnum(c) || c == '_');
704 static double my_strtod(char **pp)
708 && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
710 return strtoull(*pp, pp, 0);
713 return strtod(*pp, pp);
716 /* -------- working with variables (set/get/copy/etc) -------- */
718 static xhash *iamarray(var *v)
722 while (a->type & VF_CHILD)
725 if (!(a->type & VF_ARRAY)) {
727 a->x.array = hash_init();
732 static void clear_array(xhash *array)
737 for (i = 0; i < array->csize; i++) {
738 hi = array->items[i];
742 free(thi->data.v.string);
745 array->items[i] = NULL;
747 array->glen = array->nel = 0;
750 /* clear a variable */
751 static var *clrvar(var *v)
753 if (!(v->type & VF_FSTR))
756 v->type &= VF_DONTTOUCH;
762 /* assign string value to variable */
763 static var *setvar_p(var *v, char *value)
771 /* same as setvar_p but make a copy of string */
772 static var *setvar_s(var *v, const char *value)
774 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
777 /* same as setvar_s but sets USER flag */
778 static var *setvar_u(var *v, const char *value)
780 v = setvar_s(v, value);
785 /* set array element to user string */
786 static void setari_u(var *a, int idx, const char *s)
790 v = findvar(iamarray(a), itoa(idx));
794 /* assign numeric value to variable */
795 static var *setvar_i(var *v, double value)
798 v->type |= VF_NUMBER;
804 static const char *getvar_s(var *v)
806 /* if v is numeric and has no cached string, convert it to string */
807 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
808 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
809 v->string = xstrdup(g_buf);
810 v->type |= VF_CACHED;
812 return (v->string == NULL) ? "" : v->string;
815 static double getvar_i(var *v)
819 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
823 v->number = my_strtod(&s);
824 if (v->type & VF_USER) {
832 v->type |= VF_CACHED;
837 /* Used for operands of bitwise ops */
838 static unsigned long getvar_i_int(var *v)
840 double d = getvar_i(v);
842 /* Casting doubles to longs is undefined for values outside
843 * of target type range. Try to widen it as much as possible */
845 return (unsigned long)d;
846 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
847 return - (long) (unsigned long) (-d);
850 static var *copyvar(var *dest, const var *src)
854 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
855 dest->number = src->number;
857 dest->string = xstrdup(src->string);
859 handle_special(dest);
863 static var *incvar(var *v)
865 return setvar_i(v, getvar_i(v) + 1.0);
868 /* return true if v is number or numeric string */
869 static int is_numeric(var *v)
872 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
875 /* return 1 when value of v corresponds to true, 0 otherwise */
876 static int istrue(var *v)
879 return (v->number != 0);
880 return (v->string && v->string[0]);
883 /* temporary variables allocator. Last allocated should be first freed */
884 static var *nvalloc(int n)
892 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
898 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
899 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
901 g_cb->pos = g_cb->nv;
903 /*g_cb->next = NULL; - xzalloc did it */
911 while (v < g_cb->pos) {
920 static void nvfree(var *v)
924 if (v < g_cb->nv || v >= g_cb->pos)
925 syntax_error(EMSG_INTERNAL_ERROR);
927 for (p = v; p < g_cb->pos; p++) {
928 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
929 clear_array(iamarray(p));
930 free(p->x.array->items);
933 if (p->type & VF_WALK) {
935 walker_list *w = p->x.walker;
936 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
940 debug_printf_walker(" free(%p)\n", w);
949 while (g_cb->prev && g_cb->pos == g_cb->nv) {
954 /* ------- awk program text parsing ------- */
956 /* Parse the next token at the global position g_pos and place the results into the global t_* variables.
957 * If the token isn't expected, give up with a syntax error. Return the token class
959 static uint32_t next_token(uint32_t expected)
961 #define concat_inserted (G.next_token__concat_inserted)
962 #define save_tclass (G.next_token__save_tclass)
963 #define save_info (G.next_token__save_info)
964 /* Initialized to TC_OPTERM: */
965 #define ltclass (G.next_token__ltclass)
976 } else if (concat_inserted) {
977 concat_inserted = FALSE;
978 t_tclass = save_tclass;
987 while (*p != '\n' && *p != '\0')
996 } else if (*p == '\"') {
1001 if (*p == '\0' || *p == '\n')
1002 syntax_error(EMSG_UNEXP_EOS);
1003 *s++ = nextchar(&pp);
1010 } else if ((expected & TC_REGEXP) && *p == '/') {
1014 if (*p == '\0' || *p == '\n')
1015 syntax_error(EMSG_UNEXP_EOS);
1019 s[-1] = bb_process_escape_sequence((const char **)&pp);
1032 } else if (*p == '.' || isdigit(*p)) {
1035 t_double = my_strtod(&pp);
1038 syntax_error(EMSG_UNEXP_TOKEN);
1042 /* search for something known */
1052 /* if token class is expected, token
1053 * matches and it's not a longer word,
1054 * then this is what we are looking for
1056 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1057 && *tl == *p && strncmp(p, tl, l) == 0
1058 && !((tc & TC_WORD) && isalnum_(p[l]))
1069 /* it's a name (var/array/function),
1070 * otherwise it's something wrong
1073 syntax_error(EMSG_UNEXP_TOKEN);
1076 while (isalnum_(*++p)) {
1081 /* also consume whitespace between functionname and bracket */
1082 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1096 /* skipping newlines in some cases */
1097 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1100 /* insert concatenation operator when needed */
1101 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1102 concat_inserted = TRUE;
1106 t_info = OC_CONCAT | SS | P(35);
1113 /* Are we ready for this? */
1114 if (!(ltclass & expected))
1115 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1116 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1119 #undef concat_inserted
1125 static void rollback_token(void)
1130 static node *new_node(uint32_t info)
1134 n = xzalloc(sizeof(node));
1136 n->lineno = g_lineno;
1140 static node *mk_re_node(const char *s, node *n, regex_t *re)
1142 n->info = OC_REGEXP;
1145 xregcomp(re, s, REG_EXTENDED);
1146 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1151 static node *condition(void)
1153 next_token(TC_SEQSTART);
1154 return parse_expr(TC_SEQTERM);
1157 /* parse expression terminated by given argument, return ptr
1158 * to built subtree. Terminator is eaten by parse_expr */
1159 static node *parse_expr(uint32_t iexp)
1168 sn.r.n = glptr = NULL;
1169 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1171 while (!((tc = next_token(xtc)) & iexp)) {
1172 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1173 /* input redirection (<) attached to glptr node */
1174 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1176 xtc = TC_OPERAND | TC_UOPPRE;
1179 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1180 /* for binary and postfix-unary operators, jump back over
1181 * previous operators with higher priority */
1183 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1184 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1188 if ((t_info & OPCLSMASK) == OC_TERNARY)
1190 cn = vn->a.n->r.n = new_node(t_info);
1192 if (tc & TC_BINOP) {
1194 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1195 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1197 next_token(TC_GETLINE);
1198 /* give maximum priority to this pipe */
1199 cn->info &= ~PRIMASK;
1200 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1204 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1209 /* for operands and prefix-unary operators, attach them
1212 cn = vn->r.n = new_node(t_info);
1214 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1215 if (tc & (TC_OPERAND | TC_REGEXP)) {
1216 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1217 /* one should be very careful with switch on tclass -
1218 * only simple tclasses should be used! */
1223 v = hash_search(ahash, t_string);
1225 cn->info = OC_FNARG;
1226 cn->l.i = v->x.aidx;
1228 cn->l.v = newvar(t_string);
1230 if (tc & TC_ARRAY) {
1232 cn->r.n = parse_expr(TC_ARRTERM);
1239 v = cn->l.v = xzalloc(sizeof(var));
1241 setvar_i(v, t_double);
1243 setvar_s(v, t_string);
1247 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1252 cn->r.f = newfunc(t_string);
1253 cn->l.n = condition();
1257 cn = vn->r.n = parse_expr(TC_SEQTERM);
1263 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1267 cn->l.n = condition();
1276 /* add node to chain. Return ptr to alloc'd node */
1277 static node *chain_node(uint32_t info)
1282 seq->first = seq->last = new_node(0);
1284 if (seq->programname != g_progname) {
1285 seq->programname = g_progname;
1286 n = chain_node(OC_NEWSOURCE);
1287 n->l.s = xstrdup(g_progname);
1292 seq->last = n->a.n = new_node(OC_DONE);
1297 static void chain_expr(uint32_t info)
1301 n = chain_node(info);
1302 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1303 if (t_tclass & TC_GRPTERM)
1307 static node *chain_loop(node *nn)
1309 node *n, *n2, *save_brk, *save_cont;
1311 save_brk = break_ptr;
1312 save_cont = continue_ptr;
1314 n = chain_node(OC_BR | Vx);
1315 continue_ptr = new_node(OC_EXEC);
1316 break_ptr = new_node(OC_EXEC);
1318 n2 = chain_node(OC_EXEC | Vx);
1321 continue_ptr->a.n = n2;
1322 break_ptr->a.n = n->r.n = seq->last;
1324 continue_ptr = save_cont;
1325 break_ptr = save_brk;
1330 /* parse group and attach it to chain */
1331 static void chain_group(void)
1337 c = next_token(TC_GRPSEQ);
1338 } while (c & TC_NEWLINE);
1340 if (c & TC_GRPSTART) {
1341 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1342 if (t_tclass & TC_NEWLINE)
1347 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1349 chain_expr(OC_EXEC | Vx);
1350 } else { /* TC_STATEMNT */
1351 switch (t_info & OPCLSMASK) {
1353 n = chain_node(OC_BR | Vx);
1354 n->l.n = condition();
1356 n2 = chain_node(OC_EXEC);
1358 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1360 n2->a.n = seq->last;
1368 n = chain_loop(NULL);
1373 n2 = chain_node(OC_EXEC);
1374 n = chain_loop(NULL);
1376 next_token(TC_WHILE);
1377 n->l.n = condition();
1381 next_token(TC_SEQSTART);
1382 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1383 if (t_tclass & TC_SEQTERM) { /* for-in */
1384 if ((n2->info & OPCLSMASK) != OC_IN)
1385 syntax_error(EMSG_UNEXP_TOKEN);
1386 n = chain_node(OC_WALKINIT | VV);
1389 n = chain_loop(NULL);
1390 n->info = OC_WALKNEXT | Vx;
1392 } else { /* for (;;) */
1393 n = chain_node(OC_EXEC | Vx);
1395 n2 = parse_expr(TC_SEMICOL);
1396 n3 = parse_expr(TC_SEQTERM);
1406 n = chain_node(t_info);
1407 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1408 if (t_tclass & TC_OUTRDR) {
1410 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1412 if (t_tclass & TC_GRPTERM)
1417 n = chain_node(OC_EXEC);
1422 n = chain_node(OC_EXEC);
1423 n->a.n = continue_ptr;
1426 /* delete, next, nextfile, return, exit */
1433 static void parse_program(char *p)
1442 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1443 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1445 if (tclass & TC_OPTERM)
1449 if (tclass & TC_BEGIN) {
1453 } else if (tclass & TC_END) {
1457 } else if (tclass & TC_FUNCDECL) {
1458 next_token(TC_FUNCTION);
1460 f = newfunc(t_string);
1461 f->body.first = NULL;
1463 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1464 v = findvar(ahash, t_string);
1465 v->x.aidx = (f->nargs)++;
1467 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1474 } else if (tclass & TC_OPSEQ) {
1476 cn = chain_node(OC_TEST);
1477 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1478 if (t_tclass & TC_GRPSTART) {
1482 chain_node(OC_PRINT);
1484 cn->r.n = mainseq.last;
1486 } else /* if (tclass & TC_GRPSTART) */ {
1494 /* -------- program execution part -------- */
1496 static node *mk_splitter(const char *s, tsplitter *spl)
1504 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1506 regfree(ire); // TODO: nuke ire, use re+1?
1508 if (strlen(s) > 1) {
1509 mk_re_node(s, n, re);
1511 n->info = (uint32_t) *s;
1517 /* Use a node as a regular expression. Takes the node ptr and regex_t
1518 * storage space. Returns a ptr to the regex (if the result points to preg,
1519 * the caller should regfree it later)
1521 static regex_t *as_regex(node *op, regex_t *preg)
1527 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1528 return icase ? op->r.ire : op->l.re;
1531 s = getvar_s(evaluate(op, v));
1533 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1534 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1535 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1536 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1537 * (maybe gsub is not supposed to use REG_EXTENDED?).
1539 if (regcomp(preg, s, cflags)) {
1540 cflags &= ~REG_EXTENDED;
1541 xregcomp(preg, s, cflags);
1547 /* gradually increasing buffer */
1548 static char* qrealloc(char *b, int n, int *size)
1550 if (!b || n >= *size) {
1551 *size = n + (n>>1) + 80;
1552 b = xrealloc(b, *size);
1557 /* resize field storage space */
1558 static void fsrealloc(int size)
1562 if (size >= maxfields) {
1564 maxfields = size + 16;
1565 Fields = xrealloc(Fields, maxfields * sizeof(var));
1566 for (; i < maxfields; i++) {
1567 Fields[i].type = VF_SPECIAL;
1568 Fields[i].string = NULL;
1572 if (size < nfields) {
1573 for (i = size; i < nfields; i++) {
1580 static int awk_split(const char *s, node *spl, char **slist)
1585 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1587 /* in worst case, each char would be a separate field */
1588 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1591 c[0] = c[1] = (char)spl->info;
1593 if (*getvar_s(intvar[RS]) == '\0')
1596 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1598 return n; /* "": zero fields */
1599 n++; /* at least one field will be there */
1601 l = strcspn(s, c+2); /* len till next NUL or \n */
1602 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1603 && pmatch[0].rm_so <= l
1605 l = pmatch[0].rm_so;
1606 if (pmatch[0].rm_eo == 0) {
1610 n++; /* we saw yet another delimiter */
1612 pmatch[0].rm_eo = l;
1617 /* make sure we remove *all* of the separator chars */
1620 } while (++l < pmatch[0].rm_eo);
1622 s += pmatch[0].rm_eo;
1626 if (c[0] == '\0') { /* null split */
1634 if (c[0] != ' ') { /* single-character split */
1636 c[0] = toupper(c[0]);
1637 c[1] = tolower(c[1]);
1641 while ((s1 = strpbrk(s1, c))) {
1649 s = skip_whitespace(s);
1653 while (*s && !isspace(*s))
1660 static void split_f0(void)
1662 /* static char *fstrings; */
1663 #define fstrings (G.split_f0__fstrings)
1674 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1677 for (i = 0; i < n; i++) {
1678 Fields[i].string = nextword(&s);
1679 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1682 /* set NF manually to avoid side effects */
1684 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1685 intvar[NF]->number = nfields;
1689 /* Perform additional actions when some internal variables changed.
 * Variables without VF_SPECIAL are filtered out first. The action
 * then depends on which variable v is:
 *   NF         - rebuild $0 from the first NF fields, joined with OFS
 *   $0         - mark the record as not split into fields
 *   FS, RS     - rebuild the field / record splitter
 *   IGNORECASE - handling is not visible in this listing
 * The trailing code treats v as an element of Fields[] and raises NF
 * so that it covers that field. */
1690 static void handle_special(var *v)
1694 const char *sep, *s;
1695 int sl, l, len, i, bsize;
1697 if (!(v->type & VF_SPECIAL))
1700 if (v == intvar[NF]) {
1701 n = (int)getvar_i(v);
1704 /* recalculate $0 */
1705 sep = getvar_s(intvar[OFS]);
1709 for (i = 0; i < n; i++) {
1710 s = getvar_s(&Fields[i]);
/* append the separator (sl bytes of OFS) */
1713 memcpy(b+len, sep, sl);
/* make room for this field plus one more separator, then append the field */
1716 b = qrealloc(b, len+l+sl, &bsize);
1717 memcpy(b+len, s, l);
/* b is handed over to $0 */
1722 setvar_p(intvar[F0], b);
1725 } else if (v == intvar[F0]) {
1726 is_f0_split = FALSE;
1728 } else if (v == intvar[FS]) {
1729 mk_splitter(getvar_s(v), &fsplitter);
1731 } else if (v == intvar[RS]) {
1732 mk_splitter(getvar_s(v), &rsplitter);
1734 } else if (v == intvar[IGNORECASE]) {
/* v-Fields is the zero-based index of the field; NF must be at least index+1 */
1738 n = getvar_i(intvar[NF]);
1739 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1740 /* right here v is invalid. Just to note... */
1744 /* Step through func/builtin/etc arguments.
 * *pn is the not yet consumed part of the argument list; callers pass
 * the returned node straight to evaluate(). An OC_COMMA node is
 * recognized specially (its handling is not visible in this listing;
 * presumably it chains one argument to the rest - TODO confirm). */
1745 static node *nextarg(node **pn)
1750 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
/* Set up a walk over array for loop variable v (used by OC_WALKINIT).
 * A new walker_list is allocated, the item names of the array are
 * copied into its wbuf, and it becomes v->x.walker. A walker that was
 * already attached to v (VF_WALK set) is kept reachable through
 * w->prev. */
1759 static void hashwalk_init(var *v, xhash *array)
1764 walker_list *prev_walker;
1766 if (v->type & VF_WALK) {
1767 prev_walker = v->x.walker;
1772 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
/* list header and name storage live in one allocation; array->glen is
 * presumably the total size of all item names - TODO confirm */
1774 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1775 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
/* cur: next name to hand out; end: where the next name is appended */
1776 w->cur = w->end = w->wbuf;
1777 w->prev = prev_walker;
/* visit every hash bucket and copy the item names into the buffer */
1778 for (i = 0; i < array->csize; i++) {
1779 hi = array->items[i];
1781 strcpy(w->end, hi->name);
/* Advance the walk attached to v (used by OC_WALKNEXT).
 * While names remain, the next one is stored into v. Once w->cur has
 * reached w->end the walker is finished and v->x.walker reverts to the
 * previous walker. The caller uses the result as a boolean to pick the
 * next node. */
1788 static int hashwalk_next(var *v)
1790 walker_list *w = v->x.walker;
1792 if (w->cur >= w->end) {
1793 walker_list *prev_walker = w->prev;
1795 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1797 v->x.walker = prev_walker;
/* nextword() returns the current name and moves w->cur past it */
1801 setvar_s(v, nextword(&w->cur));
1805 /* Evaluate node pattern and return 1 when the result is true,
 * 0 otherwise. The shared scratch variable G.ptest__v is passed to
 * evaluate() as the place to store the result. */
1806 static int ptest(node *pattern)
1808 /* ptest__v is "static": to save stack space? */
1809 return istrue(evaluate(pattern, &G.ptest__v));
1812 /* Read next record from stream rsm into a variable v.
 * Data is read from the file descriptor of rsm->F into a private,
 * growable buffer. The record terminator is located according to the
 * record splitter (rsplitter): a regex, a single character, or the
 * NUL / blank-line handling further down. RT is set to the text that
 * terminated the record. Callers treat a positive return value as
 * "a record was read". */
1813 static int awk_getline(rstream *rsm, var *v)
1816 regmatch_t pmatch[2];
1817 int a, p, pp=0, size;
1818 int fd, so, eo, r, rp;
1821 /* we're using our own buffer since we need access to accumulating
1824 fd = fileno(rsm->F);
// separator character: rsplitter.n.info converted to char
1829 c = (char) rsplitter.n.info;
// start with a 256 byte buffer
1833 m = qrealloc(m, 256, &size);
// so and eo are the start and end offsets of the separator in b
1839 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1840 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1841 b, 1, pmatch, 0) == 0) {
1842 so = pmatch[0].rm_so;
1843 eo = pmatch[0].rm_eo;
1847 } else if (c != '\0') {
1848 s = strchr(b+pp, c);
1850 s = memchr(b+pp, '\0', p - pp);
// skip newlines at b[rp], then look for an empty line as the terminator
1857 while (b[rp] == '\n')
1859 s = strstr(b+rp, "\n\n");
// all consecutive newlines belong to the separator
1862 while (b[eo] == '\n') eo++;
// move the data that starts at offset a to the front of the buffer
1870 memmove(m, (const void *)(m+a), p+1);
1875 m = qrealloc(m, a+p+128, &size);
// read more input behind the p bytes already held, leaving room for a NUL
1878 p += safe_read(fd, b+p, size-p-1);
1882 setvar_i(intvar[ERRNO], errno);
// cut the buffer at the separator boundaries; the overwritten char is
// saved in c (presumably to be put back afterwards)
1891 c = b[so]; b[so] = '\0';
1895 c = b[eo]; b[eo] = '\0';
// RT: the separator text between so and eo
1896 setvar_s(intvar[RT], b+so);
/* Format number n into buffer b of size bytes using printf-style format.
 * If int_as_int is set and n equals its int conversion, n is printed
 * with %d and format is ignored. Otherwise the last character of
 * format selects the conversion: one of diouxX prints (int)n, one of
 * eEfgG prints n as double, anything else is EMSG_INV_FMT.
 * r holds the snprintf result (callers add the return value to their
 * output length; the return statement is not visible in this listing).
 * NOTE(review): (int)n is undefined in C when n is outside the range
 * of int. */
1908 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1912 const char *s = format;
1914 if (int_as_int && n == (int)n) {
1915 r = snprintf(b, size, "%d", (int)n);
/* leave c holding the last character of format (NUL if format is empty) */
1917 do { c = *s; } while (c && *++s);
/* NOTE(review): strchr also matches c == NUL (it finds the terminator),
 * so an empty format takes the integer branch */
1918 if (strchr("diouxX", c)) {
1919 r = snprintf(b, size, format, (int)n);
1920 } else if (strchr("eEfgG", c)) {
1921 r = snprintf(b, size, format, n);
1923 syntax_error(EMSG_INV_FMT);
1929 /* Formatted output into an allocated buffer, return ptr to buffer.
 * n is the argument list: the first argument is evaluated to obtain
 * the format string, each conversion then consumes one more argument.
 * The output buffer b grows with qrealloc as needed and is finally
 * sized to the length of the output plus one byte. */
1930 static char *awk_printf(node *n)
1935 int i, j, incr, bsize;
/* work on a private copy of the format string */
1940 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* advance to the next conversion; a doubled percent sign does not start one */
1945 while (*f && (*f != '%' || *++f == '%'))
/* walk over flags, width and precision up to the conversion letter */
1947 while (*f && !isalpha(*f)) {
1949 syntax_error("%*x formats are not supported");
/* reserve room for this format segment plus MAXVARFMT bytes of output */
1953 incr = (f - s) + MAXVARFMT;
1954 b = qrealloc(b, incr + i, &bsize);
1960 arg = evaluate(nextarg(&n), v);
/* character output: a numeric argument is used as a character code,
 * a string argument contributes its first character */
1963 if (c == 'c' || !c) {
1964 i += sprintf(b+i, s, is_numeric(arg) ?
1965 (char)getvar_i(arg) : *getvar_s(arg));
1966 } else if (c == 's') {
/* strings can be arbitrarily long: grow the buffer by the string length */
1968 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1969 i += sprintf(b+i, s, s1);
/* all other conversions are numeric and go through fmt_num */
1971 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1975 /* if there was an error while sprintf, return value is negative */
/* resize to the bytes actually used, plus one */
1980 b = xrealloc(b, i + 1);
// Note on the description below: its (n) is the parameter rn and its
// dst is the parameter dest. The function returns the number of
// substitutions, which exec_builtin stores as the result of two of its
// three call sites.
1987 /* common substitution routine
1988 * replace (nm) substring of (src) that match (n) with (repl), store
1989 * result into (dest), return number of substitutions. If nm=0, replace
1990 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1991 * subexpression matching (\1-\9)
1993 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1998 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// 10 slots: the whole match plus nine subexpressions
1999 regmatch_t pmatch[10];
2002 re = as_regex(rn, &sreg);
// once sp has moved past the start of the source string, REG_NOTBOL is
// passed so that the search position is not treated as a line start
2011 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
2012 so = pmatch[0].rm_so;
2013 eo = pmatch[0].rm_eo;
// make room for the text up to the end of the match plus the replacement
2015 ds = qrealloc(ds, di + eo + rl, &dssize);
2016 memcpy(ds + di, sp, eo);
// build the replacement text character by character
2022 for (s = repl; *s; s++) {
// an ampersand, or a digit when ex is set, refers to matched text;
// nbs is presumably the count of preceding backslashes - TODO confirm
2028 if (c == '&' || (ex && c >= '0' && c <= '9')) {
2029 di -= ((nbs + 3) >> 1);
// copy the text of match slot j into the output
2038 n = pmatch[j].rm_eo - pmatch[j].rm_so;
2039 ds = qrealloc(ds, di + rl + n, &dssize);
2040 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// append the rest of the source string after the last match
2058 ds = qrealloc(ds, di + strlen(sp), &dssize);
2059 strcpy(ds + di, sp);
/* Parse a date specification from string ds and convert it with
 * mktime(). The string supplies year, month, day, hour, minute and
 * second as unsigned numbers plus one more signed number (its target
 * is not visible in this listing; presumably tm_isdst). A month below
 * 1 or a year below 1900 is among the rejected inputs. The year is
 * rebased by 1900 before the call. The time_t result of mktime() is
 * returned converted to int. */
2066 static NOINLINE int do_mktime(const char *ds)
2071 /*memset(&then, 0, sizeof(then)); - not needed */
2072 then.tm_isdst = -1; /* default is unknown */
2074 /* manpage of mktime says these fields are ints,
2075 * so we can sscanf stuff directly into them */
/* NOTE(review): the conversions are unsigned while the targets are int;
 * the casts in the range checks below compare them as unsigned */
2076 count = sscanf(ds, "%u %u %u %u %u %u %d",
2077 &then.tm_year, &then.tm_mon, &then.tm_mday,
2078 &then.tm_hour, &then.tm_min, &then.tm_sec,
2082 || (unsigned)then.tm_mon < 1
2083 || (unsigned)then.tm_year < 1900
/* struct tm counts years from 1900 */
2089 then.tm_year -= 1900;
2091 return mktime(&then);
/* Execute the builtin function described by node op, storing its
 * result through res. Up to four arguments are collected first:
 * an[] holds the argument nodes, av[] the evaluated variables and
 * as[] their string values. Flag bits in op->info decide whether an
 * argument is evaluated and whether it is also converted to a string;
 * the top two bits of op->info give the minimum number of arguments. */
2094 static NOINLINE var *exec_builtin(node *op, var *res)
2096 #define tspl (G.exec_builtin__tspl)
2102 regmatch_t pmatch[2];
2112 isr = info = op->info;
2115 av[2] = av[3] = NULL;
2116 for (i = 0; i < 4 && op; i++) {
2117 an[i] = nextarg(&op);
// either flag bit requests evaluation; the 0x08000000 bit also requests the string value
2118 if (isr & 0x09000000)
2119 av[i] = evaluate(an[i], &tv[i]);
2120 if (isr & 0x08000000)
2121 as[i] = getvar_s(av[i]);
// info >> 30 is the minimum argument count
2126 if ((uint32_t)nargs < (info >> 30))
2127 syntax_error(EMSG_TOO_FEW_ARGS);
// atan2 is only available when built with libm support
2133 #if ENABLE_FEATURE_AWK_LIBM
2134 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2136 syntax_error(EMSG_NO_MATH);
// third argument: a regex node is used as the splitter directly, any
// other value is turned into the temporary splitter tspl
2142 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2143 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2148 n = awk_split(as[0], spl, &s);
// the target array is emptied, then filled with elements 1..n
2150 clear_array(iamarray(av[1]));
2151 for (i = 1; i <= n; i++)
2152 setari_u(av[1], i, nextword(&s1));
// the start position argument is 1-based; i is the 0-based offset
2159 i = getvar_i(av[1]) - 1;
// without a length argument, take everything up to the end (l-i)
2164 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2167 s = xstrndup(as[0]+i, n);
2171 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2172 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2174 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2178 setvar_i(res, ~getvar_i_int(av[0]));
2182 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2186 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2190 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2194 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// case conversion works on a private copy of the argument
2199 s1 = s = xstrdup(as[0]);
2201 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
// ASCII letters only: bit 0x20 distinguishes lower case from upper case
2202 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2203 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// l is the last offset at which the needle (length ll) can still fit
2212 l = strlen(as[0]) - ll;
2213 if (ll > 0 && l >= 0) {
2215 s = strstr(as[0], as[1]);
// n is the 1-based position of the match
2217 n = (s - as[0]) + 1;
2219 /* this piece of code is terribly slow and
2220 * really should be rewritten
// case-insensitive search: compare at every possible start offset
2222 for (i=0; i<=l; i++) {
2223 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2235 tt = getvar_i(av[1]);
2238 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// the first argument is the strftime format; a default format is used without arguments
2239 i = strftime(g_buf, MAXVARFMT,
2240 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2243 setvar_s(res, g_buf);
2247 setvar_i(res, do_mktime(as[0]));
2251 re = as_regex(an[1], &sreg);
2252 n = regexec(re, as[0], 1, pmatch, 0);
// fallback values (presumably for the no-match case): they make
// RSTART 0 and RLENGTH -1 below
2257 pmatch[0].rm_so = 0;
2258 pmatch[0].rm_eo = -1;
2260 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2261 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2262 setvar_i(res, pmatch[0].rm_so);
// three awk_sub variants: match number from av[2] with subexpression
// references and output into res; replace all matches (nm=0) in place;
// replace the first match (nm=1) in place
2268 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2272 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2276 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2286 * Evaluate node - the heart of the program. Supplied with subtree
2287 * and place where to store result. returns ptr to result.
2289 #define XC(n) ((n) >> 8)
/* evaluate(op, res): execute the node chain starting at op.
 * res is the place offered for storing a result; the pointer that is
 * returned may be res itself or another variable (for example a field
 * or an array element).
 * Operands requested by the OF_* flags of a node are prepared before
 * the switch, which dispatches on the operation class. XC() shifts
 * the class value right by 8 bits to form the case labels. */
2291 static var *evaluate(node *op, var *res)
2293 /* This procedure is recursive so we should count every byte */
2294 #define fnargs (G.evaluate__fnargs)
2295 /* seed is initialized to 1 */
2296 #define seed (G.evaluate__seed)
2297 #define sreg (G.evaluate__sreg)
2319 return setvar_s(res, NULL);
/* opn: the operator-specific bits of opinfo (OPNMASK) */
2325 opn = (opinfo & OPNMASK);
2326 g_lineno = op->lineno;
2328 /* execute inevitable things */
/* OF_RES1/OF_RES2: evaluate left/right operand into v1[0]/v1[1];
 * OF_STR1/OF_STR2: fetch their string values;
 * OF_NUM1: fetch the numeric value of the left operand */
2330 if (opinfo & OF_RES1)
2331 X.v = L.v = evaluate(op1, v1);
2332 if (opinfo & OF_RES2)
2333 R.v = evaluate(op->r.n, v1+1);
2334 if (opinfo & OF_STR1)
2335 L.s = getvar_s(L.v);
2336 if (opinfo & OF_STR2)
2337 R.s = getvar_s(R.v);
2338 if (opinfo & OF_NUM1)
2339 L.d = getvar_i(L.v);
2341 switch (XC(opinfo & OPCLSMASK)) {
2343 /* -- iterative node type -- */
2347 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2348 /* it's range pattern */
/* OF_CHECKED stored on the node means the range start has matched;
 * it is cleared again once the end pattern matches */
2349 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2350 op->info |= OF_CHECKED;
2351 if (ptest(op1->r.n))
2352 op->info &= ~OF_CHECKED;
/* plain pattern: a.n is followed when it matches, r.n otherwise */
2359 op = (ptest(op1)) ? op->a.n : op->r.n;
2363 /* just evaluate an expression, also used as unconditional jump */
2367 /* branch, used in if-else and various loops */
2369 op = istrue(L.v) ? op->a.n : op->r.n;
2372 /* initialize for-in loop */
2373 case XC( OC_WALKINIT ):
2374 hashwalk_init(L.v, iamarray(R.v));
2377 /* get next array item */
2378 case XC( OC_WALKNEXT ):
2379 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2382 case XC( OC_PRINT ):
2383 case XC( OC_PRINTF ):
/* output redirection: R.s names the target stream */
2386 X.rsm = newfile(R.s);
2389 X.rsm->F = popen(R.s, "w");
2390 if (X.rsm->F == NULL)
2391 bb_perror_msg_and_die("popen");
/* opn 'w' opens the file for writing, anything else for appending */
2394 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2400 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2402 fputs(getvar_s(intvar[F0]), X.F);
2405 L.v = evaluate(nextarg(&op1), v1);
/* numbers are formatted with OFMT; integral values print as integers */
2406 if (L.v->type & VF_NUMBER) {
2407 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2408 getvar_i(L.v), TRUE);
2411 fputs(getvar_s(L.v), X.F);
2415 fputs(getvar_s(intvar[OFS]), X.F);
2418 fputs(getvar_s(intvar[ORS]), X.F);
2420 } else { /* OC_PRINTF */
2421 L.s = awk_printf(op1);
2428 case XC( OC_DELETE ):
/* the operand must name an array: a variable or a function argument */
2429 X.info = op1->info & OPCLSMASK;
2430 if (X.info == OC_VAR) {
2432 } else if (X.info == OC_FNARG) {
2433 R.v = &fnargs[op1->l.i];
2435 syntax_error(EMSG_NOT_ARRAY);
/* either one element (keyed by L.s) or the whole array is removed */
2440 L.s = getvar_s(evaluate(op1->r.n, v1));
2441 hash_remove(iamarray(R.v), L.s);
2443 clear_array(iamarray(R.v));
2447 case XC( OC_NEWSOURCE ):
2448 g_progname = op->l.s;
2451 case XC( OC_RETURN ):
2455 case XC( OC_NEXTFILE ):
2466 /* -- recursive node type -- */
2470 if (L.v == intvar[NF])
2474 case XC( OC_FNARG ):
2475 L.v = &fnargs[op->l.i];
/* with a subscript (op->r.n) the result is the array element keyed by
 * R.s, otherwise the variable itself */
2477 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2481 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2484 case XC( OC_REGEXP ):
2486 L.s = getvar_s(intvar[F0]);
2489 case XC( OC_MATCH ):
2492 X.re = as_regex(op1, &sreg);
2493 R.i = regexec(X.re, L.s, 0, NULL, 0);
/* regexec returns 0 on a match; opn '!' inverts the result */
2496 setvar_i(res, (R.i == 0) ^ (opn == '!'));
2500 /* if source is a temporary string, just relink it to dest */
2501 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2502 //then L.v ends up being a string, which is wrong
2503 // if (R.v == v1+1 && R.v->string) {
2504 // res = setvar_p(L.v, R.v->string);
2505 // R.v->string = NULL;
2507 res = copyvar(L.v, R.v);
2511 case XC( OC_TERNARY ):
/* the right operand must be an OC_COLON node holding both alternatives */
2512 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2513 syntax_error(EMSG_POSSIBLE_ERROR);
2514 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2518 if (!op->r.f->body.first)
2519 syntax_error(EMSG_UNDEF_FUNC);
/* one variable per declared parameter, plus one */
2521 X.v = R.v = nvalloc(op->r.f->nargs + 1);
2523 L.v = evaluate(nextarg(&op1), v1);
/* VF_CHILD: function arg; x.parent points to the actual parameter */
2525 R.v->type |= VF_CHILD;
2526 R.v->x.parent = L.v;
2527 if (++R.v - X.v >= op->r.f->nargs)
2535 res = evaluate(op->r.f->body.first, res);
2542 case XC( OC_GETLINE ):
2543 case XC( OC_PGETLINE ):
2545 X.rsm = newfile(L.s);
/* OC_PGETLINE reads from a command, otherwise L.s names a file */
2547 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2548 X.rsm->F = popen(L.s, "r");
2549 X.rsm->is_pipe = TRUE;
2551 X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
2556 iF = next_input_file();
2561 setvar_i(intvar[ERRNO], errno);
2569 L.i = awk_getline(X.rsm, R.v);
2572 incvar(intvar[FNR]);
2579 /* simple builtins */
2580 case XC( OC_FBLTIN ):
2588 R.d = (double)rand() / (double)RAND_MAX;
2590 #if ENABLE_FEATURE_AWK_LIBM
2616 syntax_error(EMSG_NO_MATH);
/* with an argument the seed is that value, otherwise the current time */
2621 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2631 L.s = getvar_s(intvar[F0]);
/* system() runs only when exec is enabled and the command is non-empty;
 * the shift by 8 presumably extracts the exit code from the wait
 * status - TODO confirm */
2637 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2638 ? (system(L.s) >> 8) : 0;
2646 X.rsm = newfile(L.s);
2655 X.rsm = (rstream *)hash_search(fdhash, L.s);
/* streams opened with popen are closed with pclose, others with fclose */
2657 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2658 free(X.rsm->buffer);
2659 hash_remove(fdhash, L.s);
2662 setvar_i(intvar[ERRNO], errno);
2669 case XC( OC_BUILTIN ):
2670 res = exec_builtin(op, res);
2673 case XC( OC_SPRINTF ):
2674 setvar_p(res, awk_printf(op1));
2677 case XC( OC_UNARY ):
2679 L.d = R.d = getvar_i(R.v);
2705 case XC( OC_FIELD ):
2706 R.i = (int)getvar_i(R.v);
/* field numbers start at 1: field R.i is stored in Fields[R.i - 1] */
2713 res = &Fields[R.i - 1];
2717 /* concatenation (" ") and index joining (",") */
2718 case XC( OC_CONCAT ):
2719 case XC( OC_COMMA ):
2720 opn = strlen(L.s) + strlen(R.s) + 2;
/* index joining places SUBSEP between the two parts */
2723 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2724 L.s = getvar_s(intvar[SUBSEP]);
2725 X.s = xrealloc(X.s, opn + strlen(L.s));
/* short-circuit logic: the right operand is tested only when needed */
2733 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2737 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2740 case XC( OC_BINARY ):
2741 case XC( OC_REPLACE ):
2742 R.d = getvar_i(R.v);
2755 syntax_error(EMSG_DIV_BY_ZERO);
2759 #if ENABLE_FEATURE_AWK_LIBM
2760 L.d = pow(L.d, R.d);
2762 syntax_error(EMSG_NO_MATH);
2767 syntax_error(EMSG_DIV_BY_ZERO);
/* remainder: subtract the truncated quotient times the divisor.
 * NOTE(review): the int cast is undefined when the quotient does not
 * fit into int */
2768 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; OC_REPLACE stores into X.v, the left operand */
2771 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2774 case XC( OC_COMPARE ):
/* numeric comparison only when both sides are numeric, else compare as
 * strings (case-insensitively under icase); L.d receives the difference */
2775 if (is_numeric(L.v) && is_numeric(R.v)) {
2776 L.d = getvar_i(L.v) - getvar_i(R.v);
2778 L.s = getvar_s(L.v);
2779 R.s = getvar_s(R.v);
2780 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* bit 0 of opn selects between the plain and the negated test */
2782 switch (opn & 0xfe) {
2793 setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2797 syntax_error(EMSG_POSSIBLE_ERROR);
/* the class value decides what happens after the switch; the
 * statements guarded by these two tests are not visible in this listing */
2799 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2801 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2814 /* -------- main & co. -------- */
/* Finish the program: the END sequence (endseq) is evaluated, then
 * every stream in fdhash that is an open pipe is closed with pclose().
 * r is presumably the exit status; the code that uses it is not
 * visible in this listing. */
2816 static int awk_exit(int r)
2827 evaluate(endseq.first, &tv);
2830 /* waiting for children */
2831 for (i = 0; i < fdhash->csize; i++) {
2832 hi = fdhash->items[i];
/* only streams that are open and were created as pipes need pclose */
2834 if (hi->data.rs.F && hi->data.rs.is_pipe)
2835 pclose(hi->data.rs.F);
2843 /* if expr looks like "var=value", perform assignment and return 1,
2844 * otherwise return 0.
 * expr itself is not modified: all work happens on a private copy.
 * An expression is rejected when its first character fails isalnum_()
 * or when it contains no equals sign. */
2845 static int is_assignment(const char *expr)
2847 char *exprc, *s, *s0, *s1;
2849 exprc = xstrdup(expr);
2850 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value is rewritten character by character through nextchar()
 * (presumably escape sequence processing - TODO confirm) */
2858 *s1++ = nextchar(&s);
/* exprc now holds the variable name, s0 the value */
2861 setvar_u(newvar(exprc), s0);
2866 /* Switch to next input file.
 * ARGIND is advanced through ARGV. An entry is opened only when it is
 * non-empty and is not consumed by is_assignment() as a var=value
 * assignment. The name of the chosen entry is stored in FILENAME.
 * The state is kept in the global rstream G.next_input_file__rsm. */
2867 static rstream *next_input_file(void)
2869 #define rsm (G.next_input_file__rsm)
2870 #define files_happen (G.next_input_file__files_happen)
2873 const char *fname, *ind;
/* reset the buffer positions of the stream */
2878 rsm.pos = rsm.adv = 0;
/* no ARGV entries left */
2881 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
/* ARGV is an awk array, so the incremented index is used as a string key */
2887 ind = getvar_s(incvar(intvar[ARGIND]));
2888 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2889 if (fname && *fname && !is_assignment(fname))
2890 F = xfopen_stdin(fname);
2894 files_happen = TRUE;
2895 setvar_s(intvar[FILENAME], fname);
/* Applet entry point. Phases visible in this listing:
 *  - force the C numeric locale, allocate g_buf and the four hashes
 *  - create the built-in variables from the vNames/vValues tables
 *  - register /dev/stdin, /dev/stdout and /dev/stderr as streams
 *  - import the environment into ENVIRON
 *  - parse options -F, -v, -f and -W
 *  - parse the program from the -f file(s) or from the first argument
 *  - fill in ARGC and ARGV, run the BEGIN sequence
 *  - feed every record of every input file to the main sequence
 *  - finish through awk_exit() */
2902 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2903 int awk_main(int argc, char **argv)
2906 char *opt_F, *opt_W;
2907 llist_t *list_v = NULL;
2908 llist_t *list_f = NULL;
2913 char *vnames = (char *)vNames; /* cheat */
2914 char *vvalues = (char *)vValues;
2918 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2919 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2920 if (ENABLE_LOCALE_SUPPORT)
2921 setlocale(LC_NUMERIC, "C");
2925 /* allocate global buffer */
2926 g_buf = xmalloc(MAXVARFMT + 1);
/* vhash, ahash, fdhash, fnhash: presumably variables, arrays, streams
 * and functions - TODO confirm */
2928 vhash = hash_init();
2929 ahash = hash_init();
2930 fdhash = hash_init();
2931 fnhash = hash_init();
2933 /* initialize variables */
2934 for (i = 0; *vnames; i++) {
2935 intvar[i] = v = newvar(nextword(&vnames));
/* a \377 byte in the value table means: no string value is set here */
2936 if (*vvalues != '\377')
2937 setvar_s(v, nextword(&vvalues));
/* a star right after a name in the name table flags the variable
 * that was just created as VF_SPECIAL */
2941 if (*vnames == '*') {
2942 v->type |= VF_SPECIAL;
/* build the initial field and record splitters */
2947 handle_special(intvar[FS]);
2948 handle_special(intvar[RS]);
2950 newfile("/dev/stdin")->F = stdin;
2951 newfile("/dev/stdout")->F = stdout;
2952 newfile("/dev/stderr")->F = stderr;
2954 /* Huh, people report that sometimes environ is NULL. Oh well. */
2955 if (environ) for (envp = environ; *envp; envp++) {
2956 /* environ is writable, thus we don't strdup it needlessly */
2958 char *s1 = strchr(s, '=');
2961 /* Both findvar and setvar_u take const char*
2962 * as 2nd arg -> environment is not trashed */
2963 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2967 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2968 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2972 setvar_s(intvar[FS], opt_F); // -F
2973 while (list_v) { /* -v */
2974 if (!is_assignment(llist_pop(&list_v)))
2977 if (list_f) { /* -f */
2982 g_progname = llist_pop(&list_f);
2983 from_file = xfopen_stdin(g_progname);
2984 /* one byte is reserved for some trick in next_token */
/* read the file in chunks of up to 4094 bytes; i is the write offset
 * and starts at 1 because s[0] is the reserved byte */
2985 for (i = j = 1; j > 0; i += j) {
2986 s = xrealloc(s, i + 4096);
2987 j = fread(s + i, 1, 4094, from_file);
2991 parse_program(s + 1);
2995 } else { // no -f: take program from 1st parameter
2998 g_progname = "cmd. line";
2999 parse_program(*argv++);
/* 0x8 is the bit of the fourth option in the getopt32 string */
3001 if (opt & 0x8) // -W
3002 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3004 /* fill in ARGV array */
3005 setvar_i(intvar[ARGC], argc);
3006 setari_u(intvar[ARGV], 0, "awk");
3009 setari_u(intvar[ARGV], ++i, *argv++);
/* run the BEGIN sequence; without main and END sequences no input is read */
3011 evaluate(beginseq.first, &tv);
3012 if (!mainseq.first && !endseq.first)
3013 awk_exit(EXIT_SUCCESS);
3015 /* input file could already be opened in BEGIN block */
3017 iF = next_input_file();
3019 /* passing through input files */
/* FNR restarts at 0 for every input file */
3022 setvar_i(intvar[FNR], 0);
/* a positive result means a record was read into $0 */
3024 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3027 incvar(intvar[FNR]);
3028 evaluate(mainseq.first, &tv);
/* presumably reached when awk_getline reported an error - the guarding
 * condition is not visible in this listing */
3035 syntax_error(strerror(errno));
3037 iF = next_input_file();
3040 awk_exit(EXIT_SUCCESS);