1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
/* Bit flags kept in the type member of a var (see struct var_s). */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed:
 * clrvar() masks type with VF_DONTTOUCH, and copyvar() never copies
 * these bits from the source variable */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
/* Chained hash table header (created by hash_init, grown by hash_rebuild). */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size (number of buckets in items[]) */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names; hash_remove subtracts strlen(name) + 1 per item */
86 struct hash_item_s **items; /* bucket array; each bucket is a chain linked through hash_item_s.next */
90 typedef struct node_s {
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175 | TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185 | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
198 #define xS (OF_RES2 | OF_STR2)
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
210 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
211 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* P(x): place the 8-bit priority (or, for builtins, argument-spec) value x
 * into the highest byte of a 32-bit token-info word.
 * The operand is converted to uint32_t before shifting so that values
 * >= 0x80 (e.g. P(0x83)) are not shifted into the sign bit of an int,
 * which would be undefined behaviour; the argument is parenthesized so
 * that any expression is shifted as a whole. */
#define P(x) ((uint32_t)(x) << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
218 /* Operation classes */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
224 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
225 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
227 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
228 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
229 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
231 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
232 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
233 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
234 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
235 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
236 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
237 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
238 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
241 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
245 /* simple builtins */
247 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] ALIGN1 =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime" "\6mktime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
367 CONVFMT, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, F0, ARGIND, ARGC,
370 ARGV, ERRNO, FNR, NR,
371 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
374 static const char vNames[] ALIGN1 =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
378 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
379 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
381 static const char vValues[] ALIGN1 =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
384 "\034\0" "\0" "\377";
386 /* hash size may grow to these values:
 * FIRST_PRIME is the bucket count set by hash_init(); hash_rebuild() takes
 * the next entry of PRIMES[] (indexed by xhash.nprime) as the new size */
387 #define FIRST_PRIME 61
388 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
391 /* Globals. Split in two parts so that first one is addressed
392 * with (mostly short) negative offsets.
393 * NB: it's unsafe to put members of type "double"
394 * into globals2 (gcc may fail to align them).
398 chain beginseq, mainseq, endseq;
400 node *break_ptr, *continue_ptr;
402 xhash *vhash, *ahash, *fdhash, *fnhash;
403 const char *g_progname;
406 int maxfields; /* used in fsrealloc() only */
415 smallint is_f0_split;
418 uint32_t t_info; /* often used */
424 var *intvar[NUM_INTERNAL_VARS]; /* often used */
426 /* former statics from various functions */
427 char *split_f0__fstrings;
429 uint32_t next_token__save_tclass;
430 uint32_t next_token__save_info;
431 uint32_t next_token__ltclass;
432 smallint next_token__concat_inserted;
434 smallint next_input_file__files_happen;
435 rstream next_input_file__rsm;
437 var *evaluate__fnargs;
438 unsigned evaluate__seed;
439 regex_t evaluate__sreg;
443 tsplitter exec_builtin__tspl;
445 /* biggest and least used members go last */
446 tsplitter fsplitter, rsplitter;
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /*char G1size[sizeof(G1)]; - 0x74 */
452 /*char Gsize[sizeof(G)]; - 0x1c4 */
453 /* Trying to keep most of members accessible with short offsets: */
454 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
455 #define t_double (G1.t_double )
456 #define beginseq (G1.beginseq )
457 #define mainseq (G1.mainseq )
458 #define endseq (G1.endseq )
459 #define seq (G1.seq )
460 #define break_ptr (G1.break_ptr )
461 #define continue_ptr (G1.continue_ptr)
463 #define vhash (G1.vhash )
464 #define ahash (G1.ahash )
465 #define fdhash (G1.fdhash )
466 #define fnhash (G1.fnhash )
467 #define g_progname (G1.g_progname )
468 #define g_lineno (G1.g_lineno )
469 #define nfields (G1.nfields )
470 #define maxfields (G1.maxfields )
471 #define Fields (G1.Fields )
472 #define g_cb (G1.g_cb )
473 #define g_pos (G1.g_pos )
474 #define g_buf (G1.g_buf )
475 #define icase (G1.icase )
476 #define exiting (G1.exiting )
477 #define nextrec (G1.nextrec )
478 #define nextfile (G1.nextfile )
479 #define is_f0_split (G1.is_f0_split )
480 #define t_info (G.t_info )
481 #define t_tclass (G.t_tclass )
482 #define t_string (G.t_string )
483 #define t_lineno (G.t_lineno )
484 #define t_rollback (G.t_rollback )
485 #define intvar (G.intvar )
486 #define fsplitter (G.fsplitter )
487 #define rsplitter (G.rsplitter )
488 #define INIT_G() do { \
489 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
504 /* ---- error handling ---- */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515 #if !ENABLE_FEATURE_AWK_LIBM
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
519 static void zero_out_var(var *vp)
521 memset(vp, 0, sizeof(*vp));
524 static void syntax_error(const char *message) NORETURN;
525 static void syntax_error(const char *message)
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
530 /* ---- hash stuff ---- */
/* Compute a hash value for the string name. Callers reduce the result
 * modulo the table size (hash->csize) to pick a bucket. */
532 static unsigned hashidx(const char *name)
537 idx = *name++ + (idx << 6) - idx; /* idx = idx * 63 + next character */
541 /* create new hash: the header and a bucket array of FIRST_PRIME
 * entries are both allocated with xzalloc; csize records the bucket count */
542 static xhash *hash_init(void)
546 newhash = xzalloc(sizeof(*newhash));
547 newhash->csize = FIRST_PRIME;
548 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
558 hi = hash->items[hashidx(name) % hash->csize]; /* head of the bucket chain for name */
560 if (strcmp(hi->name, name) == 0) /* exact, case-sensitive name match */
567 /* grow hash if it becomes too big: allocate a bucket array of the next
 * size in PRIMES[] and re-link every existing item into it */
568 static void hash_rebuild(xhash *hash)
570 unsigned newsize, i, idx;
571 hash_item **newitems, *hi, *thi;
573 if (hash->nprime == ARRAY_SIZE(PRIMES)) /* every size in PRIMES[] is already used; NOTE(review): the statement taken here is not visible, presumably an early return */
576 newsize = PRIMES[hash->nprime++];
577 newitems = xzalloc(newsize * sizeof(newitems[0]));
579 for (i = 0; i < hash->csize; i++) { /* walk every old bucket */
584 idx = hashidx(thi->name) % newsize; /* bucket of this item in the new table */
585 thi->next = newitems[idx]; /* item goes to the front of its new chain */
591 hash->csize = newsize;
592 hash->items = newitems;
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
602 hi = hash_search(hash, name); /* existing entry, if any */
604 if (++hash->nel / hash->csize > 10) /* count the new element; grow once nel / csize (integer division) exceeds 10 -- NOTE(review): the call made here is not visible, presumably hash_rebuild() */
607 l = strlen(name) + 1; /* name length including its NUL */
608 hi = xzalloc(sizeof(*hi) + l); /* the name is stored inline at the end of the item (hash_item_s.name) */
609 strcpy(hi->name, name);
611 idx = hashidx(name) % hash->csize;
612 hi->next = hash->items[idx]; /* push the new item onto the front of its bucket chain */
613 hash->items[idx] = hi;
/* Typed wrappers around hash_find() (find item, add it if necessary):
 * findvar takes the table explicitly; newvar, newfile and newfunc use the
 * global tables vhash, fdhash and fnhash respectively. */
619 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
620 #define newvar(name) ((var*) hash_find(vhash, (name)))
621 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Look up the item called name in its bucket chain and account for its
 * removal. NOTE(review): the unlink/free statements are not visible in
 * this excerpt. */
624 static void hash_remove(xhash *hash, const char *name)
626 hash_item *hi, **phi;
628 phi = &(hash->items[hashidx(name) % hash->csize]); /* address of the bucket head pointer for name */
631 if (strcmp(hi->name, name) == 0) {
632 hash->glen -= (strlen(name) + 1); /* glen drops by the name length plus its NUL */
642 /* ------ some useful functions ------ */
644 static void skip_spaces(char **s)
649 if (*p == '\\' && p[1] == '\n') {
652 } else if (*p != ' ' && *p != '\t') {
660 static char *nextword(char **s)
668 static char nextchar(char **s)
675 c = bb_process_escape_sequence((const char**)s);
676 if (c == '\\' && *s == pps)
681 static ALWAYS_INLINE int isalnum_(int c)
683 return (isalnum(c) || c == '_');
686 static double my_strtod(char **pp)
690 && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
692 return strtoull(*pp, pp, 0);
695 return strtod(*pp, pp);
698 /* -------- working with variables (set/get/copy/etc) -------- */
700 static xhash *iamarray(var *v)
704 while (a->type & VF_CHILD)
707 if (!(a->type & VF_ARRAY)) {
709 a->x.array = hash_init();
714 static void clear_array(xhash *array)
719 for (i = 0; i < array->csize; i++) {
720 hi = array->items[i];
724 free(thi->data.v.string);
727 array->items[i] = NULL;
729 array->glen = array->nel = 0;
732 /* clear a variable */
733 static var *clrvar(var *v)
735 if (!(v->type & VF_FSTR))
738 v->type &= VF_DONTTOUCH;
744 /* assign string value to variable */
745 static var *setvar_p(var *v, char *value)
753 /* same as setvar_p but make a copy of string:
 * a NULL or empty value is handed to setvar_p as NULL, anything else
 * as a fresh xstrdup() copy */
754 static var *setvar_s(var *v, const char *value)
756 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
759 /* same as setvar_s but sets USER flag */
760 static var *setvar_u(var *v, const char *value)
762 v = setvar_s(v, value);
767 /* set array element to user string */
768 static void setari_u(var *a, int idx, const char *s)
772 v = findvar(iamarray(a), itoa(idx));
776 /* assign numeric value to variable */
777 static var *setvar_i(var *v, double value)
780 v->type |= VF_NUMBER;
/* Return the string value of v; never returns NULL (an unset string
 * yields the empty string literal). */
786 static const char *getvar_s(var *v)
788 /* if v is numeric and has no cached string, convert it to string */
789 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE); /* format v->number into g_buf using the CONVFMT variable */
791 v->string = xstrdup(g_buf); /* keep a heap copy as the cached string */
792 v->type |= VF_CACHED; /* mark the string form as cached */
794 return (v->string == NULL) ? "" : v->string;
797 static double getvar_i(var *v)
801 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
805 v->number = my_strtod(&s);
806 if (v->type & VF_USER) {
814 v->type |= VF_CACHED;
819 /* Used for operands of bitwise ops: converts the numeric value of v
 * (getvar_i) to unsigned long. NOTE(review): the condition choosing
 * between the two returns below is not visible in this excerpt;
 * presumably it tests the sign of d. */
820 static unsigned long getvar_i_int(var *v)
822 double d = getvar_i(v);
824 /* Casting doubles to longs is undefined for values outside
825 * of target type range. Try to widen it as much as possible */
827 return (unsigned long)d;
828 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
829 return - (long) (unsigned long) (-d); /* convert the magnitude first, then negate */
832 static var *copyvar(var *dest, const var *src)
836 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837 dest->number = src->number;
839 dest->string = xstrdup(src->string);
841 handle_special(dest);
845 static var *incvar(var *v)
847 return setvar_i(v, getvar_i(v) + 1.0);
850 /* return true if v is number or numeric string */
851 static int is_numeric(var *v)
854 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY)); /* nonzero when VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear (the XOR inverts the DIRTY bit) */
857 /* return 1 when value of v corresponds to true, 0 otherwise.
 * NOTE(review): the test selecting the numeric or the string return is
 * not visible in this excerpt. */
858 static int istrue(var *v)
861 return (v->number != 0); /* numeric case: true when nonzero */
862 return (v->string && v->string[0]); /* string case: true when non-NULL and non-empty */
865 /* temporary variables allocator. Last allocated should be first freed */
866 static var *nvalloc(int n)
874 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
880 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
881 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
883 g_cb->pos = g_cb->nv;
885 /*g_cb->next = NULL; - xzalloc did it */
893 while (v < g_cb->pos) {
902 static void nvfree(var *v)
906 if (v < g_cb->nv || v >= g_cb->pos)
907 syntax_error(EMSG_INTERNAL_ERROR);
909 for (p = v; p < g_cb->pos; p++) {
910 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
911 clear_array(iamarray(p));
912 free(p->x.array->items);
915 if (p->type & VF_WALK) {
916 //bb_error_msg("free(walker@%p:%p) #1", &p->x.walker, p->x.walker);
924 while (g_cb->prev && g_cb->pos == g_cb->nv) {
929 /* ------- awk program text parsing ------- */
931 /* Parse the next token pointed to by global pos and place the results into the global t_* variables.
932 * If the token isn't expected, give up with a syntax error. Return the token class
934 static uint32_t next_token(uint32_t expected)
936 #define concat_inserted (G.next_token__concat_inserted)
937 #define save_tclass (G.next_token__save_tclass)
938 #define save_info (G.next_token__save_info)
939 /* Initialized to TC_OPTERM: */
940 #define ltclass (G.next_token__ltclass)
951 } else if (concat_inserted) {
952 concat_inserted = FALSE;
953 t_tclass = save_tclass;
962 while (*p != '\n' && *p != '\0')
971 } else if (*p == '\"') {
975 if (*p == '\0' || *p == '\n')
976 syntax_error(EMSG_UNEXP_EOS);
977 *(s++) = nextchar(&p);
983 } else if ((expected & TC_REGEXP) && *p == '/') {
987 if (*p == '\0' || *p == '\n')
988 syntax_error(EMSG_UNEXP_EOS);
992 *(s-1) = bb_process_escape_sequence((const char **)&p);
1003 } else if (*p == '.' || isdigit(*p)) {
1005 t_double = my_strtod(&p);
1007 syntax_error(EMSG_UNEXP_TOKEN);
1011 /* search for something known */
1021 /* if token class is expected, token
1022 * matches and it's not a longer word,
1023 * then this is what we are looking for
1025 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1026 && *tl == *p && strncmp(p, tl, l) == 0
1027 && !((tc & TC_WORD) && isalnum_(p[l]))
1038 /* it's a name (var/array/function),
1039 * otherwise it's something wrong
1042 syntax_error(EMSG_UNEXP_TOKEN);
1045 while (isalnum_(*(++p))) {
1050 /* also consume whitespace between functionname and bracket */
1051 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1065 /* skipping newlines in some cases */
1066 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1069 /* insert concatenation operator when needed */
1070 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1071 concat_inserted = TRUE;
1075 t_info = OC_CONCAT | SS | P(35);
1082 /* Are we ready for this? */
1083 if (!(ltclass & expected))
1084 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1085 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1088 #undef concat_inserted
1094 static void rollback_token(void)
1099 static node *new_node(uint32_t info)
1103 n = xzalloc(sizeof(node));
1105 n->lineno = g_lineno;
/* Make node n an OC_REGEXP node for pattern s.
 * re must provide room for two regex_t objects (callers in this file pass
 * xzalloc(sizeof(regex_t)*2) or splitter storage): re[0] receives the
 * pattern compiled with REG_EXTENDED, re[1] the same pattern with
 * REG_ICASE added. */
1109 static node *mk_re_node(const char *s, node *n, regex_t *re)
1111 n->info = OC_REGEXP;
1114 xregcomp(re, s, REG_EXTENDED); /* case-sensitive variant */
1115 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); /* case-insensitive variant */
1120 static node *condition(void)
1122 next_token(TC_SEQSTART);
1123 return parse_expr(TC_SEQTERM);
1126 /* parse expression terminated by given argument, return ptr
1127 * to built subtree. Terminator is eaten by parse_expr */
1128 static node *parse_expr(uint32_t iexp)
1137 sn.r.n = glptr = NULL;
1138 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1140 while (!((tc = next_token(xtc)) & iexp)) {
1141 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1142 /* input redirection (<) attached to glptr node */
1143 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1145 xtc = TC_OPERAND | TC_UOPPRE;
1148 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1149 /* for binary and postfix-unary operators, jump back over
1150 * previous operators with higher priority */
1152 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1153 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1157 if ((t_info & OPCLSMASK) == OC_TERNARY)
1159 cn = vn->a.n->r.n = new_node(t_info);
1161 if (tc & TC_BINOP) {
1163 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1164 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1166 next_token(TC_GETLINE);
1167 /* give maximum priority to this pipe */
1168 cn->info &= ~PRIMASK;
1169 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1173 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1178 /* for operands and prefix-unary operators, attach them
1181 cn = vn->r.n = new_node(t_info);
1183 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1184 if (tc & (TC_OPERAND | TC_REGEXP)) {
1185 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1186 /* one should be very careful with switch on tclass -
1187 * only simple tclasses should be used! */
1192 v = hash_search(ahash, t_string);
1194 cn->info = OC_FNARG;
1195 cn->l.i = v->x.aidx;
1197 cn->l.v = newvar(t_string);
1199 if (tc & TC_ARRAY) {
1201 cn->r.n = parse_expr(TC_ARRTERM);
1208 v = cn->l.v = xzalloc(sizeof(var));
1210 setvar_i(v, t_double);
1212 setvar_s(v, t_string);
1216 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1221 cn->r.f = newfunc(t_string);
1222 cn->l.n = condition();
1226 cn = vn->r.n = parse_expr(TC_SEQTERM);
1232 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1236 cn->l.n = condition();
1245 /* add node to chain. Return ptr to alloc'd node */
1246 static node *chain_node(uint32_t info)
1251 seq->first = seq->last = new_node(0);
1253 if (seq->programname != g_progname) {
1254 seq->programname = g_progname;
1255 n = chain_node(OC_NEWSOURCE);
1256 n->l.s = xstrdup(g_progname);
1261 seq->last = n->a.n = new_node(OC_DONE);
1266 static void chain_expr(uint32_t info)
1270 n = chain_node(info);
1271 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1272 if (t_tclass & TC_GRPTERM)
1276 static node *chain_loop(node *nn)
1278 node *n, *n2, *save_brk, *save_cont;
1280 save_brk = break_ptr;
1281 save_cont = continue_ptr;
1283 n = chain_node(OC_BR | Vx);
1284 continue_ptr = new_node(OC_EXEC);
1285 break_ptr = new_node(OC_EXEC);
1287 n2 = chain_node(OC_EXEC | Vx);
1290 continue_ptr->a.n = n2;
1291 break_ptr->a.n = n->r.n = seq->last;
1293 continue_ptr = save_cont;
1294 break_ptr = save_brk;
1299 /* parse group and attach it to chain */
1300 static void chain_group(void)
1306 c = next_token(TC_GRPSEQ);
1307 } while (c & TC_NEWLINE);
1309 if (c & TC_GRPSTART) {
1310 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1311 if (t_tclass & TC_NEWLINE) continue;
1315 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1317 chain_expr(OC_EXEC | Vx);
1318 } else { /* TC_STATEMNT */
1319 switch (t_info & OPCLSMASK) {
1321 n = chain_node(OC_BR | Vx);
1322 n->l.n = condition();
1324 n2 = chain_node(OC_EXEC);
1326 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1328 n2->a.n = seq->last;
1336 n = chain_loop(NULL);
1341 n2 = chain_node(OC_EXEC);
1342 n = chain_loop(NULL);
1344 next_token(TC_WHILE);
1345 n->l.n = condition();
1349 next_token(TC_SEQSTART);
1350 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1351 if (t_tclass & TC_SEQTERM) { /* for-in */
1352 if ((n2->info & OPCLSMASK) != OC_IN)
1353 syntax_error(EMSG_UNEXP_TOKEN);
1354 n = chain_node(OC_WALKINIT | VV);
1357 n = chain_loop(NULL);
1358 n->info = OC_WALKNEXT | Vx;
1360 } else { /* for (;;) */
1361 n = chain_node(OC_EXEC | Vx);
1363 n2 = parse_expr(TC_SEMICOL);
1364 n3 = parse_expr(TC_SEQTERM);
1374 n = chain_node(t_info);
1375 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1376 if (t_tclass & TC_OUTRDR) {
1378 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1380 if (t_tclass & TC_GRPTERM)
1385 n = chain_node(OC_EXEC);
1390 n = chain_node(OC_EXEC);
1391 n->a.n = continue_ptr;
1394 /* delete, next, nextfile, return, exit */
1401 static void parse_program(char *p)
1410 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1411 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1413 if (tclass & TC_OPTERM)
1417 if (tclass & TC_BEGIN) {
1421 } else if (tclass & TC_END) {
1425 } else if (tclass & TC_FUNCDECL) {
1426 next_token(TC_FUNCTION);
1428 f = newfunc(t_string);
1429 f->body.first = NULL;
1431 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1432 v = findvar(ahash, t_string);
1433 v->x.aidx = (f->nargs)++;
1435 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1442 } else if (tclass & TC_OPSEQ) {
1444 cn = chain_node(OC_TEST);
1445 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1446 if (t_tclass & TC_GRPSTART) {
1450 chain_node(OC_PRINT);
1452 cn->r.n = mainseq.last;
1454 } else /* if (tclass & TC_GRPSTART) */ {
1462 /* -------- program execution part -------- */
1464 static node *mk_splitter(const char *s, tsplitter *spl)
1472 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1474 regfree(ire); // TODO: nuke ire, use re+1?
1476 if (strlen(s) > 1) {
1477 mk_re_node(s, n, re);
1479 n->info = (uint32_t) *s;
1485 /* Use node as a regular expression. Supplied with node ptr and regex_t
1486 * storage space. Return ptr to regex (if the result points to preg, it should
1487 * later be regfree'd manually)
1489 static regex_t *as_regex(node *op, regex_t *preg)
1495 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1496 return icase ? op->r.ire : op->l.re;
1499 s = getvar_s(evaluate(op, v));
1501 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1502 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1503 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1504 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1505 * (maybe gsub is not supposed to use REG_EXTENDED?).
1507 if (regcomp(preg, s, cflags)) {
1508 cflags &= ~REG_EXTENDED;
1509 xregcomp(preg, s, cflags);
1515 /* gradually increasing buffer: make sure b is allocated and larger
 * than n bytes; *size holds the current allocation size and is updated
 * whenever the buffer is reallocated */
1516 static char* qrealloc(char *b, int n, int *size)
1518 if (!b || n >= *size) { /* no buffer yet, or n does not fit below the current size */
1519 *size = n + (n>>1) + 80; /* new size: n plus half of n plus 80 */
1520 b = xrealloc(b, *size);
1525 /* resize field storage space (the global Fields array) for size fields */
1526 static void fsrealloc(int size)
1530 if (size >= maxfields) { /* current allocation is too small */
1532 maxfields = size + 16; /* leave room for 16 more fields than requested */
1533 Fields = xrealloc(Fields, maxfields * sizeof(var));
1534 for (; i < maxfields; i++) { /* NOTE(review): the starting value of i is set on a line not visible here; presumably the old maxfields */
1535 Fields[i].type = VF_SPECIAL; /* the slot starts out flagged VF_SPECIAL */
1536 Fields[i].string = NULL;
1540 if (size < nfields) { /* fewer fields than currently in use */
1541 for (i = size; i < nfields; i++) { /* visits fields size .. nfields-1; the loop body is not visible in this excerpt */
1548 static int awk_split(const char *s, node *spl, char **slist)
1553 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1555 /* in worst case, each char would be a separate field */
1556 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1559 c[0] = c[1] = (char)spl->info;
1561 if (*getvar_s(intvar[RS]) == '\0')
1564 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1566 return n; /* "": zero fields */
1567 n++; /* at least one field will be there */
1569 l = strcspn(s, c+2); /* len till next NUL or \n */
1570 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1571 && pmatch[0].rm_so <= l
1573 l = pmatch[0].rm_so;
1574 if (pmatch[0].rm_eo == 0) {
1578 n++; /* we saw yet another delimiter */
1580 pmatch[0].rm_eo = l;
1585 /* make sure we remove *all* of the separator chars */
1588 } while (++l < pmatch[0].rm_eo);
1590 s += pmatch[0].rm_eo;
1594 if (c[0] == '\0') { /* null split */
1602 if (c[0] != ' ') { /* single-character split */
1604 c[0] = toupper(c[0]);
1605 c[1] = tolower(c[1]);
1608 while ((s1 = strpbrk(s1, c))) {
1616 s = skip_whitespace(s);
1619 while (*s && !isspace(*s))
1626 static void split_f0(void)
1628 /* static char *fstrings; */
1629 #define fstrings (G.split_f0__fstrings)
1640 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1643 for (i = 0; i < n; i++) {
1644 Fields[i].string = nextword(&s);
1645 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1648 /* set NF manually to avoid side effects */
1650 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1651 intvar[NF]->number = nfields;
1655 /* perform additional actions when some internal variables changed */
/* Dispatches on the identity of v (compared against the intvar[] slots):
 *   NF         - $0 is rebuilt from the first n fields joined with OFS
 *   $0         - the cached field split is invalidated
 *   FS / RS    - the matching splitter is rebuilt from the new value
 *   IGNORECASE - handled in lines not visible here
 * The trailing statements treat v as an element of Fields[] and raise NF.
 * The first test looks at VF_SPECIAL; presumably variables without that
 * flag are ignored - TODO confirm (the statement under the test is
 * elided). */
1656 static void handle_special(var *v)
1660 const char *sep, *s;
1661 int sl, l, len, i, bsize;
1663 if (!(v->type & VF_SPECIAL))
1666 if (v == intvar[NF]) {
1667 n = (int)getvar_i(v);
1670 /* recalculate $0 */
1671 sep = getvar_s(intvar[OFS]);
/* append each field, with the OFS text copied in between; b is grown
 * through qrealloc() before the field text is copied */
1675 for (i = 0; i < n; i++) {
1676 s = getvar_s(&Fields[i]);
1679 memcpy(b+len, sep, sl);
1682 b = qrealloc(b, len+l+sl, &bsize);
1683 memcpy(b+len, s, l);
/* hand the assembled buffer to $0 */
1688 setvar_p(intvar[F0], b);
1691 } else if (v == intvar[F0]) {
/* $0 was replaced: fields must be split again before next use */
1692 is_f0_split = FALSE;
1694 } else if (v == intvar[FS]) {
1695 mk_splitter(getvar_s(v), &fsplitter);
1697 } else if (v == intvar[RS]) {
1698 mk_splitter(getvar_s(v), &rsplitter);
1700 } else if (v == intvar[IGNORECASE]) {
/* NOTE(review): the lines between the IGNORECASE test and this point are
 * elided. The statements below compute v-Fields, i.e. v is taken to be a
 * field variable, and NF becomes max(NF, index of v + 1). */
1704 n = getvar_i(intvar[NF]);
1705 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1706 /* right here v is invalid. Just to note... */
1710 /* step through func/builtin/etc arguments */
/* Takes the current list position by reference (pn) and returns a node.
 * The only visible test singles out nodes whose class is OC_COMMA;
 * presumably such a node links one argument to the rest of the list and
 * *pn is advanced past it - TODO confirm against the full body, which
 * is not visible here. */
1711 static node *nextarg(node **pn)
1716 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
/* Prepare variable v for a for-in walk over array.
 * One zeroed block is allocated: three pointer slots followed by room
 * for the item names, sized from array->glen. If v already carries a
 * walker (VF_WALK is set) the old pointer is saved in slot 2, which is
 * what hashwalk_next() later reads back as the previous walker. */
1725 static void hashwalk_init(var *v, xhash *array)
1730 char **prev_walker = (v->type & VF_WALK) ? v->x.walker : NULL;
1734 /* walker structure is: "[ptr2end][ptr2start][prev]<word1>NUL<word2>NUL" */
1735 w = v->x.walker = xzalloc(2 + 3*sizeof(char *) + array->glen);
1736 //bb_error_msg("walker@%p=%p", &v->x.walker, v->x.walker);
/* both cursors start at the first name slot, right behind the header */
1737 w[0] = w[1] = (char *)(w + 3);
1738 w[2] = (char *)prev_walker;
/* visit every bucket and copy item names to the write cursor w[0];
 * presumably w[0] is advanced past each name in an elided statement -
 * TODO confirm */
1739 for (i = 0; i < array->csize; i++) {
1740 hi = array->items[i];
1742 strcpy(w[0], hi->name);
/* Step a for-in walk: the next saved name is fetched with nextword() on
 * the read cursor w[1] and stored into v.
 * The visible lines also put the walker saved in w[2] back into
 * v->x.walker; presumably that happens once all names are consumed, and
 * the int result tells the caller whether a name was produced (evaluate()
 * branches on it) - TODO confirm, the branch structure is elided here. */
1749 static int hashwalk_next(var *v)
1755 char **prev_walker = (char**)w[2];
1757 //bb_error_msg("free(walker@%p:%p) #3, restoring to %p", &v->x.walker, v->x.walker, prev_walker);
1759 v->x.walker = prev_walker;
1763 setvar_s(v, nextword(&w[1]));
1767 /* evaluate node, return 1 when result is true, 0 otherwise */
/* The pattern is evaluated into G.ptest__v, a slot in the globals
 * struct, so every call reuses the same result variable; truthiness is
 * decided by istrue(). */
1768 static int ptest(node *pattern)
1770 /* ptest__v is "static": to save stack space? */
1771 return istrue(evaluate(pattern, &G.ptest__v));
1774 /* read next record from stream rsm into a variable v */
/* Visible structure (NOTE(review): many lines of this function are
 * elided in this listing, so this is a partial description):
 * - the record separator is taken from rsplitter. A regex node
 *   (OC_REGEXP) is matched with regexec(), using the case-insensitive
 *   regex when icase is set. A non-NUL separator character c is located
 *   with strchr(), falling back to memchr() for an embedded NUL byte.
 *   The remaining case (presumably c is NUL) skips leading newlines and
 *   ends the record at two consecutive newlines;
 * - the buffer is grown with qrealloc() and filled by safe_read() on the
 *   descriptor obtained from fileno(rsm->F);
 * - errno is copied into ERRNO, presumably after a failed read - TODO
 *   confirm;
 * - the bytes at b+so and b+eo are overwritten with NUL (the old byte is
 *   saved in c), and the separator text starting at b+so is stored
 *   into RT. */
1775 static int awk_getline(rstream *rsm, var *v)
1778 regmatch_t pmatch[2];
1779 int a, p, pp=0, size;
1780 int fd, so, eo, r, rp;
1783 /* we're using our own buffer since we need access to accumulating
1786 fd = fileno(rsm->F);
1791 c = (char) rsplitter.n.info;
1795 m = qrealloc(m, 256, &size);
1801 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1802 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1803 b, 1, pmatch, 0) == 0) {
1804 so = pmatch[0].rm_so;
1805 eo = pmatch[0].rm_eo;
1809 } else if (c != '\0') {
1810 s = strchr(b+pp, c);
1811 if (!s) s = memchr(b+pp, '\0', p - pp);
1818 while (b[rp] == '\n')
1820 s = strstr(b+rp, "\n\n");
1823 while (b[eo] == '\n') eo++;
1831 memmove(m, (const void *)(m+a), p+1);
1836 m = qrealloc(m, a+p+128, &size);
1839 p += safe_read(fd, b+p, size-p-1);
1843 setvar_i(intvar[ERRNO], errno);
1852 c = b[so]; b[so] = '\0';
1856 c = b[eo]; b[eo] = '\0';
1857 setvar_s(intvar[RT], b+so);
/* Format the number n into b (at most size bytes, via snprintf).
 *   format     - a printf-style format holding one conversion
 *   int_as_int - when set and n survives a round trip through int, the
 *                value is printed with %d and format is ignored
 * Otherwise the LAST character of format selects how n is passed:
 *   d i o u x X - n is cast to int
 *   e E f g G   - n is passed as double
 *   other       - syntax_error(EMSG_INV_FMT)
 * r holds the snprintf() result; presumably it is the return value -
 * TODO confirm (the return statement is not visible here).
 * NOTE(review): strchr() also matches the terminating NUL of its first
 * argument, so an empty format (c is NUL) takes the integer branch.
 * NOTE(review): the (int) casts are only well defined while n is within
 * int range. */
1869 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1873 const char *s = format;
1875 if (int_as_int && n == (int)n) {
1876 r = snprintf(b, size, "%d", (int)n);
/* walk s to the end of format; c ends up as its last character */
1878 do { c = *s; } while (c && *++s);
1879 if (strchr("diouxX", c)) {
1880 r = snprintf(b, size, format, (int)n);
1881 } else if (strchr("eEfgG", c)) {
1882 r = snprintf(b, size, format, n);
1884 syntax_error(EMSG_INV_FMT);
1890 /* formatted output into an allocated buffer, return ptr to buffer */
/* n is the argument list: the first argument supplies the format string
 * (a private copy is made with xstrdup), and each further argument is
 * fetched with nextarg() as conversions are met.
 * NOTE(review): parts of the loop structure are elided in this listing. */
1891 static char *awk_printf(node *n)
1896 int i, j, incr, bsize;
1901 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* advance f to just past the next single percent sign; a doubled
 * percent sign is stepped over */
1906 while (*f && (*f != '%' || *(++f) == '%'))
/* scan flags/width/precision up to the conversion letter */
1908 while (*f && !isalpha(*f)) {
1910 syntax_error("%*x formats are not supported");
/* reserve room for this format piece plus MAXVARFMT bytes of output */
1914 incr = (f - s) + MAXVARFMT;
1915 b = qrealloc(b, incr + i, &bsize);
1920 arg = evaluate(nextarg(&n), v);
/* c conversion: a numeric argument is used as a character code,
 * otherwise the first character of its string value is printed */
1923 if (c == 'c' || !c) {
1924 i += sprintf(b+i, s, is_numeric(arg) ?
1925 (char)getvar_i(arg) : *getvar_s(arg));
1926 } else if (c == 's') {
/* strings can be arbitrarily long: grow by the string length first */
1928 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1929 i += sprintf(b+i, s, s1);
/* every other conversion goes through fmt_num() with int_as_int off */
1931 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1935 /* if there was an error while sprintf, return value is negative */
/* resize the result to exactly i bytes plus the terminator */
1939 b = xrealloc(b, i + 1);
1946 /* common substitution routine
1947 * replace (nm) substring of (src) that match (n) with (repl), store
1948 * result into (dest), return number of substitutions. If nm=0, replace
1949 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1950 * subexpression matching (\1-\9)
/* Visible mechanics (NOTE(review): a good part of the body is elided in
 * this listing):
 * - rn is turned into a compiled regex by as_regex(), with sreg as the
 *   scratch regex object;
 * - a NULL src or dest is replaced by $0 (intvar[F0]);
 * - the source is scanned match by match, with up to 10 regmatch_t slots
 *   so that the whole match and sub-expressions are available;
 * - the output string ds is grown with qrealloc() as text is appended. */
1952 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1957 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1958 regmatch_t pmatch[10];
1961 re = as_regex(rn, &sreg);
1962 if (!src) src = intvar[F0];
1963 if (!dest) dest = intvar[F0];
/* REG_NOTBOL is passed once sp no longer points at the start of the
 * source string, so a start-of-line anchor cannot match mid-string */
1968 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1969 so = pmatch[0].rm_so;
1970 eo = pmatch[0].rm_eo;
/* copy everything up to the END of the match; presumably the matched
 * part is dropped again when a replacement takes place - TODO confirm */
1972 ds = qrealloc(ds, di + eo + rl, &dssize);
1973 memcpy(ds + di, sp, eo);
/* walk the replacement text */
1979 for (s = repl; *s; s++) {
/* an ampersand, or a digit when ex is set, inserts matched text;
 * nbs is presumably the count of preceding backslashes - TODO confirm */
1985 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1986 di -= ((nbs + 3) >> 1);
/* append the text of match slot j */
1995 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1996 ds = qrealloc(ds, di + rl + n, &dssize);
1997 memcpy(ds + di, sp + pmatch[j].rm_so, n);
/* append whatever is left of the source after the last match */
2015 ds = qrealloc(ds, di + strlen(sp), &dssize);
2016 strcpy(ds + di, sp);
/* Convert a date string into a mktime() result.
 * ds is scanned as up to seven numbers, stored in this order: year,
 * month, day of month, hour, minute, second, and one more int whose
 * destination is on an elided line (presumably tm_isdst, which defaults
 * to -1 - TODO confirm).
 * The visible validation rejects a month below 1 and a year below 1900;
 * the year is then rebased to the 1900 origin used by struct tm.
 * NOTE(review): the %u conversions store through pointers to int fields;
 * the existing comment below shows this is deliberate.
 * NOTE(review): the function returns int while mktime() returns time_t;
 * confirm that the narrowing is acceptable. */
2023 static NOINLINE int do_mktime(const char *ds)
2028 /*memset(&then, 0, sizeof(then)); - not needed */
2029 then.tm_isdst = -1; /* default is unknown */
2031 /* manpage of mktime says these fields are ints,
2032 * so we can sscanf stuff directly into them */
2033 count = sscanf(ds, "%u %u %u %u %u %u %d",
2034 &then.tm_year, &then.tm_mon, &then.tm_mday,
2035 &then.tm_hour, &then.tm_min, &then.tm_sec,
2039 || (unsigned)then.tm_mon < 1
2040 || (unsigned)then.tm_year < 1900
2046 then.tm_year -= 1900;
2048 return mktime(&then);
/* Execute a builtin function node and store its value in res.
 * NOTE(review): the switch statement and its case labels are elided in
 * this listing; what follows is read off the visible statements only.
 * - Up to four arguments are collected: an[] holds the argument nodes,
 *   av[] their evaluated values, as[] their string forms. Which of these
 *   are filled in is controlled by bits of op->info (masks 0x09000000
 *   and 0x08000000).
 * - The top two bits of info (info >> 30) are compared with nargs and
 *   too few arguments raise EMSG_TOO_FEW_ARGS.
 * - Further down: string splitting into an array, substring extraction,
 *   bitwise operators on getvar_i_int() values, case conversion,
 *   substring search (strstr, or strncasecmp in the other branch),
 *   strftime/mktime, regex matching that sets RSTART and RLENGTH, and
 *   three awk_sub() calls. Of those, one passes ex=TRUE with a count
 *   taken from the third argument, one passes nm=0 (replace all) and one
 *   passes nm=1 (first match only). */
2051 static NOINLINE var *exec_builtin(node *op, var *res)
2053 #define tspl (G.exec_builtin__tspl)
2059 regmatch_t pmatch[2];
2069 isr = info = op->info;
2072 av[2] = av[3] = NULL;
/* gather the arguments; evaluation and string conversion are optional
 * per builtin, selected by the flag bits tested below */
2073 for (i = 0; i < 4 && op; i++) {
2074 an[i] = nextarg(&op);
2075 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2076 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
/* minimum argument count is encoded in the two highest bits of info */
2081 if ((uint32_t)nargs < (info >> 30))
2082 syntax_error(EMSG_TOO_FEW_ARGS);
2088 #if ENABLE_FEATURE_AWK_LIBM
2089 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2091 syntax_error(EMSG_NO_MATH);
/* third argument: a regex node is used as the splitter directly,
 * anything else is evaluated and compiled into the tspl splitter */
2097 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2098 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2103 n = awk_split(as[0], spl, &s);
/* the target array is emptied, then filled with elements 1..n */
2105 clear_array(iamarray(av[1]));
2106 for (i = 1; i <= n; i++)
2107 setari_u(av[1], i, nextword(&s1));
/* start position is converted from 1-based to 0-based */
2114 i = getvar_i(av[1]) - 1;
2117 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2119 s = xstrndup(as[0]+i, n);
2123 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2124 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2126 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2130 setvar_i(res, ~getvar_i_int(av[0]));
2134 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2138 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2142 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2146 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
/* case conversion works on a private copy, ASCII letters only: the
 * test folds the byte to lower case and checks the a..z range, then
 * bit 0x20 is cleared (upper) or set (lower) */
2151 s1 = s = xstrdup(as[0]);
2153 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2154 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2155 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
/* l is the last offset at which the needle could still fit */
2164 l = strlen(as[0]) - ll;
2165 if (ll > 0 && l >= 0) {
2167 s = strstr(as[0], as[1]);
2168 if (s) n = (s - as[0]) + 1;
2170 /* this piece of code is terribly slow and
2171 * really should be rewritten
2173 for (i=0; i<=l; i++) {
2174 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2186 tt = getvar_i(av[1]);
2189 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2190 i = strftime(g_buf, MAXVARFMT,
2191 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2194 setvar_s(res, g_buf);
2198 setvar_i(res, do_mktime(as[0]));
2202 re = as_regex(an[1], &sreg);
2203 n = regexec(re, as[0], 1, pmatch, 0);
2208 pmatch[0].rm_so = 0;
2209 pmatch[0].rm_eo = -1;
2211 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2212 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2213 setvar_i(res, pmatch[0].rm_so);
2214 if (re == &sreg) regfree(re);
2218 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2222 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2226 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2236 * Evaluate node - the heart of the program. Supplied with subtree
2237 * and place where to store result. returns ptr to result.
/* XC() shifts an opcode-class value right by 8 bits; it is applied both
 * to the switch operand and to every case label in evaluate(),
 * presumably to keep the case values small - TODO confirm */
2239 #define XC(n) ((n) >> 8)
/* Interpreter core: walks the node tree starting at op and leaves the
 * value in res (or in a variable it resolves to), returning a pointer
 * to it.
 * NOTE(review): many statements, case labels and all braces are elided
 * in this listing; the comments below annotate visible lines only.
 * Working state: L, R and X are scratch slots accessed through several
 * members (.v variable, .s string, .d double, .i int, plus X.rsm, X.F,
 * X.re, X.info); v1 and v1+1 receive the evaluated left and right
 * operands. */
2241 static var *evaluate(node *op, var *res)
2243 /* This procedure is recursive so we should count every byte */
2244 #define fnargs (G.evaluate__fnargs)
2245 /* seed is initialized to 1 */
2246 #define seed (G.evaluate__seed)
2247 #define sreg (G.evaluate__sreg)
2269 return setvar_s(res, NULL);
2275 opn = (opinfo & OPNMASK);
2276 g_lineno = op->lineno;
2278 /* execute inevitable things */
/* OF_RES1/OF_RES2 request evaluation of the left/right subtree;
 * OF_STR1/OF_STR2/OF_NUM1 request their string or numeric form */
2280 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2281 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2282 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2283 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2284 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2286 switch (XC(opinfo & OPCLSMASK)) {
2288 /* -- iterative node type -- */
2292 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2293 /* it's range pattern */
/* OF_CHECKED is kept on the node while a range is open: set once the
 * start pattern matches, cleared when the end pattern matches */
2294 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2295 op->info |= OF_CHECKED;
2296 if (ptest(op1->r.n))
2297 op->info &= ~OF_CHECKED;
2304 op = (ptest(op1)) ? op->a.n : op->r.n;
2308 /* just evaluate an expression, also used as unconditional jump */
2312 /* branch, used in if-else and various loops */
2314 op = istrue(L.v) ? op->a.n : op->r.n;
2317 /* initialize for-in loop */
2318 case XC( OC_WALKINIT ):
2319 hashwalk_init(L.v, iamarray(R.v));
2322 /* get next array item */
2323 case XC( OC_WALKNEXT ):
2324 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2327 case XC( OC_PRINT ):
2328 case XC( OC_PRINTF ):
/* output redirection: the target is looked up (or created) by name,
 * then opened as a pipe or as a file in write or append mode */
2331 X.rsm = newfile(R.s);
2334 X.rsm->F = popen(R.s, "w");
2335 if (X.rsm->F == NULL)
2336 bb_perror_msg_and_die("popen");
2339 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2345 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2347 fputs(getvar_s(intvar[F0]), X.F);
2350 L.v = evaluate(nextarg(&op1), v1);
/* numbers are formatted with OFMT, integral values as plain integers */
2351 if (L.v->type & VF_NUMBER) {
2352 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2353 getvar_i(L.v), TRUE);
2356 fputs(getvar_s(L.v), X.F);
/* OFS between arguments, ORS at the end of the statement */
2359 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2362 fputs(getvar_s(intvar[ORS]), X.F);
2364 } else { /* OC_PRINTF */
2365 L.s = awk_printf(op1);
2372 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2373 X.info = op1->info & OPCLSMASK;
2374 if (X.info == OC_VAR) {
2376 } else if (X.info == OC_FNARG) {
2377 R.v = &fnargs[op1->l.i];
2379 syntax_error(EMSG_NOT_ARRAY);
/* either one element is removed or the whole array is cleared */
2384 L.s = getvar_s(evaluate(op1->r.n, v1));
2385 hash_remove(iamarray(R.v), L.s);
2387 clear_array(iamarray(R.v));
2391 case XC( OC_NEWSOURCE ):
2392 g_progname = op->l.s;
2395 case XC( OC_RETURN ):
2399 case XC( OC_NEXTFILE ):
2410 /* -- recursive node type -- */
2414 if (L.v == intvar[NF])
2418 case XC( OC_FNARG ):
2419 L.v = &fnargs[op->l.i];
/* with a subscript node present the result is the array element named
 * by R.s, otherwise the variable itself */
2421 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2425 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2428 case XC( OC_REGEXP ):
2430 L.s = getvar_s(intvar[F0]);
2433 case XC( OC_MATCH ):
/* the regex is freed only when as_regex() compiled it into sreg;
 * opn equal to the exclamation mark inverts the result */
2436 X.re = as_regex(op1, &sreg);
2437 R.i = regexec(X.re, L.s, 0, NULL, 0);
2438 if (X.re == &sreg) regfree(X.re);
2439 setvar_i(res, (R.i == 0) ^ (opn == '!'));
2443 /* if source is a temporary string, just relink it to dest */
2444 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2445 //then L.v ends up being a string, which is wrong
2446 // if (R.v == v1+1 && R.v->string) {
2447 // res = setvar_p(L.v, R.v->string);
2448 // R.v->string = NULL;
2450 res = copyvar(L.v, R.v);
2454 case XC( OC_TERNARY ):
/* the right child must be the colon node holding both alternatives */
2455 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2456 syntax_error(EMSG_POSSIBLE_ERROR);
2457 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* user function call: a function without a body is an error */
2461 if (!op->r.f->body.first)
2462 syntax_error(EMSG_UNDEF_FUNC);
/* a fresh argument frame; each actual argument is evaluated and the
 * frame slot is flagged VF_CHILD with x.parent pointing at the
 * evaluated value */
2464 X.v = R.v = nvalloc(op->r.f->nargs + 1);
2466 L.v = evaluate(nextarg(&op1), v1);
2468 R.v->type |= VF_CHILD;
2469 R.v->x.parent = L.v;
2470 if (++R.v - X.v >= op->r.f->nargs)
2478 res = evaluate(op->r.f->body.first, res);
2485 case XC( OC_GETLINE ):
2486 case XC( OC_PGETLINE ):
2488 X.rsm = newfile(L.s);
/* source is either a command (popen) or a file opened for reading */
2490 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2491 X.rsm->F = popen(L.s, "r");
2492 X.rsm->is_pipe = TRUE;
2494 X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
2498 if (!iF) iF = next_input_file();
2503 setvar_i(intvar[ERRNO], errno);
2511 L.i = awk_getline(X.rsm, R.v);
2514 incvar(intvar[FNR]);
2521 /* simple builtins */
2522 case XC( OC_FBLTIN ):
/* NOTE(review): rand() may return RAND_MAX, so this quotient can be
 * exactly 1.0; POSIX awk describes rand() as less than 1 - confirm
 * whether that matters here */
2530 R.d = (double)rand() / (double)RAND_MAX;
2532 #if ENABLE_FEATURE_AWK_LIBM
2558 syntax_error(EMSG_NO_MATH);
/* with an argument the seed is that value, otherwise the current time */
2563 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2573 L.s = getvar_s(intvar[F0]);
/* the command runs only when exec support is enabled and the string
 * is non-empty; the status is shifted right by 8 bits, presumably to
 * extract the exit code of a normally terminated child - TODO confirm */
2579 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2580 ? (system(L.s) >> 8) : 0;
2588 X.rsm = newfile(L.s);
/* closing a stream: pipes go through pclose, files through fclose;
 * the buffer is freed and the entry removed from fdhash */
2597 X.rsm = (rstream *)hash_search(fdhash, L.s);
2599 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2600 free(X.rsm->buffer);
2601 hash_remove(fdhash, L.s);
2604 setvar_i(intvar[ERRNO], errno);
2611 case XC( OC_BUILTIN ):
2612 res = exec_builtin(op, res);
2615 case XC( OC_SPRINTF ):
2616 setvar_p(res, awk_printf(op1));
2619 case XC( OC_UNARY ):
2621 L.d = R.d = getvar_i(R.v);
2647 case XC( OC_FIELD ):
2648 R.i = (int)getvar_i(R.v);
/* field number R.i lives in Fields[R.i - 1] */
2655 res = &Fields[R.i - 1];
2659 /* concatenation (" ") and index joining (",") */
2660 case XC( OC_CONCAT ):
2661 case XC( OC_COMMA ):
/* opn is reused here as the size of the joined string; for a comma
 * node the SUBSEP text is accounted for as well */
2662 opn = strlen(L.s) + strlen(R.s) + 2;
2665 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2666 L.s = getvar_s(intvar[SUBSEP]);
2667 X.s = xrealloc(X.s, opn + strlen(L.s));
/* short-circuit logic: the right side is tested only when the left
 * side does not already decide the result */
2675 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2679 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2682 case XC( OC_BINARY ):
2683 case XC( OC_REPLACE ):
2684 R.d = getvar_i(R.v);
2697 syntax_error(EMSG_DIV_BY_ZERO);
2701 #if ENABLE_FEATURE_AWK_LIBM
2702 L.d = pow(L.d, R.d);
2704 syntax_error(EMSG_NO_MATH);
2709 syntax_error(EMSG_DIV_BY_ZERO);
/* remainder via truncation of the quotient.
 * NOTE(review): the (int) cast limits this to quotients that fit in
 * int - confirm the intended range */
2710 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; OC_REPLACE stores into X.v, which was set
 * to the evaluated left operand above */
2713 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2716 case XC( OC_COMPARE ):
/* numeric comparison only when both sides are numeric; otherwise the
 * string forms are compared, ignoring case when icase is set */
2717 if (is_numeric(L.v) && is_numeric(R.v)) {
2718 L.d = getvar_i(L.v) - getvar_i(R.v);
2720 L.s = getvar_s(L.v);
2721 R.s = getvar_s(R.v);
2722 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* the low bit of opn selects whether the relation test is negated */
2724 switch (opn & 0xfe) {
2735 setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2739 syntax_error(EMSG_POSSIBLE_ERROR);
2741 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2743 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2756 /* -------- main & co. -------- */
/* Program shutdown. The visible lines run the END sequence
 * (endseq.first) and then walk every bucket of fdhash, calling pclose()
 * on each entry that has an open stream flagged as a pipe.
 * NOTE(review): how r is used and how the process finally exits is on
 * lines not visible here. */
2758 static int awk_exit(int r)
2769 evaluate(endseq.first, &tv);
2772 /* waiting for children */
2773 for (i = 0; i < fdhash->csize; i++) {
2774 hi = fdhash->items[i];
2776 if (hi->data.rs.F && hi->data.rs.is_pipe)
2777 pclose(hi->data.rs.F);
2785 /* if expr looks like "var=value", perform assignment and return 1,
2786 * otherwise return 0 */
/* Works on a private copy of expr. The text is rejected when its first
 * character fails isalnum_() or when it contains no equals sign.
 * The value part is rewritten character by character through
 * nextchar(), presumably to process escape sequences - TODO confirm -
 * and is then assigned with setvar_u() to the variable named by the
 * text held in exprc. */
2787 static int is_assignment(const char *expr)
2789 char *exprc, *s, *s0, *s1;
2791 exprc = xstrdup(expr);
2792 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2800 *(s1++) = nextchar(&s);
2803 setvar_u(newvar(exprc), s0);
2808 /* switch to next input file */
/* State is kept in the globals struct (rsm and files_happen below).
 * Visible steps: the stream position fields are reset; ARGIND+1 is
 * compared with ARGC to detect the end of the argument list; otherwise
 * ARGIND is incremented and ARGV[ARGIND] is fetched. A non-empty entry
 * that is not consumed as a var=value assignment is opened with
 * xfopen_stdin(), and FILENAME is set to its name.
 * NOTE(review): the loop around these steps and the return statements
 * are elided in this listing. */
2809 static rstream *next_input_file(void)
2811 #define rsm (G.next_input_file__rsm)
2812 #define files_happen (G.next_input_file__files_happen)
2815 const char *fname, *ind;
2820 rsm.pos = rsm.adv = 0;
2823 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2829 ind = getvar_s(incvar(intvar[ARGIND]));
2830 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
/* is_assignment() performs the assignment as a side effect */
2831 if (fname && *fname && !is_assignment(fname))
2832 F = xfopen_stdin(fname);
2836 files_happen = TRUE;
2837 setvar_s(intvar[FILENAME], fname);
/* Applet entry point. Visible phases, in order:
 *  1. force the C numeric locale, allocate g_buf, create the four hashes;
 *  2. create the internal variables from the vNames/vValues tables;
 *  3. build the FS and RS splitters, register the three standard streams;
 *  4. import the environment into ENVIRON;
 *  5. parse options -F -v -f -W, then load the program from the -f
 *     file(s) or from the first non-option argument;
 *  6. fill ARGC/ARGV, run BEGIN, loop over input files and records
 *     running the main sequence, and finish through awk_exit().
 * NOTE(review): locals, braces and several statements are elided in this
 * listing; comments below cover visible lines only. */
2844 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2845 int awk_main(int argc, char **argv)
2848 char *opt_F, *opt_W;
2849 llist_t *list_v = NULL;
2850 llist_t *list_f = NULL;
2855 char *vnames = (char *)vNames; /* cheat */
2856 char *vvalues = (char *)vValues;
2860 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2861 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2862 if (ENABLE_LOCALE_SUPPORT)
2863 setlocale(LC_NUMERIC, "C");
2867 /* allocate global buffer */
2868 g_buf = xmalloc(MAXVARFMT + 1);
2870 vhash = hash_init();
2871 ahash = hash_init();
2872 fdhash = hash_init();
2873 fnhash = hash_init();
2875 /* initialize variables */
/* names and initial values are consumed in parallel with nextword();
 * a value entry starting with byte 0377 means no string is assigned
 * here, and a star at the current position of the name table marks
 * the variable VF_SPECIAL */
2876 for (i = 0; *vnames; i++) {
2877 intvar[i] = v = newvar(nextword(&vnames));
2878 if (*vvalues != '\377')
2879 setvar_s(v, nextword(&vvalues));
2883 if (*vnames == '*') {
2884 v->type |= VF_SPECIAL;
/* build the initial field and record splitters from FS and RS */
2889 handle_special(intvar[FS]);
2890 handle_special(intvar[RS]);
2892 newfile("/dev/stdin")->F = stdin;
2893 newfile("/dev/stdout")->F = stdout;
2894 newfile("/dev/stderr")->F = stderr;
2896 /* Huh, people report that sometimes environ is NULL. Oh well. */
2897 if (environ) for (envp = environ; *envp; envp++) {
2898 /* environ is writable, thus we don't strdup it needlessly */
2900 char *s1 = strchr(s, '=');
2903 /* Both findvar and setvar_u take const char*
2904 * as 2nd arg -> environment is not trashed */
2905 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2909 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2910 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2914 setvar_s(intvar[FS], opt_F); // -F
2915 while (list_v) { /* -v */
2916 if (!is_assignment(llist_pop(&list_v)))
2919 if (list_f) { /* -f */
2924 g_progname = llist_pop(&list_f);
2925 from_file = xfopen_stdin(g_progname);
2926 /* one byte is reserved for some trick in next_token */
/* read the program in chunks of up to 4094 bytes, growing the buffer
 * ahead of each read; data starts at offset 1, hence s + 1 below */
2927 for (i = j = 1; j > 0; i += j) {
2928 s = xrealloc(s, i + 4096);
2929 j = fread(s + i, 1, 4094, from_file);
2933 parse_program(s + 1);
2937 } else { // no -f: take program from 1st parameter
2940 g_progname = "cmd. line";
2941 parse_program(*argv++);
/* mask 0x8 is presumably the bit of the fourth option letter (W) in the
 * getopt32 string above - TODO confirm */
2943 if (opt & 0x8) // -W
2944 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2946 /* fill in ARGV array */
2947 setvar_i(intvar[ARGC], argc);
2948 setari_u(intvar[ARGV], 0, "awk");
2951 setari_u(intvar[ARGV], ++i, *argv++);
/* BEGIN runs first; with neither main rules nor END there is no need
 * to read any input */
2953 evaluate(beginseq.first, &tv);
2954 if (!mainseq.first && !endseq.first)
2955 awk_exit(EXIT_SUCCESS);
2957 /* input file could already be opened in BEGIN block */
2959 iF = next_input_file();
2961 /* passing through input files */
/* FNR restarts for each file and is incremented for each record read */
2964 setvar_i(intvar[FNR], 0);
2966 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2969 incvar(intvar[FNR]);
2970 evaluate(mainseq.first, &tv);
2977 syntax_error(strerror(errno));
2979 iF = next_input_file();
2982 awk_exit(EXIT_SUCCESS);