1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
14 /* This is a NOEXEC applet. Be very careful! */
17 /* If you comment out one of the two no-op definitions below, the matching
18 * #ifndef fallback further down defines it to print debug output to stderr: */
19 #define debug_printf_walker(...) do {} while (0)
20 #define debug_printf_eval(...) do {} while (0)
22 #ifndef debug_printf_walker
23 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
25 #ifndef debug_printf_eval
26 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
36 #define VF_ARRAY 0x0002 /* 1 = it's an array */
38 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
39 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
40 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
41 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
42 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
43 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
44 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
46 /* these flags are static, don't change them when value is changed */
47 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
49 typedef struct walker_list {
52 struct walker_list *prev;
57 typedef struct var_s {
58 unsigned type; /* flags */
62 int aidx; /* func arg idx (for compilation stage) */
63 struct xhash_s *array; /* array ptr */
64 struct var_s *parent; /* for func args, ptr to actual parameter */
65 walker_list *walker; /* list of array elements (for..in) */
69 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
70 typedef struct chain_s {
73 const char *programname;
77 typedef struct func_s {
83 typedef struct rstream_s {
92 typedef struct hash_item_s {
94 struct var_s v; /* variable/array hash */
95 struct rstream_s rs; /* redirect streams hash */
96 struct func_s f; /* functions hash */
98 struct hash_item_s *next; /* next in chain */
99 char name[1]; /* really it's longer */
/* Chained hash table: items[] holds the bucket heads, and the entries of one
 * bucket are linked through hash_item_s::next. */
102 typedef struct xhash_s {
103 unsigned nel; /* number of elements */
104 unsigned csize; /* current hash size (bucket count used as the modulus) */
105 unsigned nprime; /* index of the next hash size in PRIMES[] */
106 unsigned glen; /* total length of item names (strlen(name) + 1 per item) */
107 struct hash_item_s **items; /* bucket array */
111 typedef struct node_s {
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
140 typedef struct tsplitter_s {
145 /* simple token classes */
146 /* Order and hex values are very important!!! See next_token() */
147 #define TC_SEQSTART 1 /* ( */
148 #define TC_SEQTERM (1 << 1) /* ) */
149 #define TC_REGEXP (1 << 2) /* /.../ */
150 #define TC_OUTRDR (1 << 3) /* | > >> */
151 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
152 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153 #define TC_BINOPX (1 << 6) /* two-opnd operator */
154 #define TC_IN (1 << 7)
155 #define TC_COMMA (1 << 8)
156 #define TC_PIPE (1 << 9) /* input redirection pipe */
157 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158 #define TC_ARRTERM (1 << 11) /* ] */
159 #define TC_GRPSTART (1 << 12) /* { */
160 #define TC_GRPTERM (1 << 13) /* } */
161 #define TC_SEMICOL (1 << 14)
162 #define TC_NEWLINE (1 << 15)
163 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164 #define TC_WHILE (1 << 17)
165 #define TC_ELSE (1 << 18)
166 #define TC_BUILTIN (1 << 19)
167 #define TC_GETLINE (1 << 20)
168 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
169 #define TC_BEGIN (1 << 22)
170 #define TC_END (1 << 23)
171 #define TC_EOF (1 << 24)
172 #define TC_VARIABLE (1 << 25)
173 #define TC_ARRAY (1 << 26)
174 #define TC_FUNCTION (1 << 27)
175 #define TC_STRING (1 << 28)
176 #define TC_NUMBER (1 << 29)
178 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
180 /* combined token classes */
181 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
184 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
191 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
193 /* discard newlines after these */
194 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
195 | TC_BINOP | TC_OPTERM)
197 /* what can expression begin with */
198 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
205 | TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
208 #define OF_RES1 0x010000
209 #define OF_RES2 0x020000
210 #define OF_STR1 0x040000
211 #define OF_STR2 0x080000
212 #define OF_NUM1 0x100000
213 #define OF_CHECKED 0x200000
215 /* combined operator flags */
218 #define xS (OF_RES2 | OF_STR2)
220 #define VV (OF_RES1 | OF_RES2)
221 #define Nx (OF_RES1 | OF_NUM1)
222 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx (OF_RES1 | OF_STR1)
224 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK 0x007F
230 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
231 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
232 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
234 #define P(x) (x << 24)
235 #define PRIMASK 0x7F000000
236 #define PRIMASK2 0x7E000000
238 /* Operation classes */
240 #define SHIFT_TIL_THIS 0x0600
241 #define RECUR_FROM_THIS 0x1000
244 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
245 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
247 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
248 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
249 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
251 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
252 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
253 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
254 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
255 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
256 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
257 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
258 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
261 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
265 /* simple builtins */
267 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
273 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
275 B_an, B_co, B_ls, B_or, B_rs, B_xo,
278 /* tokens and their corresponding info values */
280 #define NTC "\377" /* switch to next token class (tc<<1) */
283 #define OC_B OC_BUILTIN
285 static const char tokenlist[] ALIGN1 =
288 "\1/" NTC /* REGEXP */
289 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
290 "\2++" "\2--" NTC /* UOPPOST */
291 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
292 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
293 "\2*=" "\2/=" "\2%=" "\2^="
294 "\1+" "\1-" "\3**=" "\2**"
295 "\1/" "\1%" "\1^" "\1*"
296 "\2!=" "\2>=" "\2<=" "\1>"
297 "\1<" "\2!~" "\1~" "\2&&"
298 "\2||" "\1?" "\1:" NTC
302 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
308 "\2if" "\2do" "\3for" "\5break" /* STATX */
309 "\10continue" "\6delete" "\5print"
310 "\6printf" "\4next" "\10nextfile"
311 "\6return" "\4exit" NTC
315 "\3and" "\5compl" "\6lshift" "\2or"
317 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
318 "\3cos" "\3exp" "\3int" "\3log"
319 "\4rand" "\3sin" "\4sqrt" "\5srand"
320 "\6gensub" "\4gsub" "\5index" "\6length"
321 "\5match" "\5split" "\7sprintf" "\3sub"
322 "\6substr" "\7systime" "\10strftime" "\6mktime"
323 "\7tolower" "\7toupper" NTC
325 "\4func" "\10function" NTC
328 /* compiler adds trailing "\0" */
331 static const uint32_t tokeninfo[] = {
335 xS|'a', xS|'w', xS|'|',
336 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
337 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
338 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
339 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
341 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
342 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
343 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
344 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
345 OC_IN|SV|P(49), /* in */
347 OC_PGETLINE|SV|P(37),
348 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
354 ST_IF, ST_DO, ST_FOR, OC_BREAK,
355 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
356 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
357 OC_RETURN|Vx, OC_EXIT|Nx,
361 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
362 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
363 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
364 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
365 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
366 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
367 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
368 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
369 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
376 /* internal variable names and their initial values */
377 /* an asterisk marks SPECIAL vars; "$" is the unnamed variable for field 0 ($0) */
379 CONVFMT, OFMT, FS, OFS,
380 ORS, RS, RT, FILENAME,
381 SUBSEP, F0, ARGIND, ARGC,
382 ARGV, ERRNO, FNR, NR,
383 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
386 static const char vNames[] ALIGN1 =
387 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
388 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
389 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
390 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
391 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
393 static const char vValues[] ALIGN1 =
394 "%.6g\0" "%.6g\0" " \0" " \0"
395 "\n\0" "\n\0" "\0" "\0"
396 "\034\0" "\0" "\377";
398 /* hash size may grow to these values */
399 #define FIRST_PRIME 61
400 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
403 /* Globals. Split in two parts so that first one is addressed
404 * with (mostly short) negative offsets.
405 * NB: it's unsafe to put members of type "double"
406 * into globals2 (gcc may fail to align them).
410 chain beginseq, mainseq, endseq;
412 node *break_ptr, *continue_ptr;
414 xhash *vhash, *ahash, *fdhash, *fnhash;
415 const char *g_progname;
418 int maxfields; /* used in fsrealloc() only */
427 smallint is_f0_split;
430 uint32_t t_info; /* often used */
436 var *intvar[NUM_INTERNAL_VARS]; /* often used */
438 /* former statics from various functions */
439 char *split_f0__fstrings;
441 uint32_t next_token__save_tclass;
442 uint32_t next_token__save_info;
443 uint32_t next_token__ltclass;
444 smallint next_token__concat_inserted;
446 smallint next_input_file__files_happen;
447 rstream next_input_file__rsm;
449 var *evaluate__fnargs;
450 unsigned evaluate__seed;
451 regex_t evaluate__sreg;
455 tsplitter exec_builtin__tspl;
457 /* biggest and least used members go last */
458 tsplitter fsplitter, rsplitter;
460 #define G1 (ptr_to_globals[-1])
461 #define G (*(struct globals2 *)ptr_to_globals)
462 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
463 /*char G1size[sizeof(G1)]; - 0x74 */
464 /*char Gsize[sizeof(G)]; - 0x1c4 */
465 /* Trying to keep most of members accessible with short offsets: */
466 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
467 #define t_double (G1.t_double )
468 #define beginseq (G1.beginseq )
469 #define mainseq (G1.mainseq )
470 #define endseq (G1.endseq )
471 #define seq (G1.seq )
472 #define break_ptr (G1.break_ptr )
473 #define continue_ptr (G1.continue_ptr)
475 #define vhash (G1.vhash )
476 #define ahash (G1.ahash )
477 #define fdhash (G1.fdhash )
478 #define fnhash (G1.fnhash )
479 #define g_progname (G1.g_progname )
480 #define g_lineno (G1.g_lineno )
481 #define nfields (G1.nfields )
482 #define maxfields (G1.maxfields )
483 #define Fields (G1.Fields )
484 #define g_cb (G1.g_cb )
485 #define g_pos (G1.g_pos )
486 #define g_buf (G1.g_buf )
487 #define icase (G1.icase )
488 #define exiting (G1.exiting )
489 #define nextrec (G1.nextrec )
490 #define nextfile (G1.nextfile )
491 #define is_f0_split (G1.is_f0_split )
492 #define t_info (G.t_info )
493 #define t_tclass (G.t_tclass )
494 #define t_string (G.t_string )
495 #define t_lineno (G.t_lineno )
496 #define t_rollback (G.t_rollback )
497 #define intvar (G.intvar )
498 #define fsplitter (G.fsplitter )
499 #define rsplitter (G.rsplitter )
500 #define INIT_G() do { \
501 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
502 G.next_token__ltclass = TC_OPTERM; \
503 G.evaluate__seed = 1; \
507 /* function prototypes */
508 static void handle_special(var *);
509 static node *parse_expr(uint32_t);
510 static void chain_group(void);
511 static var *evaluate(node *, var *);
512 static rstream *next_input_file(void);
513 static int fmt_num(char *, int, const char *, double, int);
514 static int awk_exit(int) NORETURN;
516 /* ---- error handling ---- */
518 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
519 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
520 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
521 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
522 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
523 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
524 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
525 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
526 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
527 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
/* Reset *vp to all-zero bytes, clearing every member of the var. */
529 static void zero_out_var(var *vp)
531 memset(vp, 0, sizeof(*vp));
/* Report `message` as "progname:lineno: message", using g_progname and
 * g_lineno, through bb_error_msg_and_die(). Declared NORETURN. */
534 static void syntax_error(const char *message) NORETURN;
535 static void syntax_error(const char *message)
537 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
540 /* ---- hash stuff ---- */
/* Hash the string `name` to an unsigned index. The visible update step is
 * idx = idx * 63 + next byte, written as (idx << 6) - idx. Callers reduce the
 * result modulo the table size themselves. */
542 static unsigned hashidx(const char *name)
547 idx = *name++ + (idx << 6) - idx;
551 /* Create a new hash: a zero-filled xhash whose bucket array has
 * FIRST_PRIME (zeroed) slots and whose csize is FIRST_PRIME. */
552 static xhash *hash_init(void)
556 newhash = xzalloc(sizeof(*newhash));
557 newhash->csize = FIRST_PRIME;
558 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
563 /* Find item in hash, return ptr to data, NULL if not found.
 * The bucket is hashidx(name) % hash->csize; item names are compared
 * with strcmp(). */
564 static void *hash_search(xhash *hash, const char *name)
568 hi = hash->items[hashidx(name) % hash->csize];
570 if (strcmp(hi->name, name) == 0)
577 /* Grow hash if it becomes too big.
 * The new size is the next entry of PRIMES[] (hash->nprime is advanced).
 * Every existing item is re-bucketed by hashidx(name) % newsize and pushed
 * at the head of its new chain, then csize and items are switched over.
 * The nprime == ARRAY_SIZE(PRIMES) test covers the case where no larger
 * size is left. */
578 static void hash_rebuild(xhash *hash)
580 unsigned newsize, i, idx;
581 hash_item **newitems, *hi, *thi;
583 if (hash->nprime == ARRAY_SIZE(PRIMES))
586 newsize = PRIMES[hash->nprime++];
587 newitems = xzalloc(newsize * sizeof(newitems[0]));
589 for (i = 0; i < hash->csize; i++) {
594 idx = hashidx(thi->name) % newsize;
595 thi->next = newitems[idx];
601 hash->csize = newsize;
602 hash->items = newitems;
605 /* Find item in hash, add it if necessary. Return ptr to data.
 * On a miss the element count is incremented and nel / csize is checked
 * against a load limit of 10. The new item is zero-allocated with room for
 * a copy of the name and pushed at the head of its bucket chain. */
606 static void *hash_find(xhash *hash, const char *name)
612 hi = hash_search(hash, name);
614 if (++hash->nel / hash->csize > 10)
617 l = strlen(name) + 1;
618 hi = xzalloc(sizeof(*hi) + l);
619 strcpy(hi->name, name);
621 idx = hashidx(name) % hash->csize;
622 hi->next = hash->items[idx];
623 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): each looks `name` up (adding it when
 * absent) and casts the result. findvar takes the hash explicitly; newvar,
 * newfile and newfunc use the global vhash, fdhash and fnhash. */
629 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
630 #define newvar(name) ((var*) hash_find(vhash, (name)))
631 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
632 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Remove the item called `name` from `hash`. The visible code starts at the
 * bucket hashidx(name) % csize, matches the item with strcmp(), and on a
 * match reduces hash->glen by strlen(name) + 1. */
634 static void hash_remove(xhash *hash, const char *name)
636 hash_item *hi, **phi;
638 phi = &hash->items[hashidx(name) % hash->csize];
641 if (strcmp(hi->name, name) == 0) {
642 hash->glen -= (strlen(name) + 1);
652 /* ------ some useful functions ------ */
/* Skip blanks starting at p. The visible tests treat a backslash followed
 * by a newline as a special case, and otherwise look for the first
 * character that is neither ' ' nor '\t'. */
654 static char *skip_spaces(char *p)
657 if (*p == '\\' && p[1] == '\n') {
660 } else if (*p != ' ' && *p != '\t') {
668 /* Returns the old *s and advances *s past the word and its terminating
 * NUL: the loop post-increments *s until it has consumed a '\0'. */
669 static char *nextword(char **s)
672 while (*(*s)++ != '\0')
/* Decode one character at *s, expanding a backslash escape through
 * bb_process_escape_sequence(), and advance *s past it. For an unrecognized
 * escape \z the character z itself is fetched; *s is not advanced over a
 * NUL. */
677 static char nextchar(char **s)
684 c = bb_process_escape_sequence((const char**)s);
685 if (c == '\\' && *s == pps) { /* unrecognized \z? */
686 c = *(*s); /* yes, fetch z */
688 (*s)++; /* advance unless z = NUL */
/* True if c is alphanumeric (per isalnum) or an underscore. */
693 static ALWAYS_INLINE int isalnum_(int c)
695 return (isalnum(c) || c == '_');
/* Parse a number at *pp and advance *pp past it.
 * With ENABLE_DESKTOP and a leading '0', input of the form 0x... or
 * 0<digit>... is first converted as an integer by strtoull() with base 0.
 * If the character where that conversion stopped is a digit or '.', the
 * text may be a floating number after all and is handed to strtod(). */
698 static double my_strtod(char **pp)
701 if (ENABLE_DESKTOP && cp[0] == '0') {
702 /* Might be hex or octal integer: 0x123abc or 07777 */
703 char c = (cp[1] | 0x20);
704 if (c == 'x' || isdigit(cp[1])) {
705 unsigned long long ull = strtoull(cp, pp, 0);
709 if (!isdigit(c) && c != '.')
711 /* else: it may be a floating number. Examples:
712 * 009.123 (*pp points to '9')
713 * 000.123 (*pp points to '.')
714 * fall through to strtod.
718 return strtod(cp, pp);
721 /* -------- working with variables (set/get/copy/etc) -------- */
/* Resolve v to an array hash. VF_CHILD vars (function args whose x.parent
 * points to the source) are followed first. If the resulting var is not yet
 * flagged VF_ARRAY, it is given a fresh hash from hash_init(). */
723 static xhash *iamarray(var *v)
727 while (a->type & VF_CHILD)
730 if (!(a->type & VF_ARRAY)) {
732 a->x.array = hash_init();
/* Empty `array`: for each of the csize buckets the items' v.string values
 * are freed and the bucket head is reset to NULL. Finally glen and nel are
 * zeroed. The bucket array itself stays allocated. */
737 static void clear_array(xhash *array)
742 for (i = 0; i < array->csize; i++) {
743 hi = array->items[i];
747 free(thi->data.v.string);
750 array->items[i] = NULL;
752 array->glen = array->nel = 0;
755 /* Clear a variable. Strings flagged VF_FSTR (pointing into the fstring
 * buffer) are tested for separately, and afterwards only the VF_DONTTOUCH
 * flags are kept in v->type. */
756 static var *clrvar(var *v)
758 if (!(v->type & VF_FSTR))
761 v->type &= VF_DONTTOUCH;
767 /* Assign string value to variable. The pointer is passed in as-is;
 * setvar_s() is the variant that copies the string first. */
768 static var *setvar_p(var *v, char *value)
776 /* Same as setvar_p but makes a copy of the string (xstrdup).
 * A NULL or empty value is passed on as NULL. */
777 static var *setvar_s(var *v, const char *value)
779 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
782 /* Same as setvar_s but sets the USER flag (VF_USER: user input that may
 * be a numeric string). */
783 static var *setvar_u(var *v, const char *value)
785 v = setvar_s(v, value);
790 /* Set array element to user string. The element is keyed by the text
 * form of idx (itoa) and looked up, or created, in a's array hash. */
791 static void setari_u(var *a, int idx, const char *s)
795 v = findvar(iamarray(a), itoa(idx));
799 /* Assign numeric value to variable and flag it VF_NUMBER. */
800 static var *setvar_i(var *v, double value)
803 v->type |= VF_NUMBER;
/* Return v's value as a string (never NULL: a NULL string reads as "").
 * A number with no cached string is formatted into g_buf with the CONVFMT
 * format, and a copy is cached in v->string (VF_CACHED is set). */
809 static const char *getvar_s(var *v)
811 /* if v is numeric and has no cached string, convert it to string */
812 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
813 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
814 v->string = xstrdup(g_buf);
815 v->type |= VF_CACHED;
817 return (v->string == NULL) ? "" : v->string;
/* Return v's value as a number. When v is neither VF_NUMBER nor VF_CACHED,
 * its string is parsed with my_strtod() into v->number and VF_CACHED is
 * set. VF_USER values get extra handling that is only partly visible in
 * this extract. */
820 static double getvar_i(var *v)
824 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
828 debug_printf_eval("getvar_i: '%s'->", s);
829 v->number = my_strtod(&s);
830 debug_printf_eval("%f (s:'%s')\n", v->number, s);
831 if (v->type & VF_USER) {
837 debug_printf_eval("getvar_i: '%s'->zero\n", s);
840 v->type |= VF_CACHED;
842 debug_printf_eval("getvar_i: %f\n", v->number);
846 /* Used for operands of bitwise ops: converts getvar_i(v) to unsigned
 * long. One return casts d directly; the other negates d, casts, and
 * negates the result back. */
847 static unsigned long getvar_i_int(var *v)
849 double d = getvar_i(v);
851 /* Casting doubles to longs is undefined for values outside
852 * of target type range. Try to widen it as much as possible */
854 return (unsigned long)d;
855 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
856 return - (long) (unsigned long) (-d);
/* Copy src's value into dest: the type flags other than VF_DONTTOUCH and
 * VF_FSTR, the number, and a fresh duplicate of the string. Then
 * handle_special(dest) is run so special variables react to the change. */
859 static var *copyvar(var *dest, const var *src)
863 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
864 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
865 dest->number = src->number;
867 dest->string = xstrdup(src->string);
869 handle_special(dest);
/* Add 1 to v's numeric value and store it back with setvar_i(). */
873 static var *incvar(var *v)
875 return setvar_i(v, getvar_i(v) + 1.0);
878 /* Return true if v is number or numeric string.
 * The expression is nonzero when VF_NUMBER or VF_USER is set, or when
 * VF_DIRTY is clear (the XOR inverts the DIRTY bit before masking). */
879 static int is_numeric(var *v)
882 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
885 /* Return 1 when value of v corresponds to true, 0 otherwise.
 * One path tests number != 0, the other tests for a non-empty string. */
886 static int istrue(var *v)
889 return (v->number != 0);
890 return (v->string && v->string[0]);
893 /* Temporary variables allocator. Last allocated should be first freed.
 * Vars are carved out of g_cb, the current nvblock. When n more vars do not
 * fit, a zeroed block sized for max(n, MINNVBLOCK) vars is allocated. */
894 static var *nvalloc(int n)
902 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
908 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
909 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
911 g_cb->pos = g_cb->nv;
913 /*g_cb->next = NULL; - xzalloc did it */
921 while (v < g_cb->pos) {
/* Release the temporaries from v up to g_cb->pos. v must lie inside the
 * current block, otherwise an internal error is raised. For each var, an
 * array it owns (VF_ARRAY without VF_CHILD) is cleared and its item table
 * freed, and a VF_WALK walker list is freed. The trailing loop steps back
 * over blocks that have become empty. */
930 static void nvfree(var *v)
934 if (v < g_cb->nv || v >= g_cb->pos)
935 syntax_error(EMSG_INTERNAL_ERROR);
937 for (p = v; p < g_cb->pos; p++) {
938 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
939 clear_array(iamarray(p));
940 free(p->x.array->items);
943 if (p->type & VF_WALK) {
945 walker_list *w = p->x.walker;
946 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
950 debug_printf_walker(" free(%p)\n", w);
959 while (g_cb->prev && g_cb->pos == g_cb->nv) {
964 /* ------- awk program text parsing ------- */
966 /* Parse the next token of the program text and place the results into the
967 * t_* globals (t_tclass, t_info, t_double, ...). If the token's class is
 * not among `expected`, a syntax error is raised. Returns the token class.
 * A concatenation operator is synthesized between adjacent operands.
985 static uint32_t next_token(uint32_t expected)
971 #define concat_inserted (G.next_token__concat_inserted)
972 #define save_tclass (G.next_token__save_tclass)
973 #define save_info (G.next_token__save_info)
974 /* Initialized to TC_OPTERM: */
975 #define ltclass (G.next_token__ltclass)
985 } else if (concat_inserted) {
986 concat_inserted = FALSE;
987 t_tclass = save_tclass;
996 while (*p != '\n' && *p != '\0')
1005 } else if (*p == '\"') {
1008 while (*p != '\"') {
1010 if (*p == '\0' || *p == '\n')
1011 syntax_error(EMSG_UNEXP_EOS);
1013 *s++ = nextchar(&pp);
1020 } else if ((expected & TC_REGEXP) && *p == '/') {
1024 if (*p == '\0' || *p == '\n')
1025 syntax_error(EMSG_UNEXP_EOS);
1029 s[-1] = bb_process_escape_sequence((const char **)&pp);
1042 } else if (*p == '.' || isdigit(*p)) {
1045 t_double = my_strtod(&pp);
1048 syntax_error(EMSG_UNEXP_TOKEN);
1052 /* search for something known */
1057 int l = (unsigned char) *tl++;
1058 if (l == (unsigned char) NTCC) {
1062 /* if token class is expected,
1064 * and it's not a longer word,
1066 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1067 && strncmp(p, tl, l) == 0
1068 && !((tc & TC_WORD) && isalnum_(p[l]))
1070 /* then this is what we are looking for */
1078 /* not a known token */
1080 /* is it a name? (var/array/function) */
1082 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1085 while (isalnum_(*++p)) {
1090 /* also consume whitespace between functionname and bracket */
1091 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1105 /* skipping newlines in some cases */
1106 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1109 /* insert concatenation operator when needed */
1110 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1111 concat_inserted = TRUE;
1115 t_info = OC_CONCAT | SS | P(35);
1122 /* Are we ready for this? */
1123 if (!(ltclass & expected))
1124 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1125 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1128 #undef concat_inserted
1134 static void rollback_token(void)
/* Allocate a zero-filled node and stamp it with the current line number
 * (g_lineno). */
1139 static node *new_node(uint32_t info)
1143 n = xzalloc(sizeof(node));
1145 n->lineno = g_lineno;
/* Turn n into an OC_REGEXP node for pattern s. The pattern is compiled
 * twice with REG_EXTENDED: into re[0] as given, and into re[1] with
 * REG_ICASE added. re must therefore provide room for two regex_t. */
1149 static void mk_re_node(const char *s, node *n, regex_t *re)
1151 n->info = OC_REGEXP;
1154 xregcomp(re, s, REG_EXTENDED);
1155 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: require "(" as the next token, then
 * parse up to and including the closing ")". Returns the subtree. */
1158 static node *condition(void)
1160 next_token(TC_SEQSTART);
1161 return parse_expr(TC_SEQTERM);
1164 /* Parse an expression that ends at a token of class iexp and return a
1165 * pointer to the built subtree. The terminator is consumed by parse_expr.
 * xtc tracks which token classes are acceptable next. */
1166 static node *parse_expr(uint32_t iexp)
1175 sn.r.n = glptr = NULL;
1176 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1178 while (!((tc = next_token(xtc)) & iexp)) {
1180 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1181 /* input redirection (<) attached to glptr node */
1182 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1184 xtc = TC_OPERAND | TC_UOPPRE;
1187 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1188 /* for binary and postfix-unary operators, jump back over
1189 * previous operators with higher priority */
1191 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1192 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1196 if ((t_info & OPCLSMASK) == OC_TERNARY)
1198 cn = vn->a.n->r.n = new_node(t_info);
1200 if (tc & TC_BINOP) {
1202 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1203 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1205 next_token(TC_GETLINE);
1206 /* give maximum priority to this pipe */
1207 cn->info &= ~PRIMASK;
1208 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1212 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1217 /* for operands and prefix-unary operators, attach them
1220 cn = vn->r.n = new_node(t_info);
1222 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1223 if (tc & (TC_OPERAND | TC_REGEXP)) {
1224 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1225 /* one should be very careful with switch on tclass -
1226 * only simple tclasses should be used! */
1231 v = hash_search(ahash, t_string);
1233 cn->info = OC_FNARG;
1234 cn->l.aidx = v->x.aidx;
1236 cn->l.v = newvar(t_string);
1238 if (tc & TC_ARRAY) {
1240 cn->r.n = parse_expr(TC_ARRTERM);
1247 v = cn->l.v = xzalloc(sizeof(var));
1249 setvar_i(v, t_double);
1251 setvar_s(v, t_string);
1255 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1260 cn->r.f = newfunc(t_string);
1261 cn->l.n = condition();
1265 cn = vn->r.n = parse_expr(TC_SEQTERM);
1271 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1275 cn->l.n = condition();
1284 /* Add node to chain. Return ptr to alloc'd node.
 * When the chain's recorded programname differs from g_progname, it is
 * updated and an OC_NEWSOURCE node carrying a copy of the new name is
 * chained first. The chain is kept terminated by an OC_DONE node. */
1285 static node *chain_node(uint32_t info)
1290 seq->first = seq->last = new_node(0);
1292 if (seq->programname != g_progname) {
1293 seq->programname = g_progname;
1294 n = chain_node(OC_NEWSOURCE);
1295 n->l.new_progname = xstrdup(g_progname);
1300 seq->last = n->a.n = new_node(OC_DONE);
/* Chain a node of class `info` whose left operand is an expression ending
 * at a statement terminator or "}". The "}" case is tested separately
 * afterwards. */
1305 static void chain_expr(uint32_t info)
1309 n = chain_node(info);
1310 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1311 if (t_tclass & TC_GRPTERM)
/* Chain the skeleton of a loop: an OC_BR|Vx branch node and, later, an
 * OC_EXEC|Vx node (n2). Fresh OC_EXEC nodes serve as the break and continue
 * targets in between. Afterwards the continue target is linked to n2, and
 * the break target and the branch's r.n to the chain's last node. The
 * caller's break_ptr and continue_ptr are saved on entry and restored at
 * the end, so loops can nest. */
1315 static node *chain_loop(node *nn)
1317 node *n, *n2, *save_brk, *save_cont;
1319 save_brk = break_ptr;
1320 save_cont = continue_ptr;
1322 n = chain_node(OC_BR | Vx);
1323 continue_ptr = new_node(OC_EXEC);
1324 break_ptr = new_node(OC_EXEC);
1326 n2 = chain_node(OC_EXEC | Vx);
1329 continue_ptr->a.n = n2;
1330 break_ptr->a.n = n->r.n = seq->last;
1332 continue_ptr = save_cont;
1333 break_ptr = save_brk;
1338 /* Parse a group and attach it to the current chain. Leading newlines are
 * skipped. A "{" starts a block read until "}"; an expression or
 * terminator is chained as OC_EXEC; anything else is a control statement
 * dispatched on its operation class (if, do, for, print, break, ...). */
1339 static void chain_group(void)
1345 c = next_token(TC_GRPSEQ);
1346 } while (c & TC_NEWLINE);
1348 if (c & TC_GRPSTART) {
1349 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1350 if (t_tclass & TC_NEWLINE)
1355 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1357 chain_expr(OC_EXEC | Vx);
1358 } else { /* TC_STATEMNT */
1359 switch (t_info & OPCLSMASK) {
1361 n = chain_node(OC_BR | Vx);
1362 n->l.n = condition();
1364 n2 = chain_node(OC_EXEC);
1366 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1368 n2->a.n = seq->last;
1376 n = chain_loop(NULL);
1381 n2 = chain_node(OC_EXEC);
1382 n = chain_loop(NULL);
1384 next_token(TC_WHILE);
1385 n->l.n = condition();
1389 next_token(TC_SEQSTART);
1390 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1391 if (t_tclass & TC_SEQTERM) { /* for-in */
1392 if ((n2->info & OPCLSMASK) != OC_IN)
1393 syntax_error(EMSG_UNEXP_TOKEN);
1394 n = chain_node(OC_WALKINIT | VV);
1397 n = chain_loop(NULL);
1398 n->info = OC_WALKNEXT | Vx;
1400 } else { /* for (;;) */
1401 n = chain_node(OC_EXEC | Vx);
1403 n2 = parse_expr(TC_SEMICOL);
1404 n3 = parse_expr(TC_SEQTERM);
1414 n = chain_node(t_info);
1415 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1416 if (t_tclass & TC_OUTRDR) {
1418 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1420 if (t_tclass & TC_GRPTERM)
1425 n = chain_node(OC_EXEC);
1430 n = chain_node(OC_EXEC);
1431 n->a.n = continue_ptr;
1434 /* delete, next, nextfile, return, exit */
/* Parse the program text p. Top-level tokens are read until TC_EOF and
 * dispatched: statement terminators, BEGIN, END, function declarations
 * (name, then a parameter list whose names are entered into ahash with
 * increasing arg indexes), pattern expressions (chained as OC_TEST, with
 * an OC_PRINT when no "{" follows), and bare "{" groups. */
1441 static void parse_program(char *p)
1450 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1451 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1453 if (tclass & TC_OPTERM)
1457 if (tclass & TC_BEGIN) {
1461 } else if (tclass & TC_END) {
1465 } else if (tclass & TC_FUNCDECL) {
1466 next_token(TC_FUNCTION);
1468 f = newfunc(t_string);
1469 f->body.first = NULL;
1471 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1472 v = findvar(ahash, t_string);
1473 v->x.aidx = f->nargs++;
1475 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1482 } else if (tclass & TC_OPSEQ) {
1484 cn = chain_node(OC_TEST);
1485 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1486 if (t_tclass & TC_GRPSTART) {
1490 chain_node(OC_PRINT);
1492 cn->r.n = mainseq.last;
1494 } else /* if (tclass & TC_GRPSTART) */ {
1502 /* -------- program execution part -------- */
/* (Re)build splitter spl for separator string s. If the splitter node
 * currently holds a regex (OC_REGEXP), the compiled regex is freed first.
 * A separator longer than one character is compiled as a regex node;
 * otherwise its single character (or NUL) is stored in n->info. */
1504 static node *mk_splitter(const char *s, tsplitter *spl)
1512 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1514 regfree(ire); // TODO: nuke ire, use re+1?
1516 if (s[0] && s[1]) { /* strlen(s) > 1 */
1517 mk_re_node(s, n, re);
1519 n->info = (uint32_t) s[0];
1525 /* Use node as a regular expression. Takes the node ptr and caller-supplied
1526 * regex_t storage space. An OC_REGEXP node yields its own precompiled regex
 * (the REG_ICASE variant when icase is set). Any other node is evaluated to
 * a string and compiled into preg. Returns ptr to the regex; if the result
1527 * points to preg, it should later be regfree'd manually by the caller.
1529 static regex_t *as_regex(node *op, regex_t *preg)
1535 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1536 return icase ? op->r.ire : op->l.re;
1539 s = getvar_s(evaluate(op, v));
1541 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1542 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1543 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1544 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1545 * (maybe gsub is not supposed to use REG_EXTENDED?).
1547 if (regcomp(preg, s, cflags)) {
1548 cflags &= ~REG_EXTENDED;
1549 xregcomp(preg, s, cflags);
1555 /* Gradually increasing buffer: when b is NULL or n >= *size, *size becomes
 * n + n/2 + 80 and b is reallocated to that many bytes.
1556 * Note that we reallocate even if n == old_size,
1557 * and thus there is at least one extra allocated byte.
1559 static char* qrealloc(char *b, int n, int *size)
1561 if (!b || n >= *size) {
1562 *size = n + (n>>1) + 80;
1563 b = xrealloc(b, *size);
1568 /* Resize field storage space: when size >= maxfields, Fields[] grows to size + 16 entries and each new entry starts as VF_SPECIAL with a NULL string */
1569 static void fsrealloc(int size)
1573 if (size >= maxfields) {
1575 maxfields = size + 16;
1576 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1577 for (; i < maxfields; i++) {
1578 Fields[i].type = VF_SPECIAL;
1579 Fields[i].string = NULL;
1582 /* if size < nfields, clear extra field variables */
1583 for (i = size; i < nfields; i++) {
/* Split string s according to splitter node spl. The pieces are written
 * into a newly allocated buffer returned through *slist (sized
 * strlen(s) * 2 + 3, enough for one field per character); n counts the
 * fields. Visible strategies: regex split when spl is an OC_REGEXP node,
 * null split when the separator char is NUL, single-character split via
 * strpbrk() on a two-character set (upper/lower case of the separator),
 * and whitespace split. */
1589 static int awk_split(const char *s, node *spl, char **slist)
1594 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1596 /* in worst case, each char would be a separate field */
1597 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1600 c[0] = c[1] = (char)spl->info;
1602 if (*getvar_s(intvar[RS]) == '\0')
1606 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1608 return n; /* "": zero fields */
1609 n++; /* at least one field will be there */
1611 l = strcspn(s, c+2); /* len till next NUL or \n */
1612 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1613 && pmatch[0].rm_so <= l
1615 l = pmatch[0].rm_so;
1616 if (pmatch[0].rm_eo == 0) {
1620 n++; /* we saw yet another delimiter */
1622 pmatch[0].rm_eo = l;
1627 /* make sure we remove *all* of the separator chars */
1630 } while (++l < pmatch[0].rm_eo);
1632 s += pmatch[0].rm_eo;
1636 if (c[0] == '\0') { /* null split */
1644 if (c[0] != ' ') { /* single-character split */
1646 c[0] = toupper(c[0]);
1647 c[1] = tolower(c[1]);
1651 while ((s1 = strpbrk(s1, c)) != NULL) {
1659 s = skip_whitespace(s);
1663 while (*s && !isspace(*s))
// split_f0: split the current record ($0, intvar[F0]) into Fields[].
// awk_split() is run with the field splitter (fsplitter) and stores the
// pieces in the persistent fstrings buffer kept in G. Each of the n fields
// gets its string from nextword() and is flagged VF_FSTR | VF_USER | VF_DIRTY.
// NF is written directly (type and number) instead of through a setter.
1670 static void split_f0(void)
1672 /* static char *fstrings; */
1673 #define fstrings (G.split_f0__fstrings)
1684 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1687 for (i = 0; i < n; i++) {
1688 Fields[i].string = nextword(&s);
1689 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1692 /* set NF manually to avoid side effects */
1694 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1695 intvar[NF]->number = nfields;
1699 /* perform additional actions when some internal variables changed */
// handle_special: react to a change of variable v.
// Only variables flagged VF_SPECIAL are of interest (see the first test).
//   v is NF         - $0 is rebuilt: fields 0..NF-1 are appended to a
//                     qrealloc-grown buffer with OFS copied in between,
//                     and the result is stored into $0 with setvar_p()
//   v is $0         - is_f0_split is reset to FALSE
//   v is FS / RS    - fsplitter / rsplitter is rebuilt from the new value
//   v is IGNORECASE - has its own branch
//   final statements - NF is set to the larger of its current value and
//                     (index of v within Fields) + 1
1700 static void handle_special(var *v)
1704 const char *sep, *s;
1705 int sl, l, len, i, bsize;
1707 if (!(v->type & VF_SPECIAL))
1710 if (v == intvar[NF]) {
1711 n = (int)getvar_i(v);
1714 /* recalculate $0 */
1715 sep = getvar_s(intvar[OFS]);
1719 for (i = 0; i < n; i++) {
1720 s = getvar_s(&Fields[i]);
1723 memcpy(b+len, sep, sl);
1726 b = qrealloc(b, len+l+sl, &bsize);
1727 memcpy(b+len, s, l);
1732 setvar_p(intvar[F0], b);
1735 } else if (v == intvar[F0]) {
1736 is_f0_split = FALSE;
1738 } else if (v == intvar[FS]) {
1739 mk_splitter(getvar_s(v), &fsplitter);
1741 } else if (v == intvar[RS]) {
1742 mk_splitter(getvar_s(v), &rsplitter);
1744 } else if (v == intvar[IGNORECASE]) {
1748 n = getvar_i(intvar[NF]);
// v-Fields is the zero-based index of v inside the Fields array
1749 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1750 /* right here v is invalid. Just to note... */
1754 /* step through func/builtin/etc arguments */
// nextarg: take the next argument from the list held in *pn.
// The visible test separates "more arguments follow" (*pn is an OC_COMMA
// node) from "last or missing argument".
// NOTE(review): presumably the comma case yields the left operand and
// advances *pn to the right one - confirm against the full source.
1755 static node *nextarg(node **pn)
1760 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// hashwalk_init: set up a for-in walk over `array` with loop variable v.
// If v already carries a walker (VF_WALK set), that walker is remembered as
// prev_walker and linked through w->prev. A new zeroed walker_list is
// allocated with array->glen + 1 extra bytes for the name buffer, cur and
// end both start at wbuf, and the name of each array item is copied into
// the buffer at w->end.
1769 static void hashwalk_init(var *v, xhash *array)
1774 walker_list *prev_walker;
1776 if (v->type & VF_WALK) {
1777 prev_walker = v->x.walker;
1782 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1784 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1785 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1786 w->cur = w->end = w->wbuf;
1787 w->prev = prev_walker;
1788 for (i = 0; i < array->csize; i++) {
1789 hi = array->items[i];
1791 strcpy(w->end, hi->name);
// hashwalk_next: advance the for-in walk attached to v.
// When cur has reached end the walk is over: v->x.walker is switched back
// to the previous walker saved in w->prev. Otherwise v is set to the next
// stored name, taken from the buffer with nextword().
// NOTE(review): the return values are not visible here; the OC_WALKNEXT
// caller treats non-zero as "continue with the loop body" - confirm.
1798 static int hashwalk_next(var *v)
1800 walker_list *w = v->x.walker;
1802 if (w->cur >= w->end) {
1803 walker_list *prev_walker = w->prev;
1805 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1807 v->x.walker = prev_walker;
1811 setvar_s(v, nextword(&w->cur));
1815 /* evaluate node, return 1 when result is true, 0 otherwise */
1816 static int ptest(node *pattern)
1818 /* ptest__v is "static": to save stack space? */
1819 return istrue(evaluate(pattern, &G.ptest__v));
1822 /* read next record from stream rsm into a variable v */
// awk_getline: read the next record from stream rsm into variable v.
// Data is pulled with safe_read() on the stream's file descriptor into a
// private buffer that is grown with qrealloc(). The end of the record is
// located according to the record splitter (rsplitter):
//   - OC_REGEXP splitter: regexec() gives the separator span [so, eo)
//   - non-NUL separator char c: strchr() from offset pp
//   - NUL separator char: memchr() for a NUL byte in the unread part
//   - the code around the "\n\n" search skips leading and trailing runs
//     of newlines
// ERRNO is set from errno in one path, and RT is set to the text between
// so and eo. r is the value reported in the final debug trace.
1823 static int awk_getline(rstream *rsm, var *v)
1826 regmatch_t pmatch[2];
1827 int size, a, p, pp = 0;
1828 int fd, so, eo, r, rp;
1831 debug_printf_eval("entered %s()\n", __func__);
1833 /* we're using our own buffer since we need access to accumulating
1836 fd = fileno(rsm->F);
1841 c = (char) rsplitter.n.info;
1845 m = qrealloc(m, 256, &size);
1852 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1853 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1854 b, 1, pmatch, 0) == 0) {
1855 so = pmatch[0].rm_so;
1856 eo = pmatch[0].rm_eo;
1860 } else if (c != '\0') {
1861 s = strchr(b+pp, c);
1863 s = memchr(b+pp, '\0', p - pp);
1870 while (b[rp] == '\n')
1872 s = strstr(b+rp, "\n\n");
1875 while (b[eo] == '\n')
1884 memmove(m, m+a, p+1);
1889 m = qrealloc(m, a+p+128, &size);
// size-p-1 keeps one byte free at the end of the buffer
1892 p += safe_read(fd, b+p, size-p-1);
1896 setvar_i(intvar[ERRNO], errno);
1905 c = b[so]; b[so] = '\0';
// terminate at eo so that b+so is exactly the matched separator text
1909 c = b[eo]; b[eo] = '\0';
1910 setvar_s(intvar[RT], b+so);
1919 debug_printf_eval("returning from %s(): %d\n", __func__, r);
1924 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1928 const char *s = format;
1930 if (int_as_int && n == (int)n) {
1931 r = snprintf(b, size, "%d", (int)n);
1933 do { c = *s; } while (c && *++s);
1934 if (strchr("diouxX", c)) {
1935 r = snprintf(b, size, format, (int)n);
1936 } else if (strchr("eEfgG", c)) {
1937 r = snprintf(b, size, format, n);
1939 syntax_error(EMSG_INV_FMT);
1945 /* formatted output into an allocated buffer, return ptr to buffer */
// awk_printf: build the output of printf/sprintf for argument list n.
// The first argument is evaluated and duplicated as the format string.
// The format is then processed one conversion at a time: text up to and
// including the next conversion is located ("%%" is skipped over), the
// output buffer b is grown with qrealloc(), the next argument is evaluated
// and emitted according to the conversion character c:
//   'c' or end of format - one character: the numeric value cast to char
//                          for numeric arguments, else the first char of
//                          the string value
//   's'                  - the string value (buffer regrown by its length)
//   anything else        - handed to fmt_num() with int_as_int = FALSE
// A '*' width/precision is rejected with a syntax error.
// Finally b is shrunk to i + 1 bytes.
1946 static char *awk_printf(node *n)
1951 int i, j, incr, bsize;
1956 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1961 while (*f && (*f != '%' || *++f == '%'))
1963 while (*f && !isalpha(*f)) {
1965 syntax_error("%*x formats are not supported");
// room for the literal text of this piece plus one formatted value
1969 incr = (f - s) + MAXVARFMT;
1970 b = qrealloc(b, incr + i, &bsize);
1976 arg = evaluate(nextarg(&n), v);
1979 if (c == 'c' || !c) {
1980 i += sprintf(b+i, s, is_numeric(arg) ?
1981 (char)getvar_i(arg) : *getvar_s(arg));
1982 } else if (c == 's') {
1984 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1985 i += sprintf(b+i, s, s1);
1987 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1991 /* if there was an error while sprintf, return value is negative */
1998 b = xrealloc(b, i + 1);
2003 /* Common substitution routine.
2004 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2005 * store result into (dest), return number of substitutions.
2006 * If nm = 0, replace all matches.
2007 * If src or dest is NULL, use $0.
2008 * If subexp != 0, enable subexpression matching (\1-\9).
// awk_sub: regex substitution engine behind sub/gsub/gensub.
// The regex comes from as_regex(rn, ...); the subject string is src, or $0
// when src is NULL. For every match the text up to the end of the match is
// copied into resbuf. Once the match counter has reached nm (always true
// when nm is 0) the matched part is dropped again and repl is expanded in
// its place: '&', and a digit 0-9 when subexp is set, insert the text of
// the corresponding pmatch[] entry. After the first pass regexec() is
// called with REG_NOTBOL. The unmatched tail is appended and the result is
// stored with setvar_p() into dest, or $0 when dest is NULL.
2010 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2014 int match_no, residx, replen, resbufsize;
2016 regmatch_t pmatch[10];
2017 regex_t sreg, *regex;
2023 regex = as_regex(rn, &sreg);
2024 sp = getvar_s(src ? src : intvar[F0]);
2025 replen = strlen(repl);
2026 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2027 int so = pmatch[0].rm_so;
2028 int eo = pmatch[0].rm_eo;
2030 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2031 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2032 memcpy(resbuf + residx, sp, eo);
2034 if (++match_no >= nm) {
// step back over the matched text [so, eo) that was just copied
2039 residx -= (eo - so);
2041 for (s = repl; *s; s++) {
2042 char c = resbuf[residx++] = *s;
2047 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2049 residx -= ((nbs + 3) >> 1);
2056 resbuf[residx++] = c;
2058 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2059 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2060 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2068 regexec_flags = REG_NOTBOL;
2073 /* Empty match (e.g. "b*" will match anywhere).
2074 * Advance by one char. */
2076 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2077 //... and will erroneously match "b" even though it is NOT at the word start.
2078 //we need REG_NOTBOW but it does not exist...
2079 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2080 //it should be able to do it correctly.
2081 /* Subtle: this is safe only because
2082 * qrealloc allocated at least one extra byte */
2083 resbuf[residx] = *sp;
// qrealloc leaves capacity > residx + strlen(sp), so the NUL written by
// strcpy fits as well
2091 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2092 strcpy(resbuf + residx, sp);
2094 //bb_error_msg("end sp:'%s'%p", sp,sp);
2095 setvar_p(dest ? dest : intvar[F0], resbuf);
// do_mktime: convert a date specification string ds to a timestamp.
// sscanf() reads six unsigned numbers straight into tm_year, tm_mon,
// tm_mday, tm_hour, tm_min and tm_sec, followed by one signed number
// (the matching argument is not visible; tm_isdst defaults to -1, i.e.
// "unknown"). Month 0 and years 0..1899 are caught by the unsigned
// comparisons below. The year is then rebased to 1900 as struct tm
// expects, and the result of mktime() is returned, narrowed to int.
// NOTE(review): because the comparisons are done on unsigned casts, a
// negative month or year does not trip them - confirm this is intended.
2101 static NOINLINE int do_mktime(const char *ds)
2106 /*memset(&then, 0, sizeof(then)); - not needed */
2107 then.tm_isdst = -1; /* default is unknown */
2109 /* manpage of mktime says these fields are ints,
2110 * so we can sscanf stuff directly into them */
2111 count = sscanf(ds, "%u %u %u %u %u %u %d",
2112 &then.tm_year, &then.tm_mon, &then.tm_mday,
2113 &then.tm_hour, &then.tm_min, &then.tm_sec,
2117 || (unsigned)then.tm_mon < 1
2118 || (unsigned)then.tm_year < 1900
2124 then.tm_year -= 1900;
2126 return mktime(&then);
// exec_builtin: execute the builtin function call described by node op,
// storing the result in res.
// Up to four arguments are collected with nextarg(): an[] holds the
// argument nodes, av[] their evaluated values and as[] their string values.
// Bits of op->info (masks 0x09000000 and 0x08000000) decide whether an
// argument is evaluated and whether its string form is fetched. The top two
// bits of info give the minimum argument count; fewer arguments are a
// syntax error. The rest of the function implements the individual
// builtins; the visible ones are annotated below.
2129 static NOINLINE var *exec_builtin(node *op, var *res)
2131 #define tspl (G.exec_builtin__tspl)
2137 regmatch_t pmatch[2];
2146 isr = info = op->info;
2149 av[2] = av[3] = NULL;
2150 for (i = 0; i < 4 && op; i++) {
2151 an[i] = nextarg(&op);
2152 if (isr & 0x09000000)
2153 av[i] = evaluate(an[i], &tv[i]);
2154 if (isr & 0x08000000)
2155 as[i] = getvar_s(av[i]);
2160 if ((uint32_t)nargs < (info >> 30))
2161 syntax_error(EMSG_TOO_FEW_ARGS);
// atan2: only available when built with ENABLE_FEATURE_AWK_LIBM
2167 if (ENABLE_FEATURE_AWK_LIBM)
2168 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2170 syntax_error(EMSG_NO_MATH);
// split: third argument is used as-is when it is a regex node, otherwise
// its string value is turned into a splitter
2177 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2178 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2183 n = awk_split(as[0], spl, &s);
2185 clear_array(iamarray(av[1]));
// array elements are numbered from 1
2186 for (i = 1; i <= n; i++)
2187 setari_u(av[1], i, nextword(&s));
// substring: awk positions are 1-based, so convert to a 0-based offset
2197 i = getvar_i(av[1]) - 1;
2202 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2205 s = xstrndup(as[0]+i, n);
2210 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2211 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2213 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2217 setvar_i(res, ~getvar_i_int(av[0]));
2221 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2225 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2229 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2233 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// case conversion on a private copy: only ASCII letters a-z / A-Z are
// touched, by clearing or setting bit 0x20
2239 s1 = s = xstrdup(as[0]);
2241 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2242 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2243 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// substring search: l is the last offset at which as[1] could still fit
2253 l = strlen(as[0]) - ll;
2254 if (ll > 0 && l >= 0) {
2256 char *s = strstr(as[0], as[1]);
2258 n = (s - as[0]) + 1;
2260 /* this piece of code is terribly slow and
2261 * really should be rewritten
2263 for (i = 0; i <= l; i++) {
2264 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2276 tt = getvar_i(av[1]);
2279 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2280 i = strftime(g_buf, MAXVARFMT,
2281 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2284 setvar_s(res, g_buf);
2288 setvar_i(res, do_mktime(as[0]));
2292 re = as_regex(an[1], &sreg);
2293 n = regexec(re, as[0], 1, pmatch, 0);
// with rm_so = 0 and rm_eo = -1, RSTART becomes 0 and RLENGTH becomes -1
2298 pmatch[0].rm_so = 0;
2299 pmatch[0].rm_eo = -1;
2301 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2302 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2303 setvar_i(res, pmatch[0].rm_so);
// the three awk_sub() calls: subexpression matching on, explicit match
// number, result into res / replace all matches in place / replace the
// first match in place
2309 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2313 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2317 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2327 * Evaluate node - the heart of the program. Supplied with subtree
2328 * and place where to store result. Returns ptr to result.
// XC(n) drops the low 8 bits of an opcode-class value so that it can be
// used as a case label in the switch inside evaluate().
2330 #define XC(n) ((n) >> 8)
// evaluate: execute the node chain starting at op and return a pointer to
// the result (res, or another variable the node refers to).
// For each node the operands requested by its opinfo flags are prepared
// first (OF_RES1/OF_RES2 evaluate the left/right subtree, OF_STR1/OF_STR2
// fetch string values, OF_NUM1 fetches the left numeric value), then the
// switch dispatches on the node class. Classes in the "iterative" group
// pick the next node to run (op->a.n or op->r.n); classes in the
// "recursive" group compute a value into res.
// Per-call state that would otherwise cost stack space in this recursive
// function (fnargs, seed, sreg) lives in G.
2332 static var *evaluate(node *op, var *res)
2334 /* This procedure is recursive so we should count every byte */
2335 #define fnargs (G.evaluate__fnargs)
2336 /* seed is initialized to 1 */
2337 #define seed (G.evaluate__seed)
2338 #define sreg (G.evaluate__sreg)
2343 return setvar_s(res, NULL);
2345 debug_printf_eval("entered %s()\n", __func__);
2353 } L = L; /* for compiler */
2364 opn = (opinfo & OPNMASK);
2365 g_lineno = op->lineno;
2367 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2369 /* execute inevitable things */
2370 if (opinfo & OF_RES1)
2371 L.v = evaluate(op1, v1);
2372 if (opinfo & OF_RES2)
2373 R.v = evaluate(op->r.n, v1+1);
2374 if (opinfo & OF_STR1) {
2375 L.s = getvar_s(L.v);
2376 debug_printf_eval("L.s:'%s'\n", L.s);
2378 if (opinfo & OF_STR2) {
2379 R.s = getvar_s(R.v);
2380 debug_printf_eval("R.s:'%s'\n", R.s);
2382 if (opinfo & OF_NUM1) {
2383 L_d = getvar_i(L.v);
2384 debug_printf_eval("L_d:%f\n", L_d);
2387 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2388 switch (XC(opinfo & OPCLSMASK)) {
2390 /* -- iterative node type -- */
2394 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2395 /* it's range pattern */
// OF_CHECKED on the node records that the range is currently open: it is
// set once the start pattern matches and cleared when the end pattern does
2396 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2397 op->info |= OF_CHECKED;
2398 if (ptest(op1->r.n))
2399 op->info &= ~OF_CHECKED;
2405 op = ptest(op1) ? op->a.n : op->r.n;
2409 /* just evaluate an expression, also used as unconditional jump */
2413 /* branch, used in if-else and various loops */
2415 op = istrue(L.v) ? op->a.n : op->r.n;
2418 /* initialize for-in loop */
2419 case XC( OC_WALKINIT ):
2420 hashwalk_init(L.v, iamarray(R.v));
2423 /* get next array item */
2424 case XC( OC_WALKNEXT ):
2425 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2428 case XC( OC_PRINT ):
2429 case XC( OC_PRINTF ): {
// output redirection: R.s names the target; a pipe is opened with popen(),
// a file with xfopen() in write ('w') or append mode
2433 rstream *rsm = newfile(R.s);
2436 rsm->F = popen(R.s, "w");
2438 bb_perror_msg_and_die("popen");
2441 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2447 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2449 fputs(getvar_s(intvar[F0]), F);
2452 var *v = evaluate(nextarg(&op1), v1);
// numbers are formatted with OFMT, everything else is printed as a string
2453 if (v->type & VF_NUMBER) {
2454 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2458 fputs(getvar_s(v), F);
2462 fputs(getvar_s(intvar[OFS]), F);
2465 fputs(getvar_s(intvar[ORS]), F);
2467 } else { /* OC_PRINTF */
2468 char *s = awk_printf(op1);
2476 case XC( OC_DELETE ): {
2477 uint32_t info = op1->info & OPCLSMASK;
2480 if (info == OC_VAR) {
2482 } else if (info == OC_FNARG) {
2483 v = &fnargs[op1->l.aidx];
2485 syntax_error(EMSG_NOT_ARRAY);
// either one element (keyed by the evaluated subscript) or the whole
// array is removed
2491 s = getvar_s(evaluate(op1->r.n, v1));
2492 hash_remove(iamarray(v), s);
2494 clear_array(iamarray(v));
2499 case XC( OC_NEWSOURCE ):
2500 g_progname = op->l.new_progname;
2503 case XC( OC_RETURN ):
2507 case XC( OC_NEXTFILE ):
2518 /* -- recursive node type -- */
2522 if (L.v == intvar[NF])
2526 case XC( OC_FNARG ):
2527 L.v = &fnargs[op->l.aidx];
2529 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2533 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2536 case XC( OC_REGEXP ):
2538 L.s = getvar_s(intvar[F0]);
2541 case XC( OC_MATCH ):
2545 regex_t *re = as_regex(op1, &sreg);
2546 int i = regexec(re, L.s, 0, NULL, 0);
// opn == '!' inverts the sense of the match
2549 setvar_i(res, (i == 0) ^ (opn == '!'));
2554 debug_printf_eval("MOVE\n");
2555 /* if source is a temporary string, just relink it to dest */
2556 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2557 //then L.v ends up being a string, which is wrong
2558 // if (R.v == v1+1 && R.v->string) {
2559 // res = setvar_p(L.v, R.v->string);
2560 // R.v->string = NULL;
2562 res = copyvar(L.v, R.v);
2566 case XC( OC_TERNARY ):
2567 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2568 syntax_error(EMSG_POSSIBLE_ERROR);
2569 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2572 case XC( OC_FUNC ): {
2574 const char *sv_progname;
2576 if (!op->r.f->body.first)
2577 syntax_error(EMSG_UNDEF_FUNC);
// one slot per declared argument plus one extra
2579 vbeg = v = nvalloc(op->r.f->nargs + 1);
2581 var *arg = evaluate(nextarg(&op1), v1);
2583 v->type |= VF_CHILD;
2585 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved and restored around the call of the function body
2591 sv_progname = g_progname;
2593 res = evaluate(op->r.f->body.first, res);
2595 g_progname = sv_progname;
2602 case XC( OC_GETLINE ):
2603 case XC( OC_PGETLINE ): {
2610 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2611 rsm->F = popen(L.s, "r");
2612 rsm->is_pipe = TRUE;
2614 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2619 iF = next_input_file();
2624 setvar_i(intvar[ERRNO], errno);
2632 i = awk_getline(rsm, R.v);
// FNR is advanced on a successful read when op1 is NULL
2633 if (i > 0 && !op1) {
2634 incvar(intvar[FNR]);
2641 /* simple builtins */
2642 case XC( OC_FBLTIN ): {
2643 double R_d = R_d; /* for compiler */
2651 R_d = (double)rand() / (double)RAND_MAX;
2655 if (ENABLE_FEATURE_AWK_LIBM) {
2661 if (ENABLE_FEATURE_AWK_LIBM) {
2667 if (ENABLE_FEATURE_AWK_LIBM) {
2673 if (ENABLE_FEATURE_AWK_LIBM) {
2679 if (ENABLE_FEATURE_AWK_LIBM) {
2684 syntax_error(EMSG_NO_MATH);
// new seed: the argument if one was given, otherwise the current time
2689 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2699 L.s = getvar_s(intvar[F0]);
// the result is system()'s status shifted right by 8; 0 when exec is
// disabled or the command string is empty
2705 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2706 ? (system(L.s) >> 8) : 0;
2712 } else if (L.s && *L.s) {
2713 rstream *rsm = newfile(L.s);
2723 rsm = (rstream *)hash_search(fdhash, L.s);
2724 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2726 debug_printf_eval("OC_FBLTIN F_cl "
2727 "rsm->is_pipe:%d, ->F:%p\n",
2728 rsm->is_pipe, rsm->F);
2729 /* Can be NULL if open failed. Example:
2730 * getline line <"doesnt_exist";
2731 * close("doesnt_exist"); <--- here rsm->F is NULL
2734 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2736 hash_remove(fdhash, L.s);
2739 setvar_i(intvar[ERRNO], errno);
2748 case XC( OC_BUILTIN ):
2749 res = exec_builtin(op, res);
2752 case XC( OC_SPRINTF ):
2753 setvar_p(res, awk_printf(op1));
2756 case XC( OC_UNARY ): {
2759 Ld = R_d = getvar_i(R.v);
2786 case XC( OC_FIELD ): {
2787 int i = (int)getvar_i(R.v);
// $i lives in Fields[i - 1]
2794 res = &Fields[i - 1];
2799 /* concatenation (" ") and index joining (",") */
2800 case XC( OC_CONCAT ):
2801 case XC( OC_COMMA ): {
2802 const char *sep = "";
2803 if ((opinfo & OPCLSMASK) == OC_COMMA)
2804 sep = getvar_s(intvar[SUBSEP]);
2805 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// short-circuit logic: the right operand is tested only when needed
2810 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2814 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2817 case XC( OC_BINARY ):
2818 case XC( OC_REPLACE ): {
2819 double R_d = getvar_i(R.v);
2820 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2833 syntax_error(EMSG_DIV_BY_ZERO);
2837 if (ENABLE_FEATURE_AWK_LIBM)
2838 L_d = pow(L_d, R_d);
2840 syntax_error(EMSG_NO_MATH);
2844 syntax_error(EMSG_DIV_BY_ZERO);
// remainder computed by truncating the quotient to int
2845 L_d -= (int)(L_d / R_d) * R_d;
2848 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res, OC_REPLACE stores back into the left operand
2849 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2853 case XC( OC_COMPARE ): {
2854 int i = i; /* for compiler */
// numeric comparison only when both sides are numeric, otherwise string
// comparison (case-insensitive when icase is set)
2857 if (is_numeric(L.v) && is_numeric(R.v)) {
2858 Ld = getvar_i(L.v) - getvar_i(R.v);
2860 const char *l = getvar_s(L.v);
2861 const char *r = getvar_s(R.v);
2862 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
// bit 0 of opn negates the comparison selected by the remaining bits
2864 switch (opn & 0xfe) {
2875 setvar_i(res, (i == 0) ^ (opn & 1));
2880 syntax_error(EMSG_POSSIBLE_ERROR);
2882 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2884 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2891 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2899 /* -------- main & co. -------- */
// awk_exit: final actions before the program terminates with status r.
// The END sequence (endseq) is evaluated, then every entry of fdhash is
// visited and each stream that is still open and is a pipe is closed with
// pclose(), which waits for the child process.
2901 static int awk_exit(int r)
2912 evaluate(endseq.first, &tv);
2915 /* waiting for children */
2916 for (i = 0; i < fdhash->csize; i++) {
2917 hi = fdhash->items[i];
2919 if (hi->data.rs.F && hi->data.rs.is_pipe)
2920 pclose(hi->data.rs.F);
2928 /* if expr looks like "var=value", perform assignment and return 1,
2929 * otherwise return 0 */
// is_assignment: handle a command-line "var=value" argument.
// expr qualifies when its first character passes isalnum_() and it
// contains '='. The work is done on a private copy (exprc) so that expr
// itself is not modified; val is repositioned to the same offset inside the
// copy. The value is rewritten in place one nextchar() result at a time
// (presumably to process escape sequences - confirm against nextchar),
// then assigned to the variable named by the text before '='.
2930 static int is_assignment(const char *expr)
2932 char *exprc, *val, *s, *s1;
2934 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2938 exprc = xstrdup(expr);
2939 val = exprc + (val - expr);
2943 while ((*s1 = nextchar(&s)) != '\0')
2946 setvar_u(newvar(exprc), val);
2951 /* switch to next input file */
// next_input_file: advance to the next input taken from ARGV.
// The stream state (rsm) and the files_happen flag persist in G between
// calls. ARGIND is incremented and the matching ARGV element is fetched;
// elements that are empty or are "var=value" assignments (which
// is_assignment() performs as a side effect) are not opened. A real file
// name is opened with xfopen_stdin(), files_happen is set and FILENAME is
// updated. The ARGIND+1 >= ARGC test detects that ARGV is exhausted.
2952 static rstream *next_input_file(void)
2954 #define rsm (G.next_input_file__rsm)
2955 #define files_happen (G.next_input_file__files_happen)
2958 const char *fname, *ind;
2963 rsm.pos = rsm.adv = 0;
2966 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2972 ind = getvar_s(incvar(intvar[ARGIND]));
2973 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2974 if (fname && *fname && !is_assignment(fname))
2975 F = xfopen_stdin(fname);
2979 files_happen = TRUE;
2980 setvar_s(intvar[FILENAME], fname);
// awk_main: applet entry point.
// Order of work, as visible below:
//   1. force the "C" numeric locale (when locale support is enabled)
//   2. allocate g_buf and create the four hashes (variables, arrays,
//      open files, functions)
//   3. create the built-in variables from the vNames/vValues tables;
//      names marked with '*' are flagged VF_SPECIAL
//   4. build the FS and RS splitters, register stdin/stdout/stderr under
//      their /dev names, and fill ENVIRON from environ
//   5. parse options: -F sets FS, each -v performs an assignment, each -f
//      names a program file that is read and parsed; without -f the
//      program text is the first non-option argument; -W is ignored with
//      a warning
//   6. fill ARGC/ARGV, run the BEGIN sequence, and exit right away when
//      there is neither a main nor an END sequence
//   7. for every input file: reset FNR, then for each record increment
//      FNR and run the main sequence
//   8. finish through awk_exit(EXIT_SUCCESS)
2987 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2988 int awk_main(int argc, char **argv)
2991 char *opt_F, *opt_W;
2992 llist_t *list_v = NULL;
2993 llist_t *list_f = NULL;
2998 char *vnames = (char *)vNames; /* cheat */
2999 char *vvalues = (char *)vValues;
3003 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3004 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3005 if (ENABLE_LOCALE_SUPPORT)
3006 setlocale(LC_NUMERIC, "C");
3010 /* allocate global buffer */
3011 g_buf = xmalloc(MAXVARFMT + 1);
3013 vhash = hash_init();
3014 ahash = hash_init();
3015 fdhash = hash_init();
3016 fnhash = hash_init();
3018 /* initialize variables */
3019 for (i = 0; *vnames; i++) {
3020 intvar[i] = v = newvar(nextword(&vnames));
// a value entry starting with '\377' is not assigned as a string
3021 if (*vvalues != '\377')
3022 setvar_s(v, nextword(&vvalues));
3026 if (*vnames == '*') {
3027 v->type |= VF_SPECIAL;
3032 handle_special(intvar[FS]);
3033 handle_special(intvar[RS]);
3035 newfile("/dev/stdin")->F = stdin;
3036 newfile("/dev/stdout")->F = stdout;
3037 newfile("/dev/stderr")->F = stderr;
3039 /* Huh, people report that sometimes environ is NULL. Oh well. */
3040 if (environ) for (envp = environ; *envp; envp++) {
3041 /* environ is writable, thus we don't strdup it needlessly */
3043 char *s1 = strchr(s, '=');
3046 /* Both findvar and setvar_u take const char*
3047 * as 2nd arg -> environment is not trashed */
3048 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3052 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3053 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3057 setvar_s(intvar[FS], opt_F); // -F
3058 while (list_v) { /* -v */
3059 if (!is_assignment(llist_pop(&list_v)))
3062 if (list_f) { /* -f */
3067 g_progname = llist_pop(&list_f);
3068 from_file = xfopen_stdin(g_progname);
3069 /* one byte is reserved for some trick in next_token */
// the file is read in chunks of up to 4094 bytes starting at offset 1;
// the buffer is regrown by 4096 bytes before each read
3070 for (i = j = 1; j > 0; i += j) {
3071 s = xrealloc(s, i + 4096);
3072 j = fread(s + i, 1, 4094, from_file);
3076 parse_program(s + 1);
3080 } else { // no -f: take program from 1st parameter
3083 g_progname = "cmd. line";
3084 parse_program(*argv++);
3086 if (opt & 0x8) // -W
3087 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3089 /* fill in ARGV array */
3090 setvar_i(intvar[ARGC], argc);
3091 setari_u(intvar[ARGV], 0, "awk");
3094 setari_u(intvar[ARGV], ++i, *argv++);
3096 evaluate(beginseq.first, &tv);
3097 if (!mainseq.first && !endseq.first)
3098 awk_exit(EXIT_SUCCESS);
3100 /* input file could already be opened in BEGIN block */
3102 iF = next_input_file();
3104 /* passing through input files */
3107 setvar_i(intvar[FNR], 0);
3109 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3112 incvar(intvar[FNR]);
3113 evaluate(mainseq.first, &tv);
3120 syntax_error(strerror(errno));
3122 iF = next_input_file();
3125 awk_exit(EXIT_SUCCESS);