1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
14 /* This is a NOEXEC applet. Be very careful! */
17 /* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...) do {} while (0)
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
31 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
32 #define VF_ARRAY 0x0002 /* 1 = it's an array */
34 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
35 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
37 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
42 /* these flags are static, don't change them when value is changed */
43 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
45 typedef struct walker_list {
48 struct walker_list *prev;
53 typedef struct var_s {
54 unsigned type; /* flags */
58 int aidx; /* func arg idx (for compilation stage) */
59 struct xhash_s *array; /* array ptr */
60 struct var_s *parent; /* for func args, ptr to actual parameter */
61 walker_list *walker; /* list of array elements (for..in) */
65 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
66 typedef struct chain_s {
69 const char *programname;
73 typedef struct func_s {
79 typedef struct rstream_s {
88 typedef struct hash_item_s {
90 struct var_s v; /* variable/array hash */
91 struct rstream_s rs; /* redirect streams hash */
92 struct func_s f; /* functions hash */
94 struct hash_item_s *next; /* next in chain */
95 char name[1]; /* really it's longer */
98 typedef struct xhash_s {
99 unsigned nel; /* num of elements */
100 unsigned csize; /* current hash size */
101 unsigned nprime; /* next hash size in PRIMES[] */
102 unsigned glen; /* summary length of item names */
103 struct hash_item_s **items;
107 typedef struct node_s {
127 /* Block of temporary variables */
128 typedef struct nvblock_s {
131 struct nvblock_s *prev;
132 struct nvblock_s *next;
136 typedef struct tsplitter_s {
141 /* simple token classes: each class is one distinct bit of a uint32_t mask */
142 /* Order and hex values are very important!!! See next_token(). tokenlist[] uses NTC to switch to the next token class (tc<<1), so the bit order here has to follow the group order in tokenlist[] */
143 #define TC_SEQSTART 1 /* ( */
144 #define TC_SEQTERM (1 << 1) /* ) */
145 #define TC_REGEXP (1 << 2) /* /.../ */
146 #define TC_OUTRDR (1 << 3) /* | > >> */
147 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
148 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
149 #define TC_BINOPX (1 << 6) /* two-opnd operator */
150 #define TC_IN (1 << 7) /* counted in TC_BINOP and TC_WORD */
151 #define TC_COMMA (1 << 8) /* counted in TC_BINOP and TC_NOTERM */
152 #define TC_PIPE (1 << 9) /* input redirection pipe */
153 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
154 #define TC_ARRTERM (1 << 11) /* ] */
155 #define TC_GRPSTART (1 << 12) /* { */
156 #define TC_GRPTERM (1 << 13) /* } */
157 #define TC_SEMICOL (1 << 14) /* one half of TC_OPTERM */
158 #define TC_NEWLINE (1 << 15) /* other half of TC_OPTERM */
159 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
160 #define TC_WHILE (1 << 17) /* together with TC_STATX forms TC_STATEMNT */
161 #define TC_ELSE (1 << 18) /* word token (see TC_WORD) */
162 #define TC_BUILTIN (1 << 19) /* builtin function names, see the BUILTIN group in tokenlist[] */
163 #define TC_GETLINE (1 << 20) /* both an operand (TC_OPERAND) and a word token (TC_WORD) */
164 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
165 #define TC_BEGIN (1 << 22) /* word token (see TC_WORD) */
166 #define TC_END (1 << 23) /* word token (see TC_WORD) */
167 #define TC_EOF (1 << 24) /* parse_program() loops until next_token() returns this */
168 #define TC_VARIABLE (1 << 25) /* operand class (see TC_OPERAND) */
169 #define TC_ARRAY (1 << 26) /* operand class (see TC_OPERAND) */
170 #define TC_FUNCTION (1 << 27) /* operand class (see TC_OPERAND) */
171 #define TC_STRING (1 << 28) /* operand class (see TC_OPERAND) */
172 #define TC_NUMBER (1 << 29) /* operand class (see TC_OPERAND) */
174 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
176 /* combined token classes */
177 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
178 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
179 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
180 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
182 #define TC_STATEMNT (TC_STATX | TC_WHILE)
183 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
185 /* word tokens, cannot mean something else if not expected */
186 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
187 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
189 /* discard newlines after these */
190 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
191 | TC_BINOP | TC_OPTERM)
193 /* what can expression begin with */
194 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
195 /* what can group begin with */
196 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
198 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
199 /* operator is inserted between them */
200 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
201 | TC_STRING | TC_NUMBER | TC_UOPPOST)
202 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
204 #define OF_RES1 0x010000
205 #define OF_RES2 0x020000
206 #define OF_STR1 0x040000
207 #define OF_STR2 0x080000
208 #define OF_NUM1 0x100000
209 #define OF_CHECKED 0x200000
211 /* combined operator flags */
214 #define xS (OF_RES2 | OF_STR2)
216 #define VV (OF_RES1 | OF_RES2)
217 #define Nx (OF_RES1 | OF_NUM1)
218 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
219 #define Sx (OF_RES1 | OF_STR1)
220 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
221 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
223 #define OPCLSMASK 0xFF00
224 #define OPNMASK 0x007F
226 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
227 * For builtins that byte has a different meaning; its bits are n n s3 s2 s1 v3 v2 v1:
228 * n n - min. number of args, vN - resolve Nth arg to var, sN - resolve Nth arg to string
/* P(x): place the priority / builtin-argument descriptor byte x into
 * bits 24..31 of an info word.
 * The argument is parenthesized so compound expressions group correctly,
 * and it is widened to uint32_t before shifting so that values >= 0x80
 * (e.g. P(0x83) in tokeninfo[]) do not shift into the sign bit of a
 * signed int, which is undefined behavior. */
#define P(x) ((uint32_t)(x) << 24)
231 #define PRIMASK 0x7F000000
232 #define PRIMASK2 0x7E000000
234 /* Operation classes */
236 #define SHIFT_TIL_THIS 0x0600
237 #define RECUR_FROM_THIS 0x1000
240 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
241 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
243 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
244 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
245 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
247 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
248 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
249 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
250 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
251 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
252 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
253 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
254 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
257 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
261 /* simple builtins */
263 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
264 F_ti, F_le, F_sy, F_ff, F_cl
269 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
271 B_an, B_co, B_ls, B_or, B_rs, B_xo,
274 /* tokens and their corresponding info values */
276 #define NTC "\377" /* switch to next token class (tc<<1) */
279 #define OC_B OC_BUILTIN
281 static const char tokenlist[] ALIGN1 =
284 "\1/" NTC /* REGEXP */
285 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
286 "\2++" "\2--" NTC /* UOPPOST */
287 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
288 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
289 "\2*=" "\2/=" "\2%=" "\2^="
290 "\1+" "\1-" "\3**=" "\2**"
291 "\1/" "\1%" "\1^" "\1*"
292 "\2!=" "\2>=" "\2<=" "\1>"
293 "\1<" "\2!~" "\1~" "\2&&"
294 "\2||" "\1?" "\1:" NTC
298 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
304 "\2if" "\2do" "\3for" "\5break" /* STATX */
305 "\10continue" "\6delete" "\5print"
306 "\6printf" "\4next" "\10nextfile"
307 "\6return" "\4exit" NTC
311 "\3and" "\5compl" "\6lshift" "\2or"
313 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
314 "\3cos" "\3exp" "\3int" "\3log"
315 "\4rand" "\3sin" "\4sqrt" "\5srand"
316 "\6gensub" "\4gsub" "\5index" "\6length"
317 "\5match" "\5split" "\7sprintf" "\3sub"
318 "\6substr" "\7systime" "\10strftime" "\6mktime"
319 "\7tolower" "\7toupper" NTC
321 "\4func" "\10function" NTC
326 static const uint32_t tokeninfo[] = {
330 xS|'a', xS|'w', xS|'|',
331 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
332 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
334 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
335 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
336 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
337 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
338 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
339 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
340 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
341 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
342 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
343 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
344 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
345 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
346 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
347 OC_COLON|xx|P(67)|':',
350 OC_PGETLINE|SV|P(37),
351 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
352 OC_UNARY|xV|P(19)|'!',
358 ST_IF, ST_DO, ST_FOR, OC_BREAK,
359 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
360 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
361 OC_RETURN|Vx, OC_EXIT|Nx,
365 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
366 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
380 /* internal variable names and their initial values */
381 /* an asterisk after a name marks a SPECIAL var; "$" is simply the unnamed Field0 */
383 CONVFMT, OFMT, FS, OFS,
384 ORS, RS, RT, FILENAME,
385 SUBSEP, F0, ARGIND, ARGC,
386 ARGV, ERRNO, FNR, NR,
387 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
390 static const char vNames[] ALIGN1 =
391 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
392 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
393 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
394 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
395 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
397 static const char vValues[] ALIGN1 =
398 "%.6g\0" "%.6g\0" " \0" " \0"
399 "\n\0" "\n\0" "\0" "\0"
400 "\034\0" "\0" "\377";
402 /* hash_init() starts a hash with FIRST_PRIME slots; hash_rebuild() may then grow it to the successive values in PRIMES[], indexed by xhash.nprime */
403 #define FIRST_PRIME 61 /* initial hash size (csize) */
404 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 }; /* later sizes, in the order they are taken */
407 /* Globals. Split in two parts so that first one is addressed
408 * with (mostly short) negative offsets.
409 * NB: it's unsafe to put members of type "double"
410 * into globals2 (gcc may fail to align them).
414 chain beginseq, mainseq, endseq;
416 node *break_ptr, *continue_ptr;
418 xhash *vhash, *ahash, *fdhash, *fnhash;
419 const char *g_progname;
422 int maxfields; /* used in fsrealloc() only */
431 smallint is_f0_split;
434 uint32_t t_info; /* often used */
440 var *intvar[NUM_INTERNAL_VARS]; /* often used */
442 /* former statics from various functions */
443 char *split_f0__fstrings;
445 uint32_t next_token__save_tclass;
446 uint32_t next_token__save_info;
447 uint32_t next_token__ltclass;
448 smallint next_token__concat_inserted;
450 smallint next_input_file__files_happen;
451 rstream next_input_file__rsm;
453 var *evaluate__fnargs;
454 unsigned evaluate__seed;
455 regex_t evaluate__sreg;
459 tsplitter exec_builtin__tspl;
461 /* biggest and least used members go last */
462 tsplitter fsplitter, rsplitter;
464 #define G1 (ptr_to_globals[-1])
465 #define G (*(struct globals2 *)ptr_to_globals)
466 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
467 /*char G1size[sizeof(G1)]; - 0x74 */
468 /*char Gsize[sizeof(G)]; - 0x1c4 */
469 /* Trying to keep most of members accessible with short offsets: */
470 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
471 #define t_double (G1.t_double )
472 #define beginseq (G1.beginseq )
473 #define mainseq (G1.mainseq )
474 #define endseq (G1.endseq )
475 #define seq (G1.seq )
476 #define break_ptr (G1.break_ptr )
477 #define continue_ptr (G1.continue_ptr)
479 #define vhash (G1.vhash )
480 #define ahash (G1.ahash )
481 #define fdhash (G1.fdhash )
482 #define fnhash (G1.fnhash )
483 #define g_progname (G1.g_progname )
484 #define g_lineno (G1.g_lineno )
485 #define nfields (G1.nfields )
486 #define maxfields (G1.maxfields )
487 #define Fields (G1.Fields )
488 #define g_cb (G1.g_cb )
489 #define g_pos (G1.g_pos )
490 #define g_buf (G1.g_buf )
491 #define icase (G1.icase )
492 #define exiting (G1.exiting )
493 #define nextrec (G1.nextrec )
494 #define nextfile (G1.nextfile )
495 #define is_f0_split (G1.is_f0_split )
496 #define t_info (G.t_info )
497 #define t_tclass (G.t_tclass )
498 #define t_string (G.t_string )
499 #define t_lineno (G.t_lineno )
500 #define t_rollback (G.t_rollback )
501 #define intvar (G.intvar )
502 #define fsplitter (G.fsplitter )
503 #define rsplitter (G.rsplitter )
504 #define INIT_G() do { \
505 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
506 G.next_token__ltclass = TC_OPTERM; \
507 G.evaluate__seed = 1; \
511 /* function prototypes */
512 static void handle_special(var *);
513 static node *parse_expr(uint32_t);
514 static void chain_group(void);
515 static var *evaluate(node *, var *);
516 static rstream *next_input_file(void);
517 static int fmt_num(char *, int, const char *, double, int);
518 static int awk_exit(int) NORETURN;
520 /* ---- error handling ---- */
522 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
523 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
524 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
525 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
526 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
527 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
528 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
529 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
530 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
531 #if !ENABLE_FEATURE_AWK_LIBM
532 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
535 static void zero_out_var(var *vp)
537 memset(vp, 0, sizeof(*vp));
540 static void syntax_error(const char *message) NORETURN;
541 static void syntax_error(const char *message)
543 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
546 /* ---- hash stuff ---- */
548 static unsigned hashidx(const char *name)
553 idx = *name++ + (idx << 6) - idx;
557 /* create new hash */
558 static xhash *hash_init(void)
562 newhash = xzalloc(sizeof(*newhash));
563 newhash->csize = FIRST_PRIME;
564 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
569 /* find item in hash, return ptr to data, NULL if not found */
570 static void *hash_search(xhash *hash, const char *name)
574 hi = hash->items[hashidx(name) % hash->csize];
576 if (strcmp(hi->name, name) == 0)
583 /* grow hash if it becomes too big */
584 static void hash_rebuild(xhash *hash)
586 unsigned newsize, i, idx;
587 hash_item **newitems, *hi, *thi;
589 if (hash->nprime == ARRAY_SIZE(PRIMES))
592 newsize = PRIMES[hash->nprime++];
593 newitems = xzalloc(newsize * sizeof(newitems[0]));
595 for (i = 0; i < hash->csize; i++) {
600 idx = hashidx(thi->name) % newsize;
601 thi->next = newitems[idx];
607 hash->csize = newsize;
608 hash->items = newitems;
611 /* find item in hash, add it if necessary. Return ptr to data */
612 static void *hash_find(xhash *hash, const char *name)
618 hi = hash_search(hash, name);
620 if (++hash->nel / hash->csize > 10)
623 l = strlen(name) + 1;
624 hi = xzalloc(sizeof(*hi) + l);
625 strcpy(hi->name, name);
627 idx = hashidx(name) % hash->csize;
628 hi->next = hash->items[idx];
629 hash->items[idx] = hi;
635 #define findvar(hash, name) ((var*) hash_find((hash), (name))) /* find name in the given hash, adding it if necessary; result viewed as var */
636 #define newvar(name) ((var*) hash_find(vhash, (name))) /* same, in vhash */
637 #define newfile(name) ((rstream*)hash_find(fdhash, (name))) /* same, in fdhash; result viewed as rstream */
638 #define newfunc(name) ((func*) hash_find(fnhash, (name))) /* same, in fnhash; result viewed as func */
640 static void hash_remove(xhash *hash, const char *name)
642 hash_item *hi, **phi;
644 phi = &hash->items[hashidx(name) % hash->csize];
647 if (strcmp(hi->name, name) == 0) {
648 hash->glen -= (strlen(name) + 1);
658 /* ------ some useful functions ------ */
660 static char *skip_spaces(char *p)
663 if (*p == '\\' && p[1] == '\n') {
666 } else if (*p != ' ' && *p != '\t') {
674 /* returns old *s, advances *s past word and terminating NUL */
675 static char *nextword(char **s)
678 while (*(*s)++ != '\0')
683 static char nextchar(char **s)
690 c = bb_process_escape_sequence((const char**)s);
691 if (c == '\\' && *s == pps)
696 static ALWAYS_INLINE int isalnum_(int c)
698 return (isalnum(c) || c == '_');
701 static double my_strtod(char **pp)
705 && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
707 return strtoull(*pp, pp, 0);
710 return strtod(*pp, pp);
713 /* -------- working with variables (set/get/copy/etc) -------- */
715 static xhash *iamarray(var *v)
719 while (a->type & VF_CHILD)
722 if (!(a->type & VF_ARRAY)) {
724 a->x.array = hash_init();
729 static void clear_array(xhash *array)
734 for (i = 0; i < array->csize; i++) {
735 hi = array->items[i];
739 free(thi->data.v.string);
742 array->items[i] = NULL;
744 array->glen = array->nel = 0;
747 /* clear a variable */
748 static var *clrvar(var *v)
750 if (!(v->type & VF_FSTR))
753 v->type &= VF_DONTTOUCH;
759 /* assign string value to variable */
760 static var *setvar_p(var *v, char *value)
768 /* same as setvar_p but make a copy of string */
769 static var *setvar_s(var *v, const char *value)
771 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
774 /* same as setvar_s but sets USER flag */
775 static var *setvar_u(var *v, const char *value)
777 v = setvar_s(v, value);
782 /* set array element to user string */
783 static void setari_u(var *a, int idx, const char *s)
787 v = findvar(iamarray(a), itoa(idx));
791 /* assign numeric value to variable */
792 static var *setvar_i(var *v, double value)
795 v->type |= VF_NUMBER;
801 static const char *getvar_s(var *v)
803 /* if v is numeric and has no cached string, convert it to string */
804 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
805 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
806 v->string = xstrdup(g_buf);
807 v->type |= VF_CACHED;
809 return (v->string == NULL) ? "" : v->string;
812 static double getvar_i(var *v)
816 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
820 v->number = my_strtod(&s);
821 if (v->type & VF_USER) {
829 v->type |= VF_CACHED;
834 /* Used for operands of bitwise ops */
835 static unsigned long getvar_i_int(var *v)
837 double d = getvar_i(v);
839 /* Casting doubles to longs is undefined for values outside
840 * of target type range. Try to widen it as much as possible */
842 return (unsigned long)d;
843 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
844 return - (long) (unsigned long) (-d);
847 static var *copyvar(var *dest, const var *src)
851 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
852 dest->number = src->number;
854 dest->string = xstrdup(src->string);
856 handle_special(dest);
860 static var *incvar(var *v)
862 return setvar_i(v, getvar_i(v) + 1.0);
865 /* return true if v is number or numeric string */
866 static int is_numeric(var *v)
869 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
872 /* return 1 when value of v corresponds to true, 0 otherwise */
873 static int istrue(var *v)
876 return (v->number != 0);
877 return (v->string && v->string[0]);
880 /* temporary variables allocator. Last allocated should be first freed */
881 static var *nvalloc(int n)
889 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
895 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
896 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
898 g_cb->pos = g_cb->nv;
900 /*g_cb->next = NULL; - xzalloc did it */
908 while (v < g_cb->pos) {
917 static void nvfree(var *v)
921 if (v < g_cb->nv || v >= g_cb->pos)
922 syntax_error(EMSG_INTERNAL_ERROR);
924 for (p = v; p < g_cb->pos; p++) {
925 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
926 clear_array(iamarray(p));
927 free(p->x.array->items);
930 if (p->type & VF_WALK) {
932 walker_list *w = p->x.walker;
933 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
937 debug_printf_walker(" free(%p)\n", w);
946 while (g_cb->prev && g_cb->pos == g_cb->nv) {
951 /* ------- awk program text parsing ------- */
953 /* Parse the next token pointed to by global pos and place the results into
954 * the global t_* variables. If the token isn't expected, give up. Return token class
956 static uint32_t next_token(uint32_t expected)
958 #define concat_inserted (G.next_token__concat_inserted)
959 #define save_tclass (G.next_token__save_tclass)
960 #define save_info (G.next_token__save_info)
961 /* Initialized to TC_OPTERM: */
962 #define ltclass (G.next_token__ltclass)
973 } else if (concat_inserted) {
974 concat_inserted = FALSE;
975 t_tclass = save_tclass;
984 while (*p != '\n' && *p != '\0')
993 } else if (*p == '\"') {
998 if (*p == '\0' || *p == '\n')
999 syntax_error(EMSG_UNEXP_EOS);
1000 *s++ = nextchar(&pp);
1007 } else if ((expected & TC_REGEXP) && *p == '/') {
1011 if (*p == '\0' || *p == '\n')
1012 syntax_error(EMSG_UNEXP_EOS);
1016 s[-1] = bb_process_escape_sequence((const char **)&pp);
1029 } else if (*p == '.' || isdigit(*p)) {
1032 t_double = my_strtod(&pp);
1035 syntax_error(EMSG_UNEXP_TOKEN);
1039 /* search for something known */
1049 /* if token class is expected, token
1050 * matches and it's not a longer word,
1051 * then this is what we are looking for
1053 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1054 && *tl == *p && strncmp(p, tl, l) == 0
1055 && !((tc & TC_WORD) && isalnum_(p[l]))
1066 /* it's a name (var/array/function),
1067 * otherwise it's something wrong
1070 syntax_error(EMSG_UNEXP_TOKEN);
1073 while (isalnum_(*++p)) {
1078 /* also consume whitespace between functionname and bracket */
1079 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1093 /* skipping newlines in some cases */
1094 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1097 /* insert concatenation operator when needed */
1098 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1099 concat_inserted = TRUE;
1103 t_info = OC_CONCAT | SS | P(35);
1110 /* Are we ready for this? */
1111 if (!(ltclass & expected))
1112 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1113 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1116 #undef concat_inserted
1122 static void rollback_token(void)
1127 static node *new_node(uint32_t info)
1131 n = xzalloc(sizeof(node));
1133 n->lineno = g_lineno;
1137 static void mk_re_node(const char *s, node *n, regex_t *re)
1139 n->info = OC_REGEXP;
1142 xregcomp(re, s, REG_EXTENDED);
1143 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1146 static node *condition(void)
1148 next_token(TC_SEQSTART);
1149 return parse_expr(TC_SEQTERM);
1152 /* parse expression terminated by given argument, return ptr
1153 * to built subtree. Terminator is eaten by parse_expr */
1154 static node *parse_expr(uint32_t iexp)
1163 sn.r.n = glptr = NULL;
1164 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1166 while (!((tc = next_token(xtc)) & iexp)) {
1167 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1168 /* input redirection (<) attached to glptr node */
1169 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1171 xtc = TC_OPERAND | TC_UOPPRE;
1174 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1175 /* for binary and postfix-unary operators, jump back over
1176 * previous operators with higher priority */
1178 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1179 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1183 if ((t_info & OPCLSMASK) == OC_TERNARY)
1185 cn = vn->a.n->r.n = new_node(t_info);
1187 if (tc & TC_BINOP) {
1189 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1190 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1192 next_token(TC_GETLINE);
1193 /* give maximum priority to this pipe */
1194 cn->info &= ~PRIMASK;
1195 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1199 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1204 /* for operands and prefix-unary operators, attach them
1207 cn = vn->r.n = new_node(t_info);
1209 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1210 if (tc & (TC_OPERAND | TC_REGEXP)) {
1211 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1212 /* one should be very careful with switch on tclass -
1213 * only simple tclasses should be used! */
1218 v = hash_search(ahash, t_string);
1220 cn->info = OC_FNARG;
1221 cn->l.aidx = v->x.aidx;
1223 cn->l.v = newvar(t_string);
1225 if (tc & TC_ARRAY) {
1227 cn->r.n = parse_expr(TC_ARRTERM);
1234 v = cn->l.v = xzalloc(sizeof(var));
1236 setvar_i(v, t_double);
1238 setvar_s(v, t_string);
1242 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1247 cn->r.f = newfunc(t_string);
1248 cn->l.n = condition();
1252 cn = vn->r.n = parse_expr(TC_SEQTERM);
1258 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1262 cn->l.n = condition();
1271 /* add node to chain. Return ptr to alloc'd node */
1272 static node *chain_node(uint32_t info)
1277 seq->first = seq->last = new_node(0);
1279 if (seq->programname != g_progname) {
1280 seq->programname = g_progname;
1281 n = chain_node(OC_NEWSOURCE);
1282 n->l.new_progname = xstrdup(g_progname);
1287 seq->last = n->a.n = new_node(OC_DONE);
1292 static void chain_expr(uint32_t info)
1296 n = chain_node(info);
1297 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1298 if (t_tclass & TC_GRPTERM)
1302 static node *chain_loop(node *nn)
1304 node *n, *n2, *save_brk, *save_cont;
1306 save_brk = break_ptr;
1307 save_cont = continue_ptr;
1309 n = chain_node(OC_BR | Vx);
1310 continue_ptr = new_node(OC_EXEC);
1311 break_ptr = new_node(OC_EXEC);
1313 n2 = chain_node(OC_EXEC | Vx);
1316 continue_ptr->a.n = n2;
1317 break_ptr->a.n = n->r.n = seq->last;
1319 continue_ptr = save_cont;
1320 break_ptr = save_brk;
1325 /* parse group and attach it to chain */
1326 static void chain_group(void)
1332 c = next_token(TC_GRPSEQ);
1333 } while (c & TC_NEWLINE);
1335 if (c & TC_GRPSTART) {
1336 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1337 if (t_tclass & TC_NEWLINE)
1342 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1344 chain_expr(OC_EXEC | Vx);
1345 } else { /* TC_STATEMNT */
1346 switch (t_info & OPCLSMASK) {
1348 n = chain_node(OC_BR | Vx);
1349 n->l.n = condition();
1351 n2 = chain_node(OC_EXEC);
1353 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1355 n2->a.n = seq->last;
1363 n = chain_loop(NULL);
1368 n2 = chain_node(OC_EXEC);
1369 n = chain_loop(NULL);
1371 next_token(TC_WHILE);
1372 n->l.n = condition();
1376 next_token(TC_SEQSTART);
1377 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1378 if (t_tclass & TC_SEQTERM) { /* for-in */
1379 if ((n2->info & OPCLSMASK) != OC_IN)
1380 syntax_error(EMSG_UNEXP_TOKEN);
1381 n = chain_node(OC_WALKINIT | VV);
1384 n = chain_loop(NULL);
1385 n->info = OC_WALKNEXT | Vx;
1387 } else { /* for (;;) */
1388 n = chain_node(OC_EXEC | Vx);
1390 n2 = parse_expr(TC_SEMICOL);
1391 n3 = parse_expr(TC_SEQTERM);
1401 n = chain_node(t_info);
1402 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1403 if (t_tclass & TC_OUTRDR) {
1405 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1407 if (t_tclass & TC_GRPTERM)
1412 n = chain_node(OC_EXEC);
1417 n = chain_node(OC_EXEC);
1418 n->a.n = continue_ptr;
1421 /* delete, next, nextfile, return, exit */
1428 static void parse_program(char *p)
1437 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1438 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1440 if (tclass & TC_OPTERM)
1444 if (tclass & TC_BEGIN) {
1448 } else if (tclass & TC_END) {
1452 } else if (tclass & TC_FUNCDECL) {
1453 next_token(TC_FUNCTION);
1455 f = newfunc(t_string);
1456 f->body.first = NULL;
1458 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1459 v = findvar(ahash, t_string);
1460 v->x.aidx = f->nargs++;
1462 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1469 } else if (tclass & TC_OPSEQ) {
1471 cn = chain_node(OC_TEST);
1472 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1473 if (t_tclass & TC_GRPSTART) {
1477 chain_node(OC_PRINT);
1479 cn->r.n = mainseq.last;
1481 } else /* if (tclass & TC_GRPSTART) */ {
1489 /* -------- program execution part -------- */
1491 static node *mk_splitter(const char *s, tsplitter *spl)
1499 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1501 regfree(ire); // TODO: nuke ire, use re+1?
1503 if (strlen(s) > 1) {
1504 mk_re_node(s, n, re);
1506 n->info = (uint32_t) *s;
1512 /* Use node as a regular expression. Supplied with node ptr and regex_t
1513 * storage space. Returns ptr to regex (if the result points to preg, it
1514 * should later be regfree'd manually).
1516 static regex_t *as_regex(node *op, regex_t *preg)
1522 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1523 return icase ? op->r.ire : op->l.re;
1526 s = getvar_s(evaluate(op, v));
1528 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1529 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1530 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1531 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1532 * (maybe gsub is not supposed to use REG_EXTENDED?).
1534 if (regcomp(preg, s, cflags)) {
1535 cflags &= ~REG_EXTENDED;
1536 xregcomp(preg, s, cflags);
1542 /* gradually increasing buffer.
1543 * note that we reallocate even if n == old_size,
1544 * and thus there is at least one extra allocated byte.
1546 static char* qrealloc(char *b, int n, int *size)
1548 if (!b || n >= *size) {
1549 *size = n + (n>>1) + 80;
1550 b = xrealloc(b, *size);
1555 /* resize field storage space */
1556 static void fsrealloc(int size)
1560 if (size >= maxfields) {
1562 maxfields = size + 16;
1563 Fields = xrealloc(Fields, maxfields * sizeof(var));
1564 for (; i < maxfields; i++) {
1565 Fields[i].type = VF_SPECIAL;
1566 Fields[i].string = NULL;
1570 if (size < nfields) {
1571 for (i = size; i < nfields; i++) {
1578 static int awk_split(const char *s, node *spl, char **slist)
1583 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1585 /* in worst case, each char would be a separate field */
1586 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1589 c[0] = c[1] = (char)spl->info;
1591 if (*getvar_s(intvar[RS]) == '\0')
1594 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1596 return n; /* "": zero fields */
1597 n++; /* at least one field will be there */
1599 l = strcspn(s, c+2); /* len till next NUL or \n */
1600 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1601 && pmatch[0].rm_so <= l
1603 l = pmatch[0].rm_so;
1604 if (pmatch[0].rm_eo == 0) {
1608 n++; /* we saw yet another delimiter */
1610 pmatch[0].rm_eo = l;
1615 /* make sure we remove *all* of the separator chars */
1618 } while (++l < pmatch[0].rm_eo);
1620 s += pmatch[0].rm_eo;
1624 if (c[0] == '\0') { /* null split */
1632 if (c[0] != ' ') { /* single-character split */
1634 c[0] = toupper(c[0]);
1635 c[1] = tolower(c[1]);
1639 while ((s1 = strpbrk(s1, c))) {
1647 s = skip_whitespace(s);
1651 while (*s && !isspace(*s))
1658 static void split_f0(void)
1660 /* static char *fstrings; */
1661 #define fstrings (G.split_f0__fstrings)
1672 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1675 for (i = 0; i < n; i++) {
1676 Fields[i].string = nextword(&s);
1677 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1680 /* set NF manually to avoid side effects */
1682 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1683 intvar[NF]->number = nfields;
1687 /* perform additional actions when some internal variables changed */
1688 static void handle_special(var *v)
1692 const char *sep, *s;
1693 int sl, l, len, i, bsize;
1695 if (!(v->type & VF_SPECIAL))
1698 if (v == intvar[NF]) {
1699 n = (int)getvar_i(v);
1702 /* recalculate $0 */
1703 sep = getvar_s(intvar[OFS]);
1707 for (i = 0; i < n; i++) {
1708 s = getvar_s(&Fields[i]);
1711 memcpy(b+len, sep, sl);
1714 b = qrealloc(b, len+l+sl, &bsize);
1715 memcpy(b+len, s, l);
1720 setvar_p(intvar[F0], b);
1723 } else if (v == intvar[F0]) {
1724 is_f0_split = FALSE;
1726 } else if (v == intvar[FS]) {
1727 mk_splitter(getvar_s(v), &fsplitter);
1729 } else if (v == intvar[RS]) {
1730 mk_splitter(getvar_s(v), &rsplitter);
1732 } else if (v == intvar[IGNORECASE]) {
1736 n = getvar_i(intvar[NF]);
1737 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1738 /* right here v is invalid. Just to note... */
1742 /* step through func/builtin/etc arguments */
1743 static node *nextarg(node **pn)
1748 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1757 static void hashwalk_init(var *v, xhash *array)
1762 walker_list *prev_walker;
1764 if (v->type & VF_WALK) {
1765 prev_walker = v->x.walker;
1770 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1772 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1773 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1774 w->cur = w->end = w->wbuf;
1775 w->prev = prev_walker;
1776 for (i = 0; i < array->csize; i++) {
1777 hi = array->items[i];
1779 strcpy(w->end, hi->name);
1786 static int hashwalk_next(var *v)
1788 walker_list *w = v->x.walker;
1790 if (w->cur >= w->end) {
1791 walker_list *prev_walker = w->prev;
1793 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1795 v->x.walker = prev_walker;
1799 setvar_s(v, nextword(&w->cur));
1803 /* evaluate node, return 1 when result is true, 0 otherwise */
1804 static int ptest(node *pattern)
1806 /* ptest__v is "static": to save stack space? */
1807 return istrue(evaluate(pattern, &G.ptest__v));
1810 /* read next record from stream rsm into a variable v */
1811 static int awk_getline(rstream *rsm, var *v)
1814 regmatch_t pmatch[2];
1815 int size, a, p, pp = 0;
1816 int fd, so, eo, r, rp;
1819 /* we're using our own buffer since we need access to accumulating
1822 fd = fileno(rsm->F);
1827 c = (char) rsplitter.n.info;
1831 m = qrealloc(m, 256, &size);
1838 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1839 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1840 b, 1, pmatch, 0) == 0) {
1841 so = pmatch[0].rm_so;
1842 eo = pmatch[0].rm_eo;
1846 } else if (c != '\0') {
1847 s = strchr(b+pp, c);
1849 s = memchr(b+pp, '\0', p - pp);
1856 while (b[rp] == '\n')
1858 s = strstr(b+rp, "\n\n");
1861 while (b[eo] == '\n')
1870 memmove(m, m+a, p+1);
1875 m = qrealloc(m, a+p+128, &size);
1878 p += safe_read(fd, b+p, size-p-1);
1882 setvar_i(intvar[ERRNO], errno);
1891 c = b[so]; b[so] = '\0';
1895 c = b[eo]; b[eo] = '\0';
1896 setvar_s(intvar[RT], b+so);
1908 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1912 const char *s = format;
1914 if (int_as_int && n == (int)n) {
1915 r = snprintf(b, size, "%d", (int)n);
1917 do { c = *s; } while (c && *++s);
1918 if (strchr("diouxX", c)) {
1919 r = snprintf(b, size, format, (int)n);
1920 } else if (strchr("eEfgG", c)) {
1921 r = snprintf(b, size, format, n);
1923 syntax_error(EMSG_INV_FMT);
1929 /* formatted output into an allocated buffer, return ptr to buffer */
1930 static char *awk_printf(node *n)
1935 int i, j, incr, bsize;
1940 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1945 while (*f && (*f != '%' || *++f == '%'))
1947 while (*f && !isalpha(*f)) {
1949 syntax_error("%*x formats are not supported");
1953 incr = (f - s) + MAXVARFMT;
1954 b = qrealloc(b, incr + i, &bsize);
1960 arg = evaluate(nextarg(&n), v);
1963 if (c == 'c' || !c) {
1964 i += sprintf(b+i, s, is_numeric(arg) ?
1965 (char)getvar_i(arg) : *getvar_s(arg));
1966 } else if (c == 's') {
1968 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1969 i += sprintf(b+i, s, s1);
1971 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1975 /* if there was an error while sprintf, return value is negative */
1982 b = xrealloc(b, i + 1);
1987 /* Common substitution routine.
1988 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
1989 * store result into (dest), return number of substitutions.
1990 * If nm = 0, replace all matches.
1991 * If src or dest is NULL, use $0.
1992 * If subexp != 0, enable subexpression matching (\1-\9).
1994 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
1998 int match_no, residx, replen, resbufsize;
2000 regmatch_t pmatch[10];
2001 regex_t sreg, *regex;
2007 regex = as_regex(rn, &sreg);
2008 sp = getvar_s(src ? src : intvar[F0]);
2009 replen = strlen(repl);
2010 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2011 int so = pmatch[0].rm_so;
2012 int eo = pmatch[0].rm_eo;
2014 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2015 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2016 memcpy(resbuf + residx, sp, eo);
2018 if (++match_no >= nm) {
2023 residx -= (eo - so);
2025 for (s = repl; *s; s++) {
2026 char c = resbuf[residx++] = *s;
2031 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2033 residx -= ((nbs + 3) >> 1);
2040 resbuf[residx++] = c;
2042 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2043 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2044 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2052 regexec_flags = REG_NOTBOL;
2057 /* Empty match (e.g. "b*" will match anywhere).
2058 * Advance by one char. */
2060 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2061 //... and will erroneously match "b" even though it is NOT at the word start.
2062 //we need REG_NOTBOW but it does not exist...
2063 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2064 //it should be able to do it correctly.
2065 /* Subtle: this is safe only because
2066 * qrealloc allocated at least one extra byte */
2067 resbuf[residx] = *sp;
2075 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2076 strcpy(resbuf + residx, sp);
2078 //bb_error_msg("end sp:'%s'%p", sp,sp);
2079 setvar_p(dest ? dest : intvar[F0], resbuf);
2085 static NOINLINE int do_mktime(const char *ds)
2090 /*memset(&then, 0, sizeof(then)); - not needed */
2091 then.tm_isdst = -1; /* default is unknown */
2093 /* manpage of mktime says these fields are ints,
2094 * so we can sscanf stuff directly into them */
2095 count = sscanf(ds, "%u %u %u %u %u %u %d",
2096 &then.tm_year, &then.tm_mon, &then.tm_mday,
2097 &then.tm_hour, &then.tm_min, &then.tm_sec,
2101 || (unsigned)then.tm_mon < 1
2102 || (unsigned)then.tm_year < 1900
2108 then.tm_year -= 1900;
2110 return mktime(&then);
2113 static NOINLINE var *exec_builtin(node *op, var *res)
2115 #define tspl (G.exec_builtin__tspl)
2121 regmatch_t pmatch[2];
2130 isr = info = op->info;
2133 av[2] = av[3] = NULL;
2134 for (i = 0; i < 4 && op; i++) {
2135 an[i] = nextarg(&op);
2136 if (isr & 0x09000000)
2137 av[i] = evaluate(an[i], &tv[i]);
2138 if (isr & 0x08000000)
2139 as[i] = getvar_s(av[i]);
2144 if ((uint32_t)nargs < (info >> 30))
2145 syntax_error(EMSG_TOO_FEW_ARGS);
2151 #if ENABLE_FEATURE_AWK_LIBM
2152 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2154 syntax_error(EMSG_NO_MATH);
2162 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2163 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2168 n = awk_split(as[0], spl, &s);
2170 clear_array(iamarray(av[1]));
2171 for (i = 1; i <= n; i++)
2172 setari_u(av[1], i, nextword(&s));
2182 i = getvar_i(av[1]) - 1;
2187 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2190 s = xstrndup(as[0]+i, n);
2195 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2196 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2198 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2202 setvar_i(res, ~getvar_i_int(av[0]));
2206 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2210 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2214 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2218 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2224 s1 = s = xstrdup(as[0]);
2226 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2227 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2228 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2238 l = strlen(as[0]) - ll;
2239 if (ll > 0 && l >= 0) {
2241 char *s = strstr(as[0], as[1]);
2243 n = (s - as[0]) + 1;
2245 /* this piece of code is terribly slow and
2246 * really should be rewritten
2248 for (i = 0; i <= l; i++) {
2249 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2261 tt = getvar_i(av[1]);
2264 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2265 i = strftime(g_buf, MAXVARFMT,
2266 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2269 setvar_s(res, g_buf);
2273 setvar_i(res, do_mktime(as[0]));
2277 re = as_regex(an[1], &sreg);
2278 n = regexec(re, as[0], 1, pmatch, 0);
2283 pmatch[0].rm_so = 0;
2284 pmatch[0].rm_eo = -1;
2286 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2287 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2288 setvar_i(res, pmatch[0].rm_so);
2294 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2298 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2302 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2312 * Evaluate node - the heart of the program. Supplied with subtree
2313 * and place where to store result. Returns ptr to result.
2315 #define XC(n) ((n) >> 8)
2317 static var *evaluate(node *op, var *res)
2319 /* This procedure is recursive so we should count every byte */
2320 #define fnargs (G.evaluate__fnargs)
2321 /* seed is initialized to 1 */
2322 #define seed (G.evaluate__seed)
2323 #define sreg (G.evaluate__sreg)
2328 return setvar_s(res, NULL);
2336 } L = L; /* for compiler */
2347 opn = (opinfo & OPNMASK);
2348 g_lineno = op->lineno;
2351 /* execute inevitable things */
2352 if (opinfo & OF_RES1)
2353 L.v = evaluate(op1, v1);
2354 if (opinfo & OF_RES2)
2355 R.v = evaluate(op->r.n, v1+1);
2356 if (opinfo & OF_STR1)
2357 L.s = getvar_s(L.v);
2358 if (opinfo & OF_STR2)
2359 R.s = getvar_s(R.v);
2360 if (opinfo & OF_NUM1)
2361 L_d = getvar_i(L.v);
2363 switch (XC(opinfo & OPCLSMASK)) {
2365 /* -- iterative node type -- */
2369 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2370 /* it's range pattern */
2371 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2372 op->info |= OF_CHECKED;
2373 if (ptest(op1->r.n))
2374 op->info &= ~OF_CHECKED;
2380 op = ptest(op1) ? op->a.n : op->r.n;
2384 /* just evaluate an expression, also used as unconditional jump */
2388 /* branch, used in if-else and various loops */
2390 op = istrue(L.v) ? op->a.n : op->r.n;
2393 /* initialize for-in loop */
2394 case XC( OC_WALKINIT ):
2395 hashwalk_init(L.v, iamarray(R.v));
2398 /* get next array item */
2399 case XC( OC_WALKNEXT ):
2400 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2403 case XC( OC_PRINT ):
2404 case XC( OC_PRINTF ): {
2408 rstream *rsm = newfile(R.s);
2411 rsm->F = popen(R.s, "w");
2413 bb_perror_msg_and_die("popen");
2416 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2422 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2424 fputs(getvar_s(intvar[F0]), F);
2427 var *v = evaluate(nextarg(&op1), v1);
2428 if (v->type & VF_NUMBER) {
2429 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2433 fputs(getvar_s(v), F);
2437 fputs(getvar_s(intvar[OFS]), F);
2440 fputs(getvar_s(intvar[ORS]), F);
2442 } else { /* OC_PRINTF */
2443 char *s = awk_printf(op1);
2451 case XC( OC_DELETE ): {
2452 uint32_t info = op1->info & OPCLSMASK;
2455 if (info == OC_VAR) {
2457 } else if (info == OC_FNARG) {
2458 v = &fnargs[op1->l.aidx];
2460 syntax_error(EMSG_NOT_ARRAY);
2466 s = getvar_s(evaluate(op1->r.n, v1));
2467 hash_remove(iamarray(v), s);
2469 clear_array(iamarray(v));
2474 case XC( OC_NEWSOURCE ):
2475 g_progname = op->l.new_progname;
2478 case XC( OC_RETURN ):
2482 case XC( OC_NEXTFILE ):
2493 /* -- recursive node type -- */
2497 if (L.v == intvar[NF])
2501 case XC( OC_FNARG ):
2502 L.v = &fnargs[op->l.aidx];
2504 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2508 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2511 case XC( OC_REGEXP ):
2513 L.s = getvar_s(intvar[F0]);
2516 case XC( OC_MATCH ):
2520 regex_t *re = as_regex(op1, &sreg);
2521 int i = regexec(re, L.s, 0, NULL, 0);
2524 setvar_i(res, (i == 0) ^ (opn == '!'));
2529 /* if source is a temporary string, just relink it to dest */
2530 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2531 //then L.v ends up being a string, which is wrong
2532 // if (R.v == v1+1 && R.v->string) {
2533 // res = setvar_p(L.v, R.v->string);
2534 // R.v->string = NULL;
2536 res = copyvar(L.v, R.v);
2540 case XC( OC_TERNARY ):
2541 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2542 syntax_error(EMSG_POSSIBLE_ERROR);
2543 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2546 case XC( OC_FUNC ): {
2548 const char *sv_progname;
2550 if (!op->r.f->body.first)
2551 syntax_error(EMSG_UNDEF_FUNC);
2553 vbeg = v = nvalloc(op->r.f->nargs + 1);
2555 var *arg = evaluate(nextarg(&op1), v1);
2557 v->type |= VF_CHILD;
2559 if (++v - vbeg >= op->r.f->nargs)
2565 sv_progname = g_progname;
2567 res = evaluate(op->r.f->body.first, res);
2569 g_progname = sv_progname;
2576 case XC( OC_GETLINE ):
2577 case XC( OC_PGETLINE ): {
2584 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2585 rsm->F = popen(L.s, "r");
2586 rsm->is_pipe = TRUE;
2588 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2593 iF = next_input_file();
2598 setvar_i(intvar[ERRNO], errno);
2606 i = awk_getline(rsm, R.v);
2607 if (i > 0 && !op1) {
2608 incvar(intvar[FNR]);
2615 /* simple builtins */
2616 case XC( OC_FBLTIN ): {
2619 double R_d = R_d; /* for compiler */
2627 R_d = (double)rand() / (double)RAND_MAX;
2629 #if ENABLE_FEATURE_AWK_LIBM
2655 syntax_error(EMSG_NO_MATH);
2660 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2670 L.s = getvar_s(intvar[F0]);
2676 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2677 ? (system(L.s) >> 8) : 0;
2683 } else if (L.s && *L.s) {
2693 rsm = (rstream *)hash_search(fdhash, L.s);
2695 i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2697 hash_remove(fdhash, L.s);
2700 setvar_i(intvar[ERRNO], errno);
2708 case XC( OC_BUILTIN ):
2709 res = exec_builtin(op, res);
2712 case XC( OC_SPRINTF ):
2713 setvar_p(res, awk_printf(op1));
2716 case XC( OC_UNARY ): {
2719 Ld = R_d = getvar_i(R.v);
2746 case XC( OC_FIELD ): {
2747 int i = (int)getvar_i(R.v);
2754 res = &Fields[i - 1];
2759 /* concatenation (" ") and index joining (",") */
2760 case XC( OC_CONCAT ):
2761 case XC( OC_COMMA ): {
2762 const char *sep = "";
2763 if ((opinfo & OPCLSMASK) == OC_COMMA)
2764 sep = getvar_s(intvar[SUBSEP]);
2765 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2770 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2774 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2777 case XC( OC_BINARY ):
2778 case XC( OC_REPLACE ): {
2779 double R_d = getvar_i(R.v);
2792 syntax_error(EMSG_DIV_BY_ZERO);
2796 #if ENABLE_FEATURE_AWK_LIBM
2797 L_d = pow(L_d, R_d);
2799 syntax_error(EMSG_NO_MATH);
2804 syntax_error(EMSG_DIV_BY_ZERO);
2805 L_d -= (int)(L_d / R_d) * R_d;
2808 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2812 case XC( OC_COMPARE ): {
2813 int i = i; /* for compiler */
2816 if (is_numeric(L.v) && is_numeric(R.v)) {
2817 Ld = getvar_i(L.v) - getvar_i(R.v);
2819 const char *l = getvar_s(L.v);
2820 const char *r = getvar_s(R.v);
2821 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2823 switch (opn & 0xfe) {
2834 setvar_i(res, (i == 0) ^ (opn & 1));
2839 syntax_error(EMSG_POSSIBLE_ERROR);
2841 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2843 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2857 /* -------- main & co. -------- */
2859 static int awk_exit(int r)
2870 evaluate(endseq.first, &tv);
2873 /* waiting for children */
2874 for (i = 0; i < fdhash->csize; i++) {
2875 hi = fdhash->items[i];
2877 if (hi->data.rs.F && hi->data.rs.is_pipe)
2878 pclose(hi->data.rs.F);
2886 /* if expr looks like "var=value", perform assignment and return 1,
2887 * otherwise return 0 */
2888 static int is_assignment(const char *expr)
2890 char *exprc, *s, *s0, *s1;
2892 exprc = xstrdup(expr);
2893 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2901 *s1++ = nextchar(&s);
2904 setvar_u(newvar(exprc), s0);
2909 /* switch to next input file */
2910 static rstream *next_input_file(void)
2912 #define rsm (G.next_input_file__rsm)
2913 #define files_happen (G.next_input_file__files_happen)
2916 const char *fname, *ind;
2921 rsm.pos = rsm.adv = 0;
2924 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2930 ind = getvar_s(incvar(intvar[ARGIND]));
2931 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2932 if (fname && *fname && !is_assignment(fname))
2933 F = xfopen_stdin(fname);
2937 files_happen = TRUE;
2938 setvar_s(intvar[FILENAME], fname);
2945 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2946 int awk_main(int argc, char **argv)
2949 char *opt_F, *opt_W;
2950 llist_t *list_v = NULL;
2951 llist_t *list_f = NULL;
2956 char *vnames = (char *)vNames; /* cheat */
2957 char *vvalues = (char *)vValues;
2961 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2962 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2963 if (ENABLE_LOCALE_SUPPORT)
2964 setlocale(LC_NUMERIC, "C");
2968 /* allocate global buffer */
2969 g_buf = xmalloc(MAXVARFMT + 1);
2971 vhash = hash_init();
2972 ahash = hash_init();
2973 fdhash = hash_init();
2974 fnhash = hash_init();
2976 /* initialize variables */
2977 for (i = 0; *vnames; i++) {
2978 intvar[i] = v = newvar(nextword(&vnames));
2979 if (*vvalues != '\377')
2980 setvar_s(v, nextword(&vvalues));
2984 if (*vnames == '*') {
2985 v->type |= VF_SPECIAL;
2990 handle_special(intvar[FS]);
2991 handle_special(intvar[RS]);
2993 newfile("/dev/stdin")->F = stdin;
2994 newfile("/dev/stdout")->F = stdout;
2995 newfile("/dev/stderr")->F = stderr;
2997 /* Huh, people report that sometimes environ is NULL. Oh well. */
2998 if (environ) for (envp = environ; *envp; envp++) {
2999 /* environ is writable, thus we don't strdup it needlessly */
3001 char *s1 = strchr(s, '=');
3004 /* Both findvar and setvar_u take const char*
3005 * as 2nd arg -> environment is not trashed */
3006 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3010 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3011 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3015 setvar_s(intvar[FS], opt_F); // -F
3016 while (list_v) { /* -v */
3017 if (!is_assignment(llist_pop(&list_v)))
3020 if (list_f) { /* -f */
3025 g_progname = llist_pop(&list_f);
3026 from_file = xfopen_stdin(g_progname);
3027 /* one byte is reserved for some trick in next_token */
3028 for (i = j = 1; j > 0; i += j) {
3029 s = xrealloc(s, i + 4096);
3030 j = fread(s + i, 1, 4094, from_file);
3034 parse_program(s + 1);
3038 } else { // no -f: take program from 1st parameter
3041 g_progname = "cmd. line";
3042 parse_program(*argv++);
3044 if (opt & 0x8) // -W
3045 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3047 /* fill in ARGV array */
3048 setvar_i(intvar[ARGC], argc);
3049 setari_u(intvar[ARGV], 0, "awk");
3052 setari_u(intvar[ARGV], ++i, *argv++);
3054 evaluate(beginseq.first, &tv);
3055 if (!mainseq.first && !endseq.first)
3056 awk_exit(EXIT_SUCCESS);
3058 /* input file could already be opened in BEGIN block */
3060 iF = next_input_file();
3062 /* passing through input files */
3065 setvar_i(intvar[FNR], 0);
3067 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3070 incvar(intvar[FNR]);
3071 evaluate(mainseq.first, &tv);
3078 syntax_error(strerror(errno));
3080 iF = next_input_file();
3083 awk_exit(EXIT_SUCCESS);