1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
17 /* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...) do {} while (0)
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
31 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
32 #define VF_ARRAY 0x0002 /* 1 = it's an array */
34 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
35 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
37 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
42 /* these flags are static, don't change them when value is changed */
43 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
45 typedef struct walker_list {
48 struct walker_list *prev;
53 typedef struct var_s {
54 unsigned type; /* flags */
58 int aidx; /* func arg idx (for compilation stage) */
59 struct xhash_s *array; /* array ptr */
60 struct var_s *parent; /* for func args, ptr to actual parameter */
61 walker_list *walker; /* list of array elements (for..in) */
65 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
66 typedef struct chain_s {
69 const char *programname;
73 typedef struct func_s {
79 typedef struct rstream_s {
88 typedef struct hash_item_s {
90 struct var_s v; /* variable/array hash */
91 struct rstream_s rs; /* redirect streams hash */
92 struct func_s f; /* functions hash */
94 struct hash_item_s *next; /* next in chain */
95 char name[1]; /* really it's longer */
/* Chained hash table: items[] holds csize bucket heads, each a singly
 * linked list of hash_item_s entries keyed by their NUL-terminated name. */
98 typedef struct xhash_s {
99 unsigned nel; /* number of elements */
100 unsigned csize; /* current hash size (number of buckets in items[]) */
101 unsigned nprime; /* index into PRIMES[] of the next hash size */
102 unsigned glen; /* total length of item names, each counted with its NUL */
103 struct hash_item_s **items; /* bucket array of chain heads */
107 typedef struct node_s {
127 /* Block of temporary variables */
128 typedef struct nvblock_s {
131 struct nvblock_s *prev;
132 struct nvblock_s *next;
136 typedef struct tsplitter_s {
141 /* simple token classes */
142 /* Order and hex values are very important!!! See next_token() */
143 #define TC_SEQSTART 1 /* ( */
144 #define TC_SEQTERM (1 << 1) /* ) */
145 #define TC_REGEXP (1 << 2) /* /.../ */
146 #define TC_OUTRDR (1 << 3) /* | > >> */
147 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
148 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
149 #define TC_BINOPX (1 << 6) /* two-opnd operator */
150 #define TC_IN (1 << 7)
151 #define TC_COMMA (1 << 8)
152 #define TC_PIPE (1 << 9) /* input redirection pipe */
153 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
154 #define TC_ARRTERM (1 << 11) /* ] */
155 #define TC_GRPSTART (1 << 12) /* { */
156 #define TC_GRPTERM (1 << 13) /* } */
157 #define TC_SEMICOL (1 << 14)
158 #define TC_NEWLINE (1 << 15)
159 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
160 #define TC_WHILE (1 << 17)
161 #define TC_ELSE (1 << 18)
162 #define TC_BUILTIN (1 << 19)
163 #define TC_GETLINE (1 << 20)
164 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
165 #define TC_BEGIN (1 << 22)
166 #define TC_END (1 << 23)
167 #define TC_EOF (1 << 24)
168 #define TC_VARIABLE (1 << 25)
169 #define TC_ARRAY (1 << 26)
170 #define TC_FUNCTION (1 << 27)
171 #define TC_STRING (1 << 28)
172 #define TC_NUMBER (1 << 29)
174 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
176 /* combined token classes */
177 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
178 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
179 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
180 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
182 #define TC_STATEMNT (TC_STATX | TC_WHILE)
183 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
185 /* word tokens, cannot mean something else if not expected */
186 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
187 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
189 /* discard newlines after these */
190 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
191 | TC_BINOP | TC_OPTERM)
193 /* what can expression begin with */
194 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
195 /* what can group begin with */
196 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
198 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
199 /* operator is inserted between them */
200 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
201 | TC_STRING | TC_NUMBER | TC_UOPPOST)
202 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
204 #define OF_RES1 0x010000
205 #define OF_RES2 0x020000
206 #define OF_STR1 0x040000
207 #define OF_STR2 0x080000
208 #define OF_NUM1 0x100000
209 #define OF_CHECKED 0x200000
211 /* combined operator flags */
214 #define xS (OF_RES2 | OF_STR2)
216 #define VV (OF_RES1 | OF_RES2)
217 #define Nx (OF_RES1 | OF_NUM1)
218 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
219 #define Sx (OF_RES1 | OF_STR1)
220 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
221 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
223 #define OPCLSMASK 0xFF00
224 #define OPNMASK 0x007F
226 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
227 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
228 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
230 #define P(x) (x << 24)
231 #define PRIMASK 0x7F000000
232 #define PRIMASK2 0x7E000000
234 /* Operation classes */
236 #define SHIFT_TIL_THIS 0x0600
237 #define RECUR_FROM_THIS 0x1000
240 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
241 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
243 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
244 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
245 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
247 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
248 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
249 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
250 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
251 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
252 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
253 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
254 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
257 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
261 /* simple builtins */
263 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
264 F_ti, F_le, F_sy, F_ff, F_cl
269 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
271 B_an, B_co, B_ls, B_or, B_rs, B_xo,
274 /* tokens and their corresponding info values */
276 #define NTC "\377" /* switch to next token class (tc<<1) */
279 #define OC_B OC_BUILTIN
281 static const char tokenlist[] ALIGN1 =
284 "\1/" NTC /* REGEXP */
285 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
286 "\2++" "\2--" NTC /* UOPPOST */
287 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
288 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
289 "\2*=" "\2/=" "\2%=" "\2^="
290 "\1+" "\1-" "\3**=" "\2**"
291 "\1/" "\1%" "\1^" "\1*"
292 "\2!=" "\2>=" "\2<=" "\1>"
293 "\1<" "\2!~" "\1~" "\2&&"
294 "\2||" "\1?" "\1:" NTC
298 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
304 "\2if" "\2do" "\3for" "\5break" /* STATX */
305 "\10continue" "\6delete" "\5print"
306 "\6printf" "\4next" "\10nextfile"
307 "\6return" "\4exit" NTC
311 "\3and" "\5compl" "\6lshift" "\2or"
313 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
314 "\3cos" "\3exp" "\3int" "\3log"
315 "\4rand" "\3sin" "\4sqrt" "\5srand"
316 "\6gensub" "\4gsub" "\5index" "\6length"
317 "\5match" "\5split" "\7sprintf" "\3sub"
318 "\6substr" "\7systime" "\10strftime" "\6mktime"
319 "\7tolower" "\7toupper" NTC
321 "\4func" "\10function" NTC
326 static const uint32_t tokeninfo[] = {
330 xS|'a', xS|'w', xS|'|',
331 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
332 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
334 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
335 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
336 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
337 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
338 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
339 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
340 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
341 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
342 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
343 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
344 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
345 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
346 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
347 OC_COLON|xx|P(67)|':',
350 OC_PGETLINE|SV|P(37),
351 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
352 OC_UNARY|xV|P(19)|'!',
358 ST_IF, ST_DO, ST_FOR, OC_BREAK,
359 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
360 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
361 OC_RETURN|Vx, OC_EXIT|Nx,
365 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
366 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
380 /* internal variable names and their initial values */
381 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
383 CONVFMT, OFMT, FS, OFS,
384 ORS, RS, RT, FILENAME,
385 SUBSEP, F0, ARGIND, ARGC,
386 ARGV, ERRNO, FNR, NR,
387 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
390 static const char vNames[] ALIGN1 =
391 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
392 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
393 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
394 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
395 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
397 static const char vValues[] ALIGN1 =
398 "%.6g\0" "%.6g\0" " \0" " \0"
399 "\n\0" "\n\0" "\0" "\0"
400 "\034\0" "\0" "\377";
402 /* hash size may grow to these values */
403 #define FIRST_PRIME 61
404 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
407 /* Globals. Split in two parts so that first one is addressed
408 * with (mostly short) negative offsets.
409 * NB: it's unsafe to put members of type "double"
410 * into globals2 (gcc may fail to align them).
414 chain beginseq, mainseq, endseq;
416 node *break_ptr, *continue_ptr;
418 xhash *vhash, *ahash, *fdhash, *fnhash;
419 const char *g_progname;
422 int maxfields; /* used in fsrealloc() only */
431 smallint is_f0_split;
434 uint32_t t_info; /* often used */
440 var *intvar[NUM_INTERNAL_VARS]; /* often used */
442 /* former statics from various functions */
443 char *split_f0__fstrings;
445 uint32_t next_token__save_tclass;
446 uint32_t next_token__save_info;
447 uint32_t next_token__ltclass;
448 smallint next_token__concat_inserted;
450 smallint next_input_file__files_happen;
451 rstream next_input_file__rsm;
453 var *evaluate__fnargs;
454 unsigned evaluate__seed;
455 regex_t evaluate__sreg;
459 tsplitter exec_builtin__tspl;
461 /* biggest and least used members go last */
462 tsplitter fsplitter, rsplitter;
464 #define G1 (ptr_to_globals[-1])
465 #define G (*(struct globals2 *)ptr_to_globals)
466 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
467 /*char G1size[sizeof(G1)]; - 0x74 */
468 /*char Gsize[sizeof(G)]; - 0x1c4 */
469 /* Trying to keep most of members accessible with short offsets: */
470 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
471 #define t_double (G1.t_double )
472 #define beginseq (G1.beginseq )
473 #define mainseq (G1.mainseq )
474 #define endseq (G1.endseq )
475 #define seq (G1.seq )
476 #define break_ptr (G1.break_ptr )
477 #define continue_ptr (G1.continue_ptr)
479 #define vhash (G1.vhash )
480 #define ahash (G1.ahash )
481 #define fdhash (G1.fdhash )
482 #define fnhash (G1.fnhash )
483 #define g_progname (G1.g_progname )
484 #define g_lineno (G1.g_lineno )
485 #define nfields (G1.nfields )
486 #define maxfields (G1.maxfields )
487 #define Fields (G1.Fields )
488 #define g_cb (G1.g_cb )
489 #define g_pos (G1.g_pos )
490 #define g_buf (G1.g_buf )
491 #define icase (G1.icase )
492 #define exiting (G1.exiting )
493 #define nextrec (G1.nextrec )
494 #define nextfile (G1.nextfile )
495 #define is_f0_split (G1.is_f0_split )
496 #define t_info (G.t_info )
497 #define t_tclass (G.t_tclass )
498 #define t_string (G.t_string )
499 #define t_lineno (G.t_lineno )
500 #define t_rollback (G.t_rollback )
501 #define intvar (G.intvar )
502 #define fsplitter (G.fsplitter )
503 #define rsplitter (G.rsplitter )
504 #define INIT_G() do { \
505 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
506 G.next_token__ltclass = TC_OPTERM; \
507 G.evaluate__seed = 1; \
511 /* function prototypes */
512 static void handle_special(var *);
513 static node *parse_expr(uint32_t);
514 static void chain_group(void);
515 static var *evaluate(node *, var *);
516 static rstream *next_input_file(void);
517 static int fmt_num(char *, int, const char *, double, int);
518 static int awk_exit(int) NORETURN;
520 /* ---- error handling ---- */
522 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
523 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
524 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
525 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
526 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
527 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
528 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
529 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
530 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
531 #if !ENABLE_FEATURE_AWK_LIBM
532 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
535 static void zero_out_var(var *vp)
537 memset(vp, 0, sizeof(*vp));
540 static void syntax_error(const char *message) NORETURN;
541 static void syntax_error(const char *message)
543 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
546 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name to an unsigned value. The result is not
 * reduced here; callers take it modulo the table size. */
548 static unsigned hashidx(const char *name)
/* same as idx = idx * 63 + current char; also advances name by one char */
553 idx = *name++ + (idx << 6) - idx;
557 /* create new hash: allocates a zeroed table header and a zeroed
 * array of FIRST_PRIME bucket pointers */
558 static xhash *hash_init(void)
562 newhash = xzalloc(sizeof(*newhash));
563 newhash->csize = FIRST_PRIME;
564 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
569 /* find item in hash, return ptr to data, NULL if not found.
 * The search starts at the head of the bucket selected by
 * hashidx(name) % csize; names are compared with strcmp(), so a
 * match must be exact. */
570 static void *hash_search(xhash *hash, const char *name)
574 hi = hash->items[hashidx(name) % hash->csize];
576 if (strcmp(hi->name, name) == 0)
583 /* grow hash if it becomes too big: items are re-bucketed into a new
 * array whose size is the next entry of PRIMES[] */
584 static void hash_rebuild(xhash *hash)
586 unsigned newsize, i, idx;
587 hash_item **newitems, *hi, *thi;
/* true once every size in PRIMES[] has already been used */
589 if (hash->nprime == ARRAY_SIZE(PRIMES))
592 newsize = PRIMES[hash->nprime++];
593 newitems = xzalloc(newsize * sizeof(newitems[0]));
/* visit every bucket of the old table */
595 for (i = 0; i < hash->csize; i++) {
/* re-bucket the item under the new size, linking it at the chain head */
600 idx = hashidx(thi->name) % newsize;
601 thi->next = newitems[idx];
/* switch the table over to the new bucket array */
607 hash->csize = newsize;
608 hash->items = newitems;
611 /* find item in hash, add it if necessary. Return ptr to data */
612 static void *hash_find(xhash *hash, const char *name)
618 hi = hash_search(hash, name);
/* increments the element count; the test is true once the average
 * chain length (nel / csize, integer division) exceeds 10 */
620 if (++hash->nel / hash->csize > 10)
623 l = strlen(name) + 1;
/* one allocation holds the item and its name: name[] is declared
 * with a single element but really extends over the extra l bytes */
624 hi = xzalloc(sizeof(*hi) + l);
625 strcpy(hi->name, name);
/* link the new item at the head of its bucket chain */
627 idx = hashidx(name) % hash->csize;
628 hi->next = hash->items[idx];
629 hash->items[idx] = hi;
635 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
636 #define newvar(name) ((var*) hash_find(vhash, (name)))
637 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
638 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Locate the item called name in hash and subtract its name length
 * from glen. NOTE(review): the statements that unlink and free the
 * item are not among the visible lines - confirm against the full file. */
640 static void hash_remove(xhash *hash, const char *name)
642 hash_item *hi, **phi;
/* phi starts at the head slot of the bucket that name hashes to */
644 phi = &hash->items[hashidx(name) % hash->csize];
647 if (strcmp(hi->name, name) == 0) {
/* glen counts each stored name together with its NUL terminator */
648 hash->glen -= (strlen(name) + 1);
658 /* ------ some useful functions ------ */
660 static char *skip_spaces(char *p)
663 if (*p == '\\' && p[1] == '\n') {
666 } else if (*p != ' ' && *p != '\t') {
674 /* returns old *s, advances *s past word and terminating NUL */
675 static char *nextword(char **s)
678 while (*(*s)++ != '\0')
683 static char nextchar(char **s)
690 c = bb_process_escape_sequence((const char**)s);
691 if (c == '\\' && *s == pps)
696 static ALWAYS_INLINE int isalnum_(int c)
698 return (isalnum(c) || c == '_');
/* Parse a number starting at *pp; both conversion calls below receive
 * pp as their end pointer, so *pp is advanced past the parsed text. */
701 static double my_strtod(char **pp)
/* second char is 'x'/'X' (case folded by | 0x20) or a digit;
 * NOTE(review): the first half of this condition is not visible
 * here - presumably a test for a leading '0', confirm */
705 && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
/* base 0 lets strtoull recognize the 0x (hex) and 0 (octal) prefixes */
707 return strtoull(*pp, pp, 0);
710 return strtod(*pp, pp);
713 /* -------- working with variables (set/get/copy/etc) -------- */
715 static xhash *iamarray(var *v)
719 while (a->type & VF_CHILD)
722 if (!(a->type & VF_ARRAY)) {
724 a->x.array = hash_init();
729 static void clear_array(xhash *array)
734 for (i = 0; i < array->csize; i++) {
735 hi = array->items[i];
739 free(thi->data.v.string);
742 array->items[i] = NULL;
744 array->glen = array->nel = 0;
747 /* clear a variable */
748 static var *clrvar(var *v)
750 if (!(v->type & VF_FSTR))
753 v->type &= VF_DONTTOUCH;
759 /* assign string value to variable */
760 static var *setvar_p(var *v, char *value)
768 /* same as setvar_p but make a copy of string */
769 static var *setvar_s(var *v, const char *value)
/* NULL and "" are both passed on as NULL, so no copy is made for them */
771 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
774 /* same as setvar_s but sets USER flag */
775 static var *setvar_u(var *v, const char *value)
777 v = setvar_s(v, value);
782 /* set array element to user string */
783 static void setari_u(var *a, int idx, const char *s)
787 v = findvar(iamarray(a), itoa(idx));
791 /* assign numeric value to variable */
792 static var *setvar_i(var *v, double value)
795 v->type |= VF_NUMBER;
/* Return the string value of v; the result is never NULL. */
801 static const char *getvar_s(var *v)
803 /* if v is numeric and has no cached string, convert it to string */
804 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format with the current CONVFMT into the global g_buf buffer */
805 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
/* keep a private copy and set VF_CACHED so the test above fails next time */
806 v->string = xstrdup(g_buf);
807 v->type |= VF_CACHED;
/* a NULL string reads as the empty string */
809 return (v->string == NULL) ? "" : v->string;
812 static double getvar_i(var *v)
816 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
820 v->number = my_strtod(&s);
821 if (v->type & VF_USER) {
829 v->type |= VF_CACHED;
834 /* Used for operands of bitwise ops: numeric value of v as unsigned long */
835 static unsigned long getvar_i_int(var *v)
837 double d = getvar_i(v);
839 /* Casting doubles to longs is undefined for values outside
840 * of target type range. Try to widen it as much as possible */
842 return (unsigned long)d;
843 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
/* negate d, convert that to unsigned long, then negate again as long */
844 return - (long) (unsigned long) (-d);
/* Copy the value of src into dest. */
847 static var *copyvar(var *dest, const var *src)
/* take over src's value flags, but none of the static (VF_DONTTOUCH)
 * ones and not VF_FSTR: the string is duplicated with xstrdup below */
851 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
852 dest->number = src->number;
854 dest->string = xstrdup(src->string);
/* handle_special() returns at once unless dest has VF_SPECIAL set */
856 handle_special(dest);
860 static var *incvar(var *v)
862 return setvar_i(v, getvar_i(v) + 1.0);
865 /* return true if v is number or numeric string */
866 static int is_numeric(var *v)
/* XOR flips VF_DIRTY before masking, so the result is non-zero when
 * VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear */
869 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
872 /* return 1 when value of v corresponds to true, 0 otherwise */
873 static int istrue(var *v)
876 return (v->number != 0);
877 return (v->string && v->string[0]);
880 /* temporary variables allocator. Last allocated should be first freed */
881 static var *nvalloc(int n)
889 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
895 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
896 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
898 g_cb->pos = g_cb->nv;
900 /*g_cb->next = NULL; - xzalloc did it */
908 while (v < g_cb->pos) {
917 static void nvfree(var *v)
921 if (v < g_cb->nv || v >= g_cb->pos)
922 syntax_error(EMSG_INTERNAL_ERROR);
924 for (p = v; p < g_cb->pos; p++) {
925 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
926 clear_array(iamarray(p));
927 free(p->x.array->items);
930 if (p->type & VF_WALK) {
932 walker_list *w = p->x.walker;
933 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
937 debug_printf_walker(" free(%p)\n", w);
946 while (g_cb->prev && g_cb->pos == g_cb->nv) {
951 /* ------- awk program text parsing ------- */
953 /* Parse the next token pointed to by global g_pos, place the results into the
954 * global t_* variables. If the token isn't expected, give up. Return the token class
956 static uint32_t next_token(uint32_t expected)
958 #define concat_inserted (G.next_token__concat_inserted)
959 #define save_tclass (G.next_token__save_tclass)
960 #define save_info (G.next_token__save_info)
961 /* Initialized to TC_OPTERM: */
962 #define ltclass (G.next_token__ltclass)
973 } else if (concat_inserted) {
974 concat_inserted = FALSE;
975 t_tclass = save_tclass;
984 while (*p != '\n' && *p != '\0')
993 } else if (*p == '\"') {
998 if (*p == '\0' || *p == '\n')
999 syntax_error(EMSG_UNEXP_EOS);
1000 *s++ = nextchar(&pp);
1007 } else if ((expected & TC_REGEXP) && *p == '/') {
1011 if (*p == '\0' || *p == '\n')
1012 syntax_error(EMSG_UNEXP_EOS);
1016 s[-1] = bb_process_escape_sequence((const char **)&pp);
1029 } else if (*p == '.' || isdigit(*p)) {
1032 t_double = my_strtod(&pp);
1035 syntax_error(EMSG_UNEXP_TOKEN);
1039 /* search for something known */
1049 /* if token class is expected, token
1050 * matches and it's not a longer word,
1051 * then this is what we are looking for
1053 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1054 && *tl == *p && strncmp(p, tl, l) == 0
1055 && !((tc & TC_WORD) && isalnum_(p[l]))
1066 /* it's a name (var/array/function),
1067 * otherwise it's something wrong
1070 syntax_error(EMSG_UNEXP_TOKEN);
1073 while (isalnum_(*++p)) {
1078 /* also consume whitespace between functionname and bracket */
1079 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1093 /* skipping newlines in some cases */
1094 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1097 /* insert concatenation operator when needed */
1098 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1099 concat_inserted = TRUE;
1103 t_info = OC_CONCAT | SS | P(35);
1110 /* Are we ready for this? */
1111 if (!(ltclass & expected))
1112 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1113 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1116 #undef concat_inserted
1122 static void rollback_token(void)
1127 static node *new_node(uint32_t info)
1131 n = xzalloc(sizeof(node));
1133 n->lineno = g_lineno;
/* Turn n into an OC_REGEXP node for pattern s. re must point to two
 * regex_t slots: re[0] is compiled without REG_ICASE, re[1] with it. */
1137 static node *mk_re_node(const char *s, node *n, regex_t *re)
1139 n->info = OC_REGEXP;
1142 xregcomp(re, s, REG_EXTENDED);
1143 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1148 static node *condition(void)
1150 next_token(TC_SEQSTART);
1151 return parse_expr(TC_SEQTERM);
1154 /* parse expression terminated by given argument, return ptr
1155 * to built subtree. Terminator is eaten by parse_expr */
1156 static node *parse_expr(uint32_t iexp)
1165 sn.r.n = glptr = NULL;
1166 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1168 while (!((tc = next_token(xtc)) & iexp)) {
1169 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1170 /* input redirection (<) attached to glptr node */
1171 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1173 xtc = TC_OPERAND | TC_UOPPRE;
1176 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1177 /* for binary and postfix-unary operators, jump back over
1178 * previous operators with higher priority */
1180 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1181 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1185 if ((t_info & OPCLSMASK) == OC_TERNARY)
1187 cn = vn->a.n->r.n = new_node(t_info);
1189 if (tc & TC_BINOP) {
1191 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1192 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1194 next_token(TC_GETLINE);
1195 /* give maximum priority to this pipe */
1196 cn->info &= ~PRIMASK;
1197 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1201 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1206 /* for operands and prefix-unary operators, attach them
1209 cn = vn->r.n = new_node(t_info);
1211 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1212 if (tc & (TC_OPERAND | TC_REGEXP)) {
1213 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1214 /* one should be very careful with switch on tclass -
1215 * only simple tclasses should be used! */
1220 v = hash_search(ahash, t_string);
1222 cn->info = OC_FNARG;
1223 cn->l.aidx = v->x.aidx;
1225 cn->l.v = newvar(t_string);
1227 if (tc & TC_ARRAY) {
1229 cn->r.n = parse_expr(TC_ARRTERM);
1236 v = cn->l.v = xzalloc(sizeof(var));
1238 setvar_i(v, t_double);
1240 setvar_s(v, t_string);
1244 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1249 cn->r.f = newfunc(t_string);
1250 cn->l.n = condition();
1254 cn = vn->r.n = parse_expr(TC_SEQTERM);
1260 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1264 cn->l.n = condition();
1273 /* add node to chain. Return ptr to alloc'd node */
1274 static node *chain_node(uint32_t info)
1279 seq->first = seq->last = new_node(0);
1281 if (seq->programname != g_progname) {
1282 seq->programname = g_progname;
1283 n = chain_node(OC_NEWSOURCE);
1284 n->l.new_progname = xstrdup(g_progname);
1289 seq->last = n->a.n = new_node(OC_DONE);
1294 static void chain_expr(uint32_t info)
1298 n = chain_node(info);
1299 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1300 if (t_tclass & TC_GRPTERM)
1304 static node *chain_loop(node *nn)
1306 node *n, *n2, *save_brk, *save_cont;
1308 save_brk = break_ptr;
1309 save_cont = continue_ptr;
1311 n = chain_node(OC_BR | Vx);
1312 continue_ptr = new_node(OC_EXEC);
1313 break_ptr = new_node(OC_EXEC);
1315 n2 = chain_node(OC_EXEC | Vx);
1318 continue_ptr->a.n = n2;
1319 break_ptr->a.n = n->r.n = seq->last;
1321 continue_ptr = save_cont;
1322 break_ptr = save_brk;
1327 /* parse group and attach it to chain */
1328 static void chain_group(void)
1334 c = next_token(TC_GRPSEQ);
1335 } while (c & TC_NEWLINE);
1337 if (c & TC_GRPSTART) {
1338 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1339 if (t_tclass & TC_NEWLINE)
1344 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1346 chain_expr(OC_EXEC | Vx);
1347 } else { /* TC_STATEMNT */
1348 switch (t_info & OPCLSMASK) {
1350 n = chain_node(OC_BR | Vx);
1351 n->l.n = condition();
1353 n2 = chain_node(OC_EXEC);
1355 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1357 n2->a.n = seq->last;
1365 n = chain_loop(NULL);
1370 n2 = chain_node(OC_EXEC);
1371 n = chain_loop(NULL);
1373 next_token(TC_WHILE);
1374 n->l.n = condition();
1378 next_token(TC_SEQSTART);
1379 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1380 if (t_tclass & TC_SEQTERM) { /* for-in */
1381 if ((n2->info & OPCLSMASK) != OC_IN)
1382 syntax_error(EMSG_UNEXP_TOKEN);
1383 n = chain_node(OC_WALKINIT | VV);
1386 n = chain_loop(NULL);
1387 n->info = OC_WALKNEXT | Vx;
1389 } else { /* for (;;) */
1390 n = chain_node(OC_EXEC | Vx);
1392 n2 = parse_expr(TC_SEMICOL);
1393 n3 = parse_expr(TC_SEQTERM);
1403 n = chain_node(t_info);
1404 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1405 if (t_tclass & TC_OUTRDR) {
1407 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1409 if (t_tclass & TC_GRPTERM)
1414 n = chain_node(OC_EXEC);
1419 n = chain_node(OC_EXEC);
1420 n->a.n = continue_ptr;
1423 /* delete, next, nextfile, return, exit */
1430 static void parse_program(char *p)
1439 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1440 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1442 if (tclass & TC_OPTERM)
1446 if (tclass & TC_BEGIN) {
1450 } else if (tclass & TC_END) {
1454 } else if (tclass & TC_FUNCDECL) {
1455 next_token(TC_FUNCTION);
1457 f = newfunc(t_string);
1458 f->body.first = NULL;
1460 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1461 v = findvar(ahash, t_string);
1462 v->x.aidx = f->nargs++;
1464 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1471 } else if (tclass & TC_OPSEQ) {
1473 cn = chain_node(OC_TEST);
1474 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1475 if (t_tclass & TC_GRPSTART) {
1479 chain_node(OC_PRINT);
1481 cn->r.n = mainseq.last;
1483 } else /* if (tclass & TC_GRPSTART) */ {
1491 /* -------- program execution part -------- */
1493 static node *mk_splitter(const char *s, tsplitter *spl)
1501 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1503 regfree(ire); // TODO: nuke ire, use re+1?
1505 if (strlen(s) > 1) {
1506 mk_re_node(s, n, re);
1508 n->info = (uint32_t) *s;
1514 /* use node as a regular expression. Supplied with node ptr and regex_t
1515 * storage space. Return ptr to regex (if result points to preg, it should
1516 * be regfree'd manually later)
1518 static regex_t *as_regex(node *op, regex_t *preg)
1524 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1525 return icase ? op->r.ire : op->l.re;
1528 s = getvar_s(evaluate(op, v));
1530 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1531 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1532 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1533 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1534 * (maybe gsub is not supposed to use REG_EXTENDED?).
1536 if (regcomp(preg, s, cflags)) {
1537 cflags &= ~REG_EXTENDED;
1538 xregcomp(preg, s, cflags);
1544 /* gradually increasing buffer: when b is NULL or n >= *size, b is
 * reallocated to n + n/2 + 80 bytes and *size is updated to match */
1545 static char* qrealloc(char *b, int n, int *size)
1547 if (!b || n >= *size) {
1548 *size = n + (n>>1) + 80;
1549 b = xrealloc(b, *size);
1554 /* resize field storage space */
1555 static void fsrealloc(int size)
1559 if (size >= maxfields) {
/* grow with 16 entries of headroom beyond the requested size */
1561 maxfields = size + 16;
1562 Fields = xrealloc(Fields, maxfields * sizeof(var));
/* set entries from i up to the new maxfields to VF_SPECIAL, no string */
1563 for (; i < maxfields; i++) {
1564 Fields[i].type = VF_SPECIAL;
1565 Fields[i].string = NULL;
/* fewer fields than before: walk the fields from size up to nfields */
1569 if (size < nfields) {
1570 for (i = size; i < nfields; i++) {
1577 static int awk_split(const char *s, node *spl, char **slist)
1582 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1584 /* in worst case, each char would be a separate field */
1585 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1588 c[0] = c[1] = (char)spl->info;
1590 if (*getvar_s(intvar[RS]) == '\0')
1593 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1595 return n; /* "": zero fields */
1596 n++; /* at least one field will be there */
1598 l = strcspn(s, c+2); /* len till next NUL or \n */
1599 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1600 && pmatch[0].rm_so <= l
1602 l = pmatch[0].rm_so;
1603 if (pmatch[0].rm_eo == 0) {
1607 n++; /* we saw yet another delimiter */
1609 pmatch[0].rm_eo = l;
1614 /* make sure we remove *all* of the separator chars */
1617 } while (++l < pmatch[0].rm_eo);
1619 s += pmatch[0].rm_eo;
1623 if (c[0] == '\0') { /* null split */
1631 if (c[0] != ' ') { /* single-character split */
1633 c[0] = toupper(c[0]);
1634 c[1] = tolower(c[1]);
1638 while ((s1 = strpbrk(s1, c))) {
1646 s = skip_whitespace(s);
1650 while (*s && !isspace(*s))
1657 static void split_f0(void)
1659 /* static char *fstrings; */
1660 #define fstrings (G.split_f0__fstrings)
1671 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1674 for (i = 0; i < n; i++) {
1675 Fields[i].string = nextword(&s);
1676 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1679 /* set NF manually to avoid side effects */
1681 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1682 intvar[NF]->number = nfields;
1686 /* Perform additional actions when some internal variables changed.
 * Variables without VF_SPECIAL are filtered out by the first test.
 *  - NF: $0 is rebuilt by joining the first NF fields with OFS
 *  - $0 (F0): is_f0_split is reset to FALSE
 *  - FS / RS: the matching splitter is rebuilt with mk_splitter()
 *  - IGNORECASE: has a branch of its own
 * The closing lines compute v - Fields and raise NF to at least that
 * index + 1, i.e. they treat v as an element of Fields[]. */
1687 static void handle_special(var *v)
1691 const char *sep, *s;
1692 int sl, l, len, i, bsize;
1694 if (!(v->type & VF_SPECIAL))
1697 if (v == intvar[NF]) {
1698 n = (int)getvar_i(v);
1701 /* recalculate $0 */
1702 sep = getvar_s(intvar[OFS]);
1706 for (i = 0; i < n; i++) {
1707 s = getvar_s(&Fields[i]);
1710 memcpy(b+len, sep, sl);
/* make room for this field plus one separator */
1713 b = qrealloc(b, len+l+sl, &bsize);
1714 memcpy(b+len, s, l);
/* setvar_p() receives the freshly built buffer b */
1719 setvar_p(intvar[F0], b);
1722 } else if (v == intvar[F0]) {
1723 is_f0_split = FALSE;
1725 } else if (v == intvar[FS]) {
1726 mk_splitter(getvar_s(v), &fsplitter);
1728 } else if (v == intvar[RS]) {
1729 mk_splitter(getvar_s(v), &rsplitter);
1731 } else if (v == intvar[IGNORECASE]) {
1735 n = getvar_i(intvar[NF]);
1736 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1737 /* right here v is invalid. Just to note... (presumably because
 * assigning NF may reallocate Fields[] -- TODO confirm) */
1741 /* Step through func/builtin/etc arguments.
 * pn is the address of the caller's node pointer and the return value
 * is a node pointer. A node of class OC_COMMA gets special treatment;
 * presumably comma nodes chain the arguments -- TODO confirm. */
1742 static node *nextarg(node **pn)
1747 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
/* Prepare variable v for a walk over array (used for OC_WALKINIT).
 * A new walker_list is allocated with array->glen + 1 extra bytes, its
 * cur and end pointers start at the beginning of its buffer, and it is
 * linked to the walker v already carried when VF_WALK was set. Item
 * names from the hash buckets are then copied into the buffer. */
1756 static void hashwalk_init(var *v, xhash *array)
1761 walker_list *prev_walker;
1763 if (v->type & VF_WALK) {
1764 prev_walker = v->x.walker;
1769 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1771 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1772 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1773 w->cur = w->end = w->wbuf;
/* remember the earlier walker; hashwalk_next() puts it back when
 * this walk is finished */
1774 w->prev = prev_walker;
1775 for (i = 0; i < array->csize; i++) {
1776 hi = array->items[i];
1778 strcpy(w->end, hi->name);
/* Advance the walk attached to v (used for OC_WALKNEXT).
 * Once the cursor has reached the end of the name buffer, v->x.walker
 * is switched back to the previous walker. Otherwise v is set to the
 * next name taken from the buffer with nextword(). evaluate() uses the
 * int result to pick the next node. */
1785 static int hashwalk_next(var *v)
1787 walker_list *w = v->x.walker;
1789 if (w->cur >= w->end) {
1790 walker_list *prev_walker = w->prev;
1792 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1794 v->x.walker = prev_walker;
1798 setvar_s(v, nextword(&w->cur));
1802 /* Evaluate node, return 1 when result is true, 0 otherwise.
 * The value is computed by evaluate() into the shared G.ptest__v and
 * reduced to a truth value by istrue(). */
1803 static int ptest(node *pattern)
1805 /* ptest__v is "static": to save stack space? */
1806 return istrue(evaluate(pattern, &G.ptest__v));
1809 /* Read next record from stream rsm into a variable v.
 * The record separator is taken from rsplitter. The visible branches
 * look for the end of the record with regexec() for an OC_REGEXP
 * separator, strchr() for a separator character, memchr() for a NUL
 * byte, and strstr() for an empty line with runs of newlines skipped.
 * Input is pulled from the descriptor behind rsm->F with safe_read()
 * into a buffer that is grown with qrealloc(). One branch copies errno
 * into ERRNO, and RT is set from the buffer position b+so.
 * Callers in this file treat a result greater than zero as
 * a record was read. */
1810 static int awk_getline(rstream *rsm, var *v)
1813 regmatch_t pmatch[2];
1814 int size, a, p, pp = 0;
1815 int fd, so, eo, r, rp;
1818 /* we're using our own buffer since we need access to accumulating
1821 fd = fileno(rsm->F);
1826 c = (char) rsplitter.n.info;
1830 m = qrealloc(m, 256, &size);
1837 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1838 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1839 b, 1, pmatch, 0) == 0) {
1840 so = pmatch[0].rm_so;
1841 eo = pmatch[0].rm_eo;
1845 } else if (c != '\0') {
1846 s = strchr(b+pp, c);
1848 s = memchr(b+pp, '\0', p - pp);
1855 while (b[rp] == '\n')
1857 s = strstr(b+rp, "\n\n");
1860 while (b[eo] == '\n')
1869 memmove(m, m+a, p+1);
1874 m = qrealloc(m, a+p+128, &size);
1877 p += safe_read(fd, b+p, size-p-1);
1881 setvar_i(intvar[ERRNO], errno);
1890 c = b[so]; b[so] = '\0';
1894 c = b[eo]; b[eo] = '\0';
1895 setvar_s(intvar[RT], b+so);
// Format number n into buffer b (at most size bytes, as passed to
// snprintf) using the printf-style string format.
// If int_as_int is set and n equals its own int truncation, the value
// is printed with %d and format is not consulted. Otherwise the last
// character of format selects the conversion: d i o u x X receive
// (int)n, e E f g G receive n as a double, and any other character
// raises a syntax error (EMSG_INV_FMT).
// The int result is presumably the snprintf count r -- TODO confirm.
1907 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1911 const char *s = format;
1913 if (int_as_int && n == (int)n) {
1914 r = snprintf(b, size, "%d", (int)n);
// walk to the end of format so that c ends up holding its last character
1916 do { c = *s; } while (c && *++s);
// NOTE(review): strchr also matches the terminating NUL, so an empty
// format (c == 0) takes the integer branch
1917 if (strchr("diouxX", c)) {
1918 r = snprintf(b, size, format, (int)n);
1919 } else if (strchr("eEfgG", c)) {
1920 r = snprintf(b, size, format, n);
1922 syntax_error(EMSG_INV_FMT);
1928 /* Formatted output into an allocated buffer, return ptr to buffer.
 * The first entry of the argument list n is evaluated and duplicated as
 * the format string. The format is scanned for conversions; a doubled
 * percent sign is skipped by the scan. Each conversion consumes one
 * more argument through nextarg():
 *  - c (or a format that ends early): a numeric argument is cast to
 *    char, a string argument contributes its first character
 *  - s: the string value, with the buffer grown by its length first
 *  - anything else: the numeric value is formatted by fmt_num()
 * A syntax error is raised for formats that use a star field. */
1929 static char *awk_printf(node *n)
1934 int i, j, incr, bsize;
1939 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1944 while (*f && (*f != '%' || *++f == '%'))
1946 while (*f && !isalpha(*f)) {
1948 syntax_error("%*x formats are not supported");
/* reserve room for the literal text seen so far plus MAXVARFMT bytes
 * for one converted value */
1952 incr = (f - s) + MAXVARFMT;
1953 b = qrealloc(b, incr + i, &bsize);
1959 arg = evaluate(nextarg(&n), v);
1962 if (c == 'c' || !c) {
1963 i += sprintf(b+i, s, is_numeric(arg) ?
1964 (char)getvar_i(arg) : *getvar_s(arg));
1965 } else if (c == 's') {
1967 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1968 i += sprintf(b+i, s, s1);
1970 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1974 /* if there was an error while sprintf, return value is negative */
/* resize the buffer to exactly i + 1 bytes */
1981 b = xrealloc(b, i + 1);
1986 /* common substitution routine
1987 * replace (nm) substring of (src) that match (n) with (repl), store
1988 * result into (dest), return number of substitutions. If nm=0, replace
1989 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1990 * subexpression matching (\1-\9)
1992 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
// NOTE: in the description above, (n) is the parameter rn and dst is
// the parameter dest.
1997 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// ten slots: the whole match plus subexpressions 1 to 9
1998 regmatch_t pmatch[10];
2001 re = as_regex(rn, &sreg);
// REG_NOTBOL is passed for every attempt that does not start at the
// beginning of the source string
2010 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
2011 so = pmatch[0].rm_so;
2012 eo = pmatch[0].rm_eo;
// grow the output, then copy the source text up to the end of the match
2014 ds = qrealloc(ds, di + eo + rl, &dssize);
2015 memcpy(ds + di, sp, eo);
2021 for (s = repl; *s; s++) {
// an ampersand always refers to matched text; a digit does so only
// when ex is set
2027 if (c == '&' || (ex && c >= '0' && c <= '9')) {
2028 di -= ((nbs + 3) >> 1);
// copy the text of match slot j into the output
2037 n = pmatch[j].rm_eo - pmatch[j].rm_so;
2038 ds = qrealloc(ds, di + rl + n, &dssize);
2039 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// append the remaining text at sp, including its terminator
2057 ds = qrealloc(ds, di + strlen(sp), &dssize);
2058 strcpy(ds + di, sp);
// Convert the date specification string ds into a time value by filling
// a struct tm and calling mktime().
// ds is scanned as six unsigned numbers (year month mday hour min sec)
// followed by one signed number. The visible range checks reject a
// month below 1 and a year below 1900; the year is rebased by 1900
// before mktime() is called.
// NOTE(review): the time_t result of mktime() is returned as int.
2065 static NOINLINE int do_mktime(const char *ds)
2070 /*memset(&then, 0, sizeof(then)); - not needed */
2071 then.tm_isdst = -1; /* default is unknown */
2073 /* manpage of mktime says these fields are ints,
2074 * so we can sscanf stuff directly into them */
2075 count = sscanf(ds, "%u %u %u %u %u %u %d",
2076 &then.tm_year, &then.tm_mon, &then.tm_mday,
2077 &then.tm_hour, &then.tm_min, &then.tm_sec,
2081 || (unsigned)then.tm_mon < 1
2082 || (unsigned)then.tm_year < 1900
/* struct tm counts years from 1900 */
2088 then.tm_year -= 1900;
2090 return mktime(&then);
/* Execute the builtin call described by node op, using res as the
 * result variable.
 * Up to four arguments are collected with nextarg(). Bits of op->info
 * decide how far each one is prepared: with 0x09000000 the argument is
 * evaluated into tv[i], with 0x08000000 its string value is fetched as
 * well. The top two bits (info >> 30) give the minimum argument count;
 * fewer arguments raise EMSG_TOO_FEW_ARGS. */
2093 static NOINLINE var *exec_builtin(node *op, var *res)
2095 #define tspl (G.exec_builtin__tspl)
2101 regmatch_t pmatch[2];
2110 isr = info = op->info;
2113 av[2] = av[3] = NULL;
2114 for (i = 0; i < 4 && op; i++) {
2115 an[i] = nextarg(&op);
2116 if (isr & 0x09000000)
2117 av[i] = evaluate(an[i], &tv[i]);
2118 if (isr & 0x08000000)
2119 as[i] = getvar_s(av[i]);
2124 if ((uint32_t)nargs < (info >> 30))
2125 syntax_error(EMSG_TOO_FEW_ARGS);
2131 #if ENABLE_FEATURE_AWK_LIBM
2132 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2134 syntax_error(EMSG_NO_MATH);
/* a regexp node is used as the splitter as is; any other third
 * argument is evaluated and compiled into tspl by mk_splitter() */
2142 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2143 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2148 n = awk_split(as[0], spl, &s);
/* the target array is emptied, then filled with indices 1..n */
2150 clear_array(iamarray(av[1]));
2151 for (i = 1; i <= n; i++)
2152 setari_u(av[1], i, nextword(&s));
/* the position argument is 1-based; i is the 0-based offset */
2162 i = getvar_i(av[1]) - 1;
2167 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2170 s = xstrndup(as[0]+i, n);
2175 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2176 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2178 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2182 setvar_i(res, ~getvar_i_int(av[0]));
2186 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2190 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2194 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2198 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2204 s1 = s = xstrdup(as[0]);
2206 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
// ASCII-only case conversion: letters are detected by folding bit 0x20
// and the same bit is cleared (B_up) or set to convert
2207 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2208 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2218 l = strlen(as[0]) - ll;
2219 if (ll > 0 && l >= 0) {
2221 char *s = strstr(as[0], as[1]);
// convert the 0-based offset of the hit into a 1-based position
2223 n = (s - as[0]) + 1;
2225 /* this piece of code is terribly slow and
2226 * really should be rewritten
2228 for (i = 0; i <= l; i++) {
2229 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2241 tt = getvar_i(av[1]);
2244 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2245 i = strftime(g_buf, MAXVARFMT,
2246 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2249 setvar_s(res, g_buf);
2253 setvar_i(res, do_mktime(as[0]));
2257 re = as_regex(an[1], &sreg);
2258 n = regexec(re, as[0], 1, pmatch, 0);
// with rm_so = 0 and rm_eo = -1 the assignments below give RSTART 0
// and RLENGTH -1
2263 pmatch[0].rm_so = 0;
2264 pmatch[0].rm_eo = -1;
2266 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2267 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2268 setvar_i(res, pmatch[0].rm_so);
// the three awk_sub calls differ in the match count argument (av[2],
// 0 or 1), the source and destination variables, and the ex flag
2274 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2278 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2282 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2292 * Evaluate node - the heart of the program. Supplied with subtree
2293 * and place where to store result. returns ptr to result.
2295 #define XC(n) ((n) >> 8)
// XC() shifts an opcode class right by 8 bits; the switch expression
// and every case label in evaluate() go through it.
2297 static var *evaluate(node *op, var *res)
2299 /* This procedure is recursive so we should count every byte */
2300 #define fnargs (G.evaluate__fnargs)
2301 /* seed is initialized to 1 */
2302 #define seed (G.evaluate__seed)
2303 #define sreg (G.evaluate__sreg)
2308 return setvar_s(res, NULL);
2316 } L = L; /* for compiler */
2327 opn = (opinfo & OPNMASK);
/* keep the global line number in step with the node being run */
2328 g_lineno = op->lineno;
2331 /* execute inevitable things */
/* OF_RES1 / OF_RES2: evaluate the left / right operand into v1[0] /
 * v1[1]; OF_STR1 / OF_STR2 / OF_NUM1: additionally fetch the string
 * or numeric value of the operand */
2332 if (opinfo & OF_RES1)
2333 L.v = evaluate(op1, v1);
2334 if (opinfo & OF_RES2)
2335 R.v = evaluate(op->r.n, v1+1);
2336 if (opinfo & OF_STR1)
2337 L.s = getvar_s(L.v);
2338 if (opinfo & OF_STR2)
2339 R.s = getvar_s(R.v);
2340 if (opinfo & OF_NUM1)
2341 L_d = getvar_i(L.v);
2343 switch (XC(opinfo & OPCLSMASK)) {
2345 /* -- iterative node type -- */
2349 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2350 /* it's range pattern */
/* OF_CHECKED is set on the node once the first pattern matched and
 * is cleared again when the second pattern matches */
2351 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2352 op->info |= OF_CHECKED;
2353 if (ptest(op1->r.n))
2354 op->info &= ~OF_CHECKED;
2360 op = ptest(op1) ? op->a.n : op->r.n;
2364 /* just evaluate an expression, also used as unconditional jump */
2368 /* branch, used in if-else and various loops */
2370 op = istrue(L.v) ? op->a.n : op->r.n;
2373 /* initialize for-in loop */
2374 case XC( OC_WALKINIT ):
2375 hashwalk_init(L.v, iamarray(R.v));
2378 /* get next array item */
2379 case XC( OC_WALKNEXT ):
2380 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2383 case XC( OC_PRINT ):
2384 case XC( OC_PRINTF ): {
/* R.s names the output target; it is opened through popen() or
 * xfopen() depending on the redirection kind */
2388 rstream *rsm = newfile(R.s);
2391 rsm->F = popen(R.s, "w");
2393 bb_perror_msg_and_die("popen");
2396 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2402 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2404 fputs(getvar_s(intvar[F0]), F);
2407 var *v = evaluate(nextarg(&op1), v1);
/* numeric values are formatted with OFMT, anything else is
 * written as its string value */
2408 if (v->type & VF_NUMBER) {
2409 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2413 fputs(getvar_s(v), F);
2417 fputs(getvar_s(intvar[OFS]), F);
2420 fputs(getvar_s(intvar[ORS]), F);
2422 } else { /* OC_PRINTF */
2423 char *s = awk_printf(op1);
2431 case XC( OC_DELETE ): {
2432 uint32_t info = op1->info & OPCLSMASK;
/* only a variable or a function argument can be deleted from;
 * anything else is reported as EMSG_NOT_ARRAY */
2435 if (info == OC_VAR) {
2437 } else if (info == OC_FNARG) {
2438 v = &fnargs[op1->l.aidx];
2440 syntax_error(EMSG_NOT_ARRAY);
2446 s = getvar_s(evaluate(op1->r.n, v1));
2447 hash_remove(iamarray(v), s);
2449 clear_array(iamarray(v));
2454 case XC( OC_NEWSOURCE ):
2455 g_progname = op->l.new_progname;
2458 case XC( OC_RETURN ):
2462 case XC( OC_NEXTFILE ):
2473 /* -- recursive node type -- */
2477 if (L.v == intvar[NF])
2481 case XC( OC_FNARG ):
2482 L.v = &fnargs[op->l.aidx];
/* with a subscript (op->r.n) the result is the array element named
 * R.s, otherwise it is the variable itself */
2484 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2488 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2491 case XC( OC_REGEXP ):
2493 L.s = getvar_s(intvar[F0]);
2496 case XC( OC_MATCH ):
2500 regex_t *re = as_regex(op1, &sreg);
2501 int i = regexec(re, L.s, 0, NULL, 0);
/* the negated form of the operator (opn is !) inverts the outcome */
2504 setvar_i(res, (i == 0) ^ (opn == '!'));
2509 /* if source is a temporary string, just relink it to dest */
2510 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2511 //then L.v ends up being a string, which is wrong
2512 // if (R.v == v1+1 && R.v->string) {
2513 // res = setvar_p(L.v, R.v->string);
2514 // R.v->string = NULL;
2516 res = copyvar(L.v, R.v);
2520 case XC( OC_TERNARY ):
/* the right child must be an OC_COLON node holding both branches */
2521 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2522 syntax_error(EMSG_POSSIBLE_ERROR);
2523 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2526 case XC( OC_FUNC ): {
2528 const char *sv_progname;
2530 if (!op->r.f->body.first)
2531 syntax_error(EMSG_UNDEF_FUNC);
/* one var per declared argument, plus one spare */
2533 vbeg = v = nvalloc(op->r.f->nargs + 1);
2535 var *arg = evaluate(nextarg(&op1), v1);
2537 v->type |= VF_CHILD;
2539 if (++v - vbeg >= op->r.f->nargs)
/* g_progname is saved around the run of the function body and
 * restored afterwards */
2545 sv_progname = g_progname;
2547 res = evaluate(op->r.f->body.first, res);
2549 g_progname = sv_progname;
2556 case XC( OC_GETLINE ):
2557 case XC( OC_PGETLINE ): {
2564 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2565 rsm->F = popen(L.s, "r");
2566 rsm->is_pipe = TRUE;
2568 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2573 iF = next_input_file();
2578 setvar_i(intvar[ERRNO], errno);
2586 i = awk_getline(rsm, R.v);
/* FNR is bumped only for a successful read when op1 is not set */
2587 if (i > 0 && !op1) {
2588 incvar(intvar[FNR]);
2595 /* simple builtins */
2596 case XC( OC_FBLTIN ): {
2599 double R_d = R_d; /* for compiler */
2607 R_d = (double)rand() / (double)RAND_MAX;
2609 #if ENABLE_FEATURE_AWK_LIBM
2635 syntax_error(EMSG_NO_MATH);
/* an explicit argument becomes the seed, otherwise the current time */
2640 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2650 L.s = getvar_s(intvar[F0]);
/* system() is attempted only when ENABLE_FEATURE_ALLOW_EXEC is on and
 * the command string is non-empty; its result is shifted right by 8 */
2656 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2657 ? (system(L.s) >> 8) : 0;
2663 } else if (L.s && *L.s) {
2673 rsm = (rstream *)hash_search(fdhash, L.s);
/* pipes are closed with pclose(), plain streams with fclose() */
2675 i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2677 hash_remove(fdhash, L.s);
2680 setvar_i(intvar[ERRNO], errno);
2688 case XC( OC_BUILTIN ):
2689 res = exec_builtin(op, res);
2692 case XC( OC_SPRINTF ):
2693 setvar_p(res, awk_printf(op1));
2696 case XC( OC_UNARY ): {
2699 Ld = R_d = getvar_i(R.v);
2726 case XC( OC_FIELD ): {
2727 int i = (int)getvar_i(R.v);
/* field number i is stored at Fields[i - 1] */
2734 res = &Fields[i - 1];
2739 /* concatenation (" ") and index joining (",") */
2740 case XC( OC_CONCAT ):
2741 case XC( OC_COMMA ): {
2742 const char *sep = "";
2743 if ((opinfo & OPCLSMASK) == OC_COMMA)
2744 sep = getvar_s(intvar[SUBSEP]);
2745 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
/* the right operand is tested only when the left one does not
 * already decide the result */
2750 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2754 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2757 case XC( OC_BINARY ):
2758 case XC( OC_REPLACE ): {
2759 double R_d = getvar_i(R.v);
2772 syntax_error(EMSG_DIV_BY_ZERO);
2776 #if ENABLE_FEATURE_AWK_LIBM
2777 L_d = pow(L_d, R_d);
2779 syntax_error(EMSG_NO_MATH);
2784 syntax_error(EMSG_DIV_BY_ZERO);
/* NOTE(review): the quotient is truncated through int, so quotients
 * outside the int range are not representable here */
2785 L_d -= (int)(L_d / R_d) * R_d;
/* OC_BINARY stores into res; the other class handled here
 * (OC_REPLACE) stores back into the left operand */
2788 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2792 case XC( OC_COMPARE ): {
2793 int i = i; /* for compiler */
/* numeric comparison only when both sides are numeric, otherwise a
 * string comparison that honours icase */
2796 if (is_numeric(L.v) && is_numeric(R.v)) {
2797 Ld = getvar_i(L.v) - getvar_i(R.v);
2799 const char *l = getvar_s(L.v);
2800 const char *r = getvar_s(R.v);
2801 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2803 switch (opn & 0xfe) {
/* bit 0 of opn inverts the comparison result */
2814 setvar_i(res, (i == 0) ^ (opn & 1));
2819 syntax_error(EMSG_POSSIBLE_ERROR);
2821 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2823 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2837 /* -------- main & co. -------- */
/* Shutdown path, called with the exit status r.
 * The END sequence (endseq) is evaluated, then every entry of fdhash
 * that holds an open stream flagged as a pipe is closed with pclose(),
 * which waits for the child process. */
2839 static int awk_exit(int r)
2850 evaluate(endseq.first, &tv);
2853 /* waiting for children */
2854 for (i = 0; i < fdhash->csize; i++) {
2855 hi = fdhash->items[i];
2857 if (hi->data.rs.F && hi->data.rs.is_pipe)
2858 pclose(hi->data.rs.F);
2866 /* if expr looks like "var=value", perform assignment and return 1,
2867 * otherwise return 0.
 * The work is done on a private copy of expr (exprc). The text must
 * start with a character accepted by isalnum_() and contain an equals
 * sign. The value part is rewritten in place character by character
 * through nextchar() -- presumably to decode escape sequences, TODO
 * confirm -- and then assigned with setvar_u() to the variable named
 * by exprc. */
2868 static int is_assignment(const char *expr)
2870 char *exprc, *s, *s0, *s1;
2872 exprc = xstrdup(expr);
2873 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2881 *s1++ = nextchar(&s);
2884 setvar_u(newvar(exprc), s0);
2889 /* Switch to next input file.
 * ARGIND is advanced through ARGV until it would reach ARGC. An entry
 * is opened with xfopen_stdin() only when it is non-empty and
 * is_assignment() does not accept it as a var=value assignment (in
 * which case the assignment has been carried out instead). The name of
 * the chosen entry is stored in FILENAME and files_happen is set.
 * The stream state (rsm) and the files_happen flag live in the global
 * G structure. */
2890 static rstream *next_input_file(void)
2892 #define rsm (G.next_input_file__rsm)
2893 #define files_happen (G.next_input_file__files_happen)
2896 const char *fname, *ind;
/* start the new stream with an empty read buffer position */
2901 rsm.pos = rsm.adv = 0;
2904 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
/* ARGV is indexed by the string form of the incremented ARGIND */
2910 ind = getvar_s(incvar(intvar[ARGIND]));
2911 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2912 if (fname && *fname && !is_assignment(fname))
2913 F = xfopen_stdin(fname);
2917 files_happen = TRUE;
2918 setvar_s(intvar[FILENAME], fname);
2925 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2926 int awk_main(int argc, char **argv)
2929 char *opt_F, *opt_W;
2930 llist_t *list_v = NULL;
2931 llist_t *list_f = NULL;
2936 char *vnames = (char *)vNames; /* cheat */
2937 char *vvalues = (char *)vValues;
2941 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2942 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2943 if (ENABLE_LOCALE_SUPPORT)
2944 setlocale(LC_NUMERIC, "C");
2948 /* allocate global buffer */
2949 g_buf = xmalloc(MAXVARFMT + 1);
2951 vhash = hash_init();
2952 ahash = hash_init();
2953 fdhash = hash_init();
2954 fnhash = hash_init();
2956 /* initialize variables */
2957 for (i = 0; *vnames; i++) {
2958 intvar[i] = v = newvar(nextword(&vnames));
2959 if (*vvalues != '\377')
2960 setvar_s(v, nextword(&vvalues));
2964 if (*vnames == '*') {
2965 v->type |= VF_SPECIAL;
2970 handle_special(intvar[FS]);
2971 handle_special(intvar[RS]);
2973 newfile("/dev/stdin")->F = stdin;
2974 newfile("/dev/stdout")->F = stdout;
2975 newfile("/dev/stderr")->F = stderr;
2977 /* Huh, people report that sometimes environ is NULL. Oh well. */
2978 if (environ) for (envp = environ; *envp; envp++) {
2979 /* environ is writable, thus we don't strdup it needlessly */
2981 char *s1 = strchr(s, '=');
2984 /* Both findvar and setvar_u take const char*
2985 * as 2nd arg -> environment is not trashed */
2986 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2990 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2991 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2995 setvar_s(intvar[FS], opt_F); // -F
2996 while (list_v) { /* -v */
2997 if (!is_assignment(llist_pop(&list_v)))
3000 if (list_f) { /* -f */
3005 g_progname = llist_pop(&list_f);
3006 from_file = xfopen_stdin(g_progname);
3007 /* one byte is reserved for some trick in next_token */
3008 for (i = j = 1; j > 0; i += j) {
3009 s = xrealloc(s, i + 4096);
3010 j = fread(s + i, 1, 4094, from_file);
3014 parse_program(s + 1);
3018 } else { // no -f: take program from 1st parameter
3021 g_progname = "cmd. line";
3022 parse_program(*argv++);
3024 if (opt & 0x8) // -W
3025 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3027 /* fill in ARGV array */
3028 setvar_i(intvar[ARGC], argc);
3029 setari_u(intvar[ARGV], 0, "awk");
3032 setari_u(intvar[ARGV], ++i, *argv++);
3034 evaluate(beginseq.first, &tv);
3035 if (!mainseq.first && !endseq.first)
3036 awk_exit(EXIT_SUCCESS);
3038 /* input file could already be opened in BEGIN block */
3040 iF = next_input_file();
3042 /* passing through input files */
3045 setvar_i(intvar[FNR], 0);
3047 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3050 incvar(intvar[FNR]);
3051 evaluate(mainseq.first, &tv);
3058 syntax_error(strerror(errno));
3060 iF = next_input_file();
3063 awk_exit(EXIT_SUCCESS);