1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
17 /* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
/* Switch for walker debug tracing. While the no-op definition below is
 * present the #ifndef branch is skipped; without it, the fallback
 * definition prints its arguments to stderr via fprintf(). */
19 #define debug_printf_walker(...) do {} while (0)
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
/* Bit flags kept in var.type. VF_NUMBER and VF_ARRAY describe the kind of
 * value; the higher bits record caching, origin and ownership details.
 * VF_DONTTOUCH collects the flags that clrvar() preserves and that
 * copyvar() does not copy from the source variable. */
31 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
32 #define VF_ARRAY 0x0002 /* 1 = it's an array */
34 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
35 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
37 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
42 /* these flags are static, don't change them when value is changed */
43 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
/* State of one for..in iteration over an array. `prev` links to the walker
 * that was active on the same variable before this one (hashwalk_init()
 * stores the previous walker there, hashwalk_next() restores it). */
45 typedef struct walker_list {
48 struct walker_list *prev;
/* An awk variable. `type` holds VF_* flags. The aidx/array/parent/walker
 * members are reached as v->x.aidx, v->x.array, ... elsewhere in this file. */
53 typedef struct var_s {
54 unsigned type; /* flags */
58 int aidx; /* func arg idx (for compilation stage) */
59 struct xhash_s *array; /* array ptr */
60 struct var_s *parent; /* for func args, ptr to actual parameter */
61 walker_list *walker; /* list of array elements (for..in) */
65 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
66 typedef struct chain_s {
69 const char *programname;
73 typedef struct func_s {
79 typedef struct rstream_s {
/* Entry of a hash bucket chain. The payload members are reached as
 * hi->data.v / .rs / .f; `name` is stored inline, which is why hash_find()
 * allocates sizeof(*hi) + strlen(name) + 1 bytes per entry. */
88 typedef struct hash_item_s {
90 struct var_s v; /* variable/array hash */
91 struct rstream_s rs; /* redirect streams hash */
92 struct func_s f; /* functions hash */
94 struct hash_item_s *next; /* next in chain */
95 char name[1]; /* really it's longer */
/* Chained hash table. `glen` is the total of strlen(name) + 1 over the
 * items (hash_remove() subtracts exactly that amount) and is used by
 * hashwalk_init() to size the buffer that receives all item names. */
98 typedef struct xhash_s {
99 unsigned nel; /* num of elements */
100 unsigned csize; /* current hash size */
101 unsigned nprime; /* next hash size in PRIMES[] */
102 unsigned glen; /* summary length of item names */
103 struct hash_item_s **items;
107 typedef struct node_s {
128 /* Block of temporary variables */
129 typedef struct nvblock_s {
132 struct nvblock_s *prev;
133 struct nvblock_s *next;
137 typedef struct tsplitter_s {
142 /* simple token classes */
143 /* Order and hex values are very important!!! See next_token() */
/* Every simple token class is a distinct single bit, so sets of classes
 * can be built with | and tested with &. next_token() receives such a set
 * as its `expected` argument, and the combined classes further below are
 * plain unions of the simple ones. */
144 #define TC_SEQSTART 1 /* ( */
145 #define TC_SEQTERM (1 << 1) /* ) */
146 #define TC_REGEXP (1 << 2) /* /.../ */
147 #define TC_OUTRDR (1 << 3) /* | > >> */
148 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
149 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
150 #define TC_BINOPX (1 << 6) /* two-opnd operator */
151 #define TC_IN (1 << 7)
152 #define TC_COMMA (1 << 8)
153 #define TC_PIPE (1 << 9) /* input redirection pipe */
154 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
155 #define TC_ARRTERM (1 << 11) /* ] */
156 #define TC_GRPSTART (1 << 12) /* { */
157 #define TC_GRPTERM (1 << 13) /* } */
158 #define TC_SEMICOL (1 << 14)
159 #define TC_NEWLINE (1 << 15)
160 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
161 #define TC_WHILE (1 << 17)
162 #define TC_ELSE (1 << 18)
163 #define TC_BUILTIN (1 << 19)
164 #define TC_GETLINE (1 << 20)
165 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
166 #define TC_BEGIN (1 << 22)
167 #define TC_END (1 << 23)
168 #define TC_EOF (1 << 24)
169 #define TC_VARIABLE (1 << 25)
170 #define TC_ARRAY (1 << 26)
171 #define TC_FUNCTION (1 << 27)
172 #define TC_STRING (1 << 28)
173 #define TC_NUMBER (1 << 29)
/* either of the two prefix-operator classes */
175 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
177 /* combined token classes */
178 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
179 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
180 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
181 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
183 #define TC_STATEMNT (TC_STATX | TC_WHILE)
184 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
186 /* word tokens, cannot mean something else if not expected */
187 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
188 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
190 /* discard newlines after these */
191 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
192 | TC_BINOP | TC_OPTERM)
194 /* what can expression begin with */
195 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
196 /* what can group begin with */
197 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
199 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
200 /* operator is inserted between them */
201 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
202 | TC_STRING | TC_NUMBER | TC_UOPPOST)
203 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
/* Operand-handling flags that are OR-ed into a node's info word (see the
 * entries of tokeninfo[]). Suffix 1 refers to the first operand, suffix 2
 * to the second. */
205 #define OF_RES1 0x010000
206 #define OF_RES2 0x020000
207 #define OF_STR1 0x040000
208 #define OF_STR2 0x080000
209 #define OF_NUM1 0x100000
210 #define OF_CHECKED 0x200000
212 /* combined operator flags */
/* Two-letter names: first letter = first operand, second letter = second
 * operand. As the expansions show: V = RES only, S = RES + STR,
 * N = RES + NUM, x = no flag for that operand. */
215 #define xS (OF_RES2 | OF_STR2)
217 #define VV (OF_RES1 | OF_RES2)
218 #define Nx (OF_RES1 | OF_NUM1)
219 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
220 #define Sx (OF_RES1 | OF_STR1)
221 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
222 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
/* OPCLSMASK extracts the operation class (the OC_ and ST_ codes all lie
 * within 0xFF00); OPNMASK covers the low seven bits of the info word. */
224 #define OPCLSMASK 0xFF00
225 #define OPNMASK 0x007F
227 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
228 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
229 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
231 #define P(x) (x << 24)
232 #define PRIMASK 0x7F000000
233 #define PRIMASK2 0x7E000000
235 /* Operation classes */
/* Operation class codes. Every value occupies only bits covered by
 * OPCLSMASK (0xFF00), so (info & OPCLSMASK) yields one of these codes.
 * SHIFT_TIL_THIS equals OC_WALKINIT and RECUR_FROM_THIS equals OC_BINARY;
 * NOTE(review): how those two thresholds are used is not visible here. */
237 #define SHIFT_TIL_THIS 0x0600
238 #define RECUR_FROM_THIS 0x1000
241 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
242 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
244 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
245 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
246 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
248 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
249 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
250 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
251 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
252 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
253 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
254 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
255 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
258 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
262 /* simple builtins */
/* F_* codes are combined with OC_FBLTIN and B_* codes with OC_BUILTIN
 * (via OC_B) in tokeninfo[] to select the individual builtin. */
264 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
265 F_ti, F_le, F_sy, F_ff, F_cl
270 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
272 B_an, B_co, B_ls, B_or, B_rs, B_xo,
275 /* tokens and their corresponding info values */
277 #define NTC "\377" /* switch to next token class (tc<<1) */
280 #define OC_B OC_BUILTIN
/* Token text table. Each entry is a single length byte, written as an
 * octal escape (for example \10 = 8 in front of "continue"), followed by
 * that many characters of token text. An NTC byte closes the entries of
 * one token class and switches to the next class (tc<<1), so the groups
 * appear in the bit order of the TC_* classes. next_token() compares the
 * input against an entry with strncmp() over that length. */
282 static const char tokenlist[] ALIGN1 =
285 "\1/" NTC /* REGEXP */
286 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
287 "\2++" "\2--" NTC /* UOPPOST */
288 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
289 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
290 "\2*=" "\2/=" "\2%=" "\2^="
291 "\1+" "\1-" "\3**=" "\2**"
292 "\1/" "\1%" "\1^" "\1*"
293 "\2!=" "\2>=" "\2<=" "\1>"
294 "\1<" "\2!~" "\1~" "\2&&"
295 "\2||" "\1?" "\1:" NTC
299 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
305 "\2if" "\2do" "\3for" "\5break" /* STATX */
306 "\10continue" "\6delete" "\5print"
307 "\6printf" "\4next" "\10nextfile"
308 "\6return" "\4exit" NTC
312 "\3and" "\5compl" "\6lshift" "\2or"
314 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
315 "\3cos" "\3exp" "\3int" "\3log"
316 "\4rand" "\3sin" "\4sqrt" "\5srand"
317 "\6gensub" "\4gsub" "\5index" "\6length"
318 "\5match" "\5split" "\7sprintf" "\3sub"
319 "\6substr" "\7systime" "\10strftime" "\6mktime"
320 "\7tolower" "\7toupper" NTC
322 "\4func" "\10function" NTC
/* Info word for the tokens of tokenlist[], listed in the same order.
 * A word is the OR of: an operation class (OC_* or ST_*), operand flags
 * (Vx, NV, SS, ...), a value shifted into the top byte by P(), and a
 * low-byte sub-code (a character, a small number, or an F_ / B_ constant).
 * For operators the P() value is the priority; for OC_B builtins it
 * encodes argument requirements instead. The sub-code '&' is used for
 * the entries that correspond to "^=", "**=", "**" and "^". */
327 static const uint32_t tokeninfo[] = {
331 xS|'a', xS|'w', xS|'|',
332 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
333 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
335 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
336 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
337 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
338 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
339 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
340 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
341 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
342 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
343 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
344 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
345 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
346 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
347 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
348 OC_COLON|xx|P(67)|':',
351 OC_PGETLINE|SV|P(37),
352 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
353 OC_UNARY|xV|P(19)|'!',
359 ST_IF, ST_DO, ST_FOR, OC_BREAK,
360 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
361 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
362 OC_RETURN|Vx, OC_EXIT|Nx,
366 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
367 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
368 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
369 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
370 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
371 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
372 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
373 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
374 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
381 /* internal variable names and their initial values */
382 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices of the built-in variables; intvar[] is dimensioned with
 * NUM_INTERNAL_VARS and indexed by these (intvar[NF], intvar[FS], ...). */
384 CONVFMT, OFMT, FS, OFS,
385 ORS, RS, RT, FILENAME,
386 SUBSEP, F0, ARGIND, ARGC,
387 ARGV, ERRNO, FNR, NR,
388 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
/* NUL-separated names in the same order as the indices above; an empty
 * string ends the list. A '*' written right after a name's terminating
 * NUL marks that name: the marked ones are FS, RS, "$", NF and IGNORECASE,
 * which are exactly the variables handle_special() checks for. */
391 static const char vNames[] ALIGN1 =
392 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
393 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
394 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
395 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
396 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
/* Initial string values, in index order, for CONVFMT through F0
 * ("%.6g", "%.6g", " ", " ", "\n", "\n", "", "", "\034", "").
 * NOTE(review): the trailing \377 presumably tells the initialisation
 * code that the remaining variables have no string value - confirm
 * against the code that consumes this table. */
398 static const char vValues[] ALIGN1 =
399 "%.6g\0" "%.6g\0" " \0" " \0"
400 "\n\0" "\n\0" "\0" "\0"
401 "\034\0" "\0" "\377";
403 /* hash size may grow to these values */
/* FIRST_PRIME is the bucket count hash_init() starts with. PRIMES[] holds
 * the successive larger bucket counts; hash_rebuild() takes the entry at
 * hash->nprime and advances that index. */
404 #define FIRST_PRIME 61
405 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
408 /* Globals. Split in two parts so that first one is addressed
409 * with (mostly short) negative offsets.
410 * NB: it's unsafe to put members of type "double"
411 * into globals2 (gcc may fail to align them).
// Members of the global state. Going by the accessor macros defined after
// this list, beginseq .. is_f0_split are reached through G1 and t_info
// onwards through G. NOTE(review): the enclosing struct declarations are
// not visible here - confirm where the split falls.
415 chain beginseq, mainseq, endseq;
417 node *break_ptr, *continue_ptr;
419 xhash *vhash, *ahash, *fdhash, *fnhash;
420 const char *g_progname;
423 int maxfields; /* used in fsrealloc() only */
432 smallint is_f0_split;
435 uint32_t t_info; /* often used */
441 var *intvar[NUM_INTERNAL_VARS]; /* often used */
443 /* former statics from various functions */
/* naming scheme: <function>__<variable> */
444 char *split_f0__fstrings;
446 uint32_t next_token__save_tclass;
447 uint32_t next_token__save_info;
448 uint32_t next_token__ltclass;
449 smallint next_token__concat_inserted;
451 smallint next_input_file__files_happen;
452 rstream next_input_file__rsm;
454 var *evaluate__fnargs;
455 unsigned evaluate__seed;
456 regex_t evaluate__sreg;
460 tsplitter exec_builtin__tspl;
462 /* biggest and least used members go last */
463 tsplitter fsplitter, rsplitter;
/* Access to the two global-state structures. G1 is the element just before
 * ptr_to_globals (index -1); G is the struct globals2 located at
 * ptr_to_globals itself. The short names defined below let the rest of the
 * file use the members as if they were ordinary globals. */
465 #define G1 (ptr_to_globals[-1])
466 #define G (*(struct globals2 *)ptr_to_globals)
467 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
468 /*char G1size[sizeof(G1)]; - 0x74 */
469 /*char Gsize[sizeof(G)]; - 0x1c4 */
470 /* Trying to keep most of members accessible with short offsets: */
471 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
472 #define t_double (G1.t_double )
473 #define beginseq (G1.beginseq )
474 #define mainseq (G1.mainseq )
475 #define endseq (G1.endseq )
476 #define seq (G1.seq )
477 #define break_ptr (G1.break_ptr )
478 #define continue_ptr (G1.continue_ptr)
480 #define vhash (G1.vhash )
481 #define ahash (G1.ahash )
482 #define fdhash (G1.fdhash )
483 #define fnhash (G1.fnhash )
484 #define g_progname (G1.g_progname )
485 #define g_lineno (G1.g_lineno )
486 #define nfields (G1.nfields )
487 #define maxfields (G1.maxfields )
488 #define Fields (G1.Fields )
489 #define g_cb (G1.g_cb )
490 #define g_pos (G1.g_pos )
491 #define g_buf (G1.g_buf )
492 #define icase (G1.icase )
493 #define exiting (G1.exiting )
494 #define nextrec (G1.nextrec )
495 #define nextfile (G1.nextfile )
496 #define is_f0_split (G1.is_f0_split )
497 #define t_info (G.t_info )
498 #define t_tclass (G.t_tclass )
499 #define t_string (G.t_string )
500 #define t_lineno (G.t_lineno )
501 #define t_rollback (G.t_rollback )
502 #define intvar (G.intvar )
503 #define fsplitter (G.fsplitter )
504 #define rsplitter (G.rsplitter )
/* INIT_G: allocate both structures as one zero-filled block of
 * sizeof(G1) + sizeof(G) bytes and point ptr_to_globals just past the G1
 * part. Then set the two members that must not start at zero: the last
 * token class (TC_OPTERM) and the random seed (1). */
505 #define INIT_G() do { \
506 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
507 G.next_token__ltclass = TC_OPTERM; \
508 G.evaluate__seed = 1; \
512 /* function prototypes */
/* Forward declarations of functions that are referenced before their
 * definitions (e.g. copyvar() calls handle_special(), condition() calls
 * parse_expr(), getvar_s() calls fmt_num()). awk_exit() never returns. */
513 static void handle_special(var *);
514 static node *parse_expr(uint32_t);
515 static void chain_group(void);
516 static var *evaluate(node *, var *);
517 static rstream *next_input_file(void);
518 static int fmt_num(char *, int, const char *, double, int);
519 static int awk_exit(int) NORETURN;
521 /* ---- error handling ---- */
/* Message texts handed to syntax_error(), which prints them after the
 * program name and line number. EMSG_NO_MATH is only defined when
 * ENABLE_FEATURE_AWK_LIBM is off. */
523 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
524 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
525 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
526 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
527 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
528 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
529 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
530 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
531 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
532 #if !ENABLE_FEATURE_AWK_LIBM
533 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
/* Reset the variable at vp by filling the whole structure
 * (sizeof(*vp) bytes) with zero bytes. */
536 static void zero_out_var(var *vp)
538 memset(vp, 0, sizeof(*vp));
/* Report a fatal error and terminate. The output has the form
 * "<g_progname>:<g_lineno>: <message>" and is produced by
 * bb_error_msg_and_die(). Declared NORETURN: control never comes back
 * to the caller. */
541 static void syntax_error(const char *message) NORETURN;
542 static void syntax_error(const char *message)
544 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
547 /* ---- hash stuff ---- */
/* Compute the hash value of name. The visible update step folds one
 * character into the running value as idx = c + idx * 63 (spelled
 * (idx << 6) - idx) and advances name. Callers reduce the result modulo
 * the table size. */
549 static unsigned hashidx(const char *name)
554 idx = *name++ + (idx << 6) - idx;
558 /* create new hash */
/* Allocate an empty hash: a zero-filled xhash header whose csize is set to
 * FIRST_PRIME, plus a zero-filled bucket array of FIRST_PRIME item
 * pointers. Both allocations go through xzalloc(). */
559 static xhash *hash_init(void)
563 newhash = xzalloc(sizeof(*newhash));
564 newhash->csize = FIRST_PRIME;
565 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
570 /* find item in hash, return ptr to data, NULL if not found */
/* Look name up in hash. The bucket is selected by
 * hashidx(name) % hash->csize; entries of that chain are compared against
 * name with strcmp(). */
571 static void *hash_search(xhash *hash, const char *name)
575 hi = hash->items[hashidx(name) % hash->csize];
577 if (strcmp(hi->name, name) == 0)
584 /* grow hash if it becomes too big */
/* Move all items of hash into a larger bucket array. The new size is the
 * next entry of PRIMES[]; hash->nprime is advanced as a side effect. */
585 static void hash_rebuild(xhash *hash)
587 unsigned newsize, i, idx;
588 hash_item **newitems, *hi, *thi;
/* all sizes in PRIMES[] already used up.
 * NOTE(review): presumably returns here without resizing - confirm */
590 if (hash->nprime == ARRAY_SIZE(PRIMES))
593 newsize = PRIMES[hash->nprime++];
594 newitems = xzalloc(newsize * sizeof(newitems[0]));
/* walk every old bucket; each item is re-hashed against newsize and
 * linked in at the head of its new chain */
596 for (i = 0; i < hash->csize; i++) {
601 idx = hashidx(thi->name) % newsize;
602 thi->next = newitems[idx];
/* switch the hash over to the new bucket array */
608 hash->csize = newsize;
609 hash->items = newitems;
612 /* find item in hash, add it if necessary. Return ptr to data */
/* Look name up with hash_search(); the code that follows creates a new
 * entry. A new entry is one zero-filled allocation that holds the item
 * and a copy of name, and it is linked in at the head of its bucket
 * chain. */
613 static void *hash_find(xhash *hash, const char *name)
619 hi = hash_search(hash, name);
/* count the new element; more than 10 elements per bucket on average
 * triggers the statement guarded by this test.
 * NOTE(review): presumably a call to hash_rebuild() - confirm */
621 if (++hash->nel / hash->csize > 10)
/* l includes the terminating NUL of name */
624 l = strlen(name) + 1;
625 hi = xzalloc(sizeof(*hi) + l);
626 strcpy(hi->name, name);
628 idx = hashidx(name) % hash->csize;
629 hi->next = hash->items[idx];
630 hash->items[idx] = hi;
/* Typed wrappers around hash_find(). findvar() works on the hash passed
 * in; newvar(), newfile() and newfunc() use the global vhash, fdhash and
 * fnhash respectively and cast the result to the matching element type. */
636 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
637 #define newvar(name) ((var*) hash_find(vhash, (name)))
638 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
639 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Remove the entry called name from hash. phi starts at the bucket slot
 * chosen by hashidx(name) % hash->csize; the entry is identified with
 * strcmp(). On a match hash->glen is reduced by strlen(name) + 1, i.e. by
 * the space the name occupies including its NUL. */
641 static void hash_remove(xhash *hash, const char *name)
643 hash_item *hi, **phi;
645 phi = &(hash->items[hashidx(name) % hash->csize]);
648 if (strcmp(hi->name, name) == 0) {
649 hash->glen -= (strlen(name) + 1);
659 /* ------ some useful functions ------ */
/* Advance the cursor *s over blanks. Two cases are distinguished for the
 * current character: a backslash directly followed by a newline, and any
 * character that is neither ' ' nor '\t'.
 * NOTE(review): presumably the first case is skipped as a line
 * continuation and the second ends the scan - confirm. */
661 static void skip_spaces(char **s)
666 if (*p == '\\' && p[1] == '\n') {
669 } else if (*p != ' ' && *p != '\t') {
/* NOTE(review): only the signature is visible here. Callers in this file
 * pass the address of a cursor into a buffer of NUL-separated strings and
 * use the returned pointer as one string (split_f0(), hashwalk_next());
 * presumably the cursor is advanced past that string - confirm. */
677 static char *nextword(char **s)
/* Fetch one character from the cursor *s. Escape sequences are decoded by
 * bb_process_escape_sequence(), which receives the cursor itself. The
 * second visible line tests for a result of '\\' while *s still equals
 * the saved position pps.
 * NOTE(review): the statement guarded by that test is not visible here. */
685 static char nextchar(char **s)
692 c = bb_process_escape_sequence((const char**)s);
693 if (c == '\\' && *s == pps)
/* Return non-zero if c may appear in an identifier: anything isalnum()
 * accepts, or an underscore. */
698 static ALWAYS_INLINE int isalnum_(int c)
700 return (isalnum(c) || c == '_');
/* Convert the number at *pp to double and advance *pp past the consumed
 * characters. When the (partly visible) condition holds - its visible part
 * requires the second character to be 'x'/'X' or a digit - the text is
 * parsed as an integer by strtoull() with base 0, so the base is taken
 * from the prefix; otherwise strtod() is used.
 * NOTE(review): the first half of the condition is not visible;
 * presumably it checks for a leading '0' - confirm. */
703 static double my_strtod(char **pp)
707 && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
709 return strtoull(*pp, pp, 0);
712 return strtod(*pp, pp);
715 /* -------- working with variables (set/get/copy/etc) -------- */
/* Return the hash that backs v when v is used as an array. Variables
 * flagged VF_CHILD (function arguments) are followed to their source
 * first. If the resulting variable is not yet VF_ARRAY, a fresh hash from
 * hash_init() is stored in its x.array. */
717 static xhash *iamarray(var *v)
721 while (a->type & VF_CHILD)
724 if (!(a->type & VF_ARRAY)) {
726 a->x.array = hash_init();
/* Empty an array hash: for every bucket, the string value of each stored
 * variable is freed and the bucket slot is reset to NULL; finally glen and
 * nel are set to 0. The bucket array itself stays allocated (nvfree()
 * frees array->items separately after calling this). */
731 static void clear_array(xhash *array)
736 for (i = 0; i < array->csize; i++) {
737 hi = array->items[i];
741 free(thi->data.v.string);
744 array->items[i] = NULL;
746 array->glen = array->nel = 0;
749 /* clear a variable */
/* Clear v. All type flags except those in VF_DONTTOUCH are dropped.
 * The VF_FSTR test guards a statement that only runs when the string does
 * not point into the shared fstring buffer.
 * NOTE(review): presumably that statement frees v->string - confirm. */
750 static var *clrvar(var *v)
752 if (!(v->type & VF_FSTR))
755 v->type &= VF_DONTTOUCH;
761 /* assign string value to variable */
/* NOTE(review): only the signature is visible here. Callers in this file
 * pass either NULL or a heap-allocated string (setvar_s() passes the
 * result of xstrdup(), handle_special() a qrealloc() buffer); presumably
 * the variable takes ownership of the pointer - confirm. */
762 static var *setvar_p(var *v, char *value)
770 /* same as setvar_p but make a copy of string */
/* Assign a copy of value to v through setvar_p(). A NULL or empty value is
 * passed on as NULL rather than as a duplicated empty string; anything
 * else is duplicated with xstrdup(). Returns what setvar_p() returns. */
771 static var *setvar_s(var *v, const char *value)
773 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
776 /* same as setvar_s but sets USER flag */
/* Assign a copy of value to v by delegating to setvar_s().
 * NOTE(review): the statement that marks the result as user input
 * (VF_USER) is not visible here. */
777 static var *setvar_u(var *v, const char *value)
779 v = setvar_s(v, value);
784 /* set array element to user string */
/* Locate the element of array a whose key is the string produced by
 * itoa(idx). findvar() creates the element if it does not exist yet, and
 * iamarray() supplies the hash behind a.
 * NOTE(review): the assignment of s to that element is not visible here. */
785 static void setari_u(var *a, int idx, const char *s)
789 v = findvar(iamarray(a), itoa(idx));
793 /* assign numeric value to variable */
/* Give v a numeric value. The visible statement marks the variable's
 * primary type as number (VF_NUMBER).
 * NOTE(review): storing value itself is not visible here. */
794 static var *setvar_i(var *v, double value)
797 v->type |= VF_NUMBER;
/* Return the string value of v; never returns NULL (a missing string is
 * reported as ""). For a number without a cached string, the number is
 * formatted into g_buf with the current CONVFMT, the result is duplicated
 * into v->string and VF_CACHED is set so the conversion is not repeated. */
803 static const char *getvar_s(var *v)
805 /* if v is numeric and has no cached string, convert it to string */
806 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
807 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
808 v->string = xstrdup(g_buf);
809 v->type |= VF_CACHED;
811 return (v->string == NULL) ? "" : v->string;
/* Return the numeric value of v. If v is neither a number nor already
 * cached, its string is parsed with my_strtod() and the result is stored
 * in v->number; VF_CACHED is then set. Values flagged VF_USER get
 * additional handling.
 * NOTE(review): the body of the VF_USER branch is not visible here. */
814 static double getvar_i(var *v)
818 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
822 v->number = my_strtod(&s);
823 if (v->type & VF_USER) {
831 v->type |= VF_CACHED;
836 /* Used for operands of bitwise ops */
/* Return the numeric value of v as unsigned long. Two conversions are
 * visible: a direct cast, and a form that negates d, casts the now
 * positive value, and negates the result again.
 * NOTE(review): the test choosing between them is not visible;
 * presumably the second form handles negative d - confirm. */
837 static unsigned long getvar_i_int(var *v)
839 double d = getvar_i(v);
841 /* Casting doubles to longs is undefined for values outside
842 * of target type range. Try to widen it as much as possible */
844 return (unsigned long)d;
845 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
846 return - (long) (unsigned long) (-d);
/* Copy the value of src into dest. From src's type, every flag except the
 * VF_DONTTOUCH set and VF_FSTR is OR-ed into dest's type; the number is
 * copied, the string is duplicated with xstrdup() so dest owns its own
 * copy, and handle_special() is run on dest afterwards. */
849 static var *copyvar(var *dest, const var *src)
853 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
854 dest->number = src->number;
856 dest->string = xstrdup(src->string);
858 handle_special(dest);
/* Add 1 to the numeric value of v and store it back with setvar_i();
 * returns what setvar_i() returns. */
862 static var *incvar(var *v)
864 return setvar_i(v, getvar_i(v) + 1.0);
867 /* return true if v is number or numeric string */
/* The returned value is non-zero when VF_NUMBER or VF_USER is set in
 * v->type, or when VF_DIRTY is NOT set: the XOR flips the DIRTY bit before
 * the three bits are masked out. */
868 static int is_numeric(var *v)
871 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
874 /* return 1 when value of v corresponds to true, 0 otherwise */
/* Two outcomes are visible: a numeric test (number differs from 0) and a
 * string test (string present and non-empty).
 * NOTE(review): the condition selecting the numeric test is not visible;
 * presumably it is is_numeric(v) - confirm. */
875 static int istrue(var *v)
878 return (v->number != 0);
879 return (v->string && v->string[0]);
882 /* temporary variables allocator. Last allocated should be first freed */
/* Hand out n temporary variables from the current block g_cb. */
883 static var *nvalloc(int n)
/* does the current block still have room for n more variables? */
891 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
/* a new block holds at least MINNVBLOCK variables and is zero-filled */
897 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
898 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
900 g_cb->pos = g_cb->nv;
902 /*g_cb->next = NULL; - xzalloc did it */
910 while (v < g_cb->pos) {
/* Release the temporary variables from v up to the current allocation
 * position of block g_cb. */
919 static void nvfree(var *v)
/* v must lie inside the used part of the current block; anything else is
 * reported as an internal error */
923 if (v < g_cb->nv || v >= g_cb->pos)
924 syntax_error(EMSG_INTERNAL_ERROR);
926 for (p = v; p < g_cb->pos; p++) {
/* arrays owned by this variable (VF_ARRAY without VF_CHILD): empty the
 * hash, then free its bucket array */
927 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
928 clear_array(iamarray(p));
929 free(p->x.array->items);
/* variables with a pending for..in walker list */
932 if (p->type & VF_WALK) {
934 walker_list *w = p->x.walker;
935 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
939 debug_printf_walker(" free(%p)\n", w);
/* the loop condition holds while the current block is empty and has a
 * predecessor.
 * NOTE(review): presumably steps g_cb back to that predecessor - confirm */
948 while (g_cb->prev && g_cb->pos == g_cb->nv) {
953 /* ------- awk program text parsing ------- */
955 /* Parse next token pointed by global pos, place results into global ttt.
956 * If token isn't expected, give away. Return token class
// Tokenizer. `expected` is a set of TC_* classes acceptable at this point.
// What the visible code shows:
//  - a token saved by a previous concatenation insert is replayed first;
//  - a double quote starts a string literal, decoded with nextchar();
//  - '/' starts a regex only when TC_REGEXP is expected;
//  - '.' or a digit starts a number, converted with my_strtod();
//  - otherwise the text is matched against tokenlist[], and failing that
//    it is taken as a name;
//  - a newline following a TC_NOTERM token is skipped;
//  - an OC_CONCAT operator is synthesized between a TC_CONCAT1 and a
//    TC_CONCAT2 token when a binary operator is expected;
//  - finally, a resulting class outside `expected` is a syntax error.
958 static uint32_t next_token(uint32_t expected)
960 #define concat_inserted (G.next_token__concat_inserted)
961 #define save_tclass (G.next_token__save_tclass)
962 #define save_info (G.next_token__save_info)
963 /* Initialized to TC_OPTERM: */
964 #define ltclass (G.next_token__ltclass)
975 } else if (concat_inserted) {
976 concat_inserted = FALSE;
977 t_tclass = save_tclass;
986 while (*p != '\n' && *p != '\0')
995 } else if (*p == '\"') {
/* an unterminated string literal is an error */
999 if (*p == '\0' || *p == '\n')
1000 syntax_error(EMSG_UNEXP_EOS);
1001 *(s++) = nextchar(&p);
1007 } else if ((expected & TC_REGEXP) && *p == '/') {
/* an unterminated regex is an error */
1011 if (*p == '\0' || *p == '\n')
1012 syntax_error(EMSG_UNEXP_EOS);
1016 *(s-1) = bb_process_escape_sequence((const char **)&p);
1027 } else if (*p == '.' || isdigit(*p)) {
1029 t_double = my_strtod(&p);
1031 syntax_error(EMSG_UNEXP_TOKEN);
1035 /* search for something known */
1045 /* if token class is expected, token
1046 * matches and it's not a longer word,
1047 * then this is what we are looking for
1049 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1050 && *tl == *p && strncmp(p, tl, l) == 0
1051 && !((tc & TC_WORD) && isalnum_(p[l]))
1062 /* it's a name (var/array/function),
1063 * otherwise it's something wrong
1066 syntax_error(EMSG_UNEXP_TOKEN);
1069 while (isalnum_(*(++p))) {
1074 /* also consume whitespace between functionname and bracket */
1075 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1089 /* skipping newlines in some cases */
1090 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1093 /* insert concatenation operator when needed */
1094 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1095 concat_inserted = TRUE;
1099 t_info = OC_CONCAT | SS | P(35);
1106 /* Are we ready for this? */
/* newline or EOF where something else was required is reported as an
 * unexpected end of string, every other class as an unexpected token */
1107 if (!(ltclass & expected))
1108 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1109 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1112 #undef concat_inserted
/* NOTE(review): only the signature is visible here. Callers use it after a
 * terminator token that the enclosing construct still needs to see;
 * presumably it arranges (via t_rollback) for the next next_token() call
 * to deliver the current token again - confirm. */
1118 static void rollback_token(void)
/* Allocate a zero-filled node and stamp it with the current source line
 * (g_lineno).
 * NOTE(review): storing info in the node is not visible here. */
1123 static node *new_node(uint32_t info)
1127 n = xzalloc(sizeof(node));
1129 n->lineno = g_lineno;
/* Turn n into a regex node for pattern s. `re` must point to storage for
 * TWO regex_t objects: re[0] receives the case-sensitive compilation,
 * re[1] the REG_ICASE one. Both are compiled as extended regular
 * expressions by xregcomp(). */
1133 static node *mk_re_node(const char *s, node *n, regex_t *re)
1135 n->info = OC_REGEXP;
1138 xregcomp(re, s, REG_EXTENDED);
1139 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: require the opening '(' token, then
 * return the tree for everything up to the matching ')', which
 * parse_expr() consumes. */
1144 static node *condition(void)
1146 next_token(TC_SEQSTART);
1147 return parse_expr(TC_SEQTERM);
1150 /* parse expression terminated by given argument, return ptr
1151 * to built subtree. Terminator is eaten by parse_expr */
/* Expression parser. Tokens are read with next_token(xtc) until one whose
 * class is in iexp arrives; xtc, the set of classes allowed next, is
 * recomputed after every token. Visible cases:
 *  - a "<" comparison (sub-code 2) right after a getline node is treated
 *    as input redirection and attached to that node;
 *  - binary and postfix operators climb back over operators of higher
 *    priority before being linked in;
 *  - operands: names found in ahash become OC_FNARG nodes carrying the
 *    argument index, other names refer to variables from newvar();
 *    numbers and strings get a freshly allocated var; regexes are
 *    compiled by mk_re_node(); function calls and some builtins take
 *    their arguments through condition(); '(' and '[' recurse. */
1152 static node *parse_expr(uint32_t iexp)
1161 sn.r.n = glptr = NULL;
1162 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1164 while (!((tc = next_token(xtc)) & iexp)) {
1165 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1166 /* input redirection (<) attached to glptr node */
1167 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1169 xtc = TC_OPERAND | TC_UOPPRE;
1172 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1173 /* for binary and postfix-unary operators, jump back over
1174 * previous operators with higher priority */
1176 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1177 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1181 if ((t_info & OPCLSMASK) == OC_TERNARY)
1183 cn = vn->a.n->r.n = new_node(t_info);
1185 if (tc & TC_BINOP) {
1187 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1188 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1190 next_token(TC_GETLINE);
1191 /* give maximum priority to this pipe */
1192 cn->info &= ~PRIMASK;
1193 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1197 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1202 /* for operands and prefix-unary operators, attach them
1205 cn = vn->r.n = new_node(t_info);
1207 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1208 if (tc & (TC_OPERAND | TC_REGEXP)) {
1209 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1210 /* one should be very careful with switch on tclass -
1211 * only simple tclasses should be used! */
1216 v = hash_search(ahash, t_string);
1218 cn->info = OC_FNARG;
1219 cn->l.i = v->x.aidx;
1221 cn->l.v = newvar(t_string);
1223 if (tc & TC_ARRAY) {
1225 cn->r.n = parse_expr(TC_ARRTERM);
1232 v = cn->l.v = xzalloc(sizeof(var));
1234 setvar_i(v, t_double);
1236 setvar_s(v, t_string);
1240 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1245 cn->r.f = newfunc(t_string);
1246 cn->l.n = condition();
1250 cn = vn->r.n = parse_expr(TC_SEQTERM);
1256 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1260 cn->l.n = condition();
1269 /* add node to chain. Return ptr to alloc'd node */
/* Append a node to the current chain `seq`. The chain always ends in a
 * freshly allocated OC_DONE node, which becomes seq->last and is linked
 * from the node being added through its a.n pointer. */
1270 static node *chain_node(uint32_t info)
/* NOTE(review): presumably runs only when the chain is still empty - the
 * guarding test is not visible here */
1275 seq->first = seq->last = new_node(0);
/* the program source changed since the last node of this chain: record
 * it with an OC_NEWSOURCE node that carries its own copy of the name */
1277 if (seq->programname != g_progname) {
1278 seq->programname = g_progname;
1279 n = chain_node(OC_NEWSOURCE);
1280 n->l.s = xstrdup(g_progname);
1285 seq->last = n->a.n = new_node(OC_DONE);
/* Chain a node of class `info` whose left operand is the expression that
 * extends up to the next statement terminator (';' or newline) or '}'.
 * A closing '}' is tested for afterwards.
 * NOTE(review): presumably the '}' is handed back with rollback_token()
 * so the enclosing group sees it - confirm. */
1290 static void chain_expr(uint32_t info)
1294 n = chain_node(info);
1295 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1296 if (t_tclass & TC_GRPTERM)
/* Emit the skeleton of a loop into the current chain. The global
 * break_ptr and continue_ptr are saved on entry and restored on exit, so
 * loops can nest. */
1300 static node *chain_loop(node *nn)
1302 node *n, *n2, *save_brk, *save_cont;
1304 save_brk = break_ptr;
1305 save_cont = continue_ptr;
/* n: branch node at the head of the loop; placeholder nodes serve as
 * jump targets for `continue' and `break' inside the body */
1307 n = chain_node(OC_BR | Vx);
1308 continue_ptr = new_node(OC_EXEC);
1309 break_ptr = new_node(OC_EXEC);
/* n2: node chained after the loop body */
1311 n2 = chain_node(OC_EXEC | Vx);
/* `continue' leads to n2; `break' and the branch's r.n both lead to the
 * node that ends the chain so far */
1314 continue_ptr->a.n = n2;
1315 break_ptr->a.n = n->r.n = seq->last;
1317 continue_ptr = save_cont;
1318 break_ptr = save_brk;
1323 /* parse group and attach it to chain */
/* Parse one statement or a { ... } group and add it to the current chain.
 * Leading newlines are skipped. A '{' introduces a list of groups ending
 * at '}'; something that can start an expression, or a bare terminator,
 * is chained as an OC_EXEC expression statement; every other token is a
 * control statement dispatched on its operation class.
 * NOTE(review): the case labels of that dispatch are not visible here;
 * the inline notes below mark only what the surviving lines and their
 * existing comments show. */
1324 static void chain_group(void)
1330 c = next_token(TC_GRPSEQ);
1331 } while (c & TC_NEWLINE);
1333 if (c & TC_GRPSTART) {
1334 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1335 if (t_tclass & TC_NEWLINE) continue;
1339 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1341 chain_expr(OC_EXEC | Vx);
1342 } else { /* TC_STATEMNT */
1343 switch (t_info & OPCLSMASK) {
/* conditional branch on a parenthesized condition, with an optional
 * `else' part detected below */
1345 n = chain_node(OC_BR | Vx);
1346 n->l.n = condition();
1348 n2 = chain_node(OC_EXEC);
1350 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1352 n2->a.n = seq->last;
1360 n = chain_loop(NULL);
/* loop whose condition follows the body and is introduced by `while' */
1365 n2 = chain_node(OC_EXEC);
1366 n = chain_loop(NULL);
1368 next_token(TC_WHILE);
1369 n->l.n = condition();
/* `for': the first expression ends either at ')' (for-in form) or
 * at ';' (three-part form) */
1373 next_token(TC_SEQSTART);
1374 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1375 if (t_tclass & TC_SEQTERM) { /* for-in */
1376 if ((n2->info & OPCLSMASK) != OC_IN)
1377 syntax_error(EMSG_UNEXP_TOKEN);
1378 n = chain_node(OC_WALKINIT | VV);
1381 n = chain_loop(NULL);
1382 n->info = OC_WALKNEXT | Vx;
1384 } else { /* for (;;) */
1385 n = chain_node(OC_EXEC | Vx);
1387 n2 = parse_expr(TC_SEMICOL);
1388 n3 = parse_expr(TC_SEQTERM);
/* statement with an expression list and an optional output redirection
 * whose target expression goes into r.n */
1398 n = chain_node(t_info);
1399 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1400 if (t_tclass & TC_OUTRDR) {
1402 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1404 if (t_tclass & TC_GRPTERM)
1409 n = chain_node(OC_EXEC);
/* jump node whose target is the enclosing loop's continue_ptr */
1414 n = chain_node(OC_EXEC);
1415 n->a.n = continue_ptr;
1418 /* delete, next, nextfile, return, exit */
/* Parse a complete awk program text. Top-level items are read until EOF:
 * statement terminators, BEGIN and END blocks, function declarations,
 * pattern [action] rules, and bare { action } blocks. */
1425 static void parse_program(char *p)
1434 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1435 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1437 if (tclass & TC_OPTERM)
1441 if (tclass & TC_BEGIN) {
1445 } else if (tclass & TC_END) {
1449 } else if (tclass & TC_FUNCDECL) {
/* function declaration: register the name in fnhash, then enter each
 * parameter into ahash with its position as argument index */
1450 next_token(TC_FUNCTION);
1452 f = newfunc(t_string);
1453 f->body.first = NULL;
1455 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1456 v = findvar(ahash, t_string);
1457 v->x.aidx = (f->nargs)++;
1459 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1466 } else if (tclass & TC_OPSEQ) {
/* pattern rule: an OC_TEST node holds the pattern; when no '{' follows,
 * an OC_PRINT node is chained in place of an explicit action */
1468 cn = chain_node(OC_TEST);
1469 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1470 if (t_tclass & TC_GRPSTART) {
1474 chain_node(OC_PRINT);
1476 cn->r.n = mainseq.last;
1478 } else /* if (tclass & TC_GRPSTART) */ {
1486 /* -------- program execution part -------- */
1486 /* -------- program execution part -------- */
/* (Re)configure splitter spl for separator string s. If the splitter node
 * currently holds a compiled regex, that regex is released first (the
 * visible line frees ire). A separator longer than one character is
 * compiled as a regex with mk_re_node(); the remaining visible statement
 * stores the first character of s in n->info.
 * NOTE(review): presumably that statement is the else branch - confirm. */
1488 static node *mk_splitter(const char *s, tsplitter *spl)
1496 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1498 regfree(ire); // TODO: nuke ire, use re+1?
1500 if (strlen(s) > 1) {
1501 mk_re_node(s, n, re);
1503 n->info = (uint32_t) *s;
1509 /* use node as a regular expression. Supplied with node ptr and regex_t
1510 * storage space. Return ptr to regex (if result points to preg, it should
1511 * be later regfree'd manually
// Obtain a compiled regex for node op. An OC_REGEXP node already carries
// two compiled forms and the one matching the current icase setting is
// returned directly. Any other node is evaluated to a string, which is
// compiled into preg: first with REG_EXTENDED (plus REG_ICASE when icase
// is set) and, if that compilation fails, once more without REG_EXTENDED
// through xregcomp().
1513 static regex_t *as_regex(node *op, regex_t *preg)
1519 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1520 return icase ? op->r.ire : op->l.re;
1523 s = getvar_s(evaluate(op, v));
1525 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1526 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1527 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1528 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1529 * (maybe gsub is not supposed to use REG_EXTENDED?).
1531 if (regcomp(preg, s, cflags)) {
1532 cflags &= ~REG_EXTENDED;
1533 xregcomp(preg, s, cflags);
1539 /* gradually increasing buffer */
/* Make sure buffer b can hold more than n bytes. When b is NULL or n has
 * reached the recorded capacity *size, the capacity is raised to
 * n + n/2 + 80 and the buffer is reallocated with xrealloc(); *size is
 * updated to the new capacity. */
1540 static char* qrealloc(char *b, int n, int *size)
1542 if (!b || n >= *size) {
1543 *size = n + (n>>1) + 80;
1544 b = xrealloc(b, *size);
1549 /* resize field storage space */
/* Adjust the Fields[] storage for `size` fields. */
1550 static void fsrealloc(int size)
/* grow: reserve 16 slots beyond the request; the slots from i up to the
 * new maxfields are initialised as VF_SPECIAL variables without a string.
 * NOTE(review): i is set on a line not visible here; presumably it holds
 * the old maxfields - confirm */
1554 if (size >= maxfields) {
1556 maxfields = size + 16;
1557 Fields = xrealloc(Fields, maxfields * sizeof(var));
1558 for (; i < maxfields; i++) {
1559 Fields[i].type = VF_SPECIAL;
1560 Fields[i].string = NULL;
/* shrink: visit the fields from size up to the old field count.
 * NOTE(review): the loop body is not visible here */
1564 if (size < nfields) {
1565 for (i = size; i < nfields; i++) {
/* Split s into fields as directed by splitter node spl. The fields are
 * written into a newly allocated, zero-filled buffer of
 * strlen(s) * 2 + 3 bytes that is returned through *slist. The visible
 * return statement yields the field count n. Four modes are visible:
 * regex split (spl is an OC_REGEXP node), null split (separator
 * character is NUL), single-character split (separator is not a space),
 * and the remaining case, which splits on whitespace via
 * skip_whitespace()/isspace(). */
1572 static int awk_split(const char *s, node *spl, char **slist)
1577 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1579 /* in worst case, each char would be a separate field */
1580 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
/* for non-regex splitters the low byte of info is the separator char */
1583 c[0] = c[1] = (char)spl->info;
1585 if (*getvar_s(intvar[RS]) == '\0')
1588 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1590 return n; /* "": zero fields */
1591 n++; /* at least one field will be there */
1593 l = strcspn(s, c+2); /* len till next NUL or \n */
/* use the regex variant that matches the current icase setting */
1594 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1595 && pmatch[0].rm_so <= l
1597 l = pmatch[0].rm_so;
1598 if (pmatch[0].rm_eo == 0) {
1602 n++; /* we saw yet another delimiter */
1604 pmatch[0].rm_eo = l;
1609 /* make sure we remove *all* of the separator chars */
1612 } while (++l < pmatch[0].rm_eo);
1614 s += pmatch[0].rm_eo;
1618 if (c[0] == '\0') { /* null split */
1626 if (c[0] != ' ') { /* single-character split */
/* c[] then holds the upper- and lower-case form of the separator.
 * NOTE(review): presumably only done when icase is set - confirm */
1628 c[0] = toupper(c[0]);
1629 c[1] = tolower(c[1]);
1632 while ((s1 = strpbrk(s1, c))) {
1640 s = skip_whitespace(s);
1643 while (*s && !isspace(*s))
/* Split $0 into Fields[] using the field splitter. The strings of all
 * fields live in one shared buffer (fstrings) produced by awk_split();
 * each field points into it and is flagged VF_FSTR so the string is not
 * treated as individually owned, plus VF_USER and VF_DIRTY. */
1650 static void split_f0(void)
1652 /* static char *fstrings; */
1653 #define fstrings (G.split_f0__fstrings)
1664 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1667 for (i = 0; i < n; i++) {
1668 Fields[i].string = nextword(&s);
1669 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1672 /* set NF manually to avoid side effects */
/* i.e. write type and number directly instead of going through setvar_i() */
1674 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1675 intvar[NF]->number = nfields;
1679 /* perform additional actions when some internal variables changed */
/* Perform the side effects of assigning to a VF_SPECIAL variable:
 *  - NF: rebuild $0 from the first n fields joined by OFS;
 *  - $0: mark the record as not split (is_f0_split = FALSE);
 *  - FS / RS: rebuild the field / record splitter from the new value;
 *  - IGNORECASE: handled in a branch whose body is not visible here;
 *  - otherwise: the variable is addressed as an element of Fields[], and
 *    NF is raised to cover its index if it was smaller. */
1680 static void handle_special(var *v)
1684 const char *sep, *s;
1685 int sl, l, len, i, bsize;
1687 if (!(v->type & VF_SPECIAL))
1690 if (v == intvar[NF]) {
1691 n = (int)getvar_i(v);
1694 /* recalculate $0 */
1695 sep = getvar_s(intvar[OFS]);
1699 for (i = 0; i < n; i++) {
1700 s = getvar_s(&Fields[i]);
1703 memcpy(b+len, sep, sl);
/* make room for this field's text (l) plus a separator (sl) */
1706 b = qrealloc(b, len+l+sl, &bsize);
1707 memcpy(b+len, s, l);
/* the assembled buffer is handed over as the new value of $0 */
1712 setvar_p(intvar[F0], b);
1715 } else if (v == intvar[F0]) {
1716 is_f0_split = FALSE;
1718 } else if (v == intvar[FS]) {
1719 mk_splitter(getvar_s(v), &fsplitter);
1721 } else if (v == intvar[RS]) {
1722 mk_splitter(getvar_s(v), &rsplitter);
1724 } else if (v == intvar[IGNORECASE]) {
1728 n = getvar_i(intvar[NF]);
1729 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1730 /* right here v is invalid. Just to note... */
1734 /* step through func/builtin/etc arguments */
// pn points at the caller's argument-list cursor. Nodes of class OC_COMMA
// get separate treatment below.
// NOTE(review): nearly all of the body is missing from this listing, so the
// exact advance and return rules cannot be stated from here.
1735 static node *nextarg(node **pn)
1740 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// hashwalk_init: attach a new walker to v, holding names copied from the
// items of array.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so only the visible statements are described.
1749 static void hashwalk_init(var *v, xhash *array)
1754 walker_list *prev_walker;
// v already has a walker: remember it so the new one can link back to it.
1756 if (v->type & VF_WALK) {
1757 prev_walker = v->x.walker;
1762 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
// One zeroed allocation holds the walker header plus array->glen + 1 bytes.
// Presumably glen is the space needed for all item names - TODO confirm.
1764 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1765 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
// cur and end both start at the beginning of the name buffer
1766 w->cur = w->end = w->wbuf;
1767 w->prev = prev_walker;
// Visit every bucket of the hash and copy item names in at w->end.
1768 for (i = 0; i < array->csize; i++) {
1769 hi = array->items[i];
1771 strcpy(w->end, hi->name);
// hashwalk_next: advance the walker attached to v by one name.
// NOTE(review): this listing omits some original lines, including the
// return statements, so the meaning of the int result is not visible here.
1778 static int hashwalk_next(var *v)
1780 walker_list *w = v->x.walker;
// cur has reached end: no names are left in this walker.
1782 if (w->cur >= w->end) {
1783 walker_list *prev_walker = w->prev;
1785 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
// the previously saved walker becomes the current one for v again
1787 v->x.walker = prev_walker;
// otherwise v receives the next name and w->cur moves past it
1791 setvar_s(v, nextword(&w->cur));
1795 /* evaluate node, return 1 when result is true, 0 otherwise */
// pattern: the node to evaluate; the truth test itself is done by istrue.
1796 static int ptest(node *pattern)
1798 /* ptest__v is "static": to save stack space? */
// The result variable lives in the globals structure G, not on the stack.
1799 return istrue(evaluate(pattern, &G.ptest__v));
1802 /* read next record from stream rsm into a variable v */
// Data is read from the file descriptor behind rsm->F with safe_read into
// a buffer grown with qrealloc. The record separator comes from rsplitter.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so only the visible statements are described.
1803 static int awk_getline(rstream *rsm, var *v)
1806 regmatch_t pmatch[2];
1807 int a, p, pp=0, size;
1808 int fd, so, eo, r, rp;
1811 /* we're using our own buffer since we need access to accumulating
1814 fd = fileno(rsm->F);
// c is the separator character: rsplitter.n.info narrowed to char
1819 c = (char) rsplitter.n.info;
1823 m = qrealloc(m, 256, &size);
// Regex separator: so and eo are the start and end offsets of the match.
1829 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1830 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1831 b, 1, pmatch, 0) == 0) {
1832 so = pmatch[0].rm_so;
1833 eo = pmatch[0].rm_eo;
// Single-character separator: search from offset pp; if c is absent, fall
// back to the first NUL byte within the next p - pp bytes.
1837 } else if (c != '\0') {
1838 s = strchr(b+pp, c);
1839 if (!s) s = memchr(b+pp, '\0', p - pp);
// Remaining visible path (presumably c is NUL, the branch header is not
// visible): step over newlines at rp, look for an empty line, and extend
// eo across every newline that follows.
1846 while (b[rp] == '\n')
1848 s = strstr(b+rp, "\n\n");
1851 while (b[eo] == '\n') eo++;
// move p+1 bytes starting at offset a down to the front of m
1859 memmove(m, (const void *)(m+a), p+1);
1864 m = qrealloc(m, a+p+128, &size);
// read more input after the p bytes already held; size-p-1 keeps one byte
// of the buffer unused
1867 p += safe_read(fd, b+p, size-p-1);
1871 setvar_i(intvar[ERRNO], errno);
// The byte at so, then the byte at eo, is saved in c and overwritten with
// NUL; RT is set to the text starting at b+so, i.e. the separator found.
1880 c = b[so]; b[so] = '\0';
1884 c = b[eo]; b[eo] = '\0';
1885 setvar_s(intvar[RT], b+so);
// fmt_num: format the number n into b (capacity size) and keep the
// snprintf result in r.
//   format     - printf-style format; its last character picks the branch
//   int_as_int - when nonzero, a value equal to its int conversion is
//                printed with %d and format is ignored
// NOTE(review): this listing omits some original lines, including the
// return statement.
1897 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1901 const char *s = format;
// NOTE(review): (int)n is only defined when n is within the range of int;
// larger magnitudes make this conversion undefined behaviour.
1903 if (int_as_int && n == (int)n) {
1904 r = snprintf(b, size, "%d", (int)n);
// leave c holding the last character of format (NUL for an empty format)
1906 do { c = *s; } while (c && *++s);
// NOTE(review): strchr also matches the terminating NUL, so an empty
// format (c == 0) takes this integer branch rather than the error branch.
1907 if (strchr("diouxX", c)) {
1908 r = snprintf(b, size, format, (int)n);
1909 } else if (strchr("eEfgG", c)) {
1910 r = snprintf(b, size, format, n);
// any other final character is reported as an invalid format
1912 syntax_error(EMSG_INV_FMT);
1918 /* formatted output into an allocated buffer, return ptr to buffer */
// n is the argument list; its first argument supplies the format string and
// later arguments are consumed one per conversion.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so only the visible statements are described.
1919 static char *awk_printf(node *n)
1924 int i, j, incr, bsize;
// work on a private copy of the format string
1929 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// advance f to the next conversion; a doubled percent sign does not stop
// the scan
1934 while (*f && (*f != '%' || *(++f) == '%'))
// scan ahead to the alphabetic conversion character
1936 while (*f && !isalpha(*f)) {
1938 syntax_error("%*x formats are not supported");
// reserve room for the format segment plus MAXVARFMT bytes of output
1942 incr = (f - s) + MAXVARFMT;
1943 b = qrealloc(b, incr + i, &bsize);
1948 arg = evaluate(nextarg(&n), v);
// c is 'c' or NUL: a numeric argument is converted to a char, otherwise the
// first character of its string value is used
1951 if (c == 'c' || !c) {
1952 i += sprintf(b+i, s, is_numeric(arg) ?
1953 (char)getvar_i(arg) : *getvar_s(arg));
1954 } else if (c == 's') {
// strings can be long, so grow the buffer by strlen(s1) first
1956 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1957 i += sprintf(b+i, s, s1);
// remaining conversions go through fmt_num with int_as_int disabled
1959 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1963 /* if there was an error while sprintf, return value is negative */
// trim the buffer to the i bytes written plus a terminator
1967 b = xrealloc(b, i + 1);
// NOTE(review): the closing line of the header comment below and several
// body lines are missing from this listing (the embedded line numbers
// skip). The word dst in that header refers to the dest parameter.
1974 /* common substitution routine
1975 * replace (nm) substring of (src) that match (n) with (repl), store
1976 * result into (dest), return number of substitutions. If nm=0, replace
1977 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1978 * subexpression matching (\1-\9)
1980 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1985 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// ten slots: the whole match plus nine subexpressions
1986 regmatch_t pmatch[10];
1989 re = as_regex(rn, &sreg);
1990 if (!src) src = intvar[F0];
1991 if (!dest) dest = intvar[F0];
// REG_NOTBOL is passed whenever sp is not at the start of the source
// string.
1996 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1997 so = pmatch[0].rm_so;
1998 eo = pmatch[0].rm_eo;
// ds is the output buffer and di the write offset; copy eo bytes from sp
// (text up to the end of the match)
2000 ds = qrealloc(ds, di + eo + rl, &dssize);
2001 memcpy(ds + di, sp, eo);
// walk the replacement text one character at a time
2007 for (s = repl; *s; s++) {
// an ampersand, or a digit when ex is set, selects a matched substring;
// di is moved back by (nbs + 3) >> 1
2013 if (c == '&' || (ex && c >= '0' && c <= '9')) {
2014 di -= ((nbs + 3) >> 1);
// append the n bytes of match slot j
2023 n = pmatch[j].rm_eo - pmatch[j].rm_so;
2024 ds = qrealloc(ds, di + rl + n, &dssize);
2025 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// append whatever remains at sp
2043 ds = qrealloc(ds, di + strlen(sp), &dssize);
2044 strcpy(ds + di, sp);
// do_mktime: parse ds as whitespace-separated numbers (year, month, day,
// hour, minute, second and a DST flag) into a struct tm and return the
// mktime result.
// NOTE(review): the function returns int, so the time_t from mktime is
// narrowed on return. Some original lines are missing from this listing.
2051 static NOINLINE int do_mktime(const char *ds)
2056 /*memset(&then, 0, sizeof(then)); - not needed */
2057 then.tm_isdst = -1; /* default is unknown */
2059 /* manpage of mktime says these fields are ints,
2060 * so we can sscanf stuff directly into them */
2061 count = sscanf(ds, "%u %u %u %u %u %u %d",
2062 &then.tm_year, &then.tm_mon, &then.tm_mday,
2063 &then.tm_hour, &then.tm_min, &then.tm_sec,
// input checks: a month below 1 or a year below 1900 is tested for here
2067 || (unsigned)then.tm_mon < 1
2068 || (unsigned)then.tm_year < 1900
// the input year is a calendar year; 1900 is subtracted before the value
// is passed to mktime in tm_year
2074 then.tm_year -= 1900;
2076 return mktime(&then);
// exec_builtin: run the builtin described by op->info and store its value
// in res.
// NOTE(review): this listing omits many original lines, including the case
// labels, so the builtin names given below are inferred from the visible
// statements and should be confirmed.
2079 static NOINLINE var *exec_builtin(node *op, var *res)
2081 #define tspl (G.exec_builtin__tspl)
2087 regmatch_t pmatch[2];
2097 isr = info = op->info;
2100 av[2] = av[3] = NULL;
// Collect up to four arguments. Bits 0x09000000 of isr decide whether the
// argument is evaluated into av[i]; bit 0x08000000 additionally fetches its
// string form into as[i].
2101 for (i = 0; i < 4 && op; i++) {
2102 an[i] = nextarg(&op);
2103 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2104 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// the top two bits of info hold the minimum argument count
2109 if ((uint32_t)nargs < (info >> 30))
2110 syntax_error(EMSG_TOO_FEW_ARGS);
2116 #if ENABLE_FEATURE_AWK_LIBM
2117 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2119 syntax_error(EMSG_NO_MATH);
// Splitting: a third argument that is a regex node is used directly,
// otherwise its string value is compiled into the temporary splitter tspl.
2125 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2126 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2131 n = awk_split(as[0], spl, &s);
// the target array is emptied, then filled with indices 1..n
2133 clear_array(iamarray(av[1]));
2134 for (i = 1; i <= n; i++)
2135 setari_u(av[1], i, nextword(&s1));
// Substring extraction: the start position in av[1] is 1-based, i is the
// 0-based offset; without a third argument the length is l-i.
2142 i = getvar_i(av[1]) - 1;
2145 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2147 s = xstrndup(as[0]+i, n);
2151 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2152 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2154 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2158 setvar_i(res, ~getvar_i_int(av[0]));
2162 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2166 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2170 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2174 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// Case conversion on a private copy, done with bit operations on ASCII
// letters only: clearing bit 0x20 gives upper case, setting it gives lower.
2179 s1 = s = xstrdup(as[0]);
2181 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2182 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2183 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// Substring search: l is the last offset in as[0] at which a needle of
// length ll still fits. The result n is the 1-based position of the match.
2192 l = strlen(as[0]) - ll;
2193 if (ll > 0 && l >= 0) {
2195 s = strstr(as[0], as[1]);
2196 if (s) n = (s - as[0]) + 1;
2198 /* this piece of code is terribly slow and
2199 * really should be rewritten
2201 for (i=0; i<=l; i++) {
2202 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting into g_buf, limited to MAXVARFMT bytes; a default format
// is used when no argument was given.
2214 tt = getvar_i(av[1]);
2217 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2218 i = strftime(g_buf, MAXVARFMT,
2219 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2222 setvar_s(res, g_buf);
2226 setvar_i(res, do_mktime(as[0]));
// Regex match of as[0]: RSTART and RLENGTH are set from pmatch[0]. The
// values 0 and -1 below yield RSTART 0 and RLENGTH -1 (presumably the
// no-match case, the condition is not visible).
2230 re = as_regex(an[1], &sreg);
2231 n = regexec(re, as[0], 1, pmatch, 0);
2236 pmatch[0].rm_so = 0;
2237 pmatch[0].rm_eo = -1;
2239 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2240 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2241 setvar_i(res, pmatch[0].rm_so);
// free the regex only if as_regex handed back the local sreg
2242 if (re == &sreg) regfree(re);
// Three awk_sub callers: the first takes the match number from av[2],
// writes into res and enables subexpression references; the other two
// replace all matches (nm 0) or only the first (nm 1) in place in av[2].
2246 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2250 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2254 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
// NOTE(review): this listing omits many original lines of evaluate (the
// embedded line numbers skip), including the opening line of the comment
// below and most case labels. Added comments describe visible statements
// only.
2264 * Evaluate node - the heart of the program. Supplied with subtree
2265 * and place where to store result. returns ptr to result.
// XC drops the low 8 bits of an opcode class so it can be used as a case
// label in the switch below.
2267 #define XC(n) ((n) >> 8)
2269 static var *evaluate(node *op, var *res)
2271 /* This procedure is recursive so we should count every byte */
2272 #define fnargs (G.evaluate__fnargs)
2273 /* seed is initialized to 1 */
2274 #define seed (G.evaluate__seed)
2275 #define sreg (G.evaluate__sreg)
2297 return setvar_s(res, NULL);
2303 opn = (opinfo & OPNMASK);
2304 g_lineno = op->lineno;
2306 /* execute inevitable things */
// Flags in opinfo request common preparation: evaluate the left operand
// into v1 and the right one into v1+1, then fetch string or numeric forms.
2308 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2309 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2310 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2311 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2312 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2314 switch (XC(opinfo & OPCLSMASK)) {
2316 /* -- iterative node type -- */
2320 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2321 /* it's range pattern */
// OF_CHECKED, kept in op->info, records that the range has started: it is
// set once the left pattern matches and cleared when the right one matches.
2322 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2323 op->info |= OF_CHECKED;
2324 if (ptest(op1->r.n))
2325 op->info &= ~OF_CHECKED;
2332 op = (ptest(op1)) ? op->a.n : op->r.n;
2336 /* just evaluate an expression, also used as unconditional jump */
2340 /* branch, used in if-else and various loops */
2342 op = istrue(L.v) ? op->a.n : op->r.n;
2345 /* initialize for-in loop */
2346 case XC( OC_WALKINIT ):
2347 hashwalk_init(L.v, iamarray(R.v));
2350 /* get next array item */
2351 case XC( OC_WALKNEXT ):
2352 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2355 case XC( OC_PRINT ):
2356 case XC( OC_PRINTF ):
// Output redirection: the stream entry for the target name is looked up
// with newfile, then opened with popen or with xfopen in write or append
// mode depending on opn.
2359 X.rsm = newfile(R.s);
2362 X.rsm->F = popen(R.s, "w");
2363 if (X.rsm->F == NULL)
2364 bb_perror_msg_and_die("popen");
2367 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2373 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2375 fputs(getvar_s(intvar[F0]), X.F);
2378 L.v = evaluate(nextarg(&op1), v1);
// numeric values are formatted with OFMT, integers being printed as
// integers (last argument TRUE)
2379 if (L.v->type & VF_NUMBER) {
2380 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2381 getvar_i(L.v), TRUE);
2384 fputs(getvar_s(L.v), X.F);
// OFS goes between arguments, ORS after the last one
2387 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2390 fputs(getvar_s(intvar[ORS]), X.F);
2392 } else { /* OC_PRINTF */
2393 L.s = awk_printf(op1);
2400 case XC( OC_DELETE ):
// the operand must be a variable or a function argument
2401 X.info = op1->info & OPCLSMASK;
2402 if (X.info == OC_VAR) {
2404 } else if (X.info == OC_FNARG) {
2405 R.v = &fnargs[op1->l.i];
2407 syntax_error(EMSG_NOT_ARRAY);
// either one element is removed by key, or the whole array is cleared
2412 L.s = getvar_s(evaluate(op1->r.n, v1));
2413 hash_remove(iamarray(R.v), L.s);
2415 clear_array(iamarray(R.v));
2419 case XC( OC_NEWSOURCE ):
2420 g_progname = op->l.s;
2423 case XC( OC_RETURN ):
2427 case XC( OC_NEXTFILE ):
2438 /* -- recursive node type -- */
2442 if (L.v == intvar[NF])
2446 case XC( OC_FNARG ):
2447 L.v = &fnargs[op->l.i];
// with a subscript node present the result is the array element named R.s,
// otherwise the variable itself
2449 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// membership test: 1 when the key L.s exists in the array, else 0
2453 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2456 case XC( OC_REGEXP ):
2458 L.s = getvar_s(intvar[F0]);
2461 case XC( OC_MATCH ):
// the regex is freed only if as_regex handed back sreg; opn '!' inverts
// the result
2464 X.re = as_regex(op1, &sreg);
2465 R.i = regexec(X.re, L.s, 0, NULL, 0);
2466 if (X.re == &sreg) regfree(X.re);
2467 setvar_i(res, (R.i == 0) ^ (opn == '!'));
2471 /* if source is a temporary string, just relink it to dest */
2472 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2473 //then L.v ends up being a string, which is wrong
2474 // if (R.v == v1+1 && R.v->string) {
2475 // res = setvar_p(L.v, R.v->string);
2476 // R.v->string = NULL;
2478 res = copyvar(L.v, R.v);
2482 case XC( OC_TERNARY ):
// the right child must be an OC_COLON node holding the two alternatives
2483 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2484 syntax_error(EMSG_POSSIBLE_ERROR);
2485 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
// Function call: a function without a body is an error. nargs + 1 argument
// variables are allocated; each evaluated actual parameter is linked to its
// slot through VF_CHILD and x.parent.
2489 if (!op->r.f->body.first)
2490 syntax_error(EMSG_UNDEF_FUNC);
2492 X.v = R.v = nvalloc(op->r.f->nargs + 1);
2494 L.v = evaluate(nextarg(&op1), v1);
2496 R.v->type |= VF_CHILD;
2497 R.v->x.parent = L.v;
2498 if (++R.v - X.v >= op->r.f->nargs)
2506 res = evaluate(op->r.f->body.first, res);
2513 case XC( OC_GETLINE ):
2514 case XC( OC_PGETLINE ):
// the source is a command opened with popen (OC_PGETLINE), a named file, or
// the current main input
2516 X.rsm = newfile(L.s);
2518 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2519 X.rsm->F = popen(L.s, "r");
2520 X.rsm->is_pipe = TRUE;
2522 X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
2526 if (!iF) iF = next_input_file();
2531 setvar_i(intvar[ERRNO], errno);
2539 L.i = awk_getline(X.rsm, R.v);
2542 incvar(intvar[FNR]);
2549 /* simple builtins */
2550 case XC( OC_FBLTIN ):
// NOTE(review): this quotient can equal 1.0 when rand returns RAND_MAX,
// while POSIX awk specifies rand as returning a value below 1.
2558 R.d = (double)rand() / (double)RAND_MAX;
2560 #if ENABLE_FEATURE_AWK_LIBM
2586 syntax_error(EMSG_NO_MATH);
// with an argument the seed is that value, otherwise the current time
2591 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2601 L.s = getvar_s(intvar[F0]);
// the command runs only when ENABLE_FEATURE_ALLOW_EXEC is set and the
// string is non-empty; the status is shifted right by 8 (presumably to
// extract the exit code - TODO confirm)
2607 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2608 ? (system(L.s) >> 8) : 0;
2616 X.rsm = newfile(L.s);
// closing a stream: look it up by name, close it with pclose or fclose,
// release its buffer and drop the entry from fdhash
2625 X.rsm = (rstream *)hash_search(fdhash, L.s);
2627 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2628 free(X.rsm->buffer);
2629 hash_remove(fdhash, L.s);
2632 setvar_i(intvar[ERRNO], errno);
2639 case XC( OC_BUILTIN ):
2640 res = exec_builtin(op, res);
2643 case XC( OC_SPRINTF ):
2644 setvar_p(res, awk_printf(op1));
2647 case XC( OC_UNARY ):
2649 L.d = R.d = getvar_i(R.v);
2675 case XC( OC_FIELD ):
// field numbers are 1-based here: number R.i maps to Fields[R.i - 1]
2676 R.i = (int)getvar_i(R.v);
2683 res = &Fields[R.i - 1];
2687 /* concatenation (" ") and index joining (",") */
2688 case XC( OC_CONCAT ):
2689 case XC( OC_COMMA ):
// opn is reused as the byte count: both lengths plus 2
2690 opn = strlen(L.s) + strlen(R.s) + 2;
// for OC_COMMA extra room is added for the SUBSEP string
2693 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2694 L.s = getvar_s(intvar[SUBSEP]);
2695 X.s = xrealloc(X.s, opn + strlen(L.s));
// short-circuit forms: the right operand is tested only when the left one
// does not already decide the result
2703 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2707 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2710 case XC( OC_BINARY ):
2711 case XC( OC_REPLACE ):
2712 R.d = getvar_i(R.v);
2725 syntax_error(EMSG_DIV_BY_ZERO);
2729 #if ENABLE_FEATURE_AWK_LIBM
2730 L.d = pow(L.d, R.d);
2732 syntax_error(EMSG_NO_MATH);
2737 syntax_error(EMSG_DIV_BY_ZERO);
// remainder via a truncated quotient. NOTE(review): the (int) conversion is
// undefined when the quotient does not fit in an int.
2738 L.d -= (int)(L.d / R.d) * R.d;
// OC_BINARY stores into res; otherwise (OC_REPLACE) the value goes into
// X.v, which was set to the left operand above
2741 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2744 case XC( OC_COMPARE ):
// numeric comparison when both sides are numeric, otherwise a string
// comparison that ignores case when icase is set
2745 if (is_numeric(L.v) && is_numeric(R.v)) {
2746 L.d = getvar_i(L.v) - getvar_i(R.v);
2748 L.s = getvar_s(L.v);
2749 R.s = getvar_s(R.v);
2750 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
// the low bit of opn selects the plain or the negated test
2752 switch (opn & 0xfe) {
2763 setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2767 syntax_error(EMSG_POSSIBLE_ERROR);
2769 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2771 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2784 /* -------- main & co. -------- */
// awk_exit: evaluate the END sequence, then call pclose on every pipe
// stream recorded in fdhash. r is the requested exit status.
// NOTE(review): this listing omits some original lines, including the code
// that actually terminates the process.
2786 static int awk_exit(int r)
2797 evaluate(endseq.first, &tv);
2800 /* waiting for children */
// visit every bucket of the stream hash
2801 for (i = 0; i < fdhash->csize; i++) {
2802 hi = fdhash->items[i];
// only open pipe streams are closed here
2804 if (hi->data.rs.F && hi->data.rs.is_pipe)
2805 pclose(hi->data.rs.F);
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so only the visible statements are described.
2813 /* if expr looks like "var=value", perform assignment and return 1,
2814 * otherwise return 0 */
2815 static int is_assignment(const char *expr)
2817 char *exprc, *s, *s0, *s1;
// work on a private, writable copy of expr
2819 exprc = xstrdup(expr);
// not an assignment unless the first character passes isalnum_ and an
// equals sign is present
2820 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
// the value is rewritten in place, one character at a time, through
// nextchar
2828 *(s1++) = nextchar(&s);
// the variable named by exprc receives the value string s0
2831 setvar_u(newvar(exprc), s0);
2836 /* switch to next input file */
// Walks ARGV from ARGIND onward. Arguments accepted by is_assignment are
// applied as assignments rather than opened.
// NOTE(review): this listing omits some original lines, including the
// return statements, so the result is not described here.
2837 static rstream *next_input_file(void)
2839 #define rsm (G.next_input_file__rsm)
2840 #define files_happen (G.next_input_file__files_happen)
2843 const char *fname, *ind;
// reset the read position bookkeeping of the shared stream
2848 rsm.pos = rsm.adv = 0;
// no further ARGV entries remain
2851 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// advance ARGIND and fetch ARGV[ARGIND]
2857 ind = getvar_s(incvar(intvar[ARGIND]));
2858 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
// a non-empty argument that is not an assignment is opened as input
2859 if (fname && *fname && !is_assignment(fname))
2860 F = xfopen_stdin(fname);
// remember that a file was seen and publish its name in FILENAME
2864 files_happen = TRUE;
2865 setvar_s(intvar[FILENAME], fname);
// awk_main: applet entry point. Visible steps: set up globals and built-in
// variables, import the environment, parse options, load the program from
// -f files or the first argument, fill ARGV, run BEGIN, then process the
// input files record by record.
// NOTE(review): this listing omits some original lines (the embedded line
// numbers skip), so only the visible statements are described.
2872 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2873 int awk_main(int argc, char **argv)
2876 char *opt_F, *opt_W;
2877 llist_t *list_v = NULL;
2878 llist_t *list_f = NULL;
2883 char *vnames = (char *)vNames; /* cheat */
2884 char *vvalues = (char *)vValues;
2888 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2889 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2890 if (ENABLE_LOCALE_SUPPORT)
2891 setlocale(LC_NUMERIC, "C");
2895 /* allocate global buffer */
2896 g_buf = xmalloc(MAXVARFMT + 1);
// four hash tables; fdhash is the one that receives the standard streams
// via newfile below
2898 vhash = hash_init();
2899 ahash = hash_init();
2900 fdhash = hash_init();
2901 fnhash = hash_init();
2903 /* initialize variables */
// vnames and vvalues are walked in parallel with nextword; a value whose
// first byte is \377 is not assigned as a string
2904 for (i = 0; *vnames; i++) {
2905 intvar[i] = v = newvar(nextword(&vnames));
2906 if (*vvalues != '\377')
2907 setvar_s(v, nextword(&vvalues));
// a star in the name list marks the variable as VF_SPECIAL
2911 if (*vnames == '*') {
2912 v->type |= VF_SPECIAL;
// run the special handling for FS and RS once, right after the initial
// values are set
2917 handle_special(intvar[FS]);
2918 handle_special(intvar[RS]);
// register the standard streams under their /dev names
2920 newfile("/dev/stdin")->F = stdin;
2921 newfile("/dev/stdout")->F = stdout;
2922 newfile("/dev/stderr")->F = stderr;
2924 /* Huh, people report that sometimes environ is NULL. Oh well. */
2925 if (environ) for (envp = environ; *envp; envp++) {
2926 /* environ is writable, thus we don't strdup it needlessly */
2928 char *s1 = strchr(s, '=');
2931 /* Both findvar and setvar_u take const char*
2932 * as 2nd arg -> environment is not trashed */
// the element of ENVIRON keyed by s receives the text after the equals sign
2933 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2937 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2938 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2942 setvar_s(intvar[FS], opt_F); // -F
2943 while (list_v) { /* -v */
2944 if (!is_assignment(llist_pop(&list_v)))
2947 if (list_f) { /* -f */
2952 g_progname = llist_pop(&list_f);
2953 from_file = xfopen_stdin(g_progname);
2954 /* one byte is reserved for some trick in next_token */
// The program text is read in chunks of up to 4094 bytes starting at
// offset 1; the buffer grows to i + 4096 before each read, and parsing
// starts at s + 1.
2955 for (i = j = 1; j > 0; i += j) {
2956 s = xrealloc(s, i + 4096);
2957 j = fread(s + i, 1, 4094, from_file);
2961 parse_program(s + 1);
2965 } else { // no -f: take program from 1st parameter
2968 g_progname = "cmd. line";
2969 parse_program(*argv++);
2971 if (opt & 0x8) // -W
2972 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2974 /* fill in ARGV array */
2975 setvar_i(intvar[ARGC], argc);
2976 setari_u(intvar[ARGV], 0, "awk");
2979 setari_u(intvar[ARGV], ++i, *argv++);
// run BEGIN; with neither main rules nor END there is nothing left to do
2981 evaluate(beginseq.first, &tv);
2982 if (!mainseq.first && !endseq.first)
2983 awk_exit(EXIT_SUCCESS);
2985 /* input file could already be opened in BEGIN block */
2987 iF = next_input_file();
2989 /* passing through input files */
// FNR is reset here and incremented for each record read
2992 setvar_i(intvar[FNR], 0);
// each record is read into $0 and the main rules are evaluated on it
2994 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2997 incvar(intvar[FNR]);
2998 evaluate(mainseq.first, &tv);
3005 syntax_error(strerror(errno));
3007 iF = next_input_file();
3010 awk_exit(EXIT_SUCCESS);