1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names */
86 struct hash_item_s **items;
90 typedef struct node_s {
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175 | TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185 | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
198 #define xS (OF_RES2 | OF_STR2)
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
210 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
211 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place x in the top byte (bits 24..31) of a 32-bit info word.
 * The argument is parenthesized so that expressions such as P(a | b)
 * shift the whole value, and it is converted to uint32_t before the
 * shift so that values with bit 7 set, e.g. P(0x83), do not shift
 * into the sign bit of int (undefined behavior for signed types). */
#define P(x) ((uint32_t)(x) << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
218 /* Operation classes */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
224 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
225 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
227 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
228 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
229 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
231 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
232 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
233 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
234 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
235 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
236 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
237 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
238 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
241 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
245 /* simple builtins */
247 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] ALIGN1 =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime" "\6mktime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
367 CONVFMT, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, F0, ARGIND, ARGC,
370 ARGV, ERRNO, FNR, NR,
371 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
374 static const char vNames[] ALIGN1 =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
378 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
379 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
381 static const char vValues[] ALIGN1 =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
384 "\034\0" "\0" "\377";
386 /* hash size may grow to these values */
387 #define FIRST_PRIME 61
388 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
391 /* Globals. Split in two parts so that first one is addressed
392 * with (mostly short) negative offsets.
393 * NB: it's unsafe to put members of type "double"
394 * into globals2 (gcc may fail to align them).
398 chain beginseq, mainseq, endseq;
400 node *break_ptr, *continue_ptr;
402 xhash *vhash, *ahash, *fdhash, *fnhash;
403 const char *g_progname;
406 int maxfields; /* used in fsrealloc() only */
415 smallint is_f0_split;
418 uint32_t t_info; /* often used */
424 var *intvar[NUM_INTERNAL_VARS]; /* often used */
426 /* former statics from various functions */
427 char *split_f0__fstrings;
429 uint32_t next_token__save_tclass;
430 uint32_t next_token__save_info;
431 uint32_t next_token__ltclass;
432 smallint next_token__concat_inserted;
434 smallint next_input_file__files_happen;
435 rstream next_input_file__rsm;
437 var *evaluate__fnargs;
438 unsigned evaluate__seed;
439 regex_t evaluate__sreg;
443 tsplitter exec_builtin__tspl;
445 /* biggest and least used members go last */
446 tsplitter fsplitter, rsplitter;
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /*char G1size[sizeof(G1)]; - 0x74 */
452 /*char Gsize[sizeof(G)]; - 0x1c4 */
453 /* Trying to keep most of members accessible with short offsets: */
454 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
455 #define t_double (G1.t_double )
456 #define beginseq (G1.beginseq )
457 #define mainseq (G1.mainseq )
458 #define endseq (G1.endseq )
459 #define seq (G1.seq )
460 #define break_ptr (G1.break_ptr )
461 #define continue_ptr (G1.continue_ptr)
463 #define vhash (G1.vhash )
464 #define ahash (G1.ahash )
465 #define fdhash (G1.fdhash )
466 #define fnhash (G1.fnhash )
467 #define g_progname (G1.g_progname )
468 #define g_lineno (G1.g_lineno )
469 #define nfields (G1.nfields )
470 #define maxfields (G1.maxfields )
471 #define Fields (G1.Fields )
472 #define g_cb (G1.g_cb )
473 #define g_pos (G1.g_pos )
474 #define g_buf (G1.g_buf )
475 #define icase (G1.icase )
476 #define exiting (G1.exiting )
477 #define nextrec (G1.nextrec )
478 #define nextfile (G1.nextfile )
479 #define is_f0_split (G1.is_f0_split )
480 #define t_info (G.t_info )
481 #define t_tclass (G.t_tclass )
482 #define t_string (G.t_string )
483 #define t_lineno (G.t_lineno )
484 #define t_rollback (G.t_rollback )
485 #define intvar (G.intvar )
486 #define fsplitter (G.fsplitter )
487 #define rsplitter (G.rsplitter )
488 #define INIT_G() do { \
489 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
504 /* ---- error handling ---- */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515 #if !ENABLE_FEATURE_AWK_LIBM
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
/* Fill the whole var structure that vp points to (sizeof(*vp) bytes)
 * with zero bytes. */
519 static void zero_out_var(var * vp)
521 memset(vp, 0, sizeof(*vp));
/* Report an error through bb_error_msg_and_die() in the form
 * "<g_progname>:<g_lineno>: <message>".
 * The prototype is marked NORETURN: callers must not expect control
 * to come back from this function. */
524 static void syntax_error(const char *message) NORETURN;
525 static void syntax_error(const char *message)
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
530 /* ---- hash stuff ---- */
532 static unsigned hashidx(const char *name)
536 while (*name) idx = *name++ + (idx << 6) - idx;
540 /* create new hash */
541 static xhash *hash_init(void)
545 newhash = xzalloc(sizeof(xhash));
546 newhash->csize = FIRST_PRIME;
547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
557 hi = hash->items [ hashidx(name) % hash->csize ];
559 if (strcmp(hi->name, name) == 0)
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
569 unsigned newsize, i, idx;
570 hash_item **newitems, *hi, *thi;
572 if (hash->nprime == ARRAY_SIZE(PRIMES))
575 newsize = PRIMES[hash->nprime++];
576 newitems = xzalloc(newsize * sizeof(hash_item *));
578 for (i = 0; i < hash->csize; i++) {
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
590 hash->csize = newsize;
591 hash->items = newitems;
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
601 hi = hash_search(hash, name);
603 if (++hash->nel / hash->csize > 10)
606 l = strlen(name) + 1;
607 hi = xzalloc(sizeof(*hi) + l);
608 strcpy(hi->name, name);
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx];
612 hash->items[idx] = hi;
/* Typed front ends to hash_find(): each one looks the name up (hash_find
 * adds the item if necessary) and casts the returned data pointer.
 * findvar takes the hash explicitly; newvar, newfile and newfunc always
 * use vhash, fdhash and fnhash respectively. */
618 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
619 #define newvar(name) ((var*) hash_find(vhash, (name)))
620 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
623 static void hash_remove(xhash *hash, const char *name)
625 hash_item *hi, **phi;
627 phi = &(hash->items[hashidx(name) % hash->csize]);
630 if (strcmp(hi->name, name) == 0) {
631 hash->glen -= (strlen(name) + 1);
641 /* ------ some useful functions ------ */
643 static void skip_spaces(char **s)
648 if (*p == '\\' && p[1] == '\n') {
651 } else if (*p != ' ' && *p != '\t') {
659 static char *nextword(char **s)
663 while (*(*s)++) /* */;
668 static char nextchar(char **s)
674 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
675 if (c == '\\' && *s == pps) c = *((*s)++);
679 static ALWAYS_INLINE int isalnum_(int c)
681 return (isalnum(c) || c == '_');
684 static double my_strtod(char **pp)
688 && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
690 return strtoull(*pp, pp, 0);
693 return strtod(*pp, pp);
696 /* -------- working with variables (set/get/copy/etc) -------- */
698 static xhash *iamarray(var *v)
702 while (a->type & VF_CHILD)
705 if (!(a->type & VF_ARRAY)) {
707 a->x.array = hash_init();
712 static void clear_array(xhash *array)
717 for (i = 0; i < array->csize; i++) {
718 hi = array->items[i];
722 free(thi->data.v.string);
725 array->items[i] = NULL;
727 array->glen = array->nel = 0;
730 /* clear a variable */
731 static var *clrvar(var *v)
733 if (!(v->type & VF_FSTR))
736 v->type &= VF_DONTTOUCH;
742 /* assign string value to variable */
743 static var *setvar_p(var *v, char *value)
751 /* Same as setvar_p, but the variable gets its own copy of the string:
 * a non-empty value is duplicated with xstrdup() before being handed to
 * setvar_p; a NULL or empty value is handed over as NULL.
 * Returns whatever setvar_p returns. */
752 static var *setvar_s(var *v, const char *value)
754 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
757 /* same as setvar_s but set USER flag */
758 static var *setvar_u(var *v, const char *value)
765 /* set array element to user string */
766 static void setari_u(var *a, int idx, const char *s)
768 char sidx[sizeof(int)*3 + 1];
771 sprintf(sidx, "%d", idx);
772 v = findvar(iamarray(a), sidx);
776 /* assign numeric value to variable */
777 static var *setvar_i(var *v, double value)
780 v->type |= VF_NUMBER;
/* Return the string value of v. The result is never NULL: a variable
 * whose string pointer is NULL yields "". */
786 static const char *getvar_s(var *v)
788 /* if v is numeric and has no cached string, convert it to string */
789 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format v->number into g_buf, using the string value of the CONVFMT
 * variable as the format (obtained by a recursive getvar_s call) */
790 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
/* keep a private copy in the variable and mark it as cached so the
 * conversion is not repeated while VF_CACHED stays set */
791 v->string = xstrdup(g_buf);
792 v->type |= VF_CACHED;
794 return (v->string == NULL) ? "" : v->string;
797 static double getvar_i(var *v)
801 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
805 v->number = my_strtod(&s);
806 if (v->type & VF_USER) {
814 v->type |= VF_CACHED;
819 /* Used for operands of bitwise ops */
820 static unsigned long getvar_i_int(var *v)
822 double d = getvar_i(v);
824 /* Casting doubles to longs is undefined for values outside
825 * of target type range. Try to widen it as much as possible */
827 return (unsigned long)d;
828 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
829 return - (long) (unsigned long) (-d);
832 static var *copyvar(var *dest, const var *src)
836 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837 dest->number = src->number;
839 dest->string = xstrdup(src->string);
841 handle_special(dest);
/* Add 1 to the numeric value of v: the current value is read with
 * getvar_i() and the sum is stored back with setvar_i().
 * Returns the result of setvar_i(). */
845 static var *incvar(var *v)
847 return setvar_i(v, getvar_i(v) + 1.);
850 /* return true if v is number or numeric string */
851 static int is_numeric(var *v)
854 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
857 /* return 1 when value of v corresponds to true, 0 otherwise */
858 static int istrue(var *v)
861 return (v->number == 0) ? 0 : 1;
862 return (v->string && *(v->string)) ? 1 : 0;
865 /* temporary variables allocator. Last allocated should be first freed */
866 static var *nvalloc(int n)
874 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
879 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
880 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
882 g_cb->pos = g_cb->nv;
884 /*g_cb->next = NULL; - xzalloc did it */
885 if (pb) pb->next = g_cb;
891 while (v < g_cb->pos) {
900 static void nvfree(var *v)
904 if (v < g_cb->nv || v >= g_cb->pos)
905 syntax_error(EMSG_INTERNAL_ERROR);
907 for (p = v; p < g_cb->pos; p++) {
908 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
909 clear_array(iamarray(p));
910 free(p->x.array->items);
913 if (p->type & VF_WALK)
920 while (g_cb->prev && g_cb->pos == g_cb->nv) {
925 /* ------- awk program text parsing ------- */
927 /* Parse the next token pointed to by global pos, place results into global ttt.
928 * If the token isn't expected, give up. Return the token class
930 static uint32_t next_token(uint32_t expected)
932 #define concat_inserted (G.next_token__concat_inserted)
933 #define save_tclass (G.next_token__save_tclass)
934 #define save_info (G.next_token__save_info)
935 /* Initialized to TC_OPTERM: */
936 #define ltclass (G.next_token__ltclass)
947 } else if (concat_inserted) {
948 concat_inserted = FALSE;
949 t_tclass = save_tclass;
958 while (*p != '\n' && *p != '\0')
967 } else if (*p == '\"') {
971 if (*p == '\0' || *p == '\n')
972 syntax_error(EMSG_UNEXP_EOS);
973 *(s++) = nextchar(&p);
979 } else if ((expected & TC_REGEXP) && *p == '/') {
983 if (*p == '\0' || *p == '\n')
984 syntax_error(EMSG_UNEXP_EOS);
988 *(s-1) = bb_process_escape_sequence((const char **)&p);
999 } else if (*p == '.' || isdigit(*p)) {
1001 t_double = my_strtod(&p);
1003 syntax_error(EMSG_UNEXP_TOKEN);
1007 /* search for something known */
1017 /* if token class is expected, token
1018 * matches and it's not a longer word,
1019 * then this is what we are looking for
1021 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1022 && *tl == *p && strncmp(p, tl, l) == 0
1023 && !((tc & TC_WORD) && isalnum_(p[l]))
1034 /* it's a name (var/array/function),
1035 * otherwise it's something wrong
1038 syntax_error(EMSG_UNEXP_TOKEN);
1041 while (isalnum_(*(++p))) {
1046 /* also consume whitespace between functionname and bracket */
1047 if (!(expected & TC_VARIABLE))
1061 /* skipping newlines in some cases */
1062 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1065 /* insert concatenation operator when needed */
1066 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1067 concat_inserted = TRUE;
1071 t_info = OC_CONCAT | SS | P(35);
1078 /* Are we ready for this? */
1079 if (!(ltclass & expected))
1080 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1081 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1084 #undef concat_inserted
1090 static void rollback_token(void)
1095 static node *new_node(uint32_t info)
1099 n = xzalloc(sizeof(node));
1101 n->lineno = g_lineno;
1105 static node *mk_re_node(const char *s, node *n, regex_t *re)
1107 n->info = OC_REGEXP;
1110 xregcomp(re, s, REG_EXTENDED);
1111 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: first require a TC_SEQSTART token,
 * then parse an expression terminated by TC_SEQTERM.
 * Returns the node returned by parse_expr(). */
1116 static node *condition(void)
1118 next_token(TC_SEQSTART);
1119 return parse_expr(TC_SEQTERM);
1122 /* parse expression terminated by given argument, return ptr
1123 * to built subtree. Terminator is eaten by parse_expr */
1124 static node *parse_expr(uint32_t iexp)
1133 sn.r.n = glptr = NULL;
1134 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1136 while (!((tc = next_token(xtc)) & iexp)) {
1137 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1138 /* input redirection (<) attached to glptr node */
1139 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1141 xtc = TC_OPERAND | TC_UOPPRE;
1144 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1145 /* for binary and postfix-unary operators, jump back over
1146 * previous operators with higher priority */
1148 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1149 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1151 if ((t_info & OPCLSMASK) == OC_TERNARY)
1153 cn = vn->a.n->r.n = new_node(t_info);
1155 if (tc & TC_BINOP) {
1157 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1160 next_token(TC_GETLINE);
1161 /* give maximum priority to this pipe */
1162 cn->info &= ~PRIMASK;
1163 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1167 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1172 /* for operands and prefix-unary operators, attach them
1175 cn = vn->r.n = new_node(t_info);
1177 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1178 if (tc & (TC_OPERAND | TC_REGEXP)) {
1179 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1180 /* one should be very careful with switch on tclass -
1181 * only simple tclasses should be used! */
1186 v = hash_search(ahash, t_string);
1188 cn->info = OC_FNARG;
1189 cn->l.i = v->x.aidx;
1191 cn->l.v = newvar(t_string);
1193 if (tc & TC_ARRAY) {
1195 cn->r.n = parse_expr(TC_ARRTERM);
1202 v = cn->l.v = xzalloc(sizeof(var));
1204 setvar_i(v, t_double);
1206 setvar_s(v, t_string);
1210 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1215 cn->r.f = newfunc(t_string);
1216 cn->l.n = condition();
1220 cn = vn->r.n = parse_expr(TC_SEQTERM);
1226 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1230 cn->l.n = condition();
1239 /* add node to chain. Return ptr to alloc'd node */
1240 static node *chain_node(uint32_t info)
1245 seq->first = seq->last = new_node(0);
1247 if (seq->programname != g_progname) {
1248 seq->programname = g_progname;
1249 n = chain_node(OC_NEWSOURCE);
1250 n->l.s = xstrdup(g_progname);
1255 seq->last = n->a.n = new_node(OC_DONE);
1260 static void chain_expr(uint32_t info)
1264 n = chain_node(info);
1265 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266 if (t_tclass & TC_GRPTERM)
1270 static node *chain_loop(node *nn)
1272 node *n, *n2, *save_brk, *save_cont;
1274 save_brk = break_ptr;
1275 save_cont = continue_ptr;
1277 n = chain_node(OC_BR | Vx);
1278 continue_ptr = new_node(OC_EXEC);
1279 break_ptr = new_node(OC_EXEC);
1281 n2 = chain_node(OC_EXEC | Vx);
1284 continue_ptr->a.n = n2;
1285 break_ptr->a.n = n->r.n = seq->last;
1287 continue_ptr = save_cont;
1288 break_ptr = save_brk;
1293 /* parse group and attach it to chain */
1294 static void chain_group(void)
1300 c = next_token(TC_GRPSEQ);
1301 } while (c & TC_NEWLINE);
1303 if (c & TC_GRPSTART) {
1304 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1305 if (t_tclass & TC_NEWLINE) continue;
1309 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1311 chain_expr(OC_EXEC | Vx);
1312 } else { /* TC_STATEMNT */
1313 switch (t_info & OPCLSMASK) {
1315 n = chain_node(OC_BR | Vx);
1316 n->l.n = condition();
1318 n2 = chain_node(OC_EXEC);
1320 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1322 n2->a.n = seq->last;
1330 n = chain_loop(NULL);
1335 n2 = chain_node(OC_EXEC);
1336 n = chain_loop(NULL);
1338 next_token(TC_WHILE);
1339 n->l.n = condition();
1343 next_token(TC_SEQSTART);
1344 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1345 if (t_tclass & TC_SEQTERM) { /* for-in */
1346 if ((n2->info & OPCLSMASK) != OC_IN)
1347 syntax_error(EMSG_UNEXP_TOKEN);
1348 n = chain_node(OC_WALKINIT | VV);
1351 n = chain_loop(NULL);
1352 n->info = OC_WALKNEXT | Vx;
1354 } else { /* for (;;) */
1355 n = chain_node(OC_EXEC | Vx);
1357 n2 = parse_expr(TC_SEMICOL);
1358 n3 = parse_expr(TC_SEQTERM);
1368 n = chain_node(t_info);
1369 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1370 if (t_tclass & TC_OUTRDR) {
1372 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1374 if (t_tclass & TC_GRPTERM)
1379 n = chain_node(OC_EXEC);
1384 n = chain_node(OC_EXEC);
1385 n->a.n = continue_ptr;
1388 /* delete, next, nextfile, return, exit */
1395 static void parse_program(char *p)
1404 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1405 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1407 if (tclass & TC_OPTERM)
1411 if (tclass & TC_BEGIN) {
1415 } else if (tclass & TC_END) {
1419 } else if (tclass & TC_FUNCDECL) {
1420 next_token(TC_FUNCTION);
1422 f = newfunc(t_string);
1423 f->body.first = NULL;
1425 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1426 v = findvar(ahash, t_string);
1427 v->x.aidx = (f->nargs)++;
1429 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1436 } else if (tclass & TC_OPSEQ) {
1438 cn = chain_node(OC_TEST);
1439 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1440 if (t_tclass & TC_GRPSTART) {
1444 chain_node(OC_PRINT);
1446 cn->r.n = mainseq.last;
1448 } else /* if (tclass & TC_GRPSTART) */ {
1456 /* -------- program execution part -------- */
1458 static node *mk_splitter(const char *s, tsplitter *spl)
1466 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1468 regfree(ire); // TODO: nuke ire, use re+1?
1470 if (strlen(s) > 1) {
1471 mk_re_node(s, n, re);
1473 n->info = (uint32_t) *s;
1479 /* Use node as a regular expression. Supplied with node ptr and regex_t
1480 * storage space. Return ptr to regex (if the result points to preg, it should
1481 * be regfree'd manually later).
1483 static regex_t *as_regex(node *op, regex_t *preg)
1489 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1490 return icase ? op->r.ire : op->l.re;
1493 s = getvar_s(evaluate(op, v));
1495 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1496 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1497 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1498 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1499 * (maybe gsub is not supposed to use REG_EXTENDED?).
1501 if (regcomp(preg, s, cflags)) {
1502 cflags &= ~REG_EXTENDED;
1503 xregcomp(preg, s, cflags);
/* Gradually growing buffer.
 * Leaves *b untouched when it is already allocated and n is below *size.
 * Otherwise sets *size to n + n/2 + 80 and reallocates *b to that many
 * bytes with xrealloc(). */
static void qrealloc(char **b, int n, int *size)
{
	/* buffer exists and is still large enough: nothing to do */
	if (*b && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1518 /* resize field storage space */
1519 static void fsrealloc(int size)
1523 if (size >= maxfields) {
1525 maxfields = size + 16;
1526 Fields = xrealloc(Fields, maxfields * sizeof(var));
1527 for (; i < maxfields; i++) {
1528 Fields[i].type = VF_SPECIAL;
1529 Fields[i].string = NULL;
1533 if (size < nfields) {
1534 for (i = size; i < nfields; i++) {
1541 static int awk_split(const char *s, node *spl, char **slist)
1546 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1548 /* in worst case, each char would be a separate field */
1549 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1552 c[0] = c[1] = (char)spl->info;
1554 if (*getvar_s(intvar[RS]) == '\0')
1557 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1559 return n; /* "": zero fields */
1560 n++; /* at least one field will be there */
1562 l = strcspn(s, c+2); /* len till next NUL or \n */
1563 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1564 && pmatch[0].rm_so <= l
1566 l = pmatch[0].rm_so;
1567 if (pmatch[0].rm_eo == 0) {
1571 n++; /* we saw yet another delimiter */
1573 pmatch[0].rm_eo = l;
1578 /* make sure we remove *all* of the separator chars */
1581 } while (++l < pmatch[0].rm_eo);
1583 s += pmatch[0].rm_eo;
1587 if (c[0] == '\0') { /* null split */
1595 if (c[0] != ' ') { /* single-character split */
1597 c[0] = toupper(c[0]);
1598 c[1] = tolower(c[1]);
1601 while ((s1 = strpbrk(s1, c))) {
1609 s = skip_whitespace(s);
1612 while (*s && !isspace(*s))
1619 static void split_f0(void)
1621 /* static char *fstrings; */
1622 #define fstrings (G.split_f0__fstrings)
1633 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1636 for (i = 0; i < n; i++) {
1637 Fields[i].string = nextword(&s);
1638 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1641 /* set NF manually to avoid side effects */
1643 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1644 intvar[NF]->number = nfields;
1648 /* perform additional actions when some internal variables changed */
/*
 * v is the variable to react to.  The VF_SPECIAL flag is tested first;
 * after that the action depends on which variable v is: NF, $0 (F0), FS,
 * RS, IGNORECASE, and finally code that treats v as an element of Fields[].
 */
1649 static void handle_special(var *v)
1653 const char *sep, *s;
1654 int sl, l, len, i, bsize;
1656 if (!(v->type & VF_SPECIAL))
/* NF: $0 is rebuilt from the first n fields, joined with OFS */
1659 if (v == intvar[NF]) {
1660 n = (int)getvar_i(v);
1663 /* recalculate $0 */
1664 sep = getvar_s(intvar[OFS]);
1668 for (i = 0; i < n; i++) {
1669 s = getvar_s(&Fields[i]);
/* separator text (sl bytes) and field text (l bytes) are copied into b at
 * offset len; qrealloc() makes room for both */
1672 memcpy(b+len, sep, sl);
1675 qrealloc(&b, len+l+sl, &bsize);
1676 memcpy(b+len, s, l);
/* the assembled buffer becomes the new value of $0 */
1681 setvar_p(intvar[F0], b);
/* $0: mark the record as not split into fields */
1684 } else if (v == intvar[F0]) {
1685 is_f0_split = FALSE;
/* FS and RS: rebuild the matching splitter from the variable's new string */
1687 } else if (v == intvar[FS]) {
1688 mk_splitter(getvar_s(v), &fsplitter);
1690 } else if (v == intvar[RS]) {
1691 mk_splitter(getvar_s(v), &rsplitter);
1693 } else if (v == intvar[IGNORECASE]) {
/* NOTE(review): the branch header for the next lines is not visible here;
 * they look like the field-variable case.  v-Fields is v's index within
 * Fields[]; NF keeps its value n when n is greater than that index and is
 * set to index+1 otherwise. */
1697 n = getvar_i(intvar[NF]);
1698 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1699 /* right here v is invalid. Just to note... */
1703 /* step through func/builtin/etc arguments */
/* Callers in this file pass the address of their current argument node and
 * call this repeatedly, using the returned node as the next argument. */
1704 static node *nextarg(node **pn)
/* a non-NULL node of class OC_COMMA is handled separately from any other
 * node (the bodies of both paths are not visible here) */
1709 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
/*
 * Attach a walker list to variable v for iterating (for..in) over array.
 * The buffer is zero-filled and sized for two char pointers followed by
 * array->glen bytes (the summed length of the item names) plus 2; the
 * name of each visited item is copied into it with strcpy().
 */
1718 static void hashwalk_init(var *v, xhash *array)
/* VF_WALK set means v already has an allocated walker list */
1724 if (v->type & VF_WALK)
1728 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
/* both pointer slots initially point just past the two slots themselves */
1729 w[0] = w[1] = (char *)(w + 2);
/* go through every slot of the hash table */
1730 for (i = 0; i < array->csize; i++) {
1731 hi = array->items[i];
1733 strcpy(*w, hi->name);
/*
 * Give v the next word obtained by nextword() through the pointer at w+1.
 * NOTE(review): the lines that set up w and compute the return value are
 * not visible here; the OC_WALKNEXT caller uses the int result as a
 * boolean to choose the next node.  Presumably w is the walker list built
 * by hashwalk_init() -- confirm against the full source.
 */
1740 static int hashwalk_next(var *v)
1748 setvar_s(v, nextword(w+1));
1752 /* evaluate node, return 1 when result is true, 0 otherwise */
1753 static int ptest(node *pattern)
1755 /* ptest__v is "static": to save stack space? */
/* G.ptest__v is the result storage handed to evaluate(); istrue() turns the
 * var that evaluate() returns into the value returned here */
1756 return istrue(evaluate(pattern, &G.ptest__v));
1759 /* read next record from stream rsm into a variable v */
/*
 * The separator comes from rsplitter.n.  The visible code distinguishes a
 * regular-expression separator (OC_REGEXP), a single non-NUL separator
 * character c, and the remaining c == NUL case, which tests for newlines
 * and searches for two consecutive newlines.
 * For the single-character case, when c is not found the first NUL byte
 * between b+pp and b+p is used instead.
 * Data is read from the descriptor of rsm->F with safe_read() into a
 * buffer that qrealloc() enlarges; memmove() shifts buffered bytes to the
 * front of that buffer.
 * so and eo are offsets into b: RT is set to the text starting at b+so
 * after b[eo] has been overwritten with NUL (the old byte is kept in c).
 * ERRNO is set from errno on a path whose condition is not visible here.
 */
1760 static int awk_getline(rstream *rsm, var *v)
1763 regmatch_t pmatch[2];
1764 int a, p, pp=0, size;
1765 int fd, so, eo, r, rp;
1768 /* we're using our own buffer since we need access to accumulating
1771 fd = fileno(rsm->F);
1776 c = (char) rsplitter.n.info;
1779 if (!m) qrealloc(&m, 256, &size);
1785 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1786 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1787 b, 1, pmatch, 0) == 0) {
1788 so = pmatch[0].rm_so;
1789 eo = pmatch[0].rm_eo;
1793 } else if (c != '\0') {
1794 s = strchr(b+pp, c);
1795 if (!s) s = memchr(b+pp, '\0', p - pp);
1802 while (b[rp] == '\n')
1804 s = strstr(b+rp, "\n\n");
1807 while (b[eo] == '\n') eo++;
1815 memmove(m, (const void *)(m+a), p+1);
1820 qrealloc(&m, a+p+128, &size);
1823 p += safe_read(fd, b+p, size-p-1);
1827 setvar_i(intvar[ERRNO], errno);
1836 c = b[so]; b[so] = '\0';
1840 c = b[eo]; b[eo] = '\0';
1841 setvar_s(intvar[RT], b+so);
// Format the number n into b (at most size bytes, via snprintf) and keep
// snprintf's result in r.
// When int_as_int is set and n equals its own conversion to int, "%d" is used.
// The do/while loop leaves the last character of format in c: one of
// "diouxX" prints (int)n with the caller's format, one of "eEfgG" prints n
// as a double; syntax_error(EMSG_INV_FMT) follows, presumably as the
// fallback for any other character.
// NOTE(review): converting a double that is outside the range of int is
// undefined behaviour in C, so both (int)n uses deserve a range check.
// NOTE(review): strchr() also matches the terminating NUL, so an empty
// format (c == 0) takes the integer branch -- confirm that is intended.
1853 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1857 const char *s = format;
1859 if (int_as_int && n == (int)n) {
1860 r = snprintf(b, size, "%d", (int)n);
1862 do { c = *s; } while (c && *++s);
1863 if (strchr("diouxX", c)) {
1864 r = snprintf(b, size, format, (int)n);
1865 } else if (strchr("eEfgG", c)) {
1866 r = snprintf(b, size, format, n);
1868 syntax_error(EMSG_INV_FMT);
1874 /* formatted output into an allocated buffer, return ptr to buffer */
/*
 * The first argument node supplies the format string, of which a private
 * copy is made; further arguments are fetched with nextarg() as the
 * conversions consume them.  i is the running output length in b.
 * NOTE(review): the sprintf() calls below are unbounded; only
 * (f - s) + MAXVARFMT bytes (plus the string length for %s) are reserved
 * per chunk, so a large field width in the format could overrun b -- confirm.
 */
1875 static char *awk_printf(node *n)
1880 int i, j, incr, bsize;
1885 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* keep scanning while the current character is not '%', or it is a '%'
 * immediately followed by another '%' */
1890 while (*f && (*f != '%' || *(++f) == '%'))
/* move on to the first alphabetic character (the conversion letter); the
 * check guarding the error below is not visible here */
1892 while (*f && !isalpha(*f)) {
1894 syntax_error("%*x formats are not supported");
/* reserve room for this chunk: its format text length plus MAXVARFMT */
1898 incr = (f - s) + MAXVARFMT;
1899 qrealloc(&b, incr + i, &bsize);
1904 arg = evaluate(nextarg(&n), v);
/* c == 'c' (or c == NUL): a numeric argument is converted to a char,
 * otherwise the first character of the string value is used */
1907 if (c == 'c' || !c) {
1908 i += sprintf(b+i, s, is_numeric(arg) ?
1909 (char)getvar_i(arg) : *getvar_s(arg));
1910 } else if (c == 's') {
/* the buffer is grown by strlen(s1) before the string is printed */
1912 qrealloc(&b, incr+i+strlen(s1), &bsize);
1913 i += sprintf(b+i, s, s1);
/* numeric conversion is delegated to fmt_num() with int_as_int FALSE */
1915 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1919 /* if there was an error while sprintf, return value is negative */
/* resize the allocation to i + 1 bytes */
1923 b = xrealloc(b, i + 1);
/*
 * NOTE(review): in the description below, (n) presumably refers to the rn
 * parameter and (dst) to dest.
 * as_regex() is handed &sreg; when the regex it returns is &sreg, it is
 * released with regfree() at the end.
 * REG_NOTBOL is passed to regexec() whenever sp is no longer the start of
 * the source string.
 * For each match, eo bytes from sp are copied into the output buffer ds at
 * offset di.  The replacement text is then walked character by character:
 * '&', or a digit when ex is set, is treated specially, and the visible
 * code copies the text of a pmatch[] entry into ds.  After the loop,
 * whatever remains at sp is appended with strcpy().
 */
1930 /* common substitution routine
1931 * replace (nm) substring of (src) that match (n) with (repl), store
1932 * result into (dest), return number of substitutions. If nm=0, replace
1933 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1934 * subexpression matching (\1-\9)
1936 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1941 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1942 regmatch_t pmatch[10];
1945 re = as_regex(rn, &sreg);
1946 if (!src) src = intvar[F0];
1947 if (!dest) dest = intvar[F0];
1952 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1953 so = pmatch[0].rm_so;
1954 eo = pmatch[0].rm_eo;
1956 qrealloc(&ds, di + eo + rl, &dssize);
1957 memcpy(ds + di, sp, eo);
1963 for (s = repl; *s; s++) {
1969 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1970 di -= ((nbs + 3) >> 1);
1979 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1980 qrealloc(&ds, di + rl + n, &dssize);
1981 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1993 if (!ds[di++]) break;
1997 qrealloc(&ds, di + strlen(sp), &dssize);
1998 strcpy(ds + di, sp);
2000 if (re == &sreg) regfree(re);
// Parse ds as whitespace-separated numbers in the order year, month, day,
// hour, minute, second (a seventh number is also accepted by the format;
// its destination is not visible here) and return the result of mktime().
// The year is given as a full calendar year: a value below 1900 is one of
// the tested conditions, and 1900 is subtracted before mktime() is called.
2004 static NOINLINE int do_mktime(const char *ds)
2009 /*memset(&then, 0, sizeof(then)); - not needed */
2010 then.tm_isdst = -1; /* default is unknown */
2012 /* manpage of mktime says these fields are ints,
2013 * so we can sscanf stuff directly into them */
/* NOTE(review): the fields are int, but six of the conversions are %u,
 * which expects pointers to unsigned int -- confirm this is acceptable */
2014 count = sscanf(ds, "%u %u %u %u %u %u %d",
2015 &then.tm_year, &then.tm_mon, &then.tm_mday,
2016 &then.tm_hour, &then.tm_min, &then.tm_sec,
2020 || (unsigned)then.tm_mon < 1
2021 || (unsigned)then.tm_year < 1900
2027 then.tm_year -= 1900;
2029 return mktime(&then);
/*
 * Execute the builtin selected by op->info; the visible cases store their
 * outcome in res through setvar_i()/setvar_s() or pass res on to awk_sub().
 * Up to four argument nodes are collected with nextarg(); bits in the info
 * word decide whether each one is also evaluated (av[]) and converted to a
 * string (as[]).  info >> 30 is compared with nargs and too few arguments
 * raise EMSG_TOO_FEW_ARGS.
 * Further down, the visible code also formats a time with strftime() into
 * g_buf, calls do_mktime(), sets the variables RSTART and RLENGTH from
 * pmatch[0] after a regexec(), and calls awk_sub() in three variants.
 * NOTE(review): the case labels of the switch are not visible in this
 * view, so the notes below describe statements, not named builtins.
 */
2032 static NOINLINE var *exec_builtin(node *op, var *res)
/* the temporary splitter lives in the globals struct G */
2034 #define tspl (G.exec_builtin__tspl)
2041 regmatch_t pmatch[2];
2051 isr = info = op->info;
/* gather at most four arguments; av[2] and av[3] start out as NULL */
2054 av[2] = av[3] = NULL;
2055 for (i = 0; i < 4 && op; i++) {
2056 an[i] = nextarg(&op);
2057 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2058 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2063 if ((uint32_t)nargs < (info >> 30))
2064 syntax_error(EMSG_TOO_FEW_ARGS);
2066 switch (info & OPNMASK) {
2069 #if ENABLE_FEATURE_AWK_LIBM
2070 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2072 syntax_error(EMSG_NO_MATH);
/* a regexp node as third argument is used as the splitter directly;
 * anything else is evaluated and turned into the temporary splitter tspl */
2078 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2079 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2084 n = awk_split(as[0], spl, &s);
/* the array in av[1] is emptied, then filled with n words at indices 1..n */
2086 clear_array(iamarray(av[1]));
2087 for (i = 1; i <= n; i++)
2088 setari_u(av[1], i, nextword(&s1));
/* i is the zero-based start taken from av[1]; the length is av[2] when more
 * than two arguments were given, otherwise l-i */
2095 i = getvar_i(av[1]) - 1;
2098 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2100 s = xstrndup(as[0]+i, n);
2104 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2105 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2107 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2111 setvar_i(res, ~getvar_i_int(av[0]));
2115 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2119 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2123 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2127 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
/* a private copy of the string is made and its characters are mapped
 * through the function pointer to_xxx */
2137 s1 = s = xstrdup(as[0]);
2139 *s1 = (*to_xxx)(*s1);
/* l is the length of as[0] minus ll (ll is set on a line not visible here,
 * presumably the length of as[1]); on a strstr() hit n becomes the 1-based
 * position of as[1] within as[0] */
2148 l = strlen(as[0]) - ll;
2149 if (ll > 0 && l >= 0) {
2151 s = strstr(as[0], as[1]);
2152 if (s) n = (s - as[0]) + 1;
2154 /* this piece of code is terribly slow and
2155 * really should be rewritten
2157 for (i=0; i<=l; i++) {
2158 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2170 tt = getvar_i(av[1]);
2173 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2174 i = strftime(g_buf, MAXVARFMT,
2175 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2178 setvar_s(res, g_buf);
2182 setvar_i(res, do_mktime(as[0]));
2186 re = as_regex(an[1], &sreg);
2187 n = regexec(re, as[0], 1, pmatch, 0);
2192 pmatch[0].rm_so = 0;
2193 pmatch[0].rm_eo = -1;
2195 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2196 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2197 setvar_i(res, pmatch[0].rm_so);
2198 if (re == &sreg) regfree(re);
2202 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2206 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2210 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2220 * Evaluate node - the heart of the program. Supplied with subtree
2221 * and place where to store result. returns ptr to result.
2223 #define XC(n) ((n) >> 8)
2225 static var *evaluate(node *op, var *res)
2227 /* This procedure is recursive so we should count every byte */
/* fnargs, seed and sreg are kept in the globals struct G */
2228 #define fnargs (G.evaluate__fnargs)
2229 /* seed is initialized to 1 */
2230 #define seed (G.evaluate__seed)
2231 #define sreg (G.evaluate__sreg)
2253 return setvar_s(res, NULL);
2259 opn = (opinfo & OPNMASK);
/* remember the source line of the node being executed */
2260 g_lineno = op->lineno;
2262 /* execute inevitable things */
/* OF_RES1 / OF_RES2 request evaluation of the left / right operand into
 * v1 / v1+1; OF_STR1, OF_STR2 and OF_NUM1 additionally fetch the string or
 * numeric value of the operand */
2264 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2265 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2266 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2267 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2268 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
/* XC() shifts the class value right by 8 bits; the switch expression and
 * the case labels below both go through it */
2270 switch (XC(opinfo & OPCLSMASK)) {
2272 /* -- iterative node type -- */
2276 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2277 /* it's range pattern */
/* OF_CHECKED on the node records that the first pattern has matched; it is
 * cleared again as soon as the second pattern matches */
2278 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2279 op->info |= OF_CHECKED;
2280 if (ptest(op1->r.n))
2281 op->info &= ~OF_CHECKED;
2288 op = (ptest(op1)) ? op->a.n : op->r.n;
2292 /* just evaluate an expression, also used as unconditional jump */
2296 /* branch, used in if-else and various loops */
2298 op = istrue(L.v) ? op->a.n : op->r.n;
2301 /* initialize for-in loop */
2302 case XC( OC_WALKINIT ):
2303 hashwalk_init(L.v, iamarray(R.v));
2306 /* get next array item */
2307 case XC( OC_WALKNEXT ):
2308 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2311 case XC( OC_PRINT ):
2312 case XC( OC_PRINTF ):
/* redirected output: the stream entry is obtained by name from newfile();
 * it is opened either with popen() for writing or with xfopen() in write
 * or append mode depending on opn */
2315 X.rsm = newfile(R.s);
2318 X.rsm->F = popen(R.s, "w");
2319 if (X.rsm->F == NULL)
2320 bb_perror_msg_and_die("popen");
2323 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2329 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2331 fputs(getvar_s(intvar[F0]), X.F);
2334 L.v = evaluate(nextarg(&op1), v1);
/* values typed as numbers are formatted through OFMT with the integer
 * shortcut of fmt_num() enabled; the string value is printed otherwise */
2335 if (L.v->type & VF_NUMBER) {
2336 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2337 getvar_i(L.v), TRUE);
2340 fputs(getvar_s(L.v), X.F);
/* OFS is written while more arguments remain, ORS at the end */
2343 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2346 fputs(getvar_s(intvar[ORS]), X.F);
2348 } else { /* OC_PRINTF */
2349 L.s = awk_printf(op1);
2356 case XC( OC_DELETE ):
/* the operand has to be a variable or a function argument, otherwise
 * EMSG_NOT_ARRAY is raised; afterwards either one element is removed by
 * key or the whole array is cleared */
2357 X.info = op1->info & OPCLSMASK;
2358 if (X.info == OC_VAR) {
2360 } else if (X.info == OC_FNARG) {
2361 R.v = &fnargs[op1->l.i];
2363 syntax_error(EMSG_NOT_ARRAY);
2368 L.s = getvar_s(evaluate(op1->r.n, v1));
2369 hash_remove(iamarray(R.v), L.s);
2371 clear_array(iamarray(R.v));
2375 case XC( OC_NEWSOURCE ):
2376 g_progname = op->l.s;
2379 case XC( OC_RETURN ):
2383 case XC( OC_NEXTFILE ):
2394 /* -- recursive node type -- */
2398 if (L.v == intvar[NF])
2402 case XC( OC_FNARG ):
2403 L.v = &fnargs[op->l.i];
/* with a subscript node present the array element named by R.s is the
 * result, otherwise the variable itself */
2405 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
/* 1 when the key L.s exists in the array R.v, else 0 */
2409 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2412 case XC( OC_REGEXP ):
2414 L.s = getvar_s(intvar[F0]);
2417 case XC( OC_MATCH ):
/* match L.s against the regex; the result is inverted when opn is '!' */
2420 X.re = as_regex(op1, &sreg);
2421 R.i = regexec(X.re, L.s, 0, NULL, 0);
2422 if (X.re == &sreg) regfree(X.re);
2423 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2427 /* if source is a temporary string, just relink it to dest */
2428 if (R.v == v1+1 && R.v->string) {
2429 res = setvar_p(L.v, R.v->string);
2432 res = copyvar(L.v, R.v);
2436 case XC( OC_TERNARY ):
/* the right child must be an OC_COLON node holding the two alternatives */
2437 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2438 syntax_error(EMSG_POSSIBLE_ERROR);
2439 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* calling a function that has no body is an error */
2443 if (!op->r.f->body.first)
2444 syntax_error(EMSG_UNDEF_FUNC);
/* nargs+1 vars are allocated for the call; each evaluated argument is
 * linked to its source through VF_CHILD and x.parent */
2446 X.v = R.v = nvalloc(op->r.f->nargs+1);
2448 L.v = evaluate(nextarg(&op1), v1);
2450 R.v->type |= VF_CHILD;
2451 R.v->x.parent = L.v;
2452 if (++R.v - X.v >= op->r.f->nargs)
2460 res = evaluate(op->r.f->body.first, res);
2467 case XC( OC_GETLINE ):
2468 case XC( OC_PGETLINE ):
/* the stream entry is obtained by name; OC_PGETLINE opens a pipe for
 * reading, the other form opens a file for reading */
2470 X.rsm = newfile(L.s);
2472 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2473 X.rsm->F = popen(L.s, "r");
2474 X.rsm->is_pipe = TRUE;
2476 X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
2480 if (!iF) iF = next_input_file();
2485 setvar_i(intvar[ERRNO], errno);
2493 L.i = awk_getline(X.rsm, R.v);
2496 incvar(intvar[FNR]);
2503 /* simple builtins */
2504 case XC( OC_FBLTIN ):
2512 R.d = (double)rand() / (double)RAND_MAX;
2514 #if ENABLE_FEATURE_AWK_LIBM
2540 syntax_error(EMSG_NO_MATH);
/* with an operand the seed is its numeric value, otherwise the current time */
2545 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2555 L.s = getvar_s(intvar[F0]);
/* result is 0 unless ENABLE_FEATURE_ALLOW_EXEC is on and the command is
 * non-empty; otherwise it is the system() status shifted right by 8 bits */
2561 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2562 ? (system(L.s) >> 8) : 0;
2570 X.rsm = newfile(L.s);
/* the stream is looked up in fdhash by name; pipes are closed with
 * pclose(), other streams with fclose(); its buffer is freed and the
 * entry removed from fdhash */
2579 X.rsm = (rstream *)hash_search(fdhash, L.s);
2581 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2582 free(X.rsm->buffer);
2583 hash_remove(fdhash, L.s);
2586 setvar_i(intvar[ERRNO], errno);
2593 case XC( OC_BUILTIN ):
2594 res = exec_builtin(op, res);
2597 case XC( OC_SPRINTF ):
2598 setvar_p(res, awk_printf(op1));
2601 case XC( OC_UNARY ):
2603 L.d = R.d = getvar_i(R.v);
2618 L.d = istrue(X.v) ? 0 : 1;
2629 case XC( OC_FIELD ):
2630 R.i = (int)getvar_i(R.v);
/* field numbers are 1-based, Fields[] is 0-based */
2637 res = &Fields[R.i - 1];
2641 /* concatenation (" ") and index joining (",") */
2642 case XC( OC_CONCAT ):
2643 case XC( OC_COMMA ):
/* opn is reused as the byte count for the joined string: both lengths
 * plus 2; for OC_COMMA the length of SUBSEP is added on top */
2644 opn = strlen(L.s) + strlen(R.s) + 2;
2647 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2648 L.s = getvar_s(intvar[SUBSEP]);
2649 X.s = xrealloc(X.s, opn + strlen(L.s));
/* short-circuit forms: the right operand is only tested via ptest() when
 * the left operand does not already decide the result */
2657 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2661 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2664 case XC( OC_BINARY ):
2665 case XC( OC_REPLACE ):
2666 R.d = getvar_i(R.v);
2678 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2682 #if ENABLE_FEATURE_AWK_LIBM
2683 L.d = pow(L.d, R.d);
2685 syntax_error(EMSG_NO_MATH);
2689 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
/* remainder computed by truncating the quotient to int.
 * NOTE(review): a quotient outside the range of int makes the conversion
 * undefined -- confirm how large operands should behave */
2690 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores the value in res; otherwise it is stored into X.v */
2693 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2696 case XC( OC_COMPARE ):
/* numeric comparison only when both sides are numeric; otherwise the
 * string values are compared, ignoring case when icase is set */
2697 if (is_numeric(L.v) && is_numeric(R.v)) {
2698 L.d = getvar_i(L.v) - getvar_i(R.v);
2700 L.s = getvar_s(L.v);
2701 R.s = getvar_s(R.v);
2702 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* the lowest bit of opn selects whether the test result is negated */
2704 switch (opn & 0xfe) {
2715 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2719 syntax_error(EMSG_POSSIBLE_ERROR);
2721 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2723 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2736 /* -------- main & co. -------- */
/*
 * Evaluate the endseq chain, then go through every slot of fdhash and
 * pclose() each stream that is open and marked as a pipe.
 * NOTE(review): how r is used and what is returned is not visible here.
 */
2738 static int awk_exit(int r)
2749 evaluate(endseq.first, &tv);
2752 /* waiting for children */
2753 for (i = 0; i < fdhash->csize; i++) {
2754 hi = fdhash->items[i];
/* only entries with an open FILE that were opened as pipes are closed here */
2756 if (hi->data.rs.F && hi->data.rs.is_pipe)
2757 pclose(hi->data.rs.F);
2765 /* if expr looks like "var=value", perform assignment and return 1,
2766 * otherwise return 0 */
/* Works on a private copy of expr, so the caller's string is not modified. */
2767 static int is_assignment(const char *expr)
2769 char *exprc, *s, *s0, *s1;
2771 exprc = xstrdup(expr);
/* the first character must be accepted by isalnum_() and an '=' must be
 * present somewhere in the text */
2772 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value text is rewritten one character per nextchar() call */
2780 *(s1++) = nextchar(&s);
/* NOTE(review): exprc is used as the variable name here, so the '=' was
 * presumably overwritten with NUL on a line not visible -- confirm */
2783 setvar_u(newvar(exprc), s0);
2788 /* switch to next input file */
/*
 * The stream state (rsm) and the files_happen flag are kept in the globals
 * struct G.  A previously opened FILE is closed and the buffer positions
 * are reset before ARGV is consulted for the next name.
 */
2789 static rstream *next_input_file(void)
2791 #define rsm (G.next_input_file__rsm)
2792 #define files_happen (G.next_input_file__files_happen)
2795 const char *fname, *ind;
2797 if (rsm.F) fclose(rsm.F);
2799 rsm.pos = rsm.adv = 0;
/* true when incrementing ARGIND would reach or pass ARGC */
2802 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
/* ARGIND is incremented and its string form is the key into ARGV */
2808 ind = getvar_s(incvar(intvar[ARGIND]));
2809 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
/* empty entries are not opened, and neither are entries for which
 * is_assignment() reports a "var=value" assignment */
2810 if (fname && *fname && !is_assignment(fname))
2811 F = xfopen_stdin(fname);
2815 files_happen = TRUE;
2816 setvar_s(intvar[FILENAME], fname);
/*
 * Applet entry point.  Visible steps, in order: force the "C" numeric
 * locale, allocate g_buf and the four hash tables, create the internal
 * variables, register the standard streams, import the environment into
 * ENVIRON, process the -F, -v, -f and -W options, parse the program, fill
 * ARGC and ARGV, evaluate the beginseq chain, then read records from the
 * input files and evaluate the mainseq chain for each of them.
 */
2823 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2824 int awk_main(int argc, char **argv)
2827 char *opt_F, *opt_W;
2828 llist_t *list_v = NULL;
2829 llist_t *list_f = NULL;
2834 char *vnames = (char *)vNames; /* cheat */
2835 char *vvalues = (char *)vValues;
2839 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2840 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2841 if (ENABLE_LOCALE_SUPPORT)
2842 setlocale(LC_NUMERIC, "C");
2846 /* allocate global buffer */
2847 g_buf = xmalloc(MAXVARFMT + 1);
2849 vhash = hash_init();
2850 ahash = hash_init();
2851 fdhash = hash_init();
2852 fnhash = hash_init();
2854 /* initialize variables */
/* names come from vnames word by word; a value word is consumed unless the
 * next value byte is '\377'; a '*' found in vnames at the point tested
 * below marks the variable as VF_SPECIAL */
2855 for (i = 0; *vnames; i++) {
2856 intvar[i] = v = newvar(nextword(&vnames));
2857 if (*vvalues != '\377')
2858 setvar_s(v, nextword(&vvalues));
2862 if (*vnames == '*') {
2863 v->type |= VF_SPECIAL;
/* build the field and record splitters from the initial FS and RS values */
2868 handle_special(intvar[FS]);
2869 handle_special(intvar[RS]);
/* register the three standard streams under their /dev names */
2871 newfile("/dev/stdin")->F = stdin;
2872 newfile("/dev/stdout")->F = stdout;
2873 newfile("/dev/stderr")->F = stderr;
2875 /* Huh, people report that sometimes environ is NULL. Oh well. */
2876 if (environ) for (envp = environ; *envp; envp++) {
2877 /* environ is writable, thus we don't strdup it needlessly */
2879 char *s1 = strchr(s, '=');
2882 /* Both findvar and setvar_u take const char*
2883 * as 2nd arg -> environment is not trashed */
2884 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2888 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2889 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2893 setvar_s(intvar[FS], opt_F); // -F
/* every -v argument has to be accepted by is_assignment() */
2894 while (list_v) { /* -v */
2895 if (!is_assignment(llist_pop(&list_v)))
2898 if (list_f) { /* -f */
2903 g_progname = llist_pop(&list_f);
2904 from_file = xfopen_stdin(g_progname);
2905 /* one byte is reserved for some trick in next_token */
/* the program file is read in pieces of up to 4094 bytes, starting at
 * offset 1, growing s before each read; parsing starts at s + 1 */
2906 for (i = j = 1; j > 0; i += j) {
2907 s = xrealloc(s, i + 4096);
2908 j = fread(s + i, 1, 4094, from_file);
2912 parse_program(s + 1);
2916 } else { // no -f: take program from 1st parameter
2919 g_progname = "cmd. line";
2920 parse_program(*argv++);
2922 if (opt & 0x8) // -W
2923 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2925 /* fill in ARGV array */
2926 setvar_i(intvar[ARGC], argc);
2927 setari_u(intvar[ARGV], 0, "awk");
2930 setari_u(intvar[ARGV], ++i, *argv++);
2932 evaluate(beginseq.first, &tv);
/* with neither a main chain nor an END chain there is no input to read */
2933 if (!mainseq.first && !endseq.first)
2934 awk_exit(EXIT_SUCCESS);
2936 /* input file could already be opened in BEGIN block */
2937 if (!iF) iF = next_input_file();
2939 /* passing through input files */
2942 setvar_i(intvar[FNR], 0);
/* each record read into $0 bumps FNR and runs the main chain */
2944 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2947 incvar(intvar[FNR]);
2948 evaluate(mainseq.first, &tv);
2955 syntax_error(strerror(errno));
2957 iF = next_input_file();
2960 awk_exit(EXIT_SUCCESS);