1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
14 /* This is a NOEXEC applet. Be very careful! */
17 /* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...) do {} while (0)
20 #define debug_printf_eval(...) do {} while (0)
22 #ifndef debug_printf_walker
23 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
25 #ifndef debug_printf_eval
26 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
36 #define VF_ARRAY 0x0002 /* 1 = it's an array */
38 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
39 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
40 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
41 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
42 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
43 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
44 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
46 /* these flags are static, don't change them when value is changed */
47 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
49 typedef struct walker_list {
52 struct walker_list *prev;
57 typedef struct var_s {
58 unsigned type; /* flags */
62 int aidx; /* func arg idx (for compilation stage) */
63 struct xhash_s *array; /* array ptr */
64 struct var_s *parent; /* for func args, ptr to actual parameter */
65 walker_list *walker; /* list of array elements (for..in) */
69 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
70 typedef struct chain_s {
73 const char *programname;
77 typedef struct func_s {
83 typedef struct rstream_s {
92 typedef struct hash_item_s {
94 struct var_s v; /* variable/array hash */
95 struct rstream_s rs; /* redirect streams hash */
96 struct func_s f; /* functions hash */
98 struct hash_item_s *next; /* next in chain */
99 char name[1]; /* really it's longer */
102 typedef struct xhash_s {
103 unsigned nel; /* num of elements */
104 unsigned csize; /* current hash size */
105 unsigned nprime; /* next hash size in PRIMES[] */
106 unsigned glen; /* summary length of item names */
107 struct hash_item_s **items;
111 typedef struct node_s {
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
140 typedef struct tsplitter_s {
145 /* simple token classes */
146 /* Order and hex values are very important!!! See next_token() */
147 #define TC_SEQSTART 1 /* ( */
148 #define TC_SEQTERM (1 << 1) /* ) */
149 #define TC_REGEXP (1 << 2) /* /.../ */
150 #define TC_OUTRDR (1 << 3) /* | > >> */
151 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
152 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153 #define TC_BINOPX (1 << 6) /* two-opnd operator */
154 #define TC_IN (1 << 7)
155 #define TC_COMMA (1 << 8)
156 #define TC_PIPE (1 << 9) /* input redirection pipe */
157 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158 #define TC_ARRTERM (1 << 11) /* ] */
159 #define TC_GRPSTART (1 << 12) /* { */
160 #define TC_GRPTERM (1 << 13) /* } */
161 #define TC_SEMICOL (1 << 14)
162 #define TC_NEWLINE (1 << 15)
163 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164 #define TC_WHILE (1 << 17)
165 #define TC_ELSE (1 << 18)
166 #define TC_BUILTIN (1 << 19)
167 #define TC_GETLINE (1 << 20)
168 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
169 #define TC_BEGIN (1 << 22)
170 #define TC_END (1 << 23)
171 #define TC_EOF (1 << 24)
172 #define TC_VARIABLE (1 << 25)
173 #define TC_ARRAY (1 << 26)
174 #define TC_FUNCTION (1 << 27)
175 #define TC_STRING (1 << 28)
176 #define TC_NUMBER (1 << 29)
178 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
180 /* combined token classes */
181 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
184 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
191 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
193 /* discard newlines after these */
194 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
195 | TC_BINOP | TC_OPTERM)
197 /* what can expression begin with */
198 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
205 | TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
208 #define OF_RES1 0x010000
209 #define OF_RES2 0x020000
210 #define OF_STR1 0x040000
211 #define OF_STR2 0x080000
212 #define OF_NUM1 0x100000
213 #define OF_CHECKED 0x200000
215 /* combined operator flags */
218 #define xS (OF_RES2 | OF_STR2)
220 #define VV (OF_RES1 | OF_RES2)
221 #define Nx (OF_RES1 | OF_NUM1)
222 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx (OF_RES1 | OF_STR1)
224 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK 0x007F
230 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
231 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
234 #define P(x) (x << 24)
235 #define PRIMASK 0x7F000000
236 #define PRIMASK2 0x7E000000
238 /* Operation classes */
240 #define SHIFT_TIL_THIS 0x0600
241 #define RECUR_FROM_THIS 0x1000
244 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
245 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
247 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
248 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
249 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
251 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
252 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
253 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
254 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
255 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
256 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
257 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
258 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
261 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
265 /* simple builtins */
267 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
273 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
275 B_an, B_co, B_ls, B_or, B_rs, B_xo,
278 /* tokens and their corresponding info values */
280 #define NTC "\377" /* switch to next token class (tc<<1) */
283 #define OC_B OC_BUILTIN
285 static const char tokenlist[] ALIGN1 =
288 "\1/" NTC /* REGEXP */
289 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
290 "\2++" "\2--" NTC /* UOPPOST */
291 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
292 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
293 "\2*=" "\2/=" "\2%=" "\2^="
294 "\1+" "\1-" "\3**=" "\2**"
295 "\1/" "\1%" "\1^" "\1*"
296 "\2!=" "\2>=" "\2<=" "\1>"
297 "\1<" "\2!~" "\1~" "\2&&"
298 "\2||" "\1?" "\1:" NTC
302 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
308 "\2if" "\2do" "\3for" "\5break" /* STATX */
309 "\10continue" "\6delete" "\5print"
310 "\6printf" "\4next" "\10nextfile"
311 "\6return" "\4exit" NTC
315 "\3and" "\5compl" "\6lshift" "\2or"
317 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
318 "\3cos" "\3exp" "\3int" "\3log"
319 "\4rand" "\3sin" "\4sqrt" "\5srand"
320 "\6gensub" "\4gsub" "\5index" "\6length"
321 "\5match" "\5split" "\7sprintf" "\3sub"
322 "\6substr" "\7systime" "\10strftime" "\6mktime"
323 "\7tolower" "\7toupper" NTC
325 "\4func" "\10function" NTC
328 /* compiler adds trailing "\0" */
331 static const uint32_t tokeninfo[] = {
335 xS|'a', xS|'w', xS|'|',
336 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
337 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
338 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
339 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
341 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
342 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
343 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
344 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
345 OC_IN|SV|P(49), /* in */
347 OC_PGETLINE|SV|P(37),
348 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
354 ST_IF, ST_DO, ST_FOR, OC_BREAK,
355 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
356 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
357 OC_RETURN|Vx, OC_EXIT|Nx,
361 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
362 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
363 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
364 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
365 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
366 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
367 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
368 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
369 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
376 /* internal variable names and their initial values */
377 /* an asterisk marks SPECIAL vars; $ is simply the unnamed Field0 */
379 CONVFMT, OFMT, FS, OFS,
380 ORS, RS, RT, FILENAME,
381 SUBSEP, F0, ARGIND, ARGC,
382 ARGV, ERRNO, FNR, NR,
383 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
386 static const char vNames[] ALIGN1 =
387 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
388 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
389 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
390 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
391 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
393 static const char vValues[] ALIGN1 =
394 "%.6g\0" "%.6g\0" " \0" " \0"
395 "\n\0" "\n\0" "\0" "\0"
396 "\034\0" "\0" "\377";
398 /* hash size may grow to these values */
399 #define FIRST_PRIME 61
400 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
403 /* Globals. Split in two parts so that first one is addressed
404 * with (mostly short) negative offsets.
405 * NB: it's unsafe to put members of type "double"
406 * into globals2 (gcc may fail to align them).
410 chain beginseq, mainseq, endseq;
412 node *break_ptr, *continue_ptr;
414 xhash *vhash, *ahash, *fdhash, *fnhash;
415 const char *g_progname;
418 int maxfields; /* used in fsrealloc() only */
427 smallint is_f0_split;
430 uint32_t t_info; /* often used */
436 var *intvar[NUM_INTERNAL_VARS]; /* often used */
438 /* former statics from various functions */
439 char *split_f0__fstrings;
441 uint32_t next_token__save_tclass;
442 uint32_t next_token__save_info;
443 uint32_t next_token__ltclass;
444 smallint next_token__concat_inserted;
446 smallint next_input_file__files_happen;
447 rstream next_input_file__rsm;
449 var *evaluate__fnargs;
450 unsigned evaluate__seed;
451 regex_t evaluate__sreg;
455 tsplitter exec_builtin__tspl;
457 /* biggest and least used members go last */
458 tsplitter fsplitter, rsplitter;
460 #define G1 (ptr_to_globals[-1])
461 #define G (*(struct globals2 *)ptr_to_globals)
462 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
463 /*char G1size[sizeof(G1)]; - 0x74 */
464 /*char Gsize[sizeof(G)]; - 0x1c4 */
465 /* Trying to keep most of members accessible with short offsets: */
466 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
467 #define t_double (G1.t_double )
468 #define beginseq (G1.beginseq )
469 #define mainseq (G1.mainseq )
470 #define endseq (G1.endseq )
471 #define seq (G1.seq )
472 #define break_ptr (G1.break_ptr )
473 #define continue_ptr (G1.continue_ptr)
475 #define vhash (G1.vhash )
476 #define ahash (G1.ahash )
477 #define fdhash (G1.fdhash )
478 #define fnhash (G1.fnhash )
479 #define g_progname (G1.g_progname )
480 #define g_lineno (G1.g_lineno )
481 #define nfields (G1.nfields )
482 #define maxfields (G1.maxfields )
483 #define Fields (G1.Fields )
484 #define g_cb (G1.g_cb )
485 #define g_pos (G1.g_pos )
486 #define g_buf (G1.g_buf )
487 #define icase (G1.icase )
488 #define exiting (G1.exiting )
489 #define nextrec (G1.nextrec )
490 #define nextfile (G1.nextfile )
491 #define is_f0_split (G1.is_f0_split )
492 #define t_info (G.t_info )
493 #define t_tclass (G.t_tclass )
494 #define t_string (G.t_string )
495 #define t_lineno (G.t_lineno )
496 #define t_rollback (G.t_rollback )
497 #define intvar (G.intvar )
498 #define fsplitter (G.fsplitter )
499 #define rsplitter (G.rsplitter )
500 #define INIT_G() do { \
501 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
502 G.next_token__ltclass = TC_OPTERM; \
503 G.evaluate__seed = 1; \
507 /* function prototypes */
508 static void handle_special(var *);
509 static node *parse_expr(uint32_t);
510 static void chain_group(void);
511 static var *evaluate(node *, var *);
512 static rstream *next_input_file(void);
513 static int fmt_num(char *, int, const char *, double, int);
514 static int awk_exit(int) NORETURN;
516 /* ---- error handling ---- */
518 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
519 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
520 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
521 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
522 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
523 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
524 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
525 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
526 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
527 #if !ENABLE_FEATURE_AWK_LIBM
528 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
/* Zero the whole var structure that vp points to, so every field
 * (the type flags included) is cleared in place. */
531 static void zero_out_var(var *vp)
533 memset(vp, 0, sizeof(*vp));
/* Report a fatal error formatted as "<g_progname>:<g_lineno>: <message>"
 * through bb_error_msg_and_die(). The prototype marks this function
 * NORETURN, so callers do not expect control to come back. */
536 static void syntax_error(const char *message) NORETURN;
537 static void syntax_error(const char *message)
539 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
542 /* ---- hash stuff ---- */
/* Compute the unreduced hash value of name. The update step folds one
 * character into the accumulator as idx = ch + idx * 63 (spelled
 * (idx << 6) - idx). Callers reduce the result modulo the table size. */
544 static unsigned hashidx(const char *name)
549 idx = *name++ + (idx << 6) - idx;
553 /* create new hash: the xhash comes from xzalloc(), its bucket array has
 * FIRST_PRIME slots and csize records that slot count */
554 static xhash *hash_init(void)
558 newhash = xzalloc(sizeof(*newhash));
559 newhash->csize = FIRST_PRIME;
560 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
565 /* find item in hash, return ptr to data, NULL if not found */
/* The bucket is chosen as hashidx(name) % hash->csize; entries in it are
 * compared against name with strcmp(). */
566 static void *hash_search(xhash *hash, const char *name)
570 hi = hash->items[hashidx(name) % hash->csize];
572 if (strcmp(hi->name, name) == 0)
579 /* grow hash if it becomes too big */
/* The new bucket count is the next entry of PRIMES[] (hash->nprime is
 * advanced as it is read). Items are re-linked at the head of their new
 * bucket, chosen by hashidx(name) % newsize, and finally the new size
 * and bucket array are installed in the hash. */
580 static void hash_rebuild(xhash *hash)
582 unsigned newsize, i, idx;
583 hash_item **newitems, *hi, *thi;
/* all sizes in PRIMES[] already used: no larger table is available */
585 if (hash->nprime == ARRAY_SIZE(PRIMES))
588 newsize = PRIMES[hash->nprime++];
589 newitems = xzalloc(newsize * sizeof(newitems[0]));
/* visit every old bucket */
591 for (i = 0; i < hash->csize; i++) {
596 idx = hashidx(thi->name) % newsize;
597 thi->next = newitems[idx];
603 hash->csize = newsize;
604 hash->items = newitems;
607 /* find item in hash, add it if necessary. Return ptr to data */
/* The lookup itself is done by hash_search(). The insertion code counts
 * the new element in nel and tests the load (nel / csize > 10), then
 * allocates the item with room for the name, copies the name in and
 * links the item at the head of bucket hashidx(name) % csize. */
608 static void *hash_find(xhash *hash, const char *name)
614 hi = hash_search(hash, name);
616 if (++hash->nel / hash->csize > 10)
/* l counts the name plus its terminating NUL */
619 l = strlen(name) + 1;
620 hi = xzalloc(sizeof(*hi) + l);
621 strcpy(hi->name, name);
623 idx = hashidx(name) % hash->csize;
624 hi->next = hash->items[idx];
625 hash->items[idx] = hi;
/* Typed front ends for hash_find(): each casts the returned data pointer.
 * findvar() takes the hash explicitly; newvar(), newfile() and newfunc()
 * are bound to vhash, fdhash and fnhash respectively. */
631 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
632 #define newvar(name) ((var*) hash_find(vhash, (name)))
633 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
634 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Remove the item called name from hash. phi starts at the head slot of
 * bucket hashidx(name) % csize; when strcmp() reports a match, glen is
 * reduced by the name length plus one. */
636 static void hash_remove(xhash *hash, const char *name)
638 hash_item *hi, **phi;
640 phi = &hash->items[hashidx(name) % hash->csize];
643 if (strcmp(hi->name, name) == 0) {
644 hash->glen -= (strlen(name) + 1);
654 /* ------ some useful functions ------ */
/* Step p over blanks. Two cases are tested: a backslash directly
 * followed by a newline, and a character that is neither ' ' nor '\t'
 * (presumably the point where scanning stops - confirm). */
656 static char *skip_spaces(char *p)
659 if (*p == '\\' && p[1] == '\n') {
662 } else if (*p != ' ' && *p != '\t') {
670 /* returns old *s, advances *s past word and terminating NUL */
/* The loop post-increments *s on every test, so it also steps over the
 * NUL that ends the loop. */
671 static char *nextword(char **s)
674 while (*(*s)++ != '\0')
/* Fetch one character from *s, decoding escapes through
 * bb_process_escape_sequence(). If the decoder hands back a backslash
 * while *s still equals pps, the escape was not recognized and the
 * character following the backslash is taken literally. */
679 static char nextchar(char **s)
686 c = bb_process_escape_sequence((const char**)s);
687 if (c == '\\' && *s == pps) { /* unrecognized \z? */
688 c = *(*s); /* yes, fetch z */
690 (*s)++; /* advance unless z = NUL */
/* Like isalnum(), but an underscore is accepted as well. */
695 static ALWAYS_INLINE int isalnum_(int c)
697 return (isalnum(c) || c == '_');
/* Parse a number from the text at *pp and return it as a double.
 * When the second character is 'x'/'X' (case folded with | 0x20) or a
 * digit, the text is first tried as an integer with strtoull() in base 0;
 * otherwise, and for floating-point spellings, strtod() does the work.
 * Both library calls receive pp as their end pointer. */
700 static double my_strtod(char **pp)
705 /* Might be hex or octal integer: 0x123abc or 07777 */
706 char c = (cp[1] | 0x20);
707 if (c == 'x' || isdigit(cp[1])) {
708 unsigned long long ull = strtoull(cp, pp, 0);
712 if (!isdigit(c) && c != '.')
714 /* else: it may be a floating number. Examples:
715 * 009.123 (*pp points to '9')
716 * 000.123 (*pp points to '.')
717 * fall through to strtod.
722 return strtod(cp, pp);
725 /* -------- working with variables (set/get/copy/etc) -------- */
/* Return the array hash behind v. Variables flagged VF_CHILD are handled
 * by the loop first (presumably by following x.parent - confirm); a
 * variable that is not yet VF_ARRAY receives a new hash from hash_init(). */
727 static xhash *iamarray(var *v)
731 while (a->type & VF_CHILD)
734 if (!(a->type & VF_ARRAY)) {
736 a->x.array = hash_init();
/* Empty an array hash in place: every bucket is visited, the string of
 * each element is freed, the bucket head is reset to NULL, and finally
 * the glen and nel counters are zeroed. */
741 static void clear_array(xhash *array)
746 for (i = 0; i < array->csize; i++) {
747 hi = array->items[i];
751 free(thi->data.v.string);
754 array->items[i] = NULL;
756 array->glen = array->nel = 0;
759 /* clear a variable */
/* Only the flags listed in VF_DONTTOUCH survive the type mask below.
 * The VF_FSTR test guards a step that applies to strings outside the
 * fstring buffer (presumably freeing v->string - confirm). */
760 static var *clrvar(var *v)
762 if (!(v->type & VF_FSTR))
765 v->type &= VF_DONTTOUCH;
771 /* assign string value to variable */
772 static var *setvar_p(var *v, char *value)
780 /* same as setvar_p but make a copy of string */
/* A NULL or empty value is passed on as NULL; any other value is
 * duplicated with xstrdup() before being handed to setvar_p(). */
781 static var *setvar_s(var *v, const char *value)
783 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
786 /* same as setvar_s but sets USER flag */
/* The assignment itself is delegated to setvar_s(). */
787 static var *setvar_u(var *v, const char *value)
789 v = setvar_s(v, value);
794 /* set array element to user string */
/* The element key is the decimal text produced by itoa(idx); the array
 * hash comes from iamarray(a) and the element from findvar(). */
795 static void setari_u(var *a, int idx, const char *s)
799 v = findvar(iamarray(a), itoa(idx));
803 /* assign numeric value to variable */
/* The variable is flagged VF_NUMBER as part of the assignment. */
804 static var *setvar_i(var *v, double value)
807 v->type |= VF_NUMBER;
/* Return the string value of v; never NULL (a NULL string reads as "").
 * A number without a cached string is formatted into g_buf by fmt_num()
 * using the CONVFMT variable as format, and a copy of the result is
 * stored in v->string with VF_CACHED set. */
813 static const char *getvar_s(var *v)
815 /* if v is numeric and has no cached string, convert it to string */
816 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
817 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
818 v->string = xstrdup(g_buf);
819 v->type |= VF_CACHED;
821 return (v->string == NULL) ? "" : v->string;
/* Return the numeric value of v. When v is neither VF_NUMBER nor
 * VF_CACHED, its string is parsed with my_strtod() and the result is
 * stored in v->number; values flagged VF_USER get an additional check.
 * VF_CACHED is set once the conversion has been done. */
824 static double getvar_i(var *v)
828 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
832 debug_printf_eval("getvar_i: '%s'->", s);
833 v->number = my_strtod(&s);
834 debug_printf_eval("%f (s:'%s')\n", v->number, s);
835 if (v->type & VF_USER) {
841 debug_printf_eval("getvar_i: '%s'->zero\n", s);
844 v->type |= VF_CACHED;
846 debug_printf_eval("getvar_i: %f\n", v->number);
850 /* Used for operands of bitwise ops */
/* Convert getvar_i(v) to unsigned long. One return casts d directly;
 * the other negates d, casts the positive value and negates the result
 * again. */
851 static unsigned long getvar_i_int(var *v)
853 double d = getvar_i(v);
855 /* Casting doubles to longs is undefined for values outside
856 * of target type range. Try to widen it as much as possible */
858 return (unsigned long)d;
859 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
860 return - (long) (unsigned long) (-d);
/* Copy the value of src into dest. Type bits are taken over from src
 * except those in VF_DONTTOUCH and VF_FSTR, the number is copied, the
 * string is duplicated with xstrdup(), and handle_special(dest) is
 * called after the copy. */
863 static var *copyvar(var *dest, const var *src)
867 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
868 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
869 dest->number = src->number;
871 dest->string = xstrdup(src->string);
873 handle_special(dest);
/* Add 1.0 to the numeric value of v, store it back with setvar_i() and
 * return what setvar_i() returns. */
877 static var *incvar(var *v)
879 return setvar_i(v, getvar_i(v) + 1.0);
882 /* return true if v is number or numeric string */
/* The XOR inverts the VF_DIRTY bit before masking, so the result is
 * nonzero when VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear. */
883 static int is_numeric(var *v)
886 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
889 /* return 1 when value of v corresponds to true, 0 otherwise */
/* Two tests exist: the numeric one (number != 0) and the string one
 * (string is non-NULL and non-empty). NOTE(review): the condition that
 * selects between them is presumably is_numeric(v) - confirm. */
890 static int istrue(var *v)
893 return (v->number != 0);
894 return (v->string && v->string[0]);
897 /* temporary variables allocator. Last allocated should be first freed */
/* A request for n vars is checked against the current block g_cb:
 * it fits when (pos - nv) + n does not exceed the block size. A new
 * block holds max(n, MINNVBLOCK) vars and comes zeroed from xzalloc(),
 * with pos starting at the first var of the block. */
898 static var *nvalloc(int n)
906 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
912 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
913 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
915 g_cb->pos = g_cb->nv;
917 /*g_cb->next = NULL; - xzalloc did it */
925 while (v < g_cb->pos) {
/* Release the temporaries from v up to g_cb->pos. v has to point into
 * the used part of the current block, otherwise EMSG_INTERNAL_ERROR is
 * raised. Arrays owned by a temporary (VF_ARRAY without VF_CHILD) are
 * cleared and their bucket array is freed; variables flagged VF_WALK
 * have their walker list processed as well. */
934 static void nvfree(var *v)
938 if (v < g_cb->nv || v >= g_cb->pos)
939 syntax_error(EMSG_INTERNAL_ERROR);
941 for (p = v; p < g_cb->pos; p++) {
942 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
943 clear_array(iamarray(p));
944 free(p->x.array->items);
947 if (p->type & VF_WALK) {
949 walker_list *w = p->x.walker;
950 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
954 debug_printf_walker(" free(%p)\n", w);
/* runs while the current block is empty and has a predecessor */
963 while (g_cb->prev && g_cb->pos == g_cb->nv) {
968 /* ------- awk program text parsing ------- */
970 /* Parse the next token pointed to by global pos, place results into global ttt.
971 * If the token isn't expected, give up. Return token class
973 static uint32_t next_token(uint32_t expected)
975 #define concat_inserted (G.next_token__concat_inserted)
976 #define save_tclass (G.next_token__save_tclass)
977 #define save_info (G.next_token__save_info)
978 /* Initialized to TC_OPTERM: */
979 #define ltclass (G.next_token__ltclass)
989 } else if (concat_inserted) {
990 concat_inserted = FALSE;
991 t_tclass = save_tclass;
1000 while (*p != '\n' && *p != '\0')
1009 } else if (*p == '\"') {
1012 while (*p != '\"') {
1014 if (*p == '\0' || *p == '\n')
1015 syntax_error(EMSG_UNEXP_EOS);
1017 *s++ = nextchar(&pp);
1024 } else if ((expected & TC_REGEXP) && *p == '/') {
1028 if (*p == '\0' || *p == '\n')
1029 syntax_error(EMSG_UNEXP_EOS);
1033 s[-1] = bb_process_escape_sequence((const char **)&pp);
1046 } else if (*p == '.' || isdigit(*p)) {
1049 t_double = my_strtod(&pp);
1052 syntax_error(EMSG_UNEXP_TOKEN);
1056 /* search for something known */
1061 int l = (unsigned char) *tl++;
1062 if (l == (unsigned char) NTCC) {
1066 /* if token class is expected,
1068 * and it's not a longer word,
1070 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1071 && strncmp(p, tl, l) == 0
1072 && !((tc & TC_WORD) && isalnum_(p[l]))
1074 /* then this is what we are looking for */
1082 /* not a known token */
1084 /* is it a name? (var/array/function) */
1086 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1089 while (isalnum_(*++p)) {
1094 /* also consume whitespace between functionname and bracket */
1095 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1109 /* skipping newlines in some cases */
1110 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1113 /* insert concatenation operator when needed */
1114 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1115 concat_inserted = TRUE;
1119 t_info = OC_CONCAT | SS | P(35);
1126 /* Are we ready for this? */
1127 if (!(ltclass & expected))
1128 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1129 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1132 #undef concat_inserted
1138 static void rollback_token(void)
/* Allocate a node with xzalloc() and record the current g_lineno in it. */
1143 static node *new_node(uint32_t info)
1147 n = xzalloc(sizeof(node));
1149 n->lineno = g_lineno;
/* Make n an OC_REGEXP node for pattern s. re must provide two regex_t
 * slots: re[0] is compiled with REG_EXTENDED only, re[1] additionally
 * with REG_ICASE. */
1153 static void mk_re_node(const char *s, node *n, regex_t *re)
1155 n->info = OC_REGEXP;
1158 xregcomp(re, s, REG_EXTENDED);
1159 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: the opening '(' is consumed via
 * next_token(TC_SEQSTART), then parse_expr() reads up to the ')'. */
1162 static node *condition(void)
1164 next_token(TC_SEQSTART);
1165 return parse_expr(TC_SEQTERM);
1168 /* parse expression terminated by given argument, return ptr
1169 * to built subtree. Terminator is eaten by parse_expr */
1170 static node *parse_expr(uint32_t iexp)
1179 sn.r.n = glptr = NULL;
1180 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1182 while (!((tc = next_token(xtc)) & iexp)) {
1184 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1185 /* input redirection (<) attached to glptr node */
1186 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1188 xtc = TC_OPERAND | TC_UOPPRE;
1191 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1192 /* for binary and postfix-unary operators, jump back over
1193 * previous operators with higher priority */
1195 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1196 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1200 if ((t_info & OPCLSMASK) == OC_TERNARY)
1202 cn = vn->a.n->r.n = new_node(t_info);
1204 if (tc & TC_BINOP) {
1206 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1207 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1209 next_token(TC_GETLINE);
1210 /* give maximum priority to this pipe */
1211 cn->info &= ~PRIMASK;
1212 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1216 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1221 /* for operands and prefix-unary operators, attach them
1224 cn = vn->r.n = new_node(t_info);
1226 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1227 if (tc & (TC_OPERAND | TC_REGEXP)) {
1228 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1229 /* one should be very careful with switch on tclass -
1230 * only simple tclasses should be used! */
1235 v = hash_search(ahash, t_string);
1237 cn->info = OC_FNARG;
1238 cn->l.aidx = v->x.aidx;
1240 cn->l.v = newvar(t_string);
1242 if (tc & TC_ARRAY) {
1244 cn->r.n = parse_expr(TC_ARRTERM);
1251 v = cn->l.v = xzalloc(sizeof(var));
1253 setvar_i(v, t_double);
1255 setvar_s(v, t_string);
1259 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1264 cn->r.f = newfunc(t_string);
1265 cn->l.n = condition();
1269 cn = vn->r.n = parse_expr(TC_SEQTERM);
1275 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1279 cn->l.n = condition();
1288 /* add node to chain. Return ptr to alloc'd node */
/* Works on the chain selected by seq. When the chain's programname
 * differs from g_progname, it is updated and an OC_NEWSOURCE node
 * carrying a copy of the name is chained in through a recursive call.
 * The chain's last node is always a new OC_DONE node. */
1289 static node *chain_node(uint32_t info)
1294 seq->first = seq->last = new_node(0);
1296 if (seq->programname != g_progname) {
1297 seq->programname = g_progname;
1298 n = chain_node(OC_NEWSOURCE);
1299 n->l.new_progname = xstrdup(g_progname);
1304 seq->last = n->a.n = new_node(OC_DONE);
/* Chain a node of the given info and attach, as its left operand, an
 * expression parsed up to a statement terminator or '}'. The final test
 * handles the case where the expression ended on '}'. */
1309 static void chain_expr(uint32_t info)
1313 n = chain_node(info);
1314 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1315 if (t_tclass & TC_GRPTERM)
/* Build the node skeleton of a loop. The enclosing break/continue
 * targets are saved on entry and restored before leaving, so nested
 * loops each get their own. A conditional branch node (OC_BR) is chained
 * first; new OC_EXEC nodes act as the continue and break targets while
 * the loop is built. Afterwards the continue target is pointed at n2,
 * and both the break target and the branch's r.n are pointed at the
 * chain's last node. */
1319 static node *chain_loop(node *nn)
1321 node *n, *n2, *save_brk, *save_cont;
1323 save_brk = break_ptr;
1324 save_cont = continue_ptr;
1326 n = chain_node(OC_BR | Vx);
1327 continue_ptr = new_node(OC_EXEC);
1328 break_ptr = new_node(OC_EXEC);
1330 n2 = chain_node(OC_EXEC | Vx);
1333 continue_ptr->a.n = n2;
1334 break_ptr->a.n = n->r.n = seq->last;
1336 continue_ptr = save_cont;
1337 break_ptr = save_brk;
1342 /* parse group and attach it to chain */
1343 static void chain_group(void)
1349 c = next_token(TC_GRPSEQ);
1350 } while (c & TC_NEWLINE);
1352 if (c & TC_GRPSTART) {
1353 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1354 if (t_tclass & TC_NEWLINE)
1359 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1361 chain_expr(OC_EXEC | Vx);
1362 } else { /* TC_STATEMNT */
1363 switch (t_info & OPCLSMASK) {
1365 n = chain_node(OC_BR | Vx);
1366 n->l.n = condition();
1368 n2 = chain_node(OC_EXEC);
1370 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1372 n2->a.n = seq->last;
1380 n = chain_loop(NULL);
1385 n2 = chain_node(OC_EXEC);
1386 n = chain_loop(NULL);
1388 next_token(TC_WHILE);
1389 n->l.n = condition();
1393 next_token(TC_SEQSTART);
1394 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1395 if (t_tclass & TC_SEQTERM) { /* for-in */
1396 if ((n2->info & OPCLSMASK) != OC_IN)
1397 syntax_error(EMSG_UNEXP_TOKEN);
1398 n = chain_node(OC_WALKINIT | VV);
1401 n = chain_loop(NULL);
1402 n->info = OC_WALKNEXT | Vx;
1404 } else { /* for (;;) */
1405 n = chain_node(OC_EXEC | Vx);
1407 n2 = parse_expr(TC_SEMICOL);
1408 n3 = parse_expr(TC_SEQTERM);
1418 n = chain_node(t_info);
1419 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1420 if (t_tclass & TC_OUTRDR) {
1422 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1424 if (t_tclass & TC_GRPTERM)
1429 n = chain_node(OC_EXEC);
1434 n = chain_node(OC_EXEC);
1435 n->a.n = continue_ptr;
1438 /* delete, next, nextfile, return, exit */
/* Parse a complete program text. Top-level tokens are read until TC_EOF.
 * Branches exist for BEGIN, END, function declarations, pattern
 * expressions and a bare '{' group. For a function declaration the
 * function is registered with newfunc() and every parameter name is
 * entered into ahash with its position stored in x.aidx. A pattern
 * becomes an OC_TEST node whose expression is parsed up to a statement
 * terminator, EOF or '{'. */
1445 static void parse_program(char *p)
1454 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1455 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1457 if (tclass & TC_OPTERM)
1461 if (tclass & TC_BEGIN) {
1465 } else if (tclass & TC_END) {
1469 } else if (tclass & TC_FUNCDECL) {
1470 next_token(TC_FUNCTION);
1472 f = newfunc(t_string);
1473 f->body.first = NULL;
/* one iteration per parameter name */
1475 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1476 v = findvar(ahash, t_string);
1477 v->x.aidx = f->nargs++;
1479 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1486 } else if (tclass & TC_OPSEQ) {
1488 cn = chain_node(OC_TEST);
1489 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1490 if (t_tclass & TC_GRPSTART) {
1494 chain_node(OC_PRINT);
1496 cn->r.n = mainseq.last;
1498 } else /* if (tclass & TC_GRPSTART) */ {
1506 /* -------- program execution part -------- */
/* Set up the splitter node of spl for separator string s. If the node
 * currently is an OC_REGEXP node, its compiled regex is released first.
 * A separator longer than one character is compiled into a regex node
 * by mk_re_node(); otherwise info is set to the first character of s. */
1508 static node *mk_splitter(const char *s, tsplitter *spl)
1516 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1518 regfree(ire); // TODO: nuke ire, use re+1?
1520 if (s[0] && s[1]) { /* strlen(s) > 1 */
1521 mk_re_node(s, n, re);
1523 n->info = (uint32_t) s[0];
1529 /* use node as a regular expression. Supplied with node ptr and regex_t
1530 * storage space. Return ptr to regex (if result points to preg, it should
1531 * be later regfree'd manually).
1533 static regex_t *as_regex(node *op, regex_t *preg)
1539 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1540 return icase ? op->r.ire : op->l.re;
1543 s = getvar_s(evaluate(op, v));
1545 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1546 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1547 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1548 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1549 * (maybe gsub is not supposed to use REG_EXTENDED?).
1551 if (regcomp(preg, s, cflags)) {
1552 cflags &= ~REG_EXTENDED;
1553 xregcomp(preg, s, cflags);
1559 /* gradually increasing buffer.
1560 * note that we reallocate even if n == old_size,
1561 * and thus there is at least one extra allocated byte.
1563 static char* qrealloc(char *b, int n, int *size)
1565 if (!b || n >= *size) {
1566 *size = n + (n>>1) + 80;
1567 b = xrealloc(b, *size);
1572 /* resize field storage space */
/* When size reaches maxfields, the Fields array is grown to size + 16
 * entries and the entries covered by the following loop are initialized
 * as VF_SPECIAL with a NULL string. The last loop handles the fields
 * from size up to the old nfields. */
1573 static void fsrealloc(int size)
1577 if (size >= maxfields) {
1579 maxfields = size + 16;
1580 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1581 for (; i < maxfields; i++) {
1582 Fields[i].type = VF_SPECIAL;
1583 Fields[i].string = NULL;
1586 /* if size < nfields, clear extra field variables */
1587 for (i = size; i < nfields; i++) {
// Split string s according to splitter node spl.
// The words are written into a buffer allocated here and handed back through
// *slist. The visible return path returns the field counter n.
// Visible modes: regex split, null split (separator char is NUL),
// single-character split, and whitespace split.
1593 static int awk_split(const char *s, node *spl, char **slist)
1598 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1600 /* in worst case, each char would be a separate field */
1601 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
// The low byte of spl->info is the separator character for non-regex splits.
1604 c[0] = c[1] = (char)spl->info;
// Special handling when RS is the empty string (body not shown here).
1606 if (*getvar_s(intvar[RS]) == '\0')
1610 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1612 return n; /* "": zero fields */
1613 n++; /* at least one field will be there */
1615 l = strcspn(s, c+2); /* len till next NUL or \n */
// A separator match only counts if it starts within the first l characters.
1616 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1617 && pmatch[0].rm_so <= l
1619 l = pmatch[0].rm_so;
// rm_eo == 0 means the separator matched an empty string at the start.
1620 if (pmatch[0].rm_eo == 0) {
1624 n++; /* we saw yet another delimiter */
1626 pmatch[0].rm_eo = l;
1631 /* make sure we remove *all* of the separator chars */
1634 } while (++l < pmatch[0].rm_eo);
// Continue scanning after the end of the matched separator.
1636 s += pmatch[0].rm_eo;
1640 if (c[0] == '\0') { /* null split */
1648 if (c[0] != ' ') { /* single-character split */
// c[] receives the upper- and lower-case forms of the separator for the
// strpbrk() scan below (the condition guarding this is not shown here).
1650 c[0] = toupper(c[0]);
1651 c[1] = tolower(c[1]);
1655 while ((s1 = strpbrk(s1, c)) != NULL) {
// Remaining case: separator is a space, so fields are runs of non-whitespace.
1663 s = skip_whitespace(s);
1667 while (*s && !isspace(*s))
// Split the current record (intvar[F0]) into fields using the field splitter
// and point the entries of Fields[] at the resulting words.
1674 static void split_f0(void)
1676 /* static char *fstrings; */
1677 #define fstrings (G.split_f0__fstrings)
// awk_split() stores the words in the fstrings buffer and returns their count.
1688 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
// Each field takes the next word. VF_FSTR records that the string points into
// the fstrings buffer; VF_USER marks it as user input; VF_DIRTY as explicitly set.
1691 for (i = 0; i < n; i++) {
1692 Fields[i].string = nextword(&s);
1693 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1696 /* set NF manually to avoid side effects */
// NF is written directly through its struct members rather than via setvar_i().
1698 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1699 intvar[NF]->number = nfields;
1703 /* perform additional actions when some internal variables changed */
// v: the variable that was just modified. Variables without VF_SPECIAL are
// filtered out by the first test.
1704 static void handle_special(var *v)
1708 const char *sep, *s;
1709 int sl, l, len, i, bsize;
1711 if (!(v->type & VF_SPECIAL))
// NF changed: the record is rebuilt from the first n fields.
1714 if (v == intvar[NF]) {
1715 n = (int)getvar_i(v);
1718 /* recalculate $0 */
1719 sep = getvar_s(intvar[OFS]);
// Fields are appended to buffer b one by one, with the OFS string copied
// between them; b is grown through qrealloc() as needed.
1723 for (i = 0; i < n; i++) {
1724 s = getvar_s(&Fields[i]);
1727 memcpy(b+len, sep, sl);
1730 b = qrealloc(b, len+l+sl, &bsize);
1731 memcpy(b+len, s, l);
// The assembled buffer becomes the new value of the record variable.
1736 setvar_p(intvar[F0], b);
// Record changed: flag the fields as not split for the current record.
1739 } else if (v == intvar[F0]) {
1740 is_f0_split = FALSE;
// FS or RS changed: rebuild the matching splitter from the new string value.
1742 } else if (v == intvar[FS]) {
1743 mk_splitter(getvar_s(v), &fsplitter);
1745 } else if (v == intvar[RS]) {
1746 mk_splitter(getvar_s(v), &rsplitter);
1748 } else if (v == intvar[IGNORECASE]) {
// Here v is treated as an element of Fields[]: NF is raised so that it covers
// the index of v (the condition selecting this branch is not shown here).
1752 n = getvar_i(intvar[NF]);
1753 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1754 /* right here v is invalid. Just to note... */
1758 /* step through func/builtin/etc arguments */
// pn: in/out pointer to the current position in the argument list.
1759 static node *nextarg(node **pn)
// A non-NULL OC_COMMA node gets special treatment (branch body not shown here);
// presumably it links one argument to the rest of the list - TODO confirm.
1764 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// Set up iteration over array for loop variable v.
// A walker is allocated, linked into v->x.walker, and filled with copies of the
// item names found in the hash.
1773 static void hashwalk_init(var *v, xhash *array)
1778 walker_list *prev_walker;
// If v already owns a walker it is remembered, so it can be put back when this
// walk finishes (hashwalk_next restores w->prev).
1780 if (v->type & VF_WALK) {
1781 prev_walker = v->x.walker;
1786 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
// One allocation holds the walker header plus array->glen + 1 bytes for names.
1788 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1789 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
// cur and end both start at the beginning of the name buffer; end advances as
// names are copied in.
1790 w->cur = w->end = w->wbuf;
1791 w->prev = prev_walker;
// Visit each bucket of the hash and copy item names to the write position.
1792 for (i = 0; i < array->csize; i++) {
1793 hi = array->items[i];
1795 strcpy(w->end, hi->name);
// Advance the walk attached to loop variable v by one element.
1802 static int hashwalk_next(var *v)
1804 walker_list *w = v->x.walker;
// cur has reached end: every stored name has been consumed.
1806 if (w->cur >= w->end) {
1807 walker_list *prev_walker = w->prev;
1809 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
// v goes back to the walker that was active before this one.
1811 v->x.walker = prev_walker;
// Otherwise assign the next stored name to v and move cur past it.
1815 setvar_s(v, nextword(&w->cur));
1819 /* evaluate node, return 1 when result is true, 0 otherwise */
// pattern: node to evaluate; truthiness is decided by istrue().
1820 static int ptest(node *pattern)
1822 /* ptest__v is "static": to save stack space? */
// The result variable lives in the globals struct G, not on the stack.
1823 return istrue(evaluate(pattern, &G.ptest__v));
1826 /* read next record from stream rsm into a variable v */
// Visible flow: look for a record terminator in the buffered data using one of
// three strategies (regex RS, single-character RS, or runs of newlines), read
// more data from the descriptor when needed, then set RT to the terminator text.
1827 static int awk_getline(rstream *rsm, var *v)
1830 regmatch_t pmatch[2];
1831 int size, a, p, pp = 0;
1832 int fd, so, eo, r, rp;
1835 debug_printf_eval("entered %s()\n", __func__);
1837 /* we're using our own buffer since we need access to accumulating
// Reads below go straight to the file descriptor behind rsm->F.
1840 fd = fileno(rsm->F);
// The low byte of the record splitter's info is the separator character.
1845 c = (char) rsplitter.n.info;
1849 m = qrealloc(m, 256, &size);
// Regex RS: the match offsets delimit the terminator (so = start, eo = end).
1856 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1857 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1858 b, 1, pmatch, 0) == 0) {
1859 so = pmatch[0].rm_so;
1860 eo = pmatch[0].rm_eo;
// Single-character RS: search from offset pp onwards.
1864 } else if (c != '\0') {
1865 s = strchr(b+pp, c);
1867 s = memchr(b+pp, '\0', p - pp);
// Separator char is NUL: newlines at the scan position are skipped, a blank
// line (two consecutive newlines) ends the record, and every newline after it
// is taken as part of the terminator.
1874 while (b[rp] == '\n')
1876 s = strstr(b+rp, "\n\n");
1879 while (b[eo] == '\n')
// Shift the unconsumed data to the front of the buffer.
1888 memmove(m, m+a, p+1);
// Ensure room for more data, then append what the descriptor delivers.
1893 m = qrealloc(m, a+p+128, &size);
1896 p += safe_read(fd, b+p, size-p-1);
1900 setvar_i(intvar[ERRNO], errno);
// The byte at so (and later at eo) is saved in c and overwritten with NUL so
// that the text before it can be used as a C string.
1909 c = b[so]; b[so] = '\0';
1913 c = b[eo]; b[eo] = '\0';
// RT receives the terminator text, which spans from so up to eo.
1914 setvar_s(intvar[RT], b+so);
1923 debug_printf_eval("returning from %s(): %d\n", __func__, r);
// Format number n into buffer b (at most size bytes, via snprintf).
// format:     printf-style format containing a single conversion.
// int_as_int: when set and n has no fractional part, format is ignored and the
//             value is printed as a plain decimal integer.
1928 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1932 const char *s = format;
// NOTE(review): n == (int)n and the (int)n casts below are only meaningful
// while n is within the range of int - confirm that callers guarantee this.
1934 if (int_as_int && n == (int)n) {
1935 r = snprintf(b, size, "%d", (int)n);
// Walk to the end of format; c ends up holding its last character (NUL for an
// empty format), which identifies the conversion.
1937 do { c = *s; } while (c && *++s);
// Integer conversions receive the value truncated to int.
1938 if (strchr("diouxX", c)) {
1939 r = snprintf(b, size, format, (int)n);
// Floating-point conversions receive the double unchanged.
1940 } else if (strchr("eEfgG", c)) {
1941 r = snprintf(b, size, format, n);
// Any other conversion character is reported as an invalid format.
1943 syntax_error(EMSG_INV_FMT);
1949 /* formatted output into an allocated buffer, return ptr to buffer */
// n: argument list node; the first argument is the format string, the
// following ones are consumed one per conversion.
1950 static char *awk_printf(node *n)
1955 int i, j, incr, bsize;
// Work on a private copy of the format string.
1960 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Skip literal text; a doubled percent sign counts as literal text too.
1965 while (*f && (*f != '%' || *++f == '%'))
// Skip flags, width and precision up to the conversion letter.
1967 while (*f && !isalpha(*f)) {
1969 syntax_error("%*x formats are not supported");
// Reserve room for this format segment plus MAXVARFMT bytes of output.
1973 incr = (f - s) + MAXVARFMT;
1974 b = qrealloc(b, incr + i, &bsize);
1980 arg = evaluate(nextarg(&n), v);
// Character conversion (or end of format): a numeric argument is passed as a
// character code, a string argument contributes its first character.
1983 if (c == 'c' || !c) {
1984 i += sprintf(b+i, s, is_numeric(arg) ?
1985 (char)getvar_i(arg) : *getvar_s(arg));
1986 } else if (c == 's') {
// Strings can be arbitrarily long, so the buffer is grown by the string
// length before formatting.
1988 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1989 i += sprintf(b+i, s, s1);
// Everything else is treated as a numeric conversion.
1991 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1995 /* if there was an error while sprintf, return value is negative */
// Trim the buffer to the produced length plus the terminating NUL.
2002 b = xrealloc(b, i + 1);
2007 /* Common substitution routine.
2008 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2009 * store result into (dest), return number of substitutions.
2010 * If nm = 0, replace all matches.
2011 * If src or dst is NULL, use $0.
2012 * If subexp != 0, enable subexpression matching (\1-\9).
// The result is assembled in resbuf, which is grown through qrealloc();
// residx is the current write offset into it.
2014 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2018 int match_no, residx, replen, resbufsize;
// Ten slots: the whole match plus up to nine subexpressions.
2020 regmatch_t pmatch[10];
2021 regex_t sreg, *regex;
// sreg is the storage offered to as_regex() in case it has to compile.
2027 regex = as_regex(rn, &sreg);
2028 sp = getvar_s(src ? src : intvar[F0]);
2029 replen = strlen(repl);
// Each iteration handles one match found at or after sp.
2030 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2031 int so = pmatch[0].rm_so;
2032 int eo = pmatch[0].rm_eo;
2034 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
// Copy everything up to the end of the match; room for one replacement string
// is reserved at the same time.
2035 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2036 memcpy(resbuf + residx, sp, eo);
// Only matches numbered nm and above are replaced (nm = 0 replaces all).
2038 if (++match_no >= nm) {
// Step back over the matched text that was just copied.
2043 residx -= (eo - so);
// Emit the replacement text character by character.
2045 for (s = repl; *s; s++) {
2046 char c = resbuf[residx++] = *s;
// An ampersand, or a digit when subexp is enabled, may stand for matched text.
2051 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2053 residx -= ((nbs + 3) >> 1);
2060 resbuf[residx++] = c;
// Insert the text of match group j in place of the reference.
2062 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2063 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2064 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
// Later searches do not start at the beginning of the line.
2072 regexec_flags = REG_NOTBOL;
2077 /* Empty match (e.g. "b*" will match anywhere).
2078 * Advance by one char. */
2080 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2081 //... and will erroneously match "b" even though it is NOT at the word start.
2082 //we need REG_NOTBOW but it does not exist...
2083 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2084 //it should be able to do it correctly.
2085 /* Subtle: this is safe only because
2086 * qrealloc allocated at least one extra byte */
2087 resbuf[residx] = *sp;
// Append the unmatched tail of the source string.
2095 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2096 strcpy(resbuf + residx, sp);
2098 //bb_error_msg("end sp:'%s'%p", sp,sp);
// The finished buffer is handed to the destination variable ($0 when dest is NULL).
2099 setvar_p(dest ? dest : intvar[F0], resbuf);
// Convert a date specification string to a time value via mktime().
// ds: whitespace-separated numbers; the first six are year, month, day, hour,
//     minute and second, in that order; a seventh optional number is signed.
2105 static NOINLINE int do_mktime(const char *ds)
2110 /*memset(&then, 0, sizeof(then)); - not needed */
2111 then.tm_isdst = -1; /* default is unknown */
2113 /* manpage of mktime says these fields are ints,
2114 * so we can sscanf stuff directly into them */
// NOTE(review): %u formally expects pointers to unsigned int while the targets
// are int fields; %d would match the types - confirm before changing.
2115 count = sscanf(ds, "%u %u %u %u %u %u %d",
2116 &then.tm_year, &then.tm_mon, &then.tm_mday,
2117 &then.tm_hour, &then.tm_min, &then.tm_sec,
// Range checks: month must be at least 1 and year at least 1900. The unsigned
// casts make negative inputs compare as very large values.
2121 || (unsigned)then.tm_mon < 1
2122 || (unsigned)then.tm_year < 1900
// tm_year is converted from a calendar year to an offset from 1900.
2128 then.tm_year -= 1900;
2130 return mktime(&then);
// Execute a builtin function call described by node op, storing the result in
// res. Up to four arguments are collected first; the individual builtins are
// then handled in separate branches (their case labels are not shown here).
2133 static NOINLINE var *exec_builtin(node *op, var *res)
2135 #define tspl (G.exec_builtin__tspl)
2141 regmatch_t pmatch[2];
2150 isr = info = op->info;
2153 av[2] = av[3] = NULL;
// Per argument: an[] holds the node, av[] the evaluated variable, as[] its
// string value. Bits of isr decide whether evaluation and string conversion
// are performed for the argument.
2154 for (i = 0; i < 4 && op; i++) {
2155 an[i] = nextarg(&op);
2156 if (isr & 0x09000000)
2157 av[i] = evaluate(an[i], &tv[i]);
2158 if (isr & 0x08000000)
2159 as[i] = getvar_s(av[i]);
// The bits of info above bit 29 give the minimum number of arguments.
2164 if ((uint32_t)nargs < (info >> 30))
2165 syntax_error(EMSG_TOO_FEW_ARGS);
// Arc tangent of two arguments, available only with libm support.
2171 #if ENABLE_FEATURE_AWK_LIBM
2172 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2174 syntax_error(EMSG_NO_MATH);
// Splitting a string into an array: the third argument is used directly when
// it is a regexp node, otherwise a splitter is built from its string value.
2182 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2183 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2188 n = awk_split(as[0], spl, &s);
// The target array is emptied, then filled with elements indexed 1..n.
2190 clear_array(iamarray(av[1]));
2191 for (i = 1; i <= n; i++)
2192 setari_u(av[1], i, nextword(&s));
// Substring extraction: the start position is 1-based in awk, hence the -1;
// without a third argument the length defaults to the rest of the string.
2202 i = getvar_i(av[1]) - 1;
2207 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2210 s = xstrndup(as[0]+i, n);
2215 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2216 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
// Bitwise builtins, in order: and, complement, left shift, or, right shift, xor.
2218 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2222 setvar_i(res, ~getvar_i_int(av[0]));
2226 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2230 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2234 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2238 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// Case conversion on a copy of the string. Only ASCII letters are affected:
// the 0x20 bit is cleared for upper case (B_up) and set for lower case.
2244 s1 = s = xstrdup(as[0]);
2246 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2247 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2248 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// Substring search: l is the last offset at which the needle could still fit.
2258 l = strlen(as[0]) - ll;
2259 if (ll > 0 && l >= 0) {
// Exact search via strstr(); the result is converted to a 1-based position.
2261 char *s = strstr(as[0], as[1]);
2263 n = (s - as[0]) + 1;
2265 /* this piece of code is terribly slow and
2266 * really should be rewritten
// Case-insensitive variant: compare at every offset in turn.
2268 for (i = 0; i <= l; i++) {
2269 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting: a default format is used when no format argument is given;
// output goes to the global buffer g_buf (at most MAXVARFMT bytes).
2281 tt = getvar_i(av[1]);
2284 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2285 i = strftime(g_buf, MAXVARFMT,
2286 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2289 setvar_s(res, g_buf);
2293 setvar_i(res, do_mktime(as[0]));
// Regex match of the first argument against the pattern in the second.
2297 re = as_regex(an[1], &sreg);
2298 n = regexec(re, as[0], 1, pmatch, 0);
// With these values RSTART becomes 0 and RLENGTH becomes -1 (presumably the
// no-match case; the surrounding condition is not shown here).
2303 pmatch[0].rm_so = 0;
2304 pmatch[0].rm_eo = -1;
2306 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2307 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2308 setvar_i(res, pmatch[0].rm_so);
// Substitution builtins, all routed through awk_sub():
// - first call: match number from the third argument, source from the fourth,
//   result stored in res, subexpression references enabled;
// - second call: nm = 0, replace every match in place, return the count;
// - third call: nm = 1, replace the first match in place, return the count.
2314 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2318 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2322 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2332 * Evaluate node - the heart of the program. Supplied with subtree
2333 * and place where to store result. returns ptr to result.
// XC() shifts an opcode class right by 8 bits; it is applied both to the switch
// expression and to every case label below.
2335 #define XC(n) ((n) >> 8)
// op:  first node of the subtree to execute.
// res: variable that receives the result.
2337 static var *evaluate(node *op, var *res)
2339 /* This procedure is recursive so we should count every byte */
2340 #define fnargs (G.evaluate__fnargs)
2341 /* seed is initialized to 1 */
2342 #define seed (G.evaluate__seed)
2343 #define sreg (G.evaluate__sreg)
2348 return setvar_s(res, NULL);
2350 debug_printf_eval("entered %s()\n", __func__);
2358 } L = L; /* for compiler */
2369 opn = (opinfo & OPNMASK);
// Keep the global line number in step with the node being executed.
2370 g_lineno = op->lineno;
2372 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2374 /* execute inevitable things */
// Flags in opinfo request pre-evaluation of operands: OF_RES1 and OF_RES2
// evaluate the left and right subtrees, OF_STR1 and OF_STR2 fetch their string
// values, OF_NUM1 fetches the numeric value of the left one.
2375 if (opinfo & OF_RES1)
2376 L.v = evaluate(op1, v1);
2377 if (opinfo & OF_RES2)
2378 R.v = evaluate(op->r.n, v1+1);
2379 if (opinfo & OF_STR1) {
2380 L.s = getvar_s(L.v);
2381 debug_printf_eval("L.s:'%s'\n", L.s);
2383 if (opinfo & OF_STR2) {
2384 R.s = getvar_s(R.v);
2385 debug_printf_eval("R.s:'%s'\n", R.s);
2387 if (opinfo & OF_NUM1) {
2388 L_d = getvar_i(L.v);
2389 debug_printf_eval("L_d:%f\n", L_d);
2392 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2393 switch (XC(opinfo & OPCLSMASK)) {
2395 /* -- iterative node type -- */
2399 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2400 /* it's range pattern */
// OF_CHECKED on the node records that the range start has matched; it is
// cleared again as soon as the range end matches.
2401 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2402 op->info |= OF_CHECKED;
2403 if (ptest(op1->r.n))
2404 op->info &= ~OF_CHECKED;
// Plain pattern: continue at a.n when it is true, at r.n otherwise.
2410 op = ptest(op1) ? op->a.n : op->r.n;
2414 /* just evaluate an expression, also used as unconditional jump */
2418 /* branch, used in if-else and various loops */
2420 op = istrue(L.v) ? op->a.n : op->r.n;
2423 /* initialize for-in loop */
2424 case XC( OC_WALKINIT ):
2425 hashwalk_init(L.v, iamarray(R.v));
2428 /* get next array item */
2429 case XC( OC_WALKNEXT ):
2430 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2433 case XC( OC_PRINT ):
2434 case XC( OC_PRINTF ): {
// Output redirection: the target named by R.s is looked up or created, then
// opened either as a pipe or as a file in write or append mode.
2438 rstream *rsm = newfile(R.s);
2441 rsm->F = popen(R.s, "w");
2443 bb_perror_msg_and_die("popen");
2446 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2452 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// The whole record is written here (the guarding condition is not shown).
2454 fputs(getvar_s(intvar[F0]), F);
// Each argument is evaluated in turn; numbers are formatted with OFMT, other
// values are written as strings, with OFS between items and ORS at the end.
2457 var *v = evaluate(nextarg(&op1), v1);
2458 if (v->type & VF_NUMBER) {
2459 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2463 fputs(getvar_s(v), F);
2467 fputs(getvar_s(intvar[OFS]), F);
2470 fputs(getvar_s(intvar[ORS]), F);
2472 } else { /* OC_PRINTF */
2473 char *s = awk_printf(op1);
2481 case XC( OC_DELETE ): {
2482 uint32_t info = op1->info & OPCLSMASK;
// The operand must be a variable or a function argument; anything else is
// rejected as not being an array.
2485 if (info == OC_VAR) {
2487 } else if (info == OC_FNARG) {
2488 v = &fnargs[op1->l.aidx];
2490 syntax_error(EMSG_NOT_ARRAY);
// Either a single element is removed by key, or the whole array is cleared.
2496 s = getvar_s(evaluate(op1->r.n, v1));
2497 hash_remove(iamarray(v), s);
2499 clear_array(iamarray(v));
2504 case XC( OC_NEWSOURCE ):
2505 g_progname = op->l.new_progname;
2508 case XC( OC_RETURN ):
2512 case XC( OC_NEXTFILE ):
2523 /* -- recursive node type -- */
2527 if (L.v == intvar[NF])
2531 case XC( OC_FNARG ):
2532 L.v = &fnargs[op->l.aidx];
// With a subscript present the array element is looked up, otherwise the
// variable itself is the result.
2534 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when key L.s exists in array R.v, else 0.
2538 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2541 case XC( OC_REGEXP ):
2543 L.s = getvar_s(intvar[F0]);
2546 case XC( OC_MATCH ):
2550 regex_t *re = as_regex(op1, &sreg);
2551 int i = regexec(re, L.s, 0, NULL, 0);
// The match result is inverted when the operator character is '!'.
2554 setvar_i(res, (i == 0) ^ (opn == '!'));
2559 debug_printf_eval("MOVE\n");
2560 /* if source is a temporary string, just relink it to dest */
2561 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2562 //then L.v ends up being a string, which is wrong
2563 // if (R.v == v1+1 && R.v->string) {
2564 // res = setvar_p(L.v, R.v->string);
2565 // R.v->string = NULL;
2567 res = copyvar(L.v, R.v);
2571 case XC( OC_TERNARY ):
// The right child must be an OC_COLON node holding the two alternatives.
2572 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2573 syntax_error(EMSG_POSSIBLE_ERROR);
2574 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2577 case XC( OC_FUNC ): {
2579 const char *sv_progname;
2581 if (!op->r.f->body.first)
2582 syntax_error(EMSG_UNDEF_FUNC);
// One variable slot per declared argument, plus one extra.
2584 vbeg = v = nvalloc(op->r.f->nargs + 1);
// Each actual argument is evaluated; the slot is flagged VF_CHILD. Filling
// stops once the declared number of arguments is reached.
2586 var *arg = evaluate(nextarg(&op1), v1);
2588 v->type |= VF_CHILD;
2590 if (++v - vbeg >= op->r.f->nargs)
// The program name is saved before the body runs and restored afterwards.
2596 sv_progname = g_progname;
2598 res = evaluate(op->r.f->body.first, res);
2600 g_progname = sv_progname;
2607 case XC( OC_GETLINE ):
2608 case XC( OC_PGETLINE ): {
// The input source is opened as a command pipe or as a file for reading.
2615 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2616 rsm->F = popen(L.s, "r");
2617 rsm->is_pipe = TRUE;
2619 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2624 iF = next_input_file();
2629 setvar_i(intvar[ERRNO], errno);
// A record read without an explicit source also advances FNR.
2637 i = awk_getline(rsm, R.v);
2638 if (i > 0 && !op1) {
2639 incvar(intvar[FNR]);
2646 /* simple builtins */
2647 case XC( OC_FBLTIN ): {
2648 double R_d = R_d; /* for compiler */
// Random number: rand() divided by RAND_MAX.
2656 R_d = (double)rand() / (double)RAND_MAX;
2658 #if ENABLE_FEATURE_AWK_LIBM
2684 syntax_error(EMSG_NO_MATH);
// New seed: the argument when one was given, the current time otherwise.
2689 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2699 L.s = getvar_s(intvar[F0]);
// Running a shell command is compiled in only with ENABLE_FEATURE_ALLOW_EXEC;
// the result is the status from system() shifted right by 8 bits.
2705 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2706 ? (system(L.s) >> 8) : 0;
2712 } else if (L.s && *L.s) {
2713 rstream *rsm = newfile(L.s);
// Closing a stream: it is looked up by name in fdhash, closed with pclose() or
// fclose() depending on is_pipe, and its hash entry is removed.
2723 rsm = (rstream *)hash_search(fdhash, L.s);
2724 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2726 debug_printf_eval("OC_FBLTIN F_cl "
2727 "rsm->is_pipe:%d, ->F:%p\n",
2728 rsm->is_pipe, rsm->F);
2729 /* Can be NULL if open failed. Example:
2730 * getline line <"doesnt_exist";
2731 * close("doesnt_exist"); <--- here rsm->F is NULL
2734 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2736 hash_remove(fdhash, L.s);
2739 setvar_i(intvar[ERRNO], errno);
2748 case XC( OC_BUILTIN ):
2749 res = exec_builtin(op, res);
2752 case XC( OC_SPRINTF ):
2753 setvar_p(res, awk_printf(op1));
2756 case XC( OC_UNARY ): {
2759 Ld = R_d = getvar_i(R.v);
2786 case XC( OC_FIELD ): {
2787 int i = (int)getvar_i(R.v);
// Field numbers are 1-based while Fields[] is 0-based.
2794 res = &Fields[i - 1];
2799 /* concatenation (" ") and index joining (",") */
2800 case XC( OC_CONCAT ):
2801 case XC( OC_COMMA ): {
// The separator is empty for concatenation and SUBSEP for index joining.
2802 const char *sep = "";
2803 if ((opinfo & OPCLSMASK) == OC_COMMA)
2804 sep = getvar_s(intvar[SUBSEP]);
2805 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// Short-circuit logic: the right operand is tested only when the left one does
// not already decide the result (logical AND first, logical OR second).
2810 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2814 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2817 case XC( OC_BINARY ):
2818 case XC( OC_REPLACE ): {
2819 double R_d = getvar_i(R.v);
2820 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2833 syntax_error(EMSG_DIV_BY_ZERO);
2837 #if ENABLE_FEATURE_AWK_LIBM
2838 L_d = pow(L_d, R_d);
2840 syntax_error(EMSG_NO_MATH);
2845 syntax_error(EMSG_DIV_BY_ZERO);
// Remainder computed by truncating the quotient.
// NOTE(review): the (int) cast limits this to quotients that fit in int.
2846 L_d -= (int)(L_d / R_d) * R_d;
2849 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res; OC_REPLACE writes back into the left operand.
2850 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2854 case XC( OC_COMPARE ): {
2855 int i = i; /* for compiler */
// Numeric comparison when both sides are numeric, string comparison otherwise
// (case-insensitive when icase is set).
2858 if (is_numeric(L.v) && is_numeric(R.v)) {
2859 Ld = getvar_i(L.v) - getvar_i(R.v);
2861 const char *l = getvar_s(L.v);
2862 const char *r = getvar_s(R.v);
2863 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
// The low bit of opn inverts the outcome; the other bits select the relation.
2865 switch (opn & 0xfe) {
2876 setvar_i(res, (i == 0) ^ (opn & 1));
2881 syntax_error(EMSG_POSSIBLE_ERROR);
2883 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2885 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2892 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2900 /* -------- main & co. -------- */
// Shutdown path; r is the requested exit status.
// Visible steps: run the END sequence, then close every open pipe stream.
2902 static int awk_exit(int r)
2913 evaluate(endseq.first, &tv);
2916 /* waiting for children */
// Walk all buckets of fdhash; only entries that have an open FILE and are
// marked as pipes are pclose()d here.
2917 for (i = 0; i < fdhash->csize; i++) {
2918 hi = fdhash->items[i];
2920 if (hi->data.rs.F && hi->data.rs.is_pipe)
2921 pclose(hi->data.rs.F);
2929 /* if expr looks like "var=value", perform assignment and return 1,
2930 * otherwise return 0 */
// expr is not modified: all editing happens on a private copy.
2931 static int is_assignment(const char *expr)
2933 char *exprc, *val, *s, *s1;
// Reject strings that do not start with an identifier character or that
// contain no equals sign.
2935 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
// val is re-pointed to the same offset inside the copy.
2939 exprc = xstrdup(expr);
2940 val = exprc + (val - expr);
// The value is rewritten in place through nextchar(), one output character per
// call (presumably this resolves escape sequences - TODO confirm).
2944 while ((*s1 = nextchar(&s)) != '\0')
// The variable named by exprc receives the value.
2947 setvar_u(newvar(exprc), val);
2952 /* switch to next input file */
// State is kept in the globals struct G: the stream object that is handed out
// and a flag recording whether any file argument has been seen.
2953 static rstream *next_input_file(void)
2955 #define rsm (G.next_input_file__rsm)
2956 #define files_happen (G.next_input_file__files_happen)
2959 const char *fname, *ind;
// Reset the buffer position bookkeeping of the stream.
2964 rsm.pos = rsm.adv = 0;
// ARGIND + 1 >= ARGC means there are no further ARGV entries to try.
2967 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// Advance ARGIND and fetch the corresponding ARGV element.
2973 ind = getvar_s(incvar(intvar[ARGIND]));
2974 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
// Empty entries are skipped, and entries of the form var=value are executed as
// assignments instead of being opened as files.
2975 if (fname && *fname && !is_assignment(fname))
2976 F = xfopen_stdin(fname);
2980 files_happen = TRUE;
2981 setvar_s(intvar[FILENAME], fname);
// Applet entry point. Visible phases: locale and global setup, built-in
// variable initialisation, ENVIRON, option parsing, program loading,
// ARGV setup, BEGIN, the main record loop, and exit through awk_exit().
2988 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2989 int awk_main(int argc, char **argv)
2992 char *opt_F, *opt_W;
2993 llist_t *list_v = NULL;
2994 llist_t *list_f = NULL;
2999 char *vnames = (char *)vNames; /* cheat */
3000 char *vvalues = (char *)vValues;
3004 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3005 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3006 if (ENABLE_LOCALE_SUPPORT)
3007 setlocale(LC_NUMERIC, "C");
3011 /* allocate global buffer */
3012 g_buf = xmalloc(MAXVARFMT + 1);
// Four hash tables are created: vhash, ahash, fdhash and fnhash.
3014 vhash = hash_init();
3015 ahash = hash_init();
3016 fdhash = hash_init();
3017 fnhash = hash_init();
3019 /* initialize variables */
// vnames and vvalues are packed word lists read with nextword(). A value entry
// starting with \377 is not assigned as a string; a name entry starting with
// an asterisk marks the variable as VF_SPECIAL.
3020 for (i = 0; *vnames; i++) {
3021 intvar[i] = v = newvar(nextword(&vnames));
3022 if (*vvalues != '\377')
3023 setvar_s(v, nextword(&vvalues));
3027 if (*vnames == '*') {
3028 v->type |= VF_SPECIAL;
// Build the initial field and record splitters from the default FS and RS.
3033 handle_special(intvar[FS]);
3034 handle_special(intvar[RS]);
// The standard streams are registered under their /dev names.
3036 newfile("/dev/stdin")->F = stdin;
3037 newfile("/dev/stdout")->F = stdout;
3038 newfile("/dev/stderr")->F = stderr;
3040 /* Huh, people report that sometimes environ is NULL. Oh well. */
3041 if (environ) for (envp = environ; *envp; envp++) {
3042 /* environ is writable, thus we don't strdup it needlessly */
3044 char *s1 = strchr(s, '=');
3047 /* Both findvar and setvar_u take const char*
3048 * as 2nd arg -> environment is not trashed */
// ENVIRON[name] is set to the text after the equals sign.
3049 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3053 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3054 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3058 setvar_s(intvar[FS], opt_F); // -F
// Every -v argument must have the var=value form.
3059 while (list_v) { /* -v */
3060 if (!is_assignment(llist_pop(&list_v)))
3063 if (list_f) { /* -f */
3068 g_progname = llist_pop(&list_f);
3069 from_file = xfopen_stdin(g_progname);
3070 /* one byte is reserved for some trick in next_token */
// The program file is read in chunks of up to 4094 bytes starting at offset 1;
// the loop ends when fread() returns 0.
3071 for (i = j = 1; j > 0; i += j) {
3072 s = xrealloc(s, i + 4096);
3073 j = fread(s + i, 1, 4094, from_file);
3077 parse_program(s + 1);
3081 } else { // no -f: take program from 1st parameter
3084 g_progname = "cmd. line";
3085 parse_program(*argv++);
3087 if (opt & 0x8) // -W
3088 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3090 /* fill in ARGV array */
// ARGV[0] is always the literal string "awk"; the remaining command-line
// arguments follow at increasing indexes.
3091 setvar_i(intvar[ARGC], argc);
3092 setari_u(intvar[ARGV], 0, "awk");
3095 setari_u(intvar[ARGV], ++i, *argv++);
// Run the BEGIN sequence; if there are neither main rules nor an END sequence,
// awk_exit() is called right away.
3097 evaluate(beginseq.first, &tv);
3098 if (!mainseq.first && !endseq.first)
3099 awk_exit(EXIT_SUCCESS);
3101 /* input file could already be opened in BEGIN block */
3103 iF = next_input_file();
3105 /* passing through input files */
// FNR restarts from 0 for each input file.
3108 setvar_i(intvar[FNR], 0);
// One iteration per record: FNR is advanced and the main rules are run.
3110 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3113 incvar(intvar[FNR]);
3114 evaluate(mainseq.first, &tv);
// The errno text is reported through syntax_error() (condition not shown here).
3121 syntax_error(strerror(errno));
3123 iF = next_input_file();
3126 awk_exit(EXIT_SUCCESS);