1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
14 /* This is a NOEXEC applet. Be very careful! */
17 /* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...) do {} while (0)
20 #define debug_printf_eval(...) do {} while (0)
22 #ifndef debug_printf_walker
23 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
25 #ifndef debug_printf_eval
26 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
36 #define VF_ARRAY 0x0002 /* 1 = it's an array */
38 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
39 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
40 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
41 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
42 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
43 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
44 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
46 /* these flags are static, don't change them when value is changed */
47 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
49 typedef struct walker_list {
52 struct walker_list *prev;
57 typedef struct var_s {
58 unsigned type; /* flags */
62 int aidx; /* func arg idx (for compilation stage) */
63 struct xhash_s *array; /* array ptr */
64 struct var_s *parent; /* for func args, ptr to actual parameter */
65 walker_list *walker; /* list of array elements (for..in) */
69 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
70 typedef struct chain_s {
73 const char *programname;
77 typedef struct func_s {
83 typedef struct rstream_s {
92 typedef struct hash_item_s {
94 struct var_s v; /* variable/array hash */
95 struct rstream_s rs; /* redirect streams hash */
96 struct func_s f; /* functions hash */
98 struct hash_item_s *next; /* next in chain */
99 char name[1]; /* really it's longer */
/* Hash table header: a bucket array plus bookkeeping counters. */
102 typedef struct xhash_s {
103 unsigned nel; /* num of elements */
104 unsigned csize; /* current hash size (bucket count; hash values are reduced modulo this) */
105 unsigned nprime; /* index of the next hash size in PRIMES[] */
106 unsigned glen; /* total length of item names, each counted with its NUL */
107 struct hash_item_s **items; /* buckets; each is a chain linked through hash_item_s.next */
111 typedef struct node_s {
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
140 typedef struct tsplitter_s {
145 /* simple token classes */
146 /* Order and hex values are very important!!! See next_token() */
147 #define TC_SEQSTART 1 /* ( */
148 #define TC_SEQTERM (1 << 1) /* ) */
149 #define TC_REGEXP (1 << 2) /* /.../ */
150 #define TC_OUTRDR (1 << 3) /* | > >> */
151 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
152 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153 #define TC_BINOPX (1 << 6) /* two-opnd operator */
154 #define TC_IN (1 << 7)
155 #define TC_COMMA (1 << 8)
156 #define TC_PIPE (1 << 9) /* input redirection pipe */
157 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158 #define TC_ARRTERM (1 << 11) /* ] */
159 #define TC_GRPSTART (1 << 12) /* { */
160 #define TC_GRPTERM (1 << 13) /* } */
161 #define TC_SEMICOL (1 << 14)
162 #define TC_NEWLINE (1 << 15)
163 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164 #define TC_WHILE (1 << 17)
165 #define TC_ELSE (1 << 18)
166 #define TC_BUILTIN (1 << 19)
167 #define TC_GETLINE (1 << 20)
168 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
169 #define TC_BEGIN (1 << 22)
170 #define TC_END (1 << 23)
171 #define TC_EOF (1 << 24)
172 #define TC_VARIABLE (1 << 25)
173 #define TC_ARRAY (1 << 26)
174 #define TC_FUNCTION (1 << 27)
175 #define TC_STRING (1 << 28)
176 #define TC_NUMBER (1 << 29)
178 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
180 /* combined token classes */
181 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
184 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
191 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
193 /* discard newlines after these */
194 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
195 | TC_BINOP | TC_OPTERM)
197 /* what can expression begin with */
198 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
205 | TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
208 #define OF_RES1 0x010000
209 #define OF_RES2 0x020000
210 #define OF_STR1 0x040000
211 #define OF_STR2 0x080000
212 #define OF_NUM1 0x100000
213 #define OF_CHECKED 0x200000
215 /* combined operator flags */
218 #define xS (OF_RES2 | OF_STR2)
220 #define VV (OF_RES1 | OF_RES2)
221 #define Nx (OF_RES1 | OF_NUM1)
222 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx (OF_RES1 | OF_STR1)
224 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK 0x007F
230 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
231 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
234 #define P(x) (x << 24)
235 #define PRIMASK 0x7F000000
236 #define PRIMASK2 0x7E000000
238 /* Operation classes */
240 #define SHIFT_TIL_THIS 0x0600
241 #define RECUR_FROM_THIS 0x1000
244 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
245 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
247 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
248 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
249 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
251 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
252 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
253 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
254 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
255 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
256 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
257 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
258 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
261 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
265 /* simple builtins */
267 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
273 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
275 B_an, B_co, B_ls, B_or, B_rs, B_xo,
278 /* tokens and their corresponding info values */
280 #define NTC "\377" /* switch to next token class (tc<<1) */
283 #define OC_B OC_BUILTIN
285 static const char tokenlist[] ALIGN1 =
288 "\1/" NTC /* REGEXP */
289 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
290 "\2++" "\2--" NTC /* UOPPOST */
291 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
292 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
293 "\2*=" "\2/=" "\2%=" "\2^="
294 "\1+" "\1-" "\3**=" "\2**"
295 "\1/" "\1%" "\1^" "\1*"
296 "\2!=" "\2>=" "\2<=" "\1>"
297 "\1<" "\2!~" "\1~" "\2&&"
298 "\2||" "\1?" "\1:" NTC
302 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
308 "\2if" "\2do" "\3for" "\5break" /* STATX */
309 "\10continue" "\6delete" "\5print"
310 "\6printf" "\4next" "\10nextfile"
311 "\6return" "\4exit" NTC
315 "\3and" "\5compl" "\6lshift" "\2or"
317 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
318 "\3cos" "\3exp" "\3int" "\3log"
319 "\4rand" "\3sin" "\4sqrt" "\5srand"
320 "\6gensub" "\4gsub" "\5index" "\6length"
321 "\5match" "\5split" "\7sprintf" "\3sub"
322 "\6substr" "\7systime" "\10strftime" "\6mktime"
323 "\7tolower" "\7toupper" NTC
325 "\4func" "\10function" NTC
330 static const uint32_t tokeninfo[] = {
334 xS|'a', xS|'w', xS|'|',
335 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
336 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
338 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
339 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
340 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
341 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
342 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
343 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
344 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
345 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
346 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
347 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
348 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
349 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
350 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
351 OC_COLON|xx|P(67)|':',
354 OC_PGETLINE|SV|P(37),
355 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
356 OC_UNARY|xV|P(19)|'!',
362 ST_IF, ST_DO, ST_FOR, OC_BREAK,
363 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
364 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
365 OC_RETURN|Vx, OC_EXIT|Nx,
369 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
370 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
371 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
372 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
373 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
374 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
375 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
376 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
377 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
384 /* internal variable names and their initial values */
385 /* an asterisk marks SPECIAL vars; $ is simply the unnamed field 0 */
387 CONVFMT, OFMT, FS, OFS,
388 ORS, RS, RT, FILENAME,
389 SUBSEP, F0, ARGIND, ARGC,
390 ARGV, ERRNO, FNR, NR,
391 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
394 static const char vNames[] ALIGN1 =
395 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
396 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
397 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
398 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
399 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
401 static const char vValues[] ALIGN1 =
402 "%.6g\0" "%.6g\0" " \0" " \0"
403 "\n\0" "\n\0" "\0" "\0"
404 "\034\0" "\0" "\377";
406 /* hash size may grow to these values */
407 #define FIRST_PRIME 61
408 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
411 /* Globals. Split in two parts so that first one is addressed
412 * with (mostly short) negative offsets.
413 * NB: it's unsafe to put members of type "double"
414 * into globals2 (gcc may fail to align them).
418 chain beginseq, mainseq, endseq;
420 node *break_ptr, *continue_ptr;
422 xhash *vhash, *ahash, *fdhash, *fnhash;
423 const char *g_progname;
426 int maxfields; /* used in fsrealloc() only */
435 smallint is_f0_split;
438 uint32_t t_info; /* often used */
444 var *intvar[NUM_INTERNAL_VARS]; /* often used */
446 /* former statics from various functions */
447 char *split_f0__fstrings;
449 uint32_t next_token__save_tclass;
450 uint32_t next_token__save_info;
451 uint32_t next_token__ltclass;
452 smallint next_token__concat_inserted;
454 smallint next_input_file__files_happen;
455 rstream next_input_file__rsm;
457 var *evaluate__fnargs;
458 unsigned evaluate__seed;
459 regex_t evaluate__sreg;
463 tsplitter exec_builtin__tspl;
465 /* biggest and least used members go last */
466 tsplitter fsplitter, rsplitter;
468 #define G1 (ptr_to_globals[-1])
469 #define G (*(struct globals2 *)ptr_to_globals)
470 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
471 /*char G1size[sizeof(G1)]; - 0x74 */
472 /*char Gsize[sizeof(G)]; - 0x1c4 */
473 /* Trying to keep most of members accessible with short offsets: */
474 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
475 #define t_double (G1.t_double )
476 #define beginseq (G1.beginseq )
477 #define mainseq (G1.mainseq )
478 #define endseq (G1.endseq )
479 #define seq (G1.seq )
480 #define break_ptr (G1.break_ptr )
481 #define continue_ptr (G1.continue_ptr)
483 #define vhash (G1.vhash )
484 #define ahash (G1.ahash )
485 #define fdhash (G1.fdhash )
486 #define fnhash (G1.fnhash )
487 #define g_progname (G1.g_progname )
488 #define g_lineno (G1.g_lineno )
489 #define nfields (G1.nfields )
490 #define maxfields (G1.maxfields )
491 #define Fields (G1.Fields )
492 #define g_cb (G1.g_cb )
493 #define g_pos (G1.g_pos )
494 #define g_buf (G1.g_buf )
495 #define icase (G1.icase )
496 #define exiting (G1.exiting )
497 #define nextrec (G1.nextrec )
498 #define nextfile (G1.nextfile )
499 #define is_f0_split (G1.is_f0_split )
500 #define t_info (G.t_info )
501 #define t_tclass (G.t_tclass )
502 #define t_string (G.t_string )
503 #define t_lineno (G.t_lineno )
504 #define t_rollback (G.t_rollback )
505 #define intvar (G.intvar )
506 #define fsplitter (G.fsplitter )
507 #define rsplitter (G.rsplitter )
508 #define INIT_G() do { \
509 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
510 G.next_token__ltclass = TC_OPTERM; \
511 G.evaluate__seed = 1; \
515 /* function prototypes */
516 static void handle_special(var *);
517 static node *parse_expr(uint32_t);
518 static void chain_group(void);
519 static var *evaluate(node *, var *);
520 static rstream *next_input_file(void);
521 static int fmt_num(char *, int, const char *, double, int);
522 static int awk_exit(int) NORETURN;
524 /* ---- error handling ---- */
526 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
527 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
528 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
529 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
530 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
531 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
532 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
533 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
534 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
535 #if !ENABLE_FEATURE_AWK_LIBM
536 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
/* Overwrite the whole var structure pointed to by vp with zero bytes. */
539 static void zero_out_var(var *vp)
541 memset(vp, 0, sizeof(*vp));
/* Report message prefixed with the current program name and line number
 * (g_progname, g_lineno) through bb_error_msg_and_die().
 * Declared NORETURN, so callers do not regain control. */
544 static void syntax_error(const char *message) NORETURN;
545 static void syntax_error(const char *message)
547 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
550 /* ---- hash stuff ---- */
/* Compute the hash value of name. Callers reduce the result modulo the
 * table size to obtain a bucket index. */
552 static unsigned hashidx(const char *name)
/* fold in the next character: idx = idx * 63 + ch */
557 idx = *name++ + (idx << 6) - idx;
561 /* create new hash */
/* Header and bucket array both come zero-filled from xzalloc();
 * the initial bucket count is FIRST_PRIME. */
562 static xhash *hash_init(void)
566 newhash = xzalloc(sizeof(*newhash));
567 newhash->csize = FIRST_PRIME;
568 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
573 /* find item in hash, return ptr to data, NULL if not found */
574 static void *hash_search(xhash *hash, const char *name)
/* start at the head of the bucket chain selected by the name hash */
578 hi = hash->items[hashidx(name) % hash->csize];
/* names are compared exactly, byte for byte */
580 if (strcmp(hi->name, name) == 0)
587 /* grow hash if it becomes too big */
588 static void hash_rebuild(xhash *hash)
590 unsigned newsize, i, idx;
591 hash_item **newitems, *hi, *thi;
/* every size listed in PRIMES[] has already been used */
593 if (hash->nprime == ARRAY_SIZE(PRIMES))
/* take the next table size and allocate a zeroed bucket array for it */
596 newsize = PRIMES[hash->nprime++];
597 newitems = xzalloc(newsize * sizeof(newitems[0]));
/* visit every bucket of the old array */
599 for (i = 0; i < hash->csize; i++) {
/* re-bucket the item under the new size and link it at the chain head */
604 idx = hashidx(thi->name) % newsize;
605 thi->next = newitems[idx];
/* switch the table over to the new bucket array */
611 hash->csize = newsize;
612 hash->items = newitems;
615 /* find item in hash, add it if necessary. Return ptr to data */
616 static void *hash_find(xhash *hash, const char *name)
/* look for an existing entry first */
622 hi = hash_search(hash, name);
/* count the new element; the table is too full once nel / csize exceeds 10 */
624 if (++hash->nel / hash->csize > 10)
/* allocate the item with room for the name and its NUL after the struct */
627 l = strlen(name) + 1;
628 hi = xzalloc(sizeof(*hi) + l);
629 strcpy(hi->name, name);
/* link the new item at the head of its bucket chain */
631 idx = hashidx(name) % hash->csize;
632 hi->next = hash->items[idx];
633 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): each casts the returned data pointer
 * to the item type kept in the corresponding table. findvar() takes the
 * table explicitly; the other three use vhash, fdhash and fnhash. */
639 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
640 #define newvar(name) ((var*) hash_find(vhash, (name)))
641 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
642 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Remove the item called name from hash, if present. */
644 static void hash_remove(xhash *hash, const char *name)
646 hash_item *hi, **phi;
/* phi addresses the link that points at the current item, starting at the bucket head */
648 phi = &hash->items[hashidx(name) % hash->csize];
651 if (strcmp(hi->name, name) == 0) {
/* keep glen in step: the name and its NUL no longer count */
652 hash->glen -= (strlen(name) + 1);
662 /* ------ some useful functions ------ */
/* Advance p over blanks and return the new position.
 * NOTE(review): only the two tests below are visible in this excerpt;
 * confirm the loop structure against the full body. */
664 static char *skip_spaces(char *p)
/* a backslash immediately followed by a newline gets its own branch */
667 if (*p == '\\' && p[1] == '\n') {
/* any character other than space or tab takes this branch */
670 } else if (*p != ' ' && *p != '\t') {
678 /* returns old *s, advances *s past word and terminating NUL */
679 static char *nextword(char **s)
/* the post-increment runs even on the NUL, so *s ends up one past it */
682 while (*(*s)++ != '\0')
/* Return the next character from *s, decoding escape sequences. */
687 static char nextchar(char **s)
/* decoding is delegated; the helper is handed s so it can move *s */
694 c = bb_process_escape_sequence((const char**)s);
/* a backslash result with *s still equal to pps gets special handling;
 * NOTE(review): pps is presumably a position saved before the call - confirm */
695 if (c == '\\' && *s == pps)
/* Nonzero if c is alphanumeric or an underscore. */
700 static ALWAYS_INLINE int isalnum_(int c)
702 return (isalnum(c) || c == '_');
// Convert the text at *pp to a double and advance *pp past what was
// consumed. Integers written in hex or octal go through strtoull(),
// everything else through strtod().
705 static double my_strtod(char **pp)
710 /* Might be hex or octal integer: 0x123abc or 07777 */
// OR-ing in 0x20 lowercases an ASCII letter, so X and x compare alike
711 char c = (cp[1] | 0x20);
712 if (c == 'x' || isdigit(cp[1])) {
// base 0 makes strtoull() pick the radix from the prefix
713 unsigned long long ull = strtoull(cp, pp, 0);
// inspect the character at which the integer parse stopped
717 if (!isdigit(c) && c != '.')
719 /* else: it may be a floating number. Examples:
720 * 009.123 (*pp points to '9')
721 * 000.123 (*pp points to '.')
722 * fall through to strtod.
727 return strtod(cp, pp);
730 /* -------- working with variables (set/get/copy/etc) -------- */
/* Return the array hash belonging to v, creating it on first use. */
732 static xhash *iamarray(var *v)
/* VF_CHILD marks a function argument whose x.parent points to its source;
 * NOTE(review): the loop presumably follows that link - body not visible */
736 while (a->type & VF_CHILD)
/* not an array yet: give it a fresh, empty hash */
739 if (!(a->type & VF_ARRAY)) {
741 a->x.array = hash_init();
/* Empty array: release its items and reset the counters. The bucket
 * array itself is kept; only its entries are set to NULL. */
746 static void clear_array(xhash *array)
751 for (i = 0; i < array->csize; i++) {
752 hi = array->items[i];
/* the string value held by the item is released */
756 free(thi->data.v.string);
759 array->items[i] = NULL;
761 array->glen = array->nel = 0;
764 /* clear a variable */
765 static var *clrvar(var *v)
/* VF_FSTR strings point into the fstring buffer, so they take a different path here */
767 if (!(v->type & VF_FSTR))
/* only the static flags listed in VF_DONTTOUCH survive */
770 v->type &= VF_DONTTOUCH;
776 /* assign string value to variable */
777 static var *setvar_p(var *v, char *value)
785 /* same as setvar_p but make a copy of string */
/* A NULL or empty value is passed on as NULL; anything else is
 * duplicated with xstrdup() before being handed to setvar_p(). */
786 static var *setvar_s(var *v, const char *value)
788 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
791 /* same as setvar_s but sets USER flag */
792 static var *setvar_u(var *v, const char *value)
/* store the copy first; the flag is applied to the var that comes back */
794 v = setvar_s(v, value);
799 /* set array element to user string */
800 static void setari_u(var *a, int idx, const char *s)
/* the element key is the string form of idx; findvar() creates the element if needed */
804 v = findvar(iamarray(a), itoa(idx));
808 /* assign numeric value to variable */
809 static var *setvar_i(var *v, double value)
/* mark the primary type of v as number */
812 v->type |= VF_NUMBER;
/* Return the string value of v; never NULL (an unset string yields ""). */
818 static const char *getvar_s(var *v)
820 /* if v is numeric and has no cached string, convert it to string */
821 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format into the shared buffer g_buf using the CONVFMT format string */
822 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
/* keep a private copy and note that the string form is now cached */
823 v->string = xstrdup(g_buf);
824 v->type |= VF_CACHED;
826 return (v->string == NULL) ? "" : v->string;
/* Return the numeric value of v, converting from its string on demand. */
829 static double getvar_i(var *v)
/* neither a number nor cached yet: derive the number from the string */
833 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
837 debug_printf_eval("getvar_i: '%s'->", s);
/* my_strtod() moves s past the text it consumed */
838 v->number = my_strtod(&s);
839 debug_printf_eval("%f (s:'%s')\n", v->number, s);
/* user input gets additional handling (body not visible in this excerpt) */
840 if (v->type & VF_USER) {
846 debug_printf_eval("getvar_i: '%s'->zero\n", s);
/* record that v->number is now valid */
849 v->type |= VF_CACHED;
851 debug_printf_eval("getvar_i: %f\n", v->number);
855 /* Used for operands of bitwise ops */
856 static unsigned long getvar_i_int(var *v)
858 double d = getvar_i(v);
860 /* Casting doubles to longs is undefined for values outside
861 * of target type range. Try to widen it as much as possible */
/* first return: direct conversion to unsigned long */
863 return (unsigned long)d;
864 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
/* second return: convert the magnitude -d, then negate the integer result */
865 return - (long) (unsigned long) (-d);
/* Copy the value of src into dest and return dest. */
868 static var *copyvar(var *dest, const var *src)
/* take over the type flags of src, except the static ones and VF_FSTR */
872 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
873 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
874 dest->number = src->number;
/* dest gets its own copy of the string */
876 dest->string = xstrdup(src->string);
/* let special variables react to their new value */
878 handle_special(dest);
/* Add 1.0 to the numeric value of v, store it back and return v as
 * handed back by setvar_i(). */
882 static var *incvar(var *v)
884 return setvar_i(v, getvar_i(v) + 1.0);
887 /* return true if v is number or numeric string */
888 static int is_numeric(var *v)
/* XOR flips the DIRTY bit first, so the mask test is nonzero when
 * VF_NUMBER is set, VF_USER is set, or VF_DIRTY is clear */
891 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
894 /* return 1 when value of v corresponds to true, 0 otherwise */
895 static int istrue(var *v)
/* numeric result: true when the number is nonzero */
898 return (v->number != 0);
/* string result: true when the string exists and is not empty */
899 return (v->string && v->string[0]);
902 /* temporary variables allocator. Last allocated should be first freed */
903 static var *nvalloc(int n)
/* the current block still has room for n more vars past pos */
911 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
/* a new block holds at least MINNVBLOCK vars, or n if that is larger */
917 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
918 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
/* the new block starts out empty */
920 g_cb->pos = g_cb->nv;
922 /*g_cb->next = NULL; - xzalloc did it */
/* walk the vars lying below the new allocation position */
930 while (v < g_cb->pos) {
/* Release the temporaries from v up to the current allocation position. */
939 static void nvfree(var *v)
/* v must lie inside the used part of the current block */
943 if (v < g_cb->nv || v >= g_cb->pos)
944 syntax_error(EMSG_INTERNAL_ERROR);
946 for (p = v; p < g_cb->pos; p++) {
/* an array owned by the temporary itself (VF_ARRAY without VF_CHILD)
 * is emptied and its bucket array freed */
947 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
948 clear_array(iamarray(p));
949 free(p->x.array->items);
/* VF_WALK: the variable carries an allocated walker list */
952 if (p->type & VF_WALK) {
954 walker_list *w = p->x.walker;
955 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
959 debug_printf_walker(" free(%p)\n", w);
/* step back over blocks that are now empty, but never past the first block */
968 while (g_cb->prev && g_cb->pos == g_cb->nv) {
973 /* ------- awk program text parsing ------- */
975 /* Parse next token pointed by global pos, place results into global ttt.
976 * If token isn't expected, give away. Return token class
// expected is a bit mask of the TC_xxx classes the caller accepts here
978 static uint32_t next_token(uint32_t expected)
980 #define concat_inserted (G.next_token__concat_inserted)
981 #define save_tclass (G.next_token__save_tclass)
982 #define save_info (G.next_token__save_info)
983 /* Initialized to TC_OPTERM: */
984 #define ltclass (G.next_token__ltclass)
// the previous call returned an inserted concatenation operator:
// now hand out the token class that was saved at that point
994 } else if (concat_inserted) {
995 concat_inserted = FALSE;
996 t_tclass = save_tclass;
1003 g_lineno = t_lineno;
// run forward to the end of the current line
1005 while (*p != '\n' && *p != '\0')
// string literal: starts at a double quote
1014 } else if (*p == '\"') {
1017 while (*p != '\"') {
// the string must be closed before the line or the input ends
1019 if (*p == '\0' || *p == '\n')
1020 syntax_error(EMSG_UNEXP_EOS);
// copy one character, decoding escapes
1021 *s++ = nextchar(&pp);
// a slash starts a regexp only where the caller expects one
1028 } else if ((expected & TC_REGEXP) && *p == '/') {
1032 if (*p == '\0' || *p == '\n')
1033 syntax_error(EMSG_UNEXP_EOS);
1037 s[-1] = bb_process_escape_sequence((const char **)&pp);
// numeric literal: starts with a digit or a dot
1050 } else if (*p == '.' || isdigit(*p)) {
1053 t_double = my_strtod(&pp);
1056 syntax_error(EMSG_UNEXP_TOKEN);
1060 /* search for something known */
// each tokenlist entry starts with a length byte; the NTCC byte instead
// marks the switch to the next token class
1065 int l = (unsigned char) *tl++;
1066 if (l == (unsigned char) NTCC) {
1070 /* if token class is expected,
1072 * and it's not a longer word,
1074 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1075 && strncmp(p, tl, l) == 0
1076 && !((tc & TC_WORD) && isalnum_(p[l]))
1078 /* then this is what we are looking for */
1086 /* not a known token */
1088 /* is it a name? (var/array/function) */
1090 syntax_error(EMSG_UNEXP_TOKEN); /* no */
// consume the rest of the identifier
1093 while (isalnum_(*++p)) {
1098 /* also consume whitespace between functionname and bracket */
1099 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1113 /* skipping newlines in some cases */
1114 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1117 /* insert concatenation operator when needed */
1118 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1119 concat_inserted = TRUE;
// the synthetic token is a concatenation with priority 35 and both
// operands resolved to strings
1123 t_info = OC_CONCAT | SS | P(35);
1130 /* Are we ready for this? */
// the error text depends on whether the offending token is a newline or EOF
1131 if (!(ltclass & expected))
1132 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1133 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1136 #undef concat_inserted
1142 static void rollback_token(void)
/* Allocate a zero-filled node and stamp it with the current line number. */
1147 static node *new_node(uint32_t info)
1151 n = xzalloc(sizeof(node));
1153 n->lineno = g_lineno;
/* Turn n into an OC_REGEXP node for pattern s. re must provide room for
 * two regex_t objects: re[0] receives the case-sensitive compilation and
 * re[1] the REG_ICASE one. */
1157 static void mk_re_node(const char *s, node *n, regex_t *re)
1159 n->info = OC_REGEXP;
1162 xregcomp(re, s, REG_EXTENDED);
1163 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: require the opening token, then
 * parse up to and including the matching TC_SEQTERM. */
1166 static node *condition(void)
1168 next_token(TC_SEQSTART);
1169 return parse_expr(TC_SEQTERM);
1172 /* parse expression terminated by given argument, return ptr
1173 * to built subtree. Terminator is eaten by parse_expr */
// iexp is the mask of token classes that end the expression
1174 static node *parse_expr(uint32_t iexp)
1183 sn.r.n = glptr = NULL;
// xtc is the set of token classes acceptable next; it is updated after every token
1184 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
// read tokens until one of the terminator classes shows up
1186 while (!((tc = next_token(xtc)) & iexp)) {
// this t_info value equals the tokeninfo entry with compare code 2;
// after a getline node (glptr set) it means input redirection
1188 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1189 /* input redirection (<) attached to glptr node */
1190 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1192 xtc = TC_OPERAND | TC_UOPPRE;
1195 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1196 /* for binary and postfix-unary operators, jump back over
1197 * previous operators with higher priority */
// PRIMASK2 drops the lowest priority bit, the one that encodes grouping direction
1199 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1200 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1204 if ((t_info & OPCLSMASK) == OC_TERNARY)
1206 cn = vn->a.n->r.n = new_node(t_info);
1208 if (tc & TC_BINOP) {
1210 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1211 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
// a pipe into getline must be followed by the getline token itself
1213 next_token(TC_GETLINE);
1214 /* give maximum priority to this pipe */
1215 cn->info &= ~PRIMASK;
1216 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1220 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1225 /* for operands and prefix-unary operators, attach them
1228 cn = vn->r.n = new_node(t_info);
1230 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1231 if (tc & (TC_OPERAND | TC_REGEXP)) {
1232 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1233 /* one should be very careful with switch on tclass -
1234 * only simple tclasses should be used! */
// a name found in ahash becomes an OC_FNARG node that carries the argument index
1239 v = hash_search(ahash, t_string);
1241 cn->info = OC_FNARG;
1242 cn->l.aidx = v->x.aidx;
// any other name is looked up in vhash, being created there if needed
1244 cn->l.v = newvar(t_string);
// array reference: the subscript is parsed up to the closing bracket
1246 if (tc & TC_ARRAY) {
1248 cn->r.n = parse_expr(TC_ARRTERM);
// literals get a freshly allocated var holding the number or the string
1255 v = cn->l.v = xzalloc(sizeof(var));
1257 setvar_i(v, t_double);
1259 setvar_s(v, t_string);
// regexp literal: room for two regex_t objects, as mk_re_node() fills both
1263 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
// function call: bind the function entry, then parse the parenthesized arguments
1268 cn->r.f = newfunc(t_string);
1269 cn->l.n = condition();
// parenthesized subexpression
1273 cn = vn->r.n = parse_expr(TC_SEQTERM);
1279 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1283 cn->l.n = condition();
1292 /* add node to chain. Return ptr to alloc'd node */
1293 static node *chain_node(uint32_t info)
/* set up the chain with a single node that is both first and last */
1298 seq->first = seq->last = new_node(0);
/* the program name changed since this chain was last extended:
 * record it and chain an OC_NEWSOURCE node carrying a copy of the name */
1300 if (seq->programname != g_progname) {
1301 seq->programname = g_progname;
1302 n = chain_node(OC_NEWSOURCE);
1303 n->l.new_progname = xstrdup(g_progname);
/* a fresh OC_DONE node becomes the new end of the chain */
1308 seq->last = n->a.n = new_node(OC_DONE);
/* Chain a node of the given info and attach the expression that follows
 * as its left operand. */
1313 static void chain_expr(uint32_t info)
1317 n = chain_node(info);
/* the expression ends at a statement terminator or a closing brace */
1318 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
/* ended by the closing brace: it gets separate handling */
1319 if (t_tclass & TC_GRPTERM)
/* Chain the nodes of a loop. The break and continue targets of the
 * enclosing loop are saved on entry and restored before returning, so
 * loops can nest. */
1323 static node *chain_loop(node *nn)
1325 node *n, *n2, *save_brk, *save_cont;
1327 save_brk = break_ptr;
1328 save_cont = continue_ptr;
/* branch node, then fresh targets for continue and break inside the body */
1330 n = chain_node(OC_BR | Vx);
1331 continue_ptr = new_node(OC_EXEC);
1332 break_ptr = new_node(OC_EXEC);
1334 n2 = chain_node(OC_EXEC | Vx);
/* continue goes on at n2; break and the r branch of n both lead to
 * the current end of the chain */
1337 continue_ptr->a.n = n2;
1338 break_ptr->a.n = n->r.n = seq->last;
1340 continue_ptr = save_cont;
1341 break_ptr = save_brk;
1346 /* parse group and attach it to chain */
1347 static void chain_group(void)
/* skip leading newlines */
1353 c = next_token(TC_GRPSEQ);
1354 } while (c & TC_NEWLINE);
/* braced group: process its contents until the closing brace */
1356 if (c & TC_GRPSTART) {
1357 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1358 if (t_tclass & TC_NEWLINE)
/* plain expression statement */
1363 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1365 chain_expr(OC_EXEC | Vx);
1366 } else { /* TC_STATEMNT */
1367 switch (t_info & OPCLSMASK) {
/* branch on a parenthesized condition */
1369 n = chain_node(OC_BR | Vx);
1370 n->l.n = condition();
1372 n2 = chain_node(OC_EXEC);
/* optional else part */
1374 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1376 n2->a.n = seq->last;
1384 n = chain_loop(NULL);
1389 n2 = chain_node(OC_EXEC);
1390 n = chain_loop(NULL);
/* the while keyword and its condition come after the loop body here */
1392 next_token(TC_WHILE);
1393 n->l.n = condition();
/* for: the first expression may end at a semicolon or at the closing parenthesis */
1397 next_token(TC_SEQSTART);
1398 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1399 if (t_tclass & TC_SEQTERM) { /* for-in */
/* the only expression accepted in this form is an in expression */
1400 if ((n2->info & OPCLSMASK) != OC_IN)
1401 syntax_error(EMSG_UNEXP_TOKEN);
1402 n = chain_node(OC_WALKINIT | VV);
1405 n = chain_loop(NULL);
1406 n->info = OC_WALKNEXT | Vx;
1408 } else { /* for (;;) */
1409 n = chain_node(OC_EXEC | Vx);
/* second and third expressions of the three-part form */
1411 n2 = parse_expr(TC_SEMICOL);
1412 n3 = parse_expr(TC_SEQTERM);
/* the argument expression may be followed by an output redirection,
 * whose target is parsed as the right operand */
1422 n = chain_node(t_info);
1423 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1424 if (t_tclass & TC_OUTRDR) {
1426 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1428 if (t_tclass & TC_GRPTERM)
1433 n = chain_node(OC_EXEC);
/* this node continues at the continue target of the innermost loop */
1438 n = chain_node(OC_EXEC);
1439 n->a.n = continue_ptr;
1442 /* delete, next, nextfile, return, exit */
/* Parse the awk program text p, building the BEGIN, main and END chains
 * and the function table. */
1449 static void parse_program(char *p)
/* each pass handles one top-level item; TC_EOF ends the program */
1458 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1459 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
/* a bare terminator between items */
1461 if (tclass & TC_OPTERM)
1465 if (tclass & TC_BEGIN) {
1469 } else if (tclass & TC_END) {
1473 } else if (tclass & TC_FUNCDECL) {
/* function definition: the name comes first */
1474 next_token(TC_FUNCTION);
1476 f = newfunc(t_string);
1477 f->body.first = NULL;
/* each parameter name is entered into ahash with its position;
 * nargs counts them as it goes */
1479 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1480 v = findvar(ahash, t_string);
1481 v->x.aidx = f->nargs++;
1483 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1490 } else if (tclass & TC_OPSEQ) {
/* pattern, with or without an action: the pattern becomes the left
 * operand of an OC_TEST node */
1492 cn = chain_node(OC_TEST);
1493 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1494 if (t_tclass & TC_GRPSTART) {
/* a pattern without an action gets a print node chained instead */
1498 chain_node(OC_PRINT);
1500 cn->r.n = mainseq.last;
1502 } else /* if (tclass & TC_GRPSTART) */ {
1510 /* -------- program execution part -------- */
/* (Re)build the splitter spl from the separator string s. */
1512 static node *mk_splitter(const char *s, tsplitter *spl)
/* the splitter currently holds compiled regexps, which get released */
1520 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1522 regfree(ire); // TODO: nuke ire, use re+1?
/* two or more characters: compile s as a regexp;
 * otherwise the character code itself is kept in info */
1524 if (s[0] && s[1]) { /* strlen(s) > 1 */
1525 mk_re_node(s, n, re);
1527 n->info = (uint32_t) s[0];
1533 /* use node as a regular expression. Supplied with node ptr and regex_t
1534 * storage space. Return ptr to regex (if result points to preg, it should
1535 * be later regfree'd manually
1537 static regex_t *as_regex(node *op, regex_t *preg)
// precompiled regexp node: pick the case-insensitive or the
// case-sensitive variant according to icase
1543 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1544 return icase ? op->r.ire : op->l.re;
// any other node: evaluate it and compile its string value into preg
1547 s = getvar_s(evaluate(op, v));
1549 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1550 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1551 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1552 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1553 * (maybe gsub is not supposed to use REG_EXTENDED?).
// first attempt with extended syntax; on failure retry without REG_EXTENDED
1555 if (regcomp(preg, s, cflags)) {
1556 cflags &= ~REG_EXTENDED;
1557 xregcomp(preg, s, cflags);
1563 /* gradually increasing buffer.
1564 * note that we reallocate even if n == old_size,
1565 * and thus there is at least one extra allocated byte.
// b may be NULL for a first allocation; *size is updated with the new capacity
1567 static char* qrealloc(char *b, int n, int *size)
1569 if (!b || n >= *size) {
// new capacity: one and a half times the request plus 80 bytes
1570 *size = n + (n>>1) + 80;
1571 b = xrealloc(b, *size);
1576 /* resize field storage space */
1577 static void fsrealloc(int size)
/* grow the Fields array, leaving 16 spare slots beyond size */
1581 if (size >= maxfields) {
1583 maxfields = size + 16;
1584 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
/* newly added slots start out as special variables with no string */
1585 for (; i < maxfields; i++) {
1586 Fields[i].type = VF_SPECIAL;
1587 Fields[i].string = NULL;
1590 /* if size < nfields, clear extra field variables */
1591 for (i = size; i < nfields; i++) {
/* Split s according to the splitter node spl. The pieces are stored
 * back to back in a buffer allocated here and returned through *slist.
 * The return value n counts the fields. */
1597 static int awk_split(const char *s, node *spl, char **slist)
1602 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1604 /* in worst case, each char would be a separate field */
1605 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
/* for a non-regexp splitter, info holds the separator character */
1608 c[0] = c[1] = (char)spl->info;
/* an empty RS gets special treatment */
1610 if (*getvar_s(intvar[RS]) == '\0')
1614 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1616 return n; /* "": zero fields */
1617 n++; /* at least one field will be there */
1619 l = strcspn(s, c+2); /* len till next NUL or \n */
/* the match counts only if it starts within the first l characters */
1620 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1621 && pmatch[0].rm_so <= l
1623 l = pmatch[0].rm_so;
/* a match ending at offset 0 is an empty match at the very start */
1624 if (pmatch[0].rm_eo == 0) {
1628 n++; /* we saw yet another delimiter */
1630 pmatch[0].rm_eo = l;
1635 /* make sure we remove *all* of the separator chars */
1638 } while (++l < pmatch[0].rm_eo);
/* continue after the end of the match */
1640 s += pmatch[0].rm_eo;
1644 if (c[0] == '\0') { /* null split */
1652 if (c[0] != ' ') { /* single-character split */
/* put both letter cases into the separator set */
1654 c[0] = toupper(c[0]);
1655 c[1] = tolower(c[1]);
1659 while ((s1 = strpbrk(s1, c)) != NULL) {
/* space separator: fields are runs of non-whitespace characters */
1667 s = skip_whitespace(s);
1671 while (*s && !isspace(*s))
// split_f0: split the current record (intvar[F0]) into the Fields array.
// awk_split is run with the field splitter fsplitter.n; its output buffer is
// kept in G.split_f0__fstrings (aliased as fstrings below).
// Each of the n resulting fields takes its string from nextword and gets the
// VF_FSTR, VF_USER and VF_DIRTY flags added to its type.
// NF is then written field by field (type and number) instead of through
// a setter, so that no special-variable handling is triggered.
1678 static void split_f0(void)
1680 /* static char *fstrings; */
1681 #define fstrings (G.split_f0__fstrings)
1692 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1695 for (i = 0; i < n; i++) {
1696 Fields[i].string = nextword(&s);
1697 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1700 /* set NF manually to avoid side effects */
1702 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1703 intvar[NF]->number = nfields;
1707 /* perform additional actions when some internal variables changed */
// handle_special: apply the side effects of a change to variable v.
// Variables without VF_SPECIAL are filtered out by the first test.
// Handled variables:
//   NF         - $0 is rebuilt: the first n fields are joined with the OFS
//                string into a qrealloc-grown buffer that is handed to
//                setvar_p for intvar[F0]
//   $0 (F0)    - is_f0_split is reset to FALSE
//   FS         - fsplitter is rebuilt from the new value via mk_splitter
//   RS         - rsplitter is rebuilt from the new value via mk_splitter
//   IGNORECASE - has its own branch (body not shown in this excerpt)
//   otherwise  - v is treated as an element of Fields: NF is raised so that
//                it is at least the 1-based index of v (v - Fields + 1)
1708 static void handle_special(var *v)
1712 const char *sep, *s;
1713 int sl, l, len, i, bsize;
1715 if (!(v->type & VF_SPECIAL))
1718 if (v == intvar[NF]) {
1719 n = (int)getvar_i(v);
1722 /* recalculate $0 */
1723 sep = getvar_s(intvar[OFS]);
1727 for (i = 0; i < n; i++) {
1728 s = getvar_s(&Fields[i]);
1731 memcpy(b+len, sep, sl);
// Room is requested for the field plus one separator before copying.
1734 b = qrealloc(b, len+l+sl, &bsize);
1735 memcpy(b+len, s, l);
1740 setvar_p(intvar[F0], b);
1743 } else if (v == intvar[F0]) {
1744 is_f0_split = FALSE;
1746 } else if (v == intvar[FS]) {
1747 mk_splitter(getvar_s(v), &fsplitter);
1749 } else if (v == intvar[RS]) {
1750 mk_splitter(getvar_s(v), &rsplitter);
1752 } else if (v == intvar[IGNORECASE]) {
1756 n = getvar_i(intvar[NF]);
1757 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1758 /* right here v is invalid. Just to note... */
1762 /* step through func/builtin/etc arguments */
// nextarg: fetch the next argument node from an argument list.
//   pn - in/out cursor into the argument list
// A non-NULL current node of class OC_COMMA is handled by the branch below
// (presumably it yields the left operand and advances the cursor to the
// right one - confirm against the full function).
1763 static node *nextarg(node **pn)
1768 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// hashwalk_init: set up iteration of loop variable v over the keys of array.
// If v already carries VF_WALK, its current walker is remembered in
// prev_walker so that it can be chained behind the new one.
// A new zeroed walker_list is allocated with array->glen + 1 extra bytes
// following the struct, stored in v->x.walker, and linked to prev_walker
// through w->prev. cur and end both start at w->wbuf; the item names found
// in array->items[0 .. csize) are then copied to w->end with strcpy.
1777 static void hashwalk_init(var *v, xhash *array)
1782 walker_list *prev_walker;
1784 if (v->type & VF_WALK) {
1785 prev_walker = v->x.walker;
1790 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1792 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1793 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1794 w->cur = w->end = w->wbuf;
1795 w->prev = prev_walker;
1796 for (i = 0; i < array->csize; i++) {
1797 hi = array->items[i];
1799 strcpy(w->end, hi->name);
// hashwalk_next: advance the iteration prepared by hashwalk_init.
// When w->cur has reached w->end the key list is exhausted: the previous
// walker (w->prev) is put back into v->x.walker.
// Otherwise v receives the next stored key via nextword(&w->cur).
// The int result is used by the caller as a continue/stop flag (presumably
// nonzero while keys remain - confirm against the full function).
1806 static int hashwalk_next(var *v)
1808 walker_list *w = v->x.walker;
1810 if (w->cur >= w->end) {
1811 walker_list *prev_walker = w->prev;
1813 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1815 v->x.walker = prev_walker;
1819 setvar_s(v, nextword(&w->cur));
1823 /* evaluate node, return 1 when result is true, 0 otherwise */
// ptest: evaluate pattern and report its truth value.
// The result of evaluate is stored in the shared temporary G.ptest__v and
// converted to an int by istrue.
1824 static int ptest(node *pattern)
1826 /* ptest__v is "static": to save stack space? */
1827 return istrue(evaluate(pattern, &G.ptest__v));
1830 /* read next record from stream rsm into a variable v */
// awk_getline: read the next record from stream rsm into variable v.
// Data is read with safe_read from the descriptor behind rsm->F into a buffer
// that is grown with qrealloc.
// The record terminator is located according to rsplitter:
//   - OC_REGEXP splitter: regexec supplies the start (so) and end (eo) offsets
//   - a non-NUL separator character c: searched with strchr
//   - otherwise either memchr looks for a NUL byte, or leading newlines are
//     skipped and a blank line (two consecutive newlines) ends the record,
//     with every following newline absorbed into the terminator
// errno is copied into ERRNO on one path (presumably a failed read).
// The buffer is temporarily NUL-terminated at so and at eo so that the
// record text and the terminator text can be stored as strings; the
// terminator text goes into RT.
// The int result is tested by callers as greater than zero for success.
1831 static int awk_getline(rstream *rsm, var *v)
1834 regmatch_t pmatch[2];
1835 int size, a, p, pp = 0;
1836 int fd, so, eo, r, rp;
1839 /* we're using our own buffer since we need access to accumulating
1842 fd = fileno(rsm->F);
1847 c = (char) rsplitter.n.info;
1851 m = qrealloc(m, 256, &size);
1858 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1859 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1860 b, 1, pmatch, 0) == 0) {
1861 so = pmatch[0].rm_so;
1862 eo = pmatch[0].rm_eo;
1866 } else if (c != '\0') {
1867 s = strchr(b+pp, c);
1869 s = memchr(b+pp, '\0', p - pp);
1876 while (b[rp] == '\n')
1878 s = strstr(b+rp, "\n\n");
1881 while (b[eo] == '\n')
1890 memmove(m, m+a, p+1);
1895 m = qrealloc(m, a+p+128, &size);
1898 p += safe_read(fd, b+p, size-p-1);
1902 setvar_i(intvar[ERRNO], errno);
1911 c = b[so]; b[so] = '\0';
1915 c = b[eo]; b[eo] = '\0';
1916 setvar_s(intvar[RT], b+so);
// fmt_num: format number n into buffer b.
//   b, size    - destination buffer and its capacity, passed to snprintf
//   format     - printf-style format holding one conversion
//   n          - value to print
//   int_as_int - when nonzero and n equals its own conversion to int,
//                n is printed with %d and format is ignored
// Otherwise the last character of format selects the conversion: one of
// diouxX prints (int)n, one of eEfgG prints n as a double, and anything
// else raises EMSG_INV_FMT. r holds the snprintf result.
// NOTE(review): (int)n is evaluated for any double; converting a value
// outside the int range is undefined in C - confirm callers stay in range.
// NOTE(review): strchr also matches the terminating NUL of its first
// argument, so an empty format takes the integer branch - confirm intended.
1928 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1932 const char *s = format;
1934 if (int_as_int && n == (int)n) {
1935 r = snprintf(b, size, "%d", (int)n);
// Walk to the end of format; c ends up as its last character (NUL if empty).
1937 do { c = *s; } while (c && *++s);
1938 if (strchr("diouxX", c)) {
1939 r = snprintf(b, size, format, (int)n);
1940 } else if (strchr("eEfgG", c)) {
1941 r = snprintf(b, size, format, n);
1943 syntax_error(EMSG_INV_FMT);
1949 /* formatted output into an allocated buffer, return ptr to buffer */
// awk_printf: build the output of a printf-style call.
//   n - argument list; its first argument is evaluated as the format string
//       and duplicated with xstrdup, the remaining ones are consumed one per
//       conversion via nextarg
// The format is scanned for conversions; a doubled percent sign is skipped
// as literal text. Characters between the percent sign and the conversion
// letter are stepped over, and a syntax error is raised on one path there
// (the message says star widths are not supported).
// For every conversion the output buffer is grown with qrealloc by the
// length of the current format piece plus MAXVARFMT, then:
//   - c, or end of format: a single character is printed, taken as the
//     character code for a numeric argument, else the first character of
//     the string value
//   - s: the buffer is grown by the string length first, then sprintf is used
//   - anything else: fmt_num formats the numeric value
// Finally the buffer is trimmed to i + 1 bytes with xrealloc.
1950 static char *awk_printf(node *n)
1955 int i, j, incr, bsize;
1960 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1965 while (*f && (*f != '%' || *++f == '%'))
1967 while (*f && !isalpha(*f)) {
1969 syntax_error("%*x formats are not supported");
1973 incr = (f - s) + MAXVARFMT;
1974 b = qrealloc(b, incr + i, &bsize);
1980 arg = evaluate(nextarg(&n), v);
1983 if (c == 'c' || !c) {
1984 i += sprintf(b+i, s, is_numeric(arg) ?
1985 (char)getvar_i(arg) : *getvar_s(arg));
1986 } else if (c == 's') {
1988 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1989 i += sprintf(b+i, s, s1);
1991 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1995 /* if there was an error while sprintf, return value is negative */
2002 b = xrealloc(b, i + 1);
2007 /* Common substitution routine.
2008 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2009 * store result into (dest), return number of substitutions.
2010 * If nm = 0, replace all matches.
2011 * If src or dest is NULL, use $0.
2012 * If subexp != 0, enable subexpression matching (\1-\9).
// awk_sub: regex substitution shared by the substitution builtins.
//   rn     - node giving the regex (resolved through as_regex)
//   repl   - replacement text; an ampersand and, when subexp is set, the
//            digits 0-9 get special treatment when they are copied
//   nm     - which match to replace; compared against the running match
//            counter match_no
//   src    - variable to read the subject from, intvar[F0] when NULL
//   dest   - variable that receives the result, intvar[F0] when NULL
//   subexp - nonzero enables the digit handling described above
// The subject is scanned with regexec asking for up to 10 submatches. Text up
// to the end of each match is copied into resbuf (grown with qrealloc); for
// a match that is to be replaced, the copied match text is dropped again and
// repl is expanded in its place. After the first pass REG_NOTBOL is used so
// that later matches are not treated as being at the start of the string.
// The remaining tail of the subject is appended and resbuf is handed to
// setvar_p.
2014 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2018 int match_no, residx, replen, resbufsize;
2020 regmatch_t pmatch[10];
2021 regex_t sreg, *regex;
2027 regex = as_regex(rn, &sreg);
2028 sp = getvar_s(src ? src : intvar[F0]);
2029 replen = strlen(repl);
2030 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2031 int so = pmatch[0].rm_so;
2032 int eo = pmatch[0].rm_eo;
2034 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2035 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2036 memcpy(resbuf + residx, sp, eo);
2038 if (++match_no >= nm) {
// Step back over the matched text so the replacement overwrites it.
2043 residx -= (eo - so);
2045 for (s = repl; *s; s++) {
2046 char c = resbuf[residx++] = *s;
2051 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2053 residx -= ((nbs + 3) >> 1);
2060 resbuf[residx++] = c;
2062 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2063 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2064 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2072 regexec_flags = REG_NOTBOL;
2077 /* Empty match (e.g. "b*" will match anywhere).
2078 * Advance by one char. */
2080 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2081 //... and will erroneously match "b" even though it is NOT at the word start.
2082 //we need REG_NOTBOW but it does not exist...
2083 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2084 //it should be able to do it correctly.
2085 /* Subtle: this is safe only because
2086 * qrealloc allocated at least one extra byte */
2087 resbuf[residx] = *sp;
2095 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2096 strcpy(resbuf + residx, sp);
2098 //bb_error_msg("end sp:'%s'%p", sp,sp);
2099 setvar_p(dest ? dest : intvar[F0], resbuf);
// do_mktime: convert a date specification string into a timestamp.
//   ds - text holding year, month, day, hour, minute, second and an optional
//        daylight-saving flag, read by sscanf straight into struct tm fields
// tm_isdst defaults to -1 (unknown). The validation below rejects, among
// other things, a month below 1 and a year below 1900; the year is then
// rebased by subtracting 1900 and the result of mktime is returned.
// NOTE(review): the first six conversions are %u while the struct tm members
// are ints - the existing comment relies on this being harmless; confirm.
// NOTE(review): the function returns int, so a mktime result that does not
// fit in int would be truncated - confirm acceptable.
2105 static NOINLINE int do_mktime(const char *ds)
2110 /*memset(&then, 0, sizeof(then)); - not needed */
2111 then.tm_isdst = -1; /* default is unknown */
2113 /* manpage of mktime says these fields are ints,
2114 * so we can sscanf stuff directly into them */
2115 count = sscanf(ds, "%u %u %u %u %u %u %d",
2116 &then.tm_year, &then.tm_mon, &then.tm_mday,
2117 &then.tm_hour, &then.tm_min, &then.tm_sec,
2121 || (unsigned)then.tm_mon < 1
2122 || (unsigned)then.tm_year < 1900
2128 then.tm_year -= 1900;
2130 return mktime(&then);
// exec_builtin: run the builtin function described by node op, storing the
// result in res.
// Argument handling: up to four arguments are pulled with nextarg. Bits of
// the working copy isr of op->info decide whether an argument is evaluated
// into tv[i] (mask 0x09000000) and whether its string value is fetched into
// as[i] (mask 0x08000000). The top two bits of info (info >> 30) give the
// minimum argument count; fewer arguments raise EMSG_TOO_FEW_ARGS.
// Operations present in the code below (case labels are not shown here):
//   - atan2 of the first two arguments, or EMSG_NO_MATH without libm support
//   - splitting as[0] into array av[1], using either a regex node or
//     a splitter built from the third argument; elements are stored from
//     index 1 to n
//   - substring extraction from as[0] using a 1-based start position
//   - bitwise and, complement, left shift, or, right shift and xor on values
//     obtained with getvar_i_int
//   - ASCII upper/lower case conversion of a copy of as[0]
//   - substring search of as[1] within as[0], with strstr or, on a slower
//     path, strncasecmp at every offset
//   - strftime into g_buf with a default format when no format argument is
//     given
//   - do_mktime on as[0]
//   - regex match of as[0] that sets RSTART and RLENGTH; the assignments of
//     0 and -1 to the match offsets produce RSTART 0 and RLENGTH -1
//     (presumably the no-match case)
//   - three awk_sub based substitutions: one taking the match number from
//     the third argument with subexpression support, one replacing all
//     matches (nm 0) and one replacing the first match (nm 1)
2133 static NOINLINE var *exec_builtin(node *op, var *res)
2135 #define tspl (G.exec_builtin__tspl)
2141 regmatch_t pmatch[2];
2150 isr = info = op->info;
2153 av[2] = av[3] = NULL;
2154 for (i = 0; i < 4 && op; i++) {
2155 an[i] = nextarg(&op);
2156 if (isr & 0x09000000)
2157 av[i] = evaluate(an[i], &tv[i]);
2158 if (isr & 0x08000000)
2159 as[i] = getvar_s(av[i]);
2164 if ((uint32_t)nargs < (info >> 30))
2165 syntax_error(EMSG_TOO_FEW_ARGS);
2171 #if ENABLE_FEATURE_AWK_LIBM
2172 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2174 syntax_error(EMSG_NO_MATH);
2182 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2183 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2188 n = awk_split(as[0], spl, &s);
2190 clear_array(iamarray(av[1]));
2191 for (i = 1; i <= n; i++)
2192 setari_u(av[1], i, nextword(&s));
2202 i = getvar_i(av[1]) - 1;
2207 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2210 s = xstrndup(as[0]+i, n);
2215 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2216 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2218 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2222 setvar_i(res, ~getvar_i_int(av[0]));
2226 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2230 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2234 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2238 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2244 s1 = s = xstrdup(as[0]);
2246 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
// ASCII-only case change: the test holds for the letters A-Z and a-z;
// clearing bit 0x20 gives upper case, setting it gives lower case.
2247 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2248 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2258 l = strlen(as[0]) - ll;
2259 if (ll > 0 && l >= 0) {
2261 char *s = strstr(as[0], as[1]);
2263 n = (s - as[0]) + 1;
2265 /* this piece of code is terribly slow and
2266 * really should be rewritten
2268 for (i = 0; i <= l; i++) {
2269 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2281 tt = getvar_i(av[1]);
2284 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2285 i = strftime(g_buf, MAXVARFMT,
2286 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2289 setvar_s(res, g_buf);
2293 setvar_i(res, do_mktime(as[0]));
2297 re = as_regex(an[1], &sreg);
2298 n = regexec(re, as[0], 1, pmatch, 0);
2303 pmatch[0].rm_so = 0;
2304 pmatch[0].rm_eo = -1;
2306 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2307 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2308 setvar_i(res, pmatch[0].rm_so);
2314 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2318 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2322 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2332 * Evaluate node - the heart of the program. Supplied with subtree
2333 * and place where to store result. returns ptr to result.
// XC(n) shifts an opcode-class value right by 8 bits; it is applied both to
// the switch operand and to every case label in evaluate.
2335 #define XC(n) ((n) >> 8)
// evaluate: interpret the node tree starting at op.
//   op  - first node to execute
//   res - variable that receives the result
// Returns a pointer to the variable holding the result.
// Per node, the work is done in two stages:
//   1. operands requested by the OF_* bits of opinfo are prepared:
//      OF_RES1 and OF_RES2 evaluate the left and right subtrees into v1 and
//      v1+1, OF_STR1 and OF_STR2 fetch their string values, OF_NUM1 fetches
//      the numeric value of the left operand
//   2. a switch on the opcode class performs the operation
// Opcode classes with visible handling: pattern tests (including range
// patterns), expression and branch nodes, for-in setup and stepping,
// print and printf with optional redirection, delete, source switch,
// return, nextfile, variables and function arguments, array membership,
// regex literals and match operators, assignment, ternary, user function
// calls, getline in its plain and pipe forms, simple builtins, exec_builtin
// dispatch, sprintf, unary operators, field access, concatenation and
// comma joining, logical and/or, binary arithmetic with in-place variants,
// and comparison.
// After the switch the opcode class is compared with SHIFT_TIL_THIS and
// RECUR_FROM_THIS (presumably to decide between stepping to the next node
// and returning - confirm against the full function).
2337 static var *evaluate(node *op, var *res)
2339 /* This procedure is recursive so we should count every byte */
2340 #define fnargs (G.evaluate__fnargs)
2341 /* seed is initialized to 1 */
2342 #define seed (G.evaluate__seed)
2343 #define sreg (G.evaluate__sreg)
2348 return setvar_s(res, NULL);
2356 } L = L; /* for compiler */
2367 opn = (opinfo & OPNMASK);
2368 g_lineno = op->lineno;
2370 debug_printf_eval("opinfo:%08x opn:%08x XC:%x\n", opinfo, opn, XC(opinfo & OPCLSMASK));
2372 /* execute inevitable things */
2373 if (opinfo & OF_RES1)
2374 L.v = evaluate(op1, v1);
2375 if (opinfo & OF_RES2)
2376 R.v = evaluate(op->r.n, v1+1);
2377 if (opinfo & OF_STR1) {
2378 L.s = getvar_s(L.v);
2379 debug_printf_eval("L.s:'%s'\n", L.s);
2381 if (opinfo & OF_STR2) {
2382 R.s = getvar_s(R.v);
2383 debug_printf_eval("R.s:'%s'\n", R.s);
2385 if (opinfo & OF_NUM1) {
2386 L_d = getvar_i(L.v);
2387 debug_printf_eval("L_d:%f\n", L_d);
2390 switch (XC(opinfo & OPCLSMASK)) {
2392 /* -- iterative node type -- */
2396 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2397 /* it's range pattern */
// OF_CHECKED on the node records that the range start has matched; it is
// cleared again as soon as the range end pattern matches.
2398 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2399 op->info |= OF_CHECKED;
2400 if (ptest(op1->r.n))
2401 op->info &= ~OF_CHECKED;
2407 op = ptest(op1) ? op->a.n : op->r.n;
2411 /* just evaluate an expression, also used as unconditional jump */
2415 /* branch, used in if-else and various loops */
2417 op = istrue(L.v) ? op->a.n : op->r.n;
2420 /* initialize for-in loop */
2421 case XC( OC_WALKINIT ):
2422 hashwalk_init(L.v, iamarray(R.v));
2425 /* get next array item */
2426 case XC( OC_WALKNEXT ):
2427 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2430 case XC( OC_PRINT ):
2431 case XC( OC_PRINTF ): {
2435 rstream *rsm = newfile(R.s);
2438 rsm->F = popen(R.s, "w");
2440 bb_perror_msg_and_die("popen");
2443 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2449 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2451 fputs(getvar_s(intvar[F0]), F);
2454 var *v = evaluate(nextarg(&op1), v1);
// Numeric values are formatted through OFMT; everything else is printed as
// its string value.
2455 if (v->type & VF_NUMBER) {
2456 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2460 fputs(getvar_s(v), F);
2464 fputs(getvar_s(intvar[OFS]), F);
2467 fputs(getvar_s(intvar[ORS]), F);
2469 } else { /* OC_PRINTF */
2470 char *s = awk_printf(op1);
2478 case XC( OC_DELETE ): {
2479 uint32_t info = op1->info & OPCLSMASK;
2482 if (info == OC_VAR) {
2484 } else if (info == OC_FNARG) {
2485 v = &fnargs[op1->l.aidx];
2487 syntax_error(EMSG_NOT_ARRAY);
2493 s = getvar_s(evaluate(op1->r.n, v1));
2494 hash_remove(iamarray(v), s);
2496 clear_array(iamarray(v));
2501 case XC( OC_NEWSOURCE ):
2502 g_progname = op->l.new_progname;
2505 case XC( OC_RETURN ):
2509 case XC( OC_NEXTFILE ):
2520 /* -- recursive node type -- */
2524 if (L.v == intvar[NF])
2528 case XC( OC_FNARG ):
2529 L.v = &fnargs[op->l.aidx];
2531 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2535 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2538 case XC( OC_REGEXP ):
2540 L.s = getvar_s(intvar[F0]);
2543 case XC( OC_MATCH ):
2547 regex_t *re = as_regex(op1, &sreg);
2548 int i = regexec(re, L.s, 0, NULL, 0);
// The xor inverts the outcome when the operator character is an
// exclamation mark.
2551 setvar_i(res, (i == 0) ^ (opn == '!'));
2556 debug_printf_eval("MOVE\n");
2557 /* if source is a temporary string, just relink it to dest */
2558 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2559 //then L.v ends up being a string, which is wrong
2560 // if (R.v == v1+1 && R.v->string) {
2561 // res = setvar_p(L.v, R.v->string);
2562 // R.v->string = NULL;
2564 res = copyvar(L.v, R.v);
2568 case XC( OC_TERNARY ):
2569 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2570 syntax_error(EMSG_POSSIBLE_ERROR);
2571 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2574 case XC( OC_FUNC ): {
2576 const char *sv_progname;
2578 if (!op->r.f->body.first)
2579 syntax_error(EMSG_UNDEF_FUNC);
// One slot more than the declared argument count is allocated for the call.
2581 vbeg = v = nvalloc(op->r.f->nargs + 1);
2583 var *arg = evaluate(nextarg(&op1), v1);
2585 v->type |= VF_CHILD;
2587 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved before the function body runs and restored afterwards.
2593 sv_progname = g_progname;
2595 res = evaluate(op->r.f->body.first, res);
2597 g_progname = sv_progname;
2604 case XC( OC_GETLINE ):
2605 case XC( OC_PGETLINE ): {
2612 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2613 rsm->F = popen(L.s, "r");
2614 rsm->is_pipe = TRUE;
2616 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2621 iF = next_input_file();
2626 setvar_i(intvar[ERRNO], errno);
2634 i = awk_getline(rsm, R.v);
// FNR is incremented only for a successful read with no left operand.
2635 if (i > 0 && !op1) {
2636 incvar(intvar[FNR]);
2643 /* simple builtins */
2644 case XC( OC_FBLTIN ): {
2647 double R_d = R_d; /* for compiler */
2655 R_d = (double)rand() / (double)RAND_MAX;
2657 #if ENABLE_FEATURE_AWK_LIBM
2683 syntax_error(EMSG_NO_MATH);
2688 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2698 L.s = getvar_s(intvar[F0]);
// The command runs only when exec is allowed and the string is non-empty;
// the wait status from system is shifted right by 8 bits.
2704 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2705 ? (system(L.s) >> 8) : 0;
2711 } else if (L.s && *L.s) {
2721 rsm = (rstream *)hash_search(fdhash, L.s);
2723 i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2725 hash_remove(fdhash, L.s);
2728 setvar_i(intvar[ERRNO], errno);
2736 case XC( OC_BUILTIN ):
2737 res = exec_builtin(op, res);
2740 case XC( OC_SPRINTF ):
2741 setvar_p(res, awk_printf(op1));
2744 case XC( OC_UNARY ): {
2747 Ld = R_d = getvar_i(R.v);
2774 case XC( OC_FIELD ): {
2775 int i = (int)getvar_i(R.v);
// Field numbers are 1-based while Fields is 0-based.
2782 res = &Fields[i - 1];
2787 /* concatenation (" ") and index joining (",") */
2788 case XC( OC_CONCAT ):
2789 case XC( OC_COMMA ): {
2790 const char *sep = "";
2791 if ((opinfo & OPCLSMASK) == OC_COMMA)
2792 sep = getvar_s(intvar[SUBSEP]);
2793 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// Short-circuit logic: the right operand is tested only when the left one
// does not already decide the result.
2798 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2802 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2805 case XC( OC_BINARY ):
2806 case XC( OC_REPLACE ): {
2807 double R_d = getvar_i(R.v);
2808 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2821 syntax_error(EMSG_DIV_BY_ZERO);
2825 #if ENABLE_FEATURE_AWK_LIBM
2826 L_d = pow(L_d, R_d);
2828 syntax_error(EMSG_NO_MATH);
2833 syntax_error(EMSG_DIV_BY_ZERO);
// NOTE(review): the quotient is truncated through int, so it must fit in
// the int range for the remainder to be meaningful - confirm acceptable.
2834 L_d -= (int)(L_d / R_d) * R_d;
2837 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res, OC_REPLACE writes back into the left operand.
2838 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2842 case XC( OC_COMPARE ): {
2843 int i = i; /* for compiler */
// Numeric comparison is used only when both sides are numeric; otherwise
// the string values are compared, ignoring case when icase is set.
2846 if (is_numeric(L.v) && is_numeric(R.v)) {
2847 Ld = getvar_i(L.v) - getvar_i(R.v);
2849 const char *l = getvar_s(L.v);
2850 const char *r = getvar_s(R.v);
2851 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2853 switch (opn & 0xfe) {
2864 setvar_i(res, (i == 0) ^ (opn & 1));
2869 syntax_error(EMSG_POSSIBLE_ERROR);
2871 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2873 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2887 /* -------- main & co. -------- */
// awk_exit: finish the program run.
//   r - value supplied by the caller (presumably the exit status - confirm)
// The END sequence (endseq.first) is evaluated on one path, then every
// entry of fdhash is visited and each stream that is open and marked as
// a pipe is closed with pclose, which waits for the child process.
2889 static int awk_exit(int r)
2900 evaluate(endseq.first, &tv);
2903 /* waiting for children */
2904 for (i = 0; i < fdhash->csize; i++) {
2905 hi = fdhash->items[i];
2907 if (hi->data.rs.F && hi->data.rs.is_pipe)
2908 pclose(hi->data.rs.F);
2916 /* if expr looks like "var=value", perform assignment and return 1,
2917 * otherwise return 0 */
// is_assignment: treat expr as a possible var=value command line assignment.
// A private copy of expr is made with xstrdup. The text is not an assignment
// when its first character fails isalnum_ or when it contains no equals
// sign. Otherwise the value part is copied character by character through
// nextchar, and the variable named by the part before the equals sign is
// created with newvar and set with setvar_u.
2918 static int is_assignment(const char *expr)
2920 char *exprc, *s, *s0, *s1;
2922 exprc = xstrdup(expr);
2923 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2931 *s1++ = nextchar(&s);
2934 setvar_u(newvar(exprc), s0);
2939 /* switch to next input file */
// next_input_file: advance to the next input file named in ARGV.
// State is kept in G.next_input_file__rsm and
// G.next_input_file__files_happen (aliased below as rsm and files_happen).
// The read position fields rsm.pos and rsm.adv are reset. When ARGIND + 1
// has reached ARGC there are no more arguments to try. Otherwise ARGIND is
// incremented and the matching ARGV element is fetched; it is opened with
// xfopen_stdin only when it is non-empty and is not consumed by
// is_assignment as a var=value argument.
// Once a file is chosen, files_happen is set and FILENAME receives its name.
2940 static rstream *next_input_file(void)
2942 #define rsm (G.next_input_file__rsm)
2943 #define files_happen (G.next_input_file__files_happen)
2946 const char *fname, *ind;
2951 rsm.pos = rsm.adv = 0;
2954 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2960 ind = getvar_s(incvar(intvar[ARGIND]));
2961 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2962 if (fname && *fname && !is_assignment(fname))
2963 F = xfopen_stdin(fname);
2967 files_happen = TRUE;
2968 setvar_s(intvar[FILENAME], fname);
// awk_main: entry point of the awk applet.
// Steps performed by the code below, in order:
//   1. with locale support enabled, LC_NUMERIC is forced to the C locale so
//      that number parsing does not consume commas
//   2. the global scratch buffer g_buf (MAXVARFMT + 1 bytes) and the four
//      hashes vhash, ahash, fdhash and fnhash are created
//   3. the builtin variables are created from the packed name list vNames
//      and value list vValues; a value starting with byte 0377 is not
//      assigned as a string, and a name list entry starting with a star
//      marks the variable VF_SPECIAL
//   4. handle_special is run for FS and RS
//   5. the three standard streams are registered under their /dev names
//   6. every environ entry containing an equals sign is stored in ENVIRON
//   7. options F, v, f and W are parsed; the F argument is stored in FS,
//      each v argument must pass is_assignment, W only causes a warning
//   8. the program text comes from the files given with f, or else from the
//      first remaining argument
//   9. ARGC and ARGV are filled in, the BEGIN sequence is evaluated, and the
//      run ends there when there is neither a main nor an END sequence
//  10. every input file is read record by record with awk_getline; for each
//      record FNR is incremented and the main sequence is evaluated; one
//      path reports strerror(errno) through syntax_error
//  11. awk_exit is called with EXIT_SUCCESS
2975 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2976 int awk_main(int argc, char **argv)
2979 char *opt_F, *opt_W;
2980 llist_t *list_v = NULL;
2981 llist_t *list_f = NULL;
2986 char *vnames = (char *)vNames; /* cheat */
2987 char *vvalues = (char *)vValues;
2991 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2992 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2993 if (ENABLE_LOCALE_SUPPORT)
2994 setlocale(LC_NUMERIC, "C");
2998 /* allocate global buffer */
2999 g_buf = xmalloc(MAXVARFMT + 1);
3001 vhash = hash_init();
3002 ahash = hash_init();
3003 fdhash = hash_init();
3004 fnhash = hash_init();
3006 /* initialize variables */
3007 for (i = 0; *vnames; i++) {
3008 intvar[i] = v = newvar(nextword(&vnames));
3009 if (*vvalues != '\377')
3010 setvar_s(v, nextword(&vvalues));
3014 if (*vnames == '*') {
3015 v->type |= VF_SPECIAL;
// Build the field and record splitters from the initial FS and RS values.
3020 handle_special(intvar[FS]);
3021 handle_special(intvar[RS]);
3023 newfile("/dev/stdin")->F = stdin;
3024 newfile("/dev/stdout")->F = stdout;
3025 newfile("/dev/stderr")->F = stderr;
3027 /* Huh, people report that sometimes environ is NULL. Oh well. */
3028 if (environ) for (envp = environ; *envp; envp++) {
3029 /* environ is writable, thus we don't strdup it needlessly */
3031 char *s1 = strchr(s, '=');
3034 /* Both findvar and setvar_u take const char*
3035 * as 2nd arg -> environment is not trashed */
3036 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3040 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3041 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3045 setvar_s(intvar[FS], opt_F); // -F
3046 while (list_v) { /* -v */
3047 if (!is_assignment(llist_pop(&list_v)))
3050 if (list_f) { /* -f */
3055 g_progname = llist_pop(&list_f);
3056 from_file = xfopen_stdin(g_progname);
3057 /* one byte is reserved for some trick in next_token */
// i starts at 1 so that s[0] stays reserved; every pass grows the buffer to
// i + 4096 bytes and reads at most 4094 bytes at offset i.
3058 for (i = j = 1; j > 0; i += j) {
3059 s = xrealloc(s, i + 4096);
3060 j = fread(s + i, 1, 4094, from_file);
3064 parse_program(s + 1);
3068 } else { // no -f: take program from 1st parameter
3071 g_progname = "cmd. line";
3072 parse_program(*argv++);
// Bit 0x8 of opt is the fourth option letter of the getopt32 string.
3074 if (opt & 0x8) // -W
3075 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3077 /* fill in ARGV array */
3078 setvar_i(intvar[ARGC], argc);
3079 setari_u(intvar[ARGV], 0, "awk");
3082 setari_u(intvar[ARGV], ++i, *argv++);
3084 evaluate(beginseq.first, &tv);
3085 if (!mainseq.first && !endseq.first)
3086 awk_exit(EXIT_SUCCESS);
3088 /* input file could already be opened in BEGIN block */
3090 iF = next_input_file();
3092 /* passing through input files */
3095 setvar_i(intvar[FNR], 0);
3097 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3100 incvar(intvar[FNR]);
3101 evaluate(mainseq.first, &tv);
3108 syntax_error(strerror(errno));
3110 iF = next_input_file();
3113 awk_exit(EXIT_SUCCESS);