1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
14 /* This is a NOEXEC applet. Be very careful! */
17 /* If you comment out one of these below, it will be #defined later
18 * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...) do {} while (0)
20 #define debug_printf_eval(...) do {} while (0)
22 #ifndef debug_printf_walker
23 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
25 #ifndef debug_printf_eval
26 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
36 #define VF_ARRAY 0x0002 /* 1 = it's an array */
38 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
39 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
40 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
41 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
42 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
43 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
44 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
46 /* these flags are static, don't change them when value is changed */
47 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
49 typedef struct walker_list {
52 struct walker_list *prev;
57 typedef struct var_s {
58 unsigned type; /* flags */
62 int aidx; /* func arg idx (for compilation stage) */
63 struct xhash_s *array; /* array ptr */
64 struct var_s *parent; /* for func args, ptr to actual parameter */
65 walker_list *walker; /* list of array elements (for..in) */
69 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
70 typedef struct chain_s {
73 const char *programname;
77 typedef struct func_s {
83 typedef struct rstream_s {
92 typedef struct hash_item_s {
94 struct var_s v; /* variable/array hash */
95 struct rstream_s rs; /* redirect streams hash */
96 struct func_s f; /* functions hash */
98 struct hash_item_s *next; /* next in chain */
99 char name[1]; /* really it's longer */
/* Chained hash table keyed by item name */
102 typedef struct xhash_s {
103 unsigned nel; /* number of elements */
104 unsigned csize; /* current hash size: bucket index is hashidx(name) % csize */
105 unsigned nprime; /* index into PRIMES[] of the next hash size */
106 unsigned glen; /* total length of item names; hash_remove() subtracts strlen(name) + 1 per item */
107 struct hash_item_s **items; /* bucket array; items in one bucket are linked via ->next */
111 typedef struct node_s {
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
140 typedef struct tsplitter_s {
145 /* simple token classes */
146 /* Order and hex values are very important!!! See next_token() */
/* Each class is one distinct bit, so classes can be OR-ed together into
 * masks: the combined token classes defined after this list, and the
 * expected argument of next_token(). The groups of tokenlist[] are listed
 * in this same bit order, with NTC switching to the next class (tc<<1). */
147 #define TC_SEQSTART 1 /* ( */
148 #define TC_SEQTERM (1 << 1) /* ) */
149 #define TC_REGEXP (1 << 2) /* /.../ */
150 #define TC_OUTRDR (1 << 3) /* | > >> */
151 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
152 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153 #define TC_BINOPX (1 << 6) /* two-opnd operator */
154 #define TC_IN (1 << 7)
155 #define TC_COMMA (1 << 8)
156 #define TC_PIPE (1 << 9) /* input redirection pipe */
157 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158 #define TC_ARRTERM (1 << 11) /* ] */
159 #define TC_GRPSTART (1 << 12) /* { */
160 #define TC_GRPTERM (1 << 13) /* } */
161 #define TC_SEMICOL (1 << 14)
162 #define TC_NEWLINE (1 << 15)
163 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164 #define TC_WHILE (1 << 17)
165 #define TC_ELSE (1 << 18)
166 #define TC_BUILTIN (1 << 19)
167 #define TC_GETLINE (1 << 20)
168 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
169 #define TC_BEGIN (1 << 22)
170 #define TC_END (1 << 23)
171 #define TC_EOF (1 << 24)
172 #define TC_VARIABLE (1 << 25)
173 #define TC_ARRAY (1 << 26)
174 #define TC_FUNCTION (1 << 27)
175 #define TC_STRING (1 << 28)
176 #define TC_NUMBER (1 << 29)
178 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
180 /* combined token classes */
181 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
184 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
191 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
193 /* discard newlines after these */
194 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
195 | TC_BINOP | TC_OPTERM)
197 /* what can expression begin with */
198 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
205 | TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
/* Operator flags. They occupy bits 16..21 of an info word, i.e. above the
 * bits selected by OPCLSMASK/OPNMASK and below the priority byte built
 * by P().
 * NOTE(review): the RES/STR/NUM names with suffix 1 or 2 presumably say how
 * the first/second operand is prepared before the operation - confirm
 * against evaluate(). */
208 #define OF_RES1 0x010000
209 #define OF_RES2 0x020000
210 #define OF_STR1 0x040000
211 #define OF_STR2 0x080000
212 #define OF_NUM1 0x100000
213 #define OF_CHECKED 0x200000
215 /* combined operator flags */
218 #define xS (OF_RES2 | OF_STR2)
220 #define VV (OF_RES1 | OF_RES2)
221 #define Nx (OF_RES1 | OF_NUM1)
222 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx (OF_RES1 | OF_STR1)
224 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK 0x007F
230 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
231 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
232 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
234 #define P(x) (x << 24)
235 #define PRIMASK 0x7F000000
236 #define PRIMASK2 0x7E000000
238 /* Operation classes */
240 #define SHIFT_TIL_THIS 0x0600
241 #define RECUR_FROM_THIS 0x1000
244 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
245 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
247 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
248 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
249 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
251 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
252 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
253 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
254 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
255 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
256 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
257 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
258 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
261 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
265 /* simple builtins */
267 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
273 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
275 B_an, B_co, B_ls, B_or, B_rs, B_xo,
278 /* tokens and their corresponding info values */
280 #define NTC "\377" /* switch to next token class (tc<<1) */
283 #define OC_B OC_BUILTIN
285 static const char tokenlist[] ALIGN1 =
288 "\1/" NTC /* REGEXP */
289 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
290 "\2++" "\2--" NTC /* UOPPOST */
291 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
292 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
293 "\2*=" "\2/=" "\2%=" "\2^="
294 "\1+" "\1-" "\3**=" "\2**"
295 "\1/" "\1%" "\1^" "\1*"
296 "\2!=" "\2>=" "\2<=" "\1>"
297 "\1<" "\2!~" "\1~" "\2&&"
298 "\2||" "\1?" "\1:" NTC
302 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
308 "\2if" "\2do" "\3for" "\5break" /* STATX */
309 "\10continue" "\6delete" "\5print"
310 "\6printf" "\4next" "\10nextfile"
311 "\6return" "\4exit" NTC
315 "\3and" "\5compl" "\6lshift" "\2or"
317 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
318 "\3cos" "\3exp" "\3int" "\3log"
319 "\4rand" "\3sin" "\4sqrt" "\5srand"
320 "\6gensub" "\4gsub" "\5index" "\6length"
321 "\5match" "\5split" "\7sprintf" "\3sub"
322 "\6substr" "\7systime" "\10strftime" "\6mktime"
323 "\7tolower" "\7toupper" NTC
325 "\4func" "\10function" NTC
330 static const uint32_t tokeninfo[] = {
334 xS|'a', xS|'w', xS|'|',
335 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
336 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
338 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
339 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
340 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
341 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
342 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
343 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
344 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
345 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
346 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
347 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
348 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
349 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
350 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
351 OC_COLON|xx|P(67)|':',
354 OC_PGETLINE|SV|P(37),
355 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
356 OC_UNARY|xV|P(19)|'!',
362 ST_IF, ST_DO, ST_FOR, OC_BREAK,
363 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
364 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
365 OC_RETURN|Vx, OC_EXIT|Nx,
369 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
370 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
371 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
372 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
373 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
374 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
375 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
376 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
377 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
384 /* internal variable names and their initial values */
385 /* asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
387 CONVFMT, OFMT, FS, OFS,
388 ORS, RS, RT, FILENAME,
389 SUBSEP, F0, ARGIND, ARGC,
390 ARGV, ERRNO, FNR, NR,
391 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
394 static const char vNames[] ALIGN1 =
395 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
396 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
397 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
398 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
399 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
401 static const char vValues[] ALIGN1 =
402 "%.6g\0" "%.6g\0" " \0" " \0"
403 "\n\0" "\n\0" "\0" "\0"
404 "\034\0" "\0" "\377";
406 /* hash size may grow to these values */
407 #define FIRST_PRIME 61
408 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
411 /* Globals. Split in two parts so that first one is addressed
412 * with (mostly short) negative offsets.
413 * NB: it's unsafe to put members of type "double"
414 * into globals2 (gcc may fail to align them).
418 chain beginseq, mainseq, endseq;
420 node *break_ptr, *continue_ptr;
422 xhash *vhash, *ahash, *fdhash, *fnhash;
423 const char *g_progname;
426 int maxfields; /* used in fsrealloc() only */
435 smallint is_f0_split;
438 uint32_t t_info; /* often used */
444 var *intvar[NUM_INTERNAL_VARS]; /* often used */
446 /* former statics from various functions */
447 char *split_f0__fstrings;
449 uint32_t next_token__save_tclass;
450 uint32_t next_token__save_info;
451 uint32_t next_token__ltclass;
452 smallint next_token__concat_inserted;
454 smallint next_input_file__files_happen;
455 rstream next_input_file__rsm;
457 var *evaluate__fnargs;
458 unsigned evaluate__seed;
459 regex_t evaluate__sreg;
463 tsplitter exec_builtin__tspl;
465 /* biggest and least used members go last */
466 tsplitter fsplitter, rsplitter;
468 #define G1 (ptr_to_globals[-1])
469 #define G (*(struct globals2 *)ptr_to_globals)
470 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
471 /*char G1size[sizeof(G1)]; - 0x74 */
472 /*char Gsize[sizeof(G)]; - 0x1c4 */
473 /* Trying to keep most of members accessible with short offsets: */
474 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
475 #define t_double (G1.t_double )
476 #define beginseq (G1.beginseq )
477 #define mainseq (G1.mainseq )
478 #define endseq (G1.endseq )
479 #define seq (G1.seq )
480 #define break_ptr (G1.break_ptr )
481 #define continue_ptr (G1.continue_ptr)
483 #define vhash (G1.vhash )
484 #define ahash (G1.ahash )
485 #define fdhash (G1.fdhash )
486 #define fnhash (G1.fnhash )
487 #define g_progname (G1.g_progname )
488 #define g_lineno (G1.g_lineno )
489 #define nfields (G1.nfields )
490 #define maxfields (G1.maxfields )
491 #define Fields (G1.Fields )
492 #define g_cb (G1.g_cb )
493 #define g_pos (G1.g_pos )
494 #define g_buf (G1.g_buf )
495 #define icase (G1.icase )
496 #define exiting (G1.exiting )
497 #define nextrec (G1.nextrec )
498 #define nextfile (G1.nextfile )
499 #define is_f0_split (G1.is_f0_split )
500 #define t_info (G.t_info )
501 #define t_tclass (G.t_tclass )
502 #define t_string (G.t_string )
503 #define t_lineno (G.t_lineno )
504 #define t_rollback (G.t_rollback )
505 #define intvar (G.intvar )
506 #define fsplitter (G.fsplitter )
507 #define rsplitter (G.rsplitter )
508 #define INIT_G() do { \
509 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
510 G.next_token__ltclass = TC_OPTERM; \
511 G.evaluate__seed = 1; \
515 /* function prototypes */
516 static void handle_special(var *);
517 static node *parse_expr(uint32_t);
518 static void chain_group(void);
519 static var *evaluate(node *, var *);
520 static rstream *next_input_file(void);
521 static int fmt_num(char *, int, const char *, double, int);
522 static int awk_exit(int) NORETURN;
524 /* ---- error handling ---- */
/* Fixed diagnostic texts. The callers visible in this file hand them to
 * syntax_error(), which prefixes program name and line number. */
526 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
527 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
528 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
529 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
530 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
531 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
532 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
533 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
534 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
535 #if !ENABLE_FEATURE_AWK_LIBM
536 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
539 static void zero_out_var(var *vp)
541 memset(vp, 0, sizeof(*vp));
/* Report message prefixed with the current program name (g_progname) and
 * line number (g_lineno), using bb_error_msg_and_die().
 * Declared NORETURN: control does not come back to the caller. */
544 static void syntax_error(const char *message) NORETURN;
545 static void syntax_error(const char *message)
547 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
550 /* ---- hash stuff ---- */
552 static unsigned hashidx(const char *name)
557 idx = *name++ + (idx << 6) - idx;
561 /* create new hash */
562 static xhash *hash_init(void)
566 newhash = xzalloc(sizeof(*newhash));
567 newhash->csize = FIRST_PRIME;
568 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
573 /* find item in hash, return ptr to data, NULL if not found */
574 static void *hash_search(xhash *hash, const char *name)
578 hi = hash->items[hashidx(name) % hash->csize];
580 if (strcmp(hi->name, name) == 0)
587 /* grow hash if it becomes too big */
588 static void hash_rebuild(xhash *hash)
590 unsigned newsize, i, idx;
591 hash_item **newitems, *hi, *thi;
593 if (hash->nprime == ARRAY_SIZE(PRIMES))
596 newsize = PRIMES[hash->nprime++];
597 newitems = xzalloc(newsize * sizeof(newitems[0]));
599 for (i = 0; i < hash->csize; i++) {
604 idx = hashidx(thi->name) % newsize;
605 thi->next = newitems[idx];
611 hash->csize = newsize;
612 hash->items = newitems;
615 /* find item in hash, add it if necessary. Return ptr to data */
616 static void *hash_find(xhash *hash, const char *name)
622 hi = hash_search(hash, name);
624 if (++hash->nel / hash->csize > 10)
627 l = strlen(name) + 1;
628 hi = xzalloc(sizeof(*hi) + l);
629 strcpy(hi->name, name);
631 idx = hashidx(name) % hash->csize;
632 hi->next = hash->items[idx];
633 hash->items[idx] = hi;
/* Typed wrappers around hash_find(), which looks name up and adds an entry
 * if necessary; the returned data pointer is cast to the wanted type.
 * findvar() takes the table explicitly; newvar(), newfile() and newfunc()
 * always use vhash, fdhash and fnhash respectively. */
639 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
640 #define newvar(name) ((var*) hash_find(vhash, (name)))
641 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
642 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
644 static void hash_remove(xhash *hash, const char *name)
646 hash_item *hi, **phi;
648 phi = &hash->items[hashidx(name) % hash->csize];
651 if (strcmp(hi->name, name) == 0) {
652 hash->glen -= (strlen(name) + 1);
662 /* ------ some useful functions ------ */
664 static char *skip_spaces(char *p)
667 if (*p == '\\' && p[1] == '\n') {
670 } else if (*p != ' ' && *p != '\t') {
678 /* returns old *s, advances *s past word and terminating NUL */
679 static char *nextword(char **s)
682 while (*(*s)++ != '\0')
687 static char nextchar(char **s)
694 c = bb_process_escape_sequence((const char**)s);
695 if (c == '\\' && *s == pps)
/* Return nonzero if c is alphanumeric (per isalnum()) or an underscore */
700 static ALWAYS_INLINE int isalnum_(int c)
702 return (isalnum(c) || c == '_');
705 static double my_strtod(char **pp)
710 /* Might be hex or octal integer: 0x123abc or 07777 */
711 char c = (cp[1] | 0x20);
712 if (c == 'x' || isdigit(cp[1])) {
713 unsigned long long ull = strtoull(cp, pp, 0);
717 if (!isdigit(c) && c != '.')
719 /* else: it may be a floating number. Examples:
720 * 009.123 (*pp points to '9')
721 * 000.123 (*pp points to '.')
722 * fall through to strtod.
727 return strtod(cp, pp);
730 /* -------- working with variables (set/get/copy/etc) -------- */
732 static xhash *iamarray(var *v)
736 while (a->type & VF_CHILD)
739 if (!(a->type & VF_ARRAY)) {
741 a->x.array = hash_init();
746 static void clear_array(xhash *array)
751 for (i = 0; i < array->csize; i++) {
752 hi = array->items[i];
756 free(thi->data.v.string);
759 array->items[i] = NULL;
761 array->glen = array->nel = 0;
764 /* clear a variable */
765 static var *clrvar(var *v)
767 if (!(v->type & VF_FSTR))
770 v->type &= VF_DONTTOUCH;
776 /* assign string value to variable */
777 static var *setvar_p(var *v, char *value)
785 /* Same as setvar_p(), but stores an xstrdup()'ed copy of value.
 * A NULL or empty value is passed on to setvar_p() as NULL (no copy made).
 * Returns whatever setvar_p() returns. */
786 static var *setvar_s(var *v, const char *value)
788 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
791 /* same as setvar_s but sets USER flag */
792 static var *setvar_u(var *v, const char *value)
794 v = setvar_s(v, value);
799 /* set array element to user string */
800 static void setari_u(var *a, int idx, const char *s)
804 v = findvar(iamarray(a), itoa(idx));
808 /* assign numeric value to variable */
809 static var *setvar_i(var *v, double value)
812 v->type |= VF_NUMBER;
/* Return the string value of v. The result is never NULL: a variable
 * without a string yields the empty string literal. */
818 static const char *getvar_s(var *v)
820 /* if v is numeric and has no cached string, convert it to string */
821 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format with the current CONVFMT value into the global g_buf */
822 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
823 v->string = xstrdup(g_buf);
/* remember that the string form is now cached */
824 v->type |= VF_CACHED;
826 return (v->string == NULL) ? "" : v->string;
829 static double getvar_i(var *v)
833 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
837 debug_printf_eval("getvar_i: '%s'->", s);
838 v->number = my_strtod(&s);
839 debug_printf_eval("%f (s:'%s')\n", v->number, s);
840 if (v->type & VF_USER) {
846 debug_printf_eval("getvar_i: '%s'->zero\n", s);
849 v->type |= VF_CACHED;
851 debug_printf_eval("getvar_i: %f\n", v->number);
855 /* Used for operands of bitwise ops */
856 static unsigned long getvar_i_int(var *v)
858 double d = getvar_i(v);
860 /* Casting doubles to longs is undefined for values outside
861 * of target type range. Try to widen it as much as possible */
863 return (unsigned long)d;
864 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
865 return - (long) (unsigned long) (-d);
868 static var *copyvar(var *dest, const var *src)
872 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
873 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
874 dest->number = src->number;
876 dest->string = xstrdup(src->string);
878 handle_special(dest);
/* Add 1 to the numeric value of v (as read by getvar_i()) and store it
 * back with setvar_i(); returns setvar_i()'s result. */
882 static var *incvar(var *v)
884 return setvar_i(v, getvar_i(v) + 1.0);
887 /* return true if v is number or numeric string */
888 static int is_numeric(var *v)
891 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
894 /* return 1 when value of v corresponds to true, 0 otherwise */
895 static int istrue(var *v)
898 return (v->number != 0);
899 return (v->string && v->string[0]);
902 /* temporary variables allocator. Last allocated should be first freed */
903 static var *nvalloc(int n)
911 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
917 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
918 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
920 g_cb->pos = g_cb->nv;
922 /*g_cb->next = NULL; - xzalloc did it */
930 while (v < g_cb->pos) {
939 static void nvfree(var *v)
943 if (v < g_cb->nv || v >= g_cb->pos)
944 syntax_error(EMSG_INTERNAL_ERROR);
946 for (p = v; p < g_cb->pos; p++) {
947 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
948 clear_array(iamarray(p));
949 free(p->x.array->items);
952 if (p->type & VF_WALK) {
954 walker_list *w = p->x.walker;
955 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
959 debug_printf_walker(" free(%p)\n", w);
968 while (g_cb->prev && g_cb->pos == g_cb->nv) {
973 /* ------- awk program text parsing ------- */
975 /* Parse next token pointed to by global g_pos, place results into the global t_* variables.
976 * If token isn't expected, give up with a syntax error. Return token class
978 static uint32_t next_token(uint32_t expected)
980 #define concat_inserted (G.next_token__concat_inserted)
981 #define save_tclass (G.next_token__save_tclass)
982 #define save_info (G.next_token__save_info)
983 /* Initialized to TC_OPTERM: */
984 #define ltclass (G.next_token__ltclass)
995 } else if (concat_inserted) {
996 concat_inserted = FALSE;
997 t_tclass = save_tclass;
1004 g_lineno = t_lineno;
1006 while (*p != '\n' && *p != '\0')
1015 } else if (*p == '\"') {
1018 while (*p != '\"') {
1020 if (*p == '\0' || *p == '\n')
1021 syntax_error(EMSG_UNEXP_EOS);
1022 *s++ = nextchar(&pp);
1029 } else if ((expected & TC_REGEXP) && *p == '/') {
1033 if (*p == '\0' || *p == '\n')
1034 syntax_error(EMSG_UNEXP_EOS);
1038 s[-1] = bb_process_escape_sequence((const char **)&pp);
1051 } else if (*p == '.' || isdigit(*p)) {
1054 t_double = my_strtod(&pp);
1057 syntax_error(EMSG_UNEXP_TOKEN);
1061 /* search for something known */
1071 /* if token class is expected, token
1072 * matches and it's not a longer word,
1073 * then this is what we are looking for
1075 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1076 && *tl == *p && strncmp(p, tl, l) == 0
1077 && !((tc & TC_WORD) && isalnum_(p[l]))
1088 /* it's a name (var/array/function),
1089 * otherwise it's something wrong
1092 syntax_error(EMSG_UNEXP_TOKEN);
1095 while (isalnum_(*++p)) {
1100 /* also consume whitespace between functionname and bracket */
1101 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1115 /* skipping newlines in some cases */
1116 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1119 /* insert concatenation operator when needed */
1120 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1121 concat_inserted = TRUE;
1125 t_info = OC_CONCAT | SS | P(35);
1132 /* Are we ready for this? */
1133 if (!(ltclass & expected))
1134 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1135 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1138 #undef concat_inserted
1144 static void rollback_token(void)
1149 static node *new_node(uint32_t info)
1153 n = xzalloc(sizeof(node));
1155 n->lineno = g_lineno;
/* Make n an OC_REGEXP node for pattern s. The pattern is compiled twice
 * with xregcomp(), so re must point to storage for two regex_t objects. */
1159 static void mk_re_node(const char *s, node *n, regex_t *re)
1161 n->info = OC_REGEXP;
1164 xregcomp(re, s, REG_EXTENDED); /* re[0]: case-sensitive variant */
1165 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); /* re[1]: case-insensitive variant */
/* Parse a parenthesized expression: consume the opening TC_SEQSTART token,
 * then return the subtree parse_expr() builds up to the TC_SEQTERM token. */
1168 static node *condition(void)
1170 next_token(TC_SEQSTART);
1171 return parse_expr(TC_SEQTERM);
1174 /* parse expression terminated by given argument, return ptr
1175 * to built subtree. Terminator is eaten by parse_expr */
1176 static node *parse_expr(uint32_t iexp)
1185 sn.r.n = glptr = NULL;
1186 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1188 while (!((tc = next_token(xtc)) & iexp)) {
1189 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1190 /* input redirection (<) attached to glptr node */
1191 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1193 xtc = TC_OPERAND | TC_UOPPRE;
1196 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1197 /* for binary and postfix-unary operators, jump back over
1198 * previous operators with higher priority */
1200 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1201 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1205 if ((t_info & OPCLSMASK) == OC_TERNARY)
1207 cn = vn->a.n->r.n = new_node(t_info);
1209 if (tc & TC_BINOP) {
1211 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1212 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1214 next_token(TC_GETLINE);
1215 /* give maximum priority to this pipe */
1216 cn->info &= ~PRIMASK;
1217 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1221 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1226 /* for operands and prefix-unary operators, attach them
1229 cn = vn->r.n = new_node(t_info);
1231 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1232 if (tc & (TC_OPERAND | TC_REGEXP)) {
1233 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1234 /* one should be very careful with switch on tclass -
1235 * only simple tclasses should be used! */
1240 v = hash_search(ahash, t_string);
1242 cn->info = OC_FNARG;
1243 cn->l.aidx = v->x.aidx;
1245 cn->l.v = newvar(t_string);
1247 if (tc & TC_ARRAY) {
1249 cn->r.n = parse_expr(TC_ARRTERM);
1256 v = cn->l.v = xzalloc(sizeof(var));
1258 setvar_i(v, t_double);
1260 setvar_s(v, t_string);
1264 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1269 cn->r.f = newfunc(t_string);
1270 cn->l.n = condition();
1274 cn = vn->r.n = parse_expr(TC_SEQTERM);
1280 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1284 cn->l.n = condition();
1293 /* add node to chain. Return ptr to alloc'd node */
1294 static node *chain_node(uint32_t info)
1299 seq->first = seq->last = new_node(0);
1301 if (seq->programname != g_progname) {
1302 seq->programname = g_progname;
1303 n = chain_node(OC_NEWSOURCE);
1304 n->l.new_progname = xstrdup(g_progname);
1309 seq->last = n->a.n = new_node(OC_DONE);
1314 static void chain_expr(uint32_t info)
1318 n = chain_node(info);
1319 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1320 if (t_tclass & TC_GRPTERM)
1324 static node *chain_loop(node *nn)
1326 node *n, *n2, *save_brk, *save_cont;
1328 save_brk = break_ptr;
1329 save_cont = continue_ptr;
1331 n = chain_node(OC_BR | Vx);
1332 continue_ptr = new_node(OC_EXEC);
1333 break_ptr = new_node(OC_EXEC);
1335 n2 = chain_node(OC_EXEC | Vx);
1338 continue_ptr->a.n = n2;
1339 break_ptr->a.n = n->r.n = seq->last;
1341 continue_ptr = save_cont;
1342 break_ptr = save_brk;
1347 /* parse group and attach it to chain */
1348 static void chain_group(void)
1354 c = next_token(TC_GRPSEQ);
1355 } while (c & TC_NEWLINE);
1357 if (c & TC_GRPSTART) {
1358 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1359 if (t_tclass & TC_NEWLINE)
1364 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1366 chain_expr(OC_EXEC | Vx);
1367 } else { /* TC_STATEMNT */
1368 switch (t_info & OPCLSMASK) {
1370 n = chain_node(OC_BR | Vx);
1371 n->l.n = condition();
1373 n2 = chain_node(OC_EXEC);
1375 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1377 n2->a.n = seq->last;
1385 n = chain_loop(NULL);
1390 n2 = chain_node(OC_EXEC);
1391 n = chain_loop(NULL);
1393 next_token(TC_WHILE);
1394 n->l.n = condition();
1398 next_token(TC_SEQSTART);
1399 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1400 if (t_tclass & TC_SEQTERM) { /* for-in */
1401 if ((n2->info & OPCLSMASK) != OC_IN)
1402 syntax_error(EMSG_UNEXP_TOKEN);
1403 n = chain_node(OC_WALKINIT | VV);
1406 n = chain_loop(NULL);
1407 n->info = OC_WALKNEXT | Vx;
1409 } else { /* for (;;) */
1410 n = chain_node(OC_EXEC | Vx);
1412 n2 = parse_expr(TC_SEMICOL);
1413 n3 = parse_expr(TC_SEQTERM);
1423 n = chain_node(t_info);
1424 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1425 if (t_tclass & TC_OUTRDR) {
1427 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1429 if (t_tclass & TC_GRPTERM)
1434 n = chain_node(OC_EXEC);
1439 n = chain_node(OC_EXEC);
1440 n->a.n = continue_ptr;
1443 /* delete, next, nextfile, return, exit */
1450 static void parse_program(char *p)
1459 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1460 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1462 if (tclass & TC_OPTERM)
1466 if (tclass & TC_BEGIN) {
1470 } else if (tclass & TC_END) {
1474 } else if (tclass & TC_FUNCDECL) {
1475 next_token(TC_FUNCTION);
1477 f = newfunc(t_string);
1478 f->body.first = NULL;
1480 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1481 v = findvar(ahash, t_string);
1482 v->x.aidx = f->nargs++;
1484 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1491 } else if (tclass & TC_OPSEQ) {
1493 cn = chain_node(OC_TEST);
1494 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1495 if (t_tclass & TC_GRPSTART) {
1499 chain_node(OC_PRINT);
1501 cn->r.n = mainseq.last;
1503 } else /* if (tclass & TC_GRPSTART) */ {
1511 /* -------- program execution part -------- */
1513 static node *mk_splitter(const char *s, tsplitter *spl)
1521 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1523 regfree(ire); // TODO: nuke ire, use re+1?
1525 if (strlen(s) > 1) {
1526 mk_re_node(s, n, re);
1528 n->info = (uint32_t) *s;
1534 /* use node as a regular expression. Supplied with node ptr and regex_t
1535 * storage space. Return ptr to regex (if result points to preg, it should
1536 * be later regfree'd manually
1538 static regex_t *as_regex(node *op, regex_t *preg)
1544 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1545 return icase ? op->r.ire : op->l.re;
1548 s = getvar_s(evaluate(op, v));
1550 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1551 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1552 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1553 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1554 * (maybe gsub is not supposed to use REG_EXTENDED?).
1556 if (regcomp(preg, s, cflags)) {
1557 cflags &= ~REG_EXTENDED;
1558 xregcomp(preg, s, cflags);
1564 /* Gradually growing buffer.
1565 * b is the current buffer (may be NULL), n the number of bytes needed,
1566 * *size the currently allocated size. Note that we reallocate even if
 * n == *size, and thus there is at least one extra allocated byte.
 */
1568 static char* qrealloc(char *b, int n, int *size)
1570 if (!b || n >= *size) {
/* grow to 1.5 * n plus 80 bytes of slack */
1571 *size = n + (n>>1) + 80;
1572 b = xrealloc(b, *size);
1577 /* resize field storage space */
1578 static void fsrealloc(int size)
1582 if (size >= maxfields) {
1584 maxfields = size + 16;
1585 Fields = xrealloc(Fields, maxfields * sizeof(var));
1586 for (; i < maxfields; i++) {
1587 Fields[i].type = VF_SPECIAL;
1588 Fields[i].string = NULL;
1592 if (size < nfields) {
1593 for (i = size; i < nfields; i++) {
// awk_split: split string s into fields as described by splitter node spl.
// A zeroed buffer of strlen(s) * 2 + 3 bytes is allocated and stored in
// *slist; callers in this file read the fields back from it with nextword.
// The result n is the number of fields (0 for an empty string in the
// regex case).
// Splitting modes, chosen from spl->info:
// - OC_REGEXP node: fields are separated by matches of the regex
//   (the case-insensitive variant is used when icase is set);
// - separator character NUL: null split;
// - any character other than space: single-character split, searched
//   with strpbrk over the set held in c;
// - space: fields are runs of non-whitespace characters.
1600 static int awk_split(const char *s, node *spl, char **slist)
1605 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1607 /* in worst case, each char would be a separate field */
1608 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
// The low byte of spl->info carries the separator character.
1611 c[0] = c[1] = (char)spl->info;
// An empty RS is special-cased here.
// NOTE(review): presumably newline then also acts as a field separator.
1613 if (*getvar_s(intvar[RS]) == '\0')
1616 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1618 return n; /* "": zero fields */
1619 n++; /* at least one field will be there */
1621 l = strcspn(s, c+2); /* len till next NUL or \n */
// A regex match that starts within the first l characters ends the field.
1622 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1623 && pmatch[0].rm_so <= l
1625 l = pmatch[0].rm_so;
// rm_eo == 0 means the separator matched an empty string at the start.
1626 if (pmatch[0].rm_eo == 0) {
1630 n++; /* we saw yet another delimiter */
1632 pmatch[0].rm_eo = l;
1637 /* make sure we remove *all* of the separator chars */
1640 } while (++l < pmatch[0].rm_eo);
// Continue scanning after the separator.
1642 s += pmatch[0].rm_eo;
1646 if (c[0] == '\0') { /* null split */
1654 if (c[0] != ' ') { /* single-character split */
// Both case variants of the separator are put into the search set.
// NOTE(review): presumably only when icase is set - confirm.
1656 c[0] = toupper(c[0]);
1657 c[1] = tolower(c[1]);
1661 while ((s1 = strpbrk(s1, c))) {
// Default (space) split: skip leading whitespace, then consume a word.
1669 s = skip_whitespace(s);
1673 while (*s && !isspace(*s))
// split_f0: split the current record ($0, intvar[F0]) into Fields[].
// The record is split with awk_split using fsplitter; the word buffer is
// kept in G.split_f0__fstrings (aliased as fstrings below). Each field
// then points into that buffer and is flagged VF_FSTR | VF_USER | VF_DIRTY.
// NF is finally written directly (type and number) rather than through
// the setvar helpers, so that the NF special handling is not triggered.
1680 static void split_f0(void)
1682 /* static char *fstrings; */
1683 #define fstrings (G.split_f0__fstrings)
1694 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
// Point every field at its word inside the shared buffer.
1697 for (i = 0; i < n; i++) {
1698 Fields[i].string = nextword(&s);
1699 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1702 /* set NF manually to avoid side effects */
1704 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1705 intvar[NF]->number = nfields;
// handle_special: react to a change of a variable flagged VF_SPECIAL.
// Variables without VF_SPECIAL are ignored. Otherwise, depending on v:
// - NF: $0 is rebuilt by joining the first NF fields with OFS;
// - $0 (F0): is_f0_split is cleared
//   (NOTE(review): presumably so that fields are re-split on next use);
// - FS / RS: fsplitter / rsplitter are rebuilt from the new value;
// - IGNORECASE: handled in its own branch;
// - anything else: v is treated as an element of Fields and NF is raised
//   to at least its 1-based position (v-Fields+1).
1709 /* perform additional actions when some internal variables changed */
1710 static void handle_special(var *v)
1714 const char *sep, *s;
1715 int sl, l, len, i, bsize;
1717 if (!(v->type & VF_SPECIAL))
1720 if (v == intvar[NF]) {
1721 n = (int)getvar_i(v);
1724 /* recalculate $0 */
1725 sep = getvar_s(intvar[OFS]);
// Append each field to buffer b, with the OFS text copied between fields.
1729 for (i = 0; i < n; i++) {
1730 s = getvar_s(&Fields[i]);
1733 memcpy(b+len, sep, sl);
// Grow b so that the field text and a following separator fit.
1736 b = qrealloc(b, len+l+sl, &bsize);
1737 memcpy(b+len, s, l);
// Store the joined buffer as the new $0.
1742 setvar_p(intvar[F0], b);
1745 } else if (v == intvar[F0]) {
1746 is_f0_split = FALSE;
1748 } else if (v == intvar[FS]) {
1749 mk_splitter(getvar_s(v), &fsplitter);
1751 } else if (v == intvar[RS]) {
1752 mk_splitter(getvar_s(v), &rsplitter);
1754 } else if (v == intvar[IGNORECASE]) {
// Remaining case: NF becomes the larger of its current value and the
// 1-based index of v inside Fields.
1758 n = getvar_i(intvar[NF]);
1759 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1760 /* right here v is invalid. Just to note... */
// nextarg: fetch the next argument node from the argument list at *pn.
// Argument lists are chained through OC_COMMA nodes, which is what the
// check below detects.
// NOTE(review): presumably the left child is returned and *pn advances
// to the right child, with *pn cleared after the last argument - confirm.
1764 /* step through func/builtin/etc arguments */
1765 static node *nextarg(node **pn)
1770 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// hashwalk_init: prepare variable v for iterating over the keys of array.
// A walker_list is allocated with room for array->glen + 1 bytes of key
// text and attached to v->x.walker. If v already has a walker (VF_WALK),
// that one is remembered and linked through w->prev, so walks on the
// same variable can nest. The names of all items found in the
// array->csize buckets are then copied into the walker buffer.
1779 static void hashwalk_init(var *v, xhash *array)
1784 walker_list *prev_walker;
// Keep an already active walker so it can be restored afterwards.
1786 if (v->type & VF_WALK) {
1787 prev_walker = v->x.walker;
1792 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1794 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1795 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
// cur is the read position, end the write position; both start at wbuf.
1796 w->cur = w->end = w->wbuf;
1797 w->prev = prev_walker;
// Visit every bucket and append each item name at w->end.
1798 for (i = 0; i < array->csize; i++) {
1799 hi = array->items[i];
1801 strcpy(w->end, hi->name);
// hashwalk_next: advance the walk set up by hashwalk_init on variable v.
// When the read position has reached the end of the stored keys, the
// walk is over and the previous walker (w->prev) is put back on v.
// Otherwise v is set to the next stored key.
// NOTE(review): the result is presumably nonzero while a key was
// delivered and zero at the end; evaluate branches on it - confirm.
1808 static int hashwalk_next(var *v)
1810 walker_list *w = v->x.walker;
1812 if (w->cur >= w->end) {
1813 walker_list *prev_walker = w->prev;
1815 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1817 v->x.walker = prev_walker;
// nextword returns the current key and moves w->cur past it.
1821 setvar_s(v, nextword(&w->cur));
// ptest: evaluate pattern and report its truth value via istrue.
// The result of the evaluation is stored in the shared variable
// G.ptest__v instead of a local one.
1825 /* evaluate node, return 1 when result is true, 0 otherwise */
1826 static int ptest(node *pattern)
1828 /* ptest__v is "static": to save stack space? */
1829 return istrue(evaluate(pattern, &G.ptest__v));
// awk_getline: read the next record from rsm into v.
// Data is read from the descriptor behind rsm->F with safe_read into a
// private buffer that is grown through qrealloc. The record boundary is
// then searched for in that buffer according to rsplitter:
// - regex separator (OC_REGEXP): regexec yields the separator span so..eo;
// - single non-NUL character c: located with strchr;
// - otherwise either a NUL byte is searched with memchr, or leading
//   newlines are skipped and an empty line (two newlines) is searched
//   with strstr, after which the following newlines are consumed too.
//   NOTE(review): presumably the second form is the empty-RS paragraph
//   mode - confirm.
// Already consumed data is moved to the front of the buffer with memmove
// before more input is read.
// ERRNO is set from errno on one path (NOTE(review): presumably after a
// failed read) and RT is set to the separator text found at b+so.
// NOTE(review): the result is presumably > 0 when a record was read;
// the main loop in this file continues while the result is > 0.
1832 /* read next record from stream rsm into a variable v */
1833 static int awk_getline(rstream *rsm, var *v)
1836 regmatch_t pmatch[2];
1837 int size, a, p, pp = 0;
1838 int fd, so, eo, r, rp;
1841 /* we're using our own buffer since we need access to accumulating
1844 fd = fileno(rsm->F);
1849 c = (char) rsplitter.n.info;
1853 m = qrealloc(m, 256, &size);
1860 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1861 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1862 b, 1, pmatch, 0) == 0) {
1863 so = pmatch[0].rm_so;
1864 eo = pmatch[0].rm_eo;
1868 } else if (c != '\0') {
1869 s = strchr(b+pp, c);
1871 s = memchr(b+pp, '\0', p - pp);
1878 while (b[rp] == '\n')
1880 s = strstr(b+rp, "\n\n");
1883 while (b[eo] == '\n')
1892 memmove(m, m+a, p+1);
1897 m = qrealloc(m, a+p+128, &size);
1900 p += safe_read(fd, b+p, size-p-1);
1904 setvar_i(intvar[ERRNO], errno);
1913 c = b[so]; b[so] = '\0';
1917 c = b[eo]; b[eo] = '\0';
1918 setvar_s(intvar[RT], b+so);
// fmt_num: format number n into buffer b (capacity size) using format.
// - If int_as_int is set and n has no fractional part when converted to
//   int, n is printed as a plain decimal integer and format is ignored.
// - Otherwise the last character of format selects the conversion:
//   one of d i o u x X passes n converted to int, one of e E f g G
//   passes n as a double, anything else raises EMSG_INV_FMT.
// NOTE(review): the result is presumably the snprintf return value r;
// awk_printf adds it to its output length.
1930 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1934 const char *s = format;
1936 if (int_as_int && n == (int)n) {
1937 r = snprintf(b, size, "%d", (int)n);
// Walk to the end of format; c ends up as its last character
// (or NUL for an empty format).
1939 do { c = *s; } while (c && *++s);
1940 if (strchr("diouxX", c)) {
1941 r = snprintf(b, size, format, (int)n);
1942 } else if (strchr("eEfgG", c)) {
1943 r = snprintf(b, size, format, n);
1945 syntax_error(EMSG_INV_FMT);
// awk_printf: build the output of a printf-style call in a heap buffer.
// The first argument of list n is evaluated and duplicated as the format
// string. The format is processed one conversion at a time: literal
// text up to and including the next conversion specification is cut out
// as a sub-format and applied to the next evaluated argument:
// - conversion c (or end of format): a single character, taken from the
//   numeric value when the argument is numeric, else the first character
//   of its string value;
// - conversion s: the string value of the argument;
// - everything else goes through fmt_num with the numeric value.
// A star inside a specification is rejected with a syntax error.
// The buffer is grown with qrealloc as needed and finally trimmed to
// i + 1 bytes with xrealloc.
1951 /* formatted output into an allocated buffer, return ptr to buffer */
1952 static char *awk_printf(node *n)
1957 int i, j, incr, bsize;
1962 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Skip literal text; a doubled percent sign does not start a conversion.
1967 while (*f && (*f != '%' || *++f == '%'))
// Skip flags, width and precision up to the conversion letter.
1969 while (*f && !isalpha(*f)) {
1971 syntax_error("%*x formats are not supported");
// Reserve room for this piece of the format plus MAXVARFMT bytes.
1975 incr = (f - s) + MAXVARFMT;
1976 b = qrealloc(b, incr + i, &bsize);
1982 arg = evaluate(nextarg(&n), v);
1985 if (c == 'c' || !c) {
1986 i += sprintf(b+i, s, is_numeric(arg) ?
1987 (char)getvar_i(arg) : *getvar_s(arg));
1988 } else if (c == 's') {
// Strings can be arbitrarily long, so grow by the string length first.
1990 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1991 i += sprintf(b+i, s, s1);
1993 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1997 /* if there was an error while sprintf, return value is negative */
// Shrink the buffer to the final length plus the terminating NUL.
2004 b = xrealloc(b, i + 1);
// awk_sub: shared implementation behind the substitution builtins.
// The pattern comes from node rn through as_regex, the subject string
// from src (or $0 when src is NULL) and the result is stored into dest
// (or $0 when dest is NULL). The result text is assembled in resbuf,
// which is grown with qrealloc; residx is the current write position.
2009 /* Common substitution routine.
2010 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2011 * store result into (dest), return number of substitutions.
2012 * If nm = 0, replace all matches.
2013 * If src or dst is NULL, use $0.
2014 * If subexp != 0, enable subexpression matching (\1-\9).
2016 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2020 int match_no, residx, replen, resbufsize;
// Up to 10 match slots: the whole match plus 9 subexpressions.
2022 regmatch_t pmatch[10];
2023 regex_t sreg, *regex;
2029 regex = as_regex(rn, &sreg);
2030 sp = getvar_s(src ? src : intvar[F0]);
2031 replen = strlen(repl);
// One iteration per match found in the not yet processed part of sp.
2032 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2033 int so = pmatch[0].rm_so;
2034 int eo = pmatch[0].rm_eo;
2036 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
// Copy everything up to the end of the match, matched text included.
2037 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2038 memcpy(resbuf + residx, sp, eo);
// Only matches from number nm onwards are replaced.
2040 if (++match_no >= nm) {
// Step back over the matched text so the replacement overwrites it.
2045 residx -= (eo - so);
// Copy the replacement, expanding the ampersand and, when subexp is
// set, digit references.
2047 for (s = repl; *s; s++) {
2048 char c = resbuf[residx++] = *s;
2053 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
// nbs is a backslash count kept on lines between; part of the copied
// backslashes is dropped again here.
// NOTE(review): presumably pairs of backslashes collapse into one.
2055 residx -= ((nbs + 3) >> 1);
2062 resbuf[residx++] = c;
// Insert the text of match slot j in place of the reference.
2064 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2065 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2066 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
// Later searches start inside the string, not at its beginning.
2074 regexec_flags = REG_NOTBOL;
2079 /* Empty match (e.g. "b*" will match anywhere).
2080 * Advance by one char. */
2082 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2083 //... and will erroneously match "b" even though it is NOT at the word start.
2084 //we need REG_NOTBOW but it does not exist...
2085 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2086 //it should be able to do it correctly.
2087 /* Subtle: this is safe only because
2088 * qrealloc allocated at least one extra byte */
2089 resbuf[residx] = *sp;
// Append the unmatched rest of the subject and store the result.
2097 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2098 strcpy(resbuf + residx, sp);
2100 //bb_error_msg("end sp:'%s'%p", sp,sp);
2101 setvar_p(dest ? dest : intvar[F0], resbuf);
// do_mktime: convert a textual date specification ds into a time value.
// ds is scanned as six unsigned numbers (year, month, day, hour, minute,
// second) followed by an optional signed number.
// NOTE(review): the last number presumably fills tm_isdst, which
// defaults to -1 (unknown) - confirm.
// Among the validity checks, a month below 1 or a year below 1900 is
// rejected. The year is then rebased to the struct tm convention
// (years since 1900) and the result of mktime is returned.
2107 static NOINLINE int do_mktime(const char *ds)
2112 /*memset(&then, 0, sizeof(then)); - not needed */
2113 then.tm_isdst = -1; /* default is unknown */
2115 /* manpage of mktime says these fields are ints,
2116 * so we can sscanf stuff directly into them */
2117 count = sscanf(ds, "%u %u %u %u %u %u %d",
2118 &then.tm_year, &then.tm_mon, &then.tm_mday,
2119 &then.tm_hour, &then.tm_min, &then.tm_sec,
2123 || (unsigned)then.tm_mon < 1
2124 || (unsigned)then.tm_year < 1900
2130 then.tm_year -= 1900;
2132 return mktime(&then);
// exec_builtin: execute the builtin function described by node op and
// store its value in res.
// Up to four arguments are collected first: an[] holds the argument
// nodes, av[] their evaluated values and as[] their string values.
// Whether arguments are evaluated and converted to strings is controlled
// by bits in op->info (masks 0x09000000 and 0x08000000 below).
// The top two bits of info (info >> 30) give the minimum number of
// arguments; fewer arguments raise EMSG_TOO_FEW_ARGS.
// The individual builtins follow; each one works on an/av/as.
2135 static NOINLINE var *exec_builtin(node *op, var *res)
2137 #define tspl (G.exec_builtin__tspl)
2143 regmatch_t pmatch[2];
2152 isr = info = op->info;
2155 av[2] = av[3] = NULL;
2156 for (i = 0; i < 4 && op; i++) {
2157 an[i] = nextarg(&op);
2158 if (isr & 0x09000000)
2159 av[i] = evaluate(an[i], &tv[i]);
2160 if (isr & 0x08000000)
2161 as[i] = getvar_s(av[i]);
2166 if ((uint32_t)nargs < (info >> 30))
2167 syntax_error(EMSG_TOO_FEW_ARGS);
// Arc tangent of two arguments; needs libm support, else EMSG_NO_MATH.
2173 #if ENABLE_FEATURE_AWK_LIBM
2174 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2176 syntax_error(EMSG_NO_MATH);
// Splitting a string into an array: the third argument is used as the
// splitter, either directly when it is a regex node or via mk_splitter
// on its string value.
2184 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2185 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2190 n = awk_split(as[0], spl, &s);
// The target array is emptied and refilled with elements 1..n.
2192 clear_array(iamarray(av[1]));
2193 for (i = 1; i <= n; i++)
2194 setari_u(av[1], i, nextword(&s));
// Substring extraction: the start position is 1-based in the script,
// the length defaults to the rest of the string.
2204 i = getvar_i(av[1]) - 1;
2209 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2212 s = xstrndup(as[0]+i, n);
2217 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2218 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2220 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2224 setvar_i(res, ~getvar_i_int(av[0]));
2228 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2232 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2236 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2240 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// Case conversion on a copy of the string: only ASCII letters are
// touched, by clearing (B_up) or setting bit 0x20.
2246 s1 = s = xstrdup(as[0]);
2248 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2249 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2250 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// Substring search: l is the last possible start offset; the 1-based
// position is computed from strstr, or found by a strncasecmp scan.
// NOTE(review): the scan is presumably the icase variant - confirm.
2260 l = strlen(as[0]) - ll;
2261 if (ll > 0 && l >= 0) {
2263 char *s = strstr(as[0], as[1]);
2265 n = (s - as[0]) + 1;
2267 /* this piece of code is terribly slow and
2268 * really should be rewritten
2270 for (i = 0; i <= l; i++) {
2271 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting into g_buf with strftime; a default format is used
// when no argument was given.
2283 tt = getvar_i(av[1]);
2286 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2287 i = strftime(g_buf, MAXVARFMT,
2288 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2291 setvar_s(res, g_buf);
2295 setvar_i(res, do_mktime(as[0]));
// Regex match of the first argument against the second: RSTART and
// RLENGTH are set from the match span; the span 0..-1 is used on one
// path (NOTE(review): presumably when nothing matched - confirm).
2299 re = as_regex(an[1], &sreg);
2300 n = regexec(re, as[0], 1, pmatch, 0);
2305 pmatch[0].rm_so = 0;
2306 pmatch[0].rm_eo = -1;
2308 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2309 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2310 setvar_i(res, pmatch[0].rm_so);
// Three substitution variants built on awk_sub: with subexpression
// support and the result stored in res; replace-all in place; and
// replace-first in place.
2316 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2320 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2324 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
// evaluate: execute the node chain starting at op.
// res is the variable offered for the result; the function returns a
// pointer to the resulting variable, which is not always res itself
// (several cases below reassign res to an existing variable such as a
// field, an array element or the left operand).
// For every node, flags in op->info decide which operands are prepared
// up front: OF_RES1 / OF_RES2 evaluate the children, OF_STR1 / OF_STR2
// take their string values and OF_NUM1 takes the numeric value of the
// left operand. The switch on the opcode class then does the work.
// XC() shifts an opcode class down by 8 bits for use as a case label.
2334 * Evaluate node - the heart of the program. Supplied with subtree
2335 * and place where to store result. returns ptr to result.
2337 #define XC(n) ((n) >> 8)
2339 static var *evaluate(node *op, var *res)
2341 /* This procedure is recursive so we should count every byte */
2342 #define fnargs (G.evaluate__fnargs)
2343 /* seed is initialized to 1 */
2344 #define seed (G.evaluate__seed)
2345 #define sreg (G.evaluate__sreg)
// Early exit with an empty value.
// NOTE(review): presumably taken when op is NULL - confirm.
2350 return setvar_s(res, NULL);
2358 } L = L; /* for compiler */
2369 opn = (opinfo & OPNMASK);
// Remember the source line of the current node.
2370 g_lineno = op->lineno;
2372 debug_printf_eval("opinfo:%08x opn:%08x XC:%x\n", opinfo, opn, XC(opinfo & OPCLSMASK));
2374 /* execute inevitable things */
2375 if (opinfo & OF_RES1)
2376 L.v = evaluate(op1, v1);
2377 if (opinfo & OF_RES2)
2378 R.v = evaluate(op->r.n, v1+1);
2379 if (opinfo & OF_STR1) {
2380 L.s = getvar_s(L.v);
2381 debug_printf_eval("L.s:'%s'\n", L.s);
2383 if (opinfo & OF_STR2) {
2384 R.s = getvar_s(R.v);
2385 debug_printf_eval("R.s:'%s'\n", R.s);
2387 if (opinfo & OF_NUM1) {
2388 L_d = getvar_i(L.v);
2389 debug_printf_eval("L_d:%f\n", L_d);
2392 switch (XC(opinfo & OPCLSMASK)) {
2394 /* -- iterative node type -- */
2398 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2399 /* it's range pattern */
// OF_CHECKED on the node records that the range start has matched; it
// is cleared again as soon as the range end pattern matches.
2400 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2401 op->info |= OF_CHECKED;
2402 if (ptest(op1->r.n))
2403 op->info &= ~OF_CHECKED;
// Plain pattern: continue with a.n on a match, else with r.n.
2409 op = ptest(op1) ? op->a.n : op->r.n;
2413 /* just evaluate an expression, also used as unconditional jump */
2417 /* branch, used in if-else and various loops */
2419 op = istrue(L.v) ? op->a.n : op->r.n;
2422 /* initialize for-in loop */
2423 case XC( OC_WALKINIT ):
2424 hashwalk_init(L.v, iamarray(R.v));
2427 /* get next array item */
2428 case XC( OC_WALKNEXT ):
2429 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2432 case XC( OC_PRINT ):
2433 case XC( OC_PRINTF ): {
// Redirected output: the stream record named by R.s is looked up or
// created; a pipe is opened with popen, a file with xfopen in write
// or append mode depending on opn.
2437 rstream *rsm = newfile(R.s);
2440 rsm->F = popen(R.s, "w");
2442 bb_perror_msg_and_die("popen");
2445 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2451 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// One path prints the whole record.
// NOTE(review): presumably print without arguments - confirm.
2453 fputs(getvar_s(intvar[F0]), F);
// With arguments: numbers are formatted with OFMT, other values are
// printed as strings, OFS is written between items and ORS at the end.
2456 var *v = evaluate(nextarg(&op1), v1);
2457 if (v->type & VF_NUMBER) {
2458 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2462 fputs(getvar_s(v), F);
2466 fputs(getvar_s(intvar[OFS]), F);
2469 fputs(getvar_s(intvar[ORS]), F);
2471 } else { /* OC_PRINTF */
2472 char *s = awk_printf(op1);
2480 case XC( OC_DELETE ): {
// The operand must be a variable or a function argument; anything
// else is rejected with EMSG_NOT_ARRAY.
2481 uint32_t info = op1->info & OPCLSMASK;
2484 if (info == OC_VAR) {
2486 } else if (info == OC_FNARG) {
2487 v = &fnargs[op1->l.aidx];
2489 syntax_error(EMSG_NOT_ARRAY);
// Either a single element is removed by key or the whole array is cleared.
2495 s = getvar_s(evaluate(op1->r.n, v1));
2496 hash_remove(iamarray(v), s);
2498 clear_array(iamarray(v));
2503 case XC( OC_NEWSOURCE ):
2504 g_progname = op->l.new_progname;
2507 case XC( OC_RETURN ):
2511 case XC( OC_NEXTFILE ):
2522 /* -- recursive node type -- */
2526 if (L.v == intvar[NF])
2530 case XC( OC_FNARG ):
2531 L.v = &fnargs[op->l.aidx];
// With a subscript node present the result is the array element for
// key R.s, otherwise the variable itself.
2533 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when key L.s exists in array R.v, else 0.
2537 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2540 case XC( OC_REGEXP ):
// A regex node here is matched against the whole record.
2542 L.s = getvar_s(intvar[F0]);
2545 case XC( OC_MATCH ):
// The result is 1 on a match, inverted when opn is the exclamation mark.
2549 regex_t *re = as_regex(op1, &sreg);
2550 int i = regexec(re, L.s, 0, NULL, 0);
2553 setvar_i(res, (i == 0) ^ (opn == '!'));
2558 debug_printf_eval("MOVE\n");
2559 /* if source is a temporary string, just relink it to dest */
2560 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2561 //then L.v ends up being a string, which is wrong
2562 // if (R.v == v1+1 && R.v->string) {
2563 // res = setvar_p(L.v, R.v->string);
2564 // R.v->string = NULL;
2566 res = copyvar(L.v, R.v);
2570 case XC( OC_TERNARY ):
// The right child must be an OC_COLON node holding both alternatives.
2571 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2572 syntax_error(EMSG_POSSIBLE_ERROR);
2573 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2576 case XC( OC_FUNC ): {
2578 const char *sv_progname;
// Calling a function that has no body is an error.
2580 if (!op->r.f->body.first)
2581 syntax_error(EMSG_UNDEF_FUNC);
// A block of nargs + 1 variables is allocated for the call; evaluated
// arguments are attached and flagged VF_CHILD, and evaluation of
// arguments stops once nargs of them have been taken.
2583 vbeg = v = nvalloc(op->r.f->nargs + 1);
2585 var *arg = evaluate(nextarg(&op1), v1);
2587 v->type |= VF_CHILD;
2589 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved around the call and restored afterwards.
2595 sv_progname = g_progname;
2597 res = evaluate(op->r.f->body.first, res);
2599 g_progname = sv_progname;
2606 case XC( OC_GETLINE ):
2607 case XC( OC_PGETLINE ): {
// Source selection: a command is opened with popen, a named file with
// fopen_for_read, otherwise the regular input files are used.
2614 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2615 rsm->F = popen(L.s, "r");
2616 rsm->is_pipe = TRUE;
2618 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2623 iF = next_input_file();
2628 setvar_i(intvar[ERRNO], errno);
// FNR is incremented only when a record was read and op1 is not set.
2636 i = awk_getline(rsm, R.v);
2637 if (i > 0 && !op1) {
2638 incvar(intvar[FNR]);
2645 /* simple builtins */
2646 case XC( OC_FBLTIN ): {
2649 double R_d = R_d; /* for compiler */
// Random number scaled by RAND_MAX.
2657 R_d = (double)rand() / (double)RAND_MAX;
2659 #if ENABLE_FEATURE_AWK_LIBM
2685 syntax_error(EMSG_NO_MATH);
// New seed: the argument when given, else the current time.
2690 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2700 L.s = getvar_s(intvar[F0]);
// Shell command execution; the exit status is the high byte of the
// system() result, and 0 is used when the feature is disabled or the
// command is empty.
2706 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2707 ? (system(L.s) >> 8) : 0;
2713 } else if (L.s && *L.s) {
// Closing a stream by name: pipes are closed with pclose, files with
// fclose, and the entry is removed from fdhash.
2723 rsm = (rstream *)hash_search(fdhash, L.s);
2725 i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2727 hash_remove(fdhash, L.s);
2730 setvar_i(intvar[ERRNO], errno);
2738 case XC( OC_BUILTIN ):
2739 res = exec_builtin(op, res);
2742 case XC( OC_SPRINTF ):
2743 setvar_p(res, awk_printf(op1));
2746 case XC( OC_UNARY ): {
2749 Ld = R_d = getvar_i(R.v);
2776 case XC( OC_FIELD ): {
// Field reference: the numeric value selects the field; Fields is
// 0-based, hence i - 1.
2777 int i = (int)getvar_i(R.v);
2784 res = &Fields[i - 1];
2789 /* concatenation (" ") and index joining (",") */
2790 case XC( OC_CONCAT ):
2791 case XC( OC_COMMA ): {
// Plain concatenation uses an empty separator, OC_COMMA uses SUBSEP.
2792 const char *sep = "";
2793 if ((opinfo & OPCLSMASK) == OC_COMMA)
2794 sep = getvar_s(intvar[SUBSEP]);
2795 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// Logical and / or: the right operand is only tested when the left
// one does not already decide the result.
2800 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2804 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2807 case XC( OC_BINARY ):
2808 case XC( OC_REPLACE ): {
2809 double R_d = getvar_i(R.v);
2810 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2823 syntax_error(EMSG_DIV_BY_ZERO);
2827 #if ENABLE_FEATURE_AWK_LIBM
2828 L_d = pow(L_d, R_d);
2830 syntax_error(EMSG_NO_MATH);
2835 syntax_error(EMSG_DIV_BY_ZERO);
// Remainder computed from the quotient truncated to int.
2836 L_d -= (int)(L_d / R_d) * R_d;
2839 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res, OC_REPLACE writes back into the left operand.
2840 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2844 case XC( OC_COMPARE ): {
2845 int i = i; /* for compiler */
// Numeric comparison when both operands are numeric, otherwise a
// string comparison that ignores case when icase is set.
2848 if (is_numeric(L.v) && is_numeric(R.v)) {
2849 Ld = getvar_i(L.v) - getvar_i(R.v);
2851 const char *l = getvar_s(L.v);
2852 const char *r = getvar_s(R.v);
2853 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
// The low bit of opn inverts the outcome of the selected comparison.
2855 switch (opn & 0xfe) {
2866 setvar_i(res, (i == 0) ^ (opn & 1));
2871 syntax_error(EMSG_POSSIBLE_ERROR);
// Opcode classes up to SHIFT_TIL_THIS and from RECUR_FROM_THIS are
// treated specially after the switch.
// NOTE(review): presumably chain advance versus loop exit - confirm.
2873 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2875 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2889 /* -------- main & co. -------- */
// awk_exit: final processing before the program terminates with status r.
// The END chain (endseq) is evaluated, then every stream recorded in
// fdhash that is an open pipe is closed with pclose, which waits for
// the child process.
// NOTE(review): the END chain is presumably run only once, and the
// function presumably ends by exiting with r - confirm.
2891 static int awk_exit(int r)
2902 evaluate(endseq.first, &tv);
2905 /* waiting for children */
2906 for (i = 0; i < fdhash->csize; i++) {
2907 hi = fdhash->items[i];
2909 if (hi->data.rs.F && hi->data.rs.is_pipe)
2910 pclose(hi->data.rs.F);
// is_assignment: handle a command line word of the form var=value.
// The word is duplicated so it can be modified. It is rejected when its
// first character fails the isalnum_ test or when it contains no equals
// sign. Otherwise the value part is rewritten character by character
// through nextchar and assigned to the variable named by the part
// before the equals sign.
// NOTE(review): nextchar presumably decodes escape sequences - confirm.
2918 /* if expr looks like "var=value", perform assignment and return 1,
2919 * otherwise return 0 */
2920 static int is_assignment(const char *expr)
2922 char *exprc, *s, *s0, *s1;
2924 exprc = xstrdup(expr);
2925 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2933 *s1++ = nextchar(&s);
2936 setvar_u(newvar(exprc), s0);
// next_input_file: advance to the next input file named in ARGV.
// The stream state lives in G.next_input_file__rsm (aliased as rsm) and
// is reused for every file; its read positions are reset first.
// ARGIND is incremented and the corresponding ARGV element fetched.
// Elements that are empty or that is_assignment accepts as var=value
// are not opened as files. An accepted name is opened with
// xfopen_stdin and stored in FILENAME.
// When ARGIND + 1 reaches ARGC there are no further arguments.
// NOTE(review): in that case standard input is presumably used if no
// file was seen so far (files_happen), otherwise NULL is returned.
2941 /* switch to next input file */
2942 static rstream *next_input_file(void)
2944 #define rsm (G.next_input_file__rsm)
2945 #define files_happen (G.next_input_file__files_happen)
2948 const char *fname, *ind;
2953 rsm.pos = rsm.adv = 0;
2956 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// Take the next ARGV element; ARGV is indexed by the string form of ARGIND.
2962 ind = getvar_s(incvar(intvar[ARGIND]));
2963 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2964 if (fname && *fname && !is_assignment(fname))
2965 F = xfopen_stdin(fname);
2969 files_happen = TRUE;
2970 setvar_s(intvar[FILENAME], fname);
// awk_main: entry point of the awk applet.
// Steps visible below, in order:
// 1. force the C numeric locale, allocate g_buf and the four hash tables;
// 2. create the builtin variables from the vNames / vValues tables and
//    run the special handling for FS and RS;
// 3. register stdin, stdout and stderr under their /dev names;
// 4. import the environment into ENVIRON;
// 5. parse options -F, -v, -f and -W, then parse the program text either
//    from the -f file or from the first non-option argument;
// 6. fill ARGC / ARGV, run the BEGIN chain, then read every input file
//    record by record and run the main chain on each record;
// 7. finish through awk_exit.
2977 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2978 int awk_main(int argc, char **argv)
2981 char *opt_F, *opt_W;
2982 llist_t *list_v = NULL;
2983 llist_t *list_f = NULL;
2988 char *vnames = (char *)vNames; /* cheat */
2989 char *vvalues = (char *)vValues;
2993 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2994 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2995 if (ENABLE_LOCALE_SUPPORT)
2996 setlocale(LC_NUMERIC, "C");
3000 /* allocate global buffer */
3001 g_buf = xmalloc(MAXVARFMT + 1);
3003 vhash = hash_init();
3004 ahash = hash_init();
3005 fdhash = hash_init();
3006 fnhash = hash_init();
3008 /* initialize variables */
// vnames and vvalues are packed word lists read with nextword. A value
// starting with byte 0377 means no initial string is assigned, and a
// star at the current position in vnames marks the variable VF_SPECIAL.
3009 for (i = 0; *vnames; i++) {
3010 intvar[i] = v = newvar(nextword(&vnames));
3011 if (*vvalues != '\377')
3012 setvar_s(v, nextword(&vvalues));
3016 if (*vnames == '*') {
3017 v->type |= VF_SPECIAL;
// Build the initial field and record splitters from the default FS and RS.
3022 handle_special(intvar[FS]);
3023 handle_special(intvar[RS]);
3025 newfile("/dev/stdin")->F = stdin;
3026 newfile("/dev/stdout")->F = stdout;
3027 newfile("/dev/stderr")->F = stderr;
3029 /* Huh, people report that sometimes environ is NULL. Oh well. */
3030 if (environ) for (envp = environ; *envp; envp++) {
3031 /* environ is writable, thus we don't strdup it needlessly */
3033 char *s1 = strchr(s, '=');
3036 /* Both findvar and setvar_u take const char*
3037 * as 2nd arg -> environment is not trashed */
3038 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3042 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3043 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3047 setvar_s(intvar[FS], opt_F); // -F
// Every -v argument must be a valid var=value assignment.
3048 while (list_v) { /* -v */
3049 if (!is_assignment(llist_pop(&list_v)))
3052 if (list_f) { /* -f */
3057 g_progname = llist_pop(&list_f);
3058 from_file = xfopen_stdin(g_progname);
3059 /* one byte is reserved for some trick in next_token */
// Read the whole program file in chunks of up to 4094 bytes, starting
// at offset 1 of the buffer, until fread returns 0.
3060 for (i = j = 1; j > 0; i += j) {
3061 s = xrealloc(s, i + 4096);
3062 j = fread(s + i, 1, 4094, from_file);
3066 parse_program(s + 1);
3070 } else { // no -f: take program from 1st parameter
3073 g_progname = "cmd. line";
3074 parse_program(*argv++);
3076 if (opt & 0x8) // -W
3077 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3079 /* fill in ARGV array */
3080 setvar_i(intvar[ARGC], argc);
3081 setari_u(intvar[ARGV], 0, "awk");
3084 setari_u(intvar[ARGV], ++i, *argv++);
// Run the BEGIN chain; with neither main nor END rules there is no
// input to process and the program ends here.
3086 evaluate(beginseq.first, &tv);
3087 if (!mainseq.first && !endseq.first)
3088 awk_exit(EXIT_SUCCESS);
3090 /* input file could already be opened in BEGIN block */
3092 iF = next_input_file();
3094 /* passing through input files */
// FNR restarts at 0 for a file and is incremented per record before
// the main chain runs on that record.
3097 setvar_i(intvar[FNR], 0);
3099 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3102 incvar(intvar[FNR]);
3103 evaluate(mainseq.first, &tv);
// NOTE(review): presumably reached when awk_getline reported an error.
3110 syntax_error(strerror(errno));
3112 iF = next_input_file();
3115 awk_exit(EXIT_SUCCESS);