1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 //usage:#define awk_trivial_usage
11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage: " -v VAR=VAL Set variable"
14 //usage: "\n -F SEP Use SEP as field separator"
15 //usage: "\n -f FILE Read program from FILE"
21 /* This is a NOEXEC applet. Be very careful! */
24 /* If you comment out one of these below, it will be #defined later
25 * to perform debug printfs to stderr: */
26 #define debug_printf_walker(...) do {} while (0)
27 #define debug_printf_eval(...) do {} while (0)
29 #ifndef debug_printf_walker
30 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
32 #ifndef debug_printf_eval
33 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
/* Flag bits kept in the `type` member of var_s. */
42 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
43 #define VF_ARRAY 0x0002 /* 1 = it's an array */
45 #define VF_CACHED 0x0100 /* 1 = num/str value has a cached str/num equivalent */
46 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
47 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed (see handle_special()) */
48 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
49 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
50 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
51 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
53 /* These flags are static: they must survive a change of value. clrvar() masks type with VF_DONTTOUCH and copyvar() excludes them when copying flags from the source. */
54 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
56 typedef struct walker_list {
59 struct walker_list *prev;
64 typedef struct var_s {
65 unsigned type; /* flags */
69 int aidx; /* func arg idx (for compilation stage) */
70 struct xhash_s *array; /* array ptr */
71 struct var_s *parent; /* for func args, ptr to actual parameter */
72 walker_list *walker; /* list of array elements (for..in) */
76 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
77 typedef struct chain_s {
80 const char *programname;
84 typedef struct func_s {
90 typedef struct rstream_s {
99 typedef struct hash_item_s {
101 struct var_s v; /* variable/array hash */
102 struct rstream_s rs; /* redirect streams hash */
103 struct func_s f; /* functions hash */
105 struct hash_item_s *next; /* next in chain */
106 char name[1]; /* really it's longer */
109 typedef struct xhash_s {
110 unsigned nel; /* num of elements */
111 unsigned csize; /* current hash size */
112 unsigned nprime; /* next hash size in PRIMES[] */
113 unsigned glen; /* summary length of item names */
114 struct hash_item_s **items;
118 typedef struct node_s {
138 /* Block of temporary variables */
139 typedef struct nvblock_s {
142 struct nvblock_s *prev;
143 struct nvblock_s *next;
147 typedef struct tsplitter_s {
152 /* Simple token classes: each class is one distinct bit, so classes can be OR-ed into masks. */
153 /* Order and hex values are very important!!! See next_token().
 * The visible groups of tokenlist[] follow this same bit order and use NTC
 * to step to the next class (tc<<1). */
154 #define TC_SEQSTART 1 /* ( */
155 #define TC_SEQTERM (1 << 1) /* ) */
156 #define TC_REGEXP (1 << 2) /* /.../ */
157 #define TC_OUTRDR (1 << 3) /* | > >> */
158 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
159 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
160 #define TC_BINOPX (1 << 6) /* two-opnd operator */
161 #define TC_IN (1 << 7)
162 #define TC_COMMA (1 << 8)
163 #define TC_PIPE (1 << 9) /* input redirection pipe */
164 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
165 #define TC_ARRTERM (1 << 11) /* ] */
166 #define TC_GRPSTART (1 << 12) /* { */
167 #define TC_GRPTERM (1 << 13) /* } */
168 #define TC_SEMICOL (1 << 14)
169 #define TC_NEWLINE (1 << 15)
170 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
171 #define TC_WHILE (1 << 17)
172 #define TC_ELSE (1 << 18)
173 #define TC_BUILTIN (1 << 19)
174 #define TC_GETLINE (1 << 20)
175 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
176 #define TC_BEGIN (1 << 22)
177 #define TC_END (1 << 23)
178 #define TC_EOF (1 << 24)
179 #define TC_VARIABLE (1 << 25)
180 #define TC_ARRAY (1 << 26)
181 #define TC_FUNCTION (1 << 27)
182 #define TC_STRING (1 << 28)
183 #define TC_NUMBER (1 << 29)
/* Either kind of unary prefix operator. */
185 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
187 /* Combined token classes: masks built by OR-ing the simple classes. */
188 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
189 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
190 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
191 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
193 #define TC_STATEMNT (TC_STATX | TC_WHILE)
194 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
196 /* Word tokens, cannot mean something else if not expected: next_token() matches these classes even when they are not in the expected mask, provided the word is not followed by another identifier character. */
197 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
198 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
200 /* Discard newlines after these (next_token() checks the previous token class against this mask). */
201 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
202 | TC_BINOP | TC_OPTERM)
204 /* what can expression begin with */
205 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
206 /* what can group begin with */
207 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
209 /* If the previous token class is in CONCAT1 and the next is in CONCAT2, a concatenation */
210 /* operator is inserted between them (only where a binary operator is expected). */
211 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
212 | TC_STRING | TC_NUMBER | TC_UOPPOST)
213 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
215 #define OF_RES1 0x010000
216 #define OF_RES2 0x020000
217 #define OF_STR1 0x040000
218 #define OF_STR2 0x080000
219 #define OF_NUM1 0x100000
220 #define OF_CHECKED 0x200000
222 /* combined operator flags */
225 #define xS (OF_RES2 | OF_STR2)
227 #define VV (OF_RES1 | OF_RES2)
228 #define Nx (OF_RES1 | OF_NUM1)
229 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
230 #define Sx (OF_RES1 | OF_STR1)
231 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
232 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
234 #define OPCLSMASK 0xFF00
235 #define OPNMASK 0x007F
237 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
238 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
239 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
244 #define P(x) (x << 24)
245 #define PRIMASK 0x7F000000
246 #define PRIMASK2 0x7E000000
248 /* Operation classes */
250 #define SHIFT_TIL_THIS 0x0600
251 #define RECUR_FROM_THIS 0x1000
254 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
255 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
257 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
258 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
259 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
261 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
262 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
263 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
264 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
265 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
266 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
267 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
268 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
271 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
275 /* simple builtins */
277 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
278 F_ti, F_le, F_sy, F_ff, F_cl
283 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
285 B_an, B_co, B_ls, B_or, B_rs, B_xo,
288 /* tokens and their corresponding info values */
290 #define NTC "\377" /* switch to next token class (tc<<1) */
293 #define OC_B OC_BUILTIN
295 static const char tokenlist[] ALIGN1 =
298 "\1/" NTC /* REGEXP */
299 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
300 "\2++" "\2--" NTC /* UOPPOST */
301 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
302 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
303 "\2*=" "\2/=" "\2%=" "\2^="
304 "\1+" "\1-" "\3**=" "\2**"
305 "\1/" "\1%" "\1^" "\1*"
306 "\2!=" "\2>=" "\2<=" "\1>"
307 "\1<" "\2!~" "\1~" "\2&&"
308 "\2||" "\1?" "\1:" NTC
312 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
318 "\2if" "\2do" "\3for" "\5break" /* STATX */
319 "\10continue" "\6delete" "\5print"
320 "\6printf" "\4next" "\10nextfile"
321 "\6return" "\4exit" NTC
325 "\3and" "\5compl" "\6lshift" "\2or"
327 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
328 "\3cos" "\3exp" "\3int" "\3log"
329 "\4rand" "\3sin" "\4sqrt" "\5srand"
330 "\6gensub" "\4gsub" "\5index" "\6length"
331 "\5match" "\5split" "\7sprintf" "\3sub"
332 "\6substr" "\7systime" "\10strftime" "\6mktime"
333 "\7tolower" "\7toupper" NTC
335 "\4func" "\10function" NTC
338 /* compiler adds trailing "\0" */
341 static const uint32_t tokeninfo[] = {
345 xS|'a', xS|'w', xS|'|',
346 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
347 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
348 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
349 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
350 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
351 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
352 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
353 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
354 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
355 OC_IN|SV|P(49), /* in */
357 OC_PGETLINE|SV|P(37),
358 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
364 ST_IF, ST_DO, ST_FOR, OC_BREAK,
365 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
366 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
367 OC_RETURN|Vx, OC_EXIT|Nx,
371 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
372 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
373 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
374 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
375 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
376 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
377 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
378 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
379 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
386 /* internal variable names and their initial values */
387 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
389 CONVFMT, OFMT, FS, OFS,
390 ORS, RS, RT, FILENAME,
391 SUBSEP, F0, ARGIND, ARGC,
392 ARGV, ERRNO, FNR, NR,
393 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
/*
 * Names of the internal variables, stored as consecutive NUL-terminated
 * strings in the same order as the CONVFMT..ENVIRON index constants; the
 * list is closed by an empty string. A '*' placed right after a name's
 * NUL marks that variable as SPECIAL. "$" is the name used for field 0 (F0).
 */
396 static const char vNames[] ALIGN1 =
397 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
398 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
399 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
400 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
401 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
403 static const char vValues[] ALIGN1 =
404 "%.6g\0" "%.6g\0" " \0" " \0"
405 "\n\0" "\n\0" "\0" "\0"
406 "\034\0" "\0" "\377";
408 /* Hash size may grow to these values.
 * hash_init() creates a table with FIRST_PRIME buckets; hash_rebuild()
 * takes each following size from PRIMES[hash->nprime++] and first checks
 * whether nprime has already reached ARRAY_SIZE(PRIMES). */
409 #define FIRST_PRIME 61
410 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
413 /* Globals. Split in two parts so that first one is addressed
414 * with (mostly short) negative offsets.
415 * NB: it's unsafe to put members of type "double"
416 * into globals2 (gcc may fail to align them).
420 chain beginseq, mainseq, endseq;
422 node *break_ptr, *continue_ptr;
424 xhash *vhash, *ahash, *fdhash, *fnhash;
425 const char *g_progname;
428 int maxfields; /* used in fsrealloc() only */
437 smallint is_f0_split;
440 uint32_t t_info; /* often used */
446 var *intvar[NUM_INTERNAL_VARS]; /* often used */
448 /* former statics from various functions */
449 char *split_f0__fstrings;
451 uint32_t next_token__save_tclass;
452 uint32_t next_token__save_info;
453 uint32_t next_token__ltclass;
454 smallint next_token__concat_inserted;
456 smallint next_input_file__files_happen;
457 rstream next_input_file__rsm;
459 var *evaluate__fnargs;
460 unsigned evaluate__seed;
461 regex_t evaluate__sreg;
465 tsplitter exec_builtin__tspl;
467 /* biggest and least used members go last */
468 tsplitter fsplitter, rsplitter;
470 #define G1 (ptr_to_globals[-1])
471 #define G (*(struct globals2 *)ptr_to_globals)
472 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
473 /*char G1size[sizeof(G1)]; - 0x74 */
474 /*char Gsize[sizeof(G)]; - 0x1c4 */
475 /* Trying to keep most of members accessible with short offsets: */
476 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
477 #define t_double (G1.t_double )
478 #define beginseq (G1.beginseq )
479 #define mainseq (G1.mainseq )
480 #define endseq (G1.endseq )
481 #define seq (G1.seq )
482 #define break_ptr (G1.break_ptr )
483 #define continue_ptr (G1.continue_ptr)
485 #define vhash (G1.vhash )
486 #define ahash (G1.ahash )
487 #define fdhash (G1.fdhash )
488 #define fnhash (G1.fnhash )
489 #define g_progname (G1.g_progname )
490 #define g_lineno (G1.g_lineno )
491 #define nfields (G1.nfields )
492 #define maxfields (G1.maxfields )
493 #define Fields (G1.Fields )
494 #define g_cb (G1.g_cb )
495 #define g_pos (G1.g_pos )
496 #define g_buf (G1.g_buf )
497 #define icase (G1.icase )
498 #define exiting (G1.exiting )
499 #define nextrec (G1.nextrec )
500 #define nextfile (G1.nextfile )
501 #define is_f0_split (G1.is_f0_split )
502 #define t_info (G.t_info )
503 #define t_tclass (G.t_tclass )
504 #define t_string (G.t_string )
505 #define t_lineno (G.t_lineno )
506 #define t_rollback (G.t_rollback )
507 #define intvar (G.intvar )
508 #define fsplitter (G.fsplitter )
509 #define rsplitter (G.rsplitter )
510 #define INIT_G() do { \
511 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
512 G.next_token__ltclass = TC_OPTERM; \
513 G.evaluate__seed = 1; \
517 /* function prototypes */
518 static void handle_special(var *);
519 static node *parse_expr(uint32_t);
520 static void chain_group(void);
521 static var *evaluate(node *, var *);
522 static rstream *next_input_file(void);
523 static int fmt_num(char *, int, const char *, double, int);
524 static int awk_exit(int) NORETURN;
526 /* ---- error handling ---- */
/*
 * Shared diagnostic strings. Several of them (e.g. EMSG_INTERNAL_ERROR,
 * EMSG_UNEXP_EOS, EMSG_UNEXP_TOKEN) are passed to syntax_error(), which
 * prints the message prefixed with program name and line number and dies.
 */
528 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
529 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
530 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
531 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
532 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
533 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
534 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
535 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
536 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
537 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
539 static void zero_out_var(var *vp)
541 memset(vp, 0, sizeof(*vp));
544 static void syntax_error(const char *message) NORETURN;
545 static void syntax_error(const char *message)
547 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
550 /* ---- hash stuff ---- */
552 static unsigned hashidx(const char *name)
557 idx = *name++ + (idx << 6) - idx;
561 /* create new hash */
562 static xhash *hash_init(void)
566 newhash = xzalloc(sizeof(*newhash));
567 newhash->csize = FIRST_PRIME;
568 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
573 /* find item in hash, return ptr to data, NULL if not found */
574 static void *hash_search(xhash *hash, const char *name)
578 hi = hash->items[hashidx(name) % hash->csize];
580 if (strcmp(hi->name, name) == 0)
587 /* grow hash if it becomes too big */
588 static void hash_rebuild(xhash *hash)
590 unsigned newsize, i, idx;
591 hash_item **newitems, *hi, *thi;
593 if (hash->nprime == ARRAY_SIZE(PRIMES))
596 newsize = PRIMES[hash->nprime++];
597 newitems = xzalloc(newsize * sizeof(newitems[0]));
599 for (i = 0; i < hash->csize; i++) {
604 idx = hashidx(thi->name) % newsize;
605 thi->next = newitems[idx];
611 hash->csize = newsize;
612 hash->items = newitems;
615 /* find item in hash, add it if necessary. Return ptr to data */
616 static void *hash_find(xhash *hash, const char *name)
622 hi = hash_search(hash, name);
624 if (++hash->nel / hash->csize > 10)
627 l = strlen(name) + 1;
628 hi = xzalloc(sizeof(*hi) + l);
629 strcpy(hi->name, name);
631 idx = hashidx(name) % hash->csize;
632 hi->next = hash->items[idx];
633 hash->items[idx] = hi;
/*
 * Typed convenience wrappers around hash_find(), which looks `name` up and
 * adds a new entry when it is not present. findvar() takes the hash
 * explicitly; newvar(), newfile() and newfunc() operate on the global
 * vhash, fdhash and fnhash tables respectively. Each casts the returned
 * data pointer to the matching item type.
 */
639 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
640 #define newvar(name) ((var*) hash_find(vhash, (name)))
641 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
642 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
644 static void hash_remove(xhash *hash, const char *name)
646 hash_item *hi, **phi;
648 phi = &hash->items[hashidx(name) % hash->csize];
651 if (strcmp(hi->name, name) == 0) {
652 hash->glen -= (strlen(name) + 1);
662 /* ------ some useful functions ------ */
664 static char *skip_spaces(char *p)
667 if (*p == '\\' && p[1] == '\n') {
670 } else if (*p != ' ' && *p != '\t') {
678 /* returns old *s, advances *s past word and terminating NUL */
679 static char *nextword(char **s)
682 while (*(*s)++ != '\0')
687 static char nextchar(char **s)
694 c = bb_process_escape_sequence((const char**)s);
695 if (c == '\\' && *s == pps) { /* unrecognized \z? */
696 c = *(*s); /* yes, fetch z */
698 (*s)++; /* advance unless z = NUL */
703 static ALWAYS_INLINE int isalnum_(int c)
705 return (isalnum(c) || c == '_');
708 static double my_strtod(char **pp)
711 if (ENABLE_DESKTOP && cp[0] == '0') {
712 /* Might be hex or octal integer: 0x123abc or 07777 */
713 char c = (cp[1] | 0x20);
714 if (c == 'x' || isdigit(cp[1])) {
715 unsigned long long ull = strtoull(cp, pp, 0);
719 if (!isdigit(c) && c != '.')
721 /* else: it may be a floating number. Examples:
722 * 009.123 (*pp points to '9')
723 * 000.123 (*pp points to '.')
724 * fall through to strtod.
728 return strtod(cp, pp);
731 /* -------- working with variables (set/get/copy/etc) -------- */
733 static xhash *iamarray(var *v)
737 while (a->type & VF_CHILD)
740 if (!(a->type & VF_ARRAY)) {
742 a->x.array = hash_init();
747 static void clear_array(xhash *array)
752 for (i = 0; i < array->csize; i++) {
753 hi = array->items[i];
757 free(thi->data.v.string);
760 array->items[i] = NULL;
762 array->glen = array->nel = 0;
765 /* clear a variable */
766 static var *clrvar(var *v)
768 if (!(v->type & VF_FSTR))
771 v->type &= VF_DONTTOUCH;
777 /* assign string value to variable */
778 static var *setvar_p(var *v, char *value)
786 /* same as setvar_p but make a copy of string */
787 static var *setvar_s(var *v, const char *value)
789 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
792 /* same as setvar_s but sets USER flag */
793 static var *setvar_u(var *v, const char *value)
795 v = setvar_s(v, value);
800 /* set array element to user string */
801 static void setari_u(var *a, int idx, const char *s)
805 v = findvar(iamarray(a), itoa(idx));
809 /* assign numeric value to variable */
810 static var *setvar_i(var *v, double value)
813 v->type |= VF_NUMBER;
819 static const char *getvar_s(var *v)
821 /* if v is numeric and has no cached string, convert it to string */
822 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
823 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
824 v->string = xstrdup(g_buf);
825 v->type |= VF_CACHED;
827 return (v->string == NULL) ? "" : v->string;
830 static double getvar_i(var *v)
834 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
838 debug_printf_eval("getvar_i: '%s'->", s);
839 v->number = my_strtod(&s);
840 debug_printf_eval("%f (s:'%s')\n", v->number, s);
841 if (v->type & VF_USER) {
847 debug_printf_eval("getvar_i: '%s'->zero\n", s);
850 v->type |= VF_CACHED;
852 debug_printf_eval("getvar_i: %f\n", v->number);
856 /* Used for operands of bitwise ops */
857 static unsigned long getvar_i_int(var *v)
859 double d = getvar_i(v);
861 /* Casting doubles to longs is undefined for values outside
862 * of target type range. Try to widen it as much as possible */
864 return (unsigned long)d;
865 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
866 return - (long) (unsigned long) (-d);
869 static var *copyvar(var *dest, const var *src)
873 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
874 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
875 dest->number = src->number;
877 dest->string = xstrdup(src->string);
879 handle_special(dest);
883 static var *incvar(var *v)
885 return setvar_i(v, getvar_i(v) + 1.0);
888 /* return true if v is number or numeric string */
889 static int is_numeric(var *v)
892 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
895 /* return 1 when value of v corresponds to true, 0 otherwise */
896 static int istrue(var *v)
899 return (v->number != 0);
900 return (v->string && v->string[0]);
903 /* temporary variables allocator. Last allocated should be first freed */
904 static var *nvalloc(int n)
912 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
918 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
919 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
921 g_cb->pos = g_cb->nv;
923 /*g_cb->next = NULL; - xzalloc did it */
931 while (v < g_cb->pos) {
940 static void nvfree(var *v)
944 if (v < g_cb->nv || v >= g_cb->pos)
945 syntax_error(EMSG_INTERNAL_ERROR);
947 for (p = v; p < g_cb->pos; p++) {
948 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
949 clear_array(iamarray(p));
950 free(p->x.array->items);
953 if (p->type & VF_WALK) {
955 walker_list *w = p->x.walker;
956 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
960 debug_printf_walker(" free(%p)\n", w);
969 while (g_cb->prev && g_cb->pos == g_cb->nv) {
974 /* ------- awk program text parsing ------- */
976 /* Parse the next token at the current global position (g_pos) and store the results in the global t_* variables.
977 * If the token is not of an expected class, give up with a syntax error. Returns the token class.
979 static uint32_t next_token(uint32_t expected)
981 #define concat_inserted (G.next_token__concat_inserted)
982 #define save_tclass (G.next_token__save_tclass)
983 #define save_info (G.next_token__save_info)
984 /* Initialized to TC_OPTERM: */
985 #define ltclass (G.next_token__ltclass)
995 } else if (concat_inserted) {
996 concat_inserted = FALSE;
997 t_tclass = save_tclass;
1004 g_lineno = t_lineno;
1006 while (*p != '\n' && *p != '\0')
1015 } else if (*p == '\"') {
1018 while (*p != '\"') {
1020 if (*p == '\0' || *p == '\n')
1021 syntax_error(EMSG_UNEXP_EOS);
1023 *s++ = nextchar(&pp);
1030 } else if ((expected & TC_REGEXP) && *p == '/') {
1034 if (*p == '\0' || *p == '\n')
1035 syntax_error(EMSG_UNEXP_EOS);
1039 s[-1] = bb_process_escape_sequence((const char **)&pp);
1052 } else if (*p == '.' || isdigit(*p)) {
1055 t_double = my_strtod(&pp);
1058 syntax_error(EMSG_UNEXP_TOKEN);
1062 /* search for something known */
1067 int l = (unsigned char) *tl++;
1068 if (l == (unsigned char) NTCC) {
1072 /* if token class is expected,
1074 * and it's not a longer word,
1076 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1077 && strncmp(p, tl, l) == 0
1078 && !((tc & TC_WORD) && isalnum_(p[l]))
1080 /* then this is what we are looking for */
1088 /* not a known token */
1090 /* is it a name? (var/array/function) */
1092 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1095 while (isalnum_(*++p)) {
1100 /* also consume whitespace between functionname and bracket */
1101 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1115 /* skipping newlines in some cases */
1116 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1119 /* insert concatenation operator when needed */
1120 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1121 concat_inserted = TRUE;
1125 t_info = OC_CONCAT | SS | P(35);
1132 /* Are we ready for this? */
1133 if (!(ltclass & expected))
1134 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1135 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1138 #undef concat_inserted
1144 static void rollback_token(void)
1149 static node *new_node(uint32_t info)
1153 n = xzalloc(sizeof(node));
1155 n->lineno = g_lineno;
1159 static void mk_re_node(const char *s, node *n, regex_t *re)
1161 n->info = OC_REGEXP;
1164 xregcomp(re, s, REG_EXTENDED);
1165 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1168 static node *condition(void)
1170 next_token(TC_SEQSTART);
1171 return parse_expr(TC_SEQTERM);
1174 /* parse expression terminated by given argument, return ptr
1175 * to built subtree. Terminator is eaten by parse_expr */
1176 static node *parse_expr(uint32_t iexp)
1185 sn.r.n = glptr = NULL;
1186 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1188 while (!((tc = next_token(xtc)) & iexp)) {
1190 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1191 /* input redirection (<) attached to glptr node */
1192 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1194 xtc = TC_OPERAND | TC_UOPPRE;
1197 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1198 /* for binary and postfix-unary operators, jump back over
1199 * previous operators with higher priority */
1201 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1202 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1206 if ((t_info & OPCLSMASK) == OC_TERNARY)
1208 cn = vn->a.n->r.n = new_node(t_info);
1210 if (tc & TC_BINOP) {
1212 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1213 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1215 next_token(TC_GETLINE);
1216 /* give maximum priority to this pipe */
1217 cn->info &= ~PRIMASK;
1218 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1222 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1227 /* for operands and prefix-unary operators, attach them
1230 cn = vn->r.n = new_node(t_info);
1232 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1233 if (tc & (TC_OPERAND | TC_REGEXP)) {
1234 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1235 /* one should be very careful with switch on tclass -
1236 * only simple tclasses should be used! */
1241 v = hash_search(ahash, t_string);
1243 cn->info = OC_FNARG;
1244 cn->l.aidx = v->x.aidx;
1246 cn->l.v = newvar(t_string);
1248 if (tc & TC_ARRAY) {
1250 cn->r.n = parse_expr(TC_ARRTERM);
1257 v = cn->l.v = xzalloc(sizeof(var));
1259 setvar_i(v, t_double);
1261 setvar_s(v, t_string);
1265 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1270 cn->r.f = newfunc(t_string);
1271 cn->l.n = condition();
1275 cn = vn->r.n = parse_expr(TC_SEQTERM);
1281 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1285 cn->l.n = condition();
1294 /* add node to chain. Return ptr to alloc'd node */
1295 static node *chain_node(uint32_t info)
1300 seq->first = seq->last = new_node(0);
1302 if (seq->programname != g_progname) {
1303 seq->programname = g_progname;
1304 n = chain_node(OC_NEWSOURCE);
1305 n->l.new_progname = xstrdup(g_progname);
1310 seq->last = n->a.n = new_node(OC_DONE);
1315 static void chain_expr(uint32_t info)
1319 n = chain_node(info);
1320 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1321 if (t_tclass & TC_GRPTERM)
1325 static node *chain_loop(node *nn)
1327 node *n, *n2, *save_brk, *save_cont;
1329 save_brk = break_ptr;
1330 save_cont = continue_ptr;
1332 n = chain_node(OC_BR | Vx);
1333 continue_ptr = new_node(OC_EXEC);
1334 break_ptr = new_node(OC_EXEC);
1336 n2 = chain_node(OC_EXEC | Vx);
1339 continue_ptr->a.n = n2;
1340 break_ptr->a.n = n->r.n = seq->last;
1342 continue_ptr = save_cont;
1343 break_ptr = save_brk;
1348 /* parse group and attach it to chain */
1349 static void chain_group(void)
1355 c = next_token(TC_GRPSEQ);
1356 } while (c & TC_NEWLINE);
1358 if (c & TC_GRPSTART) {
1359 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1360 if (t_tclass & TC_NEWLINE)
1365 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1367 chain_expr(OC_EXEC | Vx);
1368 } else { /* TC_STATEMNT */
1369 switch (t_info & OPCLSMASK) {
1371 n = chain_node(OC_BR | Vx);
1372 n->l.n = condition();
1374 n2 = chain_node(OC_EXEC);
1376 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1378 n2->a.n = seq->last;
1386 n = chain_loop(NULL);
1391 n2 = chain_node(OC_EXEC);
1392 n = chain_loop(NULL);
1394 next_token(TC_WHILE);
1395 n->l.n = condition();
1399 next_token(TC_SEQSTART);
1400 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1401 if (t_tclass & TC_SEQTERM) { /* for-in */
1402 if ((n2->info & OPCLSMASK) != OC_IN)
1403 syntax_error(EMSG_UNEXP_TOKEN);
1404 n = chain_node(OC_WALKINIT | VV);
1407 n = chain_loop(NULL);
1408 n->info = OC_WALKNEXT | Vx;
1410 } else { /* for (;;) */
1411 n = chain_node(OC_EXEC | Vx);
1413 n2 = parse_expr(TC_SEMICOL);
1414 n3 = parse_expr(TC_SEQTERM);
1424 n = chain_node(t_info);
1425 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1426 if (t_tclass & TC_OUTRDR) {
1428 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1430 if (t_tclass & TC_GRPTERM)
1435 n = chain_node(OC_EXEC);
1440 n = chain_node(OC_EXEC);
1441 n->a.n = continue_ptr;
1444 /* delete, next, nextfile, return, exit */
1451 static void parse_program(char *p)
1460 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1461 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1463 if (tclass & TC_OPTERM)
1467 if (tclass & TC_BEGIN) {
1471 } else if (tclass & TC_END) {
1475 } else if (tclass & TC_FUNCDECL) {
1476 next_token(TC_FUNCTION);
1478 f = newfunc(t_string);
1479 f->body.first = NULL;
1481 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1482 v = findvar(ahash, t_string);
1483 v->x.aidx = f->nargs++;
1485 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1492 } else if (tclass & TC_OPSEQ) {
1494 cn = chain_node(OC_TEST);
1495 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1496 if (t_tclass & TC_GRPSTART) {
1500 chain_node(OC_PRINT);
1502 cn->r.n = mainseq.last;
1504 } else /* if (tclass & TC_GRPSTART) */ {
1512 /* -------- program execution part -------- */
1514 static node *mk_splitter(const char *s, tsplitter *spl)
1522 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1524 regfree(ire); // TODO: nuke ire, use re+1?
1526 if (s[0] && s[1]) { /* strlen(s) > 1 */
1527 mk_re_node(s, n, re);
1529 n->info = (uint32_t) s[0];
1535 /* use node as a regular expression. Supplied with node ptr and regex_t
1536 * storage space. Return ptr to regex (if result points to preg, it should
1537 * be later regfree'd manually
1539 static regex_t *as_regex(node *op, regex_t *preg)
1545 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1546 return icase ? op->r.ire : op->l.re;
1549 s = getvar_s(evaluate(op, v));
1551 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1552 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1553 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1554 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1555 * (maybe gsub is not supposed to use REG_EXTENDED?).
1557 if (regcomp(preg, s, cflags)) {
1558 cflags &= ~REG_EXTENDED;
1559 xregcomp(preg, s, cflags);
1565 /* gradually increasing buffer.
1566 * note that we reallocate even if n == old_size,
1567 * and thus there is at least one extra allocated byte.
1569 static char* qrealloc(char *b, int n, int *size)
1571 if (!b || n >= *size) {
1572 *size = n + (n>>1) + 80;
1573 b = xrealloc(b, *size);
1578 /* resize field storage space */
1579 static void fsrealloc(int size)
1583 if (size >= maxfields) {
1585 maxfields = size + 16;
1586 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1587 for (; i < maxfields; i++) {
1588 Fields[i].type = VF_SPECIAL;
1589 Fields[i].string = NULL;
1592 /* if size < nfields, clear extra field variables */
1593 for (i = size; i < nfields; i++) {
1599 static int awk_split(const char *s, node *spl, char **slist)
1604 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1606 /* in worst case, each char would be a separate field */
1607 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1610 c[0] = c[1] = (char)spl->info;
1612 if (*getvar_s(intvar[RS]) == '\0')
1616 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1618 return n; /* "": zero fields */
1619 n++; /* at least one field will be there */
1621 l = strcspn(s, c+2); /* len till next NUL or \n */
1622 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1623 && pmatch[0].rm_so <= l
1625 l = pmatch[0].rm_so;
1626 if (pmatch[0].rm_eo == 0) {
1630 n++; /* we saw yet another delimiter */
1632 pmatch[0].rm_eo = l;
1637 /* make sure we remove *all* of the separator chars */
1640 } while (++l < pmatch[0].rm_eo);
1642 s += pmatch[0].rm_eo;
1646 if (c[0] == '\0') { /* null split */
1654 if (c[0] != ' ') { /* single-character split */
1656 c[0] = toupper(c[0]);
1657 c[1] = tolower(c[1]);
1661 while ((s1 = strpbrk(s1, c)) != NULL) {
1669 s = skip_whitespace(s);
1673 while (*s && !isspace(*s))
1680 static void split_f0(void)
1682 /* static char *fstrings; */
1683 #define fstrings (G.split_f0__fstrings)
1694 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1697 for (i = 0; i < n; i++) {
1698 Fields[i].string = nextword(&s);
1699 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1702 /* set NF manually to avoid side effects */
1704 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1705 intvar[NF]->number = nfields;
1709 /* perform additional actions when some internal variables changed */
1710 static void handle_special(var *v)
1714 const char *sep, *s;
1715 int sl, l, len, i, bsize;
1717 if (!(v->type & VF_SPECIAL))
1720 if (v == intvar[NF]) {
1721 n = (int)getvar_i(v);
1724 /* recalculate $0 */
1725 sep = getvar_s(intvar[OFS]);
1729 for (i = 0; i < n; i++) {
1730 s = getvar_s(&Fields[i]);
1733 memcpy(b+len, sep, sl);
1736 b = qrealloc(b, len+l+sl, &bsize);
1737 memcpy(b+len, s, l);
1742 setvar_p(intvar[F0], b);
1745 } else if (v == intvar[F0]) {
1746 is_f0_split = FALSE;
1748 } else if (v == intvar[FS]) {
1749 mk_splitter(getvar_s(v), &fsplitter);
1751 } else if (v == intvar[RS]) {
1752 mk_splitter(getvar_s(v), &rsplitter);
1754 } else if (v == intvar[IGNORECASE]) {
1758 n = getvar_i(intvar[NF]);
1759 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1760 /* right here v is invalid. Just to note... */
1764 /* step through func/builtin/etc arguments */
1765 static node *nextarg(node **pn)
1770 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1779 static void hashwalk_init(var *v, xhash *array)
1784 walker_list *prev_walker;
1786 if (v->type & VF_WALK) {
1787 prev_walker = v->x.walker;
1792 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1794 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1795 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1796 w->cur = w->end = w->wbuf;
1797 w->prev = prev_walker;
1798 for (i = 0; i < array->csize; i++) {
1799 hi = array->items[i];
1801 strcpy(w->end, hi->name);
1808 static int hashwalk_next(var *v)
1810 walker_list *w = v->x.walker;
1812 if (w->cur >= w->end) {
1813 walker_list *prev_walker = w->prev;
1815 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1817 v->x.walker = prev_walker;
1821 setvar_s(v, nextword(&w->cur));
1825 /* evaluate node, return 1 when result is true, 0 otherwise */
1826 static int ptest(node *pattern)
1828 /* ptest__v is "static": to save stack space? */
1829 return istrue(evaluate(pattern, &G.ptest__v));
1832 /* read next record from stream rsm into a variable v */
1833 static int awk_getline(rstream *rsm, var *v)
1836 regmatch_t pmatch[2];
1837 int size, a, p, pp = 0;
1838 int fd, so, eo, r, rp;
1841 debug_printf_eval("entered %s()\n", __func__);
1843 /* we're using our own buffer since we need access to accumulating
1846 fd = fileno(rsm->F);
1851 c = (char) rsplitter.n.info;
1855 m = qrealloc(m, 256, &size);
1862 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1863 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1864 b, 1, pmatch, 0) == 0) {
1865 so = pmatch[0].rm_so;
1866 eo = pmatch[0].rm_eo;
1870 } else if (c != '\0') {
1871 s = strchr(b+pp, c);
1873 s = memchr(b+pp, '\0', p - pp);
1880 while (b[rp] == '\n')
1882 s = strstr(b+rp, "\n\n");
1885 while (b[eo] == '\n')
1894 memmove(m, m+a, p+1);
1899 m = qrealloc(m, a+p+128, &size);
1902 p += safe_read(fd, b+p, size-p-1);
1906 setvar_i(intvar[ERRNO], errno);
1915 c = b[so]; b[so] = '\0';
1919 c = b[eo]; b[eo] = '\0';
1920 setvar_s(intvar[RT], b+so);
1929 debug_printf_eval("returning from %s(): %d\n", __func__, r);
1934 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1938 const char *s = format;
1940 if (int_as_int && n == (int)n) {
1941 r = snprintf(b, size, "%d", (int)n);
1943 do { c = *s; } while (c && *++s);
1944 if (strchr("diouxX", c)) {
1945 r = snprintf(b, size, format, (int)n);
1946 } else if (strchr("eEfgG", c)) {
1947 r = snprintf(b, size, format, n);
1949 syntax_error(EMSG_INV_FMT);
1955 /* formatted output into an allocated buffer, return ptr to buffer */
1956 static char *awk_printf(node *n)
1961 int i, j, incr, bsize;
1966 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1971 while (*f && (*f != '%' || *++f == '%'))
1973 while (*f && !isalpha(*f)) {
1975 syntax_error("%*x formats are not supported");
1979 incr = (f - s) + MAXVARFMT;
1980 b = qrealloc(b, incr + i, &bsize);
1986 arg = evaluate(nextarg(&n), v);
1989 if (c == 'c' || !c) {
1990 i += sprintf(b+i, s, is_numeric(arg) ?
1991 (char)getvar_i(arg) : *getvar_s(arg));
1992 } else if (c == 's') {
1994 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1995 i += sprintf(b+i, s, s1);
1997 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2001 /* if there was an error while sprintf, return value is negative */
2008 b = xrealloc(b, i + 1);
2013 /* Common substitution routine.
2014 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2015 * store result into (dest), return number of substitutions.
2016 * If nm = 0, replace all matches.
2017 * If src or dest is NULL, use $0.
2018 * If subexp != 0, enable subexpression matching (\1-\9).
2020 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2024 int match_no, residx, replen, resbufsize;
2026 regmatch_t pmatch[10];
2027 regex_t sreg, *regex;
2033 regex = as_regex(rn, &sreg);
2034 sp = getvar_s(src ? src : intvar[F0]);
2035 replen = strlen(repl);
2036 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2037 int so = pmatch[0].rm_so;
2038 int eo = pmatch[0].rm_eo;
2040 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2041 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2042 memcpy(resbuf + residx, sp, eo);
2044 if (++match_no >= nm) {
2049 residx -= (eo - so);
2051 for (s = repl; *s; s++) {
2052 char c = resbuf[residx++] = *s;
2057 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2059 residx -= ((nbs + 3) >> 1);
2066 resbuf[residx++] = c;
2068 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2069 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2070 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2078 regexec_flags = REG_NOTBOL;
2083 /* Empty match (e.g. "b*" will match anywhere).
2084 * Advance by one char. */
2086 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2087 //... and will erroneously match "b" even though it is NOT at the word start.
2088 //we need REG_NOTBOW but it does not exist...
2089 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2090 //it should be able to do it correctly.
2091 /* Subtle: this is safe only because
2092 * qrealloc allocated at least one extra byte */
2093 resbuf[residx] = *sp;
2101 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2102 strcpy(resbuf + residx, sp);
2104 //bb_error_msg("end sp:'%s'%p", sp,sp);
2105 setvar_p(dest ? dest : intvar[F0], resbuf);
2111 static NOINLINE int do_mktime(const char *ds)
2116 /*memset(&then, 0, sizeof(then)); - not needed */
2117 then.tm_isdst = -1; /* default is unknown */
2119 /* manpage of mktime says these fields are ints,
2120 * so we can sscanf stuff directly into them */
2121 count = sscanf(ds, "%u %u %u %u %u %u %d",
2122 &then.tm_year, &then.tm_mon, &then.tm_mday,
2123 &then.tm_hour, &then.tm_min, &then.tm_sec,
2127 || (unsigned)then.tm_mon < 1
2128 || (unsigned)then.tm_year < 1900
2134 then.tm_year -= 1900;
2136 return mktime(&then);
2139 static NOINLINE var *exec_builtin(node *op, var *res)
2141 #define tspl (G.exec_builtin__tspl)
2147 regmatch_t pmatch[2];
2156 isr = info = op->info;
2159 av[2] = av[3] = NULL;
2160 for (i = 0; i < 4 && op; i++) {
2161 an[i] = nextarg(&op);
2162 if (isr & 0x09000000)
2163 av[i] = evaluate(an[i], &tv[i]);
2164 if (isr & 0x08000000)
2165 as[i] = getvar_s(av[i]);
2170 if ((uint32_t)nargs < (info >> 30))
2171 syntax_error(EMSG_TOO_FEW_ARGS);
2177 if (ENABLE_FEATURE_AWK_LIBM)
2178 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2180 syntax_error(EMSG_NO_MATH);
2187 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2188 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2193 n = awk_split(as[0], spl, &s);
2195 clear_array(iamarray(av[1]));
2196 for (i = 1; i <= n; i++)
2197 setari_u(av[1], i, nextword(&s));
2207 i = getvar_i(av[1]) - 1;
2212 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2215 s = xstrndup(as[0]+i, n);
2220 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2221 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2223 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2227 setvar_i(res, ~getvar_i_int(av[0]));
2231 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2235 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2239 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2243 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2249 s1 = s = xstrdup(as[0]);
2251 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2252 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2253 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2263 l = strlen(as[0]) - ll;
2264 if (ll > 0 && l >= 0) {
2266 char *s = strstr(as[0], as[1]);
2268 n = (s - as[0]) + 1;
2270 /* this piece of code is terribly slow and
2271 * really should be rewritten
2273 for (i = 0; i <= l; i++) {
2274 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2286 tt = getvar_i(av[1]);
2289 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2290 i = strftime(g_buf, MAXVARFMT,
2291 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2294 setvar_s(res, g_buf);
2298 setvar_i(res, do_mktime(as[0]));
2302 re = as_regex(an[1], &sreg);
2303 n = regexec(re, as[0], 1, pmatch, 0);
2308 pmatch[0].rm_so = 0;
2309 pmatch[0].rm_eo = -1;
2311 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2312 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2313 setvar_i(res, pmatch[0].rm_so);
2319 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2323 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2327 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2337 * Evaluate node - the heart of the program. Supplied with subtree
2338 * and place where to store result. Returns ptr to result.
2340 #define XC(n) ((n) >> 8)
2342 static var *evaluate(node *op, var *res)
2344 /* This procedure is recursive so we should count every byte */
2345 #define fnargs (G.evaluate__fnargs)
2346 /* seed is initialized to 1 */
2347 #define seed (G.evaluate__seed)
2348 #define sreg (G.evaluate__sreg)
2353 return setvar_s(res, NULL);
2355 debug_printf_eval("entered %s()\n", __func__);
2363 } L = L; /* for compiler */
2374 opn = (opinfo & OPNMASK);
2375 g_lineno = op->lineno;
2377 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2379 /* execute inevitable things */
2380 if (opinfo & OF_RES1)
2381 L.v = evaluate(op1, v1);
2382 if (opinfo & OF_RES2)
2383 R.v = evaluate(op->r.n, v1+1);
2384 if (opinfo & OF_STR1) {
2385 L.s = getvar_s(L.v);
2386 debug_printf_eval("L.s:'%s'\n", L.s);
2388 if (opinfo & OF_STR2) {
2389 R.s = getvar_s(R.v);
2390 debug_printf_eval("R.s:'%s'\n", R.s);
2392 if (opinfo & OF_NUM1) {
2393 L_d = getvar_i(L.v);
2394 debug_printf_eval("L_d:%f\n", L_d);
2397 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2398 switch (XC(opinfo & OPCLSMASK)) {
2400 /* -- iterative node type -- */
2404 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2405 /* it's range pattern */
2406 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2407 op->info |= OF_CHECKED;
2408 if (ptest(op1->r.n))
2409 op->info &= ~OF_CHECKED;
2415 op = ptest(op1) ? op->a.n : op->r.n;
2419 /* just evaluate an expression, also used as unconditional jump */
2423 /* branch, used in if-else and various loops */
2425 op = istrue(L.v) ? op->a.n : op->r.n;
2428 /* initialize for-in loop */
2429 case XC( OC_WALKINIT ):
2430 hashwalk_init(L.v, iamarray(R.v));
2433 /* get next array item */
2434 case XC( OC_WALKNEXT ):
2435 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2438 case XC( OC_PRINT ):
2439 case XC( OC_PRINTF ): {
2443 rstream *rsm = newfile(R.s);
2446 rsm->F = popen(R.s, "w");
2448 bb_perror_msg_and_die("popen");
2451 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2457 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2459 fputs(getvar_s(intvar[F0]), F);
2462 var *v = evaluate(nextarg(&op1), v1);
2463 if (v->type & VF_NUMBER) {
2464 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2468 fputs(getvar_s(v), F);
2472 fputs(getvar_s(intvar[OFS]), F);
2475 fputs(getvar_s(intvar[ORS]), F);
2477 } else { /* OC_PRINTF */
2478 char *s = awk_printf(op1);
2486 case XC( OC_DELETE ): {
2487 uint32_t info = op1->info & OPCLSMASK;
2490 if (info == OC_VAR) {
2492 } else if (info == OC_FNARG) {
2493 v = &fnargs[op1->l.aidx];
2495 syntax_error(EMSG_NOT_ARRAY);
2501 s = getvar_s(evaluate(op1->r.n, v1));
2502 hash_remove(iamarray(v), s);
2504 clear_array(iamarray(v));
2509 case XC( OC_NEWSOURCE ):
2510 g_progname = op->l.new_progname;
2513 case XC( OC_RETURN ):
2517 case XC( OC_NEXTFILE ):
2528 /* -- recursive node type -- */
2532 if (L.v == intvar[NF])
2536 case XC( OC_FNARG ):
2537 L.v = &fnargs[op->l.aidx];
2539 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2543 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2546 case XC( OC_REGEXP ):
2548 L.s = getvar_s(intvar[F0]);
2551 case XC( OC_MATCH ):
2555 regex_t *re = as_regex(op1, &sreg);
2556 int i = regexec(re, L.s, 0, NULL, 0);
2559 setvar_i(res, (i == 0) ^ (opn == '!'));
2564 debug_printf_eval("MOVE\n");
2565 /* if source is a temporary string, just relink it to dest */
2566 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2567 //then L.v ends up being a string, which is wrong
2568 // if (R.v == v1+1 && R.v->string) {
2569 // res = setvar_p(L.v, R.v->string);
2570 // R.v->string = NULL;
2572 res = copyvar(L.v, R.v);
2576 case XC( OC_TERNARY ):
2577 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2578 syntax_error(EMSG_POSSIBLE_ERROR);
2579 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2582 case XC( OC_FUNC ): {
2584 const char *sv_progname;
2586 if (!op->r.f->body.first)
2587 syntax_error(EMSG_UNDEF_FUNC);
2589 vbeg = v = nvalloc(op->r.f->nargs + 1);
2591 var *arg = evaluate(nextarg(&op1), v1);
2593 v->type |= VF_CHILD;
2595 if (++v - vbeg >= op->r.f->nargs)
2601 sv_progname = g_progname;
2603 res = evaluate(op->r.f->body.first, res);
2605 g_progname = sv_progname;
2612 case XC( OC_GETLINE ):
2613 case XC( OC_PGETLINE ): {
2620 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2621 rsm->F = popen(L.s, "r");
2622 rsm->is_pipe = TRUE;
2624 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2629 iF = next_input_file();
2633 if (!rsm || !rsm->F) {
2634 setvar_i(intvar[ERRNO], errno);
2642 i = awk_getline(rsm, R.v);
2643 if (i > 0 && !op1) {
2644 incvar(intvar[FNR]);
2651 /* simple builtins */
2652 case XC( OC_FBLTIN ): {
2653 double R_d = R_d; /* for compiler */
2661 R_d = (double)rand() / (double)RAND_MAX;
2665 if (ENABLE_FEATURE_AWK_LIBM) {
2671 if (ENABLE_FEATURE_AWK_LIBM) {
2677 if (ENABLE_FEATURE_AWK_LIBM) {
2683 if (ENABLE_FEATURE_AWK_LIBM) {
2689 if (ENABLE_FEATURE_AWK_LIBM) {
2694 syntax_error(EMSG_NO_MATH);
2699 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2709 L.s = getvar_s(intvar[F0]);
2715 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2716 ? (system(L.s) >> 8) : 0;
2722 } else if (L.s && *L.s) {
2723 rstream *rsm = newfile(L.s);
2733 rsm = (rstream *)hash_search(fdhash, L.s);
2734 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2736 debug_printf_eval("OC_FBLTIN F_cl "
2737 "rsm->is_pipe:%d, ->F:%p\n",
2738 rsm->is_pipe, rsm->F);
2739 /* Can be NULL if open failed. Example:
2740 * getline line <"doesnt_exist";
2741 * close("doesnt_exist"); <--- here rsm->F is NULL
2744 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2746 hash_remove(fdhash, L.s);
2749 setvar_i(intvar[ERRNO], errno);
2758 case XC( OC_BUILTIN ):
2759 res = exec_builtin(op, res);
2762 case XC( OC_SPRINTF ):
2763 setvar_p(res, awk_printf(op1));
2766 case XC( OC_UNARY ): {
2769 Ld = R_d = getvar_i(R.v);
2796 case XC( OC_FIELD ): {
2797 int i = (int)getvar_i(R.v);
2804 res = &Fields[i - 1];
2809 /* concatenation (" ") and index joining (",") */
2810 case XC( OC_CONCAT ):
2811 case XC( OC_COMMA ): {
2812 const char *sep = "";
2813 if ((opinfo & OPCLSMASK) == OC_COMMA)
2814 sep = getvar_s(intvar[SUBSEP]);
2815 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2820 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2824 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2827 case XC( OC_BINARY ):
2828 case XC( OC_REPLACE ): {
2829 double R_d = getvar_i(R.v);
2830 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2843 syntax_error(EMSG_DIV_BY_ZERO);
2847 if (ENABLE_FEATURE_AWK_LIBM)
2848 L_d = pow(L_d, R_d);
2850 syntax_error(EMSG_NO_MATH);
2854 syntax_error(EMSG_DIV_BY_ZERO);
2855 L_d -= (int)(L_d / R_d) * R_d;
2858 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2859 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2863 case XC( OC_COMPARE ): {
2864 int i = i; /* for compiler */
2867 if (is_numeric(L.v) && is_numeric(R.v)) {
2868 Ld = getvar_i(L.v) - getvar_i(R.v);
2870 const char *l = getvar_s(L.v);
2871 const char *r = getvar_s(R.v);
2872 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2874 switch (opn & 0xfe) {
2885 setvar_i(res, (i == 0) ^ (opn & 1));
2890 syntax_error(EMSG_POSSIBLE_ERROR);
2892 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2894 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2901 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2909 /* -------- main & co. -------- */
2911 static int awk_exit(int r)
2922 evaluate(endseq.first, &tv);
2925 /* waiting for children */
2926 for (i = 0; i < fdhash->csize; i++) {
2927 hi = fdhash->items[i];
2929 if (hi->data.rs.F && hi->data.rs.is_pipe)
2930 pclose(hi->data.rs.F);
2938 /* if expr looks like "var=value", perform assignment and return 1,
2939 * otherwise return 0 */
2940 static int is_assignment(const char *expr)
2942 char *exprc, *val, *s, *s1;
2944 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2948 exprc = xstrdup(expr);
2949 val = exprc + (val - expr);
2953 while ((*s1 = nextchar(&s)) != '\0')
2956 setvar_u(newvar(exprc), val);
2961 /* switch to next input file */
2962 static rstream *next_input_file(void)
2964 #define rsm (G.next_input_file__rsm)
2965 #define files_happen (G.next_input_file__files_happen)
2968 const char *fname, *ind;
2973 rsm.pos = rsm.adv = 0;
2976 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2983 ind = getvar_s(incvar(intvar[ARGIND]));
2984 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2985 if (fname && *fname && !is_assignment(fname)) {
2986 F = xfopen_stdin(fname);
2991 files_happen = TRUE;
2992 setvar_s(intvar[FILENAME], fname);
2999 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3000 int awk_main(int argc, char **argv)
3003 char *opt_F, *opt_W;
3004 llist_t *list_v = NULL;
3005 llist_t *list_f = NULL;
3010 char *vnames = (char *)vNames; /* cheat */
3011 char *vvalues = (char *)vValues;
3015 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3016 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3017 if (ENABLE_LOCALE_SUPPORT)
3018 setlocale(LC_NUMERIC, "C");
3022 /* allocate global buffer */
3023 g_buf = xmalloc(MAXVARFMT + 1);
3025 vhash = hash_init();
3026 ahash = hash_init();
3027 fdhash = hash_init();
3028 fnhash = hash_init();
3030 /* initialize variables */
3031 for (i = 0; *vnames; i++) {
3032 intvar[i] = v = newvar(nextword(&vnames));
3033 if (*vvalues != '\377')
3034 setvar_s(v, nextword(&vvalues));
3038 if (*vnames == '*') {
3039 v->type |= VF_SPECIAL;
3044 handle_special(intvar[FS]);
3045 handle_special(intvar[RS]);
3047 newfile("/dev/stdin")->F = stdin;
3048 newfile("/dev/stdout")->F = stdout;
3049 newfile("/dev/stderr")->F = stderr;
3051 /* Huh, people report that sometimes environ is NULL. Oh well. */
3052 if (environ) for (envp = environ; *envp; envp++) {
3053 /* environ is writable, thus we don't strdup it needlessly */
3055 char *s1 = strchr(s, '=');
3058 /* Both findvar and setvar_u take const char*
3059 * as 2nd arg -> environment is not trashed */
3060 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3064 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3065 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3069 setvar_s(intvar[FS], opt_F); // -F
3070 while (list_v) { /* -v */
3071 if (!is_assignment(llist_pop(&list_v)))
3074 if (list_f) { /* -f */
3079 g_progname = llist_pop(&list_f);
3080 from_file = xfopen_stdin(g_progname);
3081 /* one byte is reserved for some trick in next_token */
3082 for (i = j = 1; j > 0; i += j) {
3083 s = xrealloc(s, i + 4096);
3084 j = fread(s + i, 1, 4094, from_file);
3088 parse_program(s + 1);
3092 } else { // no -f: take program from 1st parameter
3095 g_progname = "cmd. line";
3096 parse_program(*argv++);
3098 if (opt & 0x8) // -W
3099 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3101 /* fill in ARGV array */
3102 setvar_i(intvar[ARGC], argc);
3103 setari_u(intvar[ARGV], 0, "awk");
3106 setari_u(intvar[ARGV], ++i, *argv++);
3108 evaluate(beginseq.first, &tv);
3109 if (!mainseq.first && !endseq.first)
3110 awk_exit(EXIT_SUCCESS);
3112 /* input file could already be opened in BEGIN block */
3114 iF = next_input_file();
3116 /* passing through input files */
3119 setvar_i(intvar[FNR], 0);
3121 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3124 incvar(intvar[FNR]);
3125 evaluate(mainseq.first, &tv);
3132 syntax_error(strerror(errno));
3134 iF = next_input_file();
3137 awk_exit(EXIT_SUCCESS);