1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names */
86 struct hash_item_s **items;
90 typedef struct node_s {
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175 | TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185 | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
198 #define xS (OF_RES2 | OF_STR2)
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
210 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
211 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
214 #define P(x) (x << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
218 /* Operation classes */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
224 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
225 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
227 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
228 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
229 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
231 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
232 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
233 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
234 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
235 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
236 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
237 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
238 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
241 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
245 /* simple builtins */
247 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] ALIGN1 =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
367 CONVFMT, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, ARGIND, ARGC, ARGV,
372 ENVIRON, F0, NUM_INTERNAL_VARS
375 static const char vNames[] ALIGN1 =
376 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
377 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
378 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
380 "NR\0" "NF\0*" "IGNORECASE\0*"
381 "ENVIRON\0" "$\0*" "\0";
383 static const char vValues[] ALIGN1 =
384 "%.6g\0" "%.6g\0" " \0" " \0"
385 "\n\0" "\n\0" "\0" "\0"
389 /* hash size may grow to these values */
390 #define FIRST_PRIME 61
391 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
394 /* Globals. Split in two parts so that first one is addressed
395 * with (mostly short) negative offsets */
397 chain beginseq, mainseq, endseq;
399 node *break_ptr, *continue_ptr;
401 xhash *vhash, *ahash, *fdhash, *fnhash;
402 const char *g_progname;
405 int maxfields; /* used in fsrealloc() only */
414 smallint is_f0_split;
417 uint32_t t_info; /* often used */
423 var *intvar[NUM_INTERNAL_VARS]; /* often used */
425 /* former statics from various functions */
426 char *split_f0__fstrings;
428 uint32_t next_token__save_tclass;
429 uint32_t next_token__save_info;
430 uint32_t next_token__ltclass;
431 smallint next_token__concat_inserted;
433 smallint next_input_file__files_happen;
434 rstream next_input_file__rsm;
436 var *evaluate__fnargs;
437 unsigned evaluate__seed;
438 regex_t evaluate__sreg;
442 tsplitter exec_builtin__tspl;
444 /* biggest and least used members go last */
446 tsplitter fsplitter, rsplitter;
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /* char G1size[sizeof(G1)]; - 0x6c */
452 /* char Gsize[sizeof(G)]; - 0x1cc */
453 /* Trying to keep most of members accessible with short offsets: */
454 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
455 #define beginseq (G1.beginseq )
456 #define mainseq (G1.mainseq )
457 #define endseq (G1.endseq )
458 #define seq (G1.seq )
459 #define break_ptr (G1.break_ptr )
460 #define continue_ptr (G1.continue_ptr)
462 #define vhash (G1.vhash )
463 #define ahash (G1.ahash )
464 #define fdhash (G1.fdhash )
465 #define fnhash (G1.fnhash )
466 #define g_progname (G1.g_progname )
467 #define g_lineno (G1.g_lineno )
468 #define nfields (G1.nfields )
469 #define maxfields (G1.maxfields )
470 #define Fields (G1.Fields )
471 #define g_cb (G1.g_cb )
472 #define g_pos (G1.g_pos )
473 #define g_buf (G1.g_buf )
474 #define icase (G1.icase )
475 #define exiting (G1.exiting )
476 #define nextrec (G1.nextrec )
477 #define nextfile (G1.nextfile )
478 #define is_f0_split (G1.is_f0_split )
479 #define t_info (G.t_info )
480 #define t_tclass (G.t_tclass )
481 #define t_string (G.t_string )
482 #define t_double (G.t_double )
483 #define t_lineno (G.t_lineno )
484 #define t_rollback (G.t_rollback )
485 #define intvar (G.intvar )
486 #define fsplitter (G.fsplitter )
487 #define rsplitter (G.rsplitter )
488 #define INIT_G() do { \
489 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
504 /* ---- error handling ---- */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515 #if !ENABLE_FEATURE_AWK_MATH
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
/* Reset a variable to the all-zero state by clearing every byte of *vp. */
519 static void zero_out_var(var * vp)
521 memset(vp, 0, sizeof(*vp));
/* Report a fatal error and terminate.
 * The message is printed as "<g_progname>:<g_lineno>: <message>" through
 * bb_error_msg_and_die(). The function is declared NORETURN, so callers
 * do not continue after invoking it. */
524 static void syntax_error(const char *const message) NORETURN;
525 static void syntax_error(const char *const message)
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
530 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name. Each character is folded into the running
 * index as idx = ch + idx*63, written here as (idx << 6) - idx.
 * Callers reduce the value modulo the table size (hash->csize). */
532 static unsigned hashidx(const char *name)
536 while (*name) idx = *name++ + (idx << 6) - idx;
540 /* Create a new, empty hash: a zero-filled xhash header whose csize is
 * FIRST_PRIME, plus a zero-filled bucket array of csize hash_item pointers. */
541 static xhash *hash_init(void)
545 newhash = xzalloc(sizeof(xhash));
546 newhash->csize = FIRST_PRIME;
547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
552 /* Find an item in the hash; return a pointer to its data, or NULL if it is
 * not found. The bucket is selected by hashidx(name) % hash->csize and
 * entries are matched by exact name comparison (strcmp). */
553 static void *hash_search(xhash *hash, const char *name)
557 hi = hash->items [ hashidx(name) % hash->csize ];
559 if (strcmp(hi->name, name) == 0)
566 /* Grow the hash when it becomes too big.
 * The next table size is taken from PRIMES[hash->nprime], and nprime is
 * advanced. Every existing item is re-bucketed by hashidx(name) % newsize
 * and pushed onto the head of its new chain; finally csize and items are
 * switched over to the new table. */
567 static void hash_rebuild(xhash *hash)
569 unsigned newsize, i, idx;
570 hash_item **newitems, *hi, *thi;
/* All entries of PRIMES[] already used: no larger size is available.
 * NOTE(review): presumably returns here without growing - confirm. */
572 if (hash->nprime == ARRAY_SIZE(PRIMES))
575 newsize = PRIMES[hash->nprime++];
576 newitems = xzalloc(newsize * sizeof(hash_item *));
/* walk every bucket of the old table */
578 for (i = 0; i < hash->csize; i++) {
/* relink the item at the head of its bucket in the new table */
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
590 hash->csize = newsize;
591 hash->items = newitems;
594 /* Find an item in the hash, adding it if necessary. Return a pointer to its
 * data. A new item is zero-filled (xzalloc), gets a copy of name, and is
 * linked at the head of the bucket hashidx(name) % hash->csize. */
595 static void *hash_find(xhash *hash, const char *name)
601 hi = hash_search(hash, name);
/* Count the new element; integer division means this fires only once the
 * average chain length exceeds 10.
 * NOTE(review): presumably triggers hash_rebuild() - confirm. */
603 if (++hash->nel / hash->csize > 10)
/* hash_item.name is declared as name[1]; allocate extra room for the
 * whole name including its terminating NUL */
606 l = strlen(name) + 1;
607 hi = xzalloc(sizeof(hash_item) + l);
608 memcpy(hi->name, name, l);
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx];
612 hash->items[idx] = hi;
618 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
619 #define newvar(name) ((var*) hash_find(vhash, (name)))
620 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
623 static void hash_remove(xhash *hash, const char *name)
625 hash_item *hi, **phi;
627 phi = &(hash->items[hashidx(name) % hash->csize]);
630 if (strcmp(hi->name, name) == 0) {
631 hash->glen -= (strlen(name) + 1);
641 /* ------ some useful functions ------ */
643 static void skip_spaces(char **s)
648 if (*p == '\\' && p[1] == '\n') {
651 } else if (*p != ' ' && *p != '\t') {
659 static char *nextword(char **s)
663 while (*(*s)++) /* */;
668 static char nextchar(char **s)
674 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
675 if (c == '\\' && *s == pps) c = *((*s)++);
/* Return nonzero if c is alphanumeric (per isalnum) or an underscore. */
679 static ALWAYS_INLINE int isalnum_(int c)
681 return (isalnum(c) || c == '_');
684 /* -------- working with variables (set/get/copy/etc) -------- */
686 static xhash *iamarray(var *v)
690 while (a->type & VF_CHILD)
693 if (!(a->type & VF_ARRAY)) {
695 a->x.array = hash_init();
700 static void clear_array(xhash *array)
705 for (i = 0; i < array->csize; i++) {
706 hi = array->items[i];
710 free(thi->data.v.string);
713 array->items[i] = NULL;
715 array->glen = array->nel = 0;
718 /* clear a variable */
719 static var *clrvar(var *v)
721 if (!(v->type & VF_FSTR))
724 v->type &= VF_DONTTOUCH;
730 /* assign string value to variable */
731 static var *setvar_p(var *v, char *value)
739 /* Same as setvar_p, but stores a copy of the string: value is duplicated
 * with xstrdup() before being handed to setvar_p(). A NULL or empty value
 * is passed on as NULL. Returns whatever setvar_p() returns. */
740 static var *setvar_s(var *v, const char *value)
742 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
745 /* same as setvar_s but set USER flag */
746 static var *setvar_u(var *v, const char *value)
753 /* Set an array element to a user string.
 * The integer idx is formatted in decimal and used as the element's key;
 * the element is looked up (and created if missing) with findvar() in the
 * hash returned by iamarray(a). */
754 static void setari_u(var *a, int idx, const char *s)
/* 3 characters per byte of int covers the decimal digits and a sign,
 * +1 for the terminating NUL */
756 char sidx[sizeof(int)*3 + 1];
759 sprintf(sidx, "%d", idx);
760 v = findvar(iamarray(a), sidx);
764 /* assign numeric value to variable */
765 static var *setvar_i(var *v, double value)
768 v->type |= VF_NUMBER;
/* Return the string value of v; never returns NULL (a NULL string is
 * reported as ""). For a numeric variable without a cached string, the
 * number is formatted into g_buf with the CONVFMT format, a heap copy is
 * stored in v->string, and VF_CACHED is set so that later calls reuse it. */
774 static const char *getvar_s(var *v)
776 /* if v is numeric and has no cached string, convert it to string */
777 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
778 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
779 v->string = xstrdup(g_buf);
780 v->type |= VF_CACHED;
782 return (v->string == NULL) ? "" : v->string;
785 static double getvar_i(var *v)
789 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
793 v->number = strtod(s, &s);
794 if (v->type & VF_USER) {
802 v->type |= VF_CACHED;
807 static var *copyvar(var *dest, const var *src)
811 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
812 dest->number = src->number;
814 dest->string = xstrdup(src->string);
816 handle_special(dest);
/* Increment v: read its numeric value with getvar_i(), add 1 and store the
 * result back with setvar_i(). Returns the value returned by setvar_i(). */
820 static var *incvar(var *v)
822 return setvar_i(v, getvar_i(v) + 1.);
825 /* return true if v is number or numeric string */
826 static int is_numeric(var *v)
829 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
832 /* return 1 when value of v corresponds to true, 0 otherwise */
833 static int istrue(var *v)
836 return (v->number == 0) ? 0 : 1;
837 return (v->string && *(v->string)) ? 1 : 0;
840 /* temporary variables allocator. Last allocated should be first freed */
841 static var *nvalloc(int n)
849 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
854 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
855 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
857 g_cb->pos = g_cb->nv;
859 /*g_cb->next = NULL; - xzalloc did it */
860 if (pb) pb->next = g_cb;
866 while (v < g_cb->pos) {
875 static void nvfree(var *v)
879 if (v < g_cb->nv || v >= g_cb->pos)
880 syntax_error(EMSG_INTERNAL_ERROR);
882 for (p = v; p < g_cb->pos; p++) {
883 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
884 clear_array(iamarray(p));
885 free(p->x.array->items);
888 if (p->type & VF_WALK)
895 while (g_cb->prev && g_cb->pos == g_cb->nv) {
900 /* ------- awk program text parsing ------- */
902 /* Parse the next token at the global position g_pos and store the results in
903 * the global t_* variables. If the token is not expected, give up. Return the token class
905 static uint32_t next_token(uint32_t expected)
907 #define concat_inserted (G.next_token__concat_inserted)
908 #define save_tclass (G.next_token__save_tclass)
909 #define save_info (G.next_token__save_info)
910 /* Initialized to TC_OPTERM: */
911 #define ltclass (G.next_token__ltclass)
922 } else if (concat_inserted) {
923 concat_inserted = FALSE;
924 t_tclass = save_tclass;
933 while (*p != '\n' && *p != '\0')
942 } else if (*p == '\"') {
946 if (*p == '\0' || *p == '\n')
947 syntax_error(EMSG_UNEXP_EOS);
948 *(s++) = nextchar(&p);
954 } else if ((expected & TC_REGEXP) && *p == '/') {
958 if (*p == '\0' || *p == '\n')
959 syntax_error(EMSG_UNEXP_EOS);
963 *(s-1) = bb_process_escape_sequence((const char **)&p);
974 } else if (*p == '.' || isdigit(*p)) {
977 if (p[0] == '0' && (p[1] | 0x20) == 'x')
978 t_double = strtoll(p, &p, 0);
981 t_double = strtod(p, &p);
983 syntax_error(EMSG_UNEXP_TOKEN);
987 /* search for something known */
997 /* if token class is expected, token
998 * matches and it's not a longer word,
999 * then this is what we are looking for
1001 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1002 && *tl == *p && strncmp(p, tl, l) == 0
1003 && !((tc & TC_WORD) && isalnum_(p[l]))
1014 /* it's a name (var/array/function),
1015 * otherwise it's something wrong
1018 syntax_error(EMSG_UNEXP_TOKEN);
1021 while (isalnum_(*(++p))) {
1026 /* also consume whitespace between functionname and bracket */
1027 if (!(expected & TC_VARIABLE))
1041 /* skipping newlines in some cases */
1042 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1045 /* insert concatenation operator when needed */
1046 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1047 concat_inserted = TRUE;
1051 t_info = OC_CONCAT | SS | P(35);
1058 /* Are we ready for this? */
1059 if (!(ltclass & expected))
1060 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1061 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1064 #undef concat_inserted
1070 static void rollback_token(void)
1075 static node *new_node(uint32_t info)
1079 n = xzalloc(sizeof(node));
1081 n->lineno = g_lineno;
/* Turn node n into a regexp node (info = OC_REGEXP) for pattern s.
 * The pattern is compiled twice with xregcomp(): into re[0] as a plain
 * extended regex, and into re[1] with REG_ICASE added. re must therefore
 * point to storage for two regex_t objects. */
1085 static node *mk_re_node(const char *s, node *n, regex_t *re)
1087 n->info = OC_REGEXP;
1090 xregcomp(re, s, REG_EXTENDED);
1091 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized condition: require a TC_SEQSTART token, then parse
 * an expression terminated by TC_SEQTERM and return its subtree. */
1096 static node *condition(void)
1098 next_token(TC_SEQSTART);
1099 return parse_expr(TC_SEQTERM);
1102 /* parse expression terminated by given argument, return ptr
1103 * to built subtree. Terminator is eaten by parse_expr */
1104 static node *parse_expr(uint32_t iexp)
1113 sn.r.n = glptr = NULL;
1114 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1116 while (!((tc = next_token(xtc)) & iexp)) {
1117 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1118 /* input redirection (<) attached to glptr node */
1119 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1121 xtc = TC_OPERAND | TC_UOPPRE;
1124 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1125 /* for binary and postfix-unary operators, jump back over
1126 * previous operators with higher priority */
1128 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1129 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1131 if ((t_info & OPCLSMASK) == OC_TERNARY)
1133 cn = vn->a.n->r.n = new_node(t_info);
1135 if (tc & TC_BINOP) {
1137 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1138 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1140 next_token(TC_GETLINE);
1141 /* give maximum priority to this pipe */
1142 cn->info &= ~PRIMASK;
1143 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1147 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1152 /* for operands and prefix-unary operators, attach them
1155 cn = vn->r.n = new_node(t_info);
1157 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158 if (tc & (TC_OPERAND | TC_REGEXP)) {
1159 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1160 /* one should be very careful with switch on tclass -
1161 * only simple tclasses should be used! */
1166 v = hash_search(ahash, t_string);
1168 cn->info = OC_FNARG;
1169 cn->l.i = v->x.aidx;
1171 cn->l.v = newvar(t_string);
1173 if (tc & TC_ARRAY) {
1175 cn->r.n = parse_expr(TC_ARRTERM);
1182 v = cn->l.v = xzalloc(sizeof(var));
1184 setvar_i(v, t_double);
1186 setvar_s(v, t_string);
1190 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1195 cn->r.f = newfunc(t_string);
1196 cn->l.n = condition();
1200 cn = vn->r.n = parse_expr(TC_SEQTERM);
1206 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1210 cn->l.n = condition();
1219 /* add node to chain. Return ptr to alloc'd node */
1220 static node *chain_node(uint32_t info)
1225 seq->first = seq->last = new_node(0);
1227 if (seq->programname != g_progname) {
1228 seq->programname = g_progname;
1229 n = chain_node(OC_NEWSOURCE);
1230 n->l.s = xstrdup(g_progname);
1235 seq->last = n->a.n = new_node(OC_DONE);
1240 static void chain_expr(uint32_t info)
1244 n = chain_node(info);
1245 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1246 if (t_tclass & TC_GRPTERM)
1250 static node *chain_loop(node *nn)
1252 node *n, *n2, *save_brk, *save_cont;
1254 save_brk = break_ptr;
1255 save_cont = continue_ptr;
1257 n = chain_node(OC_BR | Vx);
1258 continue_ptr = new_node(OC_EXEC);
1259 break_ptr = new_node(OC_EXEC);
1261 n2 = chain_node(OC_EXEC | Vx);
1264 continue_ptr->a.n = n2;
1265 break_ptr->a.n = n->r.n = seq->last;
1267 continue_ptr = save_cont;
1268 break_ptr = save_brk;
1273 /* parse group and attach it to chain */
1274 static void chain_group(void)
1280 c = next_token(TC_GRPSEQ);
1281 } while (c & TC_NEWLINE);
1283 if (c & TC_GRPSTART) {
1284 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1285 if (t_tclass & TC_NEWLINE) continue;
1289 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1291 chain_expr(OC_EXEC | Vx);
1292 } else { /* TC_STATEMNT */
1293 switch (t_info & OPCLSMASK) {
1295 n = chain_node(OC_BR | Vx);
1296 n->l.n = condition();
1298 n2 = chain_node(OC_EXEC);
1300 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1302 n2->a.n = seq->last;
1310 n = chain_loop(NULL);
1315 n2 = chain_node(OC_EXEC);
1316 n = chain_loop(NULL);
1318 next_token(TC_WHILE);
1319 n->l.n = condition();
1323 next_token(TC_SEQSTART);
1324 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1325 if (t_tclass & TC_SEQTERM) { /* for-in */
1326 if ((n2->info & OPCLSMASK) != OC_IN)
1327 syntax_error(EMSG_UNEXP_TOKEN);
1328 n = chain_node(OC_WALKINIT | VV);
1331 n = chain_loop(NULL);
1332 n->info = OC_WALKNEXT | Vx;
1334 } else { /* for (;;) */
1335 n = chain_node(OC_EXEC | Vx);
1337 n2 = parse_expr(TC_SEMICOL);
1338 n3 = parse_expr(TC_SEQTERM);
1348 n = chain_node(t_info);
1349 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1350 if (t_tclass & TC_OUTRDR) {
1352 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1354 if (t_tclass & TC_GRPTERM)
1359 n = chain_node(OC_EXEC);
1364 n = chain_node(OC_EXEC);
1365 n->a.n = continue_ptr;
1368 /* delete, next, nextfile, return, exit */
1375 static void parse_program(char *p)
1384 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1385 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1387 if (tclass & TC_OPTERM)
1391 if (tclass & TC_BEGIN) {
1395 } else if (tclass & TC_END) {
1399 } else if (tclass & TC_FUNCDECL) {
1400 next_token(TC_FUNCTION);
1402 f = newfunc(t_string);
1403 f->body.first = NULL;
1405 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1406 v = findvar(ahash, t_string);
1407 v->x.aidx = (f->nargs)++;
1409 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1416 } else if (tclass & TC_OPSEQ) {
1418 cn = chain_node(OC_TEST);
1419 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1420 if (t_tclass & TC_GRPSTART) {
1424 chain_node(OC_PRINT);
1426 cn->r.n = mainseq.last;
1428 } else /* if (tclass & TC_GRPSTART) */ {
1436 /* -------- program execution part -------- */
1438 static node *mk_splitter(const char *s, tsplitter *spl)
1446 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1448 regfree(ire); // TODO: nuke ire, use re+1?
1450 if (strlen(s) > 1) {
1451 mk_re_node(s, n, re);
1453 n->info = (uint32_t) *s;
1459 /* use node as a regular expression. Supplied with node ptr and regex_t
1460 * storage space. Return ptr to regex (if result points to preg, it should
1461 * be regfree'd manually later)
1463 static regex_t *as_regex(node *op, regex_t *preg)
1468 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1469 return icase ? op->r.ire : op->l.re;
1472 s = getvar_s(evaluate(op, v));
1473 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1478 /* Gradually increasing buffer.
 * When *b is NULL or n has reached the current *size, the buffer is
 * reallocated with xrealloc() to n + n/2 + 80 bytes and *size is updated;
 * otherwise nothing is changed. */
1479 static void qrealloc(char **b, int n, int *size)
1481 if (!*b || n >= *size) {
1482 *size = n + (n>>1) + 80;
1483 *b = xrealloc(*b, *size);
1487 /* resize field storage space */
1488 static void fsrealloc(int size)
1492 if (size >= maxfields) {
1494 maxfields = size + 16;
1495 Fields = xrealloc(Fields, maxfields * sizeof(var));
1496 for (; i < maxfields; i++) {
1497 Fields[i].type = VF_SPECIAL;
1498 Fields[i].string = NULL;
1502 if (size < nfields) {
1503 for (i = size; i < nfields; i++) {
1510 static int awk_split(const char *s, node *spl, char **slist)
1515 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1517 /* in worst case, each char would be a separate field */
1518 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1521 c[0] = c[1] = (char)spl->info;
1523 if (*getvar_s(intvar[RS]) == '\0')
1526 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1528 return n; /* "": zero fields */
1529 n++; /* at least one field will be there */
1531 l = strcspn(s, c+2); /* len till next NUL or \n */
1532 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1533 && pmatch[0].rm_so <= l
1535 l = pmatch[0].rm_so;
1536 if (pmatch[0].rm_eo == 0) {
1540 n++; /* we saw yet another delimiter */
1542 pmatch[0].rm_eo = l;
1543 if (s[l]) pmatch[0].rm_eo++;
1548 s += pmatch[0].rm_eo;
1552 if (c[0] == '\0') { /* null split */
1560 if (c[0] != ' ') { /* single-character split */
1562 c[0] = toupper(c[0]);
1563 c[1] = tolower(c[1]);
1566 while ((s1 = strpbrk(s1, c))) {
1574 s = skip_whitespace(s);
1577 while (*s && !isspace(*s))
1584 static void split_f0(void)
1586 /* static char *fstrings; */
1587 #define fstrings (G.split_f0__fstrings)
1598 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1601 for (i = 0; i < n; i++) {
1602 Fields[i].string = nextword(&s);
1603 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1606 /* set NF manually to avoid side effects */
1608 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1609 intvar[NF]->number = nfields;
1613 /* perform additional actions when some internal variables changed */
1614 static void handle_special(var *v)
1618 const char *sep, *s;
1619 int sl, l, len, i, bsize;
1621 if (!(v->type & VF_SPECIAL))
1624 if (v == intvar[NF]) {
1625 n = (int)getvar_i(v);
1628 /* recalculate $0 */
1629 sep = getvar_s(intvar[OFS]);
1633 for (i = 0; i < n; i++) {
1634 s = getvar_s(&Fields[i]);
1637 memcpy(b+len, sep, sl);
1640 qrealloc(&b, len+l+sl, &bsize);
1641 memcpy(b+len, s, l);
1646 setvar_p(intvar[F0], b);
1649 } else if (v == intvar[F0]) {
1650 is_f0_split = FALSE;
1652 } else if (v == intvar[FS]) {
1653 mk_splitter(getvar_s(v), &fsplitter);
1655 } else if (v == intvar[RS]) {
1656 mk_splitter(getvar_s(v), &rsplitter);
1658 } else if (v == intvar[IGNORECASE]) {
1662 n = getvar_i(intvar[NF]);
1663 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1664 /* right here v is invalid. Just to note... */
1668 /* step through func/builtin/etc arguments */
1669 static node *nextarg(node **pn)
1674 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1683 static void hashwalk_init(var *v, xhash *array)
1689 if (v->type & VF_WALK)
1693 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1694 w[0] = w[1] = (char *)(w + 2);
1695 for (i = 0; i < array->csize; i++) {
1696 hi = array->items[i];
1698 strcpy(*w, hi->name);
1705 static int hashwalk_next(var *v)
1713 setvar_s(v, nextword(w+1));
1717 /* Evaluate a pattern node; return 1 when the result is true, 0 otherwise.
 * The node is evaluated into the global G.ptest__v and the result is
 * tested with istrue(). */
1718 static int ptest(node *pattern)
1720 /* ptest__v is "static": to save stack space? */
1721 return istrue(evaluate(pattern, &G.ptest__v));
1724 /* read next record from stream rsm into a variable v */
/*
 * The record separator is taken from the global rsplitter. Three cases
 * are visible below: a regexp separator (class OC_REGEXP), a single
 * separator character c taken from rsplitter.n.info, and otherwise a
 * mode that skips leading newlines and splits at "\n\n". The separator
 * text that ended the record is stored in RT. Callers in this file
 * treat a result greater than 0 as "a record was read".
 */
1725 static int awk_getline(rstream *rsm, var *v)
1728 regmatch_t pmatch[2];
1729 int a, p, pp=0, size;
1730 int fd, so, eo, r, rp;
1733 /* we're using our own buffer since we need access to accumulating
1736 fd = fileno(rsm->F);
1741 c = (char) rsplitter.n.info;
// start with a 256-byte buffer when none exists yet
1744 if (!m) qrealloc(&m, 256, &size);
// regexp separator: so/eo become the start/end offsets of the match;
// icase selects the r.ire regex instead of l.re
1750 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1751 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1752 b, 1, pmatch, 0) == 0) {
1753 so = pmatch[0].rm_so;
1754 eo = pmatch[0].rm_eo;
// single-character separator: search from offset pp, falling back to a
// NUL byte inside the data read so far
1758 } else if (c != '\0') {
1759 s = strchr(b+pp, c);
1760 if (!s) s = memchr(b+pp, '\0', p - pp);
// otherwise: step over leading newlines, then look for an empty line
1767 while (b[rp] == '\n')
1769 s = strstr(b+rp, "\n\n");
1772 while (b[eo] == '\n') eo++;
// move the data that starts at offset a down to the front of the buffer
1780 memmove(m, (const void *)(m+a), p+1);
1785 qrealloc(&m, a+p+128, &size);
// append more input after the p bytes already held, keeping one byte spare
1788 p += safe_read(fd, b+p, size-p-1);
1792 setvar_i(intvar[ERRNO], errno);
// NUL-terminate at so and at eo (the old byte is saved in c each time);
// RT receives the separator text that starts at so
1801 c = b[so]; b[so] = '\0';
1805 c = b[eo]; b[eo] = '\0';
1806 setvar_s(intvar[RT], b+so);
// Format number n into buffer b (size bytes) with snprintf.
// If int_as_int is set and n equals its int conversion, "%d" is used and
// format is ignored. Otherwise the last character of format selects the
// conversion: one of "diouxX" passes (int)n, one of "eEfgG" passes n as a
// double, anything else is reported as EMSG_INV_FMT.
// r holds the snprintf result; callers in this file add the function's
// result to an output offset.
// NOTE(review): (int)n is not defined for values outside the int range -
// confirm whether a wider integer type is wanted here.
1818 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1822 const char *s = format;
1824 if (int_as_int && n == (int)n) {
1825 r = snprintf(b, size, "%d", (int)n);
// walk to the end of format; c ends up as its last character (0 if format is empty)
1827 do { c = *s; } while (c && *++s);
1828 if (strchr("diouxX", c)) {
1829 r = snprintf(b, size, format, (int)n);
1830 } else if (strchr("eEfgG", c)) {
1831 r = snprintf(b, size, format, n);
1833 syntax_error(EMSG_INV_FMT);
1840 /* formatted output into an allocated buffer, return ptr to buffer */
/*
 * n is the argument list: the first argument is the format string, the
 * remaining ones are consumed one per conversion. The output buffer b is
 * grown as needed and i tracks the number of bytes written so far.
 */
1841 static char *awk_printf(node *n)
1846 int i, j, incr, bsize;
/* work on a private copy of the format string */
1851 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* advance f to the character after the next '%'; a "%%" pair does not stop the scan */
1856 while (*f && (*f != '%' || *(++f) == '%'))
/* skip non-letter characters up to the conversion letter */
1858 while (*f && !isalpha(*f)) {
1860 syntax_error("%*x formats are not supported");
/* reserve room for this format piece plus MAXVARFMT bytes of formatted value */
1864 incr = (f - s) + MAXVARFMT;
1865 qrealloc(&b, incr + i, &bsize);
1870 arg = evaluate(nextarg(&n), v);
/* character output: a numeric argument is used as a character code, otherwise the first character of the string value is printed */
1873 if (c == 'c' || !c) {
1874 i += sprintf(b+i, s, is_numeric(arg) ?
1875 (char)getvar_i(arg) : *getvar_s(arg));
1876 } else if (c == 's') {
/* string output: grow the buffer by the length of the string first */
1878 qrealloc(&b, incr+i+strlen(s1), &bsize);
1879 i += sprintf(b+i, s, s1);
/* everything else is formatted as a number */
1881 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1885 /* if there was an error while sprintf, return value is negative */
/* trim the buffer to the final length plus the terminator */
1889 b = xrealloc(b, i + 1);
1896 /* common substitution routine
1897 * replace (nm) substring of (src) that match (n) with (repl), store
1898 * result into (dest), return number of substitutions. If nm=0, replace
1899 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1900 * subexpression matching (\1-\9)
// rn is the regex node (the header text above calls it "n"); the result
// string is accumulated in ds, with di as the current write offset.
1902 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1907 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1908 regmatch_t pmatch[10];
// as_regex() is handed &sreg as scratch space; see the regfree() at the end
1911 re = as_regex(rn, &sreg);
1912 if (!src) src = intvar[F0];
1913 if (!dest) dest = intvar[F0];
// match repeatedly; once sp has moved past the start of the source string,
// REG_NOTBOL is passed to regexec
1918 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1919 so = pmatch[0].rm_so;
1920 eo = pmatch[0].rm_eo;
// copy the source text up to the end of the match (eo bytes) into ds
1922 qrealloc(&ds, di + eo + rl, &dssize);
1923 memcpy(ds + di, sp, eo);
// walk through the replacement text
1929 for (s = repl; *s; s++) {
// '&' or, when ex is set, a digit: nbs is presumably the count of
// backslashes seen just before it; the write offset is moved back
// by (nbs + 3) >> 1 - confirm against the elided lines
1935 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1936 di -= ((nbs + 3) >> 1);
// insert the text of submatch j
1945 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1946 qrealloc(&ds, di + rl + n, &dssize);
1947 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1959 if (!ds[di++]) break;
// append whatever is left of the source string
1963 qrealloc(&ds, di + strlen(sp), &dssize);
1964 strcpy(ds + di, sp);
// free the regex only when as_regex() returned the local scratch regex
1966 if (re == &sreg) regfree(re);
// Execute the builtin function call described by node op and store the
// result in res.
// Up to four arguments are collected first: an[] holds the argument nodes,
// av[] their evaluated values and as[] their string values. Bits of
// op->info (tested through isr) decide which arguments get evaluated and
// converted; the top two bits of info (info >> 30) give the minimum
// argument count. The builtin itself is selected by info & OPNMASK.
1970 static var *exec_builtin(node *op, var *res)
1972 #define tspl (G.exec_builtin__tspl)
1979 regmatch_t pmatch[2];
1989 isr = info = op->info;
1992 av[2] = av[3] = NULL;
1993 for (i = 0; i < 4 && op; i++) {
1994 an[i] = nextarg(&op);
1995 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1996 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2001 if ((uint32_t)nargs < (info >> 30))
2002 syntax_error(EMSG_TOO_FEW_ARGS);
2004 switch (info & OPNMASK) {
2007 #if ENABLE_FEATURE_AWK_MATH
// atan2(y, x): y is the first argument. This used to read av[i], but i is
// the loop counter left over from argument collection and indexes one past
// the last evaluated argument.
2008 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2010 syntax_error(EMSG_NO_MATH);
// split(): the separator is used directly when it is a regexp node,
// otherwise a splitter is built from its string value
2016 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2017 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2022 n = awk_split(as[0], spl, &s);
// the target array is emptied and refilled with elements 1..n
2024 clear_array(iamarray(av[1]));
2025 for (i=1; i<=n; i++)
2026 setari_u(av[1], i, nextword(&s1));
// substr(): positions are 1-based in awk, hence the -1; without a third
// argument the length is l-i
2033 i = getvar_i(av[1]) - 1;
2036 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2038 s = xstrndup(as[0]+i, n);
2042 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2043 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2045 setvar_i(res, (unsigned long)getvar_i(av[0]) & (unsigned long)getvar_i(av[1]));
2049 setvar_i(res, ~(unsigned long)getvar_i(av[0]));
2053 setvar_i(res, (unsigned long)getvar_i(av[0]) << (unsigned long)getvar_i(av[1]));
2057 setvar_i(res, (unsigned long)getvar_i(av[0]) | (unsigned long)getvar_i(av[1]));
2061 setvar_i(res, (unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1]));
2065 setvar_i(res, (unsigned long)getvar_i(av[0]) ^ (unsigned long)getvar_i(av[1]));
// case conversion works on a private copy, one character at a time
2075 s1 = s = xstrdup(as[0]);
2077 *s1 = (*to_xxx)(*s1);
// index(): l is the last start offset at which the needle can still fit
2086 l = strlen(as[0]) - ll;
2087 if (ll > 0 && l >= 0) {
2089 s = strstr(as[0], as[1]);
2090 if (s) n = (s - as[0]) + 1;
2092 /* this piece of code is terribly slow and
2093 * really should be rewritten
2095 for (i=0; i<=l; i++) {
2096 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2108 tt = getvar_i(av[1]);
2111 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2112 i = strftime(g_buf, MAXVARFMT,
2113 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2116 setvar_s(res, g_buf);
// match(): these offsets make RSTART 0 and RLENGTH -1 (presumably the
// no-match case - confirm)
2120 re = as_regex(an[1], &sreg);
2121 n = regexec(re, as[0], 1, pmatch, 0);
2126 pmatch[0].rm_so = 0;
2127 pmatch[0].rm_eo = -1;
2129 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2130 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2131 setvar_i(res, pmatch[0].rm_so);
2132 if (re == &sreg) regfree(re);
// three awk_sub() callers: the first passes a match number and ex=TRUE and
// writes into res; the other two replace all (nm=0) or only the first
// (nm=1) match in place and return the substitution count
2136 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2140 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2144 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2154 * Evaluate node - the heart of the program. Supplied with subtree
2155 * and place where to store result. returns ptr to result.
2157 #define XC(n) ((n) >> 8)
// XC() shifts an opcode class value right by 8 bits; it is applied both to
// the switch operand and to every case label below.
// evaluate(): op is the node to start from, res is where the result goes.
// Some node classes replace op with a successor node (op->a.n or op->r.n),
// others call evaluate() recursively on their operands.
2159 static var *evaluate(node *op, var *res)
2161 /* This procedure is recursive so we should count every byte */
2162 #define fnargs (G.evaluate__fnargs)
2163 /* seed is initialized to 1 */
2164 #define seed (G.evaluate__seed)
2165 #define sreg (G.evaluate__sreg)
2187 return setvar_s(res, NULL);
2193 opn = (opinfo & OPNMASK);
2194 g_lineno = op->lineno;
2196 /* execute inevitable things */
/* opinfo flag bits request operand preparation: OF_RES1/OF_RES2 evaluate
 * the left/right operand into v1 / v1+1, OF_STR1/OF_STR2 fetch their
 * string values and OF_NUM1 the numeric value of the left one */
2198 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2199 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2200 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2201 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2202 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2204 switch (XC(opinfo & OPCLSMASK)) {
2206 /* -- iterative node type -- */
2210 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2211 /* it's range pattern */
/* OF_CHECKED on the node records that the range has been entered; it is
 * cleared again once the end pattern matches */
2212 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2213 op->info |= OF_CHECKED;
2214 if (ptest(op1->r.n))
2215 op->info &= ~OF_CHECKED;
2222 op = (ptest(op1)) ? op->a.n : op->r.n;
2226 /* just evaluate an expression, also used as unconditional jump */
2230 /* branch, used in if-else and various loops */
2232 op = istrue(L.v) ? op->a.n : op->r.n;
2235 /* initialize for-in loop */
2236 case XC( OC_WALKINIT ):
2237 hashwalk_init(L.v, iamarray(R.v));
2240 /* get next array item */
2241 case XC( OC_WALKNEXT ):
2242 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2245 case XC( OC_PRINT ):
2246 case XC( OC_PRINTF ):
/* redirected output: R.s names the target; a pipe target is opened with
 * popen(), a file with mode "w" or "a" depending on opn */
2249 X.rsm = newfile(R.s);
2252 X.rsm->F = popen(R.s, "w");
2253 if (X.rsm->F == NULL)
2254 bb_perror_msg_and_die("popen");
2257 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2263 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* $0 is written as is; otherwise each argument is printed in turn */
2265 fputs(getvar_s(intvar[F0]), X.F);
2268 L.v = evaluate(nextarg(&op1), v1);
/* numbers are formatted with OFMT (integral values as plain integers),
 * everything else is written as a string */
2269 if (L.v->type & VF_NUMBER) {
2270 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2271 getvar_i(L.v), TRUE);
2274 fputs(getvar_s(L.v), X.F);
/* OFS goes between arguments, ORS ends the record */
2277 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2280 fputs(getvar_s(intvar[ORS]), X.F);
2282 } else { /* OC_PRINTF */
2283 L.s = awk_printf(op1);
2290 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2291 X.info = op1->info & OPCLSMASK;
2292 if (X.info == OC_VAR) {
2294 } else if (X.info == OC_FNARG) {
2295 R.v = &fnargs[op1->l.i];
2297 syntax_error(EMSG_NOT_ARRAY);
/* one element is removed by its key, or the whole array is cleared */
2302 L.s = getvar_s(evaluate(op1->r.n, v1));
2303 hash_remove(iamarray(R.v), L.s);
2305 clear_array(iamarray(R.v));
2309 case XC( OC_NEWSOURCE ):
2310 g_progname = op->l.s;
2313 case XC( OC_RETURN ):
2317 case XC( OC_NEXTFILE ):
2328 /* -- recursive node type -- */
2332 if (L.v == intvar[NF])
2336 case XC( OC_FNARG ):
2337 L.v = &fnargs[op->l.i];
/* with a subscript node present the result is the array element keyed by
 * R.s, otherwise the variable itself */
2339 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
/* result is 1 when key L.s exists in array R.v, else 0 */
2343 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2346 case XC( OC_REGEXP ):
2348 L.s = getvar_s(intvar[F0]);
2351 case XC( OC_MATCH ):
/* regexec() returns 0 on a match; the result is inverted when opn is '!' */
2354 X.re = as_regex(op1, &sreg);
2355 R.i = regexec(X.re, L.s, 0, NULL, 0);
2356 if (X.re == &sreg) regfree(X.re);
2357 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2361 /* if source is a temporary string, just relink it to dest */
2362 if (R.v == v1+1 && R.v->string) {
2363 res = setvar_p(L.v, R.v->string);
2366 res = copyvar(L.v, R.v);
2370 case XC( OC_TERNARY ):
/* the right operand must be an OC_COLON node holding both alternatives */
2371 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2372 syntax_error(EMSG_POSSIBLE_ERROR);
2373 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* user function call: the function must have a body */
2377 if (!op->r.f->body.first)
2378 syntax_error(EMSG_UNDEF_FUNC);
/* allocate nargs+1 argument slots; X.v keeps the start, R.v walks them */
2380 X.v = R.v = nvalloc(op->r.f->nargs+1);
2382 L.v = evaluate(nextarg(&op1), v1);
/* each slot is marked VF_CHILD and linked to the evaluated actual
 * parameter; the check below fires once nargs slots have been used */
2384 R.v->type |= VF_CHILD;
2385 R.v->x.parent = L.v;
2386 if (++R.v - X.v >= op->r.f->nargs)
2394 res = evaluate(op->r.f->body.first, res);
2401 case XC( OC_GETLINE ):
2402 case XC( OC_PGETLINE ):
/* explicit source: L.s names a command (OC_PGETLINE, opened with popen)
 * or a file */
2404 X.rsm = newfile(L.s);
2406 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2407 X.rsm->F = popen(L.s, "r");
2408 X.rsm->is_pipe = TRUE;
2410 X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
/* the current input file iF is opened on demand */
2414 if (!iF) iF = next_input_file();
2419 setvar_i(intvar[ERRNO], errno);
2427 L.i = awk_getline(X.rsm, R.v);
2430 incvar(intvar[FNR]);
2437 /* simple builtins */
2438 case XC( OC_FBLTIN ):
/* rand(): scale the C library value by RAND_MAX */
2446 R.d = (double)rand() / (double)RAND_MAX;
2448 #if ENABLE_FEATURE_AWK_MATH
2474 syntax_error(EMSG_NO_MATH);
/* srand(): use the argument as the seed when one was given, else the
 * current time */
2479 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2489 L.s = getvar_s(intvar[F0]);
/* system(): runs only when ENABLE_FEATURE_ALLOW_EXEC is on and the command
 * is non-empty; the value returned by system() is shifted right by 8 bits */
2495 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2496 ? (system(L.s) >> 8) : 0;
2504 X.rsm = newfile(L.s);
/* close(): find the stream by name, close it with pclose() or fclose()
 * according to is_pipe, free its buffer and drop it from fdhash */
2513 X.rsm = (rstream *)hash_search(fdhash, L.s);
2515 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2516 free(X.rsm->buffer);
2517 hash_remove(fdhash, L.s);
2520 setvar_i(intvar[ERRNO], errno);
2527 case XC( OC_BUILTIN ):
2528 res = exec_builtin(op, res);
2531 case XC( OC_SPRINTF ):
2532 setvar_p(res, awk_printf(op1));
2535 case XC( OC_UNARY ):
2537 L.d = R.d = getvar_i(R.v);
2552 L.d = istrue(X.v) ? 0 : 1;
2563 case XC( OC_FIELD ):
2564 R.i = (int)getvar_i(R.v);
/* Fields[] is 0-based, so field number R.i lives at index R.i - 1 */
2571 res = &Fields[R.i - 1];
2575 /* concatenation (" ") and index joining (",") */
2576 case XC( OC_CONCAT ):
2577 case XC( OC_COMMA ):
/* opn is reused as a byte count: both string lengths plus 2 */
2578 opn = strlen(L.s) + strlen(R.s) + 2;
/* for index joining, the buffer is enlarged by the length of SUBSEP */
2581 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2582 L.s = getvar_s(intvar[SUBSEP]);
2583 X.s = xrealloc(X.s, opn + strlen(L.s));
/* logical AND: the right side is tested only when the left side is true */
2591 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
/* logical OR: the right side is tested only when the left side is false */
2595 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2598 case XC( OC_BINARY ):
2599 case XC( OC_REPLACE ):
2600 R.d = getvar_i(R.v);
2612 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2616 #if ENABLE_FEATURE_AWK_MATH
2617 L.d = pow(L.d, R.d);
2619 syntax_error(EMSG_NO_MATH);
2623 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
/* remainder: subtract the truncated quotient times the divisor.
 * NOTE(review): the quotient is truncated through int, which is not
 * defined for quotients outside the int range - confirm whether fmod()
 * or a wider type is wanted */
2624 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; otherwise (OC_REPLACE) the value is stored
 * into X.v, which was set alongside L.v to the evaluated left operand */
2627 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2630 case XC( OC_COMPARE ):
/* numeric comparison when both sides are numeric, otherwise a string
 * comparison that ignores case when icase is set */
2631 if (is_numeric(L.v) && is_numeric(R.v)) {
2632 L.d = getvar_i(L.v) - getvar_i(R.v);
2634 L.s = getvar_s(L.v);
2635 R.s = getvar_s(R.v);
2636 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* opn with its low bit masked off selects the relation; the low bit then
 * chooses between R.i and its negation */
2638 switch (opn & 0xfe) {
2649 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2653 syntax_error(EMSG_POSSIBLE_ERROR);
/* the opcode class is compared against the SHIFT_TIL_THIS and
 * RECUR_FROM_THIS boundaries; presumably this decides whether to continue
 * with the next node or to stop - confirm */
2655 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2657 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2670 /* -------- main & co. -------- */
/*
 * Finish the run: the END sequence is evaluated, then every stream in
 * fdhash that is an open pipe is closed with pclose(). r is presumably
 * the exit status - confirm against the elided lines.
 */
2672 static int awk_exit(int r)
2683 evaluate(endseq.first, &tv);
2686 /* waiting for children */
/* walk all hash buckets of fdhash */
2687 for (i = 0; i < fdhash->csize; i++) {
2688 hi = fdhash->items[i];
2690 if (hi->data.rs.F && hi->data.rs.is_pipe)
2691 pclose(hi->data.rs.F);
2699 /* if expr looks like "var=value", perform assignment and return 1,
2700 * otherwise return 0 */
/*
 * Callers in this file use it for -v options and for file operands in
 * ARGV. The work is done on a private copy of expr.
 */
2701 static int is_assignment(const char *expr)
2703 char *exprc, *s, *s0, *s1;
2705 exprc = xstrdup(expr);
/* not an assignment unless the first character passes isalnum_() and an '=' is present */
2706 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value text is passed through nextchar() one character at a time and stored at s1 */
2714 *(s1++) = nextchar(&s);
/* create (or find) the variable named by exprc and assign the processed value s0 */
2717 setvar_u(newvar(exprc), s0);
2722 /* switch to next input file */
/*
 * The stream descriptor (rsm) and the files_happen flag live in the
 * global G structure, so they keep their values between calls. File
 * names are taken from ARGV, indexed by ARGIND.
 */
2723 static rstream *next_input_file(void)
2725 #define rsm (G.next_input_file__rsm)
2726 #define files_happen (G.next_input_file__files_happen)
2729 const char *fname, *ind;
/* close the previous input and reset the buffer position fields */
2731 if (rsm.F) fclose(rsm.F);
2733 rsm.pos = rsm.adv = 0;
/* the next index would reach ARGC, i.e. no ARGV entries are left */
2736 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
/* advance ARGIND and fetch ARGV[ARGIND] */
2742 ind = getvar_s(incvar(intvar[ARGIND]));
2743 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
/* empty entries are skipped, and "var=value" entries are consumed as
 * assignments by is_assignment() rather than opened */
2744 if (fname && *fname && !is_assignment(fname))
2745 F = xfopen_stdin(fname);
/* remember that a file operand was seen and publish its name in FILENAME */
2749 files_happen = TRUE;
2750 setvar_s(intvar[FILENAME], fname);
/*
 * Applet entry point. Visible stages: set up the hashes and internal
 * variables, import the environment into ENVIRON, parse the options
 * -F, -v, -f and -W, parse the program text, fill ARGV/ARGC, run the
 * BEGIN sequence, then read records from each input file and run the
 * main sequence on them, and finally call awk_exit().
 */
2757 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2758 int awk_main(int argc, char **argv)
2761 char *opt_F, *opt_W;
2762 llist_t *list_v = NULL;
2763 llist_t *list_f = NULL;
2768 char *vnames = (char *)vNames; /* cheat */
2769 char *vvalues = (char *)vValues;
2773 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2774 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2775 if (ENABLE_LOCALE_SUPPORT)
2776 setlocale(LC_NUMERIC, "C");
2780 /* allocate global buffer */
2781 g_buf = xmalloc(MAXVARFMT + 1);
/* one hash each for variables (vhash), ahash, open streams (fdhash) and functions (fnhash) */
2783 vhash = hash_init();
2784 ahash = hash_init();
2785 fdhash = hash_init();
2786 fnhash = hash_init();
2788 /* initialize variables */
/* vnames and vvalues are word lists walked in step with nextword(); a
 * value starting with '\377' is not assigned as a string, and a name
 * starting with '*' marks the variable VF_SPECIAL */
2789 for (i = 0; *vnames; i++) {
2790 intvar[i] = v = newvar(nextword(&vnames));
2791 if (*vvalues != '\377')
2792 setvar_s(v, nextword(&vvalues));
2796 if (*vnames == '*') {
2797 v->type |= VF_SPECIAL;
/* build the initial field and record splitters from the FS and RS defaults */
2802 handle_special(intvar[FS]);
2803 handle_special(intvar[RS]);
/* register the standard streams under their /dev names */
2805 newfile("/dev/stdin")->F = stdin;
2806 newfile("/dev/stdout")->F = stdout;
2807 newfile("/dev/stderr")->F = stderr;
2809 /* Huh, people report that sometimes environ is NULL. Oh well. */
2810 if (environ) for (envp = environ; *envp; envp++) {
2811 /* environ is writable, thus we don't strdup it needlessly */
2813 char *s1 = strchr(s, '=');
2816 /* Both findvar and setvar_u take const char*
2817 * as 2nd arg -> environment is not trashed */
2818 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2822 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2823 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2827 setvar_s(intvar[FS], opt_F); // -F
2828 while (list_v) { /* -v */
2829 if (!is_assignment(llist_pop(&list_v)))
2832 if (list_f) { /* -f */
2837 g_progname = llist_pop(&list_f);
2838 from_file = xfopen_stdin(g_progname);
2839 /* one byte is reserved for some trick in next_token */
/* read the program in chunks: the offset i starts at 1, each pass grows
 * the buffer to i + 4096 bytes and reads up to 4094 bytes; the loop ends
 * when fread() returns 0 */
2840 for (i = j = 1; j > 0; i += j) {
2841 s = xrealloc(s, i + 4096);
2842 j = fread(s + i, 1, 4094, from_file);
/* the program text starts after the reserved first byte */
2846 parse_program(s + 1);
2849 } else { // no -f: take program from 1st parameter
2852 g_progname = "cmd. line";
2853 parse_program(*argv++);
/* 0x8 is the fourth option bit, i.e. W in the option string "F:v:f:W:" */
2856 if (opt & 0x8) // -W
2857 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2859 /* fill in ARGV array */
2860 setvar_i(intvar[ARGC], argc + 1);
2861 setari_u(intvar[ARGV], 0, "awk");
2864 setari_u(intvar[ARGV], ++i, *argv++);
/* run BEGIN; with no main and no END sequence there is nothing left to do */
2866 evaluate(beginseq.first, &tv);
2867 if (!mainseq.first && !endseq.first)
2868 awk_exit(EXIT_SUCCESS);
2870 /* input file could already be opened in BEGIN block */
2871 if (!iF) iF = next_input_file();
2873 /* passing through input files */
/* FNR restarts at 0 and is incremented for every record read */
2876 setvar_i(intvar[FNR], 0);
2878 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2881 incvar(intvar[FNR]);
2882 evaluate(mainseq.first, &tv);
2889 syntax_error(strerror(errno));
2891 iF = next_input_file();
2894 awk_exit(EXIT_SUCCESS);