1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
/* Hash table with chained buckets. The same layout serves the variable/array,
 * redirect-stream and function tables (see the union in hash_item_s). */
81 typedef struct xhash_s {
82 unsigned nel; /* number of stored elements */
83 unsigned csize; /* current number of buckets in items[] */
84 unsigned nprime; /* index into PRIMES[] of the next size to grow to */
85 unsigned glen; /* total length of all item names, each counted with its NUL */
86 struct hash_item_s **items; /* bucket array: csize chain heads */
90 typedef struct node_s {
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175 | TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185 | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
198 #define xS (OF_RES2 | OF_STR2)
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
210 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
211 * For builtins that byte has a different meaning; its bits are n n s3 s2 s1 v3 v2 v1:
212 * nn - min. number of args, vN - resolve Nth arg to var, sN - resolve Nth arg to string
214 #define P(x) (x << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
218 /* Operation classes */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
224 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
225 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
227 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
228 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
229 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
231 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
232 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
233 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
234 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
235 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
236 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
237 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
238 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
241 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
245 /* simple builtins */
247 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] ALIGN1 =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* an asterisk marks SPECIAL vars; "$" is simply the unnamed field 0 */
367 CONVFMT, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, ARGIND, ARGC, ARGV,
372 ENVIRON, F0, NUM_INTERNAL_VARS
375 static const char vNames[] ALIGN1 =
376 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
377 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
378 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
380 "NR\0" "NF\0*" "IGNORECASE\0*"
381 "ENVIRON\0" "$\0*" "\0";
383 static const char vValues[] ALIGN1 =
384 "%.6g\0" "%.6g\0" " \0" " \0"
385 "\n\0" "\n\0" "\0" "\0"
389 /* hash size may grow to these values */
390 #define FIRST_PRIME 61
391 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
394 /* Globals. Split in two parts so that first one is addressed
395 * with (mostly short) negative offsets */
397 chain beginseq, mainseq, endseq;
399 node *break_ptr, *continue_ptr;
401 xhash *vhash, *ahash, *fdhash, *fnhash;
402 const char *g_progname;
405 int maxfields; /* used in fsrealloc() only */
414 smallint is_f0_split;
417 uint32_t t_info; /* often used */
423 var *intvar[NUM_INTERNAL_VARS]; /* often used */
425 /* former statics from various functions */
426 char *split_f0__fstrings;
428 uint32_t next_token__save_tclass;
429 uint32_t next_token__save_info;
430 uint32_t next_token__ltclass;
431 smallint next_token__concat_inserted;
433 smallint next_input_file__files_happen;
434 rstream next_input_file__rsm;
436 var *evaluate__fnargs;
437 unsigned evaluate__seed;
438 regex_t evaluate__sreg;
442 tsplitter exec_builtin__tspl;
444 /* biggest and least used members go last */
446 tsplitter fsplitter, rsplitter;
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /* char G1size[sizeof(G1)]; - 0x6c */
452 /* char Gsize[sizeof(G)]; - 0x1cc */
453 /* Trying to keep most of members accessible with short offsets: */
454 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
455 #define beginseq (G1.beginseq )
456 #define mainseq (G1.mainseq )
457 #define endseq (G1.endseq )
458 #define seq (G1.seq )
459 #define break_ptr (G1.break_ptr )
460 #define continue_ptr (G1.continue_ptr)
462 #define vhash (G1.vhash )
463 #define ahash (G1.ahash )
464 #define fdhash (G1.fdhash )
465 #define fnhash (G1.fnhash )
466 #define g_progname (G1.g_progname )
467 #define g_lineno (G1.g_lineno )
468 #define nfields (G1.nfields )
469 #define maxfields (G1.maxfields )
470 #define Fields (G1.Fields )
471 #define g_cb (G1.g_cb )
472 #define g_pos (G1.g_pos )
473 #define g_buf (G1.g_buf )
474 #define icase (G1.icase )
475 #define exiting (G1.exiting )
476 #define nextrec (G1.nextrec )
477 #define nextfile (G1.nextfile )
478 #define is_f0_split (G1.is_f0_split )
479 #define t_info (G.t_info )
480 #define t_tclass (G.t_tclass )
481 #define t_string (G.t_string )
482 #define t_double (G.t_double )
483 #define t_lineno (G.t_lineno )
484 #define t_rollback (G.t_rollback )
485 #define intvar (G.intvar )
486 #define fsplitter (G.fsplitter )
487 #define rsplitter (G.rsplitter )
488 #define INIT_G() do { \
489 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
504 /* ---- error handling ---- */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515 #if !ENABLE_FEATURE_AWK_MATH
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
/* Clear the whole var structure pointed to by vp to zero bytes. */
519 static void zero_out_var(var * vp)
521 memset(vp, 0, sizeof(*vp));
/* Report a fatal error. The text handed to bb_error_msg_and_die() has the form
 * "<g_progname>:<g_lineno>: <message>". The function is declared NORETURN, so
 * callers never get control back. */
524 static void syntax_error(const char *const message) NORETURN;
525 static void syntax_error(const char *const message)
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
530 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name. Every character is folded in as
 * idx = idx * 63 + ch (written as (idx << 6) - idx). Callers reduce the
 * result modulo the table's bucket count themselves. */
532 static unsigned hashidx(const char *name)
536 while (*name) idx = *name++ + (idx << 6) - idx;
540 /* Create a new, empty hash: a zero-filled xhash header plus a zero-filled
 * bucket array of FIRST_PRIME chain heads. */
541 static xhash *hash_init(void)
545 newhash = xzalloc(sizeof(xhash));
546 newhash->csize = FIRST_PRIME; /* initial bucket count */
547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
552 /* Find the item called name in hash. Only the bucket selected by
 * hashidx(name) % csize is examined, and names are compared with strcmp().
 * Return ptr to the item's data, NULL if not found. */
553 static void *hash_search(xhash *hash, const char *name)
557 hi = hash->items [ hashidx(name) % hash->csize ]; /* head of the bucket's chain */
559 if (strcmp(hi->name, name) == 0)
566 /* Grow hash when it becomes too big: take the next bucket count from
 * PRIMES[], re-hash every item into a fresh bucket array and switch the
 * table over to it. */
567 static void hash_rebuild(xhash *hash)
569 unsigned newsize, i, idx;
570 hash_item **newitems, *hi, *thi;
/* every size listed in PRIMES[] has already been used */
572 if (hash->nprime == ARRAY_SIZE(PRIMES))
575 newsize = PRIMES[hash->nprime++]; /* pick the next size and advance the index */
576 newitems = xzalloc(newsize * sizeof(hash_item *));
/* walk all old buckets */
578 for (i = 0; i < hash->csize; i++) {
/* bucket index under the new size; the item is linked in at the chain head */
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
/* publish the new geometry */
590 hash->csize = newsize;
591 hash->items = newitems;
594 /* Find item in hash, add it if necessary. Return ptr to data.
 * A newly added item is zero-filled apart from its name, and is linked in
 * at the head of its bucket's chain. */
595 static void *hash_find(xhash *hash, const char *name)
601 hi = hash_search(hash, name);
/* NOTE(review): the statements below presumably run only when the lookup
 * failed; the guarding condition is not visible here. */
/* count the new element; more than 10 elements per bucket on average is "too big" */
603 if (++hash->nel / hash->csize > 10)
606 l = strlen(name) + 1; /* name length including the NUL */
607 hi = xzalloc(sizeof(hash_item) + l); /* room for the name copy after the fixed part */
608 memcpy(hi->name, name, l);
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx]; /* push onto the front of the chain */
612 hash->items[idx] = hi;
618 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
619 #define newvar(name) ((var*) hash_find(vhash, (name)))
620 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Look for the item called name in hash and take it out of the table's
 * accounting. NOTE(review): the unlinking and freeing steps are presumably
 * on lines not visible here. */
623 static void hash_remove(xhash *hash, const char *name)
625 hash_item *hi, **phi;
627 phi = &(hash->items[hashidx(name) % hash->csize]); /* address of the bucket's head pointer */
630 if (strcmp(hi->name, name) == 0) {
631 hash->glen -= (strlen(name) + 1); /* name length plus its NUL leaves the total */
641 /* ------ some useful functions ------ */
643 static void skip_spaces(char **s)
648 if (*p == '\\' && p[1] == '\n') {
651 } else if (*p != ' ' && *p != '\t') {
659 static char *nextword(char **s)
663 while (*(*s)++) /* */;
668 static char nextchar(char **s)
674 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
675 if (c == '\\' && *s == pps) c = *((*s)++);
/* Nonzero if c is alphanumeric according to isalnum() or is an underscore. */
679 static ALWAYS_INLINE int isalnum_(int c)
681 return (isalnum(c) || c == '_');
684 /* -------- working with variables (set/get/copy/etc) -------- */
686 static xhash *iamarray(var *v)
690 while (a->type & VF_CHILD)
693 if (!(a->type & VF_ARRAY)) {
695 a->x.array = hash_init();
700 static void clear_array(xhash *array)
705 for (i = 0; i < array->csize; i++) {
706 hi = array->items[i];
710 free(thi->data.v.string);
713 array->items[i] = NULL;
715 array->glen = array->nel = 0;
718 /* clear a variable */
719 static var *clrvar(var *v)
721 if (!(v->type & VF_FSTR))
724 v->type &= VF_DONTTOUCH;
730 /* assign string value to variable */
731 static var *setvar_p(var *v, char *value)
739 /* Same as setvar_p but make a copy of the string (xstrdup).
 * A NULL or empty value is not copied: setvar_p receives NULL instead. */
740 static var *setvar_s(var *v, const char *value)
742 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
745 /* same as setvar_s but set USER flag */
746 static var *setvar_u(var *v, const char *value)
753 /* Set array element to user string. The integer idx is turned into its
 * decimal text form, which is the key used in the array's hash; the element
 * is created if it does not exist yet (findvar is hash_find).
 * NOTE(review): the assignment of s to the element is presumably on a line
 * not visible here. */
754 static void setari_u(var *a, int idx, const char *s)
756 char sidx[sizeof(int)*3 + 1]; /* 3 chars per byte of int, plus NUL */
759 sprintf(sidx, "%d", idx);
760 v = findvar(iamarray(a), sidx);
764 /* assign numeric value to variable */
765 static var *setvar_i(var *v, double value)
768 v->type |= VF_NUMBER;
/* Return the string value of v; never NULL (a missing string is returned
 * as ""). A number without a cached string is formatted with the CONVFMT
 * variable's format into g_buf; a copy is stored in v->string and
 * VF_CACHED is set so the conversion is not repeated. */
774 static const char *getvar_s(var *v)
776 /* if v is numeric and has no cached string, convert it to string */
777 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* TRUE is fmt_num's int_as_int flag: integral values are printed with "%d" */
778 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
779 v->string = xstrdup(g_buf);
780 v->type |= VF_CACHED;
782 return (v->string == NULL) ? "" : v->string;
785 static double getvar_i(var *v)
789 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
793 v->number = strtod(s, &s);
794 if (v->type & VF_USER) {
802 v->type |= VF_CACHED;
807 static var *copyvar(var *dest, const var *src)
811 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
812 dest->number = src->number;
814 dest->string = xstrdup(src->string);
816 handle_special(dest);
/* Add 1 to the numeric value of v (as obtained by getvar_i) and store it
 * back with setvar_i. Returns whatever setvar_i returns. */
820 static var *incvar(var *v)
822 return setvar_i(v, getvar_i(v) + 1.);
825 /* Return true if v is number or numeric string.
 * The XOR flips VF_DIRTY before masking, so the result is nonzero when
 * VF_NUMBER is set, or VF_USER is set, or VF_DIRTY is clear. */
826 static int is_numeric(var *v)
829 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
832 /* return 1 when value of v corresponds to true, 0 otherwise */
833 static int istrue(var *v)
836 return (v->number == 0) ? 0 : 1;
837 return (v->string && *(v->string)) ? 1 : 0;
840 /* Allocator for temporary variables. The last block allocated must be the first one freed */
841 static var *nvalloc(int n)
849 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
854 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
855 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
857 g_cb->pos = g_cb->nv;
859 /*g_cb->next = NULL; - xzalloc did it */
860 if (pb) pb->next = g_cb;
866 while (v < g_cb->pos) {
875 static void nvfree(var *v)
879 if (v < g_cb->nv || v >= g_cb->pos)
880 syntax_error(EMSG_INTERNAL_ERROR);
882 for (p = v; p < g_cb->pos; p++) {
883 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
884 clear_array(iamarray(p));
885 free(p->x.array->items);
888 if (p->type & VF_WALK)
895 while (g_cb->prev && g_cb->pos == g_cb->nv) {
900 /* ------- awk program text parsing ------- */
902 /* Parse the next token at the global parse position and place the results into
903 * the t_* globals (t_tclass, t_info, ...). If the token isn't expected, give up. Return token class
905 static uint32_t next_token(uint32_t expected)
907 #define concat_inserted (G.next_token__concat_inserted)
908 #define save_tclass (G.next_token__save_tclass)
909 #define save_info (G.next_token__save_info)
910 /* Initialized to TC_OPTERM: */
911 #define ltclass (G.next_token__ltclass)
922 } else if (concat_inserted) {
923 concat_inserted = FALSE;
924 t_tclass = save_tclass;
933 while (*p != '\n' && *p != '\0')
942 } else if (*p == '\"') {
946 if (*p == '\0' || *p == '\n')
947 syntax_error(EMSG_UNEXP_EOS);
948 *(s++) = nextchar(&p);
954 } else if ((expected & TC_REGEXP) && *p == '/') {
958 if (*p == '\0' || *p == '\n')
959 syntax_error(EMSG_UNEXP_EOS);
963 *(s-1) = bb_process_escape_sequence((const char **)&p);
974 } else if (*p == '.' || isdigit(*p)) {
976 t_double = strtod(p, &p);
978 syntax_error(EMSG_UNEXP_TOKEN);
982 /* search for something known */
992 /* if token class is expected, token
993 * matches and it's not a longer word,
994 * then this is what we are looking for
996 if ((tc & (expected | TC_WORD | TC_NEWLINE))
997 && *tl == *p && strncmp(p, tl, l) == 0
998 && !((tc & TC_WORD) && isalnum_(p[l]))
1009 /* it's a name (var/array/function),
1010 * otherwise it's something wrong
1013 syntax_error(EMSG_UNEXP_TOKEN);
1016 while (isalnum_(*(++p))) {
1021 /* also consume whitespace between functionname and bracket */
1022 if (!(expected & TC_VARIABLE))
1036 /* skipping newlines in some cases */
1037 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1040 /* insert concatenation operator when needed */
1041 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1042 concat_inserted = TRUE;
1046 t_info = OC_CONCAT | SS | P(35);
1053 /* Are we ready for this? */
1054 if (!(ltclass & expected))
1055 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1056 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1059 #undef concat_inserted
1065 static void rollback_token(void)
1070 static node *new_node(uint32_t info)
1074 n = xzalloc(sizeof(node));
1076 n->lineno = g_lineno;
1080 static node *mk_re_node(const char *s, node *n, regex_t *re)
1082 n->info = OC_REGEXP;
1085 xregcomp(re, s, REG_EXTENDED);
1086 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: require "(" (TC_SEQSTART), then parse
 * an expression terminated by ")" (TC_SEQTERM). Returns the subtree built
 * by parse_expr. */
1091 static node *condition(void)
1093 next_token(TC_SEQSTART);
1094 return parse_expr(TC_SEQTERM);
1097 /* parse expression terminated by given argument, return ptr
1098 * to built subtree. Terminator is eaten by parse_expr */
1099 static node *parse_expr(uint32_t iexp)
1108 sn.r.n = glptr = NULL;
1109 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1111 while (!((tc = next_token(xtc)) & iexp)) {
1112 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1113 /* input redirection (<) attached to glptr node */
1114 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1116 xtc = TC_OPERAND | TC_UOPPRE;
1119 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1120 /* for binary and postfix-unary operators, jump back over
1121 * previous operators with higher priority */
1123 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1124 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1126 if ((t_info & OPCLSMASK) == OC_TERNARY)
1128 cn = vn->a.n->r.n = new_node(t_info);
1130 if (tc & TC_BINOP) {
1132 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1133 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1135 next_token(TC_GETLINE);
1136 /* give maximum priority to this pipe */
1137 cn->info &= ~PRIMASK;
1138 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1142 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1147 /* for operands and prefix-unary operators, attach them
1150 cn = vn->r.n = new_node(t_info);
1152 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1153 if (tc & (TC_OPERAND | TC_REGEXP)) {
1154 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1155 /* one should be very careful with switch on tclass -
1156 * only simple tclasses should be used! */
1161 v = hash_search(ahash, t_string);
1163 cn->info = OC_FNARG;
1164 cn->l.i = v->x.aidx;
1166 cn->l.v = newvar(t_string);
1168 if (tc & TC_ARRAY) {
1170 cn->r.n = parse_expr(TC_ARRTERM);
1177 v = cn->l.v = xzalloc(sizeof(var));
1179 setvar_i(v, t_double);
1181 setvar_s(v, t_string);
1185 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1190 cn->r.f = newfunc(t_string);
1191 cn->l.n = condition();
1195 cn = vn->r.n = parse_expr(TC_SEQTERM);
1201 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1205 cn->l.n = condition();
1214 /* add node to chain. Return ptr to alloc'd node */
1215 static node *chain_node(uint32_t info)
1220 seq->first = seq->last = new_node(0);
1222 if (seq->programname != g_progname) {
1223 seq->programname = g_progname;
1224 n = chain_node(OC_NEWSOURCE);
1225 n->l.s = xstrdup(g_progname);
1230 seq->last = n->a.n = new_node(OC_DONE);
1235 static void chain_expr(uint32_t info)
1239 n = chain_node(info);
1240 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1241 if (t_tclass & TC_GRPTERM)
1245 static node *chain_loop(node *nn)
1247 node *n, *n2, *save_brk, *save_cont;
1249 save_brk = break_ptr;
1250 save_cont = continue_ptr;
1252 n = chain_node(OC_BR | Vx);
1253 continue_ptr = new_node(OC_EXEC);
1254 break_ptr = new_node(OC_EXEC);
1256 n2 = chain_node(OC_EXEC | Vx);
1259 continue_ptr->a.n = n2;
1260 break_ptr->a.n = n->r.n = seq->last;
1262 continue_ptr = save_cont;
1263 break_ptr = save_brk;
1268 /* parse group and attach it to chain */
1269 static void chain_group(void)
1275 c = next_token(TC_GRPSEQ);
1276 } while (c & TC_NEWLINE);
1278 if (c & TC_GRPSTART) {
1279 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1280 if (t_tclass & TC_NEWLINE) continue;
1284 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1286 chain_expr(OC_EXEC | Vx);
1287 } else { /* TC_STATEMNT */
1288 switch (t_info & OPCLSMASK) {
1290 n = chain_node(OC_BR | Vx);
1291 n->l.n = condition();
1293 n2 = chain_node(OC_EXEC);
1295 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1297 n2->a.n = seq->last;
1305 n = chain_loop(NULL);
1310 n2 = chain_node(OC_EXEC);
1311 n = chain_loop(NULL);
1313 next_token(TC_WHILE);
1314 n->l.n = condition();
1318 next_token(TC_SEQSTART);
1319 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1320 if (t_tclass & TC_SEQTERM) { /* for-in */
1321 if ((n2->info & OPCLSMASK) != OC_IN)
1322 syntax_error(EMSG_UNEXP_TOKEN);
1323 n = chain_node(OC_WALKINIT | VV);
1326 n = chain_loop(NULL);
1327 n->info = OC_WALKNEXT | Vx;
1329 } else { /* for (;;) */
1330 n = chain_node(OC_EXEC | Vx);
1332 n2 = parse_expr(TC_SEMICOL);
1333 n3 = parse_expr(TC_SEQTERM);
1343 n = chain_node(t_info);
1344 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1345 if (t_tclass & TC_OUTRDR) {
1347 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1349 if (t_tclass & TC_GRPTERM)
1354 n = chain_node(OC_EXEC);
1359 n = chain_node(OC_EXEC);
1360 n->a.n = continue_ptr;
1363 /* delete, next, nextfile, return, exit */
1370 static void parse_program(char *p)
1379 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1380 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1382 if (tclass & TC_OPTERM)
1386 if (tclass & TC_BEGIN) {
1390 } else if (tclass & TC_END) {
1394 } else if (tclass & TC_FUNCDECL) {
1395 next_token(TC_FUNCTION);
1397 f = newfunc(t_string);
1398 f->body.first = NULL;
1400 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1401 v = findvar(ahash, t_string);
1402 v->x.aidx = (f->nargs)++;
1404 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1411 } else if (tclass & TC_OPSEQ) {
1413 cn = chain_node(OC_TEST);
1414 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1415 if (t_tclass & TC_GRPSTART) {
1419 chain_node(OC_PRINT);
1421 cn->r.n = mainseq.last;
1423 } else /* if (tclass & TC_GRPSTART) */ {
1431 /* -------- program execution part -------- */
1433 static node *mk_splitter(const char *s, tsplitter *spl)
1441 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1443 regfree(ire); // TODO: nuke ire, use re+1?
1445 if (strlen(s) > 1) {
1446 mk_re_node(s, n, re);
1448 n->info = (uint32_t) *s;
1454 /* Use node as a regular expression. Supplied with node ptr and regex_t
1455 * storage space. Return ptr to regex (if the result points to preg, it must
1456 * be regfree'd manually later)
1458 static regex_t *as_regex(node *op, regex_t *preg)
1463 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1464 return icase ? op->r.ire : op->l.re;
1467 s = getvar_s(evaluate(op, v));
1468 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1473 /* Gradually increasing buffer. If *b is not allocated yet, or n has
 * reached the current capacity *size, reallocate *b to n + n/2 + 80 bytes
 * and store the new capacity in *size. Otherwise nothing changes. */
1474 static void qrealloc(char **b, int n, int *size)
1476 if (!*b || n >= *size)
1477 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1480 /* resize field storage space */
1481 static void fsrealloc(int size)
1485 if (size >= maxfields) {
1487 maxfields = size + 16;
1488 Fields = xrealloc(Fields, maxfields * sizeof(var));
1489 for (; i < maxfields; i++) {
1490 Fields[i].type = VF_SPECIAL;
1491 Fields[i].string = NULL;
1495 if (size < nfields) {
1496 for (i = size; i < nfields; i++) {
1503 static int awk_split(const char *s, node *spl, char **slist)
1508 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1510 /* in worst case, each char would be a separate field */
1511 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1514 c[0] = c[1] = (char)spl->info;
1516 if (*getvar_s(intvar[RS]) == '\0')
1519 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1521 return n; /* "": zero fields */
1522 n++; /* at least one field will be there */
1524 l = strcspn(s, c+2); /* len till next NUL or \n */
1525 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1526 && pmatch[0].rm_so <= l
1528 l = pmatch[0].rm_so;
1529 if (pmatch[0].rm_eo == 0) {
1533 n++; /* we saw yet another delimiter */
1535 pmatch[0].rm_eo = l;
1536 if (s[l]) pmatch[0].rm_eo++;
1541 s += pmatch[0].rm_eo;
1545 if (c[0] == '\0') { /* null split */
1553 if (c[0] != ' ') { /* single-character split */
1555 c[0] = toupper(c[0]);
1556 c[1] = tolower(c[1]);
1559 while ((s1 = strpbrk(s1, c))) {
1567 s = skip_whitespace(s);
1570 while (*s && !isspace(*s))
1577 static void split_f0(void)
1579 /* static char *fstrings; */
1580 #define fstrings (G.split_f0__fstrings)
1591 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1594 for (i = 0; i < n; i++) {
1595 Fields[i].string = nextword(&s);
1596 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1599 /* set NF manually to avoid side effects */
1601 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1602 intvar[NF]->number = nfields;
1606 /* perform additional actions when some internal variables changed */
1607 static void handle_special(var *v)
1611 const char *sep, *s;
1612 int sl, l, len, i, bsize;
1614 if (!(v->type & VF_SPECIAL))
1617 if (v == intvar[NF]) {
1618 n = (int)getvar_i(v);
1621 /* recalculate $0 */
1622 sep = getvar_s(intvar[OFS]);
1626 for (i = 0; i < n; i++) {
1627 s = getvar_s(&Fields[i]);
1630 memcpy(b+len, sep, sl);
1633 qrealloc(&b, len+l+sl, &bsize);
1634 memcpy(b+len, s, l);
1639 setvar_p(intvar[F0], b);
1642 } else if (v == intvar[F0]) {
1643 is_f0_split = FALSE;
1645 } else if (v == intvar[FS]) {
1646 mk_splitter(getvar_s(v), &fsplitter);
1648 } else if (v == intvar[RS]) {
1649 mk_splitter(getvar_s(v), &rsplitter);
1651 } else if (v == intvar[IGNORECASE]) {
1655 n = getvar_i(intvar[NF]);
1656 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1657 /* right here v is invalid. Just to note... */
1661 /* step through func/builtin/etc arguments */
1662 static node *nextarg(node **pn)
1667 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1676 static void hashwalk_init(var *v, xhash *array)
1682 if (v->type & VF_WALK)
1686 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1687 w[0] = w[1] = (char *)(w + 2);
1688 for (i = 0; i < array->csize; i++) {
1689 hi = array->items[i];
1691 strcpy(*w, hi->name);
1698 static int hashwalk_next(var *v)
1706 setvar_s(v, nextword(w+1));
1710 /* Evaluate node, return 1 when result is true, 0 otherwise.
 * Truth is decided by istrue(); the result of the evaluation is placed
 * in the global scratch variable G.ptest__v. */
1711 static int ptest(node *pattern)
1713 /* ptest__v is "static": to save stack space? */
1714 return istrue(evaluate(pattern, &G.ptest__v));
1717 /* read next record from stream rsm into a variable v */
/*
 * Behaviour that can be read off the visible lines:
 *  - data is pulled from fileno(rsm->F) with safe_read() into a buffer that
 *    is grown with qrealloc() (initially 256 bytes, later a+p+128)
 *  - the record terminator is located according to rsplitter:
 *      * class OC_REGEXP: regexec() with the case-insensitive or the
 *        case-sensitive compiled regex, selected by icase
 *      * otherwise a single character c taken from rsplitter.n.info;
 *        if it is not found, the first NUL byte in the data is looked up
 *      * c == NUL: leading newlines are skipped and the record ends at a
 *        blank line (two consecutive newlines); all newlines that follow
 *        are consumed as part of the terminator
 *  - so / eo are the start and end offsets of the terminator in the
 *    buffer; RT is set to the terminator text
 *  - ERRNO is set from errno on a path whose condition is not visible here
 * NOTE(review): the return value is computed on lines not visible in this
 * view; the main loop in awk_main() continues while it is > 0.
 */
1718 static int awk_getline(rstream *rsm, var *v)
1721 regmatch_t pmatch[2];
1722 int a, p, pp=0, size;
1723 int fd, so, eo, r, rp;
1726 /* we're using our own buffer since we need access to accumulating
1729 fd = fileno(rsm->F);
1734 c = (char) rsplitter.n.info;
1737 if (!m) qrealloc(&m, 256, &size);
1743 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1744 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1745 b, 1, pmatch, 0) == 0) {
1746 so = pmatch[0].rm_so;
1747 eo = pmatch[0].rm_eo;
1751 } else if (c != '\0') {
1752 s = strchr(b+pp, c);
1753 if (!s) s = memchr(b+pp, '\0', p - pp);
1760 while (b[rp] == '\n')
1762 s = strstr(b+rp, "\n\n");
1765 while (b[eo] == '\n') eo++;
1773 memmove(m, (const void *)(m+a), p+1);
1778 qrealloc(&m, a+p+128, &size);
1781 p += safe_read(fd, b+p, size-p-1);
1785 setvar_i(intvar[ERRNO], errno);
1794 c = b[so]; b[so] = '\0';
1798 c = b[eo]; b[eo] = '\0';
1799 setvar_s(intvar[RT], b+so);
/*
 * Format the number n into b (at most size bytes, via snprintf).
 *  - int_as_int set and n has no fractional part: printed with "%d",
 *    format is ignored
 *  - otherwise the last character of format selects the argument type:
 *    one of "diouxX" -> n is passed as int, one of "eEfgG" -> as double,
 *    anything else ends in syntax_error(EMSG_INV_FMT)
 * r receives the snprintf() result.
 * NOTE(review): what is finally returned is on lines not visible here.
 * NOTE(review): n == (int)n and the (int)n conversions are undefined when
 * n lies outside the range of int (C11 6.3.1.4); huge values may misprint.
 */
1811 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1815 const char *s = format;
1817 if (int_as_int && n == (int)n) {
1818 r = snprintf(b, size, "%d", (int)n);
/* walk to the end of format; c ends up as its last character (NUL if empty) */
1820 do { c = *s; } while (c && *++s);
/* NOTE(review): for an empty format c is NUL, strchr() then matches the
 * terminator of the set string and the integer branch is taken */
1821 if (strchr("diouxX", c)) {
1822 r = snprintf(b, size, format, (int)n);
1823 } else if (strchr("eEfgG", c)) {
1824 r = snprintf(b, size, format, n);
1826 syntax_error(EMSG_INV_FMT);
1833 /* formatted output into an allocated buffer, return ptr to buffer */
/*
 * n is the argument list: its first argument supplies the format string,
 * each following argument feeds one conversion.  The format is duplicated
 * (xstrdup) and scanned in place; every piece up to and including one
 * conversion is formatted separately and appended to b at offset i.
 *   %c (or a format ending without a conversion letter) - numeric argument:
 *        its value cast to char; otherwise first char of the string
 *   %s - the output buffer is grown by the length of the string first
 *   other - delegated to fmt_num() with int_as_int FALSE
 */
1834 static char *awk_printf(node *n)
1839 int i, j, incr, bsize;
1844 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* advance to the next conversion; a doubled percent sign is skipped over */
1849 while (*f && (*f != '%' || *(++f) == '%'))
/* pass flags / width / precision until the conversion letter */
1851 while (*f && !isalpha(*f)) {
1853 syntax_error("%*x formats are not supported");
/* room reserved for this piece: its own length plus MAXVARFMT for the value */
1857 incr = (f - s) + MAXVARFMT;
1858 qrealloc(&b, incr + i, &bsize);
1863 arg = evaluate(nextarg(&n), v);
1866 if (c == 'c' || !c) {
1867 i += sprintf(b+i, s, is_numeric(arg) ?
1868 (char)getvar_i(arg) : *getvar_s(arg));
1869 } else if (c == 's') {
1871 qrealloc(&b, incr+i+strlen(s1), &bsize);
1872 i += sprintf(b+i, s, s1);
1874 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1878 /* if there was an error while sprintf, return value is negative */
/* shrink the buffer to the i bytes produced plus one more byte */
1882 b = xrealloc(b, i + 1);
1889 /* common substitution routine
1890 * replace (nm)th substring of (src) that matches (rn) with (repl), store
1891 * result into (dest), return number of substitutions. If nm=0, replace
1892 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1893 * subexpression matching (\1-\9)
/*
 * Implementation notes (from the visible lines):
 *  - ds is the output buffer, di the write offset into it; it is grown with
 *    qrealloc() before every copy
 *  - sp is the scan position inside the source string
 *  - pmatch[0] is the whole match, pmatch[1..9] are the subexpressions
 */
1895 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1900 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1901 regmatch_t pmatch[10];
/* sreg is scratch space for as_regex(); if it was used, it is freed at the end */
1904 re = as_regex(rn, &sreg);
/* NULL source / destination mean $0 */
1905 if (!src) src = intvar[F0];
1906 if (!dest) dest = intvar[F0];
/* REG_NOTBOL is passed whenever sp is no longer the start of the source string */
1911 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1912 so = pmatch[0].rm_so;
1913 eo = pmatch[0].rm_eo;
/* copy everything up to the end of the match; room for repl (rl) is reserved too */
1915 qrealloc(&ds, di + eo + rl, &dssize);
1916 memcpy(ds + di, sp, eo);
/* expand the replacement text character by character */
1922 for (s = repl; *s; s++) {
/* an ampersand, or a digit when ex is set, refers to matched text */
1928 if (c == '&' || (ex && c >= '0' && c <= '9')) {
/* NOTE(review): di is moved back depending on nbs (apparently the count of
 * preceding backslashes); the exact escaping rule is on lines not visible here */
1929 di -= ((nbs + 3) >> 1);
/* insert the text of (sub)match j */
1938 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1939 qrealloc(&ds, di + rl + n, &dssize);
1940 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1952 if (!ds[di++]) break;
/* append whatever is left of the source after the last match */
1956 qrealloc(&ds, di + strlen(sp), &dssize);
1957 strcpy(ds + di, sp);
1959 if (re == &sreg) regfree(re);
/*
 * Execute a multi-argument builtin described by op and store the result in
 * res.  Up to four arguments are collected first:
 *   an[i] - argument node, av[i] - evaluated value, as[i] - string value
 * Bits of op->info control the collection: with any bit of 0x09000000 set
 * the argument is evaluated, with 0x08000000 set its string form is fetched
 * as well.  The two top bits (info >> 30) are compared against nargs as the
 * minimum argument count.  The builtin itself is selected by
 * (info & OPNMASK); the case labels are on lines not visible in this view.
 */
1963 static var *exec_builtin(node *op, var *res)
1965 #define tspl (G.exec_builtin__tspl)
1972 regmatch_t pmatch[2];
1982 isr = info = op->info;
/* optional 3rd / 4th arguments default to NULL */
1985 av[2] = av[3] = NULL;
1986 for (i = 0; i < 4 && op; i++) {
1987 an[i] = nextarg(&op);
1988 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1989 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1994 if ((uint32_t)nargs < (info >> 30))
1995 syntax_error(EMSG_TOO_FEW_ARGS);
1997 switch (info & OPNMASK) {
2000 #if ENABLE_FEATURE_AWK_MATH
/* NOTE(review): the first operand is av[i], not av[0].  Unless i is reset on
 * a line not visible here, i still holds the loop's final value, so av[i]
 * is a NULL or out-of-range slot.  This looks like a typo for av[0] --
 * confirm and fix. */
2001 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2003 syntax_error(EMSG_NO_MATH);
/* splitter: a regexp node is used directly, anything else is evaluated
 * to a string and turned into a splitter in tspl */
2009 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2010 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2015 n = awk_split(as[0], spl, &s);
/* the target array is emptied, then filled with elements 1..n */
2017 clear_array(iamarray(av[1]));
2018 for (i=1; i<=n; i++)
2019 setari_u(av[1], i, nextword(&s1));
/* awk positions are 1-based, C offsets 0-based */
2026 i = getvar_i(av[1]) - 1;
/* without a length argument take everything up to the end (l-i) */
2029 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2031 s = xstrndup(as[0]+i, n);
/* bitwise builtins operate on the operands converted to long.
 * NOTE(review): the conversion from double is undefined for out-of-range
 * values, and so are shifts by negative or too large counts */
2036 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2040 setvar_i(res, ~(long)getvar_i(av[0]));
2044 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2048 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2052 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2056 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
/* case conversion works on a private copy, one character at a time */
2066 s1 = s = xstrdup(as[0]);
2068 *s1 = (*to_xxx)(*s1);
/* l = number of start positions left to try (haystack length - needle length) */
2077 l = strlen(as[0]) - ll;
2078 if (ll > 0 && l >= 0) {
2080 s = strstr(as[0], as[1]);
2081 if (s) n = (s - as[0]) + 1;
2083 /* this piece of code is terribly slow and
2084 * really should be rewritten
2086 for (i=0; i<=l; i++) {
2087 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2099 tt = getvar_i(av[1]);
2102 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2103 i = strftime(g_buf, MAXVARFMT,
2104 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2107 setvar_s(res, g_buf);
2111 re = as_regex(an[1], &sreg);
2112 n = regexec(re, as[0], 1, pmatch, 0);
2117 pmatch[0].rm_so = 0;
2118 pmatch[0].rm_eo = -1;
2120 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2121 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2122 setvar_i(res, pmatch[0].rm_so);
2123 if (re == &sreg) regfree(re);
2127 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2131 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2135 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2145 * Evaluate node - the heart of the program. Supplied with subtree
2146 * and place where to store result. returns ptr to result.
/* XC() drops the low 8 bits of an opcode-class value; it is applied to both
 * the switch operand and the case labels below */
2148 #define XC(n) ((n) >> 8)
/*
 * evaluate() interprets the node tree starting at op and stores into res.
 * For each node it first performs the operand preparation requested by the
 * OF_RES1 / OF_RES2 / OF_STR1 / OF_STR2 / OF_NUM1 bits of the node info,
 * then dispatches on the opcode class.  In the visible cases under the
 * "iterative" heading the next node is chosen by assigning op; the cases
 * under the "recursive" heading produce a value.
 * L, R and X are scratch slots accessed as .v (var), .s (string),
 * .d (double), .i (int) and, for X, also .rsm / .F / .re / .info.
 */
2150 static var *evaluate(node *op, var *res)
2152 /* This procedure is recursive so we should count every byte */
2153 #define fnargs (G.evaluate__fnargs)
2154 /* seed is initialized to 1 */
2155 #define seed (G.evaluate__seed)
2156 #define sreg (G.evaluate__sreg)
2178 return setvar_s(res, NULL);
2184 opn = (opinfo & OPNMASK);
/* remember the source line of the node being executed */
2185 g_lineno = op->lineno;
2187 /* execute inevitable things */
/* left operand goes to v1, right operand to v1+1 */
2189 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2190 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2191 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2192 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2193 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2195 switch (XC(opinfo & OPCLSMASK)) {
2197 /* -- iterative node type -- */
2201 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2202 /* it's range pattern */
/* OF_CHECKED on the node records that the range start has matched;
 * it is cleared again once the range end matches */
2203 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2204 op->info |= OF_CHECKED;
2205 if (ptest(op1->r.n))
2206 op->info &= ~OF_CHECKED;
2213 op = (ptest(op1)) ? op->a.n : op->r.n;
2217 /* just evaluate an expression, also used as unconditional jump */
2221 /* branch, used in if-else and various loops */
2223 op = istrue(L.v) ? op->a.n : op->r.n;
2226 /* initialize for-in loop */
2227 case XC( OC_WALKINIT ):
2228 hashwalk_init(L.v, iamarray(R.v));
2231 /* get next array item */
2232 case XC( OC_WALKNEXT ):
2233 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2236 case XC( OC_PRINT ):
2237 case XC( OC_PRINTF ):
/* output redirection: the stream record is looked up by target name;
 * a pipe target is opened with popen(), a file with xfopen() in
 * write mode when opn is 'w' and in append mode otherwise */
2240 X.rsm = newfile(R.s);
2243 X.rsm->F = popen(R.s, "w");
2244 if (X.rsm->F == NULL)
2245 bb_perror_msg_and_die("popen");
2248 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2254 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2256 fputs(getvar_s(intvar[F0]), X.F);
2259 L.v = evaluate(nextarg(&op1), v1);
/* numbers are rendered through OFMT (integers as plain integers) */
2260 if (L.v->type & VF_NUMBER) {
2261 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2262 getvar_i(L.v), TRUE);
2265 fputs(getvar_s(L.v), X.F);
/* OFS between arguments, ORS after the last one */
2268 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2271 fputs(getvar_s(intvar[ORS]), X.F);
2273 } else { /* OC_PRINTF */
2274 L.s = awk_printf(op1);
2281 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2282 X.info = op1->info & OPCLSMASK;
2283 if (X.info == OC_VAR) {
2285 } else if (X.info == OC_FNARG) {
2286 R.v = &fnargs[op1->l.i];
2288 syntax_error(EMSG_NOT_ARRAY);
/* either one element is removed by key, or the whole array is cleared */
2293 L.s = getvar_s(evaluate(op1->r.n, v1));
2294 hash_remove(iamarray(R.v), L.s);
2296 clear_array(iamarray(R.v));
2300 case XC( OC_NEWSOURCE ):
2301 g_progname = op->l.s;
2304 case XC( OC_RETURN ):
2308 case XC( OC_NEXTFILE ):
2319 /* -- recursive node type -- */
2323 if (L.v == intvar[NF])
2327 case XC( OC_FNARG ):
2328 L.v = &fnargs[op->l.i];
/* with a subscript node present the result is the array element, else the variable itself */
2330 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 if key L.s exists in the array, else 0 */
2334 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2337 case XC( OC_REGEXP ):
2339 L.s = getvar_s(intvar[F0]);
2342 case XC( OC_MATCH ):
2345 X.re = as_regex(op1, &sreg);
2346 R.i = regexec(X.re, L.s, 0, NULL, 0);
2347 if (X.re == &sreg) regfree(X.re);
/* result is inverted when opn is '!' */
2348 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2352 /* if source is a temporary string, just relink it to dest */
2353 if (R.v == v1+1 && R.v->string) {
2354 res = setvar_p(L.v, R.v->string);
2357 res = copyvar(L.v, R.v);
2361 case XC( OC_TERNARY ):
/* the right child of a ternary must be the colon node holding both branches */
2362 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2363 syntax_error(EMSG_POSSIBLE_ERROR);
2364 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* user function call: the function must have a body */
2368 if (!op->r.f->body.first)
2369 syntax_error(EMSG_UNDEF_FUNC);
/* one var slot per declared argument plus one extra */
2371 X.v = R.v = nvalloc(op->r.f->nargs+1);
2373 L.v = evaluate(nextarg(&op1), v1);
/* the slot is marked VF_CHILD and linked to the actual parameter */
2375 R.v->type |= VF_CHILD;
2376 R.v->x.parent = L.v;
2377 if (++R.v - X.v >= op->r.f->nargs)
2385 res = evaluate(op->r.f->body.first, res);
2392 case XC( OC_GETLINE ):
2393 case XC( OC_PGETLINE ):
2395 X.rsm = newfile(L.s);
2397 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2398 X.rsm->F = popen(L.s, "r");
2399 X.rsm->is_pipe = TRUE;
2401 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
/* otherwise read from the current main input file, opening it if needed */
2405 if (!iF) iF = next_input_file();
2410 setvar_i(intvar[ERRNO], errno);
2418 L.i = awk_getline(X.rsm, R.v);
2421 incvar(intvar[FNR]);
2428 /* simple builtins */
2429 case XC( OC_FBLTIN ):
/* NOTE(review): rand() may return RAND_MAX, so this can yield exactly 1.0,
 * while POSIX awk specifies 0 <= rand() < 1 */
2437 R.d = (double)rand() / (double)RAND_MAX;
2439 #if ENABLE_FEATURE_AWK_MATH
2465 syntax_error(EMSG_NO_MATH);
/* srand(): seed from the argument if given, else from the current time */
2470 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2480 L.s = getvar_s(intvar[F0]);
/* NOTE(review): the status from system() is shifted right by 8 instead of
 * being decoded with WEXITSTATUS() -- assumes the traditional wait-status layout */
2486 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2487 ? (system(L.s) >> 8) : 0;
2495 X.rsm = newfile(L.s);
/* close(): pipes are closed with pclose(), files with fclose(); the
 * stream buffer is freed and the entry dropped from fdhash */
2504 X.rsm = (rstream *)hash_search(fdhash, L.s);
2506 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2507 free(X.rsm->buffer);
2508 hash_remove(fdhash, L.s);
2511 setvar_i(intvar[ERRNO], errno);
2518 case XC( OC_BUILTIN ):
2519 res = exec_builtin(op, res);
2522 case XC( OC_SPRINTF ):
2523 setvar_p(res, awk_printf(op1));
2526 case XC( OC_UNARY ):
2528 L.d = R.d = getvar_i(R.v);
2543 L.d = istrue(X.v) ? 0 : 1;
2554 case XC( OC_FIELD ):
2555 R.i = (int)getvar_i(R.v);
/* field numbers are 1-based, Fields[] is 0-based */
2562 res = &Fields[R.i - 1];
2566 /* concatenation (" ") and index joining (",") */
2567 case XC( OC_CONCAT ):
2568 case XC( OC_COMMA ):
/* opn is reused here as the size needed for both strings plus 2 bytes */
2569 opn = strlen(L.s) + strlen(R.s) + 2;
/* index joining additionally needs room for SUBSEP */
2572 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2573 L.s = getvar_s(intvar[SUBSEP]);
2574 X.s = xrealloc(X.s, opn + strlen(L.s));
/* short-circuit AND: the right side is tested only if the left is true */
2582 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
/* short-circuit OR: the right side is tested only if the left is false */
2586 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2589 case XC( OC_BINARY ):
2590 case XC( OC_REPLACE ):
2591 R.d = getvar_i(R.v);
2603 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2607 #if ENABLE_FEATURE_AWK_MATH
2608 L.d = pow(L.d, R.d);
2610 syntax_error(EMSG_NO_MATH);
2614 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
/* NOTE(review): the quotient is truncated through int, which is undefined
 * when it does not fit in int; fmod() would avoid that */
2615 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; OC_REPLACE stores back into X.v, the left operand */
2618 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2621 case XC( OC_COMPARE ):
/* numeric comparison only when both sides are numeric; otherwise the
 * strings are compared, ignoring case when icase is set */
2622 if (is_numeric(L.v) && is_numeric(R.v)) {
2623 L.d = getvar_i(L.v) - getvar_i(R.v);
2625 L.s = getvar_s(L.v);
2626 R.s = getvar_s(R.v);
2627 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* bits other than bit 0 of opn select the relation; bit 0 clear negates the result */
2629 switch (opn & 0xfe) {
2640 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2644 syntax_error(EMSG_POSSIBLE_ERROR);
2646 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2648 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2661 /* -------- main & co. -------- */
/*
 * Visible steps: the END sequence (endseq) is evaluated, then every stream
 * recorded in fdhash that is open and marked as a pipe is closed with
 * pclose(), which waits for the child process.
 * NOTE(review): how the status r is used and the final exit call are on
 * lines not visible in this view.
 */
2663 static int awk_exit(int r)
2674 evaluate(endseq.first, &tv);
2677 /* waiting for children */
/* visit every bucket of the stream hash */
2678 for (i = 0; i < fdhash->csize; i++) {
2679 hi = fdhash->items[i];
2681 if (hi->data.rs.F && hi->data.rs.is_pipe)
2682 pclose(hi->data.rs.F);
2690 /* if expr looks like "var=value", perform assignment and return 1,
2691 * otherwise return 0 */
/*
 * Works on a private copy of expr (xstrdup).  The text is rejected when its
 * first character fails isalnum_() or when it contains no equals sign.
 * The value part is rewritten in place through nextchar() (presumably to
 * process escape sequences -- confirm) and assigned with setvar_u() to the
 * variable named by exprc.
 * NOTE(review): exprc is presumably cut at the equals sign on a line not
 * visible here, and it is not visible where the copy is freed -- confirm.
 */
2692 static int is_assignment(const char *expr)
2694 char *exprc, *s, *s0, *s1;
2696 exprc = xstrdup(expr);
2697 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2705 *(s1++) = nextchar(&s);
2708 setvar_u(newvar(exprc), s0);
2713 /* switch to next input file */
/*
 * State lives in the globals struct: rsm is the stream record for the main
 * input, files_happen notes that at least one file operand was opened.
 * Visible steps:
 *  - a previously opened stream is closed and the read offsets are reset
 *  - if ARGIND+1 has reached ARGC there are no more operands (that branch
 *    is on lines not visible in this view)
 *  - otherwise ARGIND is incremented and ARGV[ARGIND] is fetched; a
 *    non-empty operand that is not a var=value assignment is opened with
 *    xfopen_stdin()
 *  - FILENAME is set to the operand that was opened
 */
2714 static rstream *next_input_file(void)
2716 #define rsm (G.next_input_file__rsm)
2717 #define files_happen (G.next_input_file__files_happen)
2720 const char *fname, *ind;
2722 if (rsm.F) fclose(rsm.F);
2724 rsm.pos = rsm.adv = 0;
2727 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
/* the incremented ARGIND, taken as a string, is the key into ARGV */
2733 ind = getvar_s(incvar(intvar[ARGIND]));
2734 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
/* is_assignment() performs the assignment as a side effect when it matches */
2735 if (fname && *fname && !is_assignment(fname))
2736 F = xfopen_stdin(fname);
2740 files_happen = TRUE;
2741 setvar_s(intvar[FILENAME], fname);
/*
 * Applet entry point.  Order of the visible steps:
 *  1. force the "C" numeric locale, allocate g_buf, create the four hashes
 *  2. create the internal variables from the packed vNames / vValues lists
 *  3. build the default FS / RS splitters and register the std streams
 *  4. import the environment into ENVIRON
 *  5. parse options -F, -v, -f, -W and load the program text
 *  6. fill ARGC / ARGV, run BEGIN, then loop over input files and records
 *  7. finish through awk_exit()
 */
2748 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2749 int awk_main(int argc, char **argv)
2752 char *opt_F, *opt_W;
2753 llist_t *list_v = NULL;
2754 llist_t *list_f = NULL;
2759 char *vnames = (char *)vNames; /* cheat */
2760 char *vvalues = (char *)vValues;
2764 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2765 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2766 if (ENABLE_LOCALE_SUPPORT)
2767 setlocale(LC_NUMERIC, "C");
2771 /* allocate global buffer */
2772 g_buf = xmalloc(MAXVARFMT + 1);
/* vhash, ahash, fdhash, fnhash: separate tables; fdhash holds the stream records */
2774 vhash = hash_init();
2775 ahash = hash_init();
2776 fdhash = hash_init();
2777 fnhash = hash_init();
2779 /* initialize variables */
/* names and initial values are consumed word by word in parallel;
 * a value starting with byte \377 means "no initial string value" */
2780 for (i = 0; *vnames; i++) {
2781 intvar[i] = v = newvar(nextword(&vnames));
2782 if (*vvalues != '\377')
2783 setvar_s(v, nextword(&vvalues));
/* an asterisk at the current position of the name list (after the name
 * was consumed) flags the variable just created as VF_SPECIAL */
2787 if (*vnames == '*') {
2788 v->type |= VF_SPECIAL;
/* build the splitters for the default FS and RS values */
2793 handle_special(intvar[FS]);
2794 handle_special(intvar[RS]);
/* make the standard streams reachable by their /dev names */
2796 newfile("/dev/stdin")->F = stdin;
2797 newfile("/dev/stdout")->F = stdout;
2798 newfile("/dev/stderr")->F = stderr;
2800 /* Huh, people report that sometimes environ is NULL. Oh well. */
2801 if (environ) for (envp = environ; *envp; envp++) {
2802 /* environ is writable, thus we don't strdup it needlessly */
2804 char *s1 = strchr(s, '=');
2807 /* Both findvar and setvar_u take const char*
2808 * as 2nd arg -> environment is not trashed */
2809 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2813 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2814 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2818 setvar_s(intvar[FS], opt_F); // -F
2819 while (list_v) { /* -v */
2820 if (!is_assignment(llist_pop(&list_v)))
2823 if (list_f) { /* -f */
2828 g_progname = llist_pop(&list_f);
2829 from_file = xfopen_stdin(g_progname);
2830 /* one byte is reserved for some trick in next_token */
/* the program file is read in chunks of up to 4094 bytes, starting at
 * offset 1; the buffer is regrown to i + 4096 before each read */
2831 for (i = j = 1; j > 0; i += j) {
2832 s = xrealloc(s, i + 4096);
2833 j = fread(s + i, 1, 4094, from_file);
2837 parse_program(s + 1);
2840 } else { // no -f: take program from 1st parameter
2843 g_progname = "cmd. line";
2844 parse_program(*argv++);
2847 if (opt & 0x8) // -W
2848 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2850 /* fill in ARGV array */
/* NOTE(review): argc is presumably adjusted for the consumed options on
 * lines not visible here; the +1 accounts for ARGV[0] -- confirm */
2851 setvar_i(intvar[ARGC], argc + 1);
2852 setari_u(intvar[ARGV], 0, "awk");
2855 setari_u(intvar[ARGV], ++i, *argv++);
/* run BEGIN; with neither main rules nor END there is nothing left to do */
2857 evaluate(beginseq.first, &tv);
2858 if (!mainseq.first && !endseq.first)
2859 awk_exit(EXIT_SUCCESS);
2861 /* input file could already be opened in BEGIN block */
2862 if (!iF) iF = next_input_file();
2864 /* passing through input files */
/* FNR restarts for every input file */
2867 setvar_i(intvar[FNR], 0);
/* one iteration per record: the record goes into $0, then the main rules run */
2869 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2872 incvar(intvar[FNR]);
2873 evaluate(mainseq.first, &tv);
2880 syntax_error(strerror(errno));
2882 iF = next_input_file();
2885 awk_exit(EXIT_SUCCESS);
2885 awk_exit(EXIT_SUCCESS);