1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names */
86 struct hash_item_s **items;
90 typedef struct node_s {
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175 | TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185 | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
198 #define xS (OF_RES2 | OF_STR2)
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
210 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
211 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
214 #define P(x) (x << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
218 /* Operation classes */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
224 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
225 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
227 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
228 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
229 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
231 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
232 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
233 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
234 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
235 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
236 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
237 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
238 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
241 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
245 /* simple builtins */
247 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] ALIGN1 =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* an asterisk marks SPECIAL vars; $ is just the unnamed field 0 (Field0) */
367 CONVFMT, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, ARGIND, ARGC, ARGV,
372 ENVIRON, F0, NUM_INTERNAL_VARS
375 static const char vNames[] ALIGN1 =
376 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
377 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
378 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
380 "NR\0" "NF\0*" "IGNORECASE\0*"
381 "ENVIRON\0" "$\0*" "\0";
383 static const char vValues[] ALIGN1 =
384 "%.6g\0" "%.6g\0" " \0" " \0"
385 "\n\0" "\n\0" "\0" "\0"
389 /* hash size may grow to these values */
390 #define FIRST_PRIME 61
391 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
394 /* Globals. Split into two parts so that the first one is addressed
395 * with (mostly short) negative offsets */
397 chain beginseq, mainseq, endseq;
399 node *break_ptr, *continue_ptr;
401 xhash *vhash, *ahash, *fdhash, *fnhash;
402 const char *g_progname;
405 int maxfields; /* used in fsrealloc() only */
414 smallint is_f0_split;
417 uint32_t t_info; /* often used */
423 var *intvar[NUM_INTERNAL_VARS]; /* often used */
425 /* former statics from various functions */
426 char *split_f0__fstrings;
428 uint32_t next_token__save_tclass;
429 uint32_t next_token__save_info;
430 uint32_t next_token__ltclass;
431 smallint next_token__concat_inserted;
433 smallint next_input_file__files_happen;
434 rstream next_input_file__rsm;
436 var *evaluate__fnargs;
437 unsigned evaluate__seed;
438 regex_t evaluate__sreg;
442 tsplitter exec_builtin__tspl;
444 /* biggest and least used members go last */
446 tsplitter fsplitter, rsplitter;
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /* char G1size[sizeof(G1)]; - 0x6c */
452 /* char Gsize[sizeof(G)]; - 0x1cc */
453 /* Trying to keep most of members accessible with short offsets: */
454 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
455 #define beginseq (G1.beginseq )
456 #define mainseq (G1.mainseq )
457 #define endseq (G1.endseq )
458 #define seq (G1.seq )
459 #define break_ptr (G1.break_ptr )
460 #define continue_ptr (G1.continue_ptr)
462 #define vhash (G1.vhash )
463 #define ahash (G1.ahash )
464 #define fdhash (G1.fdhash )
465 #define fnhash (G1.fnhash )
466 #define g_progname (G1.g_progname )
467 #define g_lineno (G1.g_lineno )
468 #define nfields (G1.nfields )
469 #define maxfields (G1.maxfields )
470 #define Fields (G1.Fields )
471 #define g_cb (G1.g_cb )
472 #define g_pos (G1.g_pos )
473 #define g_buf (G1.g_buf )
474 #define icase (G1.icase )
475 #define exiting (G1.exiting )
476 #define nextrec (G1.nextrec )
477 #define nextfile (G1.nextfile )
478 #define is_f0_split (G1.is_f0_split )
479 #define t_info (G.t_info )
480 #define t_tclass (G.t_tclass )
481 #define t_string (G.t_string )
482 #define t_double (G.t_double )
483 #define t_lineno (G.t_lineno )
484 #define t_rollback (G.t_rollback )
485 #define intvar (G.intvar )
486 #define fsplitter (G.fsplitter )
487 #define rsplitter (G.rsplitter )
488 #define INIT_G() do { \
489 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
490 G.next_token__ltclass = TC_OPTERM; \
491 G.evaluate__seed = 1; \
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) ATTRIBUTE_NORETURN;
504 /* ---- error handling ---- */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515 #if !ENABLE_FEATURE_AWK_MATH
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
519 static void zero_out_var(var * vp)
521 memset(vp, 0, sizeof(*vp));
524 static void syntax_error(const char *const message) ATTRIBUTE_NORETURN;
525 static void syntax_error(const char *const message)
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
530 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name into an unsigned value. Each character c updates
 * the running value as c + idx*63 (spelled (idx << 6) - idx). The value is
 * not reduced here; callers in this file take it modulo the table size. */
532 static unsigned hashidx(const char *name)
536 while (*name) idx = *name++ + (idx << 6) - idx;
540 /* create new hash */
/* Allocate a new xhash with xzalloc() and give it a bucket array of
 * FIRST_PRIME (61) hash_item pointers. */
541 static xhash *hash_init(void)
545 newhash = xzalloc(sizeof(xhash));
/* start with the smallest table size; larger sizes come from PRIMES[] */
546 newhash->csize = FIRST_PRIME;
547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
552 /* find item in hash, return ptr to data, NULL if not found */
/* Lookup only: never inserts. The bucket is selected by hashidx(name)
 * modulo the current table size. */
553 static void *hash_search(xhash *hash, const char *name)
/* head of the chain for the bucket this name hashes to */
557 hi = hash->items [ hashidx(name) % hash->csize ];
/* match is an exact, case-sensitive comparison of the stored name */
559 if (strcmp(hi->name, name) == 0)
566 /* grow hash if it becomes too big */
/* Move the table to the next size listed in PRIMES[]: allocate a new bucket
 * array, re-bucket the existing items under the new size, then install the
 * new array and size in *hash. */
567 static void hash_rebuild(xhash *hash)
569 unsigned newsize, i, idx;
570 hash_item **newitems, *hi, *thi;
/* every size in PRIMES[] has already been used */
572 if (hash->nprime == ARRAY_SIZE(PRIMES))
/* take the next size and advance the cursor for the following rebuild */
575 newsize = PRIMES[hash->nprime++];
576 newitems = xzalloc(newsize * sizeof(hash_item *));
/* visit every old bucket */
578 for (i = 0; i < hash->csize; i++) {
/* recompute the bucket under the new size and push the item
 * onto the head of that bucket's chain */
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
590 hash->csize = newsize;
591 hash->items = newitems;
594 /* find item in hash, add it if necessary. Return ptr to data */
/* Look the name up with hash_search() first; the visible insertion path
 * allocates a new hash_item carrying a private copy of the name and links
 * it at the head of its bucket chain. */
595 static void *hash_find(xhash *hash, const char *name)
601 hi = hash_search(hash, name);
/* count the new element; more than 10 elements per bucket on average is
 * the growth threshold (the action taken is not visible here -
 * presumably hash_rebuild(), TODO confirm) */
603 if (++hash->nel / hash->csize > 10)
/* l includes the terminating NUL */
606 l = strlen(name) + 1;
/* name[] is declared with one element but is "really longer": the
 * extra l bytes are allocated right after the struct */
607 hi = xzalloc(sizeof(hash_item) + l);
608 memcpy(hi->name, name, l);
/* insert at the head of the chain for this name's bucket */
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx];
612 hash->items[idx] = hi;
618 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
619 #define newvar(name) ((var*) hash_find(vhash, (name)))
620 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Remove the item called name from the hash, if an exact match is found in
 * the bucket the name hashes to. */
623 static void hash_remove(xhash *hash, const char *name)
625 hash_item *hi, **phi;
/* phi starts out pointing at the bucket head link */
627 phi = &(hash->items[hashidx(name) % hash->csize]);
630 if (strcmp(hi->name, name) == 0) {
/* glen is the summed length of item names; each name is accounted
 * for together with its terminating NUL */
631 hash->glen -= (strlen(name) + 1);
641 /* ------ some useful functions ------ */
643 static void skip_spaces(char **s)
648 if (*p == '\\' && p[1] == '\n') {
651 } else if (*p != ' ' && *p != '\t') {
659 static char *nextword(char **s)
663 while (*(*s)++) /* */;
668 static char nextchar(char **s)
674 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
675 if (c == '\\' && *s == pps) c = *((*s)++);
679 static ALWAYS_INLINE int isalnum_(int c)
681 return (isalnum(c) || c == '_');
/*
 * Open path with the given mode through xfopen(), except that the
 * single-character name "-" yields stdin instead of opening a file.
 */
static FILE *afopen(const char *path, const char *mode)
{
	if (strcmp(path, "-") == 0)
		return stdin;
	return xfopen(path, mode);
}
689 /* -------- working with variables (set/get/copy/etc) -------- */
691 static xhash *iamarray(var *v)
695 while (a->type & VF_CHILD)
698 if (!(a->type & VF_ARRAY)) {
700 a->x.array = hash_init();
705 static void clear_array(xhash *array)
710 for (i = 0; i < array->csize; i++) {
711 hi = array->items[i];
715 free(thi->data.v.string);
718 array->items[i] = NULL;
720 array->glen = array->nel = 0;
723 /* clear a variable */
724 static var *clrvar(var *v)
726 if (!(v->type & VF_FSTR))
729 v->type &= VF_DONTTOUCH;
735 /* assign string value to variable */
736 static var *setvar_p(var *v, char *value)
744 /* same as setvar_p but make a copy of string */
745 static var *setvar_s(var *v, const char *value)
747 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
750 /* same as setvar_s but set USER flag */
751 static var *setvar_u(var *v, const char *value)
758 /* set array element to user string */
759 static void setari_u(var *a, int idx, const char *s)
761 char sidx[sizeof(int)*3 + 1];
764 sprintf(sidx, "%d", idx);
765 v = findvar(iamarray(a), sidx);
769 /* assign numeric value to variable */
770 static var *setvar_i(var *v, double value)
773 v->type |= VF_NUMBER;
/* Return the string value of v. The result is never NULL: a NULL string is
 * reported as "". */
779 static const char *getvar_s(var *v)
781 /* if v is numeric and has no cached string, convert it to string */
782 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format into the shared g_buf using the current CONVFMT; the final TRUE
 * is fmt_num()'s int_as_int argument */
783 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
/* keep a private copy and remember that the string form is now cached */
784 v->string = xstrdup(g_buf);
785 v->type |= VF_CACHED;
787 return (v->string == NULL) ? "" : v->string;
/* Numeric accessor for v. The visible part handles a variable that is
 * neither VF_NUMBER nor VF_CACHED: its string is parsed with strtod() and
 * the result is stored in v->number, after which VF_CACHED is set. */
790 static double getvar_i(var *v)
/* no number stored and nothing cached yet */
794 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
/* strtod() also moves s to the first character it did not consume */
798 v->number = strtod(s, &s);
/* extra handling for user input, which "may be a numeric string"
 * (branch body not visible here) */
799 if (v->type & VF_USER) {
807 v->type |= VF_CACHED;
/* Copy the value of src into dest: type flags, number and a duplicate of
 * the string, then run the special-variable hook on dest. */
812 static var *copyvar(var *dest, const var *src)
/* take over src's value flags, but none of the static VF_DONTTOUCH flags
 * and not VF_FSTR (dest gets its own string copy below) */
816 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
817 dest->number = src->number;
819 dest->string = xstrdup(src->string);
/* dest may be a SPECIAL variable that needs follow-up work when changed */
821 handle_special(dest);
825 static var *incvar(var *v)
827 return setvar_i(v, getvar_i(v) + 1.);
830 /* return true if v is number or numeric string */
831 static int is_numeric(var *v)
/* XOR flips the VF_DIRTY bit before masking, so the result is nonzero when
 * VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear. The value is a
 * flag mask, not necessarily 1. */
834 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
837 /* return 1 when value of v corresponds to true, 0 otherwise */
838 static int istrue(var *v)
841 return (v->number == 0) ? 0 : 1;
842 return (v->string && *(v->string)) ? 1 : 0;
845 /* Temporary variable allocator. The most recently allocated variables must be freed first (LIFO order) */
846 static var *nvalloc(int n)
854 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
859 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
860 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
862 g_cb->pos = g_cb->nv;
865 if (pb) pb->next = g_cb;
871 while (v < g_cb->pos) {
/* Release the temporary variables from v up to the current allocation mark
 * (g_cb->pos) of the current block g_cb. */
880 static void nvfree(var *v)
/* v must lie within the used part [nv, pos) of the current block; anything
 * else is reported as an internal error */
884 if (v < g_cb->nv || v >= g_cb->pos)
885 syntax_error(EMSG_INTERNAL_ERROR);
887 for (p = v; p < g_cb->pos; p++) {
/* a temporary that is an array itself (VF_ARRAY set, VF_CHILD clear):
 * empty it and free its bucket table */
888 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
889 clear_array(iamarray(p));
890 free(p->x.array->items);
/* the variable carries an allocated walker list */
893 if (p->type & VF_WALK)
/* loop while the current block is empty and has a predecessor (loop body
 * not visible here) */
900 while (g_cb->prev && g_cb->pos == g_cb->nv) {
905 /* ------- awk program text parsing ------- */
907 /* Parse the next token pointed to by global pos, place results into global ttt.
908 * If the token isn't expected, give up with a syntax error. Return the token class
910 static uint32_t next_token(uint32_t expected)
912 #define concat_inserted (G.next_token__concat_inserted)
913 #define save_tclass (G.next_token__save_tclass)
914 #define save_info (G.next_token__save_info)
915 /* Initialized to TC_OPTERM: */
916 #define ltclass (G.next_token__ltclass)
927 } else if (concat_inserted) {
928 concat_inserted = FALSE;
929 t_tclass = save_tclass;
938 while (*p != '\n' && *p != '\0')
947 } else if (*p == '\"') {
951 if (*p == '\0' || *p == '\n')
952 syntax_error(EMSG_UNEXP_EOS);
953 *(s++) = nextchar(&p);
959 } else if ((expected & TC_REGEXP) && *p == '/') {
963 if (*p == '\0' || *p == '\n')
964 syntax_error(EMSG_UNEXP_EOS);
968 *(s-1) = bb_process_escape_sequence((const char **)&p);
979 } else if (*p == '.' || isdigit(*p)) {
981 t_double = strtod(p, &p);
983 syntax_error(EMSG_UNEXP_TOKEN);
987 /* search for something known */
997 /* if token class is expected, token
998 * matches and it's not a longer word,
999 * then this is what we are looking for
1001 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1002 && *tl == *p && strncmp(p, tl, l) == 0
1003 && !((tc & TC_WORD) && isalnum_(p[l]))
1014 /* it's a name (var/array/function),
1015 * otherwise it's something wrong
1018 syntax_error(EMSG_UNEXP_TOKEN);
1021 while (isalnum_(*(++p))) {
1026 /* also consume whitespace between functionname and bracket */
1027 if (!(expected & TC_VARIABLE))
1041 /* skipping newlines in some cases */
1042 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1045 /* insert concatenation operator when needed */
1046 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1047 concat_inserted = TRUE;
1051 t_info = OC_CONCAT | SS | P(35);
1058 /* Are we ready for this? */
1059 if (!(ltclass & expected))
1060 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1061 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1064 #undef concat_inserted
1070 static void rollback_token(void)
1075 static node *new_node(uint32_t info)
1079 n = xzalloc(sizeof(node));
1081 n->lineno = g_lineno;
/* Turn node n into an OC_REGEXP node for pattern s. re must point at storage
 * for two consecutive regex_t objects: re[0] receives the case-sensitive
 * compilation and re[1] the case-insensitive one. */
1085 static node *mk_re_node(const char *s, node *n, regex_t *re)
1087 n->info = OC_REGEXP;
/* same pattern compiled twice, differing only in REG_ICASE */
1090 xregcomp(re, s, REG_EXTENDED);
1091 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1096 static node *condition(void)
1098 next_token(TC_SEQSTART);
1099 return parse_expr(TC_SEQTERM);
1102 /* parse expression terminated by given argument, return ptr
1103 * to built subtree. Terminator is eaten by parse_expr */
1104 static node *parse_expr(uint32_t iexp)
1113 sn.r.n = glptr = NULL;
1114 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1116 while (!((tc = next_token(xtc)) & iexp)) {
1117 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1118 /* input redirection (<) attached to glptr node */
1119 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1121 xtc = TC_OPERAND | TC_UOPPRE;
1124 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1125 /* for binary and postfix-unary operators, jump back over
1126 * previous operators with higher priority */
1128 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1129 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1131 if ((t_info & OPCLSMASK) == OC_TERNARY)
1133 cn = vn->a.n->r.n = new_node(t_info);
1135 if (tc & TC_BINOP) {
1137 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1138 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1140 next_token(TC_GETLINE);
1141 /* give maximum priority to this pipe */
1142 cn->info &= ~PRIMASK;
1143 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1147 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1152 /* for operands and prefix-unary operators, attach them
1155 cn = vn->r.n = new_node(t_info);
1157 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158 if (tc & (TC_OPERAND | TC_REGEXP)) {
1159 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1160 /* one should be very careful with switch on tclass -
1161 * only simple tclasses should be used! */
1166 v = hash_search(ahash, t_string);
1168 cn->info = OC_FNARG;
1169 cn->l.i = v->x.aidx;
1171 cn->l.v = newvar(t_string);
1173 if (tc & TC_ARRAY) {
1175 cn->r.n = parse_expr(TC_ARRTERM);
1182 v = cn->l.v = xzalloc(sizeof(var));
1184 setvar_i(v, t_double);
1186 setvar_s(v, t_string);
1190 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1195 cn->r.f = newfunc(t_string);
1196 cn->l.n = condition();
1200 cn = vn->r.n = parse_expr(TC_SEQTERM);
1206 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1210 cn->l.n = condition();
1219 /* add node to chain. Return ptr to alloc'd node */
1220 static node *chain_node(uint32_t info)
1225 seq->first = seq->last = new_node(0);
1227 if (seq->programname != g_progname) {
1228 seq->programname = g_progname;
1229 n = chain_node(OC_NEWSOURCE);
1230 n->l.s = xstrdup(g_progname);
1235 seq->last = n->a.n = new_node(OC_DONE);
1240 static void chain_expr(uint32_t info)
1244 n = chain_node(info);
1245 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1246 if (t_tclass & TC_GRPTERM)
/* Emit the node skeleton of a loop into the current chain. The global
 * break/continue targets are replaced for the duration of the call and
 * restored before returning, so nested loops each get their own targets. */
1250 static node *chain_loop(node *nn)
1252 node *n, *n2, *save_brk, *save_cont;
/* remember the enclosing loop's targets */
1254 save_brk = break_ptr;
1255 save_cont = continue_ptr;
/* branch node, followed by fresh placeholder targets for this loop */
1257 n = chain_node(OC_BR | Vx);
1258 continue_ptr = new_node(OC_EXEC);
1259 break_ptr = new_node(OC_EXEC);
1261 n2 = chain_node(OC_EXEC | Vx);
/* 'continue' proceeds to n2; 'break' and the branch node's r link both
 * point at the current end of the chain */
1264 continue_ptr->a.n = n2;
1265 break_ptr->a.n = n->r.n = seq->last;
/* restore the enclosing loop's targets */
1267 continue_ptr = save_cont;
1268 break_ptr = save_brk;
1273 /* parse group and attach it to chain */
1274 static void chain_group(void)
1280 c = next_token(TC_GRPSEQ);
1281 } while (c & TC_NEWLINE);
1283 if (c & TC_GRPSTART) {
1284 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1285 if (t_tclass & TC_NEWLINE) continue;
1289 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1291 chain_expr(OC_EXEC | Vx);
1292 } else { /* TC_STATEMNT */
1293 switch (t_info & OPCLSMASK) {
1295 n = chain_node(OC_BR | Vx);
1296 n->l.n = condition();
1298 n2 = chain_node(OC_EXEC);
1300 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1302 n2->a.n = seq->last;
1310 n = chain_loop(NULL);
1315 n2 = chain_node(OC_EXEC);
1316 n = chain_loop(NULL);
1318 next_token(TC_WHILE);
1319 n->l.n = condition();
1323 next_token(TC_SEQSTART);
1324 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1325 if (t_tclass & TC_SEQTERM) { /* for-in */
1326 if ((n2->info & OPCLSMASK) != OC_IN)
1327 syntax_error(EMSG_UNEXP_TOKEN);
1328 n = chain_node(OC_WALKINIT | VV);
1331 n = chain_loop(NULL);
1332 n->info = OC_WALKNEXT | Vx;
1334 } else { /* for (;;) */
1335 n = chain_node(OC_EXEC | Vx);
1337 n2 = parse_expr(TC_SEMICOL);
1338 n3 = parse_expr(TC_SEQTERM);
1348 n = chain_node(t_info);
1349 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1350 if (t_tclass & TC_OUTRDR) {
1352 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1354 if (t_tclass & TC_GRPTERM)
1359 n = chain_node(OC_EXEC);
1364 n = chain_node(OC_EXEC);
1365 n->a.n = continue_ptr;
1368 /* delete, next, nextfile, return, exit */
1375 static void parse_program(char *p)
1384 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1385 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1387 if (tclass & TC_OPTERM)
1391 if (tclass & TC_BEGIN) {
1395 } else if (tclass & TC_END) {
1399 } else if (tclass & TC_FUNCDECL) {
1400 next_token(TC_FUNCTION);
1402 f = newfunc(t_string);
1403 f->body.first = NULL;
1405 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1406 v = findvar(ahash, t_string);
1407 v->x.aidx = (f->nargs)++;
1409 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1416 } else if (tclass & TC_OPSEQ) {
1418 cn = chain_node(OC_TEST);
1419 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1420 if (t_tclass & TC_GRPSTART) {
1424 chain_node(OC_PRINT);
1426 cn->r.n = mainseq.last;
1428 } else /* if (tclass & TC_GRPSTART) */ {
1436 /* -------- program execution part -------- */
/* (Re)build the splitter *spl from the separator string s. */
1438 static node *mk_splitter(const char *s, tsplitter *spl)
/* the splitter currently holds compiled regexes: release them first */
1446 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1448 regfree(ire); // TODO: nuke ire, use re+1?
/* a separator longer than one character is compiled as a regex */
1450 if (strlen(s) > 1) {
1451 mk_re_node(s, n, re);
/* otherwise the node info is simply the first character of s
 * (which is the NUL terminator for an empty string) */
1453 n->info = (uint32_t) *s;
1459 /* Use a node as a regular expression. Takes the node ptr and regex_t
1460 * storage space. Returns a ptr to the regex (if the result points to preg, it
1461 * must be regfree'd manually later)
1463 static regex_t *as_regex(node *op, regex_t *preg)
/* op is already a compiled regex node: hand back the precompiled variant
 * matching the current case-sensitivity setting; preg is not used */
1468 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1469 return icase ? op->r.ire : op->l.re;
/* otherwise evaluate op, take the result as a string and compile it into
 * the caller-supplied preg, honouring icase */
1472 s = getvar_s(evaluate(op, v));
1473 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
/*
 * Gradually growing buffer. If *b is still NULL, or n has reached the
 * recorded capacity *size, reallocate *b to n + n/2 + 80 bytes and store
 * the new capacity in *size. Otherwise nothing changes.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1485 /* resize field storage space */
1486 static void fsrealloc(int size)
/* not enough slots in Fields[]: grow with 16 slots of headroom */
1490 if (size >= maxfields) {
1492 maxfields = size + 16;
1493 Fields = xrealloc(Fields, maxfields * sizeof(var));
/* initialise the slots from i up to the new capacity: each field is a
 * SPECIAL variable with no string yet */
1494 for (; i < maxfields; i++) {
1495 Fields[i].type = VF_SPECIAL;
1496 Fields[i].string = NULL;
/* shrinking: visit the fields from the new count up to the old nfields
 * (loop body not visible here) */
1500 if (size < nfields) {
1501 for (i = size; i < nfields; i++) {
1508 static int awk_split(const char *s, node *spl, char **slist)
1513 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1515 /* in worst case, each char would be a separate field */
1516 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1519 c[0] = c[1] = (char)spl->info;
1521 if (*getvar_s(intvar[RS]) == '\0')
1524 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1526 return n; /* "": zero fields */
1527 n++; /* at least one field will be there */
1529 l = strcspn(s, c+2); /* len till next NUL or \n */
1530 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1531 && pmatch[0].rm_so <= l
1533 l = pmatch[0].rm_so;
1534 if (pmatch[0].rm_eo == 0) {
1538 n++; /* we saw yet another delimiter */
1540 pmatch[0].rm_eo = l;
1541 if (s[l]) pmatch[0].rm_eo++;
1546 s += pmatch[0].rm_eo;
1550 if (c[0] == '\0') { /* null split */
1558 if (c[0] != ' ') { /* single-character split */
1560 c[0] = toupper(c[0]);
1561 c[1] = tolower(c[1]);
1564 while ((s1 = strpbrk(s1, c))) {
1572 s = skip_whitespace(s);
1575 while (*s && !isspace(*s))
1582 static void split_f0(void)
1584 /* static char *fstrings; */
1585 #define fstrings (G.split_f0__fstrings)
1596 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1599 for (i = 0; i < n; i++) {
1600 Fields[i].string = nextword(&s);
1601 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1604 /* set NF manually to avoid side effects */
1606 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1607 intvar[NF]->number = nfields;
1611 /* perform additional actions when some internal variables changed */
/* Hook run after v has been changed. Dispatches on which internal variable
 * v is (NF, $0, FS, RS, IGNORECASE, ...). */
1612 static void handle_special(var *v)
1616 const char *sep, *s;
1617 int sl, l, len, i, bsize;
/* only variables flagged VF_SPECIAL need any work */
1619 if (!(v->type & VF_SPECIAL))
/* NF changed: take the new field count ... */
1622 if (v == intvar[NF]) {
1623 n = (int)getvar_i(v);
1626 /* recalculate $0 */
/* ... and join the first n fields with OFS into the growing buffer b */
1627 sep = getvar_s(intvar[OFS]);
1631 for (i = 0; i < n; i++) {
1632 s = getvar_s(&Fields[i]);
1635 memcpy(b+len, sep, sl);
/* make room for this field plus a separator before copying it in */
1638 qrealloc(&b, len+l+sl, &bsize);
1639 memcpy(b+len, s, l);
/* setvar_p() takes the buffer pointer itself, without copying */
1644 setvar_p(intvar[F0], b);
/* $0 changed: mark the record as not yet split into fields */
1647 } else if (v == intvar[F0]) {
1648 is_f0_split = FALSE;
/* FS / RS changed: rebuild the corresponding splitter */
1650 } else if (v == intvar[FS]) {
1651 mk_splitter(getvar_s(v), &fsplitter);
1653 } else if (v == intvar[RS]) {
1654 mk_splitter(getvar_s(v), &rsplitter);
1656 } else if (v == intvar[IGNORECASE]) {
/* v-Fields is the zero-based index of v within Fields[]; NF is raised to
 * at least index+1 and otherwise kept (the guarding branch is not
 * visible here - presumably the "v is a field" case, TODO confirm) */
1660 n = getvar_i(intvar[NF]);
1661 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1662 /* right here v is invalid. Just to note... */
1666 /* step through func/builtin/etc arguments */
1667 static node *nextarg(node **pn)
1672 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
/* Build the walker list for variable v from the item names of array
 * (the list is what for..in iterates over). */
1681 static void hashwalk_init(var *v, xhash *array)
/* v already owns a walker list */
1687 if (v->type & VF_WALK)
/* one allocation: two char pointers followed by room for the names
 * (array->glen is the summed name length) */
1691 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
/* both pointers start at the first byte after the two-pointer header */
1692 w[0] = w[1] = (char *)(w + 2);
/* walk every bucket of the array */
1693 for (i = 0; i < array->csize; i++) {
1694 hi = array->items[i];
/* copy the item name to the position w[0] currently points at */
1696 strcpy(*w, hi->name);
1703 static int hashwalk_next(var *v)
1711 setvar_s(v, nextword(w+1));
1715 /* evaluate node, return 1 when result is true, 0 otherwise */
1716 static int ptest(node *pattern)
1718 /* ptest__v is "static": to save stack space? */
1719 return istrue(evaluate(pattern, &G.ptest__v));
1722 /* read next record from stream rsm into a variable v */
// The end of the record is located according to rsplitter:
//   - regex separator: regexec on the buffer (the ignore-case regex is used
//     when icase is set)
//   - single-character separator (c is not NUL): strchr from b+pp, falling
//     back to memchr for a NUL inside the p-pp bytes of data
//   - remaining mode: newlines at the start are skipped and two consecutive
//     newlines are searched for; further newlines extend the separator
// so and eo are the start and end offsets of the separator within b.
// Data is read straight from the file descriptor with safe_read.
1723 static int awk_getline(rstream *rsm, var *v)
1726 regmatch_t pmatch[2];
1727 int a, p, pp=0, size;
1728 int fd, so, eo, r, rp;
1731 /* we're using our own buffer since we need access to accumulating
1734 fd = fileno(rsm->F);
// low byte of the splitter info doubles as the single separator character
1739 c = (char) rsplitter.n.info;
// first use: start with a 256-byte buffer
1742 if (!m) qrealloc(&m, 256, &size);
1748 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1749 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1750 b, 1, pmatch, 0) == 0) {
1751 so = pmatch[0].rm_so;
1752 eo = pmatch[0].rm_eo;
1756 } else if (c != '\0') {
1757 s = strchr(b+pp, c);
1758 if (!s) s = memchr(b+pp, '\0', p - pp);
1765 while (b[rp] == '\n')
1767 s = strstr(b+rp, "\n\n");
1770 while (b[eo] == '\n') eo++;
// compact the buffer: slide p+1 bytes from offset a down to the start of m
1778 memmove(m, (const void *)(m+a), p+1);
// make sure at least 128 bytes are available beyond a+p
1783 qrealloc(&m, a+p+128, &size);
// size-p-1 keeps one byte spare, presumably for a terminating NUL - confirm
1786 p += safe_read(fd, b+p, size-p-1);
1790 setvar_i(intvar[ERRNO], errno);
// the byte at so is saved in c and replaced by NUL, which terminates the
// record text in place
1799 c = b[so]; b[so] = '\0';
// same trick at eo so that b+so is a NUL-terminated copy source for RT
1803 c = b[eo]; b[eo] = '\0';
1804 setvar_s(intvar[RT], b+so);
// fmt_num formats the number n into b (at most size bytes, via snprintf).
//   int_as_int set and n equal to its int truncation: printed with %d,
//     ignoring format
//   otherwise the last character of format picks the argument type:
//     d i o u x X  - n is passed as int
//     e E f g G    - n is passed as double
//     anything else raises EMSG_INV_FMT
// NOTE(review): converting n to int is undefined in C when n is outside the
// range of int; both the comparison and the integer branch rely on it.
1816 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1820 const char *s = format;
1822 if (int_as_int && n == (int)n) {
1823 r = snprintf(b, size, "%d", (int)n);
// walk s to the end of format; c ends up as the final character
// (or NUL when format is empty)
1825 do { c = *s; } while (c && *++s);
// NOTE(review): strchr also matches the terminating NUL, so an empty format
// (c is NUL) is accepted by this test - confirm that is intended
1826 if (strchr("diouxX", c)) {
1827 r = snprintf(b, size, format, (int)n);
1828 } else if (strchr("eEfgG", c)) {
1829 r = snprintf(b, size, format, n);
1831 syntax_error(EMSG_INV_FMT);
1838 /* formatted output into an allocated buffer, return ptr to buffer */
// The first argument of n is the format string; each following argument is
// evaluated on demand, one per conversion directive. Output accumulates in b
// at offset i.
1839 static char *awk_printf(node *n)
1844 int i, j, incr, bsize;
// work on a private copy of the format string (fmt keeps the start, f scans)
1849 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// advance to the next % that is not part of a doubled %%
1854 while (*f && (*f != '%' || *(++f) == '%'))
// skip flags, width and precision up to the conversion letter
// NOTE(review): f points to plain char; isalpha expects an unsigned char
// value or EOF, so a negative char here is undefined - confirm
1856 while (*f && !isalpha(*f)) {
1858 syntax_error("%*x formats are not supported");
// space for this piece: its own length plus MAXVARFMT for the converted value
1862 incr = (f - s) + MAXVARFMT;
1863 qrealloc(&b, incr + i, &bsize);
1868 arg = evaluate(nextarg(&n), v);
// %c (or a piece with no conversion letter): numeric arguments give the
// character with that code, other arguments give their first character
1871 if (c == 'c' || !c) {
1872 i += sprintf(b+i, s, is_numeric(arg) ?
1873 (char)getvar_i(arg) : *getvar_s(arg));
1874 } else if (c == 's') {
// strings can exceed MAXVARFMT, so the buffer is grown by the string length
1876 qrealloc(&b, incr+i+strlen(s1), &bsize);
1877 i += sprintf(b+i, s, s1);
// all remaining conversions are numeric and go through fmt_num
1879 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1883 /* if there was an error while sprintf, return value is negative */
// shrink the buffer to the bytes actually used plus the terminator
1887 b = xrealloc(b, i + 1);
1894 /* common substitution routine
1895 * replace (nm) substring of (src) that match (n) with (repl), store
1896 * result into (dest), return number of substitutions. If nm=0, replace
1897 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1898 * subexpression matching (\1-\9)
// In the description, (n) is the rn parameter and dst is the dest parameter.
1900 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1905 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// slot 0 is the whole match, slots 1-9 are the subexpressions
1906 regmatch_t pmatch[10];
// as_regex may hand back a regex compiled into the local sreg; that case is
// detected again before returning so the regex can be freed
1909 re = as_regex(rn, &sreg);
1910 if (!src) src = intvar[F0];
1911 if (!dest) dest = intvar[F0];
// sp is the scan position in the source text; REG_NOTBOL is passed for every
// search that does not start at the very beginning of the source string
1916 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1917 so = pmatch[0].rm_so;
1918 eo = pmatch[0].rm_eo;
// ds is the destination buffer and di its write offset; room is reserved for
// the text up to the end of the match plus rl bytes
1920 qrealloc(&ds, di + eo + rl, &dssize);
1921 memcpy(ds + di, sp, eo);
// walk the replacement text character by character
1927 for (s = repl; *s; s++) {
// an ampersand, or a digit when ex is set, refers to matched text
1933 if (c == '&' || (ex && c >= '0' && c <= '9')) {
// nbs presumably counts the backslashes seen just before c - confirm;
// the write offset is moved back accordingly
1934 di -= ((nbs + 3) >> 1);
// copy the text of match slot j into the destination
1943 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1944 qrealloc(&ds, di + rl + n, &dssize);
1945 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1957 if (!ds[di++]) break;
// append whatever is left of the source after the last processed match
1961 qrealloc(&ds, di + strlen(sp), &dssize);
1962 strcpy(ds + di, sp);
1964 if (re == &sreg) regfree(re);
// exec_builtin runs the builtin selected by (op->info & OPNMASK) and stores
// its result in res. Up to four arguments are collected first:
//   an[] - argument nodes, av[] - evaluated values, as[] - string values.
1968 static var *exec_builtin(node *op, var *res)
1970 #define tspl (G.exec_builtin__tspl)
1977 regmatch_t pmatch[2];
1987 isr = info = op->info;
1990 av[2] = av[3] = NULL;
// bits 0x09000000 of info request evaluation of each argument, bit 0x08000000
// additionally requests its string value
1991 for (i = 0; i < 4 && op; i++) {
1992 an[i] = nextarg(&op);
1993 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1994 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// the top two bits of info hold the minimum number of arguments
1999 if ((uint32_t)nargs < (info >> 30))
2000 syntax_error(EMSG_TOO_FEW_ARGS);
2002 switch (info & OPNMASK) {
2005 #if ENABLE_FEATURE_AWK_MATH
// NOTE(review): the first operand is read as av[i], where i is whatever the
// argument loop left behind (the argument count), not 0. av[0] looks like
// the intended operand - confirm and fix.
2006 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2008 syntax_error(EMSG_NO_MATH);
// third argument: a regexp node is used directly as the splitter, anything
// else is evaluated and compiled into the scratch splitter tspl
2014 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2015 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2020 n = awk_split(as[0], spl, &s);
// the target array is emptied, then filled with the n pieces at keys 1..n
2022 clear_array(iamarray(av[1]));
2023 for (i=1; i<=n; i++)
2024 setari_u(av[1], i, nextword(&s1));
// the start position argument is 1-based; i becomes the 0-based offset
2031 i = getvar_i(av[1]) - 1;
// an explicit length is used when a third argument was given, otherwise the
// rest of the string (l-i)
2034 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2037 strncpy(s, as[0]+i, n);
// bitwise builtins: operands are converted from double to long first.
// NOTE(review): shifting by a negative count or by at least the width of
// long is undefined in C, as is left-shifting a negative value - the shift
// counts come straight from the script.
2043 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2047 setvar_i(res, ~(long)getvar_i(av[0]));
2051 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2055 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2059 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2063 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
// case conversion works on a private copy, one character at a time through
// the to_xxx function pointer
2073 s1 = s = xstrdup(as[0]);
2075 *s1 = (*to_xxx)(*s1);
// substring search: l is the last offset at which the needle (length ll)
// could still fit; the result n is a 1-based position
2084 l = strlen(as[0]) - ll;
2085 if (ll > 0 && l >= 0) {
2087 s = strstr(as[0], as[1]);
2088 if (s) n = (s - as[0]) + 1;
2090 /* this piece of code is terribly slow and
2091 * really should be rewritten
// case-insensitive variant: compare the needle at every candidate offset
2093 for (i=0; i<=l; i++) {
2094 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2106 tt = getvar_i(av[1]);
2109 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// the first argument is the strftime format when present, otherwise a
// default date format; output is limited to MAXVARFMT bytes of g_buf
2110 i = strftime(g_buf, MAXVARFMT,
2111 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2114 setvar_s(res, g_buf);
// regex match of the second argument against the first
2118 re = as_regex(an[1], &sreg);
2119 n = regexec(re, as[0], 1, pmatch, 0);
// offsets forced to 0 and -1 give RSTART 0 and RLENGTH -1 below; presumably
// this is the no-match path - confirm
2124 pmatch[0].rm_so = 0;
2125 pmatch[0].rm_eo = -1;
2127 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2128 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2129 setvar_i(res, pmatch[0].rm_so);
2130 if (re == &sreg) regfree(re);
// three substitution flavours built on awk_sub:
//   count taken from the third argument, source from the fourth, result
//     stored in res, subexpression references enabled
//   nm = 0: replace all matches in place, result is the substitution count
//   nm = 1: replace one match in place, result is the substitution count
2134 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2138 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2142 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2152 * Evaluate node - the heart of the program. Supplied with subtree
2153 * and place where to store result. returns ptr to result.
// XC maps a node class value to the compact form used for the case labels
2155 #define XC(n) ((n) >> 8)
// evaluate processes the node op and stores or returns its result through res.
// Per node, flag bits in opinfo select which operands are prepared before
// the switch on the node class:
//   OF_RES1 / OF_RES2 - evaluate left / right operand into L.v / R.v
//   OF_STR1 / OF_STR2 - fetch their string values into L.s / R.s
//   OF_NUM1           - fetch the numeric value of the left operand into L.d
// v1 and v1+1 are the temporaries that receive the two operand results.
2157 static var *evaluate(node *op, var *res)
2159 /* This procedure is recursive so we should count every byte */
2160 #define fnargs (G.evaluate__fnargs)
2161 /* seed is initialized to 1 */
2162 #define seed (G.evaluate__seed)
2163 #define sreg (G.evaluate__sreg)
2185 return setvar_s(res, NULL);
2191 opn = (opinfo & OPNMASK);
// remember the source line of the node being executed
2192 g_lineno = op->lineno;
2194 /* execute inevitable things */
2196 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2197 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2198 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2199 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2200 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2202 switch (XC(opinfo & OPCLSMASK)) {
2204 /* -- iterative node type -- */
2208 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2209 /* it's range pattern */
// OF_CHECKED is stored in the node itself, so it persists across calls:
// set once the start pattern matched, cleared when the end pattern matches
2210 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2211 op->info |= OF_CHECKED;
2212 if (ptest(op1->r.n))
2213 op->info &= ~OF_CHECKED;
// plain pattern: follow a.n when it is true, r.n otherwise
2220 op = (ptest(op1)) ? op->a.n : op->r.n;
2224 /* just evaluate an expression, also used as unconditional jump */
2228 /* branch, used in if-else and various loops */
2230 op = istrue(L.v) ? op->a.n : op->r.n;
2233 /* initialize for-in loop */
2234 case XC( OC_WALKINIT ):
2235 hashwalk_init(L.v, iamarray(R.v));
2238 /* get next array item */
2239 case XC( OC_WALKNEXT ):
2240 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2243 case XC( OC_PRINT ):
2244 case XC( OC_PRINTF ):
// output redirection: R.s names the target stream, which is either a command
// opened with popen or a file opened for writing or appending (chosen by opn)
2247 X.rsm = newfile(R.s);
2250 X.rsm->F = popen(R.s, "w");
2251 if (X.rsm->F == NULL)
2252 bb_perror_msg_and_die("popen");
2255 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2261 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// this fputs writes $0 as is; presumably the no-argument form - confirm
2263 fputs(getvar_s(intvar[F0]), X.F);
2266 L.v = evaluate(nextarg(&op1), v1);
// numbers are formatted with OFMT (integral values as plain integers since
// int_as_int is TRUE); everything else is written as its string value
2267 if (L.v->type & VF_NUMBER) {
2268 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2269 getvar_i(L.v), TRUE);
2272 fputs(getvar_s(L.v), X.F);
// OFS is written only while op1 is still non-NULL after nextarg
2275 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2278 fputs(getvar_s(intvar[ORS]), X.F);
2280 } else { /* OC_PRINTF */
2281 L.s = awk_printf(op1);
2288 case XC( OC_DELETE ):
// the operand must be a variable or a function argument, otherwise
// EMSG_NOT_ARRAY is raised
2289 X.info = op1->info & OPCLSMASK;
2290 if (X.info == OC_VAR) {
2292 } else if (X.info == OC_FNARG) {
2293 R.v = &fnargs[op1->l.i];
2295 syntax_error(EMSG_NOT_ARRAY);
// two forms: remove the single element whose key is the evaluated subscript,
// or clear the whole array
2300 L.s = getvar_s(evaluate(op1->r.n, v1));
2301 hash_remove(iamarray(R.v), L.s);
2303 clear_array(iamarray(R.v));
2307 case XC( OC_NEWSOURCE ):
2308 g_progname = op->l.s;
2311 case XC( OC_RETURN ):
2315 case XC( OC_NEXTFILE ):
2326 /* -- recursive node type -- */
2330 if (L.v == intvar[NF])
2334 case XC( OC_FNARG ):
2335 L.v = &fnargs[op->l.i];
// with a subscript node the result is the array element, else the var itself
2337 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// membership test: 1 when the key L.s exists in the array, 0 otherwise
2341 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2344 case XC( OC_REGEXP ):
2346 L.s = getvar_s(intvar[F0]);
2349 case XC( OC_MATCH ):
2352 X.re = as_regex(op1, &sreg);
2353 R.i = regexec(X.re, L.s, 0, NULL, 0);
2354 if (X.re == &sreg) regfree(X.re);
// the match result is inverted when opn is the ! character
2355 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2359 /* if source is a temporary string, just relink it to dest */
// R.v == v1+1 means the right operand was produced into the temporary slot
2360 if (R.v == v1+1 && R.v->string) {
2361 res = setvar_p(L.v, R.v->string);
2364 res = copyvar(L.v, R.v);
2368 case XC( OC_TERNARY ):
// the right child must be an OC_COLON node holding the two alternatives
2369 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2370 syntax_error(EMSG_POSSIBLE_ERROR);
2371 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
// user function call: a function without a body is an error
2375 if (!op->r.f->body.first)
2376 syntax_error(EMSG_UNDEF_FUNC);
// nargs+1 fresh vars are allocated; X.v keeps the base, R.v walks the slots
2378 X.v = R.v = nvalloc(op->r.f->nargs+1);
2380 L.v = evaluate(nextarg(&op1), v1);
// each slot is tagged VF_CHILD and linked to the caller-side value
2382 R.v->type |= VF_CHILD;
2383 R.v->x.parent = L.v;
2384 if (++R.v - X.v >= op->r.f->nargs)
2392 res = evaluate(op->r.f->body.first, res);
2399 case XC( OC_GETLINE ):
2400 case XC( OC_PGETLINE ):
2402 X.rsm = newfile(L.s);
2404 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2405 X.rsm->F = popen(L.s, "r");
2406 X.rsm->is_pipe = TRUE;
2408 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
// presumably plain fopen is used so that an open failure can be reported to
// the script instead of terminating - confirm
2412 if (!iF) iF = next_input_file();
2417 setvar_i(intvar[ERRNO], errno);
2425 L.i = awk_getline(X.rsm, R.v);
2428 incvar(intvar[FNR]);
2435 /* simple builtins */
2436 case XC( OC_FBLTIN ):
// NOTE(review): rand() can return RAND_MAX, which makes this quotient exactly
// 1.0; POSIX awk defines rand() as 0 <= n < 1 - confirm whether that matters
2444 R.d = (double)rand() / (double)RAND_MAX;
2446 #if ENABLE_FEATURE_AWK_MATH
2472 syntax_error(EMSG_NO_MATH);
// new seed: the argument when one is given, the current time otherwise
2477 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2487 L.s = getvar_s(intvar[F0]);
// the wait status from system() is shifted right by 8, presumably to extract
// the exit code; the result is 0 when exec support is disabled or the
// command string is empty
2493 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2494 ? (system(L.s) >> 8) : 0;
2502 X.rsm = newfile(L.s);
// close: look the stream up by name, close it with pclose or fclose as
// appropriate, then free its buffer and drop its fdhash entry
2511 X.rsm = (rstream *)hash_search(fdhash, L.s);
2513 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2514 free(X.rsm->buffer);
2515 hash_remove(fdhash, L.s);
2518 setvar_i(intvar[ERRNO], errno);
2525 case XC( OC_BUILTIN ):
2526 res = exec_builtin(op, res);
2529 case XC( OC_SPRINTF ):
// the formatted buffer is handed to res by pointer
2530 setvar_p(res, awk_printf(op1));
2533 case XC( OC_UNARY ):
2535 L.d = R.d = getvar_i(R.v);
// logical negation of the operand
2550 L.d = istrue(X.v) ? 0 : 1;
2561 case XC( OC_FIELD ):
2562 R.i = (int)getvar_i(R.v);
// field numbers are 1-based, so field R.i is stored at Fields[R.i - 1]
2569 res = &Fields[R.i - 1];
2573 /* concatenation (" ") and index joining (",") */
2574 case XC( OC_CONCAT ):
2575 case XC( OC_COMMA ):
// opn is reused here as the byte count needed for the joined string
2576 opn = strlen(L.s) + strlen(R.s) + 2;
// index joining additionally needs room for SUBSEP between the two parts
2579 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2580 L.s = getvar_s(intvar[SUBSEP]);
2581 X.s = xrealloc(X.s, opn + strlen(L.s));
// short-circuit logic: the right operand is evaluated (through ptest) only
// when the left operand does not already decide the result
2589 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2593 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2596 case XC( OC_BINARY ):
2597 case XC( OC_REPLACE ):
2598 R.d = getvar_i(R.v);
2610 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2614 #if ENABLE_FEATURE_AWK_MATH
2615 L.d = pow(L.d, R.d);
2617 syntax_error(EMSG_NO_MATH);
2621 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
// remainder via truncation of the quotient.
// NOTE(review): the (int) conversion is undefined in C when the quotient does
// not fit in int; fmod would avoid that - confirm before changing
2622 L.d -= (int)(L.d / R.d) * R.d;
// OC_BINARY stores into res; OC_REPLACE stores back into the left operand X.v
2625 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2628 case XC( OC_COMPARE ):
// numeric comparison only when both operands are numeric; otherwise the
// string values are compared, ignoring case when icase is set.
// L.d ends up negative, zero or positive.
2629 if (is_numeric(L.v) && is_numeric(R.v)) {
2630 L.d = getvar_i(L.v) - getvar_i(R.v);
2632 L.s = getvar_s(L.v);
2633 R.s = getvar_s(R.v);
2634 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
// the low bit of opn selects the plain or the negated form of the relation
// chosen by the remaining bits
2636 switch (opn & 0xfe) {
2647 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2651 syntax_error(EMSG_POSSIBLE_ERROR);
// node classes are ordered: the class value is compared against
// SHIFT_TIL_THIS and RECUR_FROM_THIS to decide how to proceed after the switch
2653 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2655 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2668 /* -------- main & co. -------- */
// awk_exit is the common shutdown path; r is the exit status requested by
// the caller.
2670 static int awk_exit(int r)
// run the END sequence
2681 evaluate(endseq.first, &tv);
2684 /* waiting for children */
// every bucket of the stream hash is visited; streams that are still open and
// were opened as pipes are closed with pclose, which waits for the child
2685 for (i = 0; i < fdhash->csize; i++) {
2686 hi = fdhash->items[i];
2688 if (hi->data.rs.F && hi->data.rs.is_pipe)
2689 pclose(hi->data.rs.F);
2697 /* if expr looks like "var=value", perform assignment and return 1,
2698 * otherwise return 0 */
2699 static int is_assignment(const char *expr)
2701 char *exprc, *s, *s0, *s1;
// work on a private copy so the caller string is left untouched
2703 exprc = xstrdup(expr);
// rejected unless the first character passes isalnum_ and an equals sign
// is present
2704 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
// the value is rewritten in place one character at a time through nextchar
// (presumably decoding escape sequences - confirm)
2712 *(s1++) = nextchar(&s);
// exprc names the variable, s0 points at the processed value
2715 setvar_u(newvar(exprc), s0);
2720 /* switch to next input file */
// State (the stream rsm and the files_happen flag) lives in G so it survives
// between calls.
2721 static rstream *next_input_file(void)
2723 #define rsm (G.next_input_file__rsm)
2724 #define files_happen (G.next_input_file__files_happen)
2727 const char *fname, *ind;
// close the previous input, if any, and reset the buffer positions
2729 if (rsm.F) fclose(rsm.F);
2731 rsm.pos = rsm.adv = 0;
// true when advancing ARGIND would run past the last ARGV entry
2734 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// ARGIND is incremented and its string form is used as the key into ARGV
2740 ind = getvar_s(incvar(intvar[ARGIND]));
2741 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
// empty entries are not opened; neither are entries accepted by
// is_assignment, which performs the assignment as a side effect
2742 if (fname && *fname && !is_assignment(fname))
2743 F = afopen(fname, "r");
2747 files_happen = TRUE;
2748 setvar_s(intvar[FILENAME], fname);
// awk_main: applet entry point. Order of work: set up global state and the
// built-in variables, import the environment, parse options and the program
// text, fill ARGV, run BEGIN, process the input files, then exit through
// awk_exit.
2755 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2756 int awk_main(int argc, char **argv)
2759 char *opt_F, *opt_W;
2760 llist_t *opt_v = NULL;
2765 char *vnames = (char *)vNames; /* cheat */
2766 char *vvalues = (char *)vValues;
2770 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2771 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2772 if (ENABLE_LOCALE_SUPPORT)
2773 setlocale(LC_NUMERIC, "C");
2777 /* allocate global buffer */
2778 g_buf = xmalloc(MAXVARFMT + 1);
// four hash tables; fdhash holds the streams. The roles of the others are
// presumably variables, arrays and functions, going by their names - confirm
2780 vhash = hash_init();
2781 ahash = hash_init();
2782 fdhash = hash_init();
2783 fnhash = hash_init();
2785 /* initialize variables */
// vNames and vValues are walked in parallel with nextword
2786 for (i = 0; *vnames; i++) {
2787 intvar[i] = v = newvar(nextword(&vnames));
// a \377 byte in vValues marks a variable without an initial string value
2788 if (*vvalues != '\377')
2789 setvar_s(v, nextword(&vvalues));
// an asterisk at the current vNames position tags the variable just created
// as VF_SPECIAL
2793 if (*vnames == '*') {
2794 v->type |= VF_SPECIAL;
// build the field and record splitters from the initial FS and RS values
2799 handle_special(intvar[FS]);
2800 handle_special(intvar[RS]);
// pre-register the three standard streams under their /dev names
2802 newfile("/dev/stdin")->F = stdin;
2803 newfile("/dev/stdout")->F = stdout;
2804 newfile("/dev/stderr")->F = stderr;
2806 /* Huh, people report that sometimes environ is NULL. Oh well. */
2807 if (environ) for (envp = environ; *envp; envp++) {
2808 /* environ is writable, thus we don't strdup it needlessly */
2810 char *s1 = strchr(s, '=');
2813 /* Both findvar and setvar_u take const char*
2814 * as 2nd arg -> environment is not trashed */
// ENVIRON[name] = value, where value starts right after the equals sign
2815 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2819 opt_complementary = "v::";
// option bits tested below: 0x4 is -f and 0x8 is -W
2820 opt = getopt32(argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2824 setvar_s(intvar[FS], opt_F); // -F
2825 while (opt_v) { /* -v */
2826 if (!is_assignment(llist_pop(&opt_v)))
2829 if (opt & 0x4) { // -f
2830 char *s = s; /* die, gcc, die */
2831 FILE *from_file = afopen(g_progname, "r");
2832 /* one byte is reserved for some trick in next_token */
// seekable file: size it with fseek and ftell, then read it in one go into
// s+1 so that s[0] stays reserved.
// NOTE(review): the ftell result is used as a size without a check for -1,
// and the second fseek result is ignored - confirm this is acceptable
2833 if (fseek(from_file, 0, SEEK_END) == 0) {
2834 flen = ftell(from_file);
2835 s = xmalloc(flen + 4);
2836 fseek(from_file, 0, SEEK_SET);
2837 i = 1 + fread(s + 1, 1, flen, from_file);
// non-seekable input: grow the buffer by 4096 bytes per round and read up to
// 4094 bytes at offset i, until fread returns 0
2839 for (i = j = 1; j > 0; i += j) {
2840 s = xrealloc(s, i + 4096);
2841 j = fread(s + i, 1, 4094, from_file);
2846 parse_program(s + 1);
2848 } else { // no -f: take program from 1st parameter
2851 g_progname = "cmd. line";
2852 parse_program(*argv++);
2855 if (opt & 0x8) // -W
2856 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2858 /* fill in ARGV array */
// ARGV[0] is always the literal applet name; the remaining arguments follow
2859 setvar_i(intvar[ARGC], argc + 1);
2860 setari_u(intvar[ARGV], 0, "awk");
2863 setari_u(intvar[ARGV], ++i, *argv++);
// run BEGIN; when there are neither main rules nor END rules no input needs
// to be read at all
2865 evaluate(beginseq.first, &tv);
2866 if (!mainseq.first && !endseq.first)
2867 awk_exit(EXIT_SUCCESS);
2869 /* input file could already be opened in BEGIN block */
2870 if (!iF) iF = next_input_file();
2872 /* passing through input files */
// per file: FNR restarts at 0; per record: FNR is incremented and the main
// rules are evaluated. awk_getline returns a positive value while records
// are available.
2875 setvar_i(intvar[FNR], 0);
2877 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2880 incvar(intvar[FNR]);
2881 evaluate(mainseq.first, &tv);
// error path: the system error text is reported via syntax_error
2888 syntax_error(strerror(errno));
2890 iF = next_input_file();
2893 awk_exit(EXIT_SUCCESS);