1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
19 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
20 #define VF_ARRAY 0x0002 /* 1 = it's an array */
22 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
23 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
25 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
27 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
30 /* these flags are static, don't change them when value is changed */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34 typedef struct var_s {
35 unsigned short type; /* flags */
39 int aidx; /* func arg idx (for compilation stage) */
40 struct xhash_s *array; /* array ptr */
41 struct var_s *parent; /* for func args, ptr to actual parameter */
42 char **walker; /* list of array elements (for..in) */
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
54 typedef struct func_s {
60 typedef struct rstream_s {
66 unsigned short is_pipe;
69 typedef struct hash_item_s {
71 struct var_s v; /* variable/array hash */
72 struct rstream_s rs; /* redirect streams hash */
73 struct func_s f; /* functions hash */
75 struct hash_item_s *next; /* next in chain */
76 char name[1]; /* really it's longer */
79 typedef struct xhash_s {
80 unsigned int nel; /* num of elements */
81 unsigned int csize; /* current hash size */
82 unsigned int nprime; /* next hash size in PRIMES[] */
83 unsigned int glen; /* summary length of item names */
84 struct hash_item_s **items;
88 typedef struct node_s {
90 unsigned short lineno;
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
113 struct nvblock_s *prev;
114 struct nvblock_s *next;
118 typedef struct tsplitter_s {
123 /* simple token classes */
124 /* Order and hex values are very important!!! See next_token() */
125 #define TC_SEQSTART 1 /* ( */
126 #define TC_SEQTERM (1 << 1) /* ) */
127 #define TC_REGEXP (1 << 2) /* /.../ */
128 #define TC_OUTRDR (1 << 3) /* | > >> */
129 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
130 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
131 #define TC_BINOPX (1 << 6) /* two-opnd operator */
132 #define TC_IN (1 << 7)
133 #define TC_COMMA (1 << 8)
134 #define TC_PIPE (1 << 9) /* input redirection pipe */
135 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
136 #define TC_ARRTERM (1 << 11) /* ] */
137 #define TC_GRPSTART (1 << 12) /* { */
138 #define TC_GRPTERM (1 << 13) /* } */
139 #define TC_SEMICOL (1 << 14)
140 #define TC_NEWLINE (1 << 15)
141 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
142 #define TC_WHILE (1 << 17)
143 #define TC_ELSE (1 << 18)
144 #define TC_BUILTIN (1 << 19)
145 #define TC_GETLINE (1 << 20)
146 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
147 #define TC_BEGIN (1 << 22)
148 #define TC_END (1 << 23)
149 #define TC_EOF (1 << 24)
150 #define TC_VARIABLE (1 << 25)
151 #define TC_ARRAY (1 << 26)
152 #define TC_FUNCTION (1 << 27)
153 #define TC_STRING (1 << 28)
154 #define TC_NUMBER (1 << 29)
156 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
158 /* combined token classes */
159 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
164 #define TC_STATEMNT (TC_STATX | TC_WHILE)
165 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
171 /* discard newlines after these */
172 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173 TC_BINOP | TC_OPTERM)
175 /* what can expression begin with */
176 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183 TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
186 #define OF_RES1 0x010000
187 #define OF_RES2 0x020000
188 #define OF_STR1 0x040000
189 #define OF_STR2 0x080000
190 #define OF_NUM1 0x100000
191 #define OF_CHECKED 0x200000
193 /* combined operator flags */
196 #define xS (OF_RES2 | OF_STR2)
198 #define VV (OF_RES1 | OF_RES2)
199 #define Nx (OF_RES1 | OF_NUM1)
200 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx (OF_RES1 | OF_STR1)
202 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
205 #define OPCLSMASK 0xFF00
206 #define OPNMASK 0x007F
/* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning, bit by bit: n n s3 s2 s1 v3 v2 v1,
 * where n - min. number of args, vN - resolve Nth arg to var, sN - resolve Nth arg to string
 */
/* Place an operator priority (or, for builtins, the argument descriptor
 * byte) into the highest byte of a token-info word.
 * The argument is parenthesized so compound expressions expand as a unit,
 * and widened to uint32_t so values >= 0x80 (e.g. P(0x83)) are not shifted
 * into the sign bit of a signed int. */
#define P(x) ((uint32_t)(x) << 24)
213 #define PRIMASK 0x7F000000
214 #define PRIMASK2 0x7E000000
216 /* Operation classes */
218 #define SHIFT_TIL_THIS 0x0600
219 #define RECUR_FROM_THIS 0x1000
222 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
223 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
225 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
226 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
227 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
229 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
230 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
231 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
232 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
233 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
234 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
235 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
236 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
239 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
243 /* simple builtins */
245 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
246 F_ti, F_le, F_sy, F_ff, F_cl
251 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
253 B_an, B_co, B_ls, B_or, B_rs, B_xo,
256 /* tokens and their corresponding info values */
258 #define NTC "\377" /* switch to next token class (tc<<1) */
261 #define OC_B OC_BUILTIN
263 static char * const tokenlist =
266 "\1/" NTC /* REGEXP */
267 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
268 "\2++" "\2--" NTC /* UOPPOST */
269 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
270 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
271 "\2*=" "\2/=" "\2%=" "\2^="
272 "\1+" "\1-" "\3**=" "\2**"
273 "\1/" "\1%" "\1^" "\1*"
274 "\2!=" "\2>=" "\2<=" "\1>"
275 "\1<" "\2!~" "\1~" "\2&&"
276 "\2||" "\1?" "\1:" NTC
280 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
286 "\2if" "\2do" "\3for" "\5break" /* STATX */
287 "\10continue" "\6delete" "\5print"
288 "\6printf" "\4next" "\10nextfile"
289 "\6return" "\4exit" NTC
293 "\3and" "\5compl" "\6lshift" "\2or"
295 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
296 "\3cos" "\3exp" "\3int" "\3log"
297 "\4rand" "\3sin" "\4sqrt" "\5srand"
298 "\6gensub" "\4gsub" "\5index" "\6length"
299 "\5match" "\5split" "\7sprintf" "\3sub"
300 "\6substr" "\7systime" "\10strftime"
301 "\7tolower" "\7toupper" NTC
303 "\4func" "\10function" NTC
308 static const uint32_t tokeninfo[] = {
313 xS|'a', xS|'w', xS|'|',
314 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
315 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
317 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
318 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
319 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
320 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
321 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
322 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
323 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
324 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
325 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
326 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
327 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
328 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
329 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
330 OC_COLON|xx|P(67)|':',
333 OC_PGETLINE|SV|P(37),
334 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
335 OC_UNARY|xV|P(19)|'!',
341 ST_IF, ST_DO, ST_FOR, OC_BREAK,
342 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
343 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
344 OC_RETURN|Vx, OC_EXIT|Nx,
348 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
349 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
350 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
351 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
352 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
353 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
354 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
355 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
356 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
363 /* internal variable names and their initial values */
/* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
366 CONVFMT=0, OFMT, FS, OFS,
367 ORS, RS, RT, FILENAME,
368 SUBSEP, ARGIND, ARGC, ARGV,
371 ENVIRON, F0, _intvarcount_
374 static char * vNames =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
379 "NR\0" "NF\0*" "IGNORECASE\0*"
380 "ENVIRON\0" "$\0*" "\0";
382 static char * vValues =
383 "%.6g\0" "%.6g\0" " \0" " \0"
384 "\n\0" "\n\0" "\0" "\0"
/* Hash table sizing.
 * FIRST_PRIME is the bucket count a freshly created hash starts with;
 * PRIMES[] lists the sizes a hash may grow to, in order, and NPRIMES is the
 * number of entries in that list (growth stops once all have been used).
 * FIRST_PRIME must not end in a semicolon so it stays usable in expressions. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
395 extern char **environ;
397 static var * V[_intvarcount_];
398 static chain beginseq, mainseq, endseq, *seq;
399 static int nextrec, nextfile;
400 static node *break_ptr, *continue_ptr;
402 static xhash *vhash, *ahash, *fdhash, *fnhash;
403 static char *programname;
405 static int is_f0_split;
408 static tsplitter fsplitter, rsplitter;
424 /* function prototypes */
425 static void handle_special(var *);
426 static node *parse_expr(uint32_t);
427 static void chain_group(void);
428 static var *evaluate(node *, var *);
429 static rstream *next_input_file(void);
430 static int fmt_num(char *, int, const char *, double, int);
431 static int awk_exit(int) ATTRIBUTE_NORETURN;
433 /* ---- error handling ---- */
435 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
436 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
437 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
438 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
439 static const char EMSG_INV_FMT[] = "Invalid format specifier";
440 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
441 static const char EMSG_NOT_ARRAY[] = "Not an array";
442 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
443 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
444 #ifndef CONFIG_FEATURE_AWK_MATH
445 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
448 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
449 static void syntax_error(const char * const message)
451 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
454 #define runtime_error(x) syntax_error(x)
457 /* ---- hash stuff ---- */
459 static unsigned int hashidx(const char *name)
463 while (*name) idx = *name++ + (idx << 6) - idx;
467 /* create new hash */
468 static xhash *hash_init(void)
472 newhash = xzalloc(sizeof(xhash));
473 newhash->csize = FIRST_PRIME;
474 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
479 /* find item in hash, return ptr to data, NULL if not found */
480 static void *hash_search(xhash *hash, const char *name)
484 hi = hash->items [ hashidx(name) % hash->csize ];
486 if (strcmp(hi->name, name) == 0)
493 /* grow hash if it becomes too big */
494 static void hash_rebuild(xhash *hash)
496 unsigned int newsize, i, idx;
497 hash_item **newitems, *hi, *thi;
499 if (hash->nprime == NPRIMES)
502 newsize = PRIMES[hash->nprime++];
503 newitems = xzalloc(newsize * sizeof(hash_item *));
505 for (i=0; i<hash->csize; i++) {
510 idx = hashidx(thi->name) % newsize;
511 thi->next = newitems[idx];
517 hash->csize = newsize;
518 hash->items = newitems;
521 /* find item in hash, add it if necessary. Return ptr to data */
522 static void *hash_find(xhash *hash, const char *name)
528 hi = hash_search(hash, name);
530 if (++hash->nel / hash->csize > 10)
533 l = strlen(name) + 1;
534 hi = xzalloc(sizeof(hash_item) + l);
535 memcpy(hi->name, name, l);
537 idx = hashidx(name) % hash->csize;
538 hi->next = hash->items[idx];
539 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): look the name up in the given table
 * (adding it if necessary) and cast the returned data pointer to the
 * table's element type. Each expansion is fully parenthesized so that the
 * cast is applied before any postfix operator used on the macro result. */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name)        ((var *) hash_find(vhash, (name)))
#define newfile(name)       ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name)       ((func *) hash_find(fnhash, (name)))
550 static void hash_remove(xhash *hash, const char *name)
552 hash_item *hi, **phi;
554 phi = &(hash->items[ hashidx(name) % hash->csize ]);
557 if (strcmp(hi->name, name) == 0) {
558 hash->glen -= (strlen(name) + 1);
568 /* ------ some useful functions ------ */
570 static void skip_spaces(char **s)
574 while (*p == ' ' || *p == '\t' ||
575 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
581 static char *nextword(char **s)
590 static char nextchar(char **s)
596 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
597 if (c == '\\' && *s == pps) c = *((*s)++);
601 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
603 return (isalnum(c) || c == '_');
606 static FILE *afopen(const char *path, const char *mode)
608 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
611 /* -------- working with variables (set/get/copy/etc) -------- */
613 static xhash *iamarray(var *v)
617 while (a->type & VF_CHILD)
620 if (! (a->type & VF_ARRAY)) {
622 a->x.array = hash_init();
627 static void clear_array(xhash *array)
632 for (i=0; i<array->csize; i++) {
633 hi = array->items[i];
637 free(thi->data.v.string);
640 array->items[i] = NULL;
642 array->glen = array->nel = 0;
645 /* clear a variable */
646 static var *clrvar(var *v)
648 if (!(v->type & VF_FSTR))
651 v->type &= VF_DONTTOUCH;
657 /* assign string value to variable */
658 static var *setvar_p(var *v, char *value)
667 /* same as setvar_p but make a copy of string */
668 static var *setvar_s(var *v, const char *value)
670 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
673 /* same as setvar_s but set USER flag */
674 static var *setvar_u(var *v, const char *value)
681 /* set array element to user string */
682 static void setari_u(var *a, int idx, const char *s)
685 static char sidx[12];
687 sprintf(sidx, "%d", idx);
688 v = findvar(iamarray(a), sidx);
692 /* assign numeric value to variable */
693 static var *setvar_i(var *v, double value)
696 v->type |= VF_NUMBER;
702 static char *getvar_s(var *v)
704 /* if v is numeric and has no cached string, convert it to string */
705 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
706 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
707 v->string = xstrdup(buf);
708 v->type |= VF_CACHED;
710 return (v->string == NULL) ? "" : v->string;
713 static double getvar_i(var *v)
717 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
721 v->number = strtod(s, &s);
722 if (v->type & VF_USER) {
730 v->type |= VF_CACHED;
735 static var *copyvar(var *dest, const var *src)
739 dest->type |= (src->type & ~VF_DONTTOUCH);
740 dest->number = src->number;
742 dest->string = xstrdup(src->string);
744 handle_special(dest);
748 static var *incvar(var *v)
750 return setvar_i(v, getvar_i(v)+1.);
753 /* return true if v is number or numeric string */
754 static int is_numeric(var *v)
757 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
760 /* return 1 when value of v corresponds to true, 0 otherwise */
761 static int istrue(var *v)
764 return (v->number == 0) ? 0 : 1;
766 return (v->string && *(v->string)) ? 1 : 0;
769 /* temporary variables allocator. Last allocated should be first freed */
770 static var *nvalloc(int n)
778 if ((cb->pos - cb->nv) + n <= cb->size) break;
783 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
784 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
789 if (pb) pb->next = cb;
795 while (v < cb->pos) {
804 static void nvfree(var *v)
808 if (v < cb->nv || v >= cb->pos)
809 runtime_error(EMSG_INTERNAL_ERROR);
811 for (p=v; p<cb->pos; p++) {
812 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
813 clear_array(iamarray(p));
814 free(p->x.array->items);
817 if (p->type & VF_WALK)
824 while (cb->prev && cb->pos == cb->nv) {
829 /* ------- awk program text parsing ------- */
/* Parse the next token pointed to by global pos, place results into global t.
 * If the token isn't expected, give up. Return the token class.
 */
834 static uint32_t next_token(uint32_t expected)
841 static int concat_inserted;
842 static uint32_t save_tclass, save_info;
843 static uint32_t ltclass = TC_OPTERM;
849 } else if (concat_inserted) {
851 concat_inserted = FALSE;
852 t.tclass = save_tclass;
863 while (*p != '\n' && *p != '\0') p++;
871 } else if (*p == '\"') {
875 if (*p == '\0' || *p == '\n')
876 syntax_error(EMSG_UNEXP_EOS);
877 *(s++) = nextchar(&p);
883 } else if ((expected & TC_REGEXP) && *p == '/') {
887 if (*p == '\0' || *p == '\n')
888 syntax_error(EMSG_UNEXP_EOS);
889 if ((*s++ = *p++) == '\\') {
891 *(s-1) = bb_process_escape_sequence((const char **)&p);
892 if (*pp == '\\') *s++ = '\\';
893 if (p == pp) *s++ = *p++;
900 } else if (*p == '.' || isdigit(*p)) {
902 t.number = strtod(p, &p);
904 syntax_error(EMSG_UNEXP_TOKEN);
908 /* search for something known */
918 /* if token class is expected, token
919 * matches and it's not a longer word,
920 * then this is what we are looking for
922 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
923 *tl == *p && strncmp(p, tl, l) == 0 &&
924 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
934 /* it's a name (var/array/function),
935 * otherwise it's something wrong
938 syntax_error(EMSG_UNEXP_TOKEN);
941 while (isalnum_(*(++p))) {
946 /* also consume whitespace between functionname and bracket */
947 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
960 /* skipping newlines in some cases */
961 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
964 /* insert concatenation operator when needed */
965 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
966 concat_inserted = TRUE;
970 t.info = OC_CONCAT | SS | P(35);
977 /* Are we ready for this? */
978 if (! (ltclass & expected))
979 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
980 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
985 static void rollback_token(void) { t.rollback = TRUE; }
987 static node *new_node(uint32_t info)
991 n = xzalloc(sizeof(node));
997 static node *mk_re_node(char *s, node *n, regex_t *re)
1002 xregcomp(re, s, REG_EXTENDED);
1003 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1008 static node *condition(void)
1010 next_token(TC_SEQSTART);
1011 return parse_expr(TC_SEQTERM);
1014 /* parse expression terminated by given argument, return ptr
1015 * to built subtree. Terminator is eaten by parse_expr */
1016 static node *parse_expr(uint32_t iexp)
1025 sn.r.n = glptr = NULL;
1026 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1028 while (! ((tc = next_token(xtc)) & iexp)) {
1029 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1030 /* input redirection (<) attached to glptr node */
1031 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1033 xtc = TC_OPERAND | TC_UOPPRE;
1036 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1037 /* for binary and postfix-unary operators, jump back over
1038 * previous operators with higher priority */
1040 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1041 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1043 if ((t.info & OPCLSMASK) == OC_TERNARY)
1045 cn = vn->a.n->r.n = new_node(t.info);
1047 if (tc & TC_BINOP) {
1049 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1050 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1052 next_token(TC_GETLINE);
1053 /* give maximum priority to this pipe */
1054 cn->info &= ~PRIMASK;
1055 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1059 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1064 /* for operands and prefix-unary operators, attach them
1067 cn = vn->r.n = new_node(t.info);
1069 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1070 if (tc & (TC_OPERAND | TC_REGEXP)) {
1071 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1072 /* one should be very careful with switch on tclass -
1073 * only simple tclasses should be used! */
1078 if ((v = hash_search(ahash, t.string)) != NULL) {
1079 cn->info = OC_FNARG;
1080 cn->l.i = v->x.aidx;
1082 cn->l.v = newvar(t.string);
1084 if (tc & TC_ARRAY) {
1086 cn->r.n = parse_expr(TC_ARRTERM);
1093 v = cn->l.v = xzalloc(sizeof(var));
1095 setvar_i(v, t.number);
1097 setvar_s(v, t.string);
1101 mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1106 cn->r.f = newfunc(t.string);
1107 cn->l.n = condition();
1111 cn = vn->r.n = parse_expr(TC_SEQTERM);
1117 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1121 cn->l.n = condition();
1130 /* add node to chain. Return ptr to alloc'd node */
1131 static node *chain_node(uint32_t info)
1136 seq->first = seq->last = new_node(0);
1138 if (seq->programname != programname) {
1139 seq->programname = programname;
1140 n = chain_node(OC_NEWSOURCE);
1141 n->l.s = xstrdup(programname);
1146 seq->last = n->a.n = new_node(OC_DONE);
1151 static void chain_expr(uint32_t info)
1155 n = chain_node(info);
1156 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1157 if (t.tclass & TC_GRPTERM)
1161 static node *chain_loop(node *nn)
1163 node *n, *n2, *save_brk, *save_cont;
1165 save_brk = break_ptr;
1166 save_cont = continue_ptr;
1168 n = chain_node(OC_BR | Vx);
1169 continue_ptr = new_node(OC_EXEC);
1170 break_ptr = new_node(OC_EXEC);
1172 n2 = chain_node(OC_EXEC | Vx);
1175 continue_ptr->a.n = n2;
1176 break_ptr->a.n = n->r.n = seq->last;
1178 continue_ptr = save_cont;
1179 break_ptr = save_brk;
1184 /* parse group and attach it to chain */
1185 static void chain_group(void)
1191 c = next_token(TC_GRPSEQ);
1192 } while (c & TC_NEWLINE);
1194 if (c & TC_GRPSTART) {
1195 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1196 if (t.tclass & TC_NEWLINE) continue;
1200 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1202 chain_expr(OC_EXEC | Vx);
1203 } else { /* TC_STATEMNT */
1204 switch (t.info & OPCLSMASK) {
1206 n = chain_node(OC_BR | Vx);
1207 n->l.n = condition();
1209 n2 = chain_node(OC_EXEC);
1211 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1213 n2->a.n = seq->last;
1221 n = chain_loop(NULL);
1226 n2 = chain_node(OC_EXEC);
1227 n = chain_loop(NULL);
1229 next_token(TC_WHILE);
1230 n->l.n = condition();
1234 next_token(TC_SEQSTART);
1235 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1236 if (t.tclass & TC_SEQTERM) { /* for-in */
1237 if ((n2->info & OPCLSMASK) != OC_IN)
1238 syntax_error(EMSG_UNEXP_TOKEN);
1239 n = chain_node(OC_WALKINIT | VV);
1242 n = chain_loop(NULL);
1243 n->info = OC_WALKNEXT | Vx;
1245 } else { /* for (;;) */
1246 n = chain_node(OC_EXEC | Vx);
1248 n2 = parse_expr(TC_SEMICOL);
1249 n3 = parse_expr(TC_SEQTERM);
1259 n = chain_node(t.info);
1260 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1261 if (t.tclass & TC_OUTRDR) {
1263 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1265 if (t.tclass & TC_GRPTERM)
1270 n = chain_node(OC_EXEC);
1275 n = chain_node(OC_EXEC);
1276 n->a.n = continue_ptr;
1279 /* delete, next, nextfile, return, exit */
1286 static void parse_program(char *p)
1295 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1296 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1298 if (tclass & TC_OPTERM)
1302 if (tclass & TC_BEGIN) {
1306 } else if (tclass & TC_END) {
1310 } else if (tclass & TC_FUNCDECL) {
1311 next_token(TC_FUNCTION);
1313 f = newfunc(t.string);
1314 f->body.first = NULL;
1316 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1317 v = findvar(ahash, t.string);
1318 v->x.aidx = (f->nargs)++;
1320 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1327 } else if (tclass & TC_OPSEQ) {
1329 cn = chain_node(OC_TEST);
1330 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1331 if (t.tclass & TC_GRPSTART) {
1335 chain_node(OC_PRINT);
1337 cn->r.n = mainseq.last;
1339 } else /* if (tclass & TC_GRPSTART) */ {
1347 /* -------- program execution part -------- */
1349 static node *mk_splitter(char *s, tsplitter *spl)
1357 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1361 if (strlen(s) > 1) {
1362 mk_re_node(s, n, re);
1364 n->info = (uint32_t) *s;
/* Use node as a regular expression. Supplied with node ptr and regex_t
 * storage space. Return ptr to regex (if result points to preg, it should
 * be regfree'd manually later).
 */
1374 static regex_t *as_regex(node *op, regex_t *preg)
1379 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1380 return icase ? op->r.ire : op->l.re;
1383 s = getvar_s(evaluate(op, v));
1384 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1390 /* gradually increasing buffer */
1391 static void qrealloc(char **b, int n, int *size)
1393 if (! *b || n >= *size)
1394 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1397 /* resize field storage space */
1398 static void fsrealloc(int size)
1400 static int maxfields = 0;
1403 if (size >= maxfields) {
1405 maxfields = size + 16;
1406 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1407 for (; i<maxfields; i++) {
1408 Fields[i].type = VF_SPECIAL;
1409 Fields[i].string = NULL;
1413 if (size < nfields) {
1414 for (i=size; i<nfields; i++) {
1421 static int awk_split(char *s, node *spl, char **slist)
1426 regmatch_t pmatch[2];
1428 /* in worst case, each char would be a separate field */
1429 *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1431 c[0] = c[1] = (char)spl->info;
1433 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1435 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1437 l = strcspn(s, c+2);
1438 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1439 pmatch[0].rm_so <= l) {
1440 l = pmatch[0].rm_so;
1441 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1443 pmatch[0].rm_eo = l;
1444 if (*(s+l)) pmatch[0].rm_eo++;
1450 s += pmatch[0].rm_eo;
1453 } else if (c[0] == '\0') { /* null split */
1459 } else if (c[0] != ' ') { /* single-character split */
1461 c[0] = toupper(c[0]);
1462 c[1] = tolower(c[1]);
1465 while ((s1 = strpbrk(s1, c))) {
1469 } else { /* space split */
1471 s = skip_whitespace(s);
1474 while (*s && !isspace(*s))
1482 static void split_f0(void)
1484 static char *fstrings = NULL;
1494 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1497 for (i=0; i<n; i++) {
1498 Fields[i].string = nextword(&s);
1499 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1502 /* set NF manually to avoid side effects */
1504 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1505 V[NF]->number = nfields;
1508 /* perform additional actions when some internal variables changed */
1509 static void handle_special(var *v)
1513 int sl, l, len, i, bsize;
1515 if (! (v->type & VF_SPECIAL))
1519 n = (int)getvar_i(v);
1522 /* recalculate $0 */
1523 sep = getvar_s(V[OFS]);
1527 for (i=0; i<n; i++) {
1528 s = getvar_s(&Fields[i]);
1531 memcpy(b+len, sep, sl);
1534 qrealloc(&b, len+l+sl, &bsize);
1535 memcpy(b+len, s, l);
1538 if (b) b[len] = '\0';
1542 } else if (v == V[F0]) {
1543 is_f0_split = FALSE;
1545 } else if (v == V[FS]) {
1546 mk_splitter(getvar_s(v), &fsplitter);
1548 } else if (v == V[RS]) {
1549 mk_splitter(getvar_s(v), &rsplitter);
1551 } else if (v == V[IGNORECASE]) {
1555 n = getvar_i(V[NF]);
1556 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1557 /* right here v is invalid. Just to note... */
1561 /* step through func/builtin/etc arguments */
1562 static node *nextarg(node **pn)
1567 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1576 static void hashwalk_init(var *v, xhash *array)
1582 if (v->type & VF_WALK)
1586 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1587 *w = *(w+1) = (char *)(w + 2);
1588 for (i=0; i<array->csize; i++) {
1589 hi = array->items[i];
1591 strcpy(*w, hi->name);
1598 static int hashwalk_next(var *v)
1606 setvar_s(v, nextword(w+1));
1610 /* evaluate node, return 1 when result is true, 0 otherwise */
1611 static int ptest(node *pattern)
1614 return istrue(evaluate(pattern, &v));
1617 /* read next record from stream rsm into a variable v */
1618 static int awk_getline(rstream *rsm, var *v)
1621 regmatch_t pmatch[2];
1622 int a, p, pp=0, size;
1623 int fd, so, eo, r, rp;
1626 /* we're using our own buffer since we need access to accumulating
1629 fd = fileno(rsm->F);
1634 c = (char) rsplitter.n.info;
1637 if (! m) qrealloc(&m, 256, &size);
1643 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1644 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1645 b, 1, pmatch, 0) == 0) {
1646 so = pmatch[0].rm_so;
1647 eo = pmatch[0].rm_eo;
1651 } else if (c != '\0') {
1652 s = strchr(b+pp, c);
1653 if (! s) s = memchr(b+pp, '\0', p - pp);
1660 while (b[rp] == '\n')
1662 s = strstr(b+rp, "\n\n");
1665 while (b[eo] == '\n') eo++;
1673 memmove(m, (const void *)(m+a), p+1);
1678 qrealloc(&m, a+p+128, &size);
1681 p += safe_read(fd, b+p, size-p-1);
1685 setvar_i(V[ERRNO], errno);
1694 c = b[so]; b[so] = '\0';
1698 c = b[eo]; b[eo] = '\0';
1699 setvar_s(V[RT], b+so);
1711 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1715 const char *s=format;
1717 if (int_as_int && n == (int)n) {
1718 r = snprintf(b, size, "%d", (int)n);
1720 do { c = *s; } while (*s && *++s);
1721 if (strchr("diouxX", c)) {
1722 r = snprintf(b, size, format, (int)n);
1723 } else if (strchr("eEfgG", c)) {
1724 r = snprintf(b, size, format, n);
1726 runtime_error(EMSG_INV_FMT);
1733 /* formatted output into an allocated buffer, return ptr to buffer */
1734 static char *awk_printf(node *n)
1737 char *fmt, *s, *s1, *f;
1738 int i, j, incr, bsize;
1743 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1748 while (*f && (*f != '%' || *(++f) == '%'))
1750 while (*f && !isalpha(*f))
1753 incr = (f - s) + MAXVARFMT;
1754 qrealloc(&b, incr+i, &bsize);
1755 c = *f; if (c != '\0') f++;
1756 c1 = *f ; *f = '\0';
1757 arg = evaluate(nextarg(&n), v);
1760 if (c == 'c' || !c) {
1761 i += sprintf(b+i, s,
1762 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1764 } else if (c == 's') {
1766 qrealloc(&b, incr+i+strlen(s1), &bsize);
1767 i += sprintf(b+i, s, s1);
1770 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1774 /* if there was an error while sprintf, return value is negative */
1779 b = xrealloc(b, i+1);
/* Common substitution routine:
 * replace the (nm)-th substring of (src) that matches (rn) with (repl), store
 * the result into (dest), return the number of substitutions. If nm=0, replace
 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
 * subexpression matching (\1-\9).
 */
/* NOTE(review): the body below is an excerpt; the initialisation of (sp),
 * (ds), (di), (rl), the match counting against (nm) and the final store
 * into (dest) are not visible here. */
1792 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1796 int c, i, j, di, rl, so, eo, nbs, n, dssize;
/* slot 0 is the whole match, slots 1..9 are subexpressions */
1797 regmatch_t pmatch[10];
/* get a compiled regex for (rn); (sreg) is offered as scratch storage */
1800 re = as_regex(rn, &sreg);
/* a missing source or destination defaults to V[F0] */
1801 if (! src) src = V[F0];
1802 if (! dest) dest = V[F0];
/* once (sp) has moved past the start of the source string, REG_NOTBOL
 * tells regexec that (sp) is not a beginning of line */
1807 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1808 so = pmatch[0].rm_so;
1809 eo = pmatch[0].rm_eo;
/* make room, then copy everything up to the end of the match to (ds) */
1811 qrealloc(&ds, di + eo + rl, &dssize);
1812 memcpy(ds + di, sp, eo);
/* scan the replacement text one character at a time */
1818 for (s = repl; *s; s++) {
/* '&' or, when (ex) is set, a digit: a reference to matched text */
1824 if (c == '&' || (ex && c >= '0' && c <= '9')) {
/* back up over output already written; (nbs) is presumably the count of
 * preceding backslashes - TODO confirm against the full source */
1825 di -= ((nbs + 3) >> 1);
/* splice in the text matched by (sub)expression (j) */
1834 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1835 qrealloc(&ds, di + rl + n, &dssize);
1836 memcpy(ds + di, sp + pmatch[j].rm_so, n);
/* copy one source character to the output; stop once the NUL is copied */
1847 if (! (ds[di++] = *sp++)) break;
/* append whatever is left of the source after the last match */
1851 qrealloc(&ds, di + strlen(sp), &dssize);
1852 strcpy(ds + di, sp);
/* release the regex only when it lives in the local scratch (sreg) */
1854 if (re == &sreg) regfree(re);
/* Run the builtin selected by (op->info & OPNMASK) and store its value in
 * (res). Up to four arguments are collected first: (an[]) holds the
 * argument nodes, (av[]) their evaluated values and (as[]) their string
 * values.
 * NOTE(review): this is an excerpt - the case labels, the computation of
 * (nargs) and the return statement are not visible here. */
1858 static var *exec_builtin(node *op, var *res)
1865 regmatch_t pmatch[2];
1867 static tsplitter tspl;
1876 isr = info = op->info;
1879 av[2] = av[3] = NULL;
/* bits of (isr) decide whether an argument is evaluated (0x09000000) and
 * whether its string value is fetched (0x08000000); (isr) is presumably
 * shifted per argument on a line not shown - TODO confirm */
1880 for (i=0 ; i<4 && op ; i++) {
1881 an[i] = nextarg(&op);
1882 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1883 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
/* the two top bits of (info) hold the minimum argument count */
1888 if (nargs < (info >> 30))
1889 runtime_error(EMSG_TOO_FEW_ARGS);
1891 switch (info & OPNMASK) {
1894 #ifdef CONFIG_FEATURE_AWK_MATH
/* NOTE(review): the first operand is av[i], where (i) is whatever the
 * argument loop above left behind; av[0] looks like the intended operand -
 * confirm against the full source */
1895 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1897 runtime_error(EMSG_NO_MATH);
/* third argument: a regexp node is used as the splitter directly, any
 * other expression is evaluated and turned into one by mk_splitter */
1903 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1904 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1909 n = awk_split(as[0], spl, &s);
/* empty the target array, then store the (n) pieces at indices 1..n */
1911 clear_array(iamarray(av[1]));
1912 for (i=1; i<=n; i++)
1913 setari_u(av[1], i, nextword(&s1));
/* second argument minus one gives a zero-based start, clamped to 0..l */
1920 i = getvar_i(av[1]) - 1;
1921 if (i>l) i=l; if (i<0) i=0;
/* length: third argument if supplied, otherwise the rest (l-i) */
1922 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1925 strncpy(s, as[0]+i, n);
/* bitwise builtins: operands are converted to long first */
1931 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1935 setvar_i(res, ~(long)getvar_i(av[0]));
1939 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1943 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
/* the right shift is done on unsigned long operands */
1947 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1951 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
/* case conversion is applied in place to a copy of the first argument */
1961 s1 = s = xstrdup(as[0]);
1963 *s1 = (*to_xxx)(*s1);
/* (l) is the last offset at which the second string could still fit;
 * (ll) is presumably strlen(as[1]) - TODO confirm */
1972 l = strlen(as[0]) - ll;
1973 if (ll > 0 && l >= 0) {
/* strstr gives a pointer to the hit; the result (n) is 1-based */
1975 s = strstr(as[0], as[1]);
1976 if (s) n = (s - as[0]) + 1;
1978 /* this piece of code is terribly slow and
1979 * really should be rewritten
1981 for (i=0; i<=l; i++) {
1982 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1994 tt = getvar_i(av[1]);
1997 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1998 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
2004 re = as_regex(an[1], &sreg);
2005 n = regexec(re, as[0], 1, pmatch, 0);
2010 pmatch[0].rm_so = 0;
2011 pmatch[0].rm_eo = -1;
2013 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2014 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2015 setvar_i(res, pmatch[0].rm_so);
2016 if (re == &sreg) regfree(re);
2020 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2024 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2028 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2037 /* Evaluate node - the heart of the program. Supplied with a subtree
2038 * and the place where to store the result. Returns a ptr to the result. */
/* Shift (n) right by 8 bits. evaluate() applies it both to the masked
 * opcode class in its switch expression and to the OC_* constants in the
 * case labels, so the two sides stay comparable. */
2040 #define XC(n) ((n) >> 8)
/* Walks the node (op), dispatching on its opcode class, and stores or
 * returns the value through (res).
 * NOTE(review): this is an excerpt - declarations of L, R, X, v1, op1,
 * opinfo, opn, several case labels and the loop/return structure are not
 * visible here; comments below describe only what the shown lines do. */
2042 static var *evaluate(node *op, var *res)
2044 /* This procedure is recursive so we should count every byte */
/* statics: one copy shared by all (recursive) invocations */
2045 static var *fnargs = NULL;
2046 static unsigned int seed = 1;
2047 static regex_t sreg;
2068 return setvar_s(res, NULL);
2075 opn = (short)(opinfo & OPNMASK);
2076 lineno = op->lineno;
2078 /* execute inevitable things */
/* OF_* bits in (opinfo) say which operands to evaluate up front and in
 * which form (var, string, number) they are needed */
2080 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2081 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2082 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2083 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2084 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2086 switch (XC(opinfo & OPCLSMASK)) {
2088 /* -- iterative node type -- */
2092 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2093 /* it's range pattern */
/* OF_CHECKED on the node remembers that the range is open: it is set once
 * the left pattern matches and cleared when the right pattern matches */
2094 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2095 op->info |= OF_CHECKED;
2096 if (ptest(op1->r.n))
2097 op->info &= ~OF_CHECKED;
/* plain pattern: continue with (a.n) on a match, with (r.n) otherwise */
2104 op = (ptest(op1)) ? op->a.n : op->r.n;
2108 /* just evaluate an expression, also used as unconditional jump */
2112 /* branch, used in if-else and various loops */
2114 op = istrue(L.v) ? op->a.n : op->r.n;
2117 /* initialize for-in loop */
2118 case XC( OC_WALKINIT ):
2119 hashwalk_init(L.v, iamarray(R.v));
2122 /* get next array item */
2123 case XC( OC_WALKNEXT ):
2124 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2127 case XC( OC_PRINT ):
2128 case XC( OC_PRINTF ):
/* redirected output: look up (or create) the stream entry named (R.s) */
2131 X.rsm = newfile(R.s);
2134 if((X.rsm->F = popen(R.s, "w")) == NULL)
2135 bb_perror_msg_and_die("popen");
/* opn 'w' truncates the file, anything else appends */
2138 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2144 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2146 fputs(getvar_s(V[F0]), X.F);
2149 L.v = evaluate(nextarg(&op1), v1);
/* numbers are formatted through OFMT, with the int_as_int shortcut on */
2150 if (L.v->type & VF_NUMBER) {
2151 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2152 getvar_i(L.v), TRUE);
2155 fputs(getvar_s(L.v), X.F);
/* OFS goes between items (more arguments pending), ORS after the last */
2158 if (op1) fputs(getvar_s(V[OFS]), X.F);
2161 fputs(getvar_s(V[ORS]), X.F);
2163 } else { /* OC_PRINTF */
2164 L.s = awk_printf(op1);
2171 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2172 X.info = op1->info & OPCLSMASK;
2173 if (X.info == OC_VAR) {
2175 } else if (X.info == OC_FNARG) {
2176 R.v = &fnargs[op1->l.i];
2178 runtime_error(EMSG_NOT_ARRAY);
/* remove the element whose key is the evaluated subscript ... */
2183 L.s = getvar_s(evaluate(op1->r.n, v1));
2184 hash_remove(iamarray(R.v), L.s);
/* ... or wipe the whole array */
2186 clear_array(iamarray(R.v));
2190 case XC( OC_NEWSOURCE ):
2191 programname = op->l.s;
2194 case XC( OC_RETURN ):
2198 case XC( OC_NEXTFILE ):
2209 /* -- recursive node type -- */
2217 case XC( OC_FNARG ):
2218 L.v = &fnargs[op->l.i];
/* with a subscript node present, resolve to the array element keyed by
 * (R.s); otherwise the variable itself is the result */
2221 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 if key (L.s) exists in the array, else 0 */
2225 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2228 case XC( OC_REGEXP ):
2230 L.s = getvar_s(V[F0]);
2233 case XC( OC_MATCH ):
2236 X.re = as_regex(op1, &sreg);
2237 R.i = regexec(X.re, L.s, 0, NULL, 0);
2238 if (X.re == &sreg) regfree(X.re);
/* regexec returns 0 on a match; opn '!' inverts the outcome */
2239 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2243 /* if source is a temporary string, just relink it to dest */
2244 if (R.v == v1+1 && R.v->string) {
2245 res = setvar_p(L.v, R.v->string);
2248 res = copyvar(L.v, R.v);
2252 case XC( OC_TERNARY ):
/* the right child of a ternary has to be the ':' node */
2253 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2254 runtime_error(EMSG_POSSIBLE_ERROR);
2255 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* a function without a body was referenced but never defined */
2259 if (! op->r.f->body.first)
2260 runtime_error(EMSG_UNDEF_FUNC);
/* allocate nargs+1 vars; (X.v) keeps the base while (R.v) walks them */
2262 X.v = R.v = nvalloc(op->r.f->nargs+1);
2264 L.v = evaluate(nextarg(&op1), v1);
/* link the argument slot back to the evaluated actual parameter */
2266 R.v->type |= VF_CHILD;
2267 R.v->x.parent = L.v;
2268 if (++R.v - X.v >= op->r.f->nargs)
2276 res = evaluate(op->r.f->body.first, res);
2283 case XC( OC_GETLINE ):
2284 case XC( OC_PGETLINE ):
2286 X.rsm = newfile(L.s);
2288 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2289 X.rsm->F = popen(L.s, "r");
2290 X.rsm->is_pipe = TRUE;
2292 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
2296 if (! iF) iF = next_input_file();
2301 setvar_i(V[ERRNO], errno);
2309 L.i = awk_getline(X.rsm, R.v);
2319 /* simple builtins */
2320 case XC( OC_FBLTIN ):
/* rand() scaled by RAND_MAX */
2328 R.d = (double)rand() / (double)RAND_MAX;
2331 #ifdef CONFIG_FEATURE_AWK_MATH
2357 runtime_error(EMSG_NO_MATH);
/* new seed: the argument if one was given, else the current time */
2363 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2373 L.s = getvar_s(V[F0]);
/* run the command only when exec is enabled and the string is non-empty;
 * the result is the system() status shifted right by 8 */
2379 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2380 ? (system(L.s) >> 8) : 0;
2388 X.rsm = newfile(L.s);
/* close the named stream: pclose for pipes, fclose otherwise, then drop
 * its buffer and its fdhash entry */
2397 X.rsm = (rstream *)hash_search(fdhash, L.s);
2399 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2400 free(X.rsm->buffer);
2401 hash_remove(fdhash, L.s);
2404 setvar_i(V[ERRNO], errno);
2411 case XC( OC_BUILTIN ):
2412 res = exec_builtin(op, res);
2415 case XC( OC_SPRINTF ):
2416 setvar_p(res, awk_printf(op1));
2419 case XC( OC_UNARY ):
2421 L.d = R.d = getvar_i(R.v);
2436 L.d = istrue(X.v) ? 0 : 1;
2447 case XC( OC_FIELD ):
2448 R.i = (int)getvar_i(R.v);
/* Fields[] is zero-based, so index (R.i) maps to slot R.i-1 */
2456 res = &Fields[R.i-1];
2460 /* concatenation (" ") and index joining (",") */
2461 case XC( OC_CONCAT ):
2462 case XC( OC_COMMA ):
/* size for both strings plus two extra bytes */
2463 opn = strlen(L.s) + strlen(R.s) + 2;
2466 if ((opinfo & OPCLSMASK) == OC_COMMA) {
/* index joining also needs room for SUBSEP */
2467 L.s = getvar_s(V[SUBSEP]);
2468 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
/* the right operand is tested only when the left one is true */
2476 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
/* the right operand is tested only when the left one is false */
2480 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2483 case XC( OC_BINARY ):
2484 case XC( OC_REPLACE ):
2485 R.d = getvar_i(R.v);
2497 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2501 #ifdef CONFIG_FEATURE_AWK_MATH
2502 L.d = pow(L.d, R.d);
2504 runtime_error(EMSG_NO_MATH);
2508 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* remainder: subtract the quotient, truncated via (int), times the divisor.
 * NOTE(review): a quotient outside the range of int would make the cast
 * undefined - confirm whether that can occur */
2509 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into (res); otherwise the value goes into (X.v) */
2512 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2515 case XC( OC_COMPARE ):
/* numeric comparison only if both sides are numeric, otherwise compare
 * as strings (ignoring case when (icase) is set); (L.d) gets the sign */
2516 if (is_numeric(L.v) && is_numeric(R.v)) {
2517 L.d = getvar_i(L.v) - getvar_i(R.v);
2519 L.s = getvar_s(L.v);
2520 R.s = getvar_s(R.v);
2521 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2523 switch (opn & 0xfe) {
/* the low bit of (opn) selects the plain or the negated outcome */
2534 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2538 runtime_error(EMSG_POSSIBLE_ERROR);
2540 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2542 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2552 /* -------- main & co. -------- */
/* Shutdown path: evaluates the END sequence, then goes through every
 * bucket of (fdhash) and pcloses the streams that were opened as pipes.
 * NOTE(review): this is an excerpt - how (r) is used, the walk along each
 * bucket's chain and the actual exit are not visible here. */
2554 static int awk_exit(int r)
2563 evaluate(endseq.first, &tv);
2566 /* waiting for children */
2567 for (i=0; i<fdhash->csize; i++) {
2568 hi = fdhash->items[i];
/* only entries with an open stream that is a pipe need pclose */
2570 if (hi->data.rs.F && hi->data.rs.is_pipe)
2571 pclose(hi->data.rs.F);
2579 /* if expr looks like "var=value", perform assignment and return 1,
2580 * otherwise return 0 */
/* NOTE(review): this is an excerpt - the early-return body, the loop
 * around nextchar() and the cleanup of (exprc) are not visible here. */
2581 static int is_assignment(const char *expr)
2583 char *exprc, *s, *s0, *s1;
/* work on a private copy of (expr), which is const */
2585 exprc = xstrdup(expr);
/* candidate only if it starts with an isalnum_ character and has a '=' */
2586 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* store each character delivered by nextchar() through (s1) */
2594 *(s1++) = nextchar(&s);
/* assign (s0) to the variable named by (exprc); presumably the '=' has
 * been overwritten with NUL by this point - TODO confirm */
2597 setvar_u(newvar(exprc), s0);
2602 /* switch to next input file */
/* NOTE(review): this is an excerpt - the loop over ARGV, the fallback
 * taken when ARGIND+1 >= ARGC and the return value are not visible here. */
2603 static rstream *next_input_file(void)
/* static: the flag keeps its value between calls */
2608 static int files_happen = FALSE;
/* close the previous input, if any, and reset the read positions */
2610 if (rsm.F) fclose(rsm.F);
2612 rsm.pos = rsm.adv = 0;
/* true when no further ARGV entry is left to look at */
2615 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* bump ARGIND and fetch the ARGV element stored under that index */
2621 ind = getvar_s(incvar(V[ARGIND]));
2622 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* empty entries and "var=value" arguments are not opened as files;
 * is_assignment() performs the assignment as a side effect */
2623 if (fname && *fname && !is_assignment(fname))
2624 F = afopen(fname, "r");
2628 files_happen = TRUE;
2629 setvar_s(V[FILENAME], fname);
/* Applet entry point: sets up the hashes and predefined variables, parses
 * options and the program text, fills ARGV, runs the BEGIN sequence, then
 * feeds every input record to the main sequence and finishes via
 * awk_exit().
 * NOTE(review): this is an excerpt - several declarations, loop bodies and
 * the end of the function are not visible here. */
2634 int awk_main(int argc, char **argv)
2637 char *opt_F, *opt_v, *opt_W;
2643 static int from_file = FALSE;
2645 FILE *F, *stdfiles[3];
/* three names in one literal, separated by embedded NULs */
2646 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2648 /* allocate global buffer */
2649 buf = xmalloc(MAXVARFMT+1);
/* one hash each: vhash, ahash, fdhash (open streams) and fnhash */
2651 vhash = hash_init();
2652 ahash = hash_init();
2653 fdhash = hash_init();
2654 fnhash = hash_init();
2656 /* initialize variables */
/* (vNames) and (vValues) are consumed word by word in parallel */
2657 for (i=0; *vNames; i++) {
2658 V[i] = v = newvar(nextword(&vNames));
/* a '\377' in the value list means no string value is set here */
2659 if (*vValues != '\377')
2660 setvar_s(v, nextword(&vValues));
/* a '*' at this position of the name list flags the variable as
 * VF_SPECIAL */
2664 if (*vNames == '*') {
2665 v->type |= VF_SPECIAL;
2670 handle_special(V[FS]);
2671 handle_special(V[RS]);
/* register the three standard streams under the names in (stdnames) */
2673 stdfiles[0] = stdin;
2674 stdfiles[1] = stdout;
2675 stdfiles[2] = stderr;
2676 for (i=0; i<3; i++) {
2677 rsm = newfile(nextword(&stdnames));
2678 rsm->F = stdfiles[i];
/* copy the process environment into the ENVIRON array */
2681 for (envp=environ; *envp; envp++) {
2683 s1 = strchr(s, '=');
2688 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2693 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2694 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2695 if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2696 if (opt & 0x4) { // -f
2698 F = afopen(programname, "r");
2700 /* one byte is reserved for some trick in next_token */
/* seekable file: allocate flen+4 bytes and read starting at s+1 */
2701 if (fseek(F, 0, SEEK_END) == 0) {
2703 s = xmalloc(flen+4);
2704 fseek(F, 0, SEEK_SET);
2705 i = 1 + fread(s+1, 1, flen, F);
/* not seekable: grow the buffer by 4096 and read up to 4094 bytes per
 * pass until fread returns 0 */
2707 for (i=j=1; j>0; i+=j) {
2708 s = (char *)xrealloc(s, i+4096);
2709 j = fread(s+i, 1, 4094, F);
2717 if (opt & 0x8) // -W
2718 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
/* program text taken from the next command-line argument */
2723 programname = "cmd. line";
2724 parse_program(argv[optind++]);
2728 /* fill in ARGV array */
/* ARGV[0] is "awk"; the remaining arguments follow from index 1 */
2729 setvar_i(V[ARGC], argc - optind + 1);
2730 setari_u(V[ARGV], 0, "awk");
2731 for (i = optind; i < argc; i++)
2732 setari_u(V[ARGV], i+1-optind, argv[i]);
/* run the BEGIN sequence; with no main and no END sequence there is
 * nothing left to do */
2734 evaluate(beginseq.first, &tv);
2735 if (! mainseq.first && ! endseq.first)
2736 awk_exit(EXIT_SUCCESS);
2738 /* input file could already be opened in BEGIN block */
2739 if (! iF) iF = next_input_file();
2741 /* passing through input files */
2745 setvar_i(V[FNR], 0);
/* one main-sequence pass per record while awk_getline returns > 0 */
2747 while ((c = awk_getline(iF, V[F0])) > 0) {
2752 evaluate(mainseq.first, &tv);
2759 runtime_error(strerror(errno));
2761 iF = next_input_file();
2765 awk_exit(EXIT_SUCCESS);