1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
/* Flag bits kept in the 'type' member of var (an unsigned short) */
19 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
20 #define VF_ARRAY 0x0002 /* 1 = it's an array */
22 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
23 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
25 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
27 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
30 /* these flags are static, don't change them when value is changed */
/* (clrvar() masks 'type' with this set; copyvar() copies only the bits outside it) */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34 typedef struct var_s {
35 unsigned short type; /* flags */
39 int aidx; /* func arg idx (for compilation stage) */
40 struct xhash_s *array; /* array ptr */
41 struct var_s *parent; /* for func args, ptr to actual parameter */
42 char **walker; /* list of array elements (for..in) */
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
54 typedef struct func_s {
60 typedef struct rstream_s {
66 unsigned short is_pipe;
69 typedef struct hash_item_s {
71 struct var_s v; /* variable/array hash */
72 struct rstream_s rs; /* redirect streams hash */
73 struct func_s f; /* functions hash */
75 struct hash_item_s *next; /* next in chain */
76 char name[1]; /* really it's longer */
79 typedef struct xhash_s {
80 unsigned int nel; /* num of elements */
81 unsigned int csize; /* current hash size */
82 unsigned int nprime; /* next hash size in PRIMES[] */
83 unsigned int glen; /* summary length of item names */
84 struct hash_item_s **items;
88 typedef struct node_s {
90 unsigned short lineno;
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
113 struct nvblock_s *prev;
114 struct nvblock_s *next;
118 typedef struct tsplitter_s {
123 /* simple token classes */
124 /* Order and hex values are very important!!! See next_token() */
125 #define TC_SEQSTART 1 /* ( */
126 #define TC_SEQTERM (1 << 1) /* ) */
127 #define TC_REGEXP (1 << 2) /* /.../ */
128 #define TC_OUTRDR (1 << 3) /* | > >> */
129 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
130 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
131 #define TC_BINOPX (1 << 6) /* two-opnd operator */
132 #define TC_IN (1 << 7)
133 #define TC_COMMA (1 << 8)
134 #define TC_PIPE (1 << 9) /* input redirection pipe */
135 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
136 #define TC_ARRTERM (1 << 11) /* ] */
137 #define TC_GRPSTART (1 << 12) /* { */
138 #define TC_GRPTERM (1 << 13) /* } */
139 #define TC_SEMICOL (1 << 14)
140 #define TC_NEWLINE (1 << 15)
141 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
142 #define TC_WHILE (1 << 17)
143 #define TC_ELSE (1 << 18)
144 #define TC_BUILTIN (1 << 19)
145 #define TC_GETLINE (1 << 20)
146 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
147 #define TC_BEGIN (1 << 22)
148 #define TC_END (1 << 23)
149 #define TC_EOF (1 << 24)
150 #define TC_VARIABLE (1 << 25)
151 #define TC_ARRAY (1 << 26)
152 #define TC_FUNCTION (1 << 27)
153 #define TC_STRING (1 << 28)
154 #define TC_NUMBER (1 << 29)
156 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
158 /* combined token classes */
159 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
164 #define TC_STATEMNT (TC_STATX | TC_WHILE)
165 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
171 /* discard newlines after these */
172 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173 TC_BINOP | TC_OPTERM)
175 /* what can expression begin with */
176 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183 TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
186 #define OF_RES1 0x010000
187 #define OF_RES2 0x020000
188 #define OF_STR1 0x040000
189 #define OF_STR2 0x080000
190 #define OF_NUM1 0x100000
191 #define OF_CHECKED 0x200000
193 /* combined operator flags */
196 #define xS (OF_RES2 | OF_STR2)
198 #define VV (OF_RES1 | OF_RES2)
199 #define Nx (OF_RES1 | OF_NUM1)
200 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx (OF_RES1 | OF_STR1)
202 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
205 #define OPCLSMASK 0xFF00
206 #define OPNMASK 0x007F
208 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
209 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
210 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* P(x): place priority/argument-spec byte x into bits 24..31 of an info word.
 * The argument is parenthesized and widened to uint32_t before shifting, so
 * values with the top bit set (e.g. P(0x83)) do not overflow a signed int. */
#define P(x) ((uint32_t)(x) << 24)
/* mask selecting the 7-bit priority field */
#define PRIMASK 0x7F000000
/* same, but ignoring the lowest priority bit (the even/odd grouping bit) */
#define PRIMASK2 0x7E000000
216 /* Operation classes */
218 #define SHIFT_TIL_THIS 0x0600
219 #define RECUR_FROM_THIS 0x1000
222 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
223 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
225 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
226 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
227 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
229 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
230 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
231 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
232 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
233 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
234 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
235 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
236 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
239 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
243 /* simple builtins */
245 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
246 F_ti, F_le, F_sy, F_ff, F_cl
251 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
253 B_an, B_co, B_ls, B_or, B_rs, B_xo,
256 /* tokens and their corresponding info values */
258 #define NTC "\377" /* switch to next token class (tc<<1) */
261 #define OC_B OC_BUILTIN
263 static char * const tokenlist =
266 "\1/" NTC /* REGEXP */
267 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
268 "\2++" "\2--" NTC /* UOPPOST */
269 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
270 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
271 "\2*=" "\2/=" "\2%=" "\2^="
272 "\1+" "\1-" "\3**=" "\2**"
273 "\1/" "\1%" "\1^" "\1*"
274 "\2!=" "\2>=" "\2<=" "\1>"
275 "\1<" "\2!~" "\1~" "\2&&"
276 "\2||" "\1?" "\1:" NTC
280 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
286 "\2if" "\2do" "\3for" "\5break" /* STATX */
287 "\10continue" "\6delete" "\5print"
288 "\6printf" "\4next" "\10nextfile"
289 "\6return" "\4exit" NTC
293 "\3and" "\5compl" "\6lshift" "\2or"
295 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
296 "\3cos" "\3exp" "\3int" "\3log"
297 "\4rand" "\3sin" "\4sqrt" "\5srand"
298 "\6gensub" "\4gsub" "\5index" "\6length"
299 "\5match" "\5split" "\7sprintf" "\3sub"
300 "\6substr" "\7systime" "\10strftime"
301 "\7tolower" "\7toupper" NTC
303 "\4func" "\10function" NTC
308 static const uint32_t tokeninfo[] = {
313 xS|'a', xS|'w', xS|'|',
314 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
315 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
317 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
318 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
319 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
320 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
321 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
322 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
323 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
324 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
325 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
326 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
327 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
328 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
329 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
330 OC_COLON|xx|P(67)|':',
333 OC_PGETLINE|SV|P(37),
334 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
335 OC_UNARY|xV|P(19)|'!',
341 ST_IF, ST_DO, ST_FOR, OC_BREAK,
342 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
343 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
344 OC_RETURN|Vx, OC_EXIT|Nx,
348 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
349 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
350 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
351 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
352 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
353 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
354 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
355 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
356 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
363 /* internal variable names and their initial values */
364 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
366 CONVFMT=0, OFMT, FS, OFS,
367 ORS, RS, RT, FILENAME,
368 SUBSEP, ARGIND, ARGC, ARGV,
371 ENVIRON, F0, _intvarcount_
374 static char * vNames =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
379 "NR\0" "NF\0*" "IGNORECASE\0*"
380 "ENVIRON\0" "$\0*" "\0";
382 static char * vValues =
383 "%.6g\0" "%.6g\0" " \0" " \0"
384 "\n\0" "\n\0" "\0" "\0"
/* hash size may grow to these values */
/* FIRST_PRIME is the initial table size used by hash_init(); it must expand
 * to a plain expression, hence no trailing semicolon. */
#define FIRST_PRIME 61
/* successive table sizes taken by hash_rebuild(), indexed by xhash.nprime */
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
395 extern char **environ;
397 static var * V[_intvarcount_];
398 static chain beginseq, mainseq, endseq, *seq;
399 static int nextrec, nextfile;
400 static node *break_ptr, *continue_ptr;
402 static xhash *vhash, *ahash, *fdhash, *fnhash;
403 static char *programname;
405 static int is_f0_split;
408 static tsplitter fsplitter, rsplitter;
424 /* function prototypes */
425 static void handle_special(var *);
426 static node *parse_expr(uint32_t);
427 static void chain_group(void);
428 static var *evaluate(node *, var *);
429 static rstream *next_input_file(void);
430 static int fmt_num(char *, int, const char *, double, int);
431 static int awk_exit(int) ATTRIBUTE_NORETURN;
433 /* ---- error handling ---- */
435 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
436 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
437 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
438 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
439 static const char EMSG_INV_FMT[] = "Invalid format specifier";
440 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
441 static const char EMSG_NOT_ARRAY[] = "Not an array";
442 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
443 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
444 #ifndef CONFIG_FEATURE_AWK_MATH
445 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
448 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
449 static void syntax_error(const char * const message)
451 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
454 #define runtime_error(x) syntax_error(x)
457 /* ---- hash stuff ---- */
459 static unsigned int hashidx(const char *name)
463 while (*name) idx = *name++ + (idx << 6) - idx;
467 /* create new hash */
468 static xhash *hash_init(void)
472 newhash = (xhash *)xzalloc(sizeof(xhash));
473 newhash->csize = FIRST_PRIME;
474 newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
479 /* find item in hash, return ptr to data, NULL if not found */
480 static void *hash_search(xhash *hash, const char *name)
484 hi = hash->items [ hashidx(name) % hash->csize ];
486 if (strcmp(hi->name, name) == 0)
493 /* grow hash if it becomes too big */
494 static void hash_rebuild(xhash *hash)
496 unsigned int newsize, i, idx;
497 hash_item **newitems, *hi, *thi;
499 if (hash->nprime == NPRIMES)
502 newsize = PRIMES[hash->nprime++];
503 newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
505 for (i=0; i<hash->csize; i++) {
510 idx = hashidx(thi->name) % newsize;
511 thi->next = newitems[idx];
517 hash->csize = newsize;
518 hash->items = newitems;
521 /* find item in hash, add it if necessary. Return ptr to data */
522 static void *hash_find(xhash *hash, const char *name)
528 hi = hash_search(hash, name);
530 if (++hash->nel / hash->csize > 10)
533 l = strlen(name) + 1;
534 hi = xzalloc(sizeof(hash_item) + l);
535 memcpy(hi->name, name, l);
537 idx = hashidx(name) % hash->csize;
538 hi->next = hash->items[idx];
539 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): look the name up in the given (or the
 * implied global) hash, adding the item if necessary, and return the data
 * pointer cast to the matching type. The whole expansion is parenthesized so
 * that the cast is applied before any postfix operator used by the caller. */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name) ((var *) hash_find(vhash, (name)))
#define newfile(name) ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name) ((func *) hash_find(fnhash, (name)))
550 static void hash_remove(xhash *hash, const char *name)
552 hash_item *hi, **phi;
554 phi = &(hash->items[ hashidx(name) % hash->csize ]);
557 if (strcmp(hi->name, name) == 0) {
558 hash->glen -= (strlen(name) + 1);
568 /* ------ some useful functions ------ */
570 static void skip_spaces(char **s)
574 while(*p == ' ' || *p == '\t' ||
575 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
581 static char *nextword(char **s)
590 static char nextchar(char **s)
596 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
597 if (c == '\\' && *s == pps) c = *((*s)++);
601 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
603 return (isalnum(c) || c == '_');
606 static FILE *afopen(const char *path, const char *mode)
608 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
611 /* -------- working with variables (set/get/copy/etc) -------- */
613 static xhash *iamarray(var *v)
617 while (a->type & VF_CHILD)
620 if (! (a->type & VF_ARRAY)) {
622 a->x.array = hash_init();
627 static void clear_array(xhash *array)
632 for (i=0; i<array->csize; i++) {
633 hi = array->items[i];
637 free(thi->data.v.string);
640 array->items[i] = NULL;
642 array->glen = array->nel = 0;
645 /* clear a variable */
646 static var *clrvar(var *v)
648 if (!(v->type & VF_FSTR))
651 v->type &= VF_DONTTOUCH;
657 /* assign string value to variable */
658 static var *setvar_p(var *v, char *value)
667 /* same as setvar_p but make a copy of string */
668 static var *setvar_s(var *v, const char *value)
670 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
673 /* same as setvar_s but set USER flag */
674 static var *setvar_u(var *v, const char *value)
681 /* set array element to user string */
682 static void setari_u(var *a, int idx, const char *s)
685 static char sidx[12];
687 sprintf(sidx, "%d", idx);
688 v = findvar(iamarray(a), sidx);
692 /* assign numeric value to variable */
693 static var *setvar_i(var *v, double value)
696 v->type |= VF_NUMBER;
702 static char *getvar_s(var *v)
704 /* if v is numeric and has no cached string, convert it to string */
705 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
706 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
707 v->string = xstrdup(buf);
708 v->type |= VF_CACHED;
710 return (v->string == NULL) ? "" : v->string;
713 static double getvar_i(var *v)
717 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
721 v->number = strtod(s, &s);
722 if (v->type & VF_USER) {
730 v->type |= VF_CACHED;
735 static var *copyvar(var *dest, const var *src)
739 dest->type |= (src->type & ~VF_DONTTOUCH);
740 dest->number = src->number;
742 dest->string = xstrdup(src->string);
744 handle_special(dest);
748 static var *incvar(var *v)
750 return setvar_i(v, getvar_i(v)+1.);
753 /* return true if v is number or numeric string */
754 static int is_numeric(var *v)
757 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
760 /* return 1 when value of v corresponds to true, 0 otherwise */
761 static int istrue(var *v)
764 return (v->number == 0) ? 0 : 1;
766 return (v->string && *(v->string)) ? 1 : 0;
769 /* temporary variables allocator. Last allocated should be first freed */
770 static var *nvalloc(int n)
778 if ((cb->pos - cb->nv) + n <= cb->size) break;
783 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
784 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
789 if (pb) pb->next = cb;
795 while (v < cb->pos) {
804 static void nvfree(var *v)
808 if (v < cb->nv || v >= cb->pos)
809 runtime_error(EMSG_INTERNAL_ERROR);
811 for (p=v; p<cb->pos; p++) {
812 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
813 clear_array(iamarray(p));
814 free(p->x.array->items);
817 if (p->type & VF_WALK)
824 while (cb->prev && cb->pos == cb->nv) {
829 /* ------- awk program text parsing ------- */
831 /* Parse the next token pointed to by global pos, place results into global t.
832 * If the token isn't expected, give up (syntax error). Return the token class
834 static uint32_t next_token(uint32_t expected)
841 static int concat_inserted;
842 static uint32_t save_tclass, save_info;
843 static uint32_t ltclass = TC_OPTERM;
849 } else if (concat_inserted) {
851 concat_inserted = FALSE;
852 t.tclass = save_tclass;
863 while (*p != '\n' && *p != '\0') p++;
871 } else if (*p == '\"') {
875 if (*p == '\0' || *p == '\n')
876 syntax_error(EMSG_UNEXP_EOS);
877 *(s++) = nextchar(&p);
883 } else if ((expected & TC_REGEXP) && *p == '/') {
887 if (*p == '\0' || *p == '\n')
888 syntax_error(EMSG_UNEXP_EOS);
889 if ((*s++ = *p++) == '\\') {
891 *(s-1) = bb_process_escape_sequence((const char **)&p);
892 if (*pp == '\\') *s++ = '\\';
893 if (p == pp) *s++ = *p++;
900 } else if (*p == '.' || isdigit(*p)) {
902 t.number = strtod(p, &p);
904 syntax_error(EMSG_UNEXP_TOKEN);
908 /* search for something known */
918 /* if token class is expected, token
919 * matches and it's not a longer word,
920 * then this is what we are looking for
922 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
923 *tl == *p && strncmp(p, tl, l) == 0 &&
924 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
934 /* it's a name (var/array/function),
935 * otherwise it's something wrong
938 syntax_error(EMSG_UNEXP_TOKEN);
941 while(isalnum_(*(++p))) {
946 /* also consume whitespace between functionname and bracket */
947 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
960 /* skipping newlines in some cases */
961 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
964 /* insert concatenation operator when needed */
965 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
966 concat_inserted = TRUE;
970 t.info = OC_CONCAT | SS | P(35);
977 /* Are we ready for this? */
978 if (! (ltclass & expected))
979 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
980 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
/* Request a token rollback: only sets t.rollback.
 * NOTE(review): presumably next_token() then hands out the same token again;
 * the code that reads this flag is not visible here -- confirm. */
985 static void rollback_token(void) { t.rollback = TRUE; }
987 static node *new_node(uint32_t info)
991 n = (node *)xzalloc(sizeof(node));
997 static node *mk_re_node(char *s, node *n, regex_t *re)
1002 xregcomp(re, s, REG_EXTENDED);
1003 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1008 static node *condition(void)
1010 next_token(TC_SEQSTART);
1011 return parse_expr(TC_SEQTERM);
1014 /* parse expression terminated by given argument, return ptr
1015 * to built subtree. Terminator is eaten by parse_expr */
1016 static node *parse_expr(uint32_t iexp)
1025 sn.r.n = glptr = NULL;
1026 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1028 while (! ((tc = next_token(xtc)) & iexp)) {
1029 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1030 /* input redirection (<) attached to glptr node */
1031 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1033 xtc = TC_OPERAND | TC_UOPPRE;
1036 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1037 /* for binary and postfix-unary operators, jump back over
1038 * previous operators with higher priority */
1040 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1041 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1043 if ((t.info & OPCLSMASK) == OC_TERNARY)
1045 cn = vn->a.n->r.n = new_node(t.info);
1047 if (tc & TC_BINOP) {
1049 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1050 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1052 next_token(TC_GETLINE);
1053 /* give maximum priority to this pipe */
1054 cn->info &= ~PRIMASK;
1055 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1059 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1064 /* for operands and prefix-unary operators, attach them
1067 cn = vn->r.n = new_node(t.info);
1069 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1070 if (tc & (TC_OPERAND | TC_REGEXP)) {
1071 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1072 /* one should be very careful with switch on tclass -
1073 * only simple tclasses should be used! */
1078 if ((v = hash_search(ahash, t.string)) != NULL) {
1079 cn->info = OC_FNARG;
1080 cn->l.i = v->x.aidx;
1082 cn->l.v = newvar(t.string);
1084 if (tc & TC_ARRAY) {
1086 cn->r.n = parse_expr(TC_ARRTERM);
1093 v = cn->l.v = xzalloc(sizeof(var));
1095 setvar_i(v, t.number);
1097 setvar_s(v, t.string);
1101 mk_re_node(t.string, cn,
1102 (regex_t *)xzalloc(sizeof(regex_t)*2));
1107 cn->r.f = newfunc(t.string);
1108 cn->l.n = condition();
1112 cn = vn->r.n = parse_expr(TC_SEQTERM);
1118 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1122 cn->l.n = condition();
1131 /* add node to chain. Return ptr to alloc'd node */
1132 static node *chain_node(uint32_t info)
1137 seq->first = seq->last = new_node(0);
1139 if (seq->programname != programname) {
1140 seq->programname = programname;
1141 n = chain_node(OC_NEWSOURCE);
1142 n->l.s = xstrdup(programname);
1147 seq->last = n->a.n = new_node(OC_DONE);
1152 static void chain_expr(uint32_t info)
1156 n = chain_node(info);
1157 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1158 if (t.tclass & TC_GRPTERM)
1162 static node *chain_loop(node *nn)
1164 node *n, *n2, *save_brk, *save_cont;
1166 save_brk = break_ptr;
1167 save_cont = continue_ptr;
1169 n = chain_node(OC_BR | Vx);
1170 continue_ptr = new_node(OC_EXEC);
1171 break_ptr = new_node(OC_EXEC);
1173 n2 = chain_node(OC_EXEC | Vx);
1176 continue_ptr->a.n = n2;
1177 break_ptr->a.n = n->r.n = seq->last;
1179 continue_ptr = save_cont;
1180 break_ptr = save_brk;
1185 /* parse group and attach it to chain */
1186 static void chain_group(void)
1192 c = next_token(TC_GRPSEQ);
1193 } while (c & TC_NEWLINE);
1195 if (c & TC_GRPSTART) {
1196 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1197 if (t.tclass & TC_NEWLINE) continue;
1201 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1203 chain_expr(OC_EXEC | Vx);
1204 } else { /* TC_STATEMNT */
1205 switch (t.info & OPCLSMASK) {
1207 n = chain_node(OC_BR | Vx);
1208 n->l.n = condition();
1210 n2 = chain_node(OC_EXEC);
1212 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1214 n2->a.n = seq->last;
1222 n = chain_loop(NULL);
1227 n2 = chain_node(OC_EXEC);
1228 n = chain_loop(NULL);
1230 next_token(TC_WHILE);
1231 n->l.n = condition();
1235 next_token(TC_SEQSTART);
1236 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1237 if (t.tclass & TC_SEQTERM) { /* for-in */
1238 if ((n2->info & OPCLSMASK) != OC_IN)
1239 syntax_error(EMSG_UNEXP_TOKEN);
1240 n = chain_node(OC_WALKINIT | VV);
1243 n = chain_loop(NULL);
1244 n->info = OC_WALKNEXT | Vx;
1246 } else { /* for(;;) */
1247 n = chain_node(OC_EXEC | Vx);
1249 n2 = parse_expr(TC_SEMICOL);
1250 n3 = parse_expr(TC_SEQTERM);
1260 n = chain_node(t.info);
1261 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1262 if (t.tclass & TC_OUTRDR) {
1264 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266 if (t.tclass & TC_GRPTERM)
1271 n = chain_node(OC_EXEC);
1276 n = chain_node(OC_EXEC);
1277 n->a.n = continue_ptr;
1280 /* delete, next, nextfile, return, exit */
1288 static void parse_program(char *p)
1297 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1298 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1300 if (tclass & TC_OPTERM)
1304 if (tclass & TC_BEGIN) {
1308 } else if (tclass & TC_END) {
1312 } else if (tclass & TC_FUNCDECL) {
1313 next_token(TC_FUNCTION);
1315 f = newfunc(t.string);
1316 f->body.first = NULL;
1318 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1319 v = findvar(ahash, t.string);
1320 v->x.aidx = (f->nargs)++;
1322 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1329 } else if (tclass & TC_OPSEQ) {
1331 cn = chain_node(OC_TEST);
1332 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1333 if (t.tclass & TC_GRPSTART) {
1337 chain_node(OC_PRINT);
1339 cn->r.n = mainseq.last;
1341 } else /* if (tclass & TC_GRPSTART) */ {
1349 /* -------- program execution part -------- */
1351 static node *mk_splitter(char *s, tsplitter *spl)
1359 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1363 if (strlen(s) > 1) {
1364 mk_re_node(s, n, re);
1366 n->info = (uint32_t) *s;
1372 /* Use node as a regular expression. Takes the node ptr and regex_t
1373 * storage space. Returns ptr to regex (if the result points to preg, it
1374 * must be regfree'd manually later)
1376 static regex_t *as_regex(node *op, regex_t *preg)
1381 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1382 return icase ? op->r.ire : op->l.re;
1385 s = getvar_s(evaluate(op, v));
1386 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1392 /* gradually increasing buffer */
1393 static void qrealloc(char **b, int n, int *size)
1395 if (! *b || n >= *size)
1396 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1399 /* resize field storage space */
1400 static void fsrealloc(int size)
1402 static int maxfields = 0;
1405 if (size >= maxfields) {
1407 maxfields = size + 16;
1408 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1409 for (; i<maxfields; i++) {
1410 Fields[i].type = VF_SPECIAL;
1411 Fields[i].string = NULL;
1415 if (size < nfields) {
1416 for (i=size; i<nfields; i++) {
1423 static int awk_split(char *s, node *spl, char **slist)
1428 regmatch_t pmatch[2];
1430 /* in worst case, each char would be a separate field */
1431 *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1433 c[0] = c[1] = (char)spl->info;
1435 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1437 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1439 l = strcspn(s, c+2);
1440 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1441 pmatch[0].rm_so <= l) {
1442 l = pmatch[0].rm_so;
1443 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1445 pmatch[0].rm_eo = l;
1446 if (*(s+l)) pmatch[0].rm_eo++;
1452 s += pmatch[0].rm_eo;
1455 } else if (c[0] == '\0') { /* null split */
1461 } else if (c[0] != ' ') { /* single-character split */
1463 c[0] = toupper(c[0]);
1464 c[1] = tolower(c[1]);
1467 while ((s1 = strpbrk(s1, c))) {
1471 } else { /* space split */
1473 s = skip_whitespace(s);
1476 while (*s && !isspace(*s))
1484 static void split_f0(void)
1486 static char *fstrings = NULL;
1496 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1499 for (i=0; i<n; i++) {
1500 Fields[i].string = nextword(&s);
1501 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1504 /* set NF manually to avoid side effects */
1506 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1507 V[NF]->number = nfields;
1510 /* perform additional actions when some internal variables changed */
1511 static void handle_special(var *v)
1515 int sl, l, len, i, bsize;
1517 if (! (v->type & VF_SPECIAL))
1521 n = (int)getvar_i(v);
1524 /* recalculate $0 */
1525 sep = getvar_s(V[OFS]);
1529 for (i=0; i<n; i++) {
1530 s = getvar_s(&Fields[i]);
1533 memcpy(b+len, sep, sl);
1536 qrealloc(&b, len+l+sl, &bsize);
1537 memcpy(b+len, s, l);
1540 if (b) b[len] = '\0';
1544 } else if (v == V[F0]) {
1545 is_f0_split = FALSE;
1547 } else if (v == V[FS]) {
1548 mk_splitter(getvar_s(v), &fsplitter);
1550 } else if (v == V[RS]) {
1551 mk_splitter(getvar_s(v), &rsplitter);
1553 } else if (v == V[IGNORECASE]) {
1557 n = getvar_i(V[NF]);
1558 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1559 /* right here v is invalid. Just to note... */
1563 /* step through func/builtin/etc arguments */
1564 static node *nextarg(node **pn)
1569 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1578 static void hashwalk_init(var *v, xhash *array)
1584 if (v->type & VF_WALK)
1588 w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
1589 *w = *(w+1) = (char *)(w + 2);
1590 for (i=0; i<array->csize; i++) {
1591 hi = array->items[i];
1593 strcpy(*w, hi->name);
1600 static int hashwalk_next(var *v)
1608 setvar_s(v, nextword(w+1));
1612 /* evaluate node, return 1 when result is true, 0 otherwise */
1613 static int ptest(node *pattern)
1616 return istrue(evaluate(pattern, &v));
1619 /* read next record from stream rsm into a variable v */
1620 static int awk_getline(rstream *rsm, var *v)
1623 regmatch_t pmatch[2];
1624 int a, p, pp=0, size;
1625 int fd, so, eo, r, rp;
1628 /* we're using our own buffer since we need access to accumulating
1631 fd = fileno(rsm->F);
1636 c = (char) rsplitter.n.info;
1639 if (! m) qrealloc(&m, 256, &size);
1645 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1646 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1647 b, 1, pmatch, 0) == 0) {
1648 so = pmatch[0].rm_so;
1649 eo = pmatch[0].rm_eo;
1653 } else if (c != '\0') {
1654 s = strchr(b+pp, c);
1655 if (! s) s = memchr(b+pp, '\0', p - pp);
1662 while (b[rp] == '\n')
1664 s = strstr(b+rp, "\n\n");
1667 while (b[eo] == '\n') eo++;
1675 memmove(m, (const void *)(m+a), p+1);
1680 qrealloc(&m, a+p+128, &size);
1683 p += safe_read(fd, b+p, size-p-1);
1687 setvar_i(V[ERRNO], errno);
1696 c = b[so]; b[so] = '\0';
1700 c = b[eo]; b[eo] = '\0';
1701 setvar_s(V[RT], b+so);
1713 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1717 const char *s=format;
1719 if (int_as_int && n == (int)n) {
1720 r = snprintf(b, size, "%d", (int)n);
1722 do { c = *s; } while (*s && *++s);
1723 if (strchr("diouxX", c)) {
1724 r = snprintf(b, size, format, (int)n);
1725 } else if (strchr("eEfgG", c)) {
1726 r = snprintf(b, size, format, n);
1728 runtime_error(EMSG_INV_FMT);
1735 /* formatted output into an allocated buffer, return ptr to buffer */
1736 static char *awk_printf(node *n)
1739 char *fmt, *s, *s1, *f;
1740 int i, j, incr, bsize;
1745 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1750 while (*f && (*f != '%' || *(++f) == '%'))
1752 while (*f && !isalpha(*f))
1755 incr = (f - s) + MAXVARFMT;
1756 qrealloc(&b, incr+i, &bsize);
1757 c = *f; if (c != '\0') f++;
1758 c1 = *f ; *f = '\0';
1759 arg = evaluate(nextarg(&n), v);
1762 if (c == 'c' || !c) {
1763 i += sprintf(b+i, s,
1764 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1766 } else if (c == 's') {
1768 qrealloc(&b, incr+i+strlen(s1), &bsize);
1769 i += sprintf(b+i, s, s1);
1772 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1776 /* if there was an error while sprintf, return value is negative */
1781 b = xrealloc(b, i+1);
1788 /* Common substitution routine:
1789 * replace the (nm)th substring of (src) that matches (rn) with (repl), store
1790 * the result into (dest), return the number of substitutions. If nm=0, replace
1791 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1792 * subexpression matching (\1-\9)
1794 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1798 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1799 regmatch_t pmatch[10];
1802 re = as_regex(rn, &sreg);
1803 if (! src) src = V[F0];
1804 if (! dest) dest = V[F0];
1809 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1810 so = pmatch[0].rm_so;
1811 eo = pmatch[0].rm_eo;
1813 qrealloc(&ds, di + eo + rl, &dssize);
1814 memcpy(ds + di, sp, eo);
1820 for (s = repl; *s; s++) {
1826 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1827 di -= ((nbs + 3) >> 1);
1836 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1837 qrealloc(&ds, di + rl + n, &dssize);
1838 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1849 if (! (ds[di++] = *sp++)) break;
1853 qrealloc(&ds, di + strlen(sp), &dssize);
1854 strcpy(ds + di, sp);
1856 if (re == &sreg) regfree(re);
1860 static var *exec_builtin(node *op, var *res)
1867 regmatch_t pmatch[2];
1869 static tsplitter tspl;
1878 isr = info = op->info;
1881 av[2] = av[3] = NULL;
1882 for (i=0 ; i<4 && op ; i++) {
1883 an[i] = nextarg(&op);
1884 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1885 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1890 if (nargs < (info >> 30))
1891 runtime_error(EMSG_TOO_FEW_ARGS);
1893 switch (info & OPNMASK) {
1896 #ifdef CONFIG_FEATURE_AWK_MATH
1897 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1899 runtime_error(EMSG_NO_MATH);
1905 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1906 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1911 n = awk_split(as[0], spl, &s);
1913 clear_array(iamarray(av[1]));
1914 for (i=1; i<=n; i++)
1915 setari_u(av[1], i, nextword(&s1));
1922 i = getvar_i(av[1]) - 1;
1923 if (i>l) i=l; if (i<0) i=0;
1924 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1927 strncpy(s, as[0]+i, n);
1933 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1937 setvar_i(res, ~(long)getvar_i(av[0]));
1941 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1945 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1949 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1953 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
1963 s1 = s = xstrdup(as[0]);
1965 *s1 = (*to_xxx)(*s1);
1974 l = strlen(as[0]) - ll;
1975 if (ll > 0 && l >= 0) {
1977 s = strstr(as[0], as[1]);
1978 if (s) n = (s - as[0]) + 1;
1980 /* this piece of code is terribly slow and
1981 * really should be rewritten
1983 for (i=0; i<=l; i++) {
1984 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1996 tt = getvar_i(av[1]);
1999 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2000 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
2006 re = as_regex(an[1], &sreg);
2007 n = regexec(re, as[0], 1, pmatch, 0);
2012 pmatch[0].rm_so = 0;
2013 pmatch[0].rm_eo = -1;
2015 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2016 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2017 setvar_i(res, pmatch[0].rm_so);
2018 if (re == &sreg) regfree(re);
2022 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2026 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2030 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2039 * Evaluate node - the heart of the program. Supplied with subtree
2040 * and place where to store result. returns ptr to result.
2042 #define XC(n) ((n) >> 8)
2044 static var *evaluate(node *op, var *res)
2046 /* This procedure is recursive so we should count every byte */
2047 static var *fnargs = NULL;
2048 static unsigned int seed = 1;
2049 static regex_t sreg;
2070 return setvar_s(res, NULL);
2077 opn = (short)(opinfo & OPNMASK);
2078 lineno = op->lineno;
2080 /* execute inevitable things */
2082 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2083 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2084 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2085 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2086 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2088 switch (XC(opinfo & OPCLSMASK)) {
2090 /* -- iterative node type -- */
2094 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2095 /* it's range pattern */
2096 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2097 op->info |= OF_CHECKED;
2098 if (ptest(op1->r.n))
2099 op->info &= ~OF_CHECKED;
2106 op = (ptest(op1)) ? op->a.n : op->r.n;
2110 /* just evaluate an expression, also used as unconditional jump */
2114 /* branch, used in if-else and various loops */
2116 op = istrue(L.v) ? op->a.n : op->r.n;
2119 /* initialize for-in loop */
2120 case XC( OC_WALKINIT ):
2121 hashwalk_init(L.v, iamarray(R.v));
2124 /* get next array item */
2125 case XC( OC_WALKNEXT ):
2126 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2129 case XC( OC_PRINT ):
2130 case XC( OC_PRINTF ):
2133 X.rsm = newfile(R.s);
2136 if((X.rsm->F = popen(R.s, "w")) == NULL)
2137 bb_perror_msg_and_die("popen");
2140 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2146 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2148 fputs(getvar_s(V[F0]), X.F);
2151 L.v = evaluate(nextarg(&op1), v1);
2152 if (L.v->type & VF_NUMBER) {
2153 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2154 getvar_i(L.v), TRUE);
2157 fputs(getvar_s(L.v), X.F);
2160 if (op1) fputs(getvar_s(V[OFS]), X.F);
2163 fputs(getvar_s(V[ORS]), X.F);
2165 } else { /* OC_PRINTF */
2166 L.s = awk_printf(op1);
2173 case XC( OC_DELETE ):
2174 X.info = op1->info & OPCLSMASK;
2175 if (X.info == OC_VAR) {
2177 } else if (X.info == OC_FNARG) {
2178 R.v = &fnargs[op1->l.i];
2180 runtime_error(EMSG_NOT_ARRAY);
2185 L.s = getvar_s(evaluate(op1->r.n, v1));
2186 hash_remove(iamarray(R.v), L.s);
2188 clear_array(iamarray(R.v));
2192 case XC( OC_NEWSOURCE ):
2193 programname = op->l.s;
2196 case XC( OC_RETURN ):
2200 case XC( OC_NEXTFILE ):
2211 /* -- recursive node type -- */
2219 case XC( OC_FNARG ):
2220 L.v = &fnargs[op->l.i];
2223 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2227 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2230 case XC( OC_REGEXP ):
2232 L.s = getvar_s(V[F0]);
2235 case XC( OC_MATCH ):
2238 X.re = as_regex(op1, &sreg);
2239 R.i = regexec(X.re, L.s, 0, NULL, 0);
2240 if (X.re == &sreg) regfree(X.re);
2241 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2245 /* if source is a temporary string, just relink it to dest */
2246 if (R.v == v1+1 && R.v->string) {
2247 res = setvar_p(L.v, R.v->string);
2250 res = copyvar(L.v, R.v);
2254 case XC( OC_TERNARY ):
2255 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2256 runtime_error(EMSG_POSSIBLE_ERROR);
2257 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2261 if (! op->r.f->body.first)
2262 runtime_error(EMSG_UNDEF_FUNC);
2264 X.v = R.v = nvalloc(op->r.f->nargs+1);
2266 L.v = evaluate(nextarg(&op1), v1);
2268 R.v->type |= VF_CHILD;
2269 R.v->x.parent = L.v;
2270 if (++R.v - X.v >= op->r.f->nargs)
2278 res = evaluate(op->r.f->body.first, res);
2285 case XC( OC_GETLINE ):
2286 case XC( OC_PGETLINE ):
2288 X.rsm = newfile(L.s);
2290 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2291 X.rsm->F = popen(L.s, "r");
2292 X.rsm->is_pipe = TRUE;
2294 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
2298 if (! iF) iF = next_input_file();
2303 setvar_i(V[ERRNO], errno);
2311 L.i = awk_getline(X.rsm, R.v);
2321 /* simple builtins */
2322 case XC( OC_FBLTIN ):
2330 R.d = (double)rand() / (double)RAND_MAX;
2333 #ifdef CONFIG_FEATURE_AWK_MATH
2359 runtime_error(EMSG_NO_MATH);
2365 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2375 L.s = getvar_s(V[F0]);
2381 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2389 X.rsm = newfile(L.s);
2398 X.rsm = (rstream *)hash_search(fdhash, L.s);
2400 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2401 free(X.rsm->buffer);
2402 hash_remove(fdhash, L.s);
2405 setvar_i(V[ERRNO], errno);
2412 case XC( OC_BUILTIN ):
2413 res = exec_builtin(op, res);
2416 case XC( OC_SPRINTF ):
2417 setvar_p(res, awk_printf(op1));
2420 case XC( OC_UNARY ):
2422 L.d = R.d = getvar_i(R.v);
2437 L.d = istrue(X.v) ? 0 : 1;
2448 case XC( OC_FIELD ):
2449 R.i = (int)getvar_i(R.v);
2457 res = &Fields[R.i-1];
2461 /* concatenation (" ") and index joining (",") */
2462 case XC( OC_CONCAT ):
2463 case XC( OC_COMMA ):
2464 opn = strlen(L.s) + strlen(R.s) + 2;
2465 X.s = (char *)xmalloc(opn);
2467 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2468 L.s = getvar_s(V[SUBSEP]);
2469 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
2477 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2481 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2484 case XC( OC_BINARY ):
2485 case XC( OC_REPLACE ):
2486 R.d = getvar_i(R.v);
2498 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2502 #ifdef CONFIG_FEATURE_AWK_MATH
2503 L.d = pow(L.d, R.d);
2505 runtime_error(EMSG_NO_MATH);
2509 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2510 L.d -= (int)(L.d / R.d) * R.d;
2513 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2516 case XC( OC_COMPARE ):
2517 if (is_numeric(L.v) && is_numeric(R.v)) {
2518 L.d = getvar_i(L.v) - getvar_i(R.v);
2520 L.s = getvar_s(L.v);
2521 R.s = getvar_s(R.v);
2522 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2524 switch (opn & 0xfe) {
2535 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2539 runtime_error(EMSG_POSSIBLE_ERROR);
2541 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2543 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2553 /* -------- main & co. -------- */
2555 static int awk_exit(int r)
2564 evaluate(endseq.first, &tv);
2567 /* waiting for children */
2568 for (i=0; i<fdhash->csize; i++) {
2569 hi = fdhash->items[i];
2571 if (hi->data.rs.F && hi->data.rs.is_pipe)
2572 pclose(hi->data.rs.F);
2580 /* if expr looks like "var=value", perform assignment and return 1,
2581 * otherwise return 0 */
2582 static int is_assignment(const char *expr)
2584 char *exprc, *s, *s0, *s1;
2586 exprc = xstrdup(expr);
2587 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2595 *(s1++) = nextchar(&s);
2598 setvar_u(newvar(exprc), s0);
2603 /* switch to next input file */
2604 static rstream *next_input_file(void)
2609 static int files_happen = FALSE;
2611 if (rsm.F) fclose(rsm.F);
2613 rsm.pos = rsm.adv = 0;
2616 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2622 ind = getvar_s(incvar(V[ARGIND]));
2623 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2624 if (fname && *fname && !is_assignment(fname))
2625 F = afopen(fname, "r");
2629 files_happen = TRUE;
2630 setvar_s(V[FILENAME], fname);
2635 int awk_main(int argc, char **argv)
2638 char *opt_F, *opt_v, *opt_W;
2644 static int from_file = FALSE;
2646 FILE *F, *stdfiles[3];
2647 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2649 /* allocate global buffer */
2650 buf = xmalloc(MAXVARFMT+1);
2652 vhash = hash_init();
2653 ahash = hash_init();
2654 fdhash = hash_init();
2655 fnhash = hash_init();
2657 /* initialize variables */
2658 for (i=0; *vNames; i++) {
2659 V[i] = v = newvar(nextword(&vNames));
2660 if (*vValues != '\377')
2661 setvar_s(v, nextword(&vValues));
2665 if (*vNames == '*') {
2666 v->type |= VF_SPECIAL;
2671 handle_special(V[FS]);
2672 handle_special(V[RS]);
2674 stdfiles[0] = stdin;
2675 stdfiles[1] = stdout;
2676 stdfiles[2] = stderr;
2677 for (i=0; i<3; i++) {
2678 rsm = newfile(nextword(&stdnames));
2679 rsm->F = stdfiles[i];
2682 for (envp=environ; *envp; envp++) {
2684 s1 = strchr(s, '=');
2689 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2694 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2695 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2696 if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2697 if (opt & 0x4) { // -f
2699 F = afopen(programname, "r");
2701 /* one byte is reserved for some trick in next_token */
2702 if (fseek(F, 0, SEEK_END) == 0) {
2704 s = (char *)xmalloc(flen+4);
2705 fseek(F, 0, SEEK_SET);
2706 i = 1 + fread(s+1, 1, flen, F);
2708 for (i=j=1; j>0; i+=j) {
2709 s = (char *)xrealloc(s, i+4096);
2710 j = fread(s+i, 1, 4094, F);
2718 if (opt & 0x8) // -W
2719 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2724 programname = "cmd. line";
2725 parse_program(argv[optind++]);
2729 /* fill in ARGV array */
2730 setvar_i(V[ARGC], argc - optind + 1);
2731 setari_u(V[ARGV], 0, "awk");
2732 for(i=optind; i < argc; i++)
2733 setari_u(V[ARGV], i+1-optind, argv[i]);
2735 evaluate(beginseq.first, &tv);
2736 if (! mainseq.first && ! endseq.first)
2737 awk_exit(EXIT_SUCCESS);
2739 /* input file could already be opened in BEGIN block */
2740 if (! iF) iF = next_input_file();
2742 /* passing through input files */
2746 setvar_i(V[FNR], 0);
2748 while ((c = awk_getline(iF, V[F0])) > 0) {
2753 evaluate(mainseq.first, &tv);
2760 runtime_error(strerror(errno));
2762 iF = next_input_file();
2766 awk_exit(EXIT_SUCCESS);