1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
42 #define VF_ARRAY 0x0002 /* 1 = it's an array */
44 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
45 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
47 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
49 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
52 /* these flags are static, don't change them when value is changed */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
56 typedef struct var_s {
57 unsigned short type; /* flags */
61 int aidx; /* func arg index (on compilation stage) */
62 struct xhash_s *array; /* array ptr */
63 struct var_s *parent; /* for func args, ptr to actual parameter */
64 char **walker; /* list of array elements (for..in) */
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
76 typedef struct func_s {
82 typedef struct rstream_s {
87 unsigned short is_pipe;
90 typedef struct hash_item_s {
92 struct var_s v; /* variable/array hash */
93 struct rstream_s rs; /* redirect streams hash */
94 struct func_s f; /* functions hash */
96 struct hash_item_s *next; /* next in chain */
97 char name[1]; /* really it's longer */
100 typedef struct xhash_s {
101 unsigned int nel; /* num of elements */
102 unsigned int csize; /* current hash size */
103 unsigned int nprime; /* next hash size in PRIMES[] */
104 unsigned int glen; /* summary length of item names */
105 struct hash_item_s **items;
109 typedef struct node_s {
111 unsigned short lineno;
130 /* Block of temporary variables */
131 typedef struct nvblock_s {
134 struct nvblock_s *prev;
135 struct nvblock_s *next;
139 typedef struct tsplitter_s {
144 /* simple token classes */
145 /* Order and hex values are very important!!! See next_token() */
146 #define TC_SEQSTART 1 /* ( */
147 #define TC_SEQTERM (1 << 1) /* ) */
148 #define TC_REGEXP (1 << 2) /* /.../ */
149 #define TC_OUTRDR (1 << 3) /* | > >> */
150 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
151 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
152 #define TC_BINOPX (1 << 6) /* two-opnd operator */
153 #define TC_IN (1 << 7)
154 #define TC_COMMA (1 << 8)
155 #define TC_PIPE (1 << 9) /* input redirection pipe */
156 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
157 #define TC_ARRTERM (1 << 11) /* ] */
158 #define TC_GRPSTART (1 << 12) /* { */
159 #define TC_GRPTERM (1 << 13) /* } */
160 #define TC_SEMICOL (1 << 14)
161 #define TC_NEWLINE (1 << 15)
162 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
163 #define TC_WHILE (1 << 17)
164 #define TC_ELSE (1 << 18)
165 #define TC_BUILTIN (1 << 19)
166 #define TC_GETLINE (1 << 20)
167 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
168 #define TC_BEGIN (1 << 22)
169 #define TC_END (1 << 23)
170 #define TC_EOF (1 << 24)
171 #define TC_VARIABLE (1 << 25)
172 #define TC_ARRAY (1 << 26)
173 #define TC_FUNCTION (1 << 27)
174 #define TC_STRING (1 << 28)
175 #define TC_NUMBER (1 << 29)
177 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
179 /* combined token classes */
180 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
181 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
182 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
183 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185 #define TC_STATEMNT (TC_STATX | TC_WHILE)
186 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
188 /* word tokens, cannot mean something else if not expected */
189 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
190 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192 /* discard newlines after these */
193 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
194 TC_BINOP | TC_OPTERM)
196 /* what can expression begin with */
197 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
198 /* what can group begin with */
199 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
202 /* operator is inserted between them */
203 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
204 TC_STRING | TC_NUMBER | TC_UOPPOST)
205 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
207 #define OF_RES1 0x010000
208 #define OF_RES2 0x020000
209 #define OF_STR1 0x040000
210 #define OF_STR2 0x080000
211 #define OF_NUM1 0x100000
212 #define OF_CHECKED 0x200000
214 /* combined operator flags */
217 #define xS (OF_RES2 | OF_STR2)
219 #define VV (OF_RES1 | OF_RES2)
220 #define Nx (OF_RES1 | OF_NUM1)
221 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
222 #define Sx (OF_RES1 | OF_STR1)
223 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
224 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226 #define OPCLSMASK 0xFF00
227 #define OPNMASK 0x007F
/* Operator priority lives in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() widens its argument to unsigned long before shifting. Builtin
 * descriptors such as P(0x83) set bit 31; shifting a plain 32-bit int that
 * far is undefined, and the negative result typically sign-extends when it is
 * stored in an unsigned long wider than 32 bits. The argument is also
 * parenthesized so that P(a | b) shifts the whole expression.
 */
#define P(x)		((unsigned long)(x) << 24)
#define PRIMASK		0x7F000000
#define PRIMASK2	0x7E000000
237 /* Operation classes */
239 #define SHIFT_TIL_THIS 0x0600
240 #define RECUR_FROM_THIS 0x1000
243 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
244 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
246 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
247 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
248 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
250 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
251 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
252 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
253 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
254 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
255 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
256 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
257 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
260 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
264 /* simple builtins */
266 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
267 F_ti, F_le, F_sy, F_ff, F_cl
272 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
276 /* tokens and their corresponding info values */
278 #define NTC "\377" /* switch to next token class (tc<<1) */
281 #define OC_B OC_BUILTIN
283 static char * const tokenlist =
286 "\1/" NTC /* REGEXP */
287 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
288 "\2++" "\2--" NTC /* UOPPOST */
289 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
290 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
291 "\2*=" "\2/=" "\2%=" "\2^="
292 "\1+" "\1-" "\3**=" "\2**"
293 "\1/" "\1%" "\1^" "\1*"
294 "\2!=" "\2>=" "\2<=" "\1>"
295 "\1<" "\2!~" "\1~" "\2&&"
296 "\2||" "\1?" "\1:" NTC
300 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
306 "\2if" "\2do" "\3for" "\5break" /* STATX */
307 "\10continue" "\6delete" "\5print"
308 "\6printf" "\4next" "\10nextfile"
309 "\6return" "\4exit" NTC
313 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
314 "\3cos" "\3exp" "\3int" "\3log"
315 "\4rand" "\3sin" "\4sqrt" "\5srand"
316 "\6gensub" "\4gsub" "\5index" "\6length"
317 "\5match" "\5split" "\7sprintf" "\3sub"
318 "\6substr" "\7systime" "\10strftime"
319 "\7tolower" "\7toupper" NTC
321 "\4func" "\10function" NTC
326 static unsigned long tokeninfo[] = {
331 xS|'a', xS|'w', xS|'|',
332 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
333 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
335 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
336 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
337 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
338 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
339 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
340 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
341 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
342 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
343 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
344 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
345 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
346 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
347 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
348 OC_COLON|xx|P(67)|':',
351 OC_PGETLINE|SV|P(37),
352 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
353 OC_UNARY|xV|P(19)|'!',
359 ST_IF, ST_DO, ST_FOR, OC_BREAK,
360 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
361 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
362 OC_RETURN|Vx, OC_EXIT|Nx,
366 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
367 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
368 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
369 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
370 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
371 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
372 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
379 /* internal variable names and their initial values */
380 /* asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
382 CONVFMT=0, OFMT, FS, OFS,
383 ORS, RS, RT, FILENAME,
384 SUBSEP, ARGIND, ARGC, ARGV,
387 ENVIRON, F0, _intvarcount_
390 static char * vNames =
391 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
392 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
393 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
395 "NR\0" "NF\0*" "IGNORECASE\0*"
396 "ENVIRON\0" "$\0*" "\0";
398 static char * vValues =
399 "%.6g\0" "%.6g\0" " \0" " \0"
400 "\n\0" "\n\0" "\0" "\0"
/* hash size may grow to these values */
/* No trailing ';' on the macro: it must stay usable inside expressions. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
/* Element count is derived from the array itself, so it follows its type. */
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
411 extern char **environ;
413 static var * V[_intvarcount_];
414 static chain beginseq, mainseq, endseq, *seq;
415 static int nextrec, nextfile;
416 static node *break_ptr, *continue_ptr;
418 static xhash *vhash, *ahash, *fdhash, *fnhash;
419 static char *programname;
421 static int is_f0_split;
422 static int nfields = 0;
423 static var *Fields = NULL;
424 static tsplitter fsplitter, rsplitter;
425 static nvblock *cb = NULL;
428 static int icase = FALSE;
431 unsigned long tclass;
439 /* function prototypes */
440 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
441 static void handle_special(var *);
442 static node *parse_expr(unsigned long);
443 static void chain_group(void);
444 static var *evaluate(node *, var *);
445 static rstream *next_input_file(void);
446 static int fmt_num(char *, int, char *, double, int);
447 static int awk_exit(int);
449 /* ---- error handling ---- */
451 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
452 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
453 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
454 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
455 static const char EMSG_INV_FMT[] = "Invalid format specifier";
456 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
457 static const char EMSG_NOT_ARRAY[] = "Not an array";
458 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
459 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
460 #ifndef CONFIG_FEATURE_AWK_MATH
461 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
464 static void syntax_error(const char * const message)
466 bb_error_msg("%s:%i: %s", programname, lineno, message);
470 #define runtime_error(x) syntax_error(x)
473 /* ---- hash stuff ---- */
475 static unsigned int hashidx(char *name) {
477 register unsigned int idx=0;
479 while (*name) idx = *name++ + (idx << 6) - idx;
483 /* create new hash */
484 static xhash *hash_init(void) {
488 newhash = (xhash *)xcalloc(1, sizeof(xhash));
489 newhash->csize = FIRST_PRIME;
490 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
495 /* find item in hash, return ptr to data, NULL if not found */
496 static void *hash_search(xhash *hash, char *name) {
500 hi = hash->items [ hashidx(name) % hash->csize ];
502 if (strcmp(hi->name, name) == 0)
509 /* grow hash if it becomes too big */
510 static void hash_rebuild(xhash *hash) {
512 unsigned int newsize, i, idx;
513 hash_item **newitems, *hi, *thi;
515 if (hash->nprime == NPRIMES)
518 newsize = PRIMES[hash->nprime++];
519 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
521 for (i=0; i<hash->csize; i++) {
526 idx = hashidx(thi->name) % newsize;
527 thi->next = newitems[idx];
533 hash->csize = newsize;
534 hash->items = newitems;
537 /* find item in hash, add it if necessary. Return ptr to data */
538 static void *hash_find(xhash *hash, char *name) {
544 hi = hash_search(hash, name);
546 if (++hash->nel / hash->csize > 10)
549 l = bb_strlen(name) + 1;
550 hi = xcalloc(sizeof(hash_item) + l, 1);
551 memcpy(hi->name, name, l);
553 idx = hashidx(name) % hash->csize;
554 hi->next = hash->items[idx];
555 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): look the name up, adding the item if
 * necessary, and cast the returned data pointer. Each expansion is wrapped in
 * parentheses so the cast applies to the call result even when the macro is
 * followed by '->' or another postfix operator. */
#define findvar(hash, name) ((var *) hash_find ( (hash) , (name) ))
#define newvar(name) ((var *) hash_find ( vhash , (name) ))
#define newfile(name) ((rstream *) hash_find ( fdhash , (name) ))
#define newfunc(name) ((func *) hash_find ( fnhash , (name) ))
566 static void hash_remove(xhash *hash, char *name) {
568 hash_item *hi, **phi;
570 phi = &(hash->items[ hashidx(name) % hash->csize ]);
573 if (strcmp(hi->name, name) == 0) {
574 hash->glen -= (bb_strlen(name) + 1);
584 /* ------ some useful functions ------ */
586 static void skip_spaces(char **s) {
588 register char *p = *s;
590 while(*p == ' ' || *p == '\t' ||
591 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
597 static char *nextword(char **s) {
599 register char *p = *s;
606 static char nextchar(char **s) {
608 register char c, *pps;
612 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
613 if (c == '\\' && *s == pps) c = *((*s)++);
/* Return non-zero when c is an alphanumeric character or '_'.
 * The value is narrowed to unsigned char before the <ctype.h> call because
 * callers pass plain chars, which may be negative. */
static inline int isalnum_(int c) {

	return (isalnum((unsigned char)c) || c == '_');
}
/* Open path with the given mode; the name "-" on its own stands for stdin. */
static FILE *afopen(const char *path, const char *mode) {

	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return bb_xfopen(path, mode);
}
627 /* -------- working with variables (set/get/copy/etc) -------- */
629 static xhash *iamarray(var *v) {
633 while (a->type & VF_CHILD)
636 if (! (a->type & VF_ARRAY)) {
638 a->x.array = hash_init();
643 static void clear_array(xhash *array) {
648 for (i=0; i<array->csize; i++) {
649 hi = array->items[i];
653 free(thi->data.v.string);
656 array->items[i] = NULL;
658 array->glen = array->nel = 0;
661 /* clear a variable */
662 static var *clrvar(var *v) {
664 if (!(v->type & VF_FSTR))
667 v->type &= VF_DONTTOUCH;
673 /* assign string value to variable */
674 static var *setvar_p(var *v, char *value) {
683 /* same as setvar_p but make a copy of string */
684 static var *setvar_s(var *v, char *value) {
686 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
689 /* same as setvar_s but set USER flag */
690 static var *setvar_u(var *v, char *value) {
697 /* set array element to user string */
698 static void setari_u(var *a, int idx, char *s) {
701 static char sidx[12];
703 sprintf(sidx, "%d", idx);
704 v = findvar(iamarray(a), sidx);
708 /* assign numeric value to variable */
709 static var *setvar_i(var *v, double value) {
712 v->type |= VF_NUMBER;
718 static char *getvar_s(var *v) {
720 /* if v is numeric and has no cached string, convert it to string */
721 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
722 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
723 v->string = bb_xstrdup(buf);
724 v->type |= VF_CACHED;
726 return (v->string == NULL) ? "" : v->string;
729 static double getvar_i(var *v) {
733 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
737 v->number = strtod(s, &s);
738 if (v->type & VF_USER) {
746 v->type |= VF_CACHED;
751 static var *copyvar(var *dest, var *src) {
755 dest->type |= (src->type & ~VF_DONTTOUCH);
756 dest->number = src->number;
758 dest->string = bb_xstrdup(src->string);
760 handle_special(dest);
764 static var *incvar(var *v) {
766 return setvar_i(v, getvar_i(v)+1.);
769 /* return true if v is number or numeric string */
770 static int is_numeric(var *v) {
773 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
776 /* return 1 when value of v corresponds to true, 0 otherwise */
777 static int istrue(var *v) {
780 return (v->number == 0) ? 0 : 1;
782 return (v->string && *(v->string)) ? 1 : 0;
785 /* temporary variables allocator. Last allocated should be first freed */
786 static var *nvalloc(int n) {
794 if ((cb->pos - cb->nv) + n <= cb->size) break;
799 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
800 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
805 if (pb) pb->next = cb;
811 while (v < cb->pos) {
820 static void nvfree(var *v) {
824 if (v < cb->nv || v >= cb->pos)
825 runtime_error(EMSG_INTERNAL_ERROR);
827 for (p=v; p<cb->pos; p++) {
828 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
829 clear_array(iamarray(p));
830 free(p->x.array->items);
833 if (p->type & VF_WALK)
840 while (cb->prev && cb->pos == cb->nv) {
845 /* ------- awk program text parsing ------- */
847 /* Parse next token pointed to by global pos, place results into global t.
848 * If token isn't expected, give up. Return token class
850 static unsigned long next_token(unsigned long expected) {
854 unsigned long tc, *ti;
856 static int concat_inserted = FALSE;
857 static unsigned long save_tclass, save_info;
858 static unsigned long ltclass = TC_OPTERM;
864 } else if (concat_inserted) {
866 concat_inserted = FALSE;
867 t.tclass = save_tclass;
878 while (*p != '\n' && *p != '\0') p++;
886 } else if (*p == '\"') {
890 if (*p == '\0' || *p == '\n')
891 syntax_error(EMSG_UNEXP_EOS);
892 *(s++) = nextchar(&p);
898 } else if ((expected & TC_REGEXP) && *p == '/') {
902 if (*p == '\0' || *p == '\n')
903 syntax_error(EMSG_UNEXP_EOS);
904 if ((*s++ = *p++) == '\\') {
906 *(s-1) = bb_process_escape_sequence((const char **)&p);
907 if (*pp == '\\') *s++ = '\\';
908 if (p == pp) *s++ = *p++;
915 } else if (*p == '.' || isdigit(*p)) {
917 t.number = strtod(p, &p);
919 syntax_error(EMSG_UNEXP_TOKEN);
923 /* search for something known */
933 /* if token class is expected, token
934 * matches and it's not a longer word,
935 * then this is what we are looking for
937 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
938 *tl == *p && strncmp(p, tl, l) == 0 &&
939 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
949 /* it's a name (var/array/function),
950 * otherwise it's something wrong
953 syntax_error(EMSG_UNEXP_TOKEN);
956 while(isalnum_(*(++p))) {
974 /* skipping newlines in some cases */
975 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
978 /* insert concatenation operator when needed */
979 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
980 concat_inserted = TRUE;
984 t.info = OC_CONCAT | SS | P(35);
991 /* Are we ready for this? */
992 if (! (ltclass & expected))
993 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
994 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
999 static void rollback_token(void) { t.rollback = TRUE; }
1001 static node *new_node(unsigned long info) {
1005 n = (node *)xcalloc(sizeof(node), 1);
1011 static node *mk_re_node(char *s, node *n, regex_t *re) {
1013 n->info = OC_REGEXP;
1016 xregcomp(re, s, REG_EXTENDED);
1017 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1022 static node *condition(void) {
1024 next_token(TC_SEQSTART);
1025 return parse_expr(TC_SEQTERM);
1028 /* parse expression terminated by given argument, return ptr
1029 * to built subtree. Terminator is eaten by parse_expr */
1030 static node *parse_expr(unsigned long iexp) {
1035 unsigned long tc, xtc;
1039 sn.r.n = glptr = NULL;
1040 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1042 while (! ((tc = next_token(xtc)) & iexp)) {
1043 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1044 /* input redirection (<) attached to glptr node */
1045 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1046 xtc = TC_OPERAND | TC_UOPPRE;
1049 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1050 /* for binary and postfix-unary operators, jump back over
1051 * previous operators with higher priority */
1053 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1054 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1056 if ((t.info & OPCLSMASK) == OC_TERNARY)
1058 cn = vn->a.n->r.n = new_node(t.info);
1060 if (tc & TC_BINOP) {
1062 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1063 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1065 next_token(TC_GETLINE);
1066 /* give maximum priority to this pipe */
1067 cn->info &= ~PRIMASK;
1068 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1072 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1077 /* for operands and prefix-unary operators, attach them
1080 cn = vn->r.n = new_node(t.info);
1082 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1083 if (tc & (TC_OPERAND | TC_REGEXP)) {
1084 xtc = TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1085 /* one should be very careful with switch on tclass -
1086 * only simple tclasses should be used! */
1091 if ((v = hash_search(ahash, t.string)) != NULL) {
1092 cn->info = OC_FNARG;
1093 cn->l.i = v->x.aidx;
1095 cn->l.v = newvar(t.string);
1097 if (tc & TC_ARRAY) {
1099 cn->r.n = parse_expr(TC_ARRTERM);
1101 xtc = TC_UOPPOST | TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1107 v = cn->l.v = xcalloc(sizeof(var), 1);
1109 setvar_i(v, t.number);
1111 setvar_s(v, t.string);
1115 mk_re_node(t.string, cn,
1116 (regex_t *)xcalloc(sizeof(regex_t),2));
1121 cn->r.f = newfunc(t.string);
1122 cn->l.n = condition();
1126 cn = vn->r.n = parse_expr(TC_SEQTERM);
1132 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1136 cn->l.n = condition();
1145 /* add node to chain. Return ptr to alloc'd node */
1146 static node *chain_node(unsigned long info) {
1151 seq->first = seq->last = new_node(0);
1153 if (seq->programname != programname) {
1154 seq->programname = programname;
1155 n = chain_node(OC_NEWSOURCE);
1156 n->l.s = bb_xstrdup(programname);
1161 seq->last = n->a.n = new_node(OC_DONE);
1166 static void chain_expr(unsigned long info) {
1170 n = chain_node(info);
1171 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1172 if (t.tclass & TC_GRPTERM)
1176 static node *chain_loop(node *nn) {
1178 node *n, *n2, *save_brk, *save_cont;
1180 save_brk = break_ptr;
1181 save_cont = continue_ptr;
1183 n = chain_node(OC_BR | Vx);
1184 continue_ptr = new_node(OC_EXEC);
1185 break_ptr = new_node(OC_EXEC);
1187 n2 = chain_node(OC_EXEC | Vx);
1190 continue_ptr->a.n = n2;
1191 break_ptr->a.n = n->r.n = seq->last;
1193 continue_ptr = save_cont;
1194 break_ptr = save_brk;
1199 /* parse group and attach it to chain */
1200 static void chain_group(void) {
1206 c = next_token(TC_GRPSEQ);
1207 } while (c & TC_NEWLINE);
1209 if (c & TC_GRPSTART) {
1210 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1214 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1216 chain_expr(OC_EXEC | Vx);
1217 } else { /* TC_STATEMNT */
1218 switch (t.info & OPCLSMASK) {
1220 n = chain_node(OC_BR | Vx);
1221 n->l.n = condition();
1223 n2 = chain_node(OC_EXEC);
1225 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1227 n2->a.n = seq->last;
1235 n = chain_loop(NULL);
1240 n2 = chain_node(OC_EXEC);
1241 n = chain_loop(NULL);
1243 next_token(TC_WHILE);
1244 n->l.n = condition();
1248 next_token(TC_SEQSTART);
1249 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1250 if (t.tclass & TC_SEQTERM) { /* for-in */
1251 if ((n2->info & OPCLSMASK) != OC_IN)
1252 syntax_error(EMSG_UNEXP_TOKEN);
1253 n = chain_node(OC_WALKINIT | VV);
1256 n = chain_loop(NULL);
1257 n->info = OC_WALKNEXT | Vx;
1259 } else { /* for(;;) */
1260 n = chain_node(OC_EXEC | Vx);
1262 n2 = parse_expr(TC_SEMICOL);
1263 n3 = parse_expr(TC_SEQTERM);
1273 n = chain_node(t.info);
1274 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1275 if (t.tclass & TC_OUTRDR) {
1277 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1279 if (t.tclass & TC_GRPTERM)
1284 n = chain_node(OC_EXEC);
1289 n = chain_node(OC_EXEC);
1290 n->a.n = continue_ptr;
1293 /* delete, next, nextfile, return, exit */
1301 static void parse_program(char *p) {
1303 unsigned long tclass;
1310 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1311 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1313 if (tclass & TC_OPTERM)
1317 if (tclass & TC_BEGIN) {
1321 } else if (tclass & TC_END) {
1325 } else if (tclass & TC_FUNCDECL) {
1326 next_token(TC_FUNCTION);
1328 f = newfunc(t.string);
1329 f->body.first = NULL;
1331 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1332 v = findvar(ahash, t.string);
1333 v->x.aidx = (f->nargs)++;
1335 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1342 } else if (tclass & TC_OPSEQ) {
1344 cn = chain_node(OC_TEST);
1345 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1346 if (t.tclass & TC_GRPSTART) {
1350 chain_node(OC_PRINT);
1352 cn->r.n = mainseq.last;
1354 } else /* if (tclass & TC_GRPSTART) */ {
1362 /* -------- program execution part -------- */
/* Set up the splitter node of spl for separator string s. Called for FS and
 * RS from handle_special() and for a string separator from exec_builtin().
 * Several lines of this function are not visible in this excerpt. */
1364 static node *mk_splitter(char *s, tsplitter *spl) {
1366 register regex_t *re, *ire;
/* This used '&&': a logical AND yields only 0 or 1 and can never equal
 * OC_REGEXP (0x2100), so the guarded code never ran. The operation class
 * has to be extracted with a bitwise '&'. */
1372 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1376 if (bb_strlen(s) > 1) { /* longer than one char: compile as a regex */
1377 mk_re_node(s, n, re);
1379 n->info = (unsigned long) *s; /* otherwise store the single character */
1385 /* use node as a regular expression. Supplied with node ptr and regex_t
1386 * storage space. Return ptr to regex (if result points to preg, it should
1387 * be regfree'd manually later)
1389 static regex_t *as_regex(node *op, regex_t *preg) {
1394 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1395 return icase ? op->r.ire : op->l.re;
1398 s = getvar_s(evaluate(op, v));
1399 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
/* Gradually growing buffer: when *b is unallocated or n does not fit below
 * *size, reallocate *b to n + (n >> 1) + 80 bytes and record the new size. */
static void qrealloc(char **b, int n, int *size) {

	if (*b && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1412 /* resize field storage space */
1413 static void fsrealloc(int size) {
1415 static int maxfields = 0;
1418 if (size >= maxfields) {
1420 maxfields = size + 16;
1421 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1422 for (; i<maxfields; i++) {
1423 Fields[i].type = VF_SPECIAL;
1424 Fields[i].string = NULL;
1428 if (size < nfields) {
1429 for (i=size; i<nfields; i++) {
1436 static int awk_split(char *s, node *spl, char **slist) {
1441 regmatch_t pmatch[2];
1443 /* in worst case, each char would be a separate field */
1444 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1446 c[0] = c[1] = (char)spl->info;
1448 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1450 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1452 l = strcspn(s, c+2);
1453 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1454 pmatch[0].rm_so <= l) {
1455 l = pmatch[0].rm_so;
1456 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1458 pmatch[0].rm_eo = l;
1459 if (*(s+l)) pmatch[0].rm_eo++;
1465 s += pmatch[0].rm_eo;
1468 } else if (c[0] == '\0') { /* null split */
1474 } else if (c[0] != ' ') { /* single-character split */
1476 c[0] = toupper(c[0]);
1477 c[1] = tolower(c[1]);
1480 while ((s1 = strpbrk(s1, c))) {
1484 } else { /* space split */
1486 while (isspace(*s)) s++;
1489 while (*s && !isspace(*s))
1497 static void split_f0(void) {
1499 static char *fstrings = NULL;
1509 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1512 for (i=0; i<n; i++) {
1513 Fields[i].string = nextword(&s);
1514 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1517 /* set NF manually to avoid side effects */
1519 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1520 V[NF]->number = nfields;
1523 /* perform additional actions when some internal variables changed */
1524 static void handle_special(var *v) {
1528 int sl, l, len, i, bsize;
1530 if (! (v->type & VF_SPECIAL))
1534 n = (int)getvar_i(v);
1537 /* recalculate $0 */
1538 sep = getvar_s(V[OFS]);
1539 sl = bb_strlen(sep);
1542 for (i=0; i<n; i++) {
1543 s = getvar_s(&Fields[i]);
1546 memcpy(b+len, sep, sl);
1549 qrealloc(&b, len+l+sl, &bsize);
1550 memcpy(b+len, s, l);
1557 } else if (v == V[F0]) {
1558 is_f0_split = FALSE;
1560 } else if (v == V[FS]) {
1561 mk_splitter(getvar_s(v), &fsplitter);
1563 } else if (v == V[RS]) {
1564 mk_splitter(getvar_s(v), &rsplitter);
1566 } else if (v == V[IGNORECASE]) {
1570 n = getvar_i(V[NF]);
1571 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1572 /* right here v is invalid. Just to note... */
1576 /* step through func/builtin/etc arguments */
1577 static node *nextarg(node **pn) {
1582 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1591 static void hashwalk_init(var *v, xhash *array) {
1597 if (v->type & VF_WALK)
1601 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1602 *w = *(w+1) = (char *)(w + 2);
1603 for (i=0; i<array->csize; i++) {
1604 hi = array->items[i];
1606 strcpy(*w, hi->name);
1613 static int hashwalk_next(var *v) {
1621 setvar_s(v, nextword(w+1));
1625 /* evaluate node, return 1 when result is true, 0 otherwise */
1626 static int ptest(node *pattern) {
1629 return istrue(evaluate(pattern, &v));
1632 /* read next record from stream rsm into a variable v */
1633 static int awk_getline(rstream *rsm, var *v) {
1636 regmatch_t pmatch[2];
1638 int fd, so, eo, r, rp;
1641 /* we're using our own buffer since we need access to accumulating
1644 fd = fileno(rsm->F);
1648 c = (char) rsplitter.n.info;
1651 qrealloc(&b, p+128, &size);
1655 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1656 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1657 b, 1, pmatch, 0) == 0) {
1658 so = pmatch[0].rm_so;
1659 eo = pmatch[0].rm_eo;
1663 } else if (c != '\0') {
1664 s = strchr(b+pp, c);
1671 while (b[rp] == '\n')
1673 s = strstr(b+rp, "\n\n");
1676 while (b[eo] == '\n') eo++;
1684 p += safe_read(fd, b+p, size-p-1);
1688 setvar_i(V[ERRNO], errno);
1697 c = b[so]; b[so] = '\0';
1701 c = b[eo]; b[eo] = '\0';
1702 setvar_s(V[RT], b+so);
1707 if (p) memmove(b, (const void *)(b+eo), p+1);
1716 static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1721 if (int_as_int && n == (int)n) {
1722 r = snprintf(b, size, "%d", (int)n);
1724 do { c = *s; } while (*s && *++s);
1725 if (strchr("diouxX", c)) {
1726 r = snprintf(b, size, format, (int)n);
1727 } else if (strchr("eEfgG", c)) {
1728 r = snprintf(b, size, format, n);
1730 runtime_error(EMSG_INV_FMT);
1737 /* Formatted output into an allocated buffer; returns a pointer to the buffer.
 *
 * The first argument taken from n is evaluated as the format string and
 * duplicated, because the copy is cut into pieces in place (a NUL is written
 * after each conversion). For every piece the next argument is evaluated and
 * appended to b at offset i: 'c' (or end of format) via sprintf with a single
 * character, 's' via sprintf with the argument's string, anything else via
 * fmt_num(). b is grown with qrealloc() before each piece.
 *
 * NOTE(review): isalpha() is called on a plain char; the C library requires
 * a value representable as unsigned char -- confirm a cast is not needed.
 * NOTE(review): the sprintf() calls are unbounded and rely on the preceding
 * qrealloc() sizes being sufficient. */
1738 static char *awk_printf(node *n) {
1741 char *fmt, *s, *s1, *f;
1742 int i, j, incr, bsize;
1747 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v))); /* private, writable copy of the format */
1752 while (*f && (*f != '%' || *(++f) == '%')) /* stop at end of string or just past a '%' not followed by '%' */
1754 while (*f && !isalpha(*f)) /* loop until an alphabetic character (the conversion letter) */
1757 incr = (f - s) + MAXVARFMT; /* space budget for this piece */
1758 qrealloc(&b, incr+i, &bsize);
1759 c = *f; if (c != '\0') f++; /* c = conversion letter, NUL if the format ended */
1760 c1 = *f ; *f = '\0'; /* terminate the piece, remembering the overwritten char in c1 */
1761 arg = evaluate(nextarg(&n), v);
1764 if (c == 'c' || !c) {
1765 i += sprintf(b+i, s,
1766 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg)); /* numeric: character code; else first char of the string */
1768 } else if (c == 's') {
1770 qrealloc(&b, incr+i+bb_strlen(s1), &bsize); /* make room for the string argument as well */
1771 i += sprintf(b+i, s, s1);
1774 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1778 /* if there was an error while sprintf, return value is negative */
1783 b = xrealloc(b, i+1); /* resize the buffer to i+1 bytes */
1790 /* common substitution routine
1791 * replace (nm) substring of (src) that matches (rn) with (repl), store
1792 * result into (dest), return number of substitutions. If nm=0, replace
1793 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1794 * subexpression matching (\1-\9)
1796 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
/* The result is assembled in ds (grown with qrealloc(), write offset di)
 * while sp advances through the source string. */
1800 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1801 regmatch_t pmatch[10]; /* whole match plus up to nine subexpressions */
1804 re = as_regex(rn, &sreg); /* if this yields &sreg, it is regfree()d at the end */
1805 if (! src) src = V[F0]; /* default source is F0 */
1806 if (! dest) dest = V[F0]; /* default destination is F0 */
1810 rl = bb_strlen(repl);
1811 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) { /* REG_NOTBOL once sp is no longer the start of the source */
1812 so = pmatch[0].rm_so;
1813 eo = pmatch[0].rm_eo;
1815 qrealloc(&ds, di + eo + rl, &dssize); /* room for text up to the match end plus the replacement */
1816 memcpy(ds + di, sp, eo); /* copy source text through the end of this match */
1822 for (s = repl; *s; s++) {
1828 if (c == '&' || (ex && c >= '0' && c <= '9')) { /* '&' always; digits only when ex is set */
1829 di -= ((nbs + 3) >> 1); /* NOTE(review): rewinds di based on nbs (presumably a backslash count) -- confirm */
1838 n = pmatch[j].rm_eo - pmatch[j].rm_so; /* length of matched group j */
1839 qrealloc(&ds, di + rl + n, &dssize);
1840 memcpy(ds + di, sp + pmatch[j].rm_so, n); /* insert the text of group j */
1851 if (! (ds[di++] = *sp++)) break; /* copy one char; stop once the terminating NUL was copied */
1855 qrealloc(&ds, di + strlen(sp), &dssize);
1856 strcpy(ds + di, sp); /* append the unprocessed remainder of the source */
1858 if (re == &sreg) regfree(re); /* free only a regex that lives in the local sreg */
/* Execute the builtin described by node op, storing its value in res.
 *
 * Up to four arguments are fetched with nextarg(). Bits of op->info (copied
 * into isr) decide whether an argument is evaluated into av[] and whether
 * its string value is also fetched into as[] (presumably isr is shifted
 * between iterations; that line is not visible here). nargs is checked
 * against info >> 30 and EMSG_TOO_FEW_ARGS is raised when it is smaller.
 * The builtin itself is selected by info & OPNMASK.
 *
 * Operations visible in the body: atan2 (EMSG_NO_MATH is raised, presumably
 * when CONFIG_FEATURE_AWK_MATH is off), splitting as[0] with awk_split()
 * into array av[1], a substring copy, per-character case conversion, a
 * substring search via strstr() or strncasecmp(), strftime() with a default
 * format, a regexec() match that sets RSTART and RLENGTH, and three
 * awk_sub() calls (nm from av[2], nm = 0, nm = 1). */
1862 static var *exec_builtin(node *op, var *res) {
1869 regmatch_t pmatch[2];
1871 static tsplitter tspl;
1873 unsigned long isr, info;
1880 isr = info = op->info;
1883 av[2] = av[3] = NULL;
1884 for (i=0 ; i<4 && op ; i++) { /* collect at most four arguments */
1885 an[i] = nextarg(&op);
1886 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]); /* argument is wanted as a value */
1887 if (isr & 0x08000000) as[i] = getvar_s(av[i]); /* argument is also wanted as a string */
1892 if (nargs < (info >> 30)) /* info >> 30 is the minimum argument count */
1893 runtime_error(EMSG_TOO_FEW_ARGS);
1895 switch (info & OPNMASK) {
1898 #ifdef CONFIG_FEATURE_AWK_MATH
1899 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1]))); /* NOTE(review): av[i] uses the counter left over from the argument loop; the first operand is presumably av[0] -- confirm */
1901 runtime_error(EMSG_NO_MATH);
1907 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1908 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); /* regexp node used directly, otherwise a splitter is built from the string */
1913 n = awk_split(as[0], spl, &s);
1915 clear_array(iamarray(av[1]));
1916 for (i=1; i<=n; i++) /* array indices start at 1 */
1917 setari_u(av[1], i, nextword(&s1));
1923 l = bb_strlen(as[0]);
1924 i = getvar_i(av[1]) - 1; /* convert the 1-based position to a 0-based offset */
1925 if (i>l) i=l; if (i<0) i=0; /* clamp the offset to [0, l] */
1926 n = (nargs > 2) ? getvar_i(av[2]) : l-i; /* default length: rest of the string */
1929 strncpy(s, as[0]+i, n);
1941 s1 = s = bb_xstrdup(as[0]); /* convert a private copy in place */
1943 *s1 = (*to_xxx)(*s1);
1951 ll = bb_strlen(as[1]);
1952 l = bb_strlen(as[0]) - ll; /* last offset at which as[1] could still fit */
1953 if (ll > 0 && l >= 0) {
1955 s = strstr(as[0], as[1]);
1956 if (s) n = (s - as[0]) + 1; /* 1-based position of the match */
1958 /* this piece of code is terribly slow and
1959 * really should be rewritten
1961 for (i=0; i<=l; i++) {
1962 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1974 tt = getvar_i(av[1]);
1977 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1978 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1984 re = as_regex(an[1], &sreg);
1985 n = regexec(re, as[0], 1, pmatch, 0);
1990 pmatch[0].rm_so = 0;
1991 pmatch[0].rm_eo = -1;
1993 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1994 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1995 setvar_i(res, pmatch[0].rm_so);
1996 if (re == &sreg) regfree(re);
2000 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2004 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2008 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2017 * Evaluate node - the heart of the program. Supplied with subtree
2018 * and place where to store result. Returns ptr to result.
2020 #define XC(n) ((n) >> 8) /* shifts a class value right by 8 bits; used for the switch selector and case labels in evaluate() */
/* Evaluate the node (sub)tree op and return a pointer to the variable that
 * holds the result. res is the caller-supplied place for the result, but the
 * returned pointer may be a different variable (for example a field slot or
 * the target of an assignment).
 *
 * The OF_RES1 / OF_RES2 / OF_STR1 / OF_STR2 / OF_NUM1 bits of op->info
 * request pre-evaluation of the operands into L and R before the opcode
 * class (opinfo & OPCLSMASK) is dispatched through the switch. Classes at
 * or below SHIFT_TIL_THIS and at or above RECUR_FROM_THIS are tested after
 * the switch; what happens for each is not visible here. */
2022 static var *evaluate(node *op, var *res) {
2024 /* This procedure is recursive so we should count every byte:
 * state that need not be per-call is kept in statics. */
2025 static var *fnargs = NULL; /* array indexed by OC_FNARG nodes (op->l.i) */
2026 static unsigned int seed = 1; /* random seed, updated below from the argument or time(NULL) */
2027 static regex_t sreg; /* scratch regex handed to as_regex() */
2036 unsigned long opinfo;
2048 return setvar_s(res, NULL);
2055 opn = (short)(opinfo & OPNMASK); /* operation number within the class */
2056 lineno = op->lineno;
2058 /* execute inevitable things: operand pre-evaluation requested by flags */
2060 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1); /* left operand, temporary v1 */
2061 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1); /* right operand, temporary v1+1 */
2062 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2063 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2064 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2066 switch (XC(opinfo & OPCLSMASK)) {
2068 /* -- iterative node type -- */
2072 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2073 /* it's range pattern: OF_CHECKED on the node marks "inside the range" */
2074 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2075 op->info |= OF_CHECKED;
2076 if (ptest(op1->r.n)) /* end pattern matched: leave the range */
2077 op->info &= ~OF_CHECKED;
2084 op = (ptest(op1)) ? op->a.n : op->r.n;
2088 /* just evaluate an expression, also used as unconditional jump */
2092 /* branch, used in if-else and various loops */
2094 op = istrue(L.v) ? op->a.n : op->r.n;
2097 /* initialize for-in loop */
2098 case XC( OC_WALKINIT ):
2099 hashwalk_init(L.v, iamarray(R.v));
2102 /* get next array item */
2103 case XC( OC_WALKNEXT ):
2104 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2107 case XC( OC_PRINT ):
2108 case XC( OC_PRINTF ):
2111 X.rsm = newfile(R.s);
2114 if((X.rsm->F = popen(R.s, "w")) == NULL) /* output goes to a command */
2115 bb_perror_msg_and_die("popen");
2118 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a"); /* truncate or append depending on opn */
2124 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2126 fputs(getvar_s(V[F0]), X.F);
2129 L.v = evaluate(nextarg(&op1), v1);
2130 if (L.v->type & VF_NUMBER) { /* numbers are formatted with OFMT, whole numbers as integers */
2131 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2132 getvar_i(L.v), TRUE);
2135 fputs(getvar_s(L.v), X.F);
2138 if (op1) fputs(getvar_s(V[OFS]), X.F); /* OFS is written while more arguments remain */
2141 fputs(getvar_s(V[ORS]), X.F);
2143 } else { /* OC_PRINTF */
2144 L.s = awk_printf(op1);
2151 case XC( OC_DELETE ):
2152 X.info = op1->info & OPCLSMASK;
2153 if (X.info == OC_VAR) {
2155 } else if (X.info == OC_FNARG) {
2156 R.v = &fnargs[op1->l.i];
2158 runtime_error(EMSG_NOT_ARRAY);
2163 L.s = getvar_s(evaluate(op1->r.n, v1)); /* subscript of the element to remove */
2164 hash_remove(iamarray(R.v), L.s);
2166 clear_array(iamarray(R.v));
2170 case XC( OC_NEWSOURCE ):
2171 programname = op->l.s;
2174 case XC( OC_RETURN ):
2178 case XC( OC_NEXTFILE ):
2189 /* -- recursive node type -- */
2197 case XC( OC_FNARG ):
2198 L.v = &fnargs[op->l.i];
2201 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v; /* with a subscript node: array element; otherwise the variable itself */
2205 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2208 case XC( OC_REGEXP ):
2210 L.s = getvar_s(V[F0]);
2213 case XC( OC_MATCH ):
2216 X.re = as_regex(op1, &sreg);
2217 R.i = regexec(X.re, L.s, 0, NULL, 0);
2218 if (X.re == &sreg) regfree(X.re); /* free only a regex that lives in sreg */
2219 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0)); /* result is inverted when opn is '!' */
2223 /* if source is a temporary string, just relink it to dest */
2224 if (R.v == v1+1 && R.v->string) {
2225 res = setvar_p(L.v, R.v->string);
2228 res = copyvar(L.v, R.v);
2232 case XC( OC_TERNARY ):
2233 if ((op->r.n->info & OPCLSMASK) != OC_COLON) /* right child must be the ':' node */
2234 runtime_error(EMSG_POSSIBLE_ERROR);
2235 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2239 if (! op->r.f->body.first)
2240 runtime_error(EMSG_UNDEF_FUNC);
2242 X.v = R.v = nvalloc(op->r.f->nargs+1); /* X.v keeps the start, R.v walks the slots */
2244 L.v = evaluate(nextarg(&op1), v1);
2246 R.v->type |= VF_CHILD; /* mark the slot as a function arg ... */
2247 R.v->x.parent = L.v; /* ... whose x.parent points to the evaluated argument */
2248 if (++R.v - X.v >= op->r.f->nargs)
2256 res = evaluate(op->r.f->body.first, res);
2263 case XC( OC_GETLINE ):
2264 case XC( OC_PGETLINE ):
2266 X.rsm = newfile(L.s);
2268 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2269 X.rsm->F = popen(L.s, "r");
2270 X.rsm->is_pipe = TRUE;
2272 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
2276 if (! iF) iF = next_input_file();
2281 setvar_i(V[ERRNO], errno);
2289 L.i = awk_getline(X.rsm, R.v);
2299 /* simple builtins */
2300 case XC( OC_FBLTIN ):
2308 R.d = (double)rand() / (double)RAND_MAX;
2311 #ifdef CONFIG_FEATURE_AWK_MATH
2337 runtime_error(EMSG_NO_MATH);
2343 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL); /* explicit argument, else current time */
2353 L.s = getvar_s(V[F0]);
2354 R.d = bb_strlen(L.s);
2359 R.d = (L.s && *L.s) ? system(L.s) : 0; /* an empty command is not run */
2367 X.rsm = newfile(L.s);
2376 X.rsm = (rstream *)hash_search(fdhash, L.s);
2378 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F); /* pipes need pclose() */
2379 free(X.rsm->buffer);
2380 hash_remove(fdhash, L.s);
2383 setvar_i(V[ERRNO], errno);
2390 case XC( OC_BUILTIN ):
2391 res = exec_builtin(op, res);
2394 case XC( OC_SPRINTF ):
2395 setvar_p(res, awk_printf(op1));
2398 case XC( OC_UNARY ):
2400 L.d = R.d = getvar_i(R.v);
2415 L.d = istrue(X.v) ? 0 : 1;
2426 case XC( OC_FIELD ):
2427 R.i = (int)getvar_i(R.v);
2435 res = &Fields[R.i-1]; /* Fields[] is 0-based, so index R.i-1 */
2439 /* concatenation (" ") and index joining (",") */
2440 case XC( OC_CONCAT ):
2441 case XC( OC_COMMA ):
2442 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2; /* NOTE(review): opn is reused as a buffer length; its declaration is not visible -- confirm its type is wide enough */
2443 X.s = (char *)xmalloc(opn);
2445 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2446 L.s = getvar_s(V[SUBSEP]);
2447 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s)); /* extra room for SUBSEP */
2455 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0); /* right side is tested only if the left is true */
2459 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n)); /* right side is tested only if the left is false */
2462 case XC( OC_BINARY ):
2463 case XC( OC_REPLACE ):
2464 R.d = getvar_i(R.v);
2476 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2480 #ifdef CONFIG_FEATURE_AWK_MATH
2481 L.d = pow(L.d, R.d);
2483 runtime_error(EMSG_NO_MATH);
2487 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2488 L.d -= (int)(L.d / R.d) * R.d; /* remainder via a quotient truncated to int; NOTE(review): assumes the quotient fits in an int */
2491 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d); /* OC_REPLACE stores back into the left operand */
2494 case XC( OC_COMPARE ):
2495 if (is_numeric(L.v) && is_numeric(R.v)) { /* numeric comparison only when both sides are numeric */
2496 L.d = getvar_i(L.v) - getvar_i(R.v);
2498 L.s = getvar_s(L.v);
2499 R.s = getvar_s(R.v);
2500 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2502 switch (opn & 0xfe) {
2513 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0); /* low bit of opn selects the sense of the test */
2517 runtime_error(EMSG_POSSIBLE_ERROR);
2519 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2521 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2531 /* -------- main & co. -------- */
/* Shutdown helper taking exit status r.
 * The visible part walks every bucket of fdhash and pclose()s each stream
 * that is open and marked as a pipe.
 * NOTE(review): how r is used, and whether this function terminates the
 * process, is not visible here -- confirm against the full source. */
2533 static int awk_exit(int r) {
2538 /* waiting for children */
2539 for (i=0; i<fdhash->csize; i++) { /* one iteration per hash bucket */
2540 hi = fdhash->items[i];
2542 if (hi->data.rs.F && hi->data.rs.is_pipe) /* only open pipe streams */
2543 pclose(hi->data.rs.F);
2551 /* If expr looks like "var=value", perform the assignment and return 1;
 * otherwise return 0.
 *
 * The check and the assignment work on a duplicate of expr. The text is
 * rejected unless its first character passes isalnum_() and it contains
 * an '='. The value is copied character by character through nextchar()
 * and then assigned with setvar_u() to the variable named by exprc.
 * NOTE(review): the lines that split the name from the value and release
 * the duplicate are not visible here. */
2553 static int is_assignment(char *expr) {
2555 char *exprc, *s, *s0, *s1;
2557 exprc = bb_xstrdup(expr); /* private copy of expr */
2558 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) { /* not of the form name=value */
2566 *(s1++) = nextchar(&s); /* copy the value via nextchar() (presumably decoding escapes -- confirm) */
2569 setvar_u(newvar(exprc), s0); /* create or look up the variable and assign s0 */
2574 /* Switch to the next input file.
 *
 * Closes the stream held in rsm, then advances ARGIND and looks the new
 * index up in ARGV. Empty names and arguments accepted by is_assignment()
 * are not opened; other names are opened for reading with afopen(), and
 * FILENAME is set to the name.
 * NOTE(review): the return value and the behaviour once ARGIND reaches
 * ARGC are not visible here. */
2575 static rstream *next_input_file(void) {
2580 static int files_happen = FALSE; /* set to TRUE further down, after a file name has been handled */
2582 if (rsm.F) fclose(rsm.F); /* close the previous input, if any */
2587 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) { /* no ARGV entries left */
2593 ind = getvar_s(incvar(V[ARGIND])); /* advance ARGIND; its string form is the ARGV key */
2594 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2595 if (fname && *fname && !is_assignment(fname)) /* skip empty names and var=value arguments */
2596 F = afopen(fname, "r");
2600 files_happen = TRUE;
2601 setvar_s(V[FILENAME], fname);
2606 extern int awk_main(int argc, char **argv) {
2613 static int from_file = FALSE;
2615 FILE *F, *stdfiles[3];
2616 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2618 /* allocate global buffer */
2619 buf = xmalloc(MAXVARFMT+1);
2621 vhash = hash_init();
2622 ahash = hash_init();
2623 fdhash = hash_init();
2624 fnhash = hash_init();
2626 /* initialize variables */
2627 for (i=0; *vNames; i++) {
2628 V[i] = v = newvar(nextword(&vNames));
2629 if (*vValues != '\377')
2630 setvar_s(v, nextword(&vValues));
2634 if (*vNames == '*') {
2635 v->type |= VF_SPECIAL;
2640 handle_special(V[FS]);
2641 handle_special(V[RS]);
2643 stdfiles[0] = stdin;
2644 stdfiles[1] = stdout;
2645 stdfiles[2] = stderr;
2646 for (i=0; i<3; i++) {
2647 rsm = newfile(nextword(&stdnames));
2648 rsm->F = stdfiles[i];
2651 for (envp=environ; *envp; envp++) {
2652 s = bb_xstrdup(*envp);
2653 s1 = strchr(s, '=');
2655 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2659 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2662 setvar_s(V[FS], optarg);
2665 if (! is_assignment(optarg))
2670 F = afopen(programname = optarg, "r");
2672 /* one byte is reserved for some trick in next_token */
2673 for (i=j=1; j>0; i+=j) {
2674 s = (char *)xrealloc(s, i+4096);
2675 j = fread(s+i, 1, 4094, F);
2683 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2694 programname="cmd. line";
2695 parse_program(argv[optind++]);
2699 /* fill in ARGV array */
2700 setvar_i(V[ARGC], argc - optind + 1);
2701 setari_u(V[ARGV], 0, "awk");
2702 for(i=optind; i < argc; i++)
2703 setari_u(V[ARGV], i+1-optind, argv[i]);
2705 evaluate(beginseq.first, &tv);
2706 if (! mainseq.first && ! endseq.first)
2707 awk_exit(EXIT_SUCCESS);
2709 /* input file could already be opened in BEGIN block */
2710 if (! iF) iF = next_input_file();
2712 /* passing through input files */
2716 setvar_i(V[FNR], 0);
2718 while ((c = awk_getline(iF, V[F0])) > 0) {
2723 evaluate(mainseq.first, &tv);
2730 runtime_error(strerror(errno));
2732 iF = next_input_file();
2736 evaluate(endseq.first, &tv);
2737 awk_exit(EXIT_SUCCESS);