1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
42 #define VF_ARRAY 0x0002 /* 1 = it's an array */
44 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
45 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
47 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
49 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
52 /* these flags are static, don't change them when value is changed */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
56 typedef struct var_s {
57 unsigned short type; /* flags */
61 int aidx; /* func arg index (on compilation stage) */
62 struct xhash_s *array; /* array ptr */
63 struct var_s *parent; /* for func args, ptr to actual parameter */
64 char **walker; /* list of array elements (for..in) */
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
76 typedef struct func_s {
82 typedef struct rstream_s {
88 unsigned short is_pipe;
91 typedef struct hash_item_s {
93 struct var_s v; /* variable/array hash */
94 struct rstream_s rs; /* redirect streams hash */
95 struct func_s f; /* functions hash */
97 struct hash_item_s *next; /* next in chain */
98 char name[1]; /* really it's longer */
101 typedef struct xhash_s {
102 unsigned int nel; /* number of elements currently stored */
103 unsigned int csize; /* current hash size (number of buckets in items[]) */
104 unsigned int nprime; /* index of the next hash size in PRIMES[] */
105 unsigned int glen; /* total length of item names, counting each name's NUL */
106 struct hash_item_s **items; /* bucket array; each bucket is a chain linked via ->next */
110 typedef struct node_s {
112 unsigned short lineno;
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
140 typedef struct tsplitter_s {
145 /* simple token classes */
146 /* Order and hex values are very important!!! See next_token() */
147 #define TC_SEQSTART 1 /* ( */
148 #define TC_SEQTERM (1 << 1) /* ) */
149 #define TC_REGEXP (1 << 2) /* /.../ */
150 #define TC_OUTRDR (1 << 3) /* | > >> */
151 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
152 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153 #define TC_BINOPX (1 << 6) /* two-opnd operator */
154 #define TC_IN (1 << 7)
155 #define TC_COMMA (1 << 8)
156 #define TC_PIPE (1 << 9) /* input redirection pipe */
157 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158 #define TC_ARRTERM (1 << 11) /* ] */
159 #define TC_GRPSTART (1 << 12) /* { */
160 #define TC_GRPTERM (1 << 13) /* } */
161 #define TC_SEMICOL (1 << 14)
162 #define TC_NEWLINE (1 << 15)
163 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164 #define TC_WHILE (1 << 17)
165 #define TC_ELSE (1 << 18)
166 #define TC_BUILTIN (1 << 19)
167 #define TC_GETLINE (1 << 20)
168 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
169 #define TC_BEGIN (1 << 22)
170 #define TC_END (1 << 23)
171 #define TC_EOF (1 << 24)
172 #define TC_VARIABLE (1 << 25)
173 #define TC_ARRAY (1 << 26)
174 #define TC_FUNCTION (1 << 27)
175 #define TC_STRING (1 << 28)
176 #define TC_NUMBER (1 << 29)
178 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
180 /* combined token classes */
181 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
193 /* discard newlines after these */
194 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195 TC_BINOP | TC_OPTERM)
197 /* what can expression begin with */
198 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205 TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
208 #define OF_RES1 0x010000
209 #define OF_RES2 0x020000
210 #define OF_STR1 0x040000
211 #define OF_STR2 0x080000
212 #define OF_NUM1 0x100000
213 #define OF_CHECKED 0x200000
215 /* combined operator flags */
218 #define xS (OF_RES2 | OF_STR2)
220 #define VV (OF_RES1 | OF_RES2)
221 #define Nx (OF_RES1 | OF_NUM1)
222 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx (OF_RES1 | OF_STR1)
224 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK 0x007F
230 /* The operator priority is the highest byte (even: r->l, odd: l->r grouping).
231 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
232 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
234 #define P(x) ((unsigned long)(x) << 24) /* argument parenthesized; unsigned so P(0x83)..P(0xd6) never shift into int's sign bit */
235 #define PRIMASK 0x7F000000
236 #define PRIMASK2 0x7E000000
238 /* Operation classes */
240 #define SHIFT_TIL_THIS 0x0600
241 #define RECUR_FROM_THIS 0x1000
244 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
245 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
247 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
248 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
249 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
251 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
252 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
253 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
254 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
255 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
256 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
257 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
258 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
261 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
265 /* simple builtins */
267 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
273 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
277 /* tokens and their corresponding info values */
279 #define NTC "\377" /* switch to next token class (tc<<1) */
282 #define OC_B OC_BUILTIN
284 static char * const tokenlist =
287 "\1/" NTC /* REGEXP */
288 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
289 "\2++" "\2--" NTC /* UOPPOST */
290 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
291 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
292 "\2*=" "\2/=" "\2%=" "\2^="
293 "\1+" "\1-" "\3**=" "\2**"
294 "\1/" "\1%" "\1^" "\1*"
295 "\2!=" "\2>=" "\2<=" "\1>"
296 "\1<" "\2!~" "\1~" "\2&&"
297 "\2||" "\1?" "\1:" NTC
301 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
307 "\2if" "\2do" "\3for" "\5break" /* STATX */
308 "\10continue" "\6delete" "\5print"
309 "\6printf" "\4next" "\10nextfile"
310 "\6return" "\4exit" NTC
314 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
315 "\3cos" "\3exp" "\3int" "\3log"
316 "\4rand" "\3sin" "\4sqrt" "\5srand"
317 "\6gensub" "\4gsub" "\5index" "\6length"
318 "\5match" "\5split" "\7sprintf" "\3sub"
319 "\6substr" "\7systime" "\10strftime"
320 "\7tolower" "\7toupper" NTC
322 "\4func" "\10function" NTC
327 static unsigned long tokeninfo[] = {
332 xS|'a', xS|'w', xS|'|',
333 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
334 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
336 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
337 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
338 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
339 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
341 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
342 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
343 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
344 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
345 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
346 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
347 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
348 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
349 OC_COLON|xx|P(67)|':',
352 OC_PGETLINE|SV|P(37),
353 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
354 OC_UNARY|xV|P(19)|'!',
360 ST_IF, ST_DO, ST_FOR, OC_BREAK,
361 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
362 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
363 OC_RETURN|Vx, OC_EXIT|Nx,
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
380 /* internal variable names and their initial values */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
383 CONVFMT=0, OFMT, FS, OFS,
384 ORS, RS, RT, FILENAME,
385 SUBSEP, ARGIND, ARGC, ARGV,
388 ENVIRON, F0, _intvarcount_
391 static char * vNames =
392 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
393 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
394 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
396 "NR\0" "NF\0*" "IGNORECASE\0*"
397 "ENVIRON\0" "$\0*" "\0";
399 static char * vValues =
400 "%.6g\0" "%.6g\0" " \0" " \0"
401 "\n\0" "\n\0" "\0" "\0"
405 /* hash size may grow to these values */
406 #define FIRST_PRIME 61 /* initial hash size; no trailing ';' so the macro is usable inside expressions */
407 static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
408 static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]); /* element count follows the array's own type */
412 extern char **environ;
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
420 static char *programname;
422 static int is_f0_split;
423 static int nfields = 0;
424 static var *Fields = NULL;
425 static tsplitter fsplitter, rsplitter;
426 static nvblock *cb = NULL;
429 static int icase = FALSE;
430 static int exiting = FALSE;
433 unsigned long tclass;
441 /* function prototypes */
442 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
443 static void handle_special(var *);
444 static node *parse_expr(unsigned long);
445 static void chain_group(void);
446 static var *evaluate(node *, var *);
447 static rstream *next_input_file(void);
448 static int fmt_num(char *, int, char *, double, int);
449 static int awk_exit(int);
451 /* ---- error handling ---- */
453 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
454 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
455 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
456 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
457 static const char EMSG_INV_FMT[] = "Invalid format specifier";
458 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
459 static const char EMSG_NOT_ARRAY[] = "Not an array";
460 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
461 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
462 #ifndef CONFIG_FEATURE_AWK_MATH
463 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
466 static void syntax_error(const char * const message)
468 bb_error_msg("%s:%i: %s", programname, lineno, message);
472 #define runtime_error(x) syntax_error(x)
475 /* ---- hash stuff ---- */
477 static unsigned int hashidx(char *name) {
479 register unsigned int idx=0;
481 while (*name) idx = *name++ + (idx << 6) - idx;
485 /* create new hash */
486 static xhash *hash_init(void) {
490 newhash = (xhash *)xcalloc(1, sizeof(xhash));
491 newhash->csize = FIRST_PRIME;
492 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
497 /* find item in hash, return ptr to data, NULL if not found */
498 static void *hash_search(xhash *hash, char *name) {
502 hi = hash->items [ hashidx(name) % hash->csize ];
504 if (strcmp(hi->name, name) == 0)
511 /* grow hash if it becomes too big */
512 static void hash_rebuild(xhash *hash) {
514 unsigned int newsize, i, idx;
515 hash_item **newitems, *hi, *thi;
517 if (hash->nprime == NPRIMES)
520 newsize = PRIMES[hash->nprime++];
521 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
523 for (i=0; i<hash->csize; i++) {
528 idx = hashidx(thi->name) % newsize;
529 thi->next = newitems[idx];
535 hash->csize = newsize;
536 hash->items = newitems;
539 /* find item in hash, add it if necessary. Return ptr to data */
540 static void *hash_find(xhash *hash, char *name) {
546 hi = hash_search(hash, name);
548 if (++hash->nel / hash->csize > 10)
551 l = bb_strlen(name) + 1;
552 hi = xcalloc(sizeof(hash_item) + l, 1);
553 memcpy(hi->name, name, l);
555 idx = hashidx(name) % hash->csize;
556 hi->next = hash->items[idx];
557 hash->items[idx] = hi;
563 #define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
564 #define newvar(name) (var *) hash_find ( vhash , (name) )
565 #define newfile(name) (rstream *) hash_find ( fdhash , (name) )
566 #define newfunc(name) (func *) hash_find ( fnhash , (name) )
568 static void hash_remove(xhash *hash, char *name) {
570 hash_item *hi, **phi;
572 phi = &(hash->items[ hashidx(name) % hash->csize ]);
575 if (strcmp(hi->name, name) == 0) {
576 hash->glen -= (bb_strlen(name) + 1);
586 /* ------ some useful functions ------ */
588 static void skip_spaces(char **s) {
590 register char *p = *s;
592 while(*p == ' ' || *p == '\t' ||
593 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
599 static char *nextword(char **s) {
601 register char *p = *s;
608 static char nextchar(char **s) {
610 register char c, *pps;
614 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
615 if (c == '\\' && *s == pps) c = *((*s)++);
619 static inline int isalnum_(int c) {
621 return (isalnum(c) || c == '_');
624 static FILE *afopen(const char *path, const char *mode) {
626 return (*path == '-' && *(path+1) == '\0') ? stdin : bb_xfopen(path, mode);
629 /* -------- working with variables (set/get/copy/etc) -------- */
631 static xhash *iamarray(var *v) {
635 while (a->type & VF_CHILD)
638 if (! (a->type & VF_ARRAY)) {
640 a->x.array = hash_init();
645 static void clear_array(xhash *array) {
650 for (i=0; i<array->csize; i++) {
651 hi = array->items[i];
655 free(thi->data.v.string);
658 array->items[i] = NULL;
660 array->glen = array->nel = 0;
663 /* clear a variable */
664 static var *clrvar(var *v) {
666 if (!(v->type & VF_FSTR))
669 v->type &= VF_DONTTOUCH;
675 /* assign string value to variable */
676 static var *setvar_p(var *v, char *value) {
685 /* same as setvar_p but make a copy of string */
686 static var *setvar_s(var *v, char *value) {
688 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
691 /* same as setvar_s but set USER flag */
692 static var *setvar_u(var *v, char *value) {
699 /* set array element to user string */
700 static void setari_u(var *a, int idx, char *s) {
703 static char sidx[12];
705 sprintf(sidx, "%d", idx);
706 v = findvar(iamarray(a), sidx);
710 /* assign numeric value to variable */
711 static var *setvar_i(var *v, double value) {
714 v->type |= VF_NUMBER;
720 static char *getvar_s(var *v) {
722 /* if v is numeric and has no cached string, convert it to string */
723 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
724 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
725 v->string = bb_xstrdup(buf);
726 v->type |= VF_CACHED;
728 return (v->string == NULL) ? "" : v->string;
731 static double getvar_i(var *v) {
735 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
739 v->number = strtod(s, &s);
740 if (v->type & VF_USER) {
748 v->type |= VF_CACHED;
753 static var *copyvar(var *dest, var *src) {
757 dest->type |= (src->type & ~VF_DONTTOUCH);
758 dest->number = src->number;
760 dest->string = bb_xstrdup(src->string);
762 handle_special(dest);
766 static var *incvar(var *v) {
768 return setvar_i(v, getvar_i(v)+1.);
771 /* return true if v is number or numeric string */
772 static int is_numeric(var *v) {
775 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
778 /* return 1 when value of v corresponds to true, 0 otherwise */
779 static int istrue(var *v) {
782 return (v->number == 0) ? 0 : 1;
784 return (v->string && *(v->string)) ? 1 : 0;
787 /* temporary variables allocator. Last allocated should be first freed */
788 static var *nvalloc(int n) {
796 if ((cb->pos - cb->nv) + n <= cb->size) break;
801 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
802 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
807 if (pb) pb->next = cb;
813 while (v < cb->pos) {
822 static void nvfree(var *v) {
826 if (v < cb->nv || v >= cb->pos)
827 runtime_error(EMSG_INTERNAL_ERROR);
829 for (p=v; p<cb->pos; p++) {
830 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
831 clear_array(iamarray(p));
832 free(p->x.array->items);
835 if (p->type & VF_WALK)
842 while (cb->prev && cb->pos == cb->nv) {
847 /* ------- awk program text parsing ------- */
849 /* Parse the next token pointed to by global pos, place results into global t.
850 * If the token isn't expected, give up with a syntax error. Return the token class
852 static unsigned long next_token(unsigned long expected) {
856 unsigned long tc, *ti;
858 static int concat_inserted = FALSE;
859 static unsigned long save_tclass, save_info;
860 static unsigned long ltclass = TC_OPTERM;
866 } else if (concat_inserted) {
868 concat_inserted = FALSE;
869 t.tclass = save_tclass;
880 while (*p != '\n' && *p != '\0') p++;
888 } else if (*p == '\"') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 *(s++) = nextchar(&p);
900 } else if ((expected & TC_REGEXP) && *p == '/') {
904 if (*p == '\0' || *p == '\n')
905 syntax_error(EMSG_UNEXP_EOS);
906 if ((*s++ = *p++) == '\\') {
908 *(s-1) = bb_process_escape_sequence((const char **)&p);
909 if (*pp == '\\') *s++ = '\\';
910 if (p == pp) *s++ = *p++;
917 } else if (*p == '.' || isdigit(*p)) {
919 t.number = strtod(p, &p);
921 syntax_error(EMSG_UNEXP_TOKEN);
925 /* search for something known */
935 /* if token class is expected, token
936 * matches and it's not a longer word,
937 * then this is what we are looking for
939 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940 *tl == *p && strncmp(p, tl, l) == 0 &&
941 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
951 /* it's a name (var/array/function),
952 * otherwise it's something wrong
955 syntax_error(EMSG_UNEXP_TOKEN);
958 while(isalnum_(*(++p))) {
976 /* skipping newlines in some cases */
977 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
980 /* insert concatenation operator when needed */
981 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
982 concat_inserted = TRUE;
986 t.info = OC_CONCAT | SS | P(35);
993 /* Are we ready for this? */
994 if (! (ltclass & expected))
995 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
996 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1001 static void rollback_token(void) { t.rollback = TRUE; }
1003 static node *new_node(unsigned long info) {
1007 n = (node *)xcalloc(sizeof(node), 1);
1013 static node *mk_re_node(char *s, node *n, regex_t *re) {
1015 n->info = OC_REGEXP;
1018 xregcomp(re, s, REG_EXTENDED);
1019 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1024 static node *condition(void) {
1026 next_token(TC_SEQSTART);
1027 return parse_expr(TC_SEQTERM);
1030 /* parse expression terminated by given argument, return ptr
1031 * to built subtree. Terminator is eaten by parse_expr */
1032 static node *parse_expr(unsigned long iexp) {
1037 unsigned long tc, xtc;
1041 sn.r.n = glptr = NULL;
1042 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1044 while (! ((tc = next_token(xtc)) & iexp)) {
1045 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1046 /* input redirection (<) attached to glptr node */
1047 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1049 xtc = TC_OPERAND | TC_UOPPRE;
1052 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1053 /* for binary and postfix-unary operators, jump back over
1054 * previous operators with higher priority */
1056 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1057 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1059 if ((t.info & OPCLSMASK) == OC_TERNARY)
1061 cn = vn->a.n->r.n = new_node(t.info);
1063 if (tc & TC_BINOP) {
1065 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1066 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1068 next_token(TC_GETLINE);
1069 /* give maximum priority to this pipe */
1070 cn->info &= ~PRIMASK;
1071 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1075 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1080 /* for operands and prefix-unary operators, attach them
1083 cn = vn->r.n = new_node(t.info);
1085 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1086 if (tc & (TC_OPERAND | TC_REGEXP)) {
1087 xtc = TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1088 /* one should be very careful with switch on tclass -
1089 * only simple tclasses should be used! */
1094 if ((v = hash_search(ahash, t.string)) != NULL) {
1095 cn->info = OC_FNARG;
1096 cn->l.i = v->x.aidx;
1098 cn->l.v = newvar(t.string);
1100 if (tc & TC_ARRAY) {
1102 cn->r.n = parse_expr(TC_ARRTERM);
1104 xtc = TC_UOPPOST | TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1110 v = cn->l.v = xcalloc(sizeof(var), 1);
1112 setvar_i(v, t.number);
1114 setvar_s(v, t.string);
1118 mk_re_node(t.string, cn,
1119 (regex_t *)xcalloc(sizeof(regex_t),2));
1124 cn->r.f = newfunc(t.string);
1125 cn->l.n = condition();
1129 cn = vn->r.n = parse_expr(TC_SEQTERM);
1135 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1139 cn->l.n = condition();
1148 /* add node to chain. Return ptr to alloc'd node */
1149 static node *chain_node(unsigned long info) {
1154 seq->first = seq->last = new_node(0);
1156 if (seq->programname != programname) {
1157 seq->programname = programname;
1158 n = chain_node(OC_NEWSOURCE);
1159 n->l.s = bb_xstrdup(programname);
1164 seq->last = n->a.n = new_node(OC_DONE);
1169 static void chain_expr(unsigned long info) {
1173 n = chain_node(info);
1174 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1175 if (t.tclass & TC_GRPTERM)
1179 static node *chain_loop(node *nn) {
1181 node *n, *n2, *save_brk, *save_cont;
1183 save_brk = break_ptr;
1184 save_cont = continue_ptr;
1186 n = chain_node(OC_BR | Vx);
1187 continue_ptr = new_node(OC_EXEC);
1188 break_ptr = new_node(OC_EXEC);
1190 n2 = chain_node(OC_EXEC | Vx);
1193 continue_ptr->a.n = n2;
1194 break_ptr->a.n = n->r.n = seq->last;
1196 continue_ptr = save_cont;
1197 break_ptr = save_brk;
1202 /* parse group and attach it to chain */
1203 static void chain_group(void) {
1209 c = next_token(TC_GRPSEQ);
1210 } while (c & TC_NEWLINE);
1212 if (c & TC_GRPSTART) {
1213 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1214 if (t.tclass & TC_NEWLINE) continue;
1218 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1220 chain_expr(OC_EXEC | Vx);
1221 } else { /* TC_STATEMNT */
1222 switch (t.info & OPCLSMASK) {
1224 n = chain_node(OC_BR | Vx);
1225 n->l.n = condition();
1227 n2 = chain_node(OC_EXEC);
1229 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1231 n2->a.n = seq->last;
1239 n = chain_loop(NULL);
1244 n2 = chain_node(OC_EXEC);
1245 n = chain_loop(NULL);
1247 next_token(TC_WHILE);
1248 n->l.n = condition();
1252 next_token(TC_SEQSTART);
1253 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1254 if (t.tclass & TC_SEQTERM) { /* for-in */
1255 if ((n2->info & OPCLSMASK) != OC_IN)
1256 syntax_error(EMSG_UNEXP_TOKEN);
1257 n = chain_node(OC_WALKINIT | VV);
1260 n = chain_loop(NULL);
1261 n->info = OC_WALKNEXT | Vx;
1263 } else { /* for(;;) */
1264 n = chain_node(OC_EXEC | Vx);
1266 n2 = parse_expr(TC_SEMICOL);
1267 n3 = parse_expr(TC_SEQTERM);
1277 n = chain_node(t.info);
1278 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1279 if (t.tclass & TC_OUTRDR) {
1281 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1283 if (t.tclass & TC_GRPTERM)
1288 n = chain_node(OC_EXEC);
1293 n = chain_node(OC_EXEC);
1294 n->a.n = continue_ptr;
1297 /* delete, next, nextfile, return, exit */
1305 static void parse_program(char *p) {
1307 unsigned long tclass;
1314 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1315 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1317 if (tclass & TC_OPTERM)
1321 if (tclass & TC_BEGIN) {
1325 } else if (tclass & TC_END) {
1329 } else if (tclass & TC_FUNCDECL) {
1330 next_token(TC_FUNCTION);
1332 f = newfunc(t.string);
1333 f->body.first = NULL;
1335 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1336 v = findvar(ahash, t.string);
1337 v->x.aidx = (f->nargs)++;
1339 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1346 } else if (tclass & TC_OPSEQ) {
1348 cn = chain_node(OC_TEST);
1349 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1350 if (t.tclass & TC_GRPSTART) {
1354 chain_node(OC_PRINT);
1356 cn->r.n = mainseq.last;
1358 } else /* if (tclass & TC_GRPSTART) */ {
1366 /* -------- program execution part -------- */
1368 static node *mk_splitter(char *s, tsplitter *spl) { /* (re)configure splitter spl from separator string s */
1370 register regex_t *re, *ire;
1376 if ((n->info & OPCLSMASK) == OC_REGEXP) { /* bitwise class mask; the former `&&` made this test always false */
1380 if (bb_strlen(s) > 1) { /* separator longer than one char: compile it as a regex node */
1381 mk_re_node(s, n, re);
1383 n->info = (unsigned long) *s; /* store the separator's first char itself as the splitter info */
1389 /* Use a node as a regular expression. Supplied with the node ptr and regex_t
1390 * storage space. Returns a ptr to the regex (if the result points to preg, the
1391 * caller must regfree() it later)
1393 static regex_t *as_regex(node *op, regex_t *preg) {
1398 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1399 return icase ? op->r.ire : op->l.re;
1402 s = getvar_s(evaluate(op, v));
1403 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1409 /* gradually increasing buffer */
1410 static void qrealloc(char **b, int n, int *size) {
1412 if (! *b || n >= *size)
1413 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1416 /* resize field storage space */
1417 static void fsrealloc(int size) {
1419 static int maxfields = 0;
1422 if (size >= maxfields) {
1424 maxfields = size + 16;
1425 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1426 for (; i<maxfields; i++) {
1427 Fields[i].type = VF_SPECIAL;
1428 Fields[i].string = NULL;
1432 if (size < nfields) {
1433 for (i=size; i<nfields; i++) {
1440 static int awk_split(char *s, node *spl, char **slist) {
1445 regmatch_t pmatch[2];
1447 /* in worst case, each char would be a separate field */
1448 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1450 c[0] = c[1] = (char)spl->info;
1452 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1454 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1456 l = strcspn(s, c+2);
1457 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1458 pmatch[0].rm_so <= l) {
1459 l = pmatch[0].rm_so;
1460 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1462 pmatch[0].rm_eo = l;
1463 if (*(s+l)) pmatch[0].rm_eo++;
1469 s += pmatch[0].rm_eo;
1472 } else if (c[0] == '\0') { /* null split */
1478 } else if (c[0] != ' ') { /* single-character split */
1480 c[0] = toupper(c[0]);
1481 c[1] = tolower(c[1]);
1484 while ((s1 = strpbrk(s1, c))) {
1488 } else { /* space split */
1490 while (isspace(*s)) s++;
1493 while (*s && !isspace(*s))
1501 static void split_f0(void) {
1503 static char *fstrings = NULL;
1513 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1516 for (i=0; i<n; i++) {
1517 Fields[i].string = nextword(&s);
1518 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1521 /* set NF manually to avoid side effects */
1523 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1524 V[NF]->number = nfields;
1527 /* perform additional actions when some internal variables changed */
1528 static void handle_special(var *v) {
1532 int sl, l, len, i, bsize;
1534 if (! (v->type & VF_SPECIAL))
1538 n = (int)getvar_i(v);
1541 /* recalculate $0 */
1542 sep = getvar_s(V[OFS]);
1543 sl = bb_strlen(sep);
1546 for (i=0; i<n; i++) {
1547 s = getvar_s(&Fields[i]);
1550 memcpy(b+len, sep, sl);
1553 qrealloc(&b, len+l+sl, &bsize);
1554 memcpy(b+len, s, l);
1557 if (b) b[len] = '\0';
1561 } else if (v == V[F0]) {
1562 is_f0_split = FALSE;
1564 } else if (v == V[FS]) {
1565 mk_splitter(getvar_s(v), &fsplitter);
1567 } else if (v == V[RS]) {
1568 mk_splitter(getvar_s(v), &rsplitter);
1570 } else if (v == V[IGNORECASE]) {
1574 n = getvar_i(V[NF]);
1575 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1576 /* right here v is invalid. Just to note... */
1580 /* step through func/builtin/etc arguments */
1581 static node *nextarg(node **pn) {
1586 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1595 static void hashwalk_init(var *v, xhash *array) {
1601 if (v->type & VF_WALK)
1605 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1606 *w = *(w+1) = (char *)(w + 2);
1607 for (i=0; i<array->csize; i++) {
1608 hi = array->items[i];
1610 strcpy(*w, hi->name);
1617 static int hashwalk_next(var *v) {
1625 setvar_s(v, nextword(w+1));
1629 /* evaluate node, return 1 when result is true, 0 otherwise */
1630 static int ptest(node *pattern) {
1633 return istrue(evaluate(pattern, &v));
1636 /* read next record from stream rsm into a variable v */
1637 static int awk_getline(rstream *rsm, var *v) {
1640 regmatch_t pmatch[2];
1641 int a, p, pp=0, size;
1642 int fd, so, eo, r, rp;
1645 /* we're using our own buffer since we need access to accumulating
1648 fd = fileno(rsm->F);
1653 c = (char) rsplitter.n.info;
1656 if (! m) qrealloc(&m, 256, &size);
1662 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1663 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1664 b, 1, pmatch, 0) == 0) {
1665 so = pmatch[0].rm_so;
1666 eo = pmatch[0].rm_eo;
1670 } else if (c != '\0') {
1671 s = strchr(b+pp, c);
1678 while (b[rp] == '\n')
1680 s = strstr(b+rp, "\n\n");
1683 while (b[eo] == '\n') eo++;
1691 memmove(m, (const void *)(m+a), p+1);
1696 qrealloc(&m, a+p+128, &size);
1699 p += safe_read(fd, b+p, size-p-1);
1703 setvar_i(V[ERRNO], errno);
1712 c = b[so]; b[so] = '\0';
1716 c = b[eo]; b[eo] = '\0';
1717 setvar_s(V[RT], b+so);
1729 static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1734 if (int_as_int && n == (int)n) {
1735 r = snprintf(b, size, "%d", (int)n);
1737 do { c = *s; } while (*s && *++s);
1738 if (strchr("diouxX", c)) {
1739 r = snprintf(b, size, format, (int)n);
1740 } else if (strchr("eEfgG", c)) {
1741 r = snprintf(b, size, format, n);
1743 runtime_error(EMSG_INV_FMT);
1750 /* Formatted output into an allocated buffer; returns a pointer to that
 * buffer. The first argument reachable through n is evaluated as the format
 * string and duplicated with bb_xstrdup() so it can be edited in place;
 * every conversion found in it consumes one further argument through
 * nextarg(). */
1751 static char *awk_printf(node *n) {
1754 char *fmt, *s, *s1, *f;
1755 int i, j, incr, bsize;
1760 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* Scan forward to the next conversion: the loop keeps going over ordinary
 * characters and over a percent sign that is directly followed by another
 * percent sign. */
1765 while (*f && (*f != '%' || *(++f) == '%'))
/* Then move on to the conversion letter, the first alphabetic character.
 * NOTE(review): isalpha() is given a plain char; a negative value is
 * undefined behaviour, a cast to unsigned char would be the safe form. */
1767 while (*f && !isalpha(*f))
/* Reserve room for the literal text of this segment plus MAXVARFMT bytes for
 * the converted value (i is the number of bytes already produced). */
1770 incr = (f - s) + MAXVARFMT;
1771 qrealloc(&b, incr+i, &bsize);
/* c is the conversion letter; the character after it is saved in c1 and
 * replaced by NUL so that s is a stand-alone format for one conversion. */
1772 c = *f; if (c != '\0') f++;
1773 c1 = *f ; *f = '\0';
1774 arg = evaluate(nextarg(&n), v);
/* Character conversion (also taken when the format ended without a letter):
 * a numeric argument is printed as the character with that code, any other
 * argument contributes the first character of its string value. */
1777 if (c == 'c' || !c) {
1778 i += sprintf(b+i, s,
1779 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
/* String conversion: the buffer is grown by the length of the string first.
 * NOTE(review): a field width or precision larger than MAXVARFMT could still
 * exceed the reserved space since sprintf() is unbounded -- confirm. */
1781 } else if (c == 's') {
1783 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1784 i += sprintf(b+i, s, s1);
/* Every other conversion is treated as numeric and bounded by incr. */
1787 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1791 /* if there was an error while sprintf, return value is negative */
/* Shrink the buffer to the produced length plus the terminator. */
1796 b = xrealloc(b, i+1);
1803 /* common substitution routine
1804 * replace (nm) substring of (src) that match (n) with (repl), store
1805 * result into (dest), return number of substitutions. If nm=0, replace
1806 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1807 * subexpression matching (\1-\9)
1809 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1813 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// Ten slots: the whole match plus up to nine subexpressions.
1814 regmatch_t pmatch[10];
// as_regex() returns the regex to run for node rn; the address of the local
// sreg is passed so a temporary regex can be compiled into it (see the
// regfree() at the end).
1817 re = as_regex(rn, &sreg);
// Missing source or destination falls back to the current record.
1818 if (! src) src = V[F0];
1819 if (! dest) dest = V[F0];
1823 rl = bb_strlen(repl);
// REG_NOTBOL is passed for every search that does not start at the very
// beginning of the source string, so a line-start anchor cannot match in
// the middle of the text.
1824 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
// so and eo are the match offsets relative to sp.
1825 so = pmatch[0].rm_so;
1826 eo = pmatch[0].rm_eo;
// Grow the output for the text up to the end of the match plus the
// replacement, then copy those eo bytes; di is the output write position.
1828 qrealloc(&ds, di + eo + rl, &dssize);
1829 memcpy(ds + di, sp, eo);
// Walk the replacement text one character at a time.
1835 for (s = repl; *s; s++) {
// An ampersand, or a digit when ex is set, refers back to matched text.
// NOTE(review): nbs presumably counts the backslashes seen just before this
// character; the adjustment drops part of them from the output -- confirm
// against the lines that maintain nbs.
1841 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1842 di -= ((nbs + 3) >> 1);
// Copy submatch j (n bytes) into the output.
1851 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1852 qrealloc(&ds, di + rl + n, &dssize);
1853 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// Copy one source character and stop when it was the terminating NUL.
1864 if (! (ds[di++] = *sp++)) break;
// Append whatever is left of the source after the last match.
// NOTE(review): the requested size does not include the terminator byte
// written by strcpy() -- confirm that qrealloc() always leaves slack.
1868 qrealloc(&ds, di + strlen(sp), &dssize);
1869 strcpy(ds + di, sp);
// Release the regex only when it was compiled into the local sreg.
1871 if (re == &sreg) regfree(re);
// Run the builtin function described by node op and store its result in res.
// op->info encodes the builtin: the OPNMASK bits select which one runs, the
// bits tested below drive argument preparation, and the two topmost bits
// (info >> 30) give the minimum argument count.
// NOTE(review): the case labels of the switch are not among the lines shown
// here, so the builtin names given in the notes below are inferred from the
// calls each branch makes -- confirm against the full source.
1875 static var *exec_builtin(node *op, var *res) {
1882 regmatch_t pmatch[2];
1884 static tsplitter tspl;
1886 unsigned long isr, info;
1893 isr = info = op->info;
// Collect up to four arguments: an[] holds the argument nodes, av[] their
// evaluated values (when either bit of 0x09000000 is set) and as[] the
// string form of those values (when bit 0x08000000 is set).
1896 av[2] = av[3] = NULL;
1897 for (i=0 ; i<4 && op ; i++) {
1898 an[i] = nextarg(&op);
1899 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1900 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1905 if (nargs < (info >> 30))
1906 runtime_error(EMSG_TOO_FEW_ARGS);
1908 switch (info & OPNMASK) {
1911 #ifdef CONFIG_FEATURE_AWK_MATH
// NOTE(review): the first operand is av[i]; no reassignment of i is visible
// between the argument loop and this line, so i would be the number of
// collected arguments rather than 0 and av[i] would be the NULL entry set
// above. av[0] looks like the intended operand -- confirm and fix.
1912 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1914 runtime_error(EMSG_NO_MATH);
// Presumably split: a regexp node given as third argument is used directly
// as the splitter, anything else is evaluated to a string and turned into
// a splitter by mk_splitter().
1920 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1921 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
// Split the first argument, empty the target array and store the n pieces
// under the indices 1..n.
1926 n = awk_split(as[0], spl, &s);
1928 clear_array(iamarray(av[1]));
1929 for (i=1; i<=n; i++)
1930 setari_u(av[1], i, nextword(&s1));
// Presumably substr: l is the string length, i the zero-based start clamped
// to the range 0..l, n the requested length (the rest of the string when no
// third argument was given).
1936 l = bb_strlen(as[0]);
1937 i = getvar_i(av[1]) - 1;
1938 if (i>l) i=l; if (i<0) i=0;
1939 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
// NOTE(review): strncpy() does not terminate the destination when n bytes
// are copied -- confirm that s is terminated in the lines not shown here.
1942 strncpy(s, as[0]+i, n);
// Presumably tolower and toupper: a private copy of the argument is
// converted in place through the function pointer to_xxx.
1954 s1 = s = bb_xstrdup(as[0]);
1956 *s1 = (*to_xxx)(*s1);
// Presumably index: ll is the length of the searched text, l the last
// offset at which it can still fit inside the first argument.
1964 ll = bb_strlen(as[1]);
1965 l = bb_strlen(as[0]) - ll;
1966 if (ll > 0 && l >= 0) {
// Exact search; the result is the one-based position of the hit.
1968 s = strstr(as[0], as[1]);
1969 if (s) n = (s - as[0]) + 1;
1971 /* this piece of code is terribly slow and
1972 * really should be rewritten
// Case-insensitive variant: try every offset with strncasecmp().
1974 for (i=0; i<=l; i++) {
1975 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Presumably strftime: tt is the time stamp taken from the second argument;
// a built-in default format is used when no argument was passed, and the
// text is produced in the global buf, limited to MAXVARFMT.
1987 tt = getvar_i(av[1]);
1990 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1991 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
// Presumably match: run the regex from the second argument on the first.
1997 re = as_regex(an[1], &sreg);
1998 n = regexec(re, as[0], 1, pmatch, 0);
// These offsets give a start of 0 and a length of -1 in the assignments
// below (presumably used when nothing matched).
2003 pmatch[0].rm_so = 0;
2004 pmatch[0].rm_eo = -1;
// Publish start and length in RSTART and RLENGTH; the start is also the
// result of the call.
2006 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2007 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2008 setvar_i(res, pmatch[0].rm_so);
2009 if (re == &sreg) regfree(re);
// Three substitution forms built on awk_sub(): the first takes the match
// number from the third argument, reads from the fourth, writes into res
// and enables subexpression references; the second replaces all matches in
// place in the third argument; the last replaces only the first match.
2013 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2017 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2021 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2030 * Evaluate node - the heart of the program. Supplied with subtree
2031 * and place where to store result. returns ptr to result.
2033 #define XC(n) ((n) >> 8)
2035 static var *evaluate(node *op, var *res) {
2037 /* This procedure is recursive so we should count every byte */
/* State that must survive recursive calls is kept static: the argument
 * vector of the function being executed, the seed last handed to srand
 * (presumably) and a scratch regex for patterns compiled on the fly. */
2038 static var *fnargs = NULL;
2039 static unsigned int seed = 1;
2040 static regex_t sreg;
2049 unsigned long opinfo;
2061 return setvar_s(res, NULL);
/* opn is the operation number stored in the OPNMASK bits of the node info;
 * the global lineno follows the node being executed. */
2068 opn = (short)(opinfo & OPNMASK);
2069 lineno = op->lineno;
2071 /* execute inevitable things */
/* The OF_* flags of the node request operand preparation up front: evaluate
 * the left (OF_RES1) and right (OF_RES2) subtree into the scratch vars v1
 * and v1+1, fetch their string forms (OF_STR1, OF_STR2) and the numeric
 * value of the left operand (OF_NUM1). */
2073 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2074 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2075 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2076 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2077 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
/* Dispatch on the node class; XC() shifts the class bits right by 8 and the
 * case labels are built with the same macro. */
2079 switch (XC(opinfo & OPCLSMASK)) {
2081 /* -- iterative node type -- */
2085 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2086 /* it's range pattern */
/* OF_CHECKED in op->info records that the range is currently open: it is
 * set once the start pattern matched and cleared again when the end pattern
 * matches. */
2087 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2088 op->info |= OF_CHECKED;
2089 if (ptest(op1->r.n))
2090 op->info &= ~OF_CHECKED;
/* Plain pattern: continue with op->a.n when it holds, with op->r.n when it
 * does not. */
2097 op = (ptest(op1)) ? op->a.n : op->r.n;
2101 /* just evaluate an expression, also used as unconditional jump */
2105 /* branch, used in if-else and various loops */
2107 op = istrue(L.v) ? op->a.n : op->r.n;
2110 /* initialize for-in loop */
2111 case XC( OC_WALKINIT ):
2112 hashwalk_init(L.v, iamarray(R.v));
2115 /* get next array item */
2116 case XC( OC_WALKNEXT ):
2117 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2120 case XC( OC_PRINT ):
2121 case XC( OC_PRINTF ):
/* Output redirection: R.s names the target stream; it is opened as a pipe
 * with popen() or as a file in write or append mode depending on opn. */
2124 X.rsm = newfile(R.s);
2127 if((X.rsm->F = popen(R.s, "w")) == NULL)
2128 bb_perror_msg_and_die("popen");
2131 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2137 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* print writes $0 here (presumably the branch without arguments). */
2139 fputs(getvar_s(V[F0]), X.F);
/* Otherwise every argument is printed in turn: numbers are formatted with
 * OFMT through fmt_num() into the global buf, other values are written as
 * strings; OFS separates arguments and ORS ends the record. */
2142 L.v = evaluate(nextarg(&op1), v1);
2143 if (L.v->type & VF_NUMBER) {
2144 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2145 getvar_i(L.v), TRUE);
2148 fputs(getvar_s(L.v), X.F);
2151 if (op1) fputs(getvar_s(V[OFS]), X.F);
2154 fputs(getvar_s(V[ORS]), X.F);
2156 } else { /* OC_PRINTF */
2157 L.s = awk_printf(op1);
2164 case XC( OC_DELETE ):
/* The operand must be a variable or a function argument; anything else is
 * reported as EMSG_NOT_ARRAY. */
2165 X.info = op1->info & OPCLSMASK;
2166 if (X.info == OC_VAR) {
2168 } else if (X.info == OC_FNARG) {
2169 R.v = &fnargs[op1->l.i];
2171 runtime_error(EMSG_NOT_ARRAY);
/* Remove the single element named by the subscript ... */
2176 L.s = getvar_s(evaluate(op1->r.n, v1));
2177 hash_remove(iamarray(R.v), L.s);
/* ... or empty the whole array (presumably when no subscript was given). */
2179 clear_array(iamarray(R.v));
2183 case XC( OC_NEWSOURCE ):
2184 programname = op->l.s;
2187 case XC( OC_RETURN ):
2191 case XC( OC_NEXTFILE ):
2202 /* -- recursive node type -- */
2210 case XC( OC_FNARG ):
2211 L.v = &fnargs[op->l.i];
/* With a subscript node present the result is the array element named by
 * R.s, otherwise the variable itself. */
2214 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
/* Membership test: 1 when key L.s exists in array R.v, else 0. */
2218 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2221 case XC( OC_REGEXP ):
2223 L.s = getvar_s(V[F0]);
2226 case XC( OC_MATCH ):
/* The regex is released again only when it was compiled into the scratch
 * sreg; an opn of '!' inverts the outcome. */
2229 X.re = as_regex(op1, &sreg);
2230 R.i = regexec(X.re, L.s, 0, NULL, 0);
2231 if (X.re == &sreg) regfree(X.re);
2232 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2236 /* if source is a temporary string, just relink it to dest */
2237 if (R.v == v1+1 && R.v->string) {
2238 res = setvar_p(L.v, R.v->string);
2241 res = copyvar(L.v, R.v);
2245 case XC( OC_TERNARY ):
/* The right child of a ternary node has to be the colon node carrying the
 * two alternatives. */
2246 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2247 runtime_error(EMSG_POSSIBLE_ERROR);
2248 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* Function call: a function without a body is undefined. */
2252 if (! op->r.f->body.first)
2253 runtime_error(EMSG_UNDEF_FUNC);
/* Allocate one var per declared parameter plus one; X.v keeps the start of
 * the vector while R.v walks over it. */
2255 X.v = R.v = nvalloc(op->r.f->nargs+1);
2257 L.v = evaluate(nextarg(&op1), v1);
/* Each parameter is marked VF_CHILD and linked to the actual argument
 * through x.parent. */
2259 R.v->type |= VF_CHILD;
2260 R.v->x.parent = L.v;
2261 if (++R.v - X.v >= op->r.f->nargs)
2269 res = evaluate(op->r.f->body.first, res);
2276 case XC( OC_GETLINE ):
2277 case XC( OC_PGETLINE ):
/* The source named by L.s is opened as a pipe for OC_PGETLINE and as a
 * plain file otherwise. */
2279 X.rsm = newfile(L.s);
2281 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2282 X.rsm->F = popen(L.s, "r");
2283 X.rsm->is_pipe = TRUE;
2285 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
/* Fall back to the main input, opening the next file operand on demand
 * (presumably the branch without an explicit source). */
2289 if (! iF) iF = next_input_file();
2294 setvar_i(V[ERRNO], errno);
2302 L.i = awk_getline(X.rsm, R.v);
2312 /* simple builtins */
2313 case XC( OC_FBLTIN ):
/* NOTE(review): rand() can return RAND_MAX, so this quotient can be exactly
 * 1.0, while POSIX awk specifies 0 <= rand() < 1 -- consider dividing by
 * RAND_MAX + 1.0. */
2321 R.d = (double)rand() / (double)RAND_MAX;
2324 #ifdef CONFIG_FEATURE_AWK_MATH
2350 runtime_error(EMSG_NO_MATH);
/* New seed: the argument when one was given, the current time otherwise. */
2356 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2366 L.s = getvar_s(V[F0]);
2367 R.d = bb_strlen(L.s);
/* An empty or missing command is not run and yields 0. */
2372 R.d = (L.s && *L.s) ? system(L.s) : 0;
2380 X.rsm = newfile(L.s);
/* Look the stream up by name, close it with the call matching how it was
 * opened, release its buffer and drop it from fdhash. */
2389 X.rsm = (rstream *)hash_search(fdhash, L.s);
2391 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2392 free(X.rsm->buffer);
2393 hash_remove(fdhash, L.s);
2396 setvar_i(V[ERRNO], errno);
2403 case XC( OC_BUILTIN ):
2404 res = exec_builtin(op, res);
2407 case XC( OC_SPRINTF ):
2408 setvar_p(res, awk_printf(op1));
2411 case XC( OC_UNARY ):
2413 L.d = R.d = getvar_i(R.v);
2428 L.d = istrue(X.v) ? 0 : 1;
2439 case XC( OC_FIELD ):
2440 R.i = (int)getvar_i(R.v);
/* Fields[] is zero-based, so this maps field number R.i to slot R.i-1. */
2448 res = &Fields[R.i-1];
2452 /* concatenation (" ") and index joining (",") */
2453 case XC( OC_CONCAT ):
2454 case XC( OC_COMMA ):
/* opn is reused as the size of the result buffer: both operand lengths
 * plus 2; the comma form additionally makes room for SUBSEP. */
2455 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2456 X.s = (char *)xmalloc(opn);
2458 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2459 L.s = getvar_s(V[SUBSEP]);
2460 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
/* Logical AND and OR: the right operand is evaluated through ptest() only
 * when the left one did not already decide the result. */
2468 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2472 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2475 case XC( OC_BINARY ):
2476 case XC( OC_REPLACE ):
2477 R.d = getvar_i(R.v);
2489 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2493 #ifdef CONFIG_FEATURE_AWK_MATH
2494 L.d = pow(L.d, R.d);
2496 runtime_error(EMSG_NO_MATH);
2500 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* Remainder computed by truncating the quotient.
 * NOTE(review): the (int) cast is undefined for quotients outside the int
 * range; fmod() would cover the whole double range -- confirm whether the
 * math library may be used here. */
2501 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; OC_REPLACE writes the value back into the
 * left operand X.v. */
2504 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2507 case XC( OC_COMPARE ):
/* Numeric comparison when both operands are numeric, string comparison
 * otherwise (ignoring case when icase is set); L.d holds the signed
 * difference. */
2508 if (is_numeric(L.v) && is_numeric(R.v)) {
2509 L.d = getvar_i(L.v) - getvar_i(R.v);
2511 L.s = getvar_s(L.v);
2512 R.s = getvar_s(R.v);
2513 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* The bits above bit 0 of opn select the relation, bit 0 selects whether
 * R.i is used as is or negated. */
2515 switch (opn & 0xfe) {
2526 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2530 runtime_error(EMSG_POSSIBLE_ERROR);
/* Node classes up to SHIFT_TIL_THIS and from RECUR_FROM_THIS upward are
 * told apart here; NOTE(review): the actions taken for each are not among
 * the lines shown -- see the section comments above for the two groups. */
2532 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2534 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2544 /* -------- main & co. -------- */
/* Shut the interpreter down with status r: the END sequence is evaluated
 * and every pipe still registered in fdhash is closed with pclose(), which
 * waits for the child process behind it.
 * NOTE(review): the use of r (presumably the process exit status) is not
 * among the lines shown here -- confirm. */
2546 static int awk_exit(int r) {
2555 evaluate(endseq.first, &tv);
2558 /* waiting for children */
/* Visit each of the csize slots of fdhash; hi is the entry taken from
 * slot i. */
2559 for (i=0; i<fdhash->csize; i++) {
2560 hi = fdhash->items[i];
/* Only streams that are still open and were started as pipes need it. */
2562 if (hi->data.rs.F && hi->data.rs.is_pipe)
2563 pclose(hi->data.rs.F);
2571 /* if expr looks like "var=value", perform assignment and return 1,
2572 * otherwise return 0 */
2573 static int is_assignment(char *expr) {
2575 char *exprc, *s, *s0, *s1;
/* Work on a private copy so that expr itself is left untouched. */
2577 exprc = bb_xstrdup(expr);
/* Not an assignment when the first character fails isalnum_() or when there
 * is no equals sign at all.
 * NOTE(review): if isalnum_() accepts digits, a name starting with a digit
 * passes this check -- confirm whether that is intended. */
2578 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* The value is rewritten character by character through nextchar()
 * (presumably to resolve escape sequences). */
2586 *(s1++) = nextchar(&s);
/* Assign the value at s0 to the variable named by exprc (the copy is
 * presumably cut at the equals sign in lines not shown here). */
2589 setvar_u(newvar(exprc), s0);
2594 /* Switch to the next input file named in ARGV and return its stream.
 * ARGV entries of the form var=value are executed as assignments instead
 * of being opened. */
2595 static rstream *next_input_file(void) {
/* Remembers across calls that at least one file operand was opened. */
2600 static int files_happen = FALSE;
/* Close the previous file and reset the read position of the stream. */
2602 if (rsm.F) fclose(rsm.F);
2604 rsm.pos = rsm.adv = 0;
/* No ARGV entries are left once ARGIND+1 reaches ARGC. */
2607 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* Advance ARGIND and fetch the ARGV element stored under the new index. */
2613 ind = getvar_s(incvar(V[ARGIND]));
2614 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* Empty entries and entries consumed as assignments are not opened. */
2615 if (fname && *fname && !is_assignment(fname))
2616 F = afopen(fname, "r");
2620 files_happen = TRUE;
2621 setvar_s(V[FILENAME], fname);
/* Entry point of the awk applet: sets up the interpreter state, parses the
 * command line and the program text, runs the BEGIN sequence, feeds every
 * input record to the main sequence and finishes through awk_exit(). */
2626 extern int awk_main(int argc, char **argv) {
2633 static int from_file = FALSE;
2635 FILE *F, *stdfiles[3];
/* Three names packed into one literal, separated by NUL bytes and consumed
 * one at a time with nextword(). */
2636 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2638 /* allocate global buffer */
2639 buf = xmalloc(MAXVARFMT+1);
/* One hash table each for variables, arrays, open streams and functions
 * (as suggested by the table names). */
2641 vhash = hash_init();
2642 ahash = hash_init();
2643 fdhash = hash_init();
2644 fnhash = hash_init();
2646 /* initialize variables */
/* vNames and vValues are packed lists walked in parallel; V[i] receives the
 * i-th predefined variable. A value entry starting with the byte 0377 means
 * that no initial string is assigned. */
2647 for (i=0; *vNames; i++) {
2648 V[i] = v = newvar(nextword(&vNames));
2649 if (*vValues != '\377')
2650 setvar_s(v, nextword(&vValues));
/* An asterisk in the name list flags the variable as VF_SPECIAL. */
2654 if (*vNames == '*') {
2655 v->type |= VF_SPECIAL;
/* Apply the special handling for the initial FS and RS values. */
2660 handle_special(V[FS]);
2661 handle_special(V[RS]);
/* Register the three standard streams under their /dev names. */
2663 stdfiles[0] = stdin;
2664 stdfiles[1] = stdout;
2665 stdfiles[2] = stderr;
2666 for (i=0; i<3; i++) {
2667 rsm = newfile(nextword(&stdnames));
2668 rsm->F = stdfiles[i];
/* Fill the ENVIRON array from the process environment; s is a copy of the
 * entry and s1 points at its equals sign. */
2671 for (envp=environ; *envp; envp++) {
2672 s = bb_xstrdup(*envp);
2673 s1 = strchr(s, '=');
2678 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
/* Options: -F field separator, -v assignment, -f program file, -W (ignored
 * with a warning). */
2683 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2686 setvar_s(V[FS], optarg);
2689 if (! is_assignment(optarg))
2694 F = afopen(programname = optarg, "r");
2696 /* one byte is reserved for some trick in next_token */
/* Read the program file in chunks of up to 4094 bytes; i is the offset of
 * the next chunk and starts at 1 because of the reserved byte. */
2697 for (i=j=1; j>0; i+=j) {
2698 s = (char *)xrealloc(s, i+4096);
2699 j = fread(s+i, 1, 4094, F);
/* NOTE(review): the message carries its own trailing newline; if
 * bb_error_msg() appends one as well the warning is followed by an empty
 * line -- confirm and drop the newline from the format if so. */
2707 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
/* The program text is taken from the next command line argument
 * (presumably when no -f option was given, see from_file). */
2718 programname="cmd. line";
2719 parse_program(argv[optind++]);
2723 /* fill in ARGV array */
2724 setvar_i(V[ARGC], argc - optind + 1);
2725 setari_u(V[ARGV], 0, "awk");
2726 for(i=optind; i < argc; i++)
2727 setari_u(V[ARGV], i+1-optind, argv[i]);
/* Run the BEGIN sequence; with neither main nor END rules there is nothing
 * left to do and no input is read. */
2729 evaluate(beginseq.first, &tv);
2730 if (! mainseq.first && ! endseq.first)
2731 awk_exit(EXIT_SUCCESS);
2733 /* input file could already be opened in BEGIN block */
2734 if (! iF) iF = next_input_file();
2736 /* passing through input files */
/* FNR restarts for every input file. */
2740 setvar_i(V[FNR], 0);
/* Each record is read into $0 and the main sequence is run on it. */
2742 while ((c = awk_getline(iF, V[F0])) > 0) {
2747 evaluate(mainseq.first, &tv);
/* Report the errno text (presumably when awk_getline() returned a negative
 * value). */
2754 runtime_error(strerror(errno));
2756 iF = next_input_file();
2760 awk_exit(EXIT_SUCCESS);