1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
42 #define VF_ARRAY 0x0002 /* 1 = it's an array */
44 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
45 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
47 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
49 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
52 /* these flags are static, don't change them when value is changed */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
56 typedef struct var_s {
57 unsigned short type; /* flags */
61 int aidx; /* func arg index (on compilation stage) */
62 struct xhash_s *array; /* array ptr */
63 struct var_s *parent; /* for func args, ptr to actual parameter */
64 char **walker; /* list of array elements (for..in) */
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
76 typedef struct func_s {
82 typedef struct rstream_s {
88 unsigned short is_pipe;
91 typedef struct hash_item_s {
93 struct var_s v; /* variable/array hash */
94 struct rstream_s rs; /* redirect streams hash */
95 struct func_s f; /* functions hash */
97 struct hash_item_s *next; /* next in chain */
98 char name[1]; /* really it's longer */
101 typedef struct xhash_s {
102 unsigned int nel; /* num of elements */
103 unsigned int csize; /* current hash size */
104 unsigned int nprime; /* next hash size in PRIMES[] */
105 unsigned int glen; /* summary length of item names */
106 struct hash_item_s **items;
110 typedef struct node_s {
112 unsigned short lineno;
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
140 typedef struct tsplitter_s {
145 /* simple token classes */
146 /* Order and hex values are very important!!! See next_token() */
147 #define TC_SEQSTART 1 /* ( */
148 #define TC_SEQTERM (1 << 1) /* ) */
149 #define TC_REGEXP (1 << 2) /* /.../ */
150 #define TC_OUTRDR (1 << 3) /* | > >> */
151 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
152 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153 #define TC_BINOPX (1 << 6) /* two-opnd operator */
154 #define TC_IN (1 << 7)
155 #define TC_COMMA (1 << 8)
156 #define TC_PIPE (1 << 9) /* input redirection pipe */
157 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158 #define TC_ARRTERM (1 << 11) /* ] */
159 #define TC_GRPSTART (1 << 12) /* { */
160 #define TC_GRPTERM (1 << 13) /* } */
161 #define TC_SEMICOL (1 << 14)
162 #define TC_NEWLINE (1 << 15)
163 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164 #define TC_WHILE (1 << 17)
165 #define TC_ELSE (1 << 18)
166 #define TC_BUILTIN (1 << 19)
167 #define TC_GETLINE (1 << 20)
168 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
169 #define TC_BEGIN (1 << 22)
170 #define TC_END (1 << 23)
171 #define TC_EOF (1 << 24)
172 #define TC_VARIABLE (1 << 25)
173 #define TC_ARRAY (1 << 26)
174 #define TC_FUNCTION (1 << 27)
175 #define TC_STRING (1 << 28)
176 #define TC_NUMBER (1 << 29)
178 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
180 /* combined token classes */
181 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
193 /* discard newlines after these */
194 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195 TC_BINOP | TC_OPTERM)
197 /* what can expression begin with */
198 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205 TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
208 #define OF_RES1 0x010000
209 #define OF_RES2 0x020000
210 #define OF_STR1 0x040000
211 #define OF_STR2 0x080000
212 #define OF_NUM1 0x100000
213 #define OF_CHECKED 0x200000
215 /* combined operator flags */
218 #define xS (OF_RES2 | OF_STR2)
220 #define VV (OF_RES1 | OF_RES2)
221 #define Nx (OF_RES1 | OF_NUM1)
222 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx (OF_RES1 | OF_STR1)
224 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK 0x007F
230 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
231 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
234 #define P(x) (x << 24)
235 #define PRIMASK 0x7F000000
236 #define PRIMASK2 0x7E000000
238 /* Operation classes */
240 #define SHIFT_TIL_THIS 0x0600
241 #define RECUR_FROM_THIS 0x1000
244 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
245 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
247 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
248 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
249 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
251 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
252 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
253 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
254 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
255 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
256 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
257 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
258 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
261 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
265 /* simple builtins */
267 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
273 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
277 /* tokens and their corresponding info values */
279 #define NTC "\377" /* switch to next token class (tc<<1) */
282 #define OC_B OC_BUILTIN
284 static char * const tokenlist =
287 "\1/" NTC /* REGEXP */
288 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
289 "\2++" "\2--" NTC /* UOPPOST */
290 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
291 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
292 "\2*=" "\2/=" "\2%=" "\2^="
293 "\1+" "\1-" "\3**=" "\2**"
294 "\1/" "\1%" "\1^" "\1*"
295 "\2!=" "\2>=" "\2<=" "\1>"
296 "\1<" "\2!~" "\1~" "\2&&"
297 "\2||" "\1?" "\1:" NTC
301 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
307 "\2if" "\2do" "\3for" "\5break" /* STATX */
308 "\10continue" "\6delete" "\5print"
309 "\6printf" "\4next" "\10nextfile"
310 "\6return" "\4exit" NTC
314 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
315 "\3cos" "\3exp" "\3int" "\3log"
316 "\4rand" "\3sin" "\4sqrt" "\5srand"
317 "\6gensub" "\4gsub" "\5index" "\6length"
318 "\5match" "\5split" "\7sprintf" "\3sub"
319 "\6substr" "\7systime" "\10strftime"
320 "\7tolower" "\7toupper" NTC
322 "\4func" "\10function" NTC
327 static unsigned long tokeninfo[] = {
332 xS|'a', xS|'w', xS|'|',
333 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
334 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
336 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
337 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
338 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
339 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
341 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
342 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
343 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
344 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
345 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
346 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
347 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
348 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
349 OC_COLON|xx|P(67)|':',
352 OC_PGETLINE|SV|P(37),
353 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
354 OC_UNARY|xV|P(19)|'!',
360 ST_IF, ST_DO, ST_FOR, OC_BREAK,
361 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
362 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
363 OC_RETURN|Vx, OC_EXIT|Nx,
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
380 /* internal variable names and their initial values */
381 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
383 CONVFMT=0, OFMT, FS, OFS,
384 ORS, RS, RT, FILENAME,
385 SUBSEP, ARGIND, ARGC, ARGV,
388 ENVIRON, F0, _intvarcount_
391 static char * vNames =
392 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
393 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
394 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
396 "NR\0" "NF\0*" "IGNORECASE\0*"
397 "ENVIRON\0" "$\0*" "\0";
399 static char * vValues =
400 "%.6g\0" "%.6g\0" " \0" " \0"
401 "\n\0" "\n\0" "\0" "\0"
/* hash size may grow to these values */
/* FIRST_PRIME is the bucket count given to a freshly created hash. The
 * macro body carries no trailing semicolon so it can be used inside any
 * expression, not only as a complete statement. */
#define FIRST_PRIME 61
/* Successive bucket counts taken when a hash is rebuilt, and their number. */
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
412 extern char **environ;
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
420 static char *programname;
422 static int is_f0_split;
423 static int nfields = 0;
424 static var *Fields = NULL;
425 static tsplitter fsplitter, rsplitter;
426 static nvblock *cb = NULL;
429 static int icase = FALSE;
430 static int exiting = FALSE;
433 unsigned long tclass;
441 /* function prototypes */
442 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
443 static void handle_special(var *);
444 static node *parse_expr(unsigned long);
445 static void chain_group(void);
446 static var *evaluate(node *, var *);
447 static rstream *next_input_file(void);
448 static int fmt_num(char *, int, char *, double, int);
449 static int awk_exit(int);
451 /* ---- error handling ---- */
453 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
454 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
455 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
456 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
457 static const char EMSG_INV_FMT[] = "Invalid format specifier";
458 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
459 static const char EMSG_NOT_ARRAY[] = "Not an array";
460 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
461 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
462 #ifndef CONFIG_FEATURE_AWK_MATH
463 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
466 static void syntax_error(const char * const message)
468 bb_error_msg("%s:%i: %s", programname, lineno, message);
472 #define runtime_error(x) syntax_error(x)
475 /* ---- hash stuff ---- */
/* Compute the hash of a NUL-terminated name. Each character is folded in
 * as idx = c + idx * 63 (written as a shift and a subtract). Callers
 * reduce the returned value modulo the current table size. */
static unsigned int hashidx(const char *name) {
	unsigned int idx = 0;

	while (*name)
		idx = *name++ + (idx << 6) - idx;

	return idx;
}
485 /* create new hash */
486 static xhash *hash_init(void) {
490 newhash = (xhash *)xcalloc(1, sizeof(xhash));
491 newhash->csize = FIRST_PRIME;
492 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
497 /* find item in hash, return ptr to data, NULL if not found */
498 static void *hash_search(xhash *hash, char *name) {
502 hi = hash->items [ hashidx(name) % hash->csize ];
504 if (strcmp(hi->name, name) == 0)
511 /* grow hash if it becomes too big */
512 static void hash_rebuild(xhash *hash) {
514 unsigned int newsize, i, idx;
515 hash_item **newitems, *hi, *thi;
517 if (hash->nprime == NPRIMES)
520 newsize = PRIMES[hash->nprime++];
521 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
523 for (i=0; i<hash->csize; i++) {
528 idx = hashidx(thi->name) % newsize;
529 thi->next = newitems[idx];
535 hash->csize = newsize;
536 hash->items = newitems;
539 /* find item in hash, add it if necessary. Return ptr to data */
540 static void *hash_find(xhash *hash, char *name) {
546 hi = hash_search(hash, name);
548 if (++hash->nel / hash->csize > 10)
551 l = bb_strlen(name) + 1;
552 hi = xcalloc(sizeof(hash_item) + l, 1);
553 memcpy(hi->name, name, l);
555 idx = hashidx(name) % hash->csize;
556 hi->next = hash->items[idx];
557 hash->items[idx] = hi;
563 #define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
564 #define newvar(name) (var *) hash_find ( vhash , (name) )
565 #define newfile(name) (rstream *) hash_find ( fdhash , (name) )
566 #define newfunc(name) (func *) hash_find ( fnhash , (name) )
568 static void hash_remove(xhash *hash, char *name) {
570 hash_item *hi, **phi;
572 phi = &(hash->items[ hashidx(name) % hash->csize ]);
575 if (strcmp(hi->name, name) == 0) {
576 hash->glen -= (bb_strlen(name) + 1);
586 /* ------ some useful functions ------ */
588 static void skip_spaces(char **s) {
590 register char *p = *s;
592 while(*p == ' ' || *p == '\t' ||
593 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
599 static char *nextword(char **s) {
601 register char *p = *s;
608 static char nextchar(char **s) {
610 register char c, *pps;
614 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
615 if (c == '\\' && *s == pps) c = *((*s)++);
/* Return 1 if c is an alphanumeric character or an underscore, 0 otherwise.
 * The value is narrowed to unsigned char before it reaches isalnum():
 * handing <ctype.h> a negative value other than EOF (e.g. a plain char with
 * the high bit set on a signed-char platform) is undefined behaviour. */
static inline int isalnum_(int c) {
	return (isalnum((unsigned char)c) || c == '_');
}
/* Return stdin when path is exactly "-"; otherwise hand path and mode to
 * bb_xfopen() and return its result. */
static FILE *afopen(const char *path, const char *mode) {
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return bb_xfopen(path, mode);
}
629 /* -------- working with variables (set/get/copy/etc) -------- */
631 static xhash *iamarray(var *v) {
635 while (a->type & VF_CHILD)
638 if (! (a->type & VF_ARRAY)) {
640 a->x.array = hash_init();
645 static void clear_array(xhash *array) {
650 for (i=0; i<array->csize; i++) {
651 hi = array->items[i];
655 free(thi->data.v.string);
658 array->items[i] = NULL;
660 array->glen = array->nel = 0;
663 /* clear a variable */
664 static var *clrvar(var *v) {
666 if (!(v->type & VF_FSTR))
669 v->type &= VF_DONTTOUCH;
675 /* assign string value to variable */
676 static var *setvar_p(var *v, char *value) {
685 /* same as setvar_p but make a copy of string */
686 static var *setvar_s(var *v, char *value) {
688 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
691 /* same as setvar_s but set USER flag */
692 static var *setvar_u(var *v, char *value) {
699 /* set array element to user string */
700 static void setari_u(var *a, int idx, char *s) {
703 static char sidx[12];
705 sprintf(sidx, "%d", idx);
706 v = findvar(iamarray(a), sidx);
710 /* assign numeric value to variable */
711 static var *setvar_i(var *v, double value) {
714 v->type |= VF_NUMBER;
720 static char *getvar_s(var *v) {
722 /* if v is numeric and has no cached string, convert it to string */
723 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
724 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
725 v->string = bb_xstrdup(buf);
726 v->type |= VF_CACHED;
728 return (v->string == NULL) ? "" : v->string;
731 static double getvar_i(var *v) {
735 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
739 v->number = strtod(s, &s);
740 if (v->type & VF_USER) {
748 v->type |= VF_CACHED;
753 static var *copyvar(var *dest, var *src) {
757 dest->type |= (src->type & ~VF_DONTTOUCH);
758 dest->number = src->number;
760 dest->string = bb_xstrdup(src->string);
762 handle_special(dest);
766 static var *incvar(var *v) {
768 return setvar_i(v, getvar_i(v)+1.);
771 /* return true if v is number or numeric string */
772 static int is_numeric(var *v) {
775 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
778 /* return 1 when value of v corresponds to true, 0 otherwise */
779 static int istrue(var *v) {
782 return (v->number == 0) ? 0 : 1;
784 return (v->string && *(v->string)) ? 1 : 0;
787 /* temporary variables allocator. Last allocated should be first freed */
788 static var *nvalloc(int n) {
796 if ((cb->pos - cb->nv) + n <= cb->size) break;
801 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
802 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
807 if (pb) pb->next = cb;
813 while (v < cb->pos) {
822 static void nvfree(var *v) {
826 if (v < cb->nv || v >= cb->pos)
827 runtime_error(EMSG_INTERNAL_ERROR);
829 for (p=v; p<cb->pos; p++) {
830 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
831 clear_array(iamarray(p));
832 free(p->x.array->items);
835 if (p->type & VF_WALK)
842 while (cb->prev && cb->pos == cb->nv) {
847 /* ------- awk program text parsing ------- */
849 /* Parse the next token pointed to by global pos, place results into global t.
850 * If the token isn't expected, give up with a syntax error. Return token class
852 static unsigned long next_token(unsigned long expected) {
856 unsigned long tc, *ti;
858 static int concat_inserted = FALSE;
859 static unsigned long save_tclass, save_info;
860 static unsigned long ltclass = TC_OPTERM;
866 } else if (concat_inserted) {
868 concat_inserted = FALSE;
869 t.tclass = save_tclass;
880 while (*p != '\n' && *p != '\0') p++;
888 } else if (*p == '\"') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 *(s++) = nextchar(&p);
900 } else if ((expected & TC_REGEXP) && *p == '/') {
904 if (*p == '\0' || *p == '\n')
905 syntax_error(EMSG_UNEXP_EOS);
906 if ((*s++ = *p++) == '\\') {
908 *(s-1) = bb_process_escape_sequence((const char **)&p);
909 if (*pp == '\\') *s++ = '\\';
910 if (p == pp) *s++ = *p++;
917 } else if (*p == '.' || isdigit(*p)) {
919 t.number = strtod(p, &p);
921 syntax_error(EMSG_UNEXP_TOKEN);
925 /* search for something known */
935 /* if token class is expected, token
936 * matches and it's not a longer word,
937 * then this is what we are looking for
939 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940 *tl == *p && strncmp(p, tl, l) == 0 &&
941 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
951 /* it's a name (var/array/function),
952 * otherwise it's something wrong
955 syntax_error(EMSG_UNEXP_TOKEN);
958 while(isalnum_(*(++p))) {
976 /* skipping newlines in some cases */
977 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
980 /* insert concatenation operator when needed */
981 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
982 concat_inserted = TRUE;
986 t.info = OC_CONCAT | SS | P(35);
993 /* Are we ready for this? */
994 if (! (ltclass & expected))
995 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
996 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1001 static void rollback_token(void) { t.rollback = TRUE; }
1003 static node *new_node(unsigned long info) {
1007 n = (node *)xcalloc(sizeof(node), 1);
1013 static node *mk_re_node(char *s, node *n, regex_t *re) {
1015 n->info = OC_REGEXP;
1018 xregcomp(re, s, REG_EXTENDED);
1019 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1024 static node *condition(void) {
1026 next_token(TC_SEQSTART);
1027 return parse_expr(TC_SEQTERM);
1030 /* parse expression terminated by given argument, return ptr
1031 * to built subtree. Terminator is eaten by parse_expr */
1032 static node *parse_expr(unsigned long iexp) {
1037 unsigned long tc, xtc;
1041 sn.r.n = glptr = NULL;
1042 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1044 while (! ((tc = next_token(xtc)) & iexp)) {
1045 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1046 /* input redirection (<) attached to glptr node */
1047 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1048 xtc = TC_OPERAND | TC_UOPPRE;
1051 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1052 /* for binary and postfix-unary operators, jump back over
1053 * previous operators with higher priority */
1055 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1056 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1058 if ((t.info & OPCLSMASK) == OC_TERNARY)
1060 cn = vn->a.n->r.n = new_node(t.info);
1062 if (tc & TC_BINOP) {
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1065 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1067 next_token(TC_GETLINE);
1068 /* give maximum priority to this pipe */
1069 cn->info &= ~PRIMASK;
1070 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1074 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1079 /* for operands and prefix-unary operators, attach them
1082 cn = vn->r.n = new_node(t.info);
1084 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1085 if (tc & (TC_OPERAND | TC_REGEXP)) {
1086 xtc = TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1087 /* one should be very careful with switch on tclass -
1088 * only simple tclasses should be used! */
1093 if ((v = hash_search(ahash, t.string)) != NULL) {
1094 cn->info = OC_FNARG;
1095 cn->l.i = v->x.aidx;
1097 cn->l.v = newvar(t.string);
1099 if (tc & TC_ARRAY) {
1101 cn->r.n = parse_expr(TC_ARRTERM);
1103 xtc = TC_UOPPOST | TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1109 v = cn->l.v = xcalloc(sizeof(var), 1);
1111 setvar_i(v, t.number);
1113 setvar_s(v, t.string);
1117 mk_re_node(t.string, cn,
1118 (regex_t *)xcalloc(sizeof(regex_t),2));
1123 cn->r.f = newfunc(t.string);
1124 cn->l.n = condition();
1128 cn = vn->r.n = parse_expr(TC_SEQTERM);
1134 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1138 cn->l.n = condition();
1147 /* add node to chain. Return ptr to alloc'd node */
1148 static node *chain_node(unsigned long info) {
1153 seq->first = seq->last = new_node(0);
1155 if (seq->programname != programname) {
1156 seq->programname = programname;
1157 n = chain_node(OC_NEWSOURCE);
1158 n->l.s = bb_xstrdup(programname);
1163 seq->last = n->a.n = new_node(OC_DONE);
1168 static void chain_expr(unsigned long info) {
1172 n = chain_node(info);
1173 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1174 if (t.tclass & TC_GRPTERM)
1178 static node *chain_loop(node *nn) {
1180 node *n, *n2, *save_brk, *save_cont;
1182 save_brk = break_ptr;
1183 save_cont = continue_ptr;
1185 n = chain_node(OC_BR | Vx);
1186 continue_ptr = new_node(OC_EXEC);
1187 break_ptr = new_node(OC_EXEC);
1189 n2 = chain_node(OC_EXEC | Vx);
1192 continue_ptr->a.n = n2;
1193 break_ptr->a.n = n->r.n = seq->last;
1195 continue_ptr = save_cont;
1196 break_ptr = save_brk;
1201 /* parse group and attach it to chain */
1202 static void chain_group(void) {
1208 c = next_token(TC_GRPSEQ);
1209 } while (c & TC_NEWLINE);
1211 if (c & TC_GRPSTART) {
1212 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1216 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1218 chain_expr(OC_EXEC | Vx);
1219 } else { /* TC_STATEMNT */
1220 switch (t.info & OPCLSMASK) {
1222 n = chain_node(OC_BR | Vx);
1223 n->l.n = condition();
1225 n2 = chain_node(OC_EXEC);
1227 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1229 n2->a.n = seq->last;
1237 n = chain_loop(NULL);
1242 n2 = chain_node(OC_EXEC);
1243 n = chain_loop(NULL);
1245 next_token(TC_WHILE);
1246 n->l.n = condition();
1250 next_token(TC_SEQSTART);
1251 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1252 if (t.tclass & TC_SEQTERM) { /* for-in */
1253 if ((n2->info & OPCLSMASK) != OC_IN)
1254 syntax_error(EMSG_UNEXP_TOKEN);
1255 n = chain_node(OC_WALKINIT | VV);
1258 n = chain_loop(NULL);
1259 n->info = OC_WALKNEXT | Vx;
1261 } else { /* for(;;) */
1262 n = chain_node(OC_EXEC | Vx);
1264 n2 = parse_expr(TC_SEMICOL);
1265 n3 = parse_expr(TC_SEQTERM);
1275 n = chain_node(t.info);
1276 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1277 if (t.tclass & TC_OUTRDR) {
1279 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1281 if (t.tclass & TC_GRPTERM)
1286 n = chain_node(OC_EXEC);
1291 n = chain_node(OC_EXEC);
1292 n->a.n = continue_ptr;
1295 /* delete, next, nextfile, return, exit */
1303 static void parse_program(char *p) {
1305 unsigned long tclass;
1312 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1313 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1315 if (tclass & TC_OPTERM)
1319 if (tclass & TC_BEGIN) {
1323 } else if (tclass & TC_END) {
1327 } else if (tclass & TC_FUNCDECL) {
1328 next_token(TC_FUNCTION);
1330 f = newfunc(t.string);
1331 f->body.first = NULL;
1333 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1334 v = findvar(ahash, t.string);
1335 v->x.aidx = (f->nargs)++;
1337 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1344 } else if (tclass & TC_OPSEQ) {
1346 cn = chain_node(OC_TEST);
1347 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1348 if (t.tclass & TC_GRPSTART) {
1352 chain_node(OC_PRINT);
1354 cn->r.n = mainseq.last;
1356 } else /* if (tclass & TC_GRPSTART) */ {
1364 /* -------- program execution part -------- */
/* Fixed: the operation-class test below used logical `&&`, which yields
 * only 0 or 1 and therefore could never compare equal to OC_REGEXP; the
 * class bits must be extracted with bitwise `&`, as every other
 * OPCLSMASK test in this file does. */
1366 static node *mk_splitter(char *s, tsplitter *spl) {
1368 register regex_t *re, *ire;
1374 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1378 if (bb_strlen(s) > 1) {
1379 mk_re_node(s, n, re);
1381 n->info = (unsigned long) *s;
1387 /* use node as a regular expression. Supplied with node ptr and regex_t
1388 * storage space. Return ptr to regex (if result points to preg, it should
1389 * be later regfree'd manually
1391 static regex_t *as_regex(node *op, regex_t *preg) {
1396 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1397 return icase ? op->r.ire : op->l.re;
1400 s = getvar_s(evaluate(op, v));
1401 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1407 /* gradually increasing buffer */
1408 static void qrealloc(char **b, int n, int *size) {
1410 if (! *b || n >= *size)
1411 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1414 /* resize field storage space */
1415 static void fsrealloc(int size) {
1417 static int maxfields = 0;
1420 if (size >= maxfields) {
1422 maxfields = size + 16;
1423 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1424 for (; i<maxfields; i++) {
1425 Fields[i].type = VF_SPECIAL;
1426 Fields[i].string = NULL;
1430 if (size < nfields) {
1431 for (i=size; i<nfields; i++) {
1438 static int awk_split(char *s, node *spl, char **slist) {
1443 regmatch_t pmatch[2];
1445 /* in worst case, each char would be a separate field */
1446 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1448 c[0] = c[1] = (char)spl->info;
1450 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1452 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1454 l = strcspn(s, c+2);
1455 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1456 pmatch[0].rm_so <= l) {
1457 l = pmatch[0].rm_so;
1458 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1460 pmatch[0].rm_eo = l;
1461 if (*(s+l)) pmatch[0].rm_eo++;
1467 s += pmatch[0].rm_eo;
1470 } else if (c[0] == '\0') { /* null split */
1476 } else if (c[0] != ' ') { /* single-character split */
1478 c[0] = toupper(c[0]);
1479 c[1] = tolower(c[1]);
1482 while ((s1 = strpbrk(s1, c))) {
1486 } else { /* space split */
1488 while (isspace(*s)) s++;
1491 while (*s && !isspace(*s))
1499 static void split_f0(void) {
1501 static char *fstrings = NULL;
1511 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1514 for (i=0; i<n; i++) {
1515 Fields[i].string = nextword(&s);
1516 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1519 /* set NF manually to avoid side effects */
1521 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1522 V[NF]->number = nfields;
1525 /* perform additional actions when some internal variables changed */
1526 static void handle_special(var *v) {
1530 int sl, l, len, i, bsize;
1532 if (! (v->type & VF_SPECIAL))
1536 n = (int)getvar_i(v);
1539 /* recalculate $0 */
1540 sep = getvar_s(V[OFS]);
1541 sl = bb_strlen(sep);
1544 for (i=0; i<n; i++) {
1545 s = getvar_s(&Fields[i]);
1548 memcpy(b+len, sep, sl);
1551 qrealloc(&b, len+l+sl, &bsize);
1552 memcpy(b+len, s, l);
1559 } else if (v == V[F0]) {
1560 is_f0_split = FALSE;
1562 } else if (v == V[FS]) {
1563 mk_splitter(getvar_s(v), &fsplitter);
1565 } else if (v == V[RS]) {
1566 mk_splitter(getvar_s(v), &rsplitter);
1568 } else if (v == V[IGNORECASE]) {
1572 n = getvar_i(V[NF]);
1573 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1574 /* right here v is invalid. Just to note... */
1578 /* step through func/builtin/etc arguments */
1579 static node *nextarg(node **pn) {
1584 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1593 static void hashwalk_init(var *v, xhash *array) {
1599 if (v->type & VF_WALK)
1603 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1604 *w = *(w+1) = (char *)(w + 2);
1605 for (i=0; i<array->csize; i++) {
1606 hi = array->items[i];
1608 strcpy(*w, hi->name);
1615 static int hashwalk_next(var *v) {
1623 setvar_s(v, nextword(w+1));
1627 /* evaluate node, return 1 when result is true, 0 otherwise */
1628 static int ptest(node *pattern) {
1631 return istrue(evaluate(pattern, &v));
1634 /* read next record from stream rsm into a variable v */
1635 static int awk_getline(rstream *rsm, var *v) {
1638 regmatch_t pmatch[2];
1639 int a, p, pp=0, size;
1640 int fd, so, eo, r, rp;
1643 /* we're using our own buffer since we need access to accumulating
1646 fd = fileno(rsm->F);
1651 c = (char) rsplitter.n.info;
1654 if (! m) qrealloc(&m, 256, &size);
1660 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1661 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1662 b, 1, pmatch, 0) == 0) {
1663 so = pmatch[0].rm_so;
1664 eo = pmatch[0].rm_eo;
1668 } else if (c != '\0') {
1669 s = strchr(b+pp, c);
1676 while (b[rp] == '\n')
1678 s = strstr(b+rp, "\n\n");
1681 while (b[eo] == '\n') eo++;
1689 memmove(m, (const void *)(m+a), p+1);
1694 qrealloc(&m, a+p+128, &size);
1697 p += safe_read(fd, b+p, size-p-1);
1701 setvar_i(V[ERRNO], errno);
1710 c = b[so]; b[so] = '\0';
1714 c = b[eo]; b[eo] = '\0';
1715 setvar_s(V[RT], b+so);
1727 static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1732 if (int_as_int && n == (int)n) {
1733 r = snprintf(b, size, "%d", (int)n);
1735 do { c = *s; } while (*s && *++s);
1736 if (strchr("diouxX", c)) {
1737 r = snprintf(b, size, format, (int)n);
1738 } else if (strchr("eEfgG", c)) {
1739 r = snprintf(b, size, format, n);
1741 runtime_error(EMSG_INV_FMT);
1748 /* formatted output into an allocated buffer, return ptr to buffer */
/* The first argument taken from n is the format string; it is duplicated
 * so that each conversion specification can be NUL-terminated in place.
 * Every specification is rendered into b, which is grown with qrealloc()
 * beforehand: a c conversion (or a missing conversion character) takes a
 * single character, an s conversion takes a string, and anything else is
 * handed to fmt_num(). Parts of the loop are not visible in this excerpt. */
1749 static char *awk_printf(node *n) {
1752 char *fmt, *s, *s1, *f;
1753 int i, j, incr, bsize;
1758 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1763 while (*f && (*f != '%' || *(++f) == '%'))
/* <ctype.h> functions require a value representable as unsigned char;
 * a plain char may be negative, so cast before calling isalpha() */
1765 while (*f && !isalpha((unsigned char)*f))
1768 incr = (f - s) + MAXVARFMT;
1769 qrealloc(&b, incr+i, &bsize);
1770 c = *f; if (c != '\0') f++;
1771 c1 = *f ; *f = '\0';
1772 arg = evaluate(nextarg(&n), v);
1775 if (c == 'c' || !c) {
1776 i += sprintf(b+i, s,
1777 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1779 } else if (c == 's') {
1781 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1782 i += sprintf(b+i, s, s1);
1785 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1789 /* if there was an error while sprintf, return value is negative */
1794 b = xrealloc(b, i+1);
1801 /* common substitution routine
1802 * replace the (nm)-th substring of (src) that matches (rn) with (repl), store
1803 * the result into (dest), return number of substitutions. If nm=0, replace
1804 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1805 * subexpression matching (\1-\9)
1807 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1811 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1812 regmatch_t pmatch[10];
/* as_regex() may return the scratch regex sreg passed to it; that case is
 * released with regfree() on the last visible line of this function */
1815 re = as_regex(rn, &sreg);
/* a NULL src or dest stands for the $0 variable */
1816 if (! src) src = V[F0];
1817 if (! dest) dest = V[F0];
1821 rl = bb_strlen(repl);
/* REG_NOTBOL is passed on every pass except while sp still equals the
 * start of the source string */
1822 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1823 so = pmatch[0].rm_so;
1824 eo = pmatch[0].rm_eo;
/* make room, then copy the input from sp through the end of this match */
1826 qrealloc(&ds, di + eo + rl, &dssize);
1827 memcpy(ds + di, sp, eo);
1833 for (s = repl; *s; s++) {
/* an ampersand, or a digit when ex is set, inserts the text of match group
 * j; how j and nbs are computed is on lines not shown here */
1839 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1840 di -= ((nbs + 3) >> 1);
1849 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1850 qrealloc(&ds, di + rl + n, &dssize);
1851 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1862 if (! (ds[di++] = *sp++)) break;
/* append whatever remains of the input after the last processed match */
1866 qrealloc(&ds, di + strlen(sp), &dssize);
1867 strcpy(ds + di, sp);
1869 if (re == &sreg) regfree(re);
/* Execute a multi-argument builtin described by node op, storing the
 * result in res. Up to four arguments are pulled from op with nextarg();
 * depending on bits of op->info each one is evaluated into av[] and its
 * string form fetched into as[]. Fewer arguments than (info >> 30) raise
 * EMSG_TOO_FEW_ARGS. The builtin itself is selected by info & OPNMASK.
 * Case labels and several statements are not visible in this excerpt. */
1873 static var *exec_builtin(node *op, var *res) {
1880 regmatch_t pmatch[2];
1882 static tsplitter tspl;
1884 unsigned long isr, info;
1891 isr = info = op->info;
/* slots 2 and 3 default to NULL so optional arguments can be detected */
1894 av[2] = av[3] = NULL;
1895 for (i=0 ; i<4 && op ; i++) {
1896 an[i] = nextarg(&op);
1897 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1898 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1903 if (nargs < (info >> 30))
1904 runtime_error(EMSG_TOO_FEW_ARGS);
1906 switch (info & OPNMASK) {
/* atan2(y, x): y is the first argument and x the second. The first
 * argument must be addressed as av[0]; i is the counter left over from
 * the argument loop above and indexes past the arguments that were read. */
1909 #ifdef CONFIG_FEATURE_AWK_MATH
1910 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
1912 runtime_error(EMSG_NO_MATH);
1918 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1919 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1924 n = awk_split(as[0], spl, &s);
1926 clear_array(iamarray(av[1]));
1927 for (i=1; i<=n; i++)
1928 setari_u(av[1], i, nextword(&s1));
1934 l = bb_strlen(as[0]);
1935 i = getvar_i(av[1]) - 1;
1936 if (i>l) i=l; if (i<0) i=0;
1937 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1940 strncpy(s, as[0]+i, n);
1952 s1 = s = bb_xstrdup(as[0]);
1954 *s1 = (*to_xxx)(*s1);
1962 ll = bb_strlen(as[1]);
1963 l = bb_strlen(as[0]) - ll;
1964 if (ll > 0 && l >= 0) {
1966 s = strstr(as[0], as[1]);
1967 if (s) n = (s - as[0]) + 1;
1969 /* this piece of code is terribly slow and
1970 * really should be rewritten
1972 for (i=0; i<=l; i++) {
1973 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1985 tt = getvar_i(av[1]);
1988 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1989 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1995 re = as_regex(an[1], &sreg);
1996 n = regexec(re, as[0], 1, pmatch, 0);
2001 pmatch[0].rm_so = 0;
2002 pmatch[0].rm_eo = -1;
2004 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2005 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2006 setvar_i(res, pmatch[0].rm_so);
2007 if (re == &sreg) regfree(re);
2011 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2015 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2019 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2028 * Evaluate node - the heart of the program. Supplied with subtree
2029 * and place where to store result. Returns ptr to result.
/* XC() drops the low 8 bits of a node-class constant so that the class can
 * be used as a compact switch label */
2031 #define XC(n) ((n) >> 8)
/* Execute the node tree rooted at op and deliver the value through res.
 * Operands requested by the OF_RES1, OF_RES2, OF_STR1, OF_STR2 and OF_NUM1
 * bits of op->info are pre-evaluated into L and R before the switch on the
 * node class dispatches. Many case labels and statements are not visible
 * in this excerpt. */
2033 static var *evaluate(node *op, var *res) {
2035 /* This procedure is recursive so we should count every byte */
2036 static var *fnargs = NULL;
2037 static unsigned int seed = 1;
2038 static regex_t sreg;
2047 unsigned long opinfo;
2059 return setvar_s(res, NULL);
2066 opn = (short)(opinfo & OPNMASK);
2067 lineno = op->lineno;
2069 /* execute inevitable things */
2071 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2072 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2073 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2074 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2075 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2077 switch (XC(opinfo & OPCLSMASK)) {
2079 /* -- iterative node type -- */
2083 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2084 /* it's range pattern */
2085 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2086 op->info |= OF_CHECKED;
2087 if (ptest(op1->r.n))
2088 op->info &= ~OF_CHECKED;
2095 op = (ptest(op1)) ? op->a.n : op->r.n;
2099 /* just evaluate an expression, also used as unconditional jump */
2103 /* branch, used in if-else and various loops */
2105 op = istrue(L.v) ? op->a.n : op->r.n;
2108 /* initialize for-in loop */
2109 case XC( OC_WALKINIT ):
2110 hashwalk_init(L.v, iamarray(R.v));
2113 /* get next array item */
2114 case XC( OC_WALKNEXT ):
2115 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2118 case XC( OC_PRINT ):
2119 case XC( OC_PRINTF ):
2122 X.rsm = newfile(R.s);
2125 if((X.rsm->F = popen(R.s, "w")) == NULL)
2126 bb_perror_msg_and_die("popen");
2129 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2135 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2137 fputs(getvar_s(V[F0]), X.F);
2140 L.v = evaluate(nextarg(&op1), v1);
2141 if (L.v->type & VF_NUMBER) {
2142 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2143 getvar_i(L.v), TRUE);
2146 fputs(getvar_s(L.v), X.F);
2149 if (op1) fputs(getvar_s(V[OFS]), X.F);
2152 fputs(getvar_s(V[ORS]), X.F);
2154 } else { /* OC_PRINTF */
2155 L.s = awk_printf(op1);
2162 case XC( OC_DELETE ):
2163 X.info = op1->info & OPCLSMASK;
2164 if (X.info == OC_VAR) {
2166 } else if (X.info == OC_FNARG) {
2167 R.v = &fnargs[op1->l.i];
2169 runtime_error(EMSG_NOT_ARRAY);
2174 L.s = getvar_s(evaluate(op1->r.n, v1));
2175 hash_remove(iamarray(R.v), L.s);
2177 clear_array(iamarray(R.v));
2181 case XC( OC_NEWSOURCE ):
2182 programname = op->l.s;
2185 case XC( OC_RETURN ):
2189 case XC( OC_NEXTFILE ):
2200 /* -- recursive node type -- */
2208 case XC( OC_FNARG ):
2209 L.v = &fnargs[op->l.i];
2212 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2216 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2219 case XC( OC_REGEXP ):
2221 L.s = getvar_s(V[F0]);
2224 case XC( OC_MATCH ):
2227 X.re = as_regex(op1, &sreg);
2228 R.i = regexec(X.re, L.s, 0, NULL, 0);
2229 if (X.re == &sreg) regfree(X.re);
2230 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2234 /* if source is a temporary string, just relink it to dest */
2235 if (R.v == v1+1 && R.v->string) {
2236 res = setvar_p(L.v, R.v->string);
2239 res = copyvar(L.v, R.v);
2243 case XC( OC_TERNARY ):
2244 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2245 runtime_error(EMSG_POSSIBLE_ERROR);
2246 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2250 if (! op->r.f->body.first)
2251 runtime_error(EMSG_UNDEF_FUNC);
2253 X.v = R.v = nvalloc(op->r.f->nargs+1);
2255 L.v = evaluate(nextarg(&op1), v1);
2257 R.v->type |= VF_CHILD;
2258 R.v->x.parent = L.v;
2259 if (++R.v - X.v >= op->r.f->nargs)
2267 res = evaluate(op->r.f->body.first, res);
2274 case XC( OC_GETLINE ):
2275 case XC( OC_PGETLINE ):
2277 X.rsm = newfile(L.s);
2279 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2280 X.rsm->F = popen(L.s, "r");
2281 X.rsm->is_pipe = TRUE;
2283 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
2287 if (! iF) iF = next_input_file();
2292 setvar_i(V[ERRNO], errno);
2300 L.i = awk_getline(X.rsm, R.v);
2310 /* simple builtins */
2311 case XC( OC_FBLTIN ):
/* rand(): POSIX awk requires 0 <= n < 1, so divide by RAND_MAX + 1 rather
 * than RAND_MAX; otherwise rand() == RAND_MAX would yield exactly 1.0 */
2319 R.d = (double)rand() / ((double)RAND_MAX + 1.0);
2322 #ifdef CONFIG_FEATURE_AWK_MATH
2348 runtime_error(EMSG_NO_MATH);
2354 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2364 L.s = getvar_s(V[F0]);
2365 R.d = bb_strlen(L.s);
2370 R.d = (L.s && *L.s) ? system(L.s) : 0;
2378 X.rsm = newfile(L.s);
2387 X.rsm = (rstream *)hash_search(fdhash, L.s);
2389 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2390 free(X.rsm->buffer);
2391 hash_remove(fdhash, L.s);
2394 setvar_i(V[ERRNO], errno);
2401 case XC( OC_BUILTIN ):
2402 res = exec_builtin(op, res);
2405 case XC( OC_SPRINTF ):
2406 setvar_p(res, awk_printf(op1));
2409 case XC( OC_UNARY ):
2411 L.d = R.d = getvar_i(R.v);
2426 L.d = istrue(X.v) ? 0 : 1;
2437 case XC( OC_FIELD ):
2438 R.i = (int)getvar_i(R.v);
2446 res = &Fields[R.i-1];
2450 /* concatenation (" ") and index joining (",") */
2451 case XC( OC_CONCAT ):
2452 case XC( OC_COMMA ):
2453 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2454 X.s = (char *)xmalloc(opn);
2456 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2457 L.s = getvar_s(V[SUBSEP]);
2458 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2466 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2470 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2473 case XC( OC_BINARY ):
2474 case XC( OC_REPLACE ):
2475 R.d = getvar_i(R.v);
2487 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2491 #ifdef CONFIG_FEATURE_AWK_MATH
2492 L.d = pow(L.d, R.d);
2494 runtime_error(EMSG_NO_MATH);
2498 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* NOTE(review): the (int) cast limits the quotient to the range of int -
 * confirm this is acceptable for large operands */
2499 L.d -= (int)(L.d / R.d) * R.d;
2502 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2505 case XC( OC_COMPARE ):
2506 if (is_numeric(L.v) && is_numeric(R.v)) {
2507 L.d = getvar_i(L.v) - getvar_i(R.v);
2509 L.s = getvar_s(L.v);
2510 R.s = getvar_s(R.v);
2511 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2513 switch (opn & 0xfe) {
2524 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2528 runtime_error(EMSG_POSSIBLE_ERROR);
2530 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2532 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2542 /* -------- main & co. -------- */
/* Shut down the interpreter. The visible lines evaluate the END sequence
 * (endseq.first) and then walk the slots of fdhash->items, calling
 * pclose() on each stream. How r is used, any guard around the END
 * evaluation, and the return value are on lines not shown here. */
2544 static int awk_exit(int r) {
2552 evaluate(endseq.first, &tv);
2555 /* waiting for children */
2556 for (i=0; i<fdhash->csize; i++) {
2557 hi = fdhash->items[i];
/* only streams that are open and flagged as pipes are pclosed */
2559 if (hi->data.rs.F && hi->data.rs.is_pipe)
2560 pclose(hi->data.rs.F);
2568 /* if expr looks like "var=value", perform assignment and return 1,
2569 * otherwise return 0 */
2570 static int is_assignment(char *expr) {
2572 char *exprc, *s, *s0, *s1;
/* duplicate expr; the visible lines below operate on the copy */
2574 exprc = bb_xstrdup(expr);
/* not an assignment when the first character fails isalnum_() or when
 * there is no equals sign anywhere in the string */
2575 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value text is rewritten one character at a time through nextchar() */
2583 *(s1++) = nextchar(&s);
/* store the text at s0 into the variable obtained from newvar(exprc) */
2586 setvar_u(newvar(exprc), s0);
2591 /* switch to next input file */
/* Closes the previously opened stream if there is one, resets the read
 * position fields, and advances ARGIND through ARGV. An entry is opened
 * with afopen() only when it is non-empty and is_assignment() rejects it.
 * FILENAME is set to the chosen name. What happens once ARGIND+1 reaches
 * ARGC, and the return value, are on lines not shown here. */
2592 static rstream *next_input_file(void) {
2597 static int files_happen = FALSE;
2599 if (rsm.F) fclose(rsm.F);
2601 rsm.pos = rsm.adv = 0;
2604 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* incvar() bumps ARGIND; its string form is the key into the ARGV array */
2610 ind = getvar_s(incvar(V[ARGIND]));
2611 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2612 if (fname && *fname && !is_assignment(fname))
2613 F = afopen(fname, "r");
2617 files_happen = TRUE;
2618 setvar_s(V[FILENAME], fname);
/* Entry point of the awk applet. In the order shown: allocates the global
 * buffer and the four hash tables, creates the built-in variables from
 * vNames and vValues, registers stdin, stdout and stderr under their /dev
 * names, imports the environment into ENVIRON, parses the -F, -v, -f and
 * -W options, fills ARGC and ARGV, runs the BEGIN sequence, then passes
 * each record of each input file to the main sequence before calling
 * awk_exit(). Several statements are not visible in this excerpt. */
2623 extern int awk_main(int argc, char **argv) {
2630 static int from_file = FALSE;
2632 FILE *F, *stdfiles[3];
2633 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2635 /* allocate global buffer */
2636 buf = xmalloc(MAXVARFMT+1);
2638 vhash = hash_init();
2639 ahash = hash_init();
2640 fdhash = hash_init();
2641 fnhash = hash_init();
2643 /* initialize variables */
2644 for (i=0; *vNames; i++) {
2645 V[i] = v = newvar(nextword(&vNames));
/* an initial value is assigned only when the next vValues entry does not
 * start with the byte 0377 */
2646 if (*vValues != '\377')
2647 setvar_s(v, nextword(&vValues));
/* an asterisk at the current vNames position flags the variable just
 * created as VF_SPECIAL */
2651 if (*vNames == '*') {
2652 v->type |= VF_SPECIAL;
2657 handle_special(V[FS]);
2658 handle_special(V[RS]);
2660 stdfiles[0] = stdin;
2661 stdfiles[1] = stdout;
2662 stdfiles[2] = stderr;
2663 for (i=0; i<3; i++) {
2664 rsm = newfile(nextword(&stdnames));
2665 rsm->F = stdfiles[i];
/* copy each environment entry and store it in the ENVIRON array */
2668 for (envp=environ; *envp; envp++) {
2669 s = bb_xstrdup(*envp);
2670 s1 = strchr(s, '=');
2672 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2676 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2679 setvar_s(V[FS], optarg);
2682 if (! is_assignment(optarg))
2687 F = afopen(programname = optarg, "r");
2689 /* one byte is reserved for some trick in next_token */
2690 for (i=j=1; j>0; i+=j) {
2691 s = (char *)xrealloc(s, i+4096);
2692 j = fread(s+i, 1, 4094, F);
2700 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2711 programname="cmd. line";
2712 parse_program(argv[optind++]);
2716 /* fill in ARGV array */
2717 setvar_i(V[ARGC], argc - optind + 1);
2718 setari_u(V[ARGV], 0, "awk");
2719 for(i=optind; i < argc; i++)
2720 setari_u(V[ARGV], i+1-optind, argv[i]);
2722 evaluate(beginseq.first, &tv);
/* with neither a main nor an END sequence there is no input to process */
2723 if (! mainseq.first && ! endseq.first)
2724 awk_exit(EXIT_SUCCESS);
2726 /* input file could already be opened in BEGIN block */
2727 if (! iF) iF = next_input_file();
2729 /* passing through input files */
2733 setvar_i(V[FNR], 0);
2735 while ((c = awk_getline(iF, V[F0])) > 0) {
2740 evaluate(mainseq.first, &tv);
2747 runtime_error(strerror(errno));
2749 iF = next_input_file();
2753 awk_exit(EXIT_SUCCESS);