1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
42 #define VF_ARRAY 0x0002 /* 1 = it's an array */
44 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
45 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
47 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
49 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
52 /* these flags are static, don't change them when value is changed */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
56 typedef struct var_s {
57 unsigned short type; /* flags */
61 int aidx; /* func arg index (on compilation stage) */
62 struct xhash_s *array; /* array ptr */
63 struct var_s *parent; /* for func args, ptr to actual parameter */
64 char **walker; /* list of array elements (for..in) */
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
76 typedef struct func_s {
82 typedef struct rstream_s {
88 unsigned short is_pipe;
91 typedef struct hash_item_s {
93 struct var_s v; /* variable/array hash */
94 struct rstream_s rs; /* redirect streams hash */
95 struct func_s f; /* functions hash */
97 struct hash_item_s *next; /* next in chain */
98 char name[1]; /* really it's longer */
101 typedef struct xhash_s {
102 unsigned int nel; /* num of elements */
103 unsigned int csize; /* current hash size */
104 unsigned int nprime; /* next hash size in PRIMES[] */
105 unsigned int glen; /* summary length of item names */
106 struct hash_item_s **items;
110 typedef struct node_s {
112 unsigned short lineno;
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
140 typedef struct tsplitter_s {
145 /* simple token classes */
146 /* Order and hex values are very important!!! See next_token() */
147 #define TC_SEQSTART 1 /* ( */
148 #define TC_SEQTERM (1 << 1) /* ) */
149 #define TC_REGEXP (1 << 2) /* /.../ */
150 #define TC_OUTRDR (1 << 3) /* | > >> */
151 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
152 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153 #define TC_BINOPX (1 << 6) /* two-opnd operator */
154 #define TC_IN (1 << 7)
155 #define TC_COMMA (1 << 8)
156 #define TC_PIPE (1 << 9) /* input redirection pipe */
157 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158 #define TC_ARRTERM (1 << 11) /* ] */
159 #define TC_GRPSTART (1 << 12) /* { */
160 #define TC_GRPTERM (1 << 13) /* } */
161 #define TC_SEMICOL (1 << 14)
162 #define TC_NEWLINE (1 << 15)
163 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164 #define TC_WHILE (1 << 17)
165 #define TC_ELSE (1 << 18)
166 #define TC_BUILTIN (1 << 19)
167 #define TC_GETLINE (1 << 20)
168 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
169 #define TC_BEGIN (1 << 22)
170 #define TC_END (1 << 23)
171 #define TC_EOF (1 << 24)
172 #define TC_VARIABLE (1 << 25)
173 #define TC_ARRAY (1 << 26)
174 #define TC_FUNCTION (1 << 27)
175 #define TC_STRING (1 << 28)
176 #define TC_NUMBER (1 << 29)
178 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
180 /* combined token classes */
181 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
193 /* discard newlines after these */
194 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195 TC_BINOP | TC_OPTERM)
197 /* what can expression begin with */
198 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205 TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
208 #define OF_RES1 0x010000
209 #define OF_RES2 0x020000
210 #define OF_STR1 0x040000
211 #define OF_STR2 0x080000
212 #define OF_NUM1 0x100000
213 #define OF_CHECKED 0x200000
215 /* combined operator flags */
218 #define xS (OF_RES2 | OF_STR2)
220 #define VV (OF_RES1 | OF_RES2)
221 #define Nx (OF_RES1 | OF_NUM1)
222 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx (OF_RES1 | OF_STR1)
224 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK 0x007F
230 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
231 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
234 #define P(x) (x << 24)
235 #define PRIMASK 0x7F000000
236 #define PRIMASK2 0x7E000000
238 /* Operation classes */
240 #define SHIFT_TIL_THIS 0x0600
241 #define RECUR_FROM_THIS 0x1000
244 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
245 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
247 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
248 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
249 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
251 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
252 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
253 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
254 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
255 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
256 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
257 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
258 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
261 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
265 /* simple builtins */
267 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
273 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
277 /* tokens and their corresponding info values */
279 #define NTC "\377" /* switch to next token class (tc<<1) */
282 #define OC_B OC_BUILTIN
284 static char * const tokenlist =
287 "\1/" NTC /* REGEXP */
288 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
289 "\2++" "\2--" NTC /* UOPPOST */
290 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
291 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
292 "\2*=" "\2/=" "\2%=" "\2^="
293 "\1+" "\1-" "\3**=" "\2**"
294 "\1/" "\1%" "\1^" "\1*"
295 "\2!=" "\2>=" "\2<=" "\1>"
296 "\1<" "\2!~" "\1~" "\2&&"
297 "\2||" "\1?" "\1:" NTC
301 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
307 "\2if" "\2do" "\3for" "\5break" /* STATX */
308 "\10continue" "\6delete" "\5print"
309 "\6printf" "\4next" "\10nextfile"
310 "\6return" "\4exit" NTC
314 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
315 "\3cos" "\3exp" "\3int" "\3log"
316 "\4rand" "\3sin" "\4sqrt" "\5srand"
317 "\6gensub" "\4gsub" "\5index" "\6length"
318 "\5match" "\5split" "\7sprintf" "\3sub"
319 "\6substr" "\7systime" "\10strftime"
320 "\7tolower" "\7toupper" NTC
322 "\4func" "\10function" NTC
327 static unsigned long tokeninfo[] = {
332 xS|'a', xS|'w', xS|'|',
333 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
334 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
336 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
337 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
338 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
339 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
341 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
342 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
343 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
344 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
345 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
346 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
347 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
348 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
349 OC_COLON|xx|P(67)|':',
352 OC_PGETLINE|SV|P(37),
353 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
354 OC_UNARY|xV|P(19)|'!',
360 ST_IF, ST_DO, ST_FOR, OC_BREAK,
361 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
362 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
363 OC_RETURN|Vx, OC_EXIT|Nx,
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
380 /* internal variable names and their initial values */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
383 CONVFMT=0, OFMT, FS, OFS,
384 ORS, RS, RT, FILENAME,
385 SUBSEP, ARGIND, ARGC, ARGV,
388 ENVIRON, F0, _intvarcount_
391 static char * vNames =
392 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
393 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
394 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
396 "NR\0" "NF\0*" "IGNORECASE\0*"
397 "ENVIRON\0" "$\0*" "\0";
399 static char * vValues =
400 "%.6g\0" "%.6g\0" " \0" " \0"
401 "\n\0" "\n\0" "\0" "\0"
/* Hash table sizing: hash_init() creates a table with FIRST_PRIME buckets
 * and hash_rebuild() takes successive larger sizes from PRIMES[].
 * FIRST_PRIME must not end in ';' so that it can be used in expressions. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
/* number of entries in PRIMES[], derived from the array itself */
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
412 extern char **environ;
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
420 static char *programname;
422 static int is_f0_split;
423 static int nfields = 0;
424 static var *Fields = NULL;
425 static tsplitter fsplitter, rsplitter;
426 static nvblock *cb = NULL;
429 static int icase = FALSE;
430 static int exiting = FALSE;
433 unsigned long tclass;
441 /* function prototypes */
442 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
443 static void handle_special(var *);
444 static node *parse_expr(unsigned long);
445 static void chain_group(void);
446 static var *evaluate(node *, var *);
447 static rstream *next_input_file(void);
448 static int fmt_num(char *, int, char *, double, int);
449 static int awk_exit(int);
451 /* ---- error handling ---- */
453 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
454 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
455 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
456 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
457 static const char EMSG_INV_FMT[] = "Invalid format specifier";
458 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
459 static const char EMSG_NOT_ARRAY[] = "Not an array";
460 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
461 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
462 #ifndef CONFIG_FEATURE_AWK_MATH
463 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
466 static void syntax_error(const char * const message)
468 bb_error_msg("%s:%i: %s", programname, lineno, message);
472 #define runtime_error(x) syntax_error(x)
475 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name: each character c updates the running value
 * as h = c + (h << 6) - h. Callers reduce the result modulo the current
 * table size. */
static unsigned int hashidx(char *name)
{
	unsigned int h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++) {
		h = *p + (h << 6) - h;
	}
	return h;
}
485 /* create new hash */
486 static xhash *hash_init(void) {
490 newhash = (xhash *)xcalloc(1, sizeof(xhash));
491 newhash->csize = FIRST_PRIME;
492 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
497 /* find item in hash, return ptr to data, NULL if not found */
498 static void *hash_search(xhash *hash, char *name) {
502 hi = hash->items [ hashidx(name) % hash->csize ];
504 if (strcmp(hi->name, name) == 0)
511 /* grow hash if it becomes too big */
512 static void hash_rebuild(xhash *hash) {
514 unsigned int newsize, i, idx;
515 hash_item **newitems, *hi, *thi;
517 if (hash->nprime == NPRIMES)
520 newsize = PRIMES[hash->nprime++];
521 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
523 for (i=0; i<hash->csize; i++) {
528 idx = hashidx(thi->name) % newsize;
529 thi->next = newitems[idx];
535 hash->csize = newsize;
536 hash->items = newitems;
539 /* find item in hash, add it if necessary. Return ptr to data */
/* Look name up in hash via hash_search(); the visible remainder of the
 * function allocates a new item and links it into the table. */
540 static void *hash_find(xhash *hash, char *name) {
546 hi = hash_search(hash, name);
/* count the new element; the test fires once the table holds more than
 * ten elements per bucket on average (the action taken is on a line not
 * visible here -- presumably a call to hash_rebuild(); TODO confirm) */
548 if (++hash->nel / hash->csize > 10)
/* l covers the name plus its terminating NUL; the item is allocated
 * zero-filled with that much extra room for the name */
551 l = bb_strlen(name) + 1;
552 hi = xcalloc(sizeof(hash_item) + l, 1);
553 memcpy(hi->name, name, l);
/* insert the new item at the head of its bucket chain */
555 idx = hashidx(name) % hash->csize;
556 hi->next = hash->items[idx];
557 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): find the named item in the given
 * table, adding it if necessary, and cast the returned data pointer to the
 * item type stored in that table. Each expansion is fully parenthesised so
 * that the result can be used directly, e.g. newvar(name)->type. */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name)        ((var *) hash_find(vhash, (name)))
#define newfile(name)       ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name)       ((func *) hash_find(fnhash, (name)))
568 static void hash_remove(xhash *hash, char *name) {
570 hash_item *hi, **phi;
572 phi = &(hash->items[ hashidx(name) % hash->csize ]);
575 if (strcmp(hi->name, name) == 0) {
576 hash->glen -= (bb_strlen(name) + 1);
586 /* ------ some useful functions ------ */
588 static void skip_spaces(char **s) {
590 register char *p = *s;
592 while(*p == ' ' || *p == '\t' ||
593 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
599 static char *nextword(char **s) {
601 register char *p = *s;
608 static char nextchar(char **s) {
610 register char c, *pps;
614 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
615 if (c == '\\' && *s == pps) c = *((*s)++);
/* Return 1 when c is alphanumeric or an underscore, 0 otherwise. */
static inline int isalnum_(int c)
{
	/* <ctype.h> classifiers require an unsigned char value (or EOF);
	 * the visible callers pass plain char, which may be negative */
	return (isalnum((unsigned char)c) || c == '_');
}
/* Open path in the given mode via bb_xfopen(). The name "-" (a lone dash)
 * is not opened: stdin is returned for it instead. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0') {
		return stdin;
	}
	return bb_xfopen(path, mode);
}
629 /* -------- working with variables (set/get/copy/etc) -------- */
631 static xhash *iamarray(var *v) {
635 while (a->type & VF_CHILD)
638 if (! (a->type & VF_ARRAY)) {
640 a->x.array = hash_init();
645 static void clear_array(xhash *array) {
650 for (i=0; i<array->csize; i++) {
651 hi = array->items[i];
655 free(thi->data.v.string);
658 array->items[i] = NULL;
660 array->glen = array->nel = 0;
663 /* clear a variable */
664 static var *clrvar(var *v) {
666 if (!(v->type & VF_FSTR))
669 v->type &= VF_DONTTOUCH;
675 /* assign string value to variable */
676 static var *setvar_p(var *v, char *value) {
685 /* same as setvar_p but make a copy of string */
686 static var *setvar_s(var *v, char *value) {
688 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
691 /* same as setvar_s but set USER flag */
692 static var *setvar_u(var *v, char *value) {
699 /* set array element to user string */
700 static void setari_u(var *a, int idx, char *s) {
703 static char sidx[12];
705 sprintf(sidx, "%d", idx);
706 v = findvar(iamarray(a), sidx);
710 /* assign numeric value to variable */
711 static var *setvar_i(var *v, double value) {
714 v->type |= VF_NUMBER;
/* Return the string value of v. The result is never NULL: a variable
 * without a string yields "". */
720 static char *getvar_s(var *v) {
722 /* if v is numeric and has no cached string, convert it to string */
723 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format the number with the current CONVFMT value (TRUE is fmt_num's
 * int_as_int argument), then keep a duplicate of the text in v->string
 * and mark it as cached */
724 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
725 v->string = bb_xstrdup(buf);
726 v->type |= VF_CACHED;
728 return (v->string == NULL) ? "" : v->string;
731 static double getvar_i(var *v) {
735 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
739 v->number = strtod(s, &s);
740 if (v->type & VF_USER) {
748 v->type |= VF_CACHED;
753 static var *copyvar(var *dest, var *src) {
757 dest->type |= (src->type & ~VF_DONTTOUCH);
758 dest->number = src->number;
760 dest->string = bb_xstrdup(src->string);
762 handle_special(dest);
766 static var *incvar(var *v) {
768 return setvar_i(v, getvar_i(v)+1.);
771 /* return true if v is number or numeric string */
772 static int is_numeric(var *v) {
775 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
778 /* return 1 when value of v corresponds to true, 0 otherwise */
779 static int istrue(var *v) {
782 return (v->number == 0) ? 0 : 1;
784 return (v->string && *(v->string)) ? 1 : 0;
787 /* temporary variables allocator. Last allocated should be first freed */
788 static var *nvalloc(int n) {
796 if ((cb->pos - cb->nv) + n <= cb->size) break;
801 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
802 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
807 if (pb) pb->next = cb;
813 while (v < cb->pos) {
822 static void nvfree(var *v) {
826 if (v < cb->nv || v >= cb->pos)
827 runtime_error(EMSG_INTERNAL_ERROR);
829 for (p=v; p<cb->pos; p++) {
830 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
831 clear_array(iamarray(p));
832 free(p->x.array->items);
835 if (p->type & VF_WALK)
842 while (cb->prev && cb->pos == cb->nv) {
847 /* ------- awk program text parsing ------- */
849 /* Parse the next token pointed to by global pos and place the result into global t.
850 * If the token is not of an expected class, give up with a syntax error. Return the token class
852 static unsigned long next_token(unsigned long expected) {
856 unsigned long tc, *ti;
858 static int concat_inserted = FALSE;
859 static unsigned long save_tclass, save_info;
860 static unsigned long ltclass = TC_OPTERM;
866 } else if (concat_inserted) {
868 concat_inserted = FALSE;
869 t.tclass = save_tclass;
880 while (*p != '\n' && *p != '\0') p++;
888 } else if (*p == '\"') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 *(s++) = nextchar(&p);
900 } else if ((expected & TC_REGEXP) && *p == '/') {
904 if (*p == '\0' || *p == '\n')
905 syntax_error(EMSG_UNEXP_EOS);
906 if ((*s++ = *p++) == '\\') {
908 *(s-1) = bb_process_escape_sequence((const char **)&p);
909 if (*pp == '\\') *s++ = '\\';
910 if (p == pp) *s++ = *p++;
917 } else if (*p == '.' || isdigit(*p)) {
919 t.number = strtod(p, &p);
921 syntax_error(EMSG_UNEXP_TOKEN);
925 /* search for something known */
935 /* if token class is expected, token
936 * matches and it's not a longer word,
937 * then this is what we are looking for
939 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940 *tl == *p && strncmp(p, tl, l) == 0 &&
941 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
951 /* it's a name (var/array/function),
952 * otherwise it's something wrong
955 syntax_error(EMSG_UNEXP_TOKEN);
958 while(isalnum_(*(++p))) {
976 /* skipping newlines in some cases */
977 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
980 /* insert concatenation operator when needed */
981 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
982 concat_inserted = TRUE;
986 t.info = OC_CONCAT | SS | P(35);
993 /* Are we ready for this? */
994 if (! (ltclass & expected))
995 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
996 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1001 static void rollback_token(void) { t.rollback = TRUE; }
1003 static node *new_node(unsigned long info) {
1007 n = (node *)xcalloc(sizeof(node), 1);
1013 static node *mk_re_node(char *s, node *n, regex_t *re) {
1015 n->info = OC_REGEXP;
1018 xregcomp(re, s, REG_EXTENDED);
1019 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1024 static node *condition(void) {
1026 next_token(TC_SEQSTART);
1027 return parse_expr(TC_SEQTERM);
1030 /* parse expression terminated by given argument, return ptr
1031 * to built subtree. Terminator is eaten by parse_expr */
1032 static node *parse_expr(unsigned long iexp) {
1037 unsigned long tc, xtc;
1041 sn.r.n = glptr = NULL;
1042 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1044 while (! ((tc = next_token(xtc)) & iexp)) {
1045 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1046 /* input redirection (<) attached to glptr node */
1047 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1049 xtc = TC_OPERAND | TC_UOPPRE;
1052 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1053 /* for binary and postfix-unary operators, jump back over
1054 * previous operators with higher priority */
1056 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1057 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1059 if ((t.info & OPCLSMASK) == OC_TERNARY)
1061 cn = vn->a.n->r.n = new_node(t.info);
1063 if (tc & TC_BINOP) {
1065 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1066 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1068 next_token(TC_GETLINE);
1069 /* give maximum priority to this pipe */
1070 cn->info &= ~PRIMASK;
1071 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1075 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1080 /* for operands and prefix-unary operators, attach them
1083 cn = vn->r.n = new_node(t.info);
1085 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1086 if (tc & (TC_OPERAND | TC_REGEXP)) {
1087 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1088 /* one should be very careful with switch on tclass -
1089 * only simple tclasses should be used! */
1094 if ((v = hash_search(ahash, t.string)) != NULL) {
1095 cn->info = OC_FNARG;
1096 cn->l.i = v->x.aidx;
1098 cn->l.v = newvar(t.string);
1100 if (tc & TC_ARRAY) {
1102 cn->r.n = parse_expr(TC_ARRTERM);
1109 v = cn->l.v = xcalloc(sizeof(var), 1);
1111 setvar_i(v, t.number);
1113 setvar_s(v, t.string);
1117 mk_re_node(t.string, cn,
1118 (regex_t *)xcalloc(sizeof(regex_t),2));
1123 cn->r.f = newfunc(t.string);
1124 cn->l.n = condition();
1128 cn = vn->r.n = parse_expr(TC_SEQTERM);
1134 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1138 cn->l.n = condition();
1147 /* add node to chain. Return ptr to alloc'd node */
1148 static node *chain_node(unsigned long info) {
1153 seq->first = seq->last = new_node(0);
1155 if (seq->programname != programname) {
1156 seq->programname = programname;
1157 n = chain_node(OC_NEWSOURCE);
1158 n->l.s = bb_xstrdup(programname);
1163 seq->last = n->a.n = new_node(OC_DONE);
1168 static void chain_expr(unsigned long info) {
1172 n = chain_node(info);
1173 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1174 if (t.tclass & TC_GRPTERM)
1178 static node *chain_loop(node *nn) {
1180 node *n, *n2, *save_brk, *save_cont;
1182 save_brk = break_ptr;
1183 save_cont = continue_ptr;
1185 n = chain_node(OC_BR | Vx);
1186 continue_ptr = new_node(OC_EXEC);
1187 break_ptr = new_node(OC_EXEC);
1189 n2 = chain_node(OC_EXEC | Vx);
1192 continue_ptr->a.n = n2;
1193 break_ptr->a.n = n->r.n = seq->last;
1195 continue_ptr = save_cont;
1196 break_ptr = save_brk;
1201 /* parse group and attach it to chain */
1202 static void chain_group(void) {
1208 c = next_token(TC_GRPSEQ);
1209 } while (c & TC_NEWLINE);
1211 if (c & TC_GRPSTART) {
1212 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1213 if (t.tclass & TC_NEWLINE) continue;
1217 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1219 chain_expr(OC_EXEC | Vx);
1220 } else { /* TC_STATEMNT */
1221 switch (t.info & OPCLSMASK) {
1223 n = chain_node(OC_BR | Vx);
1224 n->l.n = condition();
1226 n2 = chain_node(OC_EXEC);
1228 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1230 n2->a.n = seq->last;
1238 n = chain_loop(NULL);
1243 n2 = chain_node(OC_EXEC);
1244 n = chain_loop(NULL);
1246 next_token(TC_WHILE);
1247 n->l.n = condition();
1251 next_token(TC_SEQSTART);
1252 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1253 if (t.tclass & TC_SEQTERM) { /* for-in */
1254 if ((n2->info & OPCLSMASK) != OC_IN)
1255 syntax_error(EMSG_UNEXP_TOKEN);
1256 n = chain_node(OC_WALKINIT | VV);
1259 n = chain_loop(NULL);
1260 n->info = OC_WALKNEXT | Vx;
1262 } else { /* for(;;) */
1263 n = chain_node(OC_EXEC | Vx);
1265 n2 = parse_expr(TC_SEMICOL);
1266 n3 = parse_expr(TC_SEQTERM);
1276 n = chain_node(t.info);
1277 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1278 if (t.tclass & TC_OUTRDR) {
1280 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1282 if (t.tclass & TC_GRPTERM)
1287 n = chain_node(OC_EXEC);
1292 n = chain_node(OC_EXEC);
1293 n->a.n = continue_ptr;
1296 /* delete, next, nextfile, return, exit */
1304 static void parse_program(char *p) {
1306 unsigned long tclass;
1313 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1314 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1316 if (tclass & TC_OPTERM)
1320 if (tclass & TC_BEGIN) {
1324 } else if (tclass & TC_END) {
1328 } else if (tclass & TC_FUNCDECL) {
1329 next_token(TC_FUNCTION);
1331 f = newfunc(t.string);
1332 f->body.first = NULL;
1334 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1335 v = findvar(ahash, t.string);
1336 v->x.aidx = (f->nargs)++;
1338 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1345 } else if (tclass & TC_OPSEQ) {
1347 cn = chain_node(OC_TEST);
1348 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1349 if (t.tclass & TC_GRPSTART) {
1353 chain_node(OC_PRINT);
1355 cn->r.n = mainseq.last;
1357 } else /* if (tclass & TC_GRPSTART) */ {
1365 /* -------- program execution part -------- */
/* Set up the splitter spl from the separator string s. handle_special()
 * calls this with the value of FS (for fsplitter) and RS (for rsplitter). */
1367 static node *mk_splitter(char *s, tsplitter *spl) {
1369 register regex_t *re, *ire;
/* NOTE(review): `&&` looks like a typo for `&`. `n->info && OPCLSMASK`
 * evaluates to 0 or 1, which can never equal OC_REGEXP (0x2100), so this
 * branch is never taken as written. Every other check of this kind in the
 * file uses `(info & OPCLSMASK) == OC_REGEXP`. */
1375 if ((n->info && OPCLSMASK) == OC_REGEXP) {
/* a separator longer than one character is compiled as a regular
 * expression into the node */
1379 if (bb_strlen(s) > 1) {
1380 mk_re_node(s, n, re);
/* NOTE(review): presumably the other branch of the length test (the
 * `else` is not visible here): the first character of s becomes the
 * node's info value */
1382 n->info = (unsigned long) *s;
1388 /* use node as a regular expression. Supplied with node ptr and regex_t
1389 * storage space. Return ptr to regex (if the result points to preg, it
1390 * should later be regfree'd manually)
1392 static regex_t *as_regex(node *op, regex_t *preg) {
1397 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1398 return icase ? op->r.ire : op->l.re;
1401 s = getvar_s(evaluate(op, v));
1402 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1408 /* gradually increasing buffer */
1409 static void qrealloc(char **b, int n, int *size) {
1411 if (! *b || n >= *size)
1412 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1415 /* resize field storage space */
1416 static void fsrealloc(int size) {
1418 static int maxfields = 0;
1421 if (size >= maxfields) {
1423 maxfields = size + 16;
1424 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1425 for (; i<maxfields; i++) {
1426 Fields[i].type = VF_SPECIAL;
1427 Fields[i].string = NULL;
1431 if (size < nfields) {
1432 for (i=size; i<nfields; i++) {
1439 static int awk_split(char *s, node *spl, char **slist) {
1444 regmatch_t pmatch[2];
1446 /* in worst case, each char would be a separate field */
1447 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1449 c[0] = c[1] = (char)spl->info;
1451 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1453 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1455 l = strcspn(s, c+2);
1456 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1457 pmatch[0].rm_so <= l) {
1458 l = pmatch[0].rm_so;
1459 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1461 pmatch[0].rm_eo = l;
1462 if (*(s+l)) pmatch[0].rm_eo++;
1468 s += pmatch[0].rm_eo;
1471 } else if (c[0] == '\0') { /* null split */
1477 } else if (c[0] != ' ') { /* single-character split */
1479 c[0] = toupper(c[0]);
1480 c[1] = tolower(c[1]);
1483 while ((s1 = strpbrk(s1, c))) {
1487 } else { /* space split */
1489 while (isspace(*s)) s++;
1492 while (*s && !isspace(*s))
1500 static void split_f0(void) {
1502 static char *fstrings = NULL;
1512 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1515 for (i=0; i<n; i++) {
1516 Fields[i].string = nextword(&s);
1517 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1520 /* set NF manually to avoid side effects */
1522 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1523 V[NF]->number = nfields;
1526 /* perform additional actions when some internal variables changed */
1527 static void handle_special(var *v) {
1531 int sl, l, len, i, bsize;
1533 if (! (v->type & VF_SPECIAL))
1537 n = (int)getvar_i(v);
1540 /* recalculate $0 */
1541 sep = getvar_s(V[OFS]);
1542 sl = bb_strlen(sep);
1545 for (i=0; i<n; i++) {
1546 s = getvar_s(&Fields[i]);
1549 memcpy(b+len, sep, sl);
1552 qrealloc(&b, len+l+sl, &bsize);
1553 memcpy(b+len, s, l);
1556 if (b) b[len] = '\0';
1560 } else if (v == V[F0]) {
1561 is_f0_split = FALSE;
1563 } else if (v == V[FS]) {
1564 mk_splitter(getvar_s(v), &fsplitter);
1566 } else if (v == V[RS]) {
1567 mk_splitter(getvar_s(v), &rsplitter);
1569 } else if (v == V[IGNORECASE]) {
1573 n = getvar_i(V[NF]);
1574 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1575 /* right here v is invalid. Just to note... */
1579 /* step through func/builtin/etc arguments */
1580 static node *nextarg(node **pn) {
1585 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1594 static void hashwalk_init(var *v, xhash *array) {
1600 if (v->type & VF_WALK)
1604 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1605 *w = *(w+1) = (char *)(w + 2);
1606 for (i=0; i<array->csize; i++) {
1607 hi = array->items[i];
1609 strcpy(*w, hi->name);
1616 static int hashwalk_next(var *v) {
1624 setvar_s(v, nextword(w+1));
1628 /* evaluate node, return 1 when result is true, 0 otherwise */
1629 static int ptest(node *pattern) {
1632 return istrue(evaluate(pattern, &v));
1635 /* read next record from stream rsm into a variable v */
1636 static int awk_getline(rstream *rsm, var *v) {
1639 regmatch_t pmatch[2];
1640 int a, p, pp=0, size;
1641 int fd, so, eo, r, rp;
1644 /* we're using our own buffer since we need access to accumulating
1647 fd = fileno(rsm->F);
1652 c = (char) rsplitter.n.info;
1655 if (! m) qrealloc(&m, 256, &size);
1661 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1662 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1663 b, 1, pmatch, 0) == 0) {
1664 so = pmatch[0].rm_so;
1665 eo = pmatch[0].rm_eo;
1669 } else if (c != '\0') {
1670 s = strchr(b+pp, c);
1677 while (b[rp] == '\n')
1679 s = strstr(b+rp, "\n\n");
1682 while (b[eo] == '\n') eo++;
1690 memmove(m, (const void *)(m+a), p+1);
1695 qrealloc(&m, a+p+128, &size);
1698 p += safe_read(fd, b+p, size-p-1);
1702 setvar_i(V[ERRNO], errno);
1711 c = b[so]; b[so] = '\0';
1715 c = b[eo]; b[eo] = '\0';
1716 setvar_s(V[RT], b+so);
1728 static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1733 if (int_as_int && n == (int)n) {
1734 r = snprintf(b, size, "%d", (int)n);
1736 do { c = *s; } while (*s && *++s);
1737 if (strchr("diouxX", c)) {
1738 r = snprintf(b, size, format, (int)n);
1739 } else if (strchr("eEfgG", c)) {
1740 r = snprintf(b, size, format, n);
1742 runtime_error(EMSG_INV_FMT);
1749 /* formatted output into an allocated buffer, return ptr to buffer */
/* n is the argument list: the first argument is evaluated to obtain the
 * format string, each following argument feeds one conversion. The format
 * is processed one conversion specification at a time; i is the running
 * output offset inside the growing buffer b. */
1750 static char *awk_printf(node *n) {
1753 char *fmt, *s, *s1, *f;
1754 int i, j, incr, bsize;
/* work on a private copy: the format is temporarily NUL-terminated in
 * place further down */
1759 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* advance to the next '%' that is not the first half of a "%%" pair */
1764 while (*f && (*f != '%' || *(++f) == '%'))
/* skip the non-alphabetic part of the specification, stopping on the
 * conversion letter */
1766 while (*f && !isalpha(*f))
/* reserve room for this format segment plus MAXVARFMT bytes of output */
1769 incr = (f - s) + MAXVARFMT;
1770 qrealloc(&b, incr+i, &bsize);
/* c = conversion letter; cut the format right after it, saving the
 * overwritten character in c1 */
1771 c = *f; if (c != '\0') f++;
1772 c1 = *f ; *f = '\0';
1773 arg = evaluate(nextarg(&n), v);
/* %c (or end of format): a numeric argument is used as a character code,
 * otherwise the first character of the string value is printed */
1776 if (c == 'c' || !c) {
1777 i += sprintf(b+i, s,
1778 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
/* %s: the buffer must additionally hold the whole string s1 */
1780 } else if (c == 's') {
1782 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1783 i += sprintf(b+i, s, s1);
/* any other conversion is handed to fmt_num() as a number */
1786 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1790 /* if there was an error while sprintf, return value is negative */
/* resize the buffer to i bytes of output plus one */
1795 b = xrealloc(b, i+1);
1802 /* common substitution routine
1803 * replace (nm) substring of (src) that match (n) with (repl), store
1804 * result into (dest), return number of substitutions. If nm=0, replace
1805 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1806 * subexpression matching (\1-\9)
/* Parameters:
 *   rn   - node supplying the regular expression (resolved by as_regex())
 *   repl - replacement text
 *   nm   - number of the match to replace
 *   src  - variable holding the source string; NULL selects V[F0]
 *   dest - variable receiving the result; NULL selects V[F0]
 *   ex   - non-zero: digits in repl select subexpressions
 * The result is assembled in ds (grown with qrealloc()); di is the write
 * offset into ds and sp the read position in the source string. */
1808 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1812 int c, i, j, di, rl, so, eo, nbs, n, dssize;
/* slot 0 is the whole match, slots 1..9 the parenthesised subexpressions */
1813 regmatch_t pmatch[10];
/* sreg is scratch storage for a regex that as_regex() may have to compile */
1816 re = as_regex(rn, &sreg);
1817 if (! src) src = V[F0];
1818 if (! dest) dest = V[F0];
1822 rl = bb_strlen(repl);
/* once sp has moved past the start of the source, REG_NOTBOL tells
 * regexec() that sp is not the beginning of a line */
1823 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1824 so = pmatch[0].rm_so;
1825 eo = pmatch[0].rm_eo;
/* copy everything up to the end of the match; room for repl is reserved
 * at the same time */
1827 qrealloc(&ds, di + eo + rl, &dssize);
1828 memcpy(ds + di, sp, eo);
/* walk the replacement text character by character */
1834 for (s = repl; *s; s++) {
/* '&', or a digit when ex is set, refers to matched text */
1840 if (c == '&' || (ex && c >= '0' && c <= '9')) {
/* presumably nbs counts the backslashes preceding c (its update is not
 * visible here); the output offset is moved back accordingly */
1841 di -= ((nbs + 3) >> 1);
/* splice in the text of match/subexpression j */
1850 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1851 qrealloc(&ds, di + rl + n, &dssize);
1852 memcpy(ds + di, sp + pmatch[j].rm_so, n);
/* copy one source character; stop after copying the terminating NUL */
1863 if (! (ds[di++] = *sp++)) break;
/* append the unmatched tail of the source */
1867 qrealloc(&ds, di + strlen(sp), &dssize);
1868 strcpy(ds + di, sp);
/* release the regex only when it lives in the local scratch sreg */
1870 if (re == &sreg) regfree(re);
/* Execute a builtin function call.
 *
 *   op  - call node; op->info carries the builtin number (OPNMASK bits),
 *         per-argument evaluation flags and, in its top two bits, the
 *         minimum number of arguments
 *   res - variable that receives the result
 *
 * Up to four arguments are collected first: an[] holds the argument
 * nodes, av[] their evaluated values and as[] their string values, the
 * latter two only when the matching bits of isr request it. Calling a
 * builtin with fewer arguments than required raises EMSG_TOO_FEW_ARGS. */
1874 static var *exec_builtin(node *op, var *res) {
1881 regmatch_t pmatch[2];
1883 static tsplitter tspl;
1885 unsigned long isr, info;
1892 isr = info = op->info;
/* av[2]/av[3] are cleared up front, so optional arguments that were not
 * supplied read as NULL */
1895 av[2] = av[3] = NULL;
1896 for (i=0 ; i<4 && op ; i++) {
1897 an[i] = nextarg(&op);
1898 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1899 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1904 if (nargs < (info >> 30))
1905 runtime_error(EMSG_TOO_FEW_ARGS);
1907 switch (info & OPNMASK) {
1910 #ifdef CONFIG_FEATURE_AWK_MATH
/* atan2(y, x): y is the FIRST argument. The loop counter i left over from
 * the argument loop above is the argument count, not 0, so av[i] pointed
 * past the supplied arguments; av[0] is the intended operand. */
1911 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
1913 runtime_error(EMSG_NO_MATH);
/* third argument: a regexp node is used directly as the splitter,
 * anything else is evaluated and turned into one by mk_splitter() */
1919 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1920 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1925 n = awk_split(as[0], spl, &s);
/* the target array is emptied, then filled with elements 1..n */
1927 clear_array(iamarray(av[1]));
1928 for (i=1; i<=n; i++)
1929 setari_u(av[1], i, nextword(&s1));
1935 l = bb_strlen(as[0]);
/* the start position is 1-based in awk; clamp it to the range 0..l */
1936 i = getvar_i(av[1]) - 1;
1937 if (i>l) i=l; if (i<0) i=0;
/* without an explicit length, take everything up to the end of the string */
1938 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1941 strncpy(s, as[0]+i, n);
/* case conversion works on a private copy, one character at a time */
1953 s1 = s = bb_xstrdup(as[0]);
1955 *s1 = (*to_xxx)(*s1);
1963 ll = bb_strlen(as[1]);
/* l = last offset at which as[1] could still fit inside as[0] */
1964 l = bb_strlen(as[0]) - ll;
1965 if (ll > 0 && l >= 0) {
1967 s = strstr(as[0], as[1]);
/* the reported position is 1-based */
1968 if (s) n = (s - as[0]) + 1;
1970 /* this piece of code is terribly slow and
1971 * really should be rewritten
1973 for (i=0; i<=l; i++) {
1974 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1986 tt = getvar_i(av[1]);
1989 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1990 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1996 re = as_regex(an[1], &sreg);
1997 n = regexec(re, as[0], 1, pmatch, 0);
2002 pmatch[0].rm_so = 0;
2003 pmatch[0].rm_eo = -1;
2005 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2006 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2007 setvar_i(res, pmatch[0].rm_so);
2008 if (re == &sreg) regfree(re);
2012 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2016 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2020 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2029 * Evaluate node - the heart of the program. Supplied with subtree
2030 * and place where to store result. returns ptr to result.
/* XC() drops the low 8 bits of an opcode-class value; the switch selector
 * and every case label in evaluate() are wrapped in it. */
2032 #define XC(n) ((n) >> 8)
/* Execute the node tree starting at op.
 *
 *   op  - node to execute
 *   res - variable offered as storage for the result
 *
 * Returns a pointer to the result; several cases below return a variable
 * other than res. L, R and X are scratch slots used through their .v
 * (var pointer), .s (string), .d (double), .i (int) and further members. */
2034 static var *evaluate(node *op, var *res) {
2036 /* This procedure is recursive so we should count every byte */
/* function-local statics: the current function-argument vector, the
 * srand() seed and scratch storage for regexes obtained via as_regex() */
2037 static var *fnargs = NULL;
2038 static unsigned int seed = 1;
2039 static regex_t sreg;
2048 unsigned long opinfo;
/* early exit yielding an empty string (guarding condition not visible here) */
2060 return setvar_s(res, NULL);
/* opn = operation number within the opcode class */
2067 opn = (short)(opinfo & OPNMASK);
2068 lineno = op->lineno;
2070 /* execute inevitable things */
/* flag-driven pre-evaluation: OF_RES1/OF_RES2 evaluate the left/right
 * operand into the temporaries v1 and v1+1, OF_STR1/OF_STR2 fetch their
 * string values, OF_NUM1 the numeric value of the left operand */
2072 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2073 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2074 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2075 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2076 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2078 switch (XC(opinfo & OPCLSMASK)) {
2080 /* -- iterative node type -- */
2084 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2085 /* it's range pattern */
/* OF_CHECKED, stored in the node itself, marks an open range: it is set
 * once the start pattern matches and cleared when the end pattern does */
2086 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2087 op->info |= OF_CHECKED;
2088 if (ptest(op1->r.n))
2089 op->info &= ~OF_CHECKED;
/* plain pattern: continue at a.n on a match, at r.n otherwise */
2096 op = (ptest(op1)) ? op->a.n : op->r.n;
2100 /* just evaluate an expression, also used as unconditional jump */
2104 /* branch, used in if-else and various loops */
2106 op = istrue(L.v) ? op->a.n : op->r.n;
2109 /* initialize for-in loop */
2110 case XC( OC_WALKINIT ):
2111 hashwalk_init(L.v, iamarray(R.v));
2114 /* get next array item */
2115 case XC( OC_WALKNEXT ):
2116 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2119 case XC( OC_PRINT ):
2120 case XC( OC_PRINTF ):
/* output redirection: R.s names the target, which is opened as a pipe
 * with popen() or as a file in write ("w") or append ("a") mode */
2123 X.rsm = newfile(R.s);
2126 if((X.rsm->F = popen(R.s, "w")) == NULL)
2127 bb_perror_msg_and_die("popen");
2130 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2136 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* print: $0 is written in one place; elsewhere each argument is written
 * in turn */
2138 fputs(getvar_s(V[F0]), X.F);
2141 L.v = evaluate(nextarg(&op1), v1);
/* numbers are formatted through OFMT, integral values as integers */
2142 if (L.v->type & VF_NUMBER) {
2143 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2144 getvar_i(L.v), TRUE);
2147 fputs(getvar_s(L.v), X.F);
/* OFS goes between arguments, ORS terminates the record */
2150 if (op1) fputs(getvar_s(V[OFS]), X.F);
2153 fputs(getvar_s(V[ORS]), X.F);
2155 } else { /* OC_PRINTF */
2156 L.s = awk_printf(op1);
2163 case XC( OC_DELETE ):
/* the operand must be a global variable or a function argument */
2164 X.info = op1->info & OPCLSMASK;
2165 if (X.info == OC_VAR) {
2167 } else if (X.info == OC_FNARG) {
2168 R.v = &fnargs[op1->l.i];
2170 runtime_error(EMSG_NOT_ARRAY);
/* either a single element is removed by key, or the whole array is cleared */
2175 L.s = getvar_s(evaluate(op1->r.n, v1));
2176 hash_remove(iamarray(R.v), L.s);
2178 clear_array(iamarray(R.v));
2182 case XC( OC_NEWSOURCE ):
2183 programname = op->l.s;
2186 case XC( OC_RETURN ):
2190 case XC( OC_NEXTFILE ):
2201 /* -- recursive node type -- */
2209 case XC( OC_FNARG ):
2210 L.v = &fnargs[op->l.i];
/* with a subscript node present the array element is looked up,
 * otherwise the variable itself is the result */
2213 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 if key L.s exists in array R.v, else 0 */
2217 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2220 case XC( OC_REGEXP ):
2222 L.s = getvar_s(V[F0]);
2225 case XC( OC_MATCH ):
2228 X.re = as_regex(op1, &sreg);
2229 R.i = regexec(X.re, L.s, 0, NULL, 0);
2230 if (X.re == &sreg) regfree(X.re);
/* regexec() returns 0 on a match; the result is inverted when opn is '!' */
2231 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2235 /* if source is a temporary string, just relink it to dest */
2236 if (R.v == v1+1 && R.v->string) {
2237 res = setvar_p(L.v, R.v->string);
2240 res = copyvar(L.v, R.v);
2244 case XC( OC_TERNARY ):
/* the right child must be the ':' node holding both alternatives */
2245 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2246 runtime_error(EMSG_POSSIBLE_ERROR);
2247 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* user function call: the function must have a body */
2251 if (! op->r.f->body.first)
2252 runtime_error(EMSG_UNDEF_FUNC);
/* fresh argument vector with nargs+1 slots; X.v keeps its start while R.v
 * walks over it */
2254 X.v = R.v = nvalloc(op->r.f->nargs+1);
2256 L.v = evaluate(nextarg(&op1), v1);
/* each argument slot is flagged VF_CHILD and linked to the actual parameter */
2258 R.v->type |= VF_CHILD;
2259 R.v->x.parent = L.v;
/* checked once as many slots as the function's nargs are filled */
2260 if (++R.v - X.v >= op->r.f->nargs)
2268 res = evaluate(op->r.f->body.first, res);
2275 case XC( OC_GETLINE ):
2276 case XC( OC_PGETLINE ):
2278 X.rsm = newfile(L.s);
/* "cmd | getline" reads from a pipe, "getline < file" from a file */
2280 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2281 X.rsm->F = popen(L.s, "r");
2282 X.rsm->is_pipe = TRUE;
2284 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
/* iF is set from next_input_file() when it is still unset */
2288 if (! iF) iF = next_input_file();
2293 setvar_i(V[ERRNO], errno);
2301 L.i = awk_getline(X.rsm, R.v);
2311 /* simple builtins */
2312 case XC( OC_FBLTIN ):
/* NOTE(review): rand() may return RAND_MAX, so this can yield exactly 1.0 */
2320 R.d = (double)rand() / (double)RAND_MAX;
2323 #ifdef CONFIG_FEATURE_AWK_MATH
2349 runtime_error(EMSG_NO_MATH);
/* with an argument the seed is L.d, otherwise the current time */
2355 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2365 L.s = getvar_s(V[F0]);
2366 R.d = bb_strlen(L.s);
/* a NULL or empty command is not passed to system(); the result is 0 */
2371 R.d = (L.s && *L.s) ? system(L.s) : 0;
2379 X.rsm = newfile(L.s);
/* the stream is looked up by name, closed with the call matching how it
 * was opened, and its buffer and hash entry are released */
2388 X.rsm = (rstream *)hash_search(fdhash, L.s);
2390 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2391 free(X.rsm->buffer);
2392 hash_remove(fdhash, L.s);
2395 setvar_i(V[ERRNO], errno);
2402 case XC( OC_BUILTIN ):
2403 res = exec_builtin(op, res);
2406 case XC( OC_SPRINTF ):
2407 setvar_p(res, awk_printf(op1));
2410 case XC( OC_UNARY ):
2412 L.d = R.d = getvar_i(R.v);
2427 L.d = istrue(X.v) ? 0 : 1;
2438 case XC( OC_FIELD ):
2439 R.i = (int)getvar_i(R.v);
/* Fields[] is indexed from 0, hence the -1 */
2447 res = &Fields[R.i-1];
2451 /* concatenation (" ") and index joining (",") */
2452 case XC( OC_CONCAT ):
2453 case XC( OC_COMMA ):
/* opn is reused here as the size of the joined string */
2454 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2455 X.s = (char *)xmalloc(opn);
/* for ',' the buffer is enlarged by the length of SUBSEP */
2457 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2458 L.s = getvar_s(V[SUBSEP]);
2459 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
/* short-circuit logic: the right operand is tested only when needed */
2467 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2471 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2474 case XC( OC_BINARY ):
2475 case XC( OC_REPLACE ):
2476 R.d = getvar_i(R.v);
2488 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2492 #ifdef CONFIG_FEATURE_AWK_MATH
2493 L.d = pow(L.d, R.d);
2495 runtime_error(EMSG_NO_MATH);
2499 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* remainder by truncating the quotient.
 * NOTE(review): the (int) cast limits this to quotients that fit in int */
2500 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; OC_REPLACE writes back into the left
 * operand X.v */
2503 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2506 case XC( OC_COMPARE ):
/* numeric comparison only when both sides are numeric, otherwise a string
 * comparison that honours icase; L.d receives the signed difference */
2507 if (is_numeric(L.v) && is_numeric(R.v)) {
2508 L.d = getvar_i(L.v) - getvar_i(R.v);
2510 L.s = getvar_s(L.v);
2511 R.s = getvar_s(R.v);
2512 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* the upper bits of opn select the comparison, bit 0 selects between
 * R.i and its negation */
2514 switch (opn & 0xfe) {
2525 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2529 runtime_error(EMSG_POSSIBLE_ERROR);
/* the opcode class is compared against two thresholds */
2531 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2533 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2543 /* -------- main & co. -------- */
/* Shutdown path; r is the requested exit status. The END sequence is
 * evaluated, then every stream registered in fdhash that is an open pipe
 * is closed with pclose(). */
2545 static int awk_exit(int r) {
2554 evaluate(endseq.first, &tv);
2557 /* waiting for children */
/* walk every bucket of the stream hash */
2558 for (i=0; i<fdhash->csize; i++) {
2559 hi = fdhash->items[i];
/* only pipes are closed here; pclose() waits for the child process */
2561 if (hi->data.rs.F && hi->data.rs.is_pipe)
2562 pclose(hi->data.rs.F);
2570 /* if expr looks like "var=value", perform assignment and return 1,
2571 * otherwise return 0 */
2572 static int is_assignment(char *expr) {
2574 char *exprc, *s, *s0, *s1;
/* work on a private copy so that expr itself is left untouched */
2576 exprc = bb_xstrdup(expr);
/* the first character must pass isalnum_() and an '=' must be present */
2577 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value is rewritten in place, one nextchar() result at a time
 * (presumably resolving escape sequences -- TODO confirm) */
2585 *(s1++) = nextchar(&s);
/* exprc names the variable, s0 is the value stored via setvar_u() */
2588 setvar_u(newvar(exprc), s0);
2593 /* switch to next input file */
/* The previous stream is closed and its read positions reset. Candidates
 * come from ARGV, indexed by ARGIND; entries that is_assignment() accepts
 * are consumed as assignments instead of being opened. FILENAME is
 * updated for the chosen file. */
2594 static rstream *next_input_file(void) {
/* static: keeps its value across calls */
2599 static int files_happen = FALSE;
2601 if (rsm.F) fclose(rsm.F);
2603 rsm.pos = rsm.adv = 0;
/* no further ARGV entries left */
2606 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* advance ARGIND and use its string value as the key into ARGV */
2612 ind = getvar_s(incvar(V[ARGIND]));
2613 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* skip missing or empty entries and "var=value" arguments */
2614 if (fname && *fname && !is_assignment(fname))
2615 F = afopen(fname, "r");
2619 files_happen = TRUE;
2620 setvar_s(V[FILENAME], fname);
/* Applet entry point. In order: allocate the global buffer and the hash
 * tables, create the predefined variables, register the standard streams,
 * import the environment into ENVIRON, parse the options F, v, f and W,
 * obtain the program text, fill in ARGC/ARGV, run the BEGIN sequence,
 * then feed every input record to the main sequence and finish through
 * awk_exit(). */
2625 extern int awk_main(int argc, char **argv) {
2632 static int from_file = FALSE;
2634 FILE *F, *stdfiles[3];
/* three NUL-separated names, consumed one at a time by nextword() below */
2635 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2637 /* allocate global buffer */
2638 buf = xmalloc(MAXVARFMT+1);
2640 vhash = hash_init();
2641 ahash = hash_init();
2642 fdhash = hash_init();
2643 fnhash = hash_init();
2645 /* initialize variables */
/* vNames and vValues are walked in parallel with nextword(); a value
 * starting with '\377' means the variable gets no initial string */
2646 for (i=0; *vNames; i++) {
2647 V[i] = v = newvar(nextword(&vNames));
2648 if (*vValues != '\377')
2649 setvar_s(v, nextword(&vValues));
/* a '*' in the name list flags the variable as VF_SPECIAL */
2653 if (*vNames == '*') {
2654 v->type |= VF_SPECIAL;
/* apply the special handling for the initial FS and RS values */
2659 handle_special(V[FS]);
2660 handle_special(V[RS]);
/* register stdin/stdout/stderr under their /dev names */
2662 stdfiles[0] = stdin;
2663 stdfiles[1] = stdout;
2664 stdfiles[2] = stderr;
2665 for (i=0; i<3; i++) {
2666 rsm = newfile(nextword(&stdnames));
2667 rsm->F = stdfiles[i];
/* every environment entry is copied and located at its '=', then stored
 * in the ENVIRON array */
2670 for (envp=environ; *envp; envp++) {
2671 s = bb_xstrdup(*envp);
2672 s1 = strchr(s, '=');
2677 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2682 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2685 setvar_s(V[FS], optarg);
2688 if (! is_assignment(optarg))
/* the program file name doubles as programname */
2693 F = afopen(programname = optarg, "r");
2695 /* one byte is reserved for some trick in next_token */
/* the file is read in chunks of up to 4094 bytes, starting at offset 1;
 * the loop ends when fread() returns 0 */
2696 for (i=j=1; j>0; i+=j) {
2697 s = (char *)xrealloc(s, i+4096);
2698 j = fread(s+i, 1, 4094, F);
2706 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
/* the program text is taken from the next command-line argument */
2717 programname="cmd. line";
2718 parse_program(argv[optind++]);
2722 /* fill in ARGV array */
/* ARGV[0] is the fixed string "awk"; the remaining arguments follow from
 * index 1, hence the +1 in ARGC */
2723 setvar_i(V[ARGC], argc - optind + 1);
2724 setari_u(V[ARGV], 0, "awk");
2725 for(i=optind; i < argc; i++)
2726 setari_u(V[ARGV], i+1-optind, argv[i]);
2728 evaluate(beginseq.first, &tv);
/* with neither a main nor an END sequence there is no input to read */
2729 if (! mainseq.first && ! endseq.first)
2730 awk_exit(EXIT_SUCCESS);
2732 /* input file could already be opened in BEGIN block */
2733 if (! iF) iF = next_input_file();
2735 /* passing through input files */
/* FNR is reset to 0 here */
2739 setvar_i(V[FNR], 0);
/* a positive awk_getline() result means a record was read into $0 */
2741 while ((c = awk_getline(iF, V[F0])) > 0) {
2746 evaluate(mainseq.first, &tv);
2753 runtime_error(strerror(errno));
2755 iF = next_input_file();
2759 awk_exit(EXIT_SUCCESS);