1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
42 #define VF_ARRAY 0x0002 /* 1 = it's an array */
44 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
45 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
47 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
49 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
52 /* these flags are static, don't change them when value is changed */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
56 typedef struct var_s {
57 unsigned short type; /* flags */
61 int aidx; /* func arg index (on compilation stage) */
62 struct xhash_s *array; /* array ptr */
63 struct var_s *parent; /* for func args, ptr to actual parameter */
64 char **walker; /* list of array elements (for..in) */
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
76 typedef struct func_s {
82 typedef struct rstream_s {
88 unsigned short is_pipe;
91 typedef struct hash_item_s {
93 struct var_s v; /* variable/array hash */
94 struct rstream_s rs; /* redirect streams hash */
95 struct func_s f; /* functions hash */
97 struct hash_item_s *next; /* next in chain */
98 char name[1]; /* really it's longer */
101 typedef struct xhash_s {
102 unsigned int nel; /* num of elements */
103 unsigned int csize; /* current hash size */
104 unsigned int nprime; /* next hash size in PRIMES[] */
105 unsigned int glen; /* summary length of item names */
106 struct hash_item_s **items;
110 typedef struct node_s {
112 unsigned short lineno;
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
135 struct nvblock_s *prev;
136 struct nvblock_s *next;
140 typedef struct tsplitter_s {
145 /* simple token classes */
146 /* Order and hex values are very important!!! See next_token() */
147 #define TC_SEQSTART 1 /* ( */
148 #define TC_SEQTERM (1 << 1) /* ) */
149 #define TC_REGEXP (1 << 2) /* /.../ */
150 #define TC_OUTRDR (1 << 3) /* | > >> */
151 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
152 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
153 #define TC_BINOPX (1 << 6) /* two-opnd operator */
154 #define TC_IN (1 << 7)
155 #define TC_COMMA (1 << 8)
156 #define TC_PIPE (1 << 9) /* input redirection pipe */
157 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
158 #define TC_ARRTERM (1 << 11) /* ] */
159 #define TC_GRPSTART (1 << 12) /* { */
160 #define TC_GRPTERM (1 << 13) /* } */
161 #define TC_SEMICOL (1 << 14)
162 #define TC_NEWLINE (1 << 15)
163 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
164 #define TC_WHILE (1 << 17)
165 #define TC_ELSE (1 << 18)
166 #define TC_BUILTIN (1 << 19)
167 #define TC_GETLINE (1 << 20)
168 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
169 #define TC_BEGIN (1 << 22)
170 #define TC_END (1 << 23)
171 #define TC_EOF (1 << 24)
172 #define TC_VARIABLE (1 << 25)
173 #define TC_ARRAY (1 << 26)
174 #define TC_FUNCTION (1 << 27)
175 #define TC_STRING (1 << 28)
176 #define TC_NUMBER (1 << 29)
178 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
180 /* combined token classes */
181 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
193 /* discard newlines after these */
194 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195 TC_BINOP | TC_OPTERM)
197 /* what can expression begin with */
198 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205 TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
208 #define OF_RES1 0x010000
209 #define OF_RES2 0x020000
210 #define OF_STR1 0x040000
211 #define OF_STR2 0x080000
212 #define OF_NUM1 0x100000
213 #define OF_CHECKED 0x200000
215 /* combined operator flags */
218 #define xS (OF_RES2 | OF_STR2)
220 #define VV (OF_RES1 | OF_RES2)
221 #define Nx (OF_RES1 | OF_NUM1)
222 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx (OF_RES1 | OF_STR1)
224 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK 0x007F
/* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
/* The argument is parenthesized and widened to uint32_t before the shift, so
 * expression arguments group as written and values with bit 7 set (such as
 * 0x83) do not overflow a signed int. */
#define P(x) ((uint32_t)(x) << 24)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000
238 /* Operation classes */
240 #define SHIFT_TIL_THIS 0x0600
241 #define RECUR_FROM_THIS 0x1000
244 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
245 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
247 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
248 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
249 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
251 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
252 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
253 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
254 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
255 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
256 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
257 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
258 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
261 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
265 /* simple builtins */
267 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
268 F_ti, F_le, F_sy, F_ff, F_cl
273 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
277 /* tokens and their corresponding info values */
279 #define NTC "\377" /* switch to next token class (tc<<1) */
282 #define OC_B OC_BUILTIN
284 static char * const tokenlist =
287 "\1/" NTC /* REGEXP */
288 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
289 "\2++" "\2--" NTC /* UOPPOST */
290 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
291 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
292 "\2*=" "\2/=" "\2%=" "\2^="
293 "\1+" "\1-" "\3**=" "\2**"
294 "\1/" "\1%" "\1^" "\1*"
295 "\2!=" "\2>=" "\2<=" "\1>"
296 "\1<" "\2!~" "\1~" "\2&&"
297 "\2||" "\1?" "\1:" NTC
301 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
307 "\2if" "\2do" "\3for" "\5break" /* STATX */
308 "\10continue" "\6delete" "\5print"
309 "\6printf" "\4next" "\10nextfile"
310 "\6return" "\4exit" NTC
314 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
315 "\3cos" "\3exp" "\3int" "\3log"
316 "\4rand" "\3sin" "\4sqrt" "\5srand"
317 "\6gensub" "\4gsub" "\5index" "\6length"
318 "\5match" "\5split" "\7sprintf" "\3sub"
319 "\6substr" "\7systime" "\10strftime"
320 "\7tolower" "\7toupper" NTC
322 "\4func" "\10function" NTC
327 static const uint32_t tokeninfo[] = {
332 xS|'a', xS|'w', xS|'|',
333 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
334 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
336 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
337 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
338 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
339 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
341 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
342 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
343 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
344 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
345 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
346 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
347 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
348 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
349 OC_COLON|xx|P(67)|':',
352 OC_PGETLINE|SV|P(37),
353 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
354 OC_UNARY|xV|P(19)|'!',
360 ST_IF, ST_DO, ST_FOR, OC_BREAK,
361 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
362 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
363 OC_RETURN|Vx, OC_EXIT|Nx,
367 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
372 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
373 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
380 /* internal variable names and their initial values */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
383 CONVFMT=0, OFMT, FS, OFS,
384 ORS, RS, RT, FILENAME,
385 SUBSEP, ARGIND, ARGC, ARGV,
388 ENVIRON, F0, _intvarcount_
391 static char * vNames =
392 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
393 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
394 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
396 "NR\0" "NF\0*" "IGNORECASE\0*"
397 "ENVIRON\0" "$\0*" "\0";
399 static char * vValues =
400 "%.6g\0" "%.6g\0" " \0" " \0"
401 "\n\0" "\n\0" "\0" "\0"
/* hash size may grow to these values */
/* FIRST_PRIME is the initial hash size; it carries no trailing semicolon so
 * that it can also be used inside larger expressions. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
/* element count is derived from the array itself, whatever its element type */
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
412 extern char **environ;
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
420 static char *programname;
422 static int is_f0_split;
425 static tsplitter fsplitter, rsplitter;
441 /* function prototypes */
442 static void handle_special(var *);
443 static node *parse_expr(uint32_t);
444 static void chain_group(void);
445 static var *evaluate(node *, var *);
446 static rstream *next_input_file(void);
447 static int fmt_num(char *, int, const char *, double, int);
448 static int awk_exit(int) attribute_noreturn;
450 /* ---- error handling ---- */
452 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
453 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
454 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
455 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
456 static const char EMSG_INV_FMT[] = "Invalid format specifier";
457 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
458 static const char EMSG_NOT_ARRAY[] = "Not an array";
459 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
460 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
461 #ifndef CONFIG_FEATURE_AWK_MATH
462 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
465 static void syntax_error(const char * const message) attribute_noreturn;
466 static void syntax_error(const char * const message)
468 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
471 #define runtime_error(x) syntax_error(x)
474 /* ---- hash stuff ---- */
/* Compute the hash of NAME: starting from 0, each character updates the
 * value as idx = idx * 63 + ch (written as a shift and a subtraction).
 * Callers reduce the result modulo the current table size. */
static unsigned int hashidx(const char *name)
{
	register unsigned int idx = 0;

	while (*name)
		idx = *name++ + (idx << 6) - idx;

	return idx;
}
484 /* create new hash */
485 static xhash *hash_init(void)
489 newhash = (xhash *)xcalloc(1, sizeof(xhash));
490 newhash->csize = FIRST_PRIME;
491 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
496 /* find item in hash, return ptr to data, NULL if not found */
497 static void *hash_search(xhash *hash, const char *name)
501 hi = hash->items [ hashidx(name) % hash->csize ];
503 if (strcmp(hi->name, name) == 0)
510 /* grow hash if it becomes too big */
511 static void hash_rebuild(xhash *hash)
513 unsigned int newsize, i, idx;
514 hash_item **newitems, *hi, *thi;
516 if (hash->nprime == NPRIMES)
519 newsize = PRIMES[hash->nprime++];
520 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
522 for (i=0; i<hash->csize; i++) {
527 idx = hashidx(thi->name) % newsize;
528 thi->next = newitems[idx];
534 hash->csize = newsize;
535 hash->items = newitems;
538 /* find item in hash, add it if necessary. Return ptr to data */
539 static void *hash_find(xhash *hash, const char *name)
545 hi = hash_search(hash, name);
547 if (++hash->nel / hash->csize > 10)
550 l = bb_strlen(name) + 1;
551 hi = xcalloc(sizeof(hash_item) + l, 1);
552 memcpy(hi->name, name, l);
554 idx = hashidx(name) % hash->csize;
555 hi->next = hash->items[idx];
556 hash->items[idx] = hi;
/* Typed front ends for hash_find(). Each expansion is wrapped in parentheses
 * so that the cast is applied before any postfix operator that follows the
 * macro call, e.g. findvar(h, n)->type. */
#define findvar(hash, name) ((var *) hash_find ( (hash) , (name) ))
#define newvar(name) ((var *) hash_find ( vhash , (name) ))
#define newfile(name) ((rstream *) hash_find ( fdhash , (name) ))
#define newfunc(name) ((func *) hash_find ( fnhash , (name) ))
567 static void hash_remove(xhash *hash, const char *name)
569 hash_item *hi, **phi;
571 phi = &(hash->items[ hashidx(name) % hash->csize ]);
574 if (strcmp(hi->name, name) == 0) {
575 hash->glen -= (bb_strlen(name) + 1);
585 /* ------ some useful functions ------ */
587 static void skip_spaces(char **s)
589 register char *p = *s;
591 while(*p == ' ' || *p == '\t' ||
592 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
598 static char *nextword(char **s)
600 register char *p = *s;
607 static char nextchar(char **s)
609 register char c, *pps;
613 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
614 if (c == '\\' && *s == pps) c = *((*s)++);
/* Return 1 if C is a letter, a digit or '_', 0 otherwise.
 * C is converted to unsigned char before the <ctype.h> call because passing
 * a negative plain-char value to isalnum() is undefined behavior. */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
/* Open PATH with MODE via bb_xfopen(); the name "-" selects stdin instead. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return bb_xfopen(path, mode);
}
628 /* -------- working with variables (set/get/copy/etc) -------- */
630 static xhash *iamarray(var *v)
634 while (a->type & VF_CHILD)
637 if (! (a->type & VF_ARRAY)) {
639 a->x.array = hash_init();
644 static void clear_array(xhash *array)
649 for (i=0; i<array->csize; i++) {
650 hi = array->items[i];
654 free(thi->data.v.string);
657 array->items[i] = NULL;
659 array->glen = array->nel = 0;
662 /* clear a variable */
663 static var *clrvar(var *v)
665 if (!(v->type & VF_FSTR))
668 v->type &= VF_DONTTOUCH;
674 /* assign string value to variable */
675 static var *setvar_p(var *v, char *value)
684 /* same as setvar_p but make a copy of string */
685 static var *setvar_s(var *v, const char *value)
687 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
690 /* same as setvar_s but set USER flag */
691 static var *setvar_u(var *v, const char *value)
698 /* set array element to user string */
699 static void setari_u(var *a, int idx, const char *s)
702 static char sidx[12];
704 sprintf(sidx, "%d", idx);
705 v = findvar(iamarray(a), sidx);
709 /* assign numeric value to variable */
710 static var *setvar_i(var *v, double value)
713 v->type |= VF_NUMBER;
719 static char *getvar_s(var *v)
721 /* if v is numeric and has no cached string, convert it to string */
722 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
723 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
724 v->string = bb_xstrdup(buf);
725 v->type |= VF_CACHED;
727 return (v->string == NULL) ? "" : v->string;
730 static double getvar_i(var *v)
734 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
738 v->number = strtod(s, &s);
739 if (v->type & VF_USER) {
747 v->type |= VF_CACHED;
752 static var *copyvar(var *dest, const var *src)
756 dest->type |= (src->type & ~VF_DONTTOUCH);
757 dest->number = src->number;
759 dest->string = bb_xstrdup(src->string);
761 handle_special(dest);
765 static var *incvar(var *v)
767 return setvar_i(v, getvar_i(v)+1.);
770 /* return true if v is number or numeric string */
771 static int is_numeric(var *v)
774 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
777 /* return 1 when value of v corresponds to true, 0 otherwise */
778 static int istrue(var *v)
781 return (v->number == 0) ? 0 : 1;
783 return (v->string && *(v->string)) ? 1 : 0;
786 /* temporary variables allocator. Last allocated should be first freed */
787 static var *nvalloc(int n)
795 if ((cb->pos - cb->nv) + n <= cb->size) break;
800 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
801 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
806 if (pb) pb->next = cb;
812 while (v < cb->pos) {
821 static void nvfree(var *v)
825 if (v < cb->nv || v >= cb->pos)
826 runtime_error(EMSG_INTERNAL_ERROR);
828 for (p=v; p<cb->pos; p++) {
829 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
830 clear_array(iamarray(p));
831 free(p->x.array->items);
834 if (p->type & VF_WALK)
841 while (cb->prev && cb->pos == cb->nv) {
846 /* ------- awk program text parsing ------- */
/* Parse the next token pointed to by global pos and place the results into global t.
 * If the token isn't expected, give up. Return the token class.
 */
851 static uint32_t next_token(uint32_t expected)
858 static int concat_inserted;
859 static uint32_t save_tclass, save_info;
860 static uint32_t ltclass = TC_OPTERM;
866 } else if (concat_inserted) {
868 concat_inserted = FALSE;
869 t.tclass = save_tclass;
880 while (*p != '\n' && *p != '\0') p++;
888 } else if (*p == '\"') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 *(s++) = nextchar(&p);
900 } else if ((expected & TC_REGEXP) && *p == '/') {
904 if (*p == '\0' || *p == '\n')
905 syntax_error(EMSG_UNEXP_EOS);
906 if ((*s++ = *p++) == '\\') {
908 *(s-1) = bb_process_escape_sequence((const char **)&p);
909 if (*pp == '\\') *s++ = '\\';
910 if (p == pp) *s++ = *p++;
917 } else if (*p == '.' || isdigit(*p)) {
919 t.number = strtod(p, &p);
921 syntax_error(EMSG_UNEXP_TOKEN);
925 /* search for something known */
935 /* if token class is expected, token
936 * matches and it's not a longer word,
937 * then this is what we are looking for
939 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940 *tl == *p && strncmp(p, tl, l) == 0 &&
941 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
951 /* it's a name (var/array/function),
952 * otherwise it's something wrong
955 syntax_error(EMSG_UNEXP_TOKEN);
958 while(isalnum_(*(++p))) {
963 /* also consume whitespace between functionname and bracket */
977 /* skipping newlines in some cases */
978 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
981 /* insert concatenation operator when needed */
982 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
983 concat_inserted = TRUE;
987 t.info = OC_CONCAT | SS | P(35);
994 /* Are we ready for this? */
995 if (! (ltclass & expected))
996 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
997 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1002 static void rollback_token(void) { t.rollback = TRUE; }
1004 static node *new_node(uint32_t info)
1008 n = (node *)xcalloc(sizeof(node), 1);
1014 static node *mk_re_node(char *s, node *n, regex_t *re)
1016 n->info = OC_REGEXP;
1019 xregcomp(re, s, REG_EXTENDED);
1020 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1025 static node *condition(void)
1027 next_token(TC_SEQSTART);
1028 return parse_expr(TC_SEQTERM);
1031 /* parse expression terminated by given argument, return ptr
1032 * to built subtree. Terminator is eaten by parse_expr */
1033 static node *parse_expr(uint32_t iexp)
1042 sn.r.n = glptr = NULL;
1043 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1045 while (! ((tc = next_token(xtc)) & iexp)) {
1046 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1047 /* input redirection (<) attached to glptr node */
1048 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1050 xtc = TC_OPERAND | TC_UOPPRE;
1053 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1054 /* for binary and postfix-unary operators, jump back over
1055 * previous operators with higher priority */
1057 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1058 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1060 if ((t.info & OPCLSMASK) == OC_TERNARY)
1062 cn = vn->a.n->r.n = new_node(t.info);
1064 if (tc & TC_BINOP) {
1066 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1067 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1069 next_token(TC_GETLINE);
1070 /* give maximum priority to this pipe */
1071 cn->info &= ~PRIMASK;
1072 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1076 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1081 /* for operands and prefix-unary operators, attach them
1084 cn = vn->r.n = new_node(t.info);
1086 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1087 if (tc & (TC_OPERAND | TC_REGEXP)) {
1088 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1089 /* one should be very careful with switch on tclass -
1090 * only simple tclasses should be used! */
1095 if ((v = hash_search(ahash, t.string)) != NULL) {
1096 cn->info = OC_FNARG;
1097 cn->l.i = v->x.aidx;
1099 cn->l.v = newvar(t.string);
1101 if (tc & TC_ARRAY) {
1103 cn->r.n = parse_expr(TC_ARRTERM);
1110 v = cn->l.v = xcalloc(sizeof(var), 1);
1112 setvar_i(v, t.number);
1114 setvar_s(v, t.string);
1118 mk_re_node(t.string, cn,
1119 (regex_t *)xcalloc(sizeof(regex_t),2));
1124 cn->r.f = newfunc(t.string);
1125 cn->l.n = condition();
1129 cn = vn->r.n = parse_expr(TC_SEQTERM);
1135 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1139 cn->l.n = condition();
1148 /* add node to chain. Return ptr to alloc'd node */
1149 static node *chain_node(uint32_t info)
1154 seq->first = seq->last = new_node(0);
1156 if (seq->programname != programname) {
1157 seq->programname = programname;
1158 n = chain_node(OC_NEWSOURCE);
1159 n->l.s = bb_xstrdup(programname);
1164 seq->last = n->a.n = new_node(OC_DONE);
1169 static void chain_expr(uint32_t info)
1173 n = chain_node(info);
1174 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1175 if (t.tclass & TC_GRPTERM)
1179 static node *chain_loop(node *nn)
1181 node *n, *n2, *save_brk, *save_cont;
1183 save_brk = break_ptr;
1184 save_cont = continue_ptr;
1186 n = chain_node(OC_BR | Vx);
1187 continue_ptr = new_node(OC_EXEC);
1188 break_ptr = new_node(OC_EXEC);
1190 n2 = chain_node(OC_EXEC | Vx);
1193 continue_ptr->a.n = n2;
1194 break_ptr->a.n = n->r.n = seq->last;
1196 continue_ptr = save_cont;
1197 break_ptr = save_brk;
1202 /* parse group and attach it to chain */
1203 static void chain_group(void)
1209 c = next_token(TC_GRPSEQ);
1210 } while (c & TC_NEWLINE);
1212 if (c & TC_GRPSTART) {
1213 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1214 if (t.tclass & TC_NEWLINE) continue;
1218 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1220 chain_expr(OC_EXEC | Vx);
1221 } else { /* TC_STATEMNT */
1222 switch (t.info & OPCLSMASK) {
1224 n = chain_node(OC_BR | Vx);
1225 n->l.n = condition();
1227 n2 = chain_node(OC_EXEC);
1229 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1231 n2->a.n = seq->last;
1239 n = chain_loop(NULL);
1244 n2 = chain_node(OC_EXEC);
1245 n = chain_loop(NULL);
1247 next_token(TC_WHILE);
1248 n->l.n = condition();
1252 next_token(TC_SEQSTART);
1253 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1254 if (t.tclass & TC_SEQTERM) { /* for-in */
1255 if ((n2->info & OPCLSMASK) != OC_IN)
1256 syntax_error(EMSG_UNEXP_TOKEN);
1257 n = chain_node(OC_WALKINIT | VV);
1260 n = chain_loop(NULL);
1261 n->info = OC_WALKNEXT | Vx;
1263 } else { /* for(;;) */
1264 n = chain_node(OC_EXEC | Vx);
1266 n2 = parse_expr(TC_SEMICOL);
1267 n3 = parse_expr(TC_SEQTERM);
1277 n = chain_node(t.info);
1278 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1279 if (t.tclass & TC_OUTRDR) {
1281 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1283 if (t.tclass & TC_GRPTERM)
1288 n = chain_node(OC_EXEC);
1293 n = chain_node(OC_EXEC);
1294 n->a.n = continue_ptr;
1297 /* delete, next, nextfile, return, exit */
1305 static void parse_program(char *p)
1314 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1315 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1317 if (tclass & TC_OPTERM)
1321 if (tclass & TC_BEGIN) {
1325 } else if (tclass & TC_END) {
1329 } else if (tclass & TC_FUNCDECL) {
1330 next_token(TC_FUNCTION);
1332 f = newfunc(t.string);
1333 f->body.first = NULL;
1335 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1336 v = findvar(ahash, t.string);
1337 v->x.aidx = (f->nargs)++;
1339 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1346 } else if (tclass & TC_OPSEQ) {
1348 cn = chain_node(OC_TEST);
1349 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1350 if (t.tclass & TC_GRPSTART) {
1354 chain_node(OC_PRINT);
1356 cn->r.n = mainseq.last;
1358 } else /* if (tclass & TC_GRPSTART) */ {
1366 /* -------- program execution part -------- */
1368 static node *mk_splitter(char *s, tsplitter *spl)
1370 register regex_t *re, *ire;
1376 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1380 if (bb_strlen(s) > 1) {
1381 mk_re_node(s, n, re);
1383 n->info = (uint32_t) *s;
/* Use node as a regular expression. Supplied with a node ptr and regex_t
 * storage space. Returns a ptr to the regex (if the result points to preg, it
 * should later be regfree'd manually).
 */
1393 static regex_t *as_regex(node *op, regex_t *preg)
1398 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1399 return icase ? op->r.ire : op->l.re;
1402 s = getvar_s(evaluate(op, v));
1403 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
/* Gradually increasing buffer: (re)allocate *b when it is NULL or when
 * n >= *size, growing it to n + n/2 + 80 bytes and recording that capacity
 * in *size. Otherwise leave both untouched. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1416 /* resize field storage space */
1417 static void fsrealloc(int size)
1419 static int maxfields = 0;
1422 if (size >= maxfields) {
1424 maxfields = size + 16;
1425 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1426 for (; i<maxfields; i++) {
1427 Fields[i].type = VF_SPECIAL;
1428 Fields[i].string = NULL;
1432 if (size < nfields) {
1433 for (i=size; i<nfields; i++) {
1440 static int awk_split(char *s, node *spl, char **slist)
1445 regmatch_t pmatch[2];
1447 /* in worst case, each char would be a separate field */
1448 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1450 c[0] = c[1] = (char)spl->info;
1452 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1454 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1456 l = strcspn(s, c+2);
1457 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1458 pmatch[0].rm_so <= l) {
1459 l = pmatch[0].rm_so;
1460 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1462 pmatch[0].rm_eo = l;
1463 if (*(s+l)) pmatch[0].rm_eo++;
1469 s += pmatch[0].rm_eo;
1472 } else if (c[0] == '\0') { /* null split */
1478 } else if (c[0] != ' ') { /* single-character split */
1480 c[0] = toupper(c[0]);
1481 c[1] = tolower(c[1]);
1484 while ((s1 = strpbrk(s1, c))) {
1488 } else { /* space split */
1490 while (isspace(*s)) s++;
1493 while (*s && !isspace(*s))
1501 static void split_f0(void)
1503 static char *fstrings = NULL;
1513 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1516 for (i=0; i<n; i++) {
1517 Fields[i].string = nextword(&s);
1518 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1521 /* set NF manually to avoid side effects */
1523 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1524 V[NF]->number = nfields;
1527 /* perform additional actions when some internal variables changed */
1528 static void handle_special(var *v)
1532 int sl, l, len, i, bsize;
1534 if (! (v->type & VF_SPECIAL))
1538 n = (int)getvar_i(v);
1541 /* recalculate $0 */
1542 sep = getvar_s(V[OFS]);
1543 sl = bb_strlen(sep);
1546 for (i=0; i<n; i++) {
1547 s = getvar_s(&Fields[i]);
1550 memcpy(b+len, sep, sl);
1553 qrealloc(&b, len+l+sl, &bsize);
1554 memcpy(b+len, s, l);
1557 if (b) b[len] = '\0';
1561 } else if (v == V[F0]) {
1562 is_f0_split = FALSE;
1564 } else if (v == V[FS]) {
1565 mk_splitter(getvar_s(v), &fsplitter);
1567 } else if (v == V[RS]) {
1568 mk_splitter(getvar_s(v), &rsplitter);
1570 } else if (v == V[IGNORECASE]) {
1574 n = getvar_i(V[NF]);
1575 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1576 /* right here v is invalid. Just to note... */
1580 /* step through func/builtin/etc arguments */
1581 static node *nextarg(node **pn)
1586 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1595 static void hashwalk_init(var *v, xhash *array)
1601 if (v->type & VF_WALK)
1605 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1606 *w = *(w+1) = (char *)(w + 2);
1607 for (i=0; i<array->csize; i++) {
1608 hi = array->items[i];
1610 strcpy(*w, hi->name);
1617 static int hashwalk_next(var *v)
1625 setvar_s(v, nextword(w+1));
1629 /* evaluate node, return 1 when result is true, 0 otherwise */
1630 static int ptest(node *pattern)
1633 return istrue(evaluate(pattern, &v));
1636 /* read next record from stream rsm into a variable v */
1637 static int awk_getline(rstream *rsm, var *v)
1640 regmatch_t pmatch[2];
1641 int a, p, pp=0, size;
1642 int fd, so, eo, r, rp;
1645 /* we're using our own buffer since we need access to accumulating
1648 fd = fileno(rsm->F);
1653 c = (char) rsplitter.n.info;
1656 if (! m) qrealloc(&m, 256, &size);
1662 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1663 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1664 b, 1, pmatch, 0) == 0) {
1665 so = pmatch[0].rm_so;
1666 eo = pmatch[0].rm_eo;
1670 } else if (c != '\0') {
1671 s = strchr(b+pp, c);
1678 while (b[rp] == '\n')
1680 s = strstr(b+rp, "\n\n");
1683 while (b[eo] == '\n') eo++;
1691 memmove(m, (const void *)(m+a), p+1);
1696 qrealloc(&m, a+p+128, &size);
1699 p += safe_read(fd, b+p, size-p-1);
1703 setvar_i(V[ERRNO], errno);
1712 c = b[so]; b[so] = '\0';
1716 c = b[eo]; b[eo] = '\0';
1717 setvar_s(V[RT], b+so);
1729 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1733 const char *s=format;
1735 if (int_as_int && n == (int)n) {
1736 r = snprintf(b, size, "%d", (int)n);
1738 do { c = *s; } while (*s && *++s);
1739 if (strchr("diouxX", c)) {
1740 r = snprintf(b, size, format, (int)n);
1741 } else if (strchr("eEfgG", c)) {
1742 r = snprintf(b, size, format, n);
1744 runtime_error(EMSG_INV_FMT);
1751 /* formatted output into an allocated buffer, return ptr to buffer */
1752 static char *awk_printf(node *n)
1755 char *fmt, *s, *s1, *f;
1756 int i, j, incr, bsize;
1761 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1766 while (*f && (*f != '%' || *(++f) == '%'))
1768 while (*f && !isalpha(*f))
1771 incr = (f - s) + MAXVARFMT;
1772 qrealloc(&b, incr+i, &bsize);
1773 c = *f; if (c != '\0') f++;
1774 c1 = *f ; *f = '\0';
1775 arg = evaluate(nextarg(&n), v);
1778 if (c == 'c' || !c) {
1779 i += sprintf(b+i, s,
1780 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1782 } else if (c == 's') {
1784 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1785 i += sprintf(b+i, s, s1);
1788 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1792 /* if there was an error while sprintf, return value is negative */
1797 b = xrealloc(b, i+1);
1804 /* common substitution routine
1805 * replace the (nm)-th substring of (src) that matches (rn) with (repl), store
1806 * result into (dest), return number of substitutions. If nm=0, replace
1807 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1808 * subexpression matching (\1-\9)
1810 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1814 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1815 regmatch_t pmatch[10];
1818 re = as_regex(rn, &sreg);
1819 if (! src) src = V[F0];
1820 if (! dest) dest = V[F0];
1824 rl = bb_strlen(repl);
1825 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1826 so = pmatch[0].rm_so;
1827 eo = pmatch[0].rm_eo;
1829 qrealloc(&ds, di + eo + rl, &dssize);
1830 memcpy(ds + di, sp, eo);
1836 for (s = repl; *s; s++) {
1842 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1843 di -= ((nbs + 3) >> 1);
1852 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1853 qrealloc(&ds, di + rl + n, &dssize);
1854 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1865 if (! (ds[di++] = *sp++)) break;
1869 qrealloc(&ds, di + strlen(sp), &dssize);
1870 strcpy(ds + di, sp);
1872 if (re == &sreg) regfree(re);
1876 static var *exec_builtin(node *op, var *res)
1883 regmatch_t pmatch[2];
1885 static tsplitter tspl;
1894 isr = info = op->info;
1897 av[2] = av[3] = NULL;
1898 for (i=0 ; i<4 && op ; i++) {
1899 an[i] = nextarg(&op);
1900 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1901 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1906 if (nargs < (info >> 30))
1907 runtime_error(EMSG_TOO_FEW_ARGS);
1909 switch (info & OPNMASK) {
1912 #ifdef CONFIG_FEATURE_AWK_MATH
1913 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1915 runtime_error(EMSG_NO_MATH);
1921 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1922 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1927 n = awk_split(as[0], spl, &s);
1929 clear_array(iamarray(av[1]));
1930 for (i=1; i<=n; i++)
1931 setari_u(av[1], i, nextword(&s1));
1937 l = bb_strlen(as[0]);
1938 i = getvar_i(av[1]) - 1;
1939 if (i>l) i=l; if (i<0) i=0;
1940 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1943 strncpy(s, as[0]+i, n);
1955 s1 = s = bb_xstrdup(as[0]);
1957 *s1 = (*to_xxx)(*s1);
1965 ll = bb_strlen(as[1]);
1966 l = bb_strlen(as[0]) - ll;
1967 if (ll > 0 && l >= 0) {
1969 s = strstr(as[0], as[1]);
1970 if (s) n = (s - as[0]) + 1;
1972 /* this piece of code is terribly slow and
1973 * really should be rewritten
1975 for (i=0; i<=l; i++) {
1976 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1988 tt = getvar_i(av[1]);
1991 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1992 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1998 re = as_regex(an[1], &sreg);
1999 n = regexec(re, as[0], 1, pmatch, 0);
2004 pmatch[0].rm_so = 0;
2005 pmatch[0].rm_eo = -1;
2007 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2008 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2009 setvar_i(res, pmatch[0].rm_so);
2010 if (re == &sreg) regfree(re);
2014 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2018 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2022 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2031 * Evaluate node - the heart of the program. Supplied with subtree
2032 * and place where to store result. Returns ptr to result.
2034 #define XC(n) ((n) >> 8)
2036 static var *evaluate(node *op, var *res)
2038 /* This procedure is recursive so we should count every byte */
2039 static var *fnargs = NULL;
2040 static unsigned int seed = 1;
2041 static regex_t sreg;
2062 return setvar_s(res, NULL);
2069 opn = (short)(opinfo & OPNMASK);
2070 lineno = op->lineno;
2072 /* execute inevitable things */
2074 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2075 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2076 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2077 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2078 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2080 switch (XC(opinfo & OPCLSMASK)) {
2082 /* -- iterative node type -- */
2086 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2087 /* it's range pattern */
2088 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2089 op->info |= OF_CHECKED;
2090 if (ptest(op1->r.n))
2091 op->info &= ~OF_CHECKED;
2098 op = (ptest(op1)) ? op->a.n : op->r.n;
2102 /* just evaluate an expression, also used as unconditional jump */
2106 /* branch, used in if-else and various loops */
2108 op = istrue(L.v) ? op->a.n : op->r.n;
2111 /* initialize for-in loop */
2112 case XC( OC_WALKINIT ):
2113 hashwalk_init(L.v, iamarray(R.v));
2116 /* get next array item */
2117 case XC( OC_WALKNEXT ):
2118 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2121 case XC( OC_PRINT ):
2122 case XC( OC_PRINTF ):
2125 X.rsm = newfile(R.s);
2128 if((X.rsm->F = popen(R.s, "w")) == NULL)
2129 bb_perror_msg_and_die("popen");
2132 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2138 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2140 fputs(getvar_s(V[F0]), X.F);
2143 L.v = evaluate(nextarg(&op1), v1);
2144 if (L.v->type & VF_NUMBER) {
2145 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2146 getvar_i(L.v), TRUE);
2149 fputs(getvar_s(L.v), X.F);
2152 if (op1) fputs(getvar_s(V[OFS]), X.F);
2155 fputs(getvar_s(V[ORS]), X.F);
2157 } else { /* OC_PRINTF */
2158 L.s = awk_printf(op1);
2165 case XC( OC_DELETE ):
2166 X.info = op1->info & OPCLSMASK;
2167 if (X.info == OC_VAR) {
2169 } else if (X.info == OC_FNARG) {
2170 R.v = &fnargs[op1->l.i];
2172 runtime_error(EMSG_NOT_ARRAY);
2177 L.s = getvar_s(evaluate(op1->r.n, v1));
2178 hash_remove(iamarray(R.v), L.s);
2180 clear_array(iamarray(R.v));
2184 case XC( OC_NEWSOURCE ):
2185 programname = op->l.s;
2188 case XC( OC_RETURN ):
2192 case XC( OC_NEXTFILE ):
2203 /* -- recursive node type -- */
2211 case XC( OC_FNARG ):
2212 L.v = &fnargs[op->l.i];
2215 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2219 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2222 case XC( OC_REGEXP ):
2224 L.s = getvar_s(V[F0]);
2227 case XC( OC_MATCH ):
2230 X.re = as_regex(op1, &sreg);
2231 R.i = regexec(X.re, L.s, 0, NULL, 0);
2232 if (X.re == &sreg) regfree(X.re);
2233 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2237 /* if source is a temporary string, just relink it to dest */
2238 if (R.v == v1+1 && R.v->string) {
2239 res = setvar_p(L.v, R.v->string);
2242 res = copyvar(L.v, R.v);
2246 case XC( OC_TERNARY ):
2247 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2248 runtime_error(EMSG_POSSIBLE_ERROR);
2249 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2253 if (! op->r.f->body.first)
2254 runtime_error(EMSG_UNDEF_FUNC);
2256 X.v = R.v = nvalloc(op->r.f->nargs+1);
2258 L.v = evaluate(nextarg(&op1), v1);
2260 R.v->type |= VF_CHILD;
2261 R.v->x.parent = L.v;
2262 if (++R.v - X.v >= op->r.f->nargs)
2270 res = evaluate(op->r.f->body.first, res);
2277 case XC( OC_GETLINE ):
2278 case XC( OC_PGETLINE ):
2280 X.rsm = newfile(L.s);
2282 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2283 X.rsm->F = popen(L.s, "r");
2284 X.rsm->is_pipe = TRUE;
2286 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
2290 if (! iF) iF = next_input_file();
2295 setvar_i(V[ERRNO], errno);
2303 L.i = awk_getline(X.rsm, R.v);
2313 /* simple builtins */
2314 case XC( OC_FBLTIN ):
2322 R.d = (double)rand() / (double)RAND_MAX;
2325 #ifdef CONFIG_FEATURE_AWK_MATH
2351 runtime_error(EMSG_NO_MATH);
2357 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2367 L.s = getvar_s(V[F0]);
2368 R.d = bb_strlen(L.s);
2373 R.d = (L.s && *L.s) ? system(L.s) : 0;
2381 X.rsm = newfile(L.s);
2390 X.rsm = (rstream *)hash_search(fdhash, L.s);
2392 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2393 free(X.rsm->buffer);
2394 hash_remove(fdhash, L.s);
2397 setvar_i(V[ERRNO], errno);
2404 case XC( OC_BUILTIN ):
2405 res = exec_builtin(op, res);
2408 case XC( OC_SPRINTF ):
2409 setvar_p(res, awk_printf(op1));
2412 case XC( OC_UNARY ):
2414 L.d = R.d = getvar_i(R.v);
2429 L.d = istrue(X.v) ? 0 : 1;
2440 case XC( OC_FIELD ):
2441 R.i = (int)getvar_i(R.v);
2449 res = &Fields[R.i-1];
2453 /* concatenation (" ") and index joining (",") */
2454 case XC( OC_CONCAT ):
2455 case XC( OC_COMMA ):
2456 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2457 X.s = (char *)xmalloc(opn);
2459 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2460 L.s = getvar_s(V[SUBSEP]);
2461 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2469 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2473 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2476 case XC( OC_BINARY ):
2477 case XC( OC_REPLACE ):
2478 R.d = getvar_i(R.v);
2490 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2494 #ifdef CONFIG_FEATURE_AWK_MATH
2495 L.d = pow(L.d, R.d);
2497 runtime_error(EMSG_NO_MATH);
2501 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2502 L.d -= (int)(L.d / R.d) * R.d;
2505 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2508 case XC( OC_COMPARE ):
2509 if (is_numeric(L.v) && is_numeric(R.v)) {
2510 L.d = getvar_i(L.v) - getvar_i(R.v);
2512 L.s = getvar_s(L.v);
2513 R.s = getvar_s(R.v);
2514 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2516 switch (opn & 0xfe) {
2527 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2531 runtime_error(EMSG_POSSIBLE_ERROR);
2533 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2535 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2545 /* -------- main & co. -------- */
2547 static int awk_exit(int r)
2556 evaluate(endseq.first, &tv);
2559 /* waiting for children */
2560 for (i=0; i<fdhash->csize; i++) {
2561 hi = fdhash->items[i];
2563 if (hi->data.rs.F && hi->data.rs.is_pipe)
2564 pclose(hi->data.rs.F);
2572 /* if expr looks like "var=value", perform assignment and return 1,
2573 * otherwise return 0 */
2574 static int is_assignment(const char *expr)
2576 char *exprc, *s, *s0, *s1;
2578 exprc = bb_xstrdup(expr);
2579 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2587 *(s1++) = nextchar(&s);
2590 setvar_u(newvar(exprc), s0);
2595 /* switch to next input file */
2596 static rstream *next_input_file(void)
2601 static int files_happen = FALSE;
2603 if (rsm.F) fclose(rsm.F);
2605 rsm.pos = rsm.adv = 0;
2608 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2614 ind = getvar_s(incvar(V[ARGIND]));
2615 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2616 if (fname && *fname && !is_assignment(fname))
2617 F = afopen(fname, "r");
2621 files_happen = TRUE;
2622 setvar_s(V[FILENAME], fname);
2627 extern int awk_main(int argc, char **argv)
2634 static int from_file = FALSE;
2636 FILE *F, *stdfiles[3];
2637 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2639 /* allocate global buffer */
2640 buf = xmalloc(MAXVARFMT+1);
2642 vhash = hash_init();
2643 ahash = hash_init();
2644 fdhash = hash_init();
2645 fnhash = hash_init();
2647 /* initialize variables */
2648 for (i=0; *vNames; i++) {
2649 V[i] = v = newvar(nextword(&vNames));
2650 if (*vValues != '\377')
2651 setvar_s(v, nextword(&vValues));
2655 if (*vNames == '*') {
2656 v->type |= VF_SPECIAL;
2661 handle_special(V[FS]);
2662 handle_special(V[RS]);
2664 stdfiles[0] = stdin;
2665 stdfiles[1] = stdout;
2666 stdfiles[2] = stderr;
2667 for (i=0; i<3; i++) {
2668 rsm = newfile(nextword(&stdnames));
2669 rsm->F = stdfiles[i];
2672 for (envp=environ; *envp; envp++) {
2673 s = bb_xstrdup(*envp);
2674 s1 = strchr(s, '=');
2679 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2684 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2687 setvar_s(V[FS], optarg);
2690 if (! is_assignment(optarg))
2695 F = afopen(programname = optarg, "r");
2697 /* one byte is reserved for some trick in next_token */
2698 for (i=j=1; j>0; i+=j) {
2699 s = (char *)xrealloc(s, i+4096);
2700 j = fread(s+i, 1, 4094, F);
2708 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2719 programname="cmd. line";
2720 parse_program(argv[optind++]);
2724 /* fill in ARGV array */
2725 setvar_i(V[ARGC], argc - optind + 1);
2726 setari_u(V[ARGV], 0, "awk");
2727 for(i=optind; i < argc; i++)
2728 setari_u(V[ARGV], i+1-optind, argv[i]);
2730 evaluate(beginseq.first, &tv);
2731 if (! mainseq.first && ! endseq.first)
2732 awk_exit(EXIT_SUCCESS);
2734 /* input file could already be opened in BEGIN block */
2735 if (! iF) iF = next_input_file();
2737 /* passing through input files */
2741 setvar_i(V[FNR], 0);
2743 while ((c = awk_getline(iF, V[F0])) > 0) {
2748 evaluate(mainseq.first, &tv);
2755 runtime_error(strerror(errno));
2757 iF = next_input_file();
2761 awk_exit(EXIT_SUCCESS);