1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
/* Variable flags, stored in the `type` member of a var and tested/combined
 * with bitwise operators throughout the file. */
29 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
30 #define VF_ARRAY 0x0002 /* 1 = it's an array */
32 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
33 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
34 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
35 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
36 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
37 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
38 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
40 /* these flags are static, don't change them when value is changed */
/* clrvar() masks the type with VF_DONTTOUCH, so only these bits survive a clear */
41 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
44 typedef struct var_s {
45 unsigned short type; /* flags */
49 int aidx; /* func arg idx (for compilation stage) */
50 struct xhash_s *array; /* array ptr */
51 struct var_s *parent; /* for func args, ptr to actual parameter */
52 char **walker; /* list of array elements (for..in) */
56 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
57 typedef struct chain_s {
64 typedef struct func_s {
70 typedef struct rstream_s {
76 unsigned short is_pipe;
79 typedef struct hash_item_s {
81 struct var_s v; /* variable/array hash */
82 struct rstream_s rs; /* redirect streams hash */
83 struct func_s f; /* functions hash */
85 struct hash_item_s *next; /* next in chain */
86 char name[1]; /* really it's longer */
89 typedef struct xhash_s {
90 unsigned int nel; /* num of elements */
91 unsigned int csize; /* current hash size */
92 unsigned int nprime; /* next hash size in PRIMES[] */
93 unsigned int glen; /* summary length of item names */
94 struct hash_item_s **items;
98 typedef struct node_s {
100 unsigned short lineno;
119 /* Block of temporary variables */
120 typedef struct nvblock_s {
123 struct nvblock_s *prev;
124 struct nvblock_s *next;
128 typedef struct tsplitter_s {
133 /* simple token classes */
134 /* Order and hex values are very important!!! See next_token() */
135 #define TC_SEQSTART 1 /* ( */
136 #define TC_SEQTERM (1 << 1) /* ) */
137 #define TC_REGEXP (1 << 2) /* /.../ */
138 #define TC_OUTRDR (1 << 3) /* | > >> */
139 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
140 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
141 #define TC_BINOPX (1 << 6) /* two-opnd operator */
142 #define TC_IN (1 << 7)
143 #define TC_COMMA (1 << 8)
144 #define TC_PIPE (1 << 9) /* input redirection pipe */
145 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
146 #define TC_ARRTERM (1 << 11) /* ] */
147 #define TC_GRPSTART (1 << 12) /* { */
148 #define TC_GRPTERM (1 << 13) /* } */
149 #define TC_SEMICOL (1 << 14)
150 #define TC_NEWLINE (1 << 15)
151 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
152 #define TC_WHILE (1 << 17)
153 #define TC_ELSE (1 << 18)
154 #define TC_BUILTIN (1 << 19)
155 #define TC_GETLINE (1 << 20)
156 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
157 #define TC_BEGIN (1 << 22)
158 #define TC_END (1 << 23)
159 #define TC_EOF (1 << 24)
160 #define TC_VARIABLE (1 << 25)
161 #define TC_ARRAY (1 << 26)
162 #define TC_FUNCTION (1 << 27)
163 #define TC_STRING (1 << 28)
164 #define TC_NUMBER (1 << 29)
166 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
168 /* combined token classes */
169 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
170 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
171 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
172 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
174 #define TC_STATEMNT (TC_STATX | TC_WHILE)
175 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
177 /* word tokens, cannot mean something else if not expected */
178 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
179 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
181 /* discard newlines after these */
182 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
183 TC_BINOP | TC_OPTERM)
185 /* what can expression begin with */
186 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
187 /* what can group begin with */
188 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
190 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
191 /* operator is inserted between them */
192 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
193 TC_STRING | TC_NUMBER | TC_UOPPOST)
194 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
196 #define OF_RES1 0x010000
197 #define OF_RES2 0x020000
198 #define OF_STR1 0x040000
199 #define OF_STR2 0x080000
200 #define OF_NUM1 0x100000
201 #define OF_CHECKED 0x200000
203 /* combined operator flags */
206 #define xS (OF_RES2 | OF_STR2)
208 #define VV (OF_RES1 | OF_RES2)
209 #define Nx (OF_RES1 | OF_NUM1)
210 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
211 #define Sx (OF_RES1 | OF_STR1)
212 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
213 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
215 #define OPCLSMASK 0xFF00
216 #define OPNMASK 0x007F
218 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
219 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
220 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
222 #define P(x) (x << 24)
223 #define PRIMASK 0x7F000000
224 #define PRIMASK2 0x7E000000
226 /* Operation classes */
228 #define SHIFT_TIL_THIS 0x0600
229 #define RECUR_FROM_THIS 0x1000
232 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
233 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
235 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
236 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
237 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
239 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
240 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
241 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
242 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
243 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
244 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
245 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
246 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
249 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
253 /* simple builtins */
255 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
256 F_ti, F_le, F_sy, F_ff, F_cl
261 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
265 /* tokens and their corresponding info values */
267 #define NTC "\377" /* switch to next token class (tc<<1) */
270 #define OC_B OC_BUILTIN
272 static char * const tokenlist =
275 "\1/" NTC /* REGEXP */
276 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
277 "\2++" "\2--" NTC /* UOPPOST */
278 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
279 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
280 "\2*=" "\2/=" "\2%=" "\2^="
281 "\1+" "\1-" "\3**=" "\2**"
282 "\1/" "\1%" "\1^" "\1*"
283 "\2!=" "\2>=" "\2<=" "\1>"
284 "\1<" "\2!~" "\1~" "\2&&"
285 "\2||" "\1?" "\1:" NTC
289 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
295 "\2if" "\2do" "\3for" "\5break" /* STATX */
296 "\10continue" "\6delete" "\5print"
297 "\6printf" "\4next" "\10nextfile"
298 "\6return" "\4exit" NTC
302 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
303 "\3cos" "\3exp" "\3int" "\3log"
304 "\4rand" "\3sin" "\4sqrt" "\5srand"
305 "\6gensub" "\4gsub" "\5index" "\6length"
306 "\5match" "\5split" "\7sprintf" "\3sub"
307 "\6substr" "\7systime" "\10strftime"
308 "\7tolower" "\7toupper" NTC
310 "\4func" "\10function" NTC
315 static const uint32_t tokeninfo[] = {
320 xS|'a', xS|'w', xS|'|',
321 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
322 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
324 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
325 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
326 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
327 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
328 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
329 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
330 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
331 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
332 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
333 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
334 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
335 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
336 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
337 OC_COLON|xx|P(67)|':',
340 OC_PGETLINE|SV|P(37),
341 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
342 OC_UNARY|xV|P(19)|'!',
348 ST_IF, ST_DO, ST_FOR, OC_BREAK,
349 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
350 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
351 OC_RETURN|Vx, OC_EXIT|Nx,
355 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
356 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
357 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
358 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
359 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
360 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
361 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
368 /* internal variable names and their initial values */
369 /* asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
371 CONVFMT=0, OFMT, FS, OFS,
372 ORS, RS, RT, FILENAME,
373 SUBSEP, ARGIND, ARGC, ARGV,
376 ENVIRON, F0, _intvarcount_
379 static char * vNames =
380 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
381 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
382 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
384 "NR\0" "NF\0*" "IGNORECASE\0*"
385 "ENVIRON\0" "$\0*" "\0";
387 static char * vValues =
388 "%.6g\0" "%.6g\0" " \0" " \0"
389 "\n\0" "\n\0" "\0" "\0"
393 /* hash size may grow to these values */
/* FIRST_PRIME is the bucket count hash_init() starts with; PRIMES[] lists the
 * sizes hash_rebuild() steps through, indexed by xhash.nprime.
 * The macro must not end in ';' - otherwise it only works as the last token
 * of a statement and breaks inside any larger expression. */
394 #define FIRST_PRIME 61
395 static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
396 enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned int) };
400 extern char **environ;
402 static var * V[_intvarcount_];
403 static chain beginseq, mainseq, endseq, *seq;
404 static int nextrec, nextfile;
405 static node *break_ptr, *continue_ptr;
407 static xhash *vhash, *ahash, *fdhash, *fnhash;
408 static char *programname;
410 static int is_f0_split;
413 static tsplitter fsplitter, rsplitter;
429 /* function prototypes */
430 static void handle_special(var *);
431 static node *parse_expr(uint32_t);
432 static void chain_group(void);
433 static var *evaluate(node *, var *);
434 static rstream *next_input_file(void);
435 static int fmt_num(char *, int, const char *, double, int);
436 static int awk_exit(int) ATTRIBUTE_NORETURN;
438 /* ---- error handling ---- */
440 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
441 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
442 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
443 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
444 static const char EMSG_INV_FMT[] = "Invalid format specifier";
445 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
446 static const char EMSG_NOT_ARRAY[] = "Not an array";
447 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
448 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
449 #ifndef CONFIG_FEATURE_AWK_MATH
450 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
453 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
454 static void syntax_error(const char * const message)
456 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
/* Runtime errors share the syntax_error() path: the message is reported as
 * "programname:lineno: message" and the call does not return. */
459 #define runtime_error(x) syntax_error(x)
462 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name. Each character c updates the running value as
 * idx = c + idx*63 (written as (idx << 6) - idx). Callers reduce the result
 * modulo the table size (hash->csize).
 * NOTE(review): the initial value of idx and the return statement are not
 * visible in this excerpt. */
464 static unsigned int hashidx(const char *name)
468 while (*name) idx = *name++ + (idx << 6) - idx;
472 /* create new hash */
473 static xhash *hash_init(void)
477 newhash = (xhash *)xzalloc(sizeof(xhash));
478 newhash->csize = FIRST_PRIME;
479 newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
484 /* find item in hash, return ptr to data, NULL if not found */
485 static void *hash_search(xhash *hash, const char *name)
489 hi = hash->items [ hashidx(name) % hash->csize ];
491 if (strcmp(hi->name, name) == 0)
498 /* grow hash if it becomes too big */
499 static void hash_rebuild(xhash *hash)
501 unsigned int newsize, i, idx;
502 hash_item **newitems, *hi, *thi;
504 if (hash->nprime == NPRIMES)
507 newsize = PRIMES[hash->nprime++];
508 newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
510 for (i=0; i<hash->csize; i++) {
515 idx = hashidx(thi->name) % newsize;
516 thi->next = newitems[idx];
522 hash->csize = newsize;
523 hash->items = newitems;
526 /* find item in hash, add it if necessary. Return ptr to data */
527 static void *hash_find(xhash *hash, const char *name)
533 hi = hash_search(hash, name);
535 if (++hash->nel / hash->csize > 10)
538 l = strlen(name) + 1;
539 hi = xzalloc(sizeof(hash_item) + l);
540 memcpy(hi->name, name, l);
542 idx = hashidx(name) % hash->csize;
543 hi->next = hash->items[idx];
544 hash->items[idx] = hi;
/* Typed find-or-add wrappers around hash_find(). findvar() takes the table
 * explicitly; newvar(), newfile() and newfunc() use the global vhash, fdhash
 * and fnhash tables. Each expansion is fully parenthesized so that a trailing
 * operator such as -> or [] applies to the cast result, not to the void *
 * returned by hash_find(). */
550 #define findvar(hash, name) ((var *) hash_find ( (hash) , (name) ))
551 #define newvar(name) ((var *) hash_find ( vhash , (name) ))
552 #define newfile(name) ((rstream *) hash_find ( fdhash , (name) ))
553 #define newfunc(name) ((func *) hash_find ( fnhash , (name) ))
555 static void hash_remove(xhash *hash, const char *name)
557 hash_item *hi, **phi;
559 phi = &(hash->items[ hashidx(name) % hash->csize ]);
562 if (strcmp(hi->name, name) == 0) {
563 hash->glen -= (strlen(name) + 1);
573 /* ------ some useful functions ------ */
575 static void skip_spaces(char **s)
579 while(*p == ' ' || *p == '\t' ||
580 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
586 static char *nextword(char **s)
595 static char nextchar(char **s)
601 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
602 if (c == '\\' && *s == pps) c = *((*s)++);
/* Like isalnum(), but also accepts '_' (identifier characters). */
606 static inline int isalnum_(int c)
608 return (isalnum(c) || c == '_');
/* Open path with the given mode via bb_xfopen(); the exact name "-" yields
 * stdin instead of opening a file. */
611 static FILE *afopen(const char *path, const char *mode)
613 return (*path == '-' && *(path+1) == '\0') ? stdin : bb_xfopen(path, mode);
616 /* -------- working with variables (set/get/copy/etc) -------- */
/* Resolve v to an array. Variables flagged VF_CHILD (function arguments) are
 * stepped through first; if the resulting variable is not yet flagged
 * VF_ARRAY, a fresh hash is created for it with hash_init().
 * NOTE(review): the loop body and the return statement are not visible in
 * this excerpt - presumably it follows x.parent and returns the x.array hash. */
618 static xhash *iamarray(var *v)
622 while (a->type & VF_CHILD)
625 if (! (a->type & VF_ARRAY)) {
627 a->x.array = hash_init();
632 static void clear_array(xhash *array)
637 for (i=0; i<array->csize; i++) {
638 hi = array->items[i];
642 free(thi->data.v.string);
645 array->items[i] = NULL;
647 array->glen = array->nel = 0;
650 /* clear a variable */
651 static var *clrvar(var *v)
653 if (!(v->type & VF_FSTR))
656 v->type &= VF_DONTTOUCH;
662 /* assign string value to variable */
663 static var *setvar_p(var *v, char *value)
672 /* same as setvar_p but make a copy of string */
/* A NULL or empty value is passed on as NULL, so no copy is allocated for it. */
673 static var *setvar_s(var *v, const char *value)
675 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
678 /* same as setvar_s but set USER flag */
679 static var *setvar_u(var *v, const char *value)
686 /* set array element to user string */
687 static void setari_u(var *a, int idx, const char *s)
690 static char sidx[12];
692 sprintf(sidx, "%d", idx);
693 v = findvar(iamarray(a), sidx);
697 /* assign numeric value to variable */
698 static var *setvar_i(var *v, double value)
701 v->type |= VF_NUMBER;
/* Return the string value of v. Never returns NULL: a variable without a
 * string yields "". A numeric value that has no cached string yet is
 * formatted with the current CONVFMT, stored in v->string and marked
 * VF_CACHED so the conversion is not repeated. */
707 static char *getvar_s(var *v)
709 /* if v is numeric and has no cached string, convert it to string */
710 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
711 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
712 v->string = bb_xstrdup(buf);
713 v->type |= VF_CACHED;
715 return (v->string == NULL) ? "" : v->string;
718 static double getvar_i(var *v)
722 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
726 v->number = strtod(s, &s);
727 if (v->type & VF_USER) {
735 v->type |= VF_CACHED;
740 static var *copyvar(var *dest, const var *src)
744 dest->type |= (src->type & ~VF_DONTTOUCH);
745 dest->number = src->number;
747 dest->string = bb_xstrdup(src->string);
749 handle_special(dest);
/* Add 1 to the numeric value of v and store it back; returns whatever
 * setvar_i() returns. */
753 static var *incvar(var *v)
755 return setvar_i(v, getvar_i(v)+1.);
758 /* return true if v is number or numeric string */
759 static int is_numeric(var *v)
/* The XOR inverts the VF_DIRTY bit before masking, so the result is nonzero
 * when VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear. */
762 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
765 /* return 1 when value of v corresponds to true, 0 otherwise */
766 static int istrue(var *v)
769 return (v->number == 0) ? 0 : 1;
771 return (v->string && *(v->string)) ? 1 : 0;
774 /* temporary variables allocator. Last allocated should be first freed */
775 static var *nvalloc(int n)
783 if ((cb->pos - cb->nv) + n <= cb->size) break;
788 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
789 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
794 if (pb) pb->next = cb;
800 while (v < cb->pos) {
809 static void nvfree(var *v)
813 if (v < cb->nv || v >= cb->pos)
814 runtime_error(EMSG_INTERNAL_ERROR);
816 for (p=v; p<cb->pos; p++) {
817 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
818 clear_array(iamarray(p));
819 free(p->x.array->items);
822 if (p->type & VF_WALK)
829 while (cb->prev && cb->pos == cb->nv) {
834 /* ------- awk program text parsing ------- */
836 /* Parse the next token pointed to by global pos, place results into global t.
837 * If the token isn't expected, give up (syntax error). Return the token class
839 static uint32_t next_token(uint32_t expected)
846 static int concat_inserted;
847 static uint32_t save_tclass, save_info;
848 static uint32_t ltclass = TC_OPTERM;
854 } else if (concat_inserted) {
856 concat_inserted = FALSE;
857 t.tclass = save_tclass;
868 while (*p != '\n' && *p != '\0') p++;
876 } else if (*p == '\"') {
880 if (*p == '\0' || *p == '\n')
881 syntax_error(EMSG_UNEXP_EOS);
882 *(s++) = nextchar(&p);
888 } else if ((expected & TC_REGEXP) && *p == '/') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 if ((*s++ = *p++) == '\\') {
896 *(s-1) = bb_process_escape_sequence((const char **)&p);
897 if (*pp == '\\') *s++ = '\\';
898 if (p == pp) *s++ = *p++;
905 } else if (*p == '.' || isdigit(*p)) {
907 t.number = strtod(p, &p);
909 syntax_error(EMSG_UNEXP_TOKEN);
913 /* search for something known */
923 /* if token class is expected, token
924 * matches and it's not a longer word,
925 * then this is what we are looking for
927 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
928 *tl == *p && strncmp(p, tl, l) == 0 &&
929 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
939 /* it's a name (var/array/function),
940 * otherwise it's something wrong
943 syntax_error(EMSG_UNEXP_TOKEN);
946 while(isalnum_(*(++p))) {
951 /* also consume whitespace between functionname and bracket */
952 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
965 /* skipping newlines in some cases */
966 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
969 /* insert concatenation operator when needed */
970 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
971 concat_inserted = TRUE;
975 t.info = OC_CONCAT | SS | P(35);
982 /* Are we ready for this? */
983 if (! (ltclass & expected))
984 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
985 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
/* Set the rollback flag on the global token t.
 * NOTE(review): presumably this makes the next next_token() call hand back
 * the current token again - the code that reads t.rollback is not visible in
 * this excerpt. */
990 static void rollback_token(void) { t.rollback = TRUE; }
992 static node *new_node(uint32_t info)
996 n = (node *)xzalloc(sizeof(node));
/* Turn n into an OC_REGEXP node for pattern s. The pattern is compiled twice
 * as an extended regex: into re[0] case-sensitively and into re[1] with
 * REG_ICASE, so re must point to storage for two regex_t objects. */
1002 static node *mk_re_node(char *s, node *n, regex_t *re)
1004 n->info = OC_REGEXP;
1007 xregcomp(re, s, REG_EXTENDED);
1008 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: require '(' (TC_SEQSTART), then parse an
 * expression terminated by ')' (TC_SEQTERM) and return its subtree. */
1013 static node *condition(void)
1015 next_token(TC_SEQSTART);
1016 return parse_expr(TC_SEQTERM);
1019 /* parse expression terminated by given argument, return ptr
1020 * to built subtree. Terminator is eaten by parse_expr */
1021 static node *parse_expr(uint32_t iexp)
1030 sn.r.n = glptr = NULL;
1031 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1033 while (! ((tc = next_token(xtc)) & iexp)) {
1034 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1035 /* input redirection (<) attached to glptr node */
1036 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1038 xtc = TC_OPERAND | TC_UOPPRE;
1041 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1042 /* for binary and postfix-unary operators, jump back over
1043 * previous operators with higher priority */
1045 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1046 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1048 if ((t.info & OPCLSMASK) == OC_TERNARY)
1050 cn = vn->a.n->r.n = new_node(t.info);
1052 if (tc & TC_BINOP) {
1054 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1055 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1057 next_token(TC_GETLINE);
1058 /* give maximum priority to this pipe */
1059 cn->info &= ~PRIMASK;
1060 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1069 /* for operands and prefix-unary operators, attach them
1072 cn = vn->r.n = new_node(t.info);
1074 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1075 if (tc & (TC_OPERAND | TC_REGEXP)) {
1076 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1077 /* one should be very careful with switch on tclass -
1078 * only simple tclasses should be used! */
1083 if ((v = hash_search(ahash, t.string)) != NULL) {
1084 cn->info = OC_FNARG;
1085 cn->l.i = v->x.aidx;
1087 cn->l.v = newvar(t.string);
1089 if (tc & TC_ARRAY) {
1091 cn->r.n = parse_expr(TC_ARRTERM);
1098 v = cn->l.v = xzalloc(sizeof(var));
1100 setvar_i(v, t.number);
1102 setvar_s(v, t.string);
1106 mk_re_node(t.string, cn,
1107 (regex_t *)xzalloc(sizeof(regex_t)*2));
1112 cn->r.f = newfunc(t.string);
1113 cn->l.n = condition();
1117 cn = vn->r.n = parse_expr(TC_SEQTERM);
1123 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1127 cn->l.n = condition();
1136 /* add node to chain. Return ptr to alloc'd node */
1137 static node *chain_node(uint32_t info)
1142 seq->first = seq->last = new_node(0);
1144 if (seq->programname != programname) {
1145 seq->programname = programname;
1146 n = chain_node(OC_NEWSOURCE);
1147 n->l.s = bb_xstrdup(programname);
1152 seq->last = n->a.n = new_node(OC_DONE);
1157 static void chain_expr(uint32_t info)
1161 n = chain_node(info);
1162 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1163 if (t.tclass & TC_GRPTERM)
1167 static node *chain_loop(node *nn)
1169 node *n, *n2, *save_brk, *save_cont;
1171 save_brk = break_ptr;
1172 save_cont = continue_ptr;
1174 n = chain_node(OC_BR | Vx);
1175 continue_ptr = new_node(OC_EXEC);
1176 break_ptr = new_node(OC_EXEC);
1178 n2 = chain_node(OC_EXEC | Vx);
1181 continue_ptr->a.n = n2;
1182 break_ptr->a.n = n->r.n = seq->last;
1184 continue_ptr = save_cont;
1185 break_ptr = save_brk;
1190 /* parse group and attach it to chain */
1191 static void chain_group(void)
1197 c = next_token(TC_GRPSEQ);
1198 } while (c & TC_NEWLINE);
1200 if (c & TC_GRPSTART) {
1201 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1202 if (t.tclass & TC_NEWLINE) continue;
1206 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1208 chain_expr(OC_EXEC | Vx);
1209 } else { /* TC_STATEMNT */
1210 switch (t.info & OPCLSMASK) {
1212 n = chain_node(OC_BR | Vx);
1213 n->l.n = condition();
1215 n2 = chain_node(OC_EXEC);
1217 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1219 n2->a.n = seq->last;
1227 n = chain_loop(NULL);
1232 n2 = chain_node(OC_EXEC);
1233 n = chain_loop(NULL);
1235 next_token(TC_WHILE);
1236 n->l.n = condition();
1240 next_token(TC_SEQSTART);
1241 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1242 if (t.tclass & TC_SEQTERM) { /* for-in */
1243 if ((n2->info & OPCLSMASK) != OC_IN)
1244 syntax_error(EMSG_UNEXP_TOKEN);
1245 n = chain_node(OC_WALKINIT | VV);
1248 n = chain_loop(NULL);
1249 n->info = OC_WALKNEXT | Vx;
1251 } else { /* for(;;) */
1252 n = chain_node(OC_EXEC | Vx);
1254 n2 = parse_expr(TC_SEMICOL);
1255 n3 = parse_expr(TC_SEQTERM);
1265 n = chain_node(t.info);
1266 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1267 if (t.tclass & TC_OUTRDR) {
1269 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1271 if (t.tclass & TC_GRPTERM)
1276 n = chain_node(OC_EXEC);
1281 n = chain_node(OC_EXEC);
1282 n->a.n = continue_ptr;
1285 /* delete, next, nextfile, return, exit */
1293 static void parse_program(char *p)
1302 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1303 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1305 if (tclass & TC_OPTERM)
1309 if (tclass & TC_BEGIN) {
1313 } else if (tclass & TC_END) {
1317 } else if (tclass & TC_FUNCDECL) {
1318 next_token(TC_FUNCTION);
1320 f = newfunc(t.string);
1321 f->body.first = NULL;
1323 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1324 v = findvar(ahash, t.string);
1325 v->x.aidx = (f->nargs)++;
1327 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1334 } else if (tclass & TC_OPSEQ) {
1336 cn = chain_node(OC_TEST);
1337 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1338 if (t.tclass & TC_GRPSTART) {
1342 chain_node(OC_PRINT);
1344 cn->r.n = mainseq.last;
1346 } else /* if (tclass & TC_GRPSTART) */ {
1354 /* -------- program execution part -------- */
/* (Re)initialise the splitter spl from the separator string s.
 * A string longer than one character is compiled as a regular expression via
 * mk_re_node(); the first character of s is stored directly in n->info on the
 * remaining path.
 * The opcode-class test must use the bitwise mask: with logical && the left
 * side is only 0 or 1 and can never equal OC_REGEXP, so the block guarded by
 * it was dead code. */
1356 static node *mk_splitter(char *s, tsplitter *spl)
1364 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1368 if (strlen(s) > 1) {
1369 mk_re_node(s, n, re);
1371 n->info = (uint32_t) *s;
1377 /* Use node as a regular expression. Supplied with a node ptr and regex_t
1378 * storage space. Returns a ptr to the regex (if the result points to preg, it
1379 * should later be regfree'd manually)
1381 static regex_t *as_regex(node *op, regex_t *preg)
1386 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1387 return icase ? op->r.ire : op->l.re;
1390 s = getvar_s(evaluate(op, v));
1391 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1397 /* gradually increasing buffer */
/* Ensure *b can hold n bytes: when *b is NULL or n >= *size, the buffer is
 * reallocated to n + n/2 + 80 bytes and that new capacity is stored in *size.
 * Otherwise nothing changes. */
1398 static void qrealloc(char **b, int n, int *size)
1400 if (! *b || n >= *size)
1401 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1404 /* resize field storage space */
1405 static void fsrealloc(int size)
1407 static int maxfields = 0;
1410 if (size >= maxfields) {
1412 maxfields = size + 16;
1413 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1414 for (; i<maxfields; i++) {
1415 Fields[i].type = VF_SPECIAL;
1416 Fields[i].string = NULL;
1420 if (size < nfields) {
1421 for (i=size; i<nfields; i++) {
1428 static int awk_split(char *s, node *spl, char **slist)
1433 regmatch_t pmatch[2];
1435 /* in worst case, each char would be a separate field */
1436 *slist = s1 = bb_xstrndup(s, strlen(s) * 2 + 3);
1438 c[0] = c[1] = (char)spl->info;
1440 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1442 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1444 l = strcspn(s, c+2);
1445 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1446 pmatch[0].rm_so <= l) {
1447 l = pmatch[0].rm_so;
1448 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1450 pmatch[0].rm_eo = l;
1451 if (*(s+l)) pmatch[0].rm_eo++;
1457 s += pmatch[0].rm_eo;
1460 } else if (c[0] == '\0') { /* null split */
1466 } else if (c[0] != ' ') { /* single-character split */
1468 c[0] = toupper(c[0]);
1469 c[1] = tolower(c[1]);
1472 while ((s1 = strpbrk(s1, c))) {
1476 } else { /* space split */
1478 while (isspace(*s)) s++;
1481 while (*s && !isspace(*s))
1489 static void split_f0(void)
1491 static char *fstrings = NULL;
1501 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1504 for (i=0; i<n; i++) {
1505 Fields[i].string = nextword(&s);
1506 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1509 /* set NF manually to avoid side effects */
1511 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1512 V[NF]->number = nfields;
1515 /* perform additional actions when some internal variables changed */
1516 static void handle_special(var *v)
1520 int sl, l, len, i, bsize;
1522 if (! (v->type & VF_SPECIAL))
1526 n = (int)getvar_i(v);
1529 /* recalculate $0 */
1530 sep = getvar_s(V[OFS]);
1534 for (i=0; i<n; i++) {
1535 s = getvar_s(&Fields[i]);
1538 memcpy(b+len, sep, sl);
1541 qrealloc(&b, len+l+sl, &bsize);
1542 memcpy(b+len, s, l);
1545 if (b) b[len] = '\0';
1549 } else if (v == V[F0]) {
1550 is_f0_split = FALSE;
1552 } else if (v == V[FS]) {
1553 mk_splitter(getvar_s(v), &fsplitter);
1555 } else if (v == V[RS]) {
1556 mk_splitter(getvar_s(v), &rsplitter);
1558 } else if (v == V[IGNORECASE]) {
1562 n = getvar_i(V[NF]);
1563 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1564 /* right here v is invalid. Just to note... */
1568 /* step through func/builtin/etc arguments */
1569 static node *nextarg(node **pn)
1574 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1583 static void hashwalk_init(var *v, xhash *array)
1589 if (v->type & VF_WALK)
1593 w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
1594 *w = *(w+1) = (char *)(w + 2);
1595 for (i=0; i<array->csize; i++) {
1596 hi = array->items[i];
1598 strcpy(*w, hi->name);
1605 static int hashwalk_next(var *v)
1613 setvar_s(v, nextword(w+1));
1617 /* evaluate node, return 1 when result is true, 0 otherwise */
1618 static int ptest(node *pattern)
1621 return istrue(evaluate(pattern, &v));
1624 /* read next record from stream rsm into a variable v */
1625 static int awk_getline(rstream *rsm, var *v)
1628 regmatch_t pmatch[2];
1629 int a, p, pp=0, size;
1630 int fd, so, eo, r, rp;
1633 /* we're using our own buffer since we need access to accumulating
1636 fd = fileno(rsm->F);
1641 c = (char) rsplitter.n.info;
1644 if (! m) qrealloc(&m, 256, &size);
1650 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1651 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1652 b, 1, pmatch, 0) == 0) {
1653 so = pmatch[0].rm_so;
1654 eo = pmatch[0].rm_eo;
1658 } else if (c != '\0') {
1659 s = strchr(b+pp, c);
1660 if (! s) s = memchr(b+pp, '\0', p - pp);
1667 while (b[rp] == '\n')
1669 s = strstr(b+rp, "\n\n");
1672 while (b[eo] == '\n') eo++;
1680 memmove(m, (const void *)(m+a), p+1);
1685 qrealloc(&m, a+p+128, &size);
1688 p += safe_read(fd, b+p, size-p-1);
1692 setvar_i(V[ERRNO], errno);
1701 c = b[so]; b[so] = '\0';
1705 c = b[eo]; b[eo] = '\0';
1706 setvar_s(V[RT], b+so);
/* Format the number n into b (at most size bytes, via snprintf).
 * - If int_as_int is set and n equals its own int truncation, it is printed
 *   with "%d".
 * - Otherwise the last character of format selects the conversion: one of
 *   "diouxX" passes n converted to int, one of "eEfgG" passes n as a double;
 *   anything else raises EMSG_INV_FMT.
 * NOTE(review): the return statement is not visible in this excerpt; r holds
 * snprintf's result. */
1718 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1722 const char *s=format;
1724 if (int_as_int && n == (int)n) {
1725 r = snprintf(b, size, "%d", (int)n);
/* walk s to the end of the format; c ends up as its last character */
1727 do { c = *s; } while (*s && *++s);
1728 if (strchr("diouxX", c)) {
1729 r = snprintf(b, size, format, (int)n);
1730 } else if (strchr("eEfgG", c)) {
1731 r = snprintf(b, size, format, n);
1733 runtime_error(EMSG_INV_FMT);
1740 /* formatted output into an allocated buffer, return ptr to buffer */
1741 static char *awk_printf(node *n)
1744 char *fmt, *s, *s1, *f;
1745 int i, j, incr, bsize;
1750 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1755 while (*f && (*f != '%' || *(++f) == '%'))
1757 while (*f && !isalpha(*f))
1760 incr = (f - s) + MAXVARFMT;
1761 qrealloc(&b, incr+i, &bsize);
1762 c = *f; if (c != '\0') f++;
1763 c1 = *f ; *f = '\0';
1764 arg = evaluate(nextarg(&n), v);
1767 if (c == 'c' || !c) {
1768 i += sprintf(b+i, s,
1769 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1771 } else if (c == 's') {
1773 qrealloc(&b, incr+i+strlen(s1), &bsize);
1774 i += sprintf(b+i, s, s1);
1777 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1781 /* if there was an error while sprintf, return value is negative */
1786 b = xrealloc(b, i+1);
1793 /* common substitution routine
1794 * replace the (nm)th substring of (src) that matches (rn) with (repl), store
1795 * the result into (dest), return the number of substitutions. If nm=0, replace
1796 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1797 * subexpression matching (\1-\9)
/* Regex substitution worker.  rn is the pattern node, repl the replacement
 * text, nm the match selector, src/dest the source and destination
 * variables and ex the switch for digit back-references.  The new string
 * is assembled in ds, with di as the current write offset. */
1799 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1803 int c, i, j, di, rl, so, eo, nbs, n, dssize;
/* ten slots: regexec() below is asked for up to 10 match offsets */
1804 regmatch_t pmatch[10];
/* sreg is passed to as_regex() as scratch space; see the regfree() at the end */
1807 re = as_regex(rn, &sreg);
/* a missing source or destination defaults to V[F0] */
1808 if (! src) src = V[F0];
1809 if (! dest) dest = V[F0];
/* one iteration per match; sp is the current scan position.  REG_NOTBOL is
 * passed unless sp is still the start of the source string */
1814 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
/* start and end offsets of the whole match, relative to sp */
1815 so = pmatch[0].rm_so;
1816 eo = pmatch[0].rm_eo;
/* make room, then copy the first eo bytes at sp (the text up to the end of
 * the match).  NOTE(review): rl is presumably the length of repl - its
 * assignment is not visible here */
1818 qrealloc(&ds, di + eo + rl, &dssize);
1819 memcpy(ds + di, sp, eo);
/* walk the replacement text character by character */
1825 for (s = repl; *s; s++) {
/* '&', or a digit when ex is set, refers to matched text */
1831 if (c == '&' || (ex && c >= '0' && c <= '9')) {
/* NOTE(review): nbs presumably counts the backslashes seen just before
 * c; the write offset is moved back by (nbs + 3) / 2 - confirm */
1832 di -= ((nbs + 3) >> 1);
/* length of match group j, room for it, then copy it into the output */
1841 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1842 qrealloc(&ds, di + rl + n, &dssize);
1843 memcpy(ds + di, sp + pmatch[j].rm_so, n);
/* copy a single character from the source to the output and leave the
 * loop when that character was the terminating '\0' */
1854 if (! (ds[di++] = *sp++)) break;
/* append whatever is left of the source string */
1858 qrealloc(&ds, di + strlen(sp), &dssize);
1859 strcpy(ds + di, sp);
/* release the scratch regex only when as_regex() actually returned it */
1861 if (re == &sreg) regfree(re);
/* Execute a builtin function.  op is the call node; its info word selects
 * the builtin (info & OPNMASK), carries the minimum argument count in its
 * top two bits (info >> 30) and flag bits that decide how the arguments
 * are pre-evaluated.  The result is stored through res. */
1865 static var *exec_builtin(node *op, var *res)
1872 regmatch_t pmatch[2];
1874 static tsplitter tspl;
1883 isr = info = op->info;
/* arguments 2 and 3 are optional, so their value slots start out as NULL */
1886 av[2] = av[3] = NULL;
/* collect up to four arguments: an[] holds the argument nodes, av[] the
 * evaluated values (when flag bits 0x09000000 of isr are set) and as[]
 * their string forms (when bit 0x08000000 is set).  When the loop ends,
 * i equals the number of arguments that were collected */
1887 for (i=0 ; i<4 && op ; i++) {
1888 an[i] = nextarg(&op);
1889 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1890 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
/* the top two bits of info give the minimum number of arguments */
1895 if (nargs < (info >> 30))
1896 runtime_error(EMSG_TOO_FEW_ARGS);
1898 switch (info & OPNMASK) {
/* atan2(y, x): y is the first argument and x the second.  The first
 * argument must be addressed as av[0]; the counter i left over from the
 * argument loop points one past the last collected argument, which for a
 * two-argument call is the NULL slot av[2] */
1901 #ifdef CONFIG_FEATURE_AWK_MATH
1902 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
1904 runtime_error(EMSG_NO_MATH);
/* splitting: a regexp node given as third argument is used directly as
 * the splitter, anything else is evaluated to a string and turned into
 * one by mk_splitter() */
1910 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1911 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
/* n pieces result from splitting the first argument; the array passed as
 * second argument is emptied and refilled with elements 1..n */
1916 n = awk_split(as[0], spl, &s);
1918 clear_array(iamarray(av[1]));
1919 for (i=1; i<=n; i++)
1920 setari_u(av[1], i, nextword(&s1));
/* substring extraction: i becomes a zero-based start offset clamped to
 * the range 0..l, n the requested length (the rest of the string when no
 * third argument was given).  NOTE(review): l is presumably
 * strlen(as[0]); its assignment is not visible here */
1927 i = getvar_i(av[1]) - 1;
1928 if (i>l) i=l; if (i<0) i=0;
1929 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
/* NOTE(review): strncpy() does not terminate the copy when it stops after
 * n bytes - presumably a terminator is stored by code not visible here */
1932 strncpy(s, as[0]+i, n);
/* case conversion: duplicate the string, then map each character through
 * the function pointer to_xxx */
1944 s1 = s = bb_xstrdup(as[0]);
1946 *s1 = (*to_xxx)(*s1);
/* substring search: l is the last offset at which the needle can still
 * fit (ll is presumably the needle length) */
1955 l = strlen(as[0]) - ll;
1956 if (ll > 0 && l >= 0) {
/* strstr() variant: the result n is the 1-based position of the hit */
1958 s = strstr(as[0], as[1]);
1959 if (s) n = (s - as[0]) + 1;
1961 /* this piece of code is terribly slow and
1962 * really should be rewritten
/* case-insensitive variant: try every start offset 0..l in turn */
1964 for (i=0; i<=l; i++) {
1965 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
/* time formatting: the second argument supplies the time value, the
 * first argument the format (a fixed default format when there are no
 * arguments); the text is produced in the global buf */
1977 tt = getvar_i(av[1]);
1980 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1981 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
/* regex match of the first argument against the pattern given second;
 * one pmatch entry receives the extent of the match */
1987 re = as_regex(an[1], &sreg);
1988 n = regexec(re, as[0], 1, pmatch, 0);
/* fallback offsets: with these values the assignments below store 0 in
 * RSTART and -1 in RLENGTH (presumably the no-match case) */
1993 pmatch[0].rm_so = 0;
1994 pmatch[0].rm_eo = -1;
1996 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1997 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1998 setvar_i(res, pmatch[0].rm_so);
/* release the scratch regex only when as_regex() actually returned it */
1999 if (re == &sreg) regfree(re);
/* substitution with back-references enabled: the third argument selects
 * the match, the fourth is the source and the new string goes to res */
2003 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
/* in-place substitution of all matches (nm = 0) in the third argument;
 * res receives the value returned by awk_sub() */
2007 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
/* same as above, but with nm = 1 */
2011 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2020 * Evaluate node - the heart of the program. Supplied with subtree
2021 * and place where to store result. returns ptr to result.
/* XC() shifts an opcode class constant right by 8 bits so that it can be
 * used as a case label; evaluate() switches on XC(opinfo & OPCLSMASK) */
2023 #define XC(n) ((n) >> 8)
/* Evaluate the node op.  res is the place offered for the result; the
 * pointer that ends up in res may be redirected to another variable (an
 * array element, a field, the target of an assignment).  Operands are
 * pre-evaluated according to the OF_* flags in the node's info word, then
 * the node class selects the action. */
2025 static var *evaluate(node *op, var *res)
2027 /* This procedure is recursive so we should count every byte */
/* fnargs is indexed with op->l.i wherever a function argument is
 * referenced or deleted below */
2028 static var *fnargs = NULL;
/* seed is overwritten below from an explicit argument or from time() */
2029 static unsigned int seed = 1;
/* scratch regex handed to as_regex() by the match operator */
2030 static regex_t sreg;
2051 return setvar_s(res, NULL);
/* opn: the OPNMASK bits of the info word, narrowed to short */
2058 opn = (short)(opinfo & OPNMASK);
/* remember the source line of this node (presumably for error messages) */
2059 lineno = op->lineno;
2061 /* execute inevitable things */
/* OF_RES1/OF_RES2: evaluate the left operand into v1 and the right one
 * into v1+1; OF_STR1/OF_STR2: fetch their string forms; OF_NUM1: fetch
 * the numeric form of the left operand */
2063 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2064 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2065 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2066 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2067 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2069 switch (XC(opinfo & OPCLSMASK)) {
2071 /* -- iterative node type -- */
2075 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2076 /* it's range pattern */
/* OF_CHECKED in op->info records that the range is currently open: it is
 * set once the start pattern matches and cleared again when the end
 * pattern matches */
2077 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2078 op->info |= OF_CHECKED;
2079 if (ptest(op1->r.n))
2080 op->info &= ~OF_CHECKED;
/* plain pattern: continue at op->a.n when it holds, at op->r.n otherwise */
2087 op = (ptest(op1)) ? op->a.n : op->r.n;
2091 /* just evaluate an expression, also used as unconditional jump */
2095 /* branch, used in if-else and various loops */
2097 op = istrue(L.v) ? op->a.n : op->r.n;
2100 /* initialize for-in loop */
2101 case XC( OC_WALKINIT ):
2102 hashwalk_init(L.v, iamarray(R.v));
2105 /* get next array item */
2106 case XC( OC_WALKNEXT ):
2107 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2110 case XC( OC_PRINT ):
2111 case XC( OC_PRINTF ):
/* output redirection: R.s names the target; newfile() supplies the
 * stream record for that name (presumably creating it when needed) */
2114 X.rsm = newfile(R.s);
/* pipe target: failure to start the command is fatal */
2117 if((X.rsm->F = popen(R.s, "w")) == NULL)
2118 bb_perror_msg_and_die("popen");
/* file target: opened with "w" when opn is 'w', with "a" otherwise */
2121 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2127 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* print the whole record V[F0] (presumably the no-argument case) */
2129 fputs(getvar_s(V[F0]), X.F);
/* each argument is evaluated in turn; values whose primary type is number
 * are formatted through V[OFMT] into buf (integers are printed as
 * integers because int_as_int is TRUE), everything else is written as a
 * string */
2132 L.v = evaluate(nextarg(&op1), v1);
2133 if (L.v->type & VF_NUMBER) {
2134 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2135 getvar_i(L.v), TRUE);
2138 fputs(getvar_s(L.v), X.F);
/* V[OFS] goes between arguments, V[ORS] after the last one */
2141 if (op1) fputs(getvar_s(V[OFS]), X.F);
2144 fputs(getvar_s(V[ORS]), X.F);
2146 } else { /* OC_PRINTF */
2147 L.s = awk_printf(op1);
2154 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument; any other node
 * class is rejected as not being an array */
2155 X.info = op1->info & OPCLSMASK;
2156 if (X.info == OC_VAR) {
2158 } else if (X.info == OC_FNARG) {
2159 R.v = &fnargs[op1->l.i];
2161 runtime_error(EMSG_NOT_ARRAY);
/* remove the single element whose key is the evaluated subscript */
2166 L.s = getvar_s(evaluate(op1->r.n, v1));
2167 hash_remove(iamarray(R.v), L.s);
/* remove every element (presumably when no subscript was given) */
2169 clear_array(iamarray(R.v));
2173 case XC( OC_NEWSOURCE ):
2174 programname = op->l.s;
2177 case XC( OC_RETURN ):
2181 case XC( OC_NEXTFILE ):
2192 /* -- recursive node type -- */
2200 case XC( OC_FNARG ):
2201 L.v = &fnargs[op->l.i];
/* with a subscript node the result is the array element keyed by R.s,
 * otherwise the variable itself */
2204 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 when key L.s exists in array R.v, else 0 */
2208 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2211 case XC( OC_REGEXP ):
/* the subject of the match is V[F0] here */
2213 L.s = getvar_s(V[F0]);
2216 case XC( OC_MATCH ):
/* match L.s against the regex for op1 without capturing offsets; the
 * scratch regex is released when as_regex() returned it; the outcome is
 * inverted when opn is '!' */
2219 X.re = as_regex(op1, &sreg);
2220 R.i = regexec(X.re, L.s, 0, NULL, 0);
2221 if (X.re == &sreg) regfree(X.re);
2222 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2226 /* if source is a temporary string, just relink it to dest */
/* "temporary" means the right operand lives in the scratch slot v1+1
 * that the pre-evaluation above filled */
2227 if (R.v == v1+1 && R.v->string) {
2228 res = setvar_p(L.v, R.v->string);
2231 res = copyvar(L.v, R.v);
2235 case XC( OC_TERNARY ):
/* the right child must be a colon node that holds both alternatives */
2236 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2237 runtime_error(EMSG_POSSIBLE_ERROR);
2238 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* function call: a function without a body counts as undefined */
2242 if (! op->r.f->body.first)
2243 runtime_error(EMSG_UNDEF_FUNC);
/* allocate nargs+1 variables for the callee; X.v keeps the start of the
 * block while R.v walks it */
2245 X.v = R.v = nvalloc(op->r.f->nargs+1);
/* each actual argument is evaluated; its slot is flagged VF_CHILD and
 * x.parent is pointed at the evaluated value.  The walk ends once as
 * many slots are filled as the function declares */
2247 L.v = evaluate(nextarg(&op1), v1);
2249 R.v->type |= VF_CHILD;
2250 R.v->x.parent = L.v;
2251 if (++R.v - X.v >= op->r.f->nargs)
2259 res = evaluate(op->r.f->body.first, res);
2266 case XC( OC_GETLINE ):
2267 case XC( OC_PGETLINE ):
/* redirected input: L.s names the source */
2269 X.rsm = newfile(L.s);
2271 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2272 X.rsm->F = popen(L.s, "r");
2273 X.rsm->is_pipe = TRUE;
2275 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
/* plain input: read from the current input file, opening the next one
 * first when none is active */
2279 if (! iF) iF = next_input_file();
2284 setvar_i(V[ERRNO], errno);
2292 L.i = awk_getline(X.rsm, R.v);
2302 /* simple builtins */
2303 case XC( OC_FBLTIN ):
/* NOTE(review): rand() can return RAND_MAX, so this value can reach
 * exactly 1.0 - confirm whether that is acceptable */
2311 R.d = (double)rand() / (double)RAND_MAX;
2314 #ifdef CONFIG_FEATURE_AWK_MATH
2340 runtime_error(EMSG_NO_MATH);
/* new seed: the argument when one was given, the current time otherwise */
2346 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2356 L.s = getvar_s(V[F0]);
/* an empty command yields 0.  NOTE(review): ">> 8" assumes the exit code
 * sits in bits 8-15 of the status returned by system(); WEXITSTATUS()
 * would be the portable spelling - confirm */
2362 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2370 X.rsm = newfile(L.s);
/* closing a stream: find its record in fdhash by name, close it with
 * pclose() or fclose() depending on is_pipe, free its buffer and drop
 * the record from the hash */
2379 X.rsm = (rstream *)hash_search(fdhash, L.s);
2381 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2382 free(X.rsm->buffer);
2383 hash_remove(fdhash, L.s);
2386 setvar_i(V[ERRNO], errno);
2393 case XC( OC_BUILTIN ):
2394 res = exec_builtin(op, res);
2397 case XC( OC_SPRINTF ):
/* the buffer returned by awk_printf() is handed over to res */
2398 setvar_p(res, awk_printf(op1));
2401 case XC( OC_UNARY ):
2403 L.d = R.d = getvar_i(R.v);
/* logical negation of the left operand */
2418 L.d = istrue(X.v) ? 0 : 1;
2429 case XC( OC_FIELD ):
/* field number, truncated to int; Fields[] is indexed from zero */
2430 R.i = (int)getvar_i(R.v);
2438 res = &Fields[R.i-1];
2442 /* concatenation (" ") and index joining (",") */
2443 case XC( OC_CONCAT ):
2444 case XC( OC_COMMA ):
/* NOTE(review): opn received a value cast to short above; if opn itself
 * is a short, a combined length above SHRT_MAX is truncated here before
 * it reaches xmalloc() - confirm the declared type */
2445 opn = strlen(L.s) + strlen(R.s) + 2;
2446 X.s = (char *)xmalloc(opn);
/* for OC_COMMA the value of V[SUBSEP] is involved as well, so the buffer
 * grows by its length */
2448 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2449 L.s = getvar_s(V[SUBSEP]);
2450 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
/* short-circuit AND: the right side is tested only when the left is true */
2458 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
/* short-circuit OR: the right side is tested only when the left is false */
2462 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2465 case XC( OC_BINARY ):
2466 case XC( OC_REPLACE ):
2467 R.d = getvar_i(R.v);
2479 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2483 #ifdef CONFIG_FEATURE_AWK_MATH
2484 L.d = pow(L.d, R.d);
2486 runtime_error(EMSG_NO_MATH);
2490 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* remainder through a truncated quotient.  NOTE(review): the (int) cast
 * is undefined when L.d / R.d does not fit in an int - confirm */
2491 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; otherwise the value is written back into
 * X.v, the left operand recorded during pre-evaluation */
2494 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2497 case XC( OC_COMPARE ):
/* numeric comparison when both operands are numeric, otherwise string
 * comparison (ignoring case when icase is set); L.d holds the difference */
2498 if (is_numeric(L.v) && is_numeric(R.v)) {
2499 L.d = getvar_i(L.v) - getvar_i(R.v);
2501 L.s = getvar_s(L.v);
2502 R.s = getvar_s(R.v);
2503 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* the upper bits of opn select the relation, its lowest bit decides
 * whether the outcome R.i is used as is or negated */
2505 switch (opn & 0xfe) {
2516 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2520 runtime_error(EMSG_POSSIBLE_ERROR);
/* node classes are ordered: the two tests below compare the class with
 * the SHIFT_TIL_THIS and RECUR_FROM_THIS boundaries (the actions they
 * guard are not visible here) */
2522 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2524 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2534 /* -------- main & co. -------- */
/* Shutdown path: evaluates the endseq chain, then closes every pipe stream
 * still registered in fdhash.
 * NOTE(review): r is presumably the exit status - its use is not visible
 * here, confirm. */
2536 static int awk_exit(int r)
2545 evaluate(endseq.first, &tv);
2548 /* waiting for children */
/* visit every bucket of fdhash; streams that are open and flagged is_pipe
 * are closed with pclose() */
2549 for (i=0; i<fdhash->csize; i++) {
2550 hi = fdhash->items[i];
2552 if (hi->data.rs.F && hi->data.rs.is_pipe)
2553 pclose(hi->data.rs.F);
2561 /* if expr looks like "var=value", perform assignment and return 1,
2562 * otherwise return 0 */
2563 static int is_assignment(const char *expr)
2565 char *exprc, *s, *s0, *s1;
/* work on a private copy, expr itself is const */
2567 exprc = bb_xstrdup(expr);
/* not an assignment unless the text starts with a character accepted by
 * isalnum_() and contains a '=' */
2568 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value text is rewritten one character at a time through nextchar() */
2576 *(s1++) = nextchar(&s);
/* exprc names the variable (created when missing), s0 is the value text */
2579 setvar_u(newvar(exprc), s0);
2584 /* switch to next input file */
/* Takes the next operand from the ARGV array and opens it as the input. */
2585 static rstream *next_input_file(void)
/* becomes TRUE once an operand has been used as an input file */
2590 static int files_happen = FALSE;
/* close the previous input, if any, and reset the position fields */
2592 if (rsm.F) fclose(rsm.F);
2594 rsm.pos = rsm.adv = 0;
/* V[ARGIND]+1 reaching V[ARGC] means that no operand is left */
2597 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* advance V[ARGIND] and look its new value up as a key in V[ARGV] */
2603 ind = getvar_s(incvar(V[ARGIND]));
2604 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* empty operands are not opened; neither are operands that
 * is_assignment() accepts (it performs the assignment as a side effect) */
2605 if (fname && *fname && !is_assignment(fname))
2606 F = afopen(fname, "r");
2610 files_happen = TRUE;
2611 setvar_s(V[FILENAME], fname);
/* Applet entry point.  Visible steps, in order: allocate the global buffer
 * and the hash tables, create the builtin variables, register the standard
 * streams, import the environment, process the options (including loading
 * program text), fill in ARGC/ARGV, run the beginseq chain, then feed every
 * input record through the mainseq chain and finish through awk_exit(). */
2616 int awk_main(int argc, char **argv)
2623 static int from_file = FALSE;
2625 FILE *F, *stdfiles[3];
/* three names separated by embedded '\0'; consumed with nextword() below */
2626 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2628 /* allocate global buffer */
2629 buf = xmalloc(MAXVARFMT+1);
2631 vhash = hash_init();
2632 ahash = hash_init();
2633 fdhash = hash_init();
2634 fnhash = hash_init();
2636 /* initialize variables */
/* vNames lists the variable names; V[i] gets the variable created for the
 * i-th name.  A vValues entry supplies the initial string unless it starts
 * with '\377'.  A '*' found in vNames right after the name marks the
 * variable just created as VF_SPECIAL */
2637 for (i=0; *vNames; i++) {
2638 V[i] = v = newvar(nextword(&vNames));
2639 if (*vValues != '\377')
2640 setvar_s(v, nextword(&vValues));
2644 if (*vNames == '*') {
2645 v->type |= VF_SPECIAL;
/* FS and RS are passed through handle_special() once up front
 * (presumably so the side effects of their initial values take place) */
2650 handle_special(V[FS]);
2651 handle_special(V[RS]);
/* register the three standard streams under their /dev/std* names */
2653 stdfiles[0] = stdin;
2654 stdfiles[1] = stdout;
2655 stdfiles[2] = stderr;
2656 for (i=0; i<3; i++) {
2657 rsm = newfile(nextword(&stdnames));
2658 rsm->F = stdfiles[i];
/* copy the environment into the V[ENVIRON] array: each entry is
 * duplicated and located at its '=', with s used as the key and s1 as
 * the value */
2661 for (envp=environ; *envp; envp++) {
2662 s = bb_xstrdup(*envp);
2663 s1 = strchr(s, '=');
2668 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
/* options F, v, f and W each take an argument */
2673 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2676 setvar_s(V[FS], optarg);
/* the option argument must have the var=value form */
2679 if (! is_assignment(optarg))
/* program file: its name also becomes programname */
2684 F = afopen(programname = optarg, "r");
2686 /* one byte is reserved for some trick in next_token */
/* seekable file: the buffer is allocated with flen+4 bytes and the text
 * is read starting at s+1, leaving s[0] free */
2687 if (fseek(F, 0, SEEK_END) == 0) {
2689 s = (char *)xmalloc(flen+4);
2690 fseek(F, 0, SEEK_SET);
2691 i = 1 + fread(s+1, 1, flen, F);
/* otherwise: read chunks of up to 4094 bytes, growing the buffer by 4096
 * ahead of each read, until a read returns nothing */
2693 for (i=j=1; j>0; i+=j) {
2694 s = (char *)xrealloc(s, i+4096);
2695 j = fread(s+i, 1, 4094, F);
/* NOTE(review): bb_error_msg() presumably appends a newline by itself,
 * in which case the trailing "\n" prints an empty line - confirm */
2704 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
/* program text taken from the first non-option argument */
2715 programname="cmd. line";
2716 parse_program(argv[optind++]);
2720 /* fill in ARGV array */
/* ARGC counts the remaining operands plus one for element 0, which is
 * always "awk" */
2721 setvar_i(V[ARGC], argc - optind + 1);
2722 setari_u(V[ARGV], 0, "awk");
2723 for(i=optind; i < argc; i++)
2724 setari_u(V[ARGV], i+1-optind, argv[i]);
/* run the beginseq chain; when neither a mainseq nor an endseq chain
 * exists there is no need to read input */
2726 evaluate(beginseq.first, &tv);
2727 if (! mainseq.first && ! endseq.first)
2728 awk_exit(EXIT_SUCCESS);
2730 /* input file could already be opened in BEGIN block */
2731 if (! iF) iF = next_input_file();
2733 /* passing through input files */
/* V[FNR] restarts at 0 for a file; records are read into V[F0] and the
 * mainseq chain runs for each one */
2737 setvar_i(V[FNR], 0);
2739 while ((c = awk_getline(iF, V[F0])) > 0) {
2744 evaluate(mainseq.first, &tv);
/* presumably reached when awk_getline() reported a failure: the errno
 * text becomes the runtime error message */
2751 runtime_error(strerror(errno));
2753 iF = next_input_file();
2757 awk_exit(EXIT_SUCCESS);