1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
19 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
20 #define VF_ARRAY 0x0002 /* 1 = it's an array */
22 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
23 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
25 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
27 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
30 /* these flags are static, don't change them when value is changed */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34 typedef struct var_s {
35 unsigned short type; /* flags */
39 int aidx; /* func arg idx (for compilation stage) */
40 struct xhash_s *array; /* array ptr */
41 struct var_s *parent; /* for func args, ptr to actual parameter */
42 char **walker; /* list of array elements (for..in) */
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
54 typedef struct func_s {
60 typedef struct rstream_s {
66 unsigned short is_pipe;
69 typedef struct hash_item_s {
71 struct var_s v; /* variable/array hash */
72 struct rstream_s rs; /* redirect streams hash */
73 struct func_s f; /* functions hash */
75 struct hash_item_s *next; /* next in chain */
76 char name[1]; /* really it's longer */
79 typedef struct xhash_s {
80 unsigned int nel; /* num of elements */
81 unsigned int csize; /* current hash size */
82 unsigned int nprime; /* next hash size in PRIMES[] */
83 unsigned int glen; /* summary length of item names */
84 struct hash_item_s **items;
88 typedef struct node_s {
90 unsigned short lineno;
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
113 struct nvblock_s *prev;
114 struct nvblock_s *next;
118 typedef struct tsplitter_s {
123 /* simple token classes */
124 /* Order and hex values are very important!!! See next_token() */
125 #define TC_SEQSTART 1 /* ( */
126 #define TC_SEQTERM (1 << 1) /* ) */
127 #define TC_REGEXP (1 << 2) /* /.../ */
128 #define TC_OUTRDR (1 << 3) /* | > >> */
129 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
130 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
131 #define TC_BINOPX (1 << 6) /* two-opnd operator */
132 #define TC_IN (1 << 7)
133 #define TC_COMMA (1 << 8)
134 #define TC_PIPE (1 << 9) /* input redirection pipe */
135 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
136 #define TC_ARRTERM (1 << 11) /* ] */
137 #define TC_GRPSTART (1 << 12) /* { */
138 #define TC_GRPTERM (1 << 13) /* } */
139 #define TC_SEMICOL (1 << 14)
140 #define TC_NEWLINE (1 << 15)
141 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
142 #define TC_WHILE (1 << 17)
143 #define TC_ELSE (1 << 18)
144 #define TC_BUILTIN (1 << 19)
145 #define TC_GETLINE (1 << 20)
146 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
147 #define TC_BEGIN (1 << 22)
148 #define TC_END (1 << 23)
149 #define TC_EOF (1 << 24)
150 #define TC_VARIABLE (1 << 25)
151 #define TC_ARRAY (1 << 26)
152 #define TC_FUNCTION (1 << 27)
153 #define TC_STRING (1 << 28)
154 #define TC_NUMBER (1 << 29)
156 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
158 /* combined token classes */
159 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
164 #define TC_STATEMNT (TC_STATX | TC_WHILE)
165 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
171 /* discard newlines after these */
172 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173 TC_BINOP | TC_OPTERM)
175 /* what can expression begin with */
176 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183 TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
186 #define OF_RES1 0x010000
187 #define OF_RES2 0x020000
188 #define OF_STR1 0x040000
189 #define OF_STR2 0x080000
190 #define OF_NUM1 0x100000
191 #define OF_CHECKED 0x200000
193 /* combined operator flags */
196 #define xS (OF_RES2 | OF_STR2)
198 #define VV (OF_RES1 | OF_RES2)
199 #define Nx (OF_RES1 | OF_NUM1)
200 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx (OF_RES1 | OF_STR1)
202 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
205 #define OPCLSMASK 0xFF00
206 #define OPNMASK 0x007F
208 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
209 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
210 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place x in the top byte of a 32-bit info word. The argument is
 * parenthesized so that expression arguments bind as a whole, and it is
 * converted to uint32_t before shifting so that values with bit 7 set
 * (e.g. P(0x83)) do not shift into the sign bit of an int. */
#define P(x) ((uint32_t)(x) << 24)
213 #define PRIMASK 0x7F000000
214 #define PRIMASK2 0x7E000000
216 /* Operation classes */
218 #define SHIFT_TIL_THIS 0x0600
219 #define RECUR_FROM_THIS 0x1000
222 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
223 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
225 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
226 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
227 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
229 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
230 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
231 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
232 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
233 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
234 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
235 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
236 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
239 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
243 /* simple builtins */
245 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
246 F_ti, F_le, F_sy, F_ff, F_cl
251 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
255 /* tokens and their corresponding info values */
257 #define NTC "\377" /* switch to next token class (tc<<1) */
260 #define OC_B OC_BUILTIN
262 static char * const tokenlist =
265 "\1/" NTC /* REGEXP */
266 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
267 "\2++" "\2--" NTC /* UOPPOST */
268 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
269 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
270 "\2*=" "\2/=" "\2%=" "\2^="
271 "\1+" "\1-" "\3**=" "\2**"
272 "\1/" "\1%" "\1^" "\1*"
273 "\2!=" "\2>=" "\2<=" "\1>"
274 "\1<" "\2!~" "\1~" "\2&&"
275 "\2||" "\1?" "\1:" NTC
279 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
285 "\2if" "\2do" "\3for" "\5break" /* STATX */
286 "\10continue" "\6delete" "\5print"
287 "\6printf" "\4next" "\10nextfile"
288 "\6return" "\4exit" NTC
292 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
293 "\3cos" "\3exp" "\3int" "\3log"
294 "\4rand" "\3sin" "\4sqrt" "\5srand"
295 "\6gensub" "\4gsub" "\5index" "\6length"
296 "\5match" "\5split" "\7sprintf" "\3sub"
297 "\6substr" "\7systime" "\10strftime"
298 "\7tolower" "\7toupper" NTC
300 "\4func" "\10function" NTC
305 static const uint32_t tokeninfo[] = {
310 xS|'a', xS|'w', xS|'|',
311 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
312 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
314 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
315 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
316 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
317 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
318 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
319 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
320 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
321 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
322 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
323 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
324 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
325 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
326 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
327 OC_COLON|xx|P(67)|':',
330 OC_PGETLINE|SV|P(37),
331 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
332 OC_UNARY|xV|P(19)|'!',
338 ST_IF, ST_DO, ST_FOR, OC_BREAK,
339 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
340 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
341 OC_RETURN|Vx, OC_EXIT|Nx,
345 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
346 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
347 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
348 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
349 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
350 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
351 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358 /* internal variable names and their initial values */
359 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
361 CONVFMT=0, OFMT, FS, OFS,
362 ORS, RS, RT, FILENAME,
363 SUBSEP, ARGIND, ARGC, ARGV,
366 ENVIRON, F0, _intvarcount_
369 static char * vNames =
370 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
371 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
372 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
374 "NR\0" "NF\0*" "IGNORECASE\0*"
375 "ENVIRON\0" "$\0*" "\0";
377 static char * vValues =
378 "%.6g\0" "%.6g\0" " \0" " \0"
379 "\n\0" "\n\0" "\0" "\0"
383 /* hash size may grow to these values */
/* initial bucket count assigned to csize by hash_init(); no trailing ';'
 * so the macro can be used inside any expression */
#define FIRST_PRIME 61
385 static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
386 enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned int) };
390 extern char **environ;
392 static var * V[_intvarcount_];
393 static chain beginseq, mainseq, endseq, *seq;
394 static int nextrec, nextfile;
395 static node *break_ptr, *continue_ptr;
397 static xhash *vhash, *ahash, *fdhash, *fnhash;
398 static char *programname;
400 static int is_f0_split;
403 static tsplitter fsplitter, rsplitter;
419 /* function prototypes */
420 static void handle_special(var *);
421 static node *parse_expr(uint32_t);
422 static void chain_group(void);
423 static var *evaluate(node *, var *);
424 static rstream *next_input_file(void);
425 static int fmt_num(char *, int, const char *, double, int);
426 static int awk_exit(int) ATTRIBUTE_NORETURN;
428 /* ---- error handling ---- */
430 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
431 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
432 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
433 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
434 static const char EMSG_INV_FMT[] = "Invalid format specifier";
435 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
436 static const char EMSG_NOT_ARRAY[] = "Not an array";
437 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
438 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
439 #ifndef CONFIG_FEATURE_AWK_MATH
440 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
443 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
444 static void syntax_error(const char * const message)
446 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
449 #define runtime_error(x) syntax_error(x)
452 /* ---- hash stuff ---- */
454 static unsigned int hashidx(const char *name)
458 while (*name) idx = *name++ + (idx << 6) - idx;
462 /* create new hash */
463 static xhash *hash_init(void)
467 newhash = (xhash *)xzalloc(sizeof(xhash));
468 newhash->csize = FIRST_PRIME;
469 newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
474 /* find item in hash, return ptr to data, NULL if not found */
475 static void *hash_search(xhash *hash, const char *name)
479 hi = hash->items [ hashidx(name) % hash->csize ];
481 if (strcmp(hi->name, name) == 0)
488 /* grow hash if it becomes too big */
489 static void hash_rebuild(xhash *hash)
491 unsigned int newsize, i, idx;
492 hash_item **newitems, *hi, *thi;
494 if (hash->nprime == NPRIMES)
497 newsize = PRIMES[hash->nprime++];
498 newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
500 for (i=0; i<hash->csize; i++) {
505 idx = hashidx(thi->name) % newsize;
506 thi->next = newitems[idx];
512 hash->csize = newsize;
513 hash->items = newitems;
516 /* find item in hash, add it if necessary. Return ptr to data */
517 static void *hash_find(xhash *hash, const char *name)
523 hi = hash_search(hash, name);
525 if (++hash->nel / hash->csize > 10)
528 l = strlen(name) + 1;
529 hi = xzalloc(sizeof(hash_item) + l);
530 memcpy(hi->name, name, l);
532 idx = hashidx(name) % hash->csize;
533 hi->next = hash->items[idx];
534 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): find the named item in the given hash
 * (or in vhash / fdhash / fnhash), adding it if necessary, and return a
 * pointer of the matching type. The whole expansion is parenthesized so the
 * cast is applied before any postfix operator used on the macro result. */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name)        ((var *) hash_find(vhash, (name)))
#define newfile(name)       ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name)       ((func *) hash_find(fnhash, (name)))
545 static void hash_remove(xhash *hash, const char *name)
547 hash_item *hi, **phi;
549 phi = &(hash->items[ hashidx(name) % hash->csize ]);
552 if (strcmp(hi->name, name) == 0) {
553 hash->glen -= (strlen(name) + 1);
563 /* ------ some useful functions ------ */
565 static void skip_spaces(char **s)
569 while(*p == ' ' || *p == '\t' ||
570 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
576 static char *nextword(char **s)
585 static char nextchar(char **s)
591 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
592 if (c == '\\' && *s == pps) c = *((*s)++);
/* Return 1 if c is an alphanumeric character or '_', 0 otherwise.
 * The tokenizer passes bytes read through a char pointer, which may be
 * negative on platforms where char is signed; <ctype.h> classification
 * functions are only defined for EOF and unsigned char values, so the
 * argument is converted to unsigned char before calling isalnum(). */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
/* Open path with the given mode via xfopen(). The single-character name
 * "-" selects stdin instead; mode is not consulted in that case. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return xfopen(path, mode);
}
606 /* -------- working with variables (set/get/copy/etc) -------- */
608 static xhash *iamarray(var *v)
612 while (a->type & VF_CHILD)
615 if (! (a->type & VF_ARRAY)) {
617 a->x.array = hash_init();
622 static void clear_array(xhash *array)
627 for (i=0; i<array->csize; i++) {
628 hi = array->items[i];
632 free(thi->data.v.string);
635 array->items[i] = NULL;
637 array->glen = array->nel = 0;
640 /* clear a variable */
641 static var *clrvar(var *v)
643 if (!(v->type & VF_FSTR))
646 v->type &= VF_DONTTOUCH;
652 /* assign string value to variable */
653 static var *setvar_p(var *v, char *value)
662 /* same as setvar_p but make a copy of string */
663 static var *setvar_s(var *v, const char *value)
665 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
668 /* same as setvar_s but set USER flag */
669 static var *setvar_u(var *v, const char *value)
676 /* set array element to user string */
677 static void setari_u(var *a, int idx, const char *s)
680 static char sidx[12];
682 sprintf(sidx, "%d", idx);
683 v = findvar(iamarray(a), sidx);
687 /* assign numeric value to variable */
688 static var *setvar_i(var *v, double value)
691 v->type |= VF_NUMBER;
697 static char *getvar_s(var *v)
699 /* if v is numeric and has no cached string, convert it to string */
700 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
701 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
702 v->string = xstrdup(buf);
703 v->type |= VF_CACHED;
705 return (v->string == NULL) ? "" : v->string;
708 static double getvar_i(var *v)
712 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
716 v->number = strtod(s, &s);
717 if (v->type & VF_USER) {
725 v->type |= VF_CACHED;
730 static var *copyvar(var *dest, const var *src)
734 dest->type |= (src->type & ~VF_DONTTOUCH);
735 dest->number = src->number;
737 dest->string = xstrdup(src->string);
739 handle_special(dest);
743 static var *incvar(var *v)
745 return setvar_i(v, getvar_i(v)+1.);
748 /* return true if v is number or numeric string */
749 static int is_numeric(var *v)
752 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
755 /* return 1 when value of v corresponds to true, 0 otherwise */
756 static int istrue(var *v)
759 return (v->number == 0) ? 0 : 1;
761 return (v->string && *(v->string)) ? 1 : 0;
764 /* temporary variables allocator. Last allocated should be first freed */
765 static var *nvalloc(int n)
773 if ((cb->pos - cb->nv) + n <= cb->size) break;
778 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
779 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
784 if (pb) pb->next = cb;
790 while (v < cb->pos) {
799 static void nvfree(var *v)
803 if (v < cb->nv || v >= cb->pos)
804 runtime_error(EMSG_INTERNAL_ERROR);
806 for (p=v; p<cb->pos; p++) {
807 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
808 clear_array(iamarray(p));
809 free(p->x.array->items);
812 if (p->type & VF_WALK)
819 while (cb->prev && cb->pos == cb->nv) {
824 /* ------- awk program text parsing ------- */
826 /* Parse next token pointed by global pos, place results into global t.
827 * If the token isn't one of the expected classes, give up with a syntax error. Return token class
829 static uint32_t next_token(uint32_t expected)
836 static int concat_inserted;
837 static uint32_t save_tclass, save_info;
838 static uint32_t ltclass = TC_OPTERM;
844 } else if (concat_inserted) {
846 concat_inserted = FALSE;
847 t.tclass = save_tclass;
858 while (*p != '\n' && *p != '\0') p++;
866 } else if (*p == '\"') {
870 if (*p == '\0' || *p == '\n')
871 syntax_error(EMSG_UNEXP_EOS);
872 *(s++) = nextchar(&p);
878 } else if ((expected & TC_REGEXP) && *p == '/') {
882 if (*p == '\0' || *p == '\n')
883 syntax_error(EMSG_UNEXP_EOS);
884 if ((*s++ = *p++) == '\\') {
886 *(s-1) = bb_process_escape_sequence((const char **)&p);
887 if (*pp == '\\') *s++ = '\\';
888 if (p == pp) *s++ = *p++;
895 } else if (*p == '.' || isdigit(*p)) {
897 t.number = strtod(p, &p);
899 syntax_error(EMSG_UNEXP_TOKEN);
903 /* search for something known */
913 /* if token class is expected, token
914 * matches and it's not a longer word,
915 * then this is what we are looking for
917 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
918 *tl == *p && strncmp(p, tl, l) == 0 &&
919 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
929 /* it's a name (var/array/function),
930 * otherwise it's something wrong
933 syntax_error(EMSG_UNEXP_TOKEN);
936 while(isalnum_(*(++p))) {
941 /* also consume whitespace between functionname and bracket */
942 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
955 /* skipping newlines in some cases */
956 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
959 /* insert concatenation operator when needed */
960 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
961 concat_inserted = TRUE;
965 t.info = OC_CONCAT | SS | P(35);
972 /* Are we ready for this? */
973 if (! (ltclass & expected))
974 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
975 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
980 static void rollback_token(void) { t.rollback = TRUE; }
982 static node *new_node(uint32_t info)
986 n = (node *)xzalloc(sizeof(node));
992 static node *mk_re_node(char *s, node *n, regex_t *re)
997 xregcomp(re, s, REG_EXTENDED);
998 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1003 static node *condition(void)
1005 next_token(TC_SEQSTART);
1006 return parse_expr(TC_SEQTERM);
1009 /* parse expression terminated by given argument, return ptr
1010 * to built subtree. Terminator is eaten by parse_expr */
1011 static node *parse_expr(uint32_t iexp)
1020 sn.r.n = glptr = NULL;
1021 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1023 while (! ((tc = next_token(xtc)) & iexp)) {
1024 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1025 /* input redirection (<) attached to glptr node */
1026 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1028 xtc = TC_OPERAND | TC_UOPPRE;
1031 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1032 /* for binary and postfix-unary operators, jump back over
1033 * previous operators with higher priority */
1035 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1036 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1038 if ((t.info & OPCLSMASK) == OC_TERNARY)
1040 cn = vn->a.n->r.n = new_node(t.info);
1042 if (tc & TC_BINOP) {
1044 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1045 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1047 next_token(TC_GETLINE);
1048 /* give maximum priority to this pipe */
1049 cn->info &= ~PRIMASK;
1050 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1054 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1059 /* for operands and prefix-unary operators, attach them
1062 cn = vn->r.n = new_node(t.info);
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1065 if (tc & (TC_OPERAND | TC_REGEXP)) {
1066 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1067 /* one should be very careful with switch on tclass -
1068 * only simple tclasses should be used! */
1073 if ((v = hash_search(ahash, t.string)) != NULL) {
1074 cn->info = OC_FNARG;
1075 cn->l.i = v->x.aidx;
1077 cn->l.v = newvar(t.string);
1079 if (tc & TC_ARRAY) {
1081 cn->r.n = parse_expr(TC_ARRTERM);
1088 v = cn->l.v = xzalloc(sizeof(var));
1090 setvar_i(v, t.number);
1092 setvar_s(v, t.string);
1096 mk_re_node(t.string, cn,
1097 (regex_t *)xzalloc(sizeof(regex_t)*2));
1102 cn->r.f = newfunc(t.string);
1103 cn->l.n = condition();
1107 cn = vn->r.n = parse_expr(TC_SEQTERM);
1113 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1117 cn->l.n = condition();
1126 /* add node to chain. Return ptr to alloc'd node */
1127 static node *chain_node(uint32_t info)
1132 seq->first = seq->last = new_node(0);
1134 if (seq->programname != programname) {
1135 seq->programname = programname;
1136 n = chain_node(OC_NEWSOURCE);
1137 n->l.s = xstrdup(programname);
1142 seq->last = n->a.n = new_node(OC_DONE);
1147 static void chain_expr(uint32_t info)
1151 n = chain_node(info);
1152 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1153 if (t.tclass & TC_GRPTERM)
1157 static node *chain_loop(node *nn)
1159 node *n, *n2, *save_brk, *save_cont;
1161 save_brk = break_ptr;
1162 save_cont = continue_ptr;
1164 n = chain_node(OC_BR | Vx);
1165 continue_ptr = new_node(OC_EXEC);
1166 break_ptr = new_node(OC_EXEC);
1168 n2 = chain_node(OC_EXEC | Vx);
1171 continue_ptr->a.n = n2;
1172 break_ptr->a.n = n->r.n = seq->last;
1174 continue_ptr = save_cont;
1175 break_ptr = save_brk;
1180 /* parse group and attach it to chain */
1181 static void chain_group(void)
1187 c = next_token(TC_GRPSEQ);
1188 } while (c & TC_NEWLINE);
1190 if (c & TC_GRPSTART) {
1191 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1192 if (t.tclass & TC_NEWLINE) continue;
1196 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1198 chain_expr(OC_EXEC | Vx);
1199 } else { /* TC_STATEMNT */
1200 switch (t.info & OPCLSMASK) {
1202 n = chain_node(OC_BR | Vx);
1203 n->l.n = condition();
1205 n2 = chain_node(OC_EXEC);
1207 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1209 n2->a.n = seq->last;
1217 n = chain_loop(NULL);
1222 n2 = chain_node(OC_EXEC);
1223 n = chain_loop(NULL);
1225 next_token(TC_WHILE);
1226 n->l.n = condition();
1230 next_token(TC_SEQSTART);
1231 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1232 if (t.tclass & TC_SEQTERM) { /* for-in */
1233 if ((n2->info & OPCLSMASK) != OC_IN)
1234 syntax_error(EMSG_UNEXP_TOKEN);
1235 n = chain_node(OC_WALKINIT | VV);
1238 n = chain_loop(NULL);
1239 n->info = OC_WALKNEXT | Vx;
1241 } else { /* for(;;) */
1242 n = chain_node(OC_EXEC | Vx);
1244 n2 = parse_expr(TC_SEMICOL);
1245 n3 = parse_expr(TC_SEQTERM);
1255 n = chain_node(t.info);
1256 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1257 if (t.tclass & TC_OUTRDR) {
1259 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1261 if (t.tclass & TC_GRPTERM)
1266 n = chain_node(OC_EXEC);
1271 n = chain_node(OC_EXEC);
1272 n->a.n = continue_ptr;
1275 /* delete, next, nextfile, return, exit */
1283 static void parse_program(char *p)
1292 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1293 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1295 if (tclass & TC_OPTERM)
1299 if (tclass & TC_BEGIN) {
1303 } else if (tclass & TC_END) {
1307 } else if (tclass & TC_FUNCDECL) {
1308 next_token(TC_FUNCTION);
1310 f = newfunc(t.string);
1311 f->body.first = NULL;
1313 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1314 v = findvar(ahash, t.string);
1315 v->x.aidx = (f->nargs)++;
1317 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1324 } else if (tclass & TC_OPSEQ) {
1326 cn = chain_node(OC_TEST);
1327 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1328 if (t.tclass & TC_GRPSTART) {
1332 chain_node(OC_PRINT);
1334 cn->r.n = mainseq.last;
1336 } else /* if (tclass & TC_GRPSTART) */ {
1344 /* -------- program execution part -------- */
1346 static node *mk_splitter(char *s, tsplitter *spl)
1354 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1358 if (strlen(s) > 1) {
1359 mk_re_node(s, n, re);
1361 n->info = (uint32_t) *s;
1367 /* use node as a regular expression. Supplied with node ptr and regex_t
1368 * storage space. Return ptr to regex (if result points to preg, it should
1369 * regfree'd by the caller afterwards)
1371 static regex_t *as_regex(node *op, regex_t *preg)
1376 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1377 return icase ? op->r.ire : op->l.re;
1380 s = getvar_s(evaluate(op, v));
1381 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1387 /* gradually increasing buffer */
1388 static void qrealloc(char **b, int n, int *size)
1390 if (! *b || n >= *size)
1391 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1394 /* resize field storage space */
1395 static void fsrealloc(int size)
1397 static int maxfields = 0;
1400 if (size >= maxfields) {
1402 maxfields = size + 16;
1403 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1404 for (; i<maxfields; i++) {
1405 Fields[i].type = VF_SPECIAL;
1406 Fields[i].string = NULL;
1410 if (size < nfields) {
1411 for (i=size; i<nfields; i++) {
1418 static int awk_split(char *s, node *spl, char **slist)
1423 regmatch_t pmatch[2];
1425 /* in worst case, each char would be a separate field */
1426 *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1428 c[0] = c[1] = (char)spl->info;
1430 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1432 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1434 l = strcspn(s, c+2);
1435 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1436 pmatch[0].rm_so <= l) {
1437 l = pmatch[0].rm_so;
1438 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1440 pmatch[0].rm_eo = l;
1441 if (*(s+l)) pmatch[0].rm_eo++;
1447 s += pmatch[0].rm_eo;
1450 } else if (c[0] == '\0') { /* null split */
1456 } else if (c[0] != ' ') { /* single-character split */
1458 c[0] = toupper(c[0]);
1459 c[1] = tolower(c[1]);
1462 while ((s1 = strpbrk(s1, c))) {
1466 } else { /* space split */
1468 while (isspace(*s)) s++;
1471 while (*s && !isspace(*s))
1479 static void split_f0(void)
1481 static char *fstrings = NULL;
1491 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1494 for (i=0; i<n; i++) {
1495 Fields[i].string = nextword(&s);
1496 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1499 /* set NF manually to avoid side effects */
1501 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1502 V[NF]->number = nfields;
1505 /* perform additional actions when some internal variables changed */
1506 static void handle_special(var *v)
1510 int sl, l, len, i, bsize;
1512 if (! (v->type & VF_SPECIAL))
1516 n = (int)getvar_i(v);
1519 /* recalculate $0 */
1520 sep = getvar_s(V[OFS]);
1524 for (i=0; i<n; i++) {
1525 s = getvar_s(&Fields[i]);
1528 memcpy(b+len, sep, sl);
1531 qrealloc(&b, len+l+sl, &bsize);
1532 memcpy(b+len, s, l);
1535 if (b) b[len] = '\0';
1539 } else if (v == V[F0]) {
1540 is_f0_split = FALSE;
1542 } else if (v == V[FS]) {
1543 mk_splitter(getvar_s(v), &fsplitter);
1545 } else if (v == V[RS]) {
1546 mk_splitter(getvar_s(v), &rsplitter);
1548 } else if (v == V[IGNORECASE]) {
1552 n = getvar_i(V[NF]);
1553 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1554 /* right here v is invalid. Just to note... */
1558 /* step through func/builtin/etc arguments */
1559 static node *nextarg(node **pn)
1564 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1573 static void hashwalk_init(var *v, xhash *array)
1579 if (v->type & VF_WALK)
1583 w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
1584 *w = *(w+1) = (char *)(w + 2);
1585 for (i=0; i<array->csize; i++) {
1586 hi = array->items[i];
1588 strcpy(*w, hi->name);
1595 static int hashwalk_next(var *v)
1603 setvar_s(v, nextword(w+1));
1607 /* evaluate node, return 1 when result is true, 0 otherwise */
1608 static int ptest(node *pattern)
1611 return istrue(evaluate(pattern, &v));
1614 /* read next record from stream rsm into a variable v */
1615 static int awk_getline(rstream *rsm, var *v)
1618 regmatch_t pmatch[2];
1619 int a, p, pp=0, size;
1620 int fd, so, eo, r, rp;
1623 /* we're using our own buffer since we need access to accumulating
1626 fd = fileno(rsm->F);
1631 c = (char) rsplitter.n.info;
1634 if (! m) qrealloc(&m, 256, &size);
1640 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1641 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1642 b, 1, pmatch, 0) == 0) {
1643 so = pmatch[0].rm_so;
1644 eo = pmatch[0].rm_eo;
1648 } else if (c != '\0') {
1649 s = strchr(b+pp, c);
1650 if (! s) s = memchr(b+pp, '\0', p - pp);
1657 while (b[rp] == '\n')
1659 s = strstr(b+rp, "\n\n");
1662 while (b[eo] == '\n') eo++;
1670 memmove(m, (const void *)(m+a), p+1);
1675 qrealloc(&m, a+p+128, &size);
1678 p += safe_read(fd, b+p, size-p-1);
1682 setvar_i(V[ERRNO], errno);
1691 c = b[so]; b[so] = '\0';
1695 c = b[eo]; b[eo] = '\0';
1696 setvar_s(V[RT], b+so);
1708 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1712 const char *s=format;
1714 if (int_as_int && n == (int)n) {
1715 r = snprintf(b, size, "%d", (int)n);
1717 do { c = *s; } while (*s && *++s);
1718 if (strchr("diouxX", c)) {
1719 r = snprintf(b, size, format, (int)n);
1720 } else if (strchr("eEfgG", c)) {
1721 r = snprintf(b, size, format, n);
1723 runtime_error(EMSG_INV_FMT);
1730 /* formatted output into an allocated buffer, return ptr to buffer */
1731 static char *awk_printf(node *n)
1734 char *fmt, *s, *s1, *f;
1735 int i, j, incr, bsize;
1740 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1745 while (*f && (*f != '%' || *(++f) == '%'))
1747 while (*f && !isalpha(*f))
1750 incr = (f - s) + MAXVARFMT;
1751 qrealloc(&b, incr+i, &bsize);
1752 c = *f; if (c != '\0') f++;
1753 c1 = *f ; *f = '\0';
1754 arg = evaluate(nextarg(&n), v);
1757 if (c == 'c' || !c) {
1758 i += sprintf(b+i, s,
1759 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1761 } else if (c == 's') {
1763 qrealloc(&b, incr+i+strlen(s1), &bsize);
1764 i += sprintf(b+i, s, s1);
1767 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1771 /* if there was an error while sprintf, return value is negative */
1776 b = xrealloc(b, i+1);
1783 /* common substitution routine
1784 * replace (nm) substring of (src) that match (n) with (repl), store
1785 * result into (dest), return number of substitutions. If nm=0, replace
1786 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1787 * subexpression matching (\1-\9)
/* Locals visible below: sp is the read position in the source string,
 * ds/di are the output buffer and the write offset into it, dssize is the
 * capacity tracked by qrealloc, pmatch holds the whole match plus up to
 * nine subexpressions.  rl is presumably the length of repl (confirm). */
1789 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1793 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1794 regmatch_t pmatch[10];
/* as_regex() may hand back sreg itself; in that case it is freed below */
1797 re = as_regex(rn, &sreg);
/* missing source or destination falls back to V[F0] */
1798 if (! src) src = V[F0];
1799 if (! dest) dest = V[F0];
/* REG_NOTBOL is passed whenever sp is no longer at the start of the source */
1804 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1805 so = pmatch[0].rm_so;
1806 eo = pmatch[0].rm_eo;
/* copy everything from sp up to the end of the match into the output */
1808 qrealloc(&ds, di + eo + rl, &dssize);
1809 memcpy(ds + di, sp, eo);
/* walk the replacement text one character at a time */
1815 for (s = repl; *s; s++) {
/* '&' always, and digits when ex is set, refer to matched text */
1821 if (c == '&' || (ex && c >= '0' && c <= '9')) {
/* NOTE(review): nbs looks like a count of preceding backslashes and this
 * backs the write offset up over some of them; confirm */
1822 di -= ((nbs + 3) >> 1);
/* append the text of (sub)match j */
1831 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1832 qrealloc(&ds, di + rl + n, &dssize);
1833 memcpy(ds + di, sp + pmatch[j].rm_so, n);
/* copy a single character and stop once the terminating NUL was copied */
1844 if (! (ds[di++] = *sp++)) break;
/* append whatever is left of the source */
1848 qrealloc(&ds, di + strlen(sp), &dssize);
1849 strcpy(ds + di, sp);
1851 if (re == &sreg) regfree(re);
/* Execute the builtin function described by node op and store its value
 * in res.  Up to four arguments are collected into an[]; depending on bits
 * of op->info they are also evaluated (av[]) and converted to strings
 * (as[]).  EMSG_TOO_FEW_ARGS is raised when fewer arguments were supplied
 * than the count encoded in the top bits of info (info >> 30). */
1855 static var *exec_builtin(node *op, var *res)
1862 regmatch_t pmatch[2];
1864 static tsplitter tspl;
1873 isr = info = op->info;
/* the optional third and fourth argument values default to NULL */
1876 av[2] = av[3] = NULL;
1877 for (i=0 ; i<4 && op ; i++) {
1878 an[i] = nextarg(&op);
1879 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1880 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1885 if (nargs < (info >> 30))
1886 runtime_error(EMSG_TOO_FEW_ARGS);
1888 switch (info & OPNMASK) {
1891 #ifdef CONFIG_FEATURE_AWK_MATH
/* atan2(y, x): y is the first argument, av[0].  The loop counter i must
 * not be used as the index here, because after the argument loop above it
 * points past the last collected argument. */
1892 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
1894 runtime_error(EMSG_NO_MATH);
/* a regexp node is used as the splitter directly; any other expression is
 * evaluated and turned into a splitter by mk_splitter() */
1900 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1901 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1906 n = awk_split(as[0], spl, &s);
/* the target array is emptied, then elements 1..n are assigned */
1908 clear_array(iamarray(av[1]));
1909 for (i=1; i<=n; i++)
1910 setari_u(av[1], i, nextword(&s1));
/* 1-based start position turned into an offset and clamped to [0, l] */
1917 i = getvar_i(av[1]) - 1;
1918 if (i>l) i=l; if (i<0) i=0;
/* without a third argument the length is everything after the offset */
1919 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1922 strncpy(s, as[0]+i, n);
/* case conversion works on a private copy, one character at a time,
 * through the to_xxx function pointer */
1934 s1 = s = xstrdup(as[0]);
1936 *s1 = (*to_xxx)(*s1);
1945 l = strlen(as[0]) - ll;
1946 if (ll > 0 && l >= 0) {
1948 s = strstr(as[0], as[1]);
/* a hit is reported as a 1-based position */
1949 if (s) n = (s - as[0]) + 1;
1951 /* this piece of code is terribly slow and
1952 * really should be rewritten
1954 for (i=0; i<=l; i++) {
1955 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1967 tt = getvar_i(av[1]);
1970 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1971 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1977 re = as_regex(an[1], &sreg);
1978 n = regexec(re, as[0], 1, pmatch, 0);
1983 pmatch[0].rm_so = 0;
1984 pmatch[0].rm_eo = -1;
1986 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1987 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1988 setvar_i(res, pmatch[0].rm_so);
1989 if (re == &sreg) regfree(re);
1993 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
1997 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2001 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2010 * Evaluate node - the heart of the program. Supplied with subtree
2011 * and place where to store result. returns ptr to result.
/* XC() shifts an opcode-class value right by 8 bits; evaluate() applies it
 * both to the switch operand and to every case label */
2013 #define XC(n) ((n) >> 8)
/* Evaluate the node(s) starting at op and store the outcome in res.  The
 * function calls itself for operand subtrees, ternary branches and
 * function bodies.  fnargs, seed and sreg are static, so one instance of
 * each is shared by all nesting levels of that recursion. */
2015 static var *evaluate(node *op, var *res)
2017 /* This procedure is recursive so we should count every byte */
2018 static var *fnargs = NULL;
2019 static unsigned int seed = 1;
2020 static regex_t sreg;
2041 return setvar_s(res, NULL);
2048 opn = (short)(opinfo & OPNMASK);
2049 lineno = op->lineno;
2051 /* execute inevitable things */
/* operands are prepared according to the OF_* flags in opinfo:
 * OF_RES1/OF_RES2 evaluate the left/right subtree, OF_STR1/OF_STR2 fetch
 * their string values, OF_NUM1 fetches the left numeric value */
2053 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2054 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2055 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2056 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2057 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2059 switch (XC(opinfo & OPCLSMASK)) {
2061 /* -- iterative node type -- */
2065 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2066 /* it's range pattern */
/* OF_CHECKED on the node records that the range is currently open: it is
 * set when the start pattern matches and cleared when the end matches */
2067 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2068 op->info |= OF_CHECKED;
2069 if (ptest(op1->r.n))
2070 op->info &= ~OF_CHECKED;
2077 op = (ptest(op1)) ? op->a.n : op->r.n;
2081 /* just evaluate an expression, also used as unconditional jump */
2085 /* branch, used in if-else and various loops */
2087 op = istrue(L.v) ? op->a.n : op->r.n;
2090 /* initialize for-in loop */
2091 case XC( OC_WALKINIT ):
2092 hashwalk_init(L.v, iamarray(R.v));
2095 /* get next array item */
2096 case XC( OC_WALKNEXT ):
2097 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2100 case XC( OC_PRINT ):
2101 case XC( OC_PRINTF ):
/* output redirection: R.s names the target, which is opened either as a
 * pipe with popen() or as a file with xfopen() in "w" or "a" mode */
2104 X.rsm = newfile(R.s);
2107 if((X.rsm->F = popen(R.s, "w")) == NULL)
2108 bb_perror_msg_and_die("popen");
2111 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2117 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2119 fputs(getvar_s(V[F0]), X.F);
2122 L.v = evaluate(nextarg(&op1), v1);
/* values whose primary type is number are formatted through OFMT, with
 * int_as_int enabled (TRUE) */
2123 if (L.v->type & VF_NUMBER) {
2124 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2125 getvar_i(L.v), TRUE);
2128 fputs(getvar_s(L.v), X.F);
/* OFS is written only while op1 still has arguments left */
2131 if (op1) fputs(getvar_s(V[OFS]), X.F);
2134 fputs(getvar_s(V[ORS]), X.F);
2136 } else { /* OC_PRINTF */
2137 L.s = awk_printf(op1);
2144 case XC( OC_DELETE ):
/* the operand has to be a variable or a function argument (looked up in
 * fnargs); EMSG_NOT_ARRAY is raised otherwise */
2145 X.info = op1->info & OPCLSMASK;
2146 if (X.info == OC_VAR) {
2148 } else if (X.info == OC_FNARG) {
2149 R.v = &fnargs[op1->l.i];
2151 runtime_error(EMSG_NOT_ARRAY);
/* one element is removed by its evaluated subscript, or the whole array is
 * cleared (presumably chosen by whether op1->r.n is present; confirm) */
2156 L.s = getvar_s(evaluate(op1->r.n, v1));
2157 hash_remove(iamarray(R.v), L.s);
2159 clear_array(iamarray(R.v));
2163 case XC( OC_NEWSOURCE ):
2164 programname = op->l.s;
2167 case XC( OC_RETURN ):
2171 case XC( OC_NEXTFILE ):
2182 /* -- recursive node type -- */
2190 case XC( OC_FNARG ):
2191 L.v = &fnargs[op->l.i];
/* with a right subtree the result is the array element keyed by R.s,
 * otherwise the variable itself */
2194 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2198 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2201 case XC( OC_REGEXP ):
2203 L.s = getvar_s(V[F0]);
2206 case XC( OC_MATCH ):
2209 X.re = as_regex(op1, &sreg);
2210 R.i = regexec(X.re, L.s, 0, NULL, 0);
2211 if (X.re == &sreg) regfree(X.re);
/* 1 on a match, inverted when opn is '!' */
2212 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2216 /* if source is a temporary string, just relink it to dest */
2217 if (R.v == v1+1 && R.v->string) {
2218 res = setvar_p(L.v, R.v->string);
2221 res = copyvar(L.v, R.v);
2225 case XC( OC_TERNARY ):
2226 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2227 runtime_error(EMSG_POSSIBLE_ERROR);
2228 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2232 if (! op->r.f->body.first)
2233 runtime_error(EMSG_UNDEF_FUNC);
/* nargs+1 variables are allocated for the callee; every evaluated actual
 * parameter is recorded as x.parent of its slot, which is flagged VF_CHILD */
2235 X.v = R.v = nvalloc(op->r.f->nargs+1);
2237 L.v = evaluate(nextarg(&op1), v1);
2239 R.v->type |= VF_CHILD;
2240 R.v->x.parent = L.v;
2241 if (++R.v - X.v >= op->r.f->nargs)
2249 res = evaluate(op->r.f->body.first, res);
2256 case XC( OC_GETLINE ):
2257 case XC( OC_PGETLINE ):
2259 X.rsm = newfile(L.s);
2261 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2262 X.rsm->F = popen(L.s, "r");
2263 X.rsm->is_pipe = TRUE;
2265 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
2269 if (! iF) iF = next_input_file();
2274 setvar_i(V[ERRNO], errno);
2282 L.i = awk_getline(X.rsm, R.v);
2292 /* simple builtins */
2293 case XC( OC_FBLTIN ):
2301 R.d = (double)rand() / (double)RAND_MAX;
2304 #ifdef CONFIG_FEATURE_AWK_MATH
2330 runtime_error(EMSG_NO_MATH);
/* explicit seed when an argument was given, current time otherwise */
2336 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2346 L.s = getvar_s(V[F0]);
/* system() status shifted right by 8 bits; an empty command yields 0 */
2352 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2360 X.rsm = newfile(L.s);
2369 X.rsm = (rstream *)hash_search(fdhash, L.s);
/* pipes are closed with pclose(), other streams with fclose(); the entry
 * is then dropped from fdhash */
2371 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2372 free(X.rsm->buffer);
2373 hash_remove(fdhash, L.s);
2376 setvar_i(V[ERRNO], errno);
2383 case XC( OC_BUILTIN ):
2384 res = exec_builtin(op, res);
2387 case XC( OC_SPRINTF ):
2388 setvar_p(res, awk_printf(op1));
2391 case XC( OC_UNARY ):
2393 L.d = R.d = getvar_i(R.v);
2408 L.d = istrue(X.v) ? 0 : 1;
2419 case XC( OC_FIELD ):
2420 R.i = (int)getvar_i(R.v);
/* Fields[] is indexed from 0, field numbers from 1 */
2428 res = &Fields[R.i-1];
2432 /* concatenation (" ") and index joining (",") */
2433 case XC( OC_CONCAT ):
2434 case XC( OC_COMMA ):
2435 opn = strlen(L.s) + strlen(R.s) + 2;
2436 X.s = (char *)xmalloc(opn);
/* index joining inserts SUBSEP, so the buffer grows by its length */
2438 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2439 L.s = getvar_s(V[SUBSEP]);
2440 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
/* logical and / or: the right operand is tested only when the left one
 * does not already decide the result */
2448 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2452 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2455 case XC( OC_BINARY ):
2456 case XC( OC_REPLACE ):
2457 R.d = getvar_i(R.v);
2469 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2473 #ifdef CONFIG_FEATURE_AWK_MATH
2474 L.d = pow(L.d, R.d);
2476 runtime_error(EMSG_NO_MATH);
2480 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* remainder obtained by truncating the quotient to int */
2481 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res, OC_REPLACE stores back into X.v */
2484 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2487 case XC( OC_COMPARE ):
/* numeric comparison only when both sides are numeric; otherwise the
 * string values are compared, ignoring case when icase is set */
2488 if (is_numeric(L.v) && is_numeric(R.v)) {
2489 L.d = getvar_i(L.v) - getvar_i(R.v);
2491 L.s = getvar_s(L.v);
2492 R.s = getvar_s(R.v);
2493 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* the lowest bit of opn selects whether the test result is inverted */
2495 switch (opn & 0xfe) {
2506 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2510 runtime_error(EMSG_POSSIBLE_ERROR);
2512 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2514 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2524 /* -------- main & co. -------- */
/* Evaluates the END sequence (endseq.first) and then walks the buckets of
 * fdhash, calling pclose() on streams that are open and marked as pipes.
 * NOTE(review): r looks like the process exit status; confirm. */
2526 static int awk_exit(int r)
2535 evaluate(endseq.first, &tv);
2538 /* waiting for children */
/* csize is the current number of hash buckets */
2539 for (i=0; i<fdhash->csize; i++) {
2540 hi = fdhash->items[i];
2542 if (hi->data.rs.F && hi->data.rs.is_pipe)
2543 pclose(hi->data.rs.F);
2551 /* if expr looks like "var=value", perform assignment and return 1,
2552 * otherwise return 0 */
2553 static int is_assignment(const char *expr)
2555 char *exprc, *s, *s0, *s1;
/* expr is const, so all further work happens on a writable copy */
2557 exprc = xstrdup(expr);
/* not an assignment if the first character fails isalnum_() or there is
 * no '=' at all */
2558 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value text is rewritten one character at a time through nextchar()
 * (presumably to process escape sequences; confirm) */
2566 *(s1++) = nextchar(&s);
/* the variable named by exprc receives s0 via setvar_u() */
2569 setvar_u(newvar(exprc), s0);
2574 /* switch to next input file */
/* Works on the rsm stream record: the previous file is closed and its
 * pos/adv counters reset before the next ARGV entry is looked at. */
2575 static rstream *next_input_file(void)
2580 static int files_happen = FALSE;
2582 if (rsm.F) fclose(rsm.F);
2584 rsm.pos = rsm.adv = 0;
/* ARGIND+1 >= ARGC means that no ARGV entries are left */
2587 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* advance ARGIND and fetch the ARGV element with that index */
2593 ind = getvar_s(incvar(V[ARGIND]));
2594 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* empty entries are skipped, and so are var=value entries, which
 * is_assignment() has then already applied */
2595 if (fname && *fname && !is_assignment(fname))
2596 F = afopen(fname, "r");
2600 files_happen = TRUE;
2601 setvar_s(V[FILENAME], fname);
/* Entry point: creates the hashes and the built-in variables, registers
 * the standard streams, copies the environment into ENVIRON, processes the
 * command-line options and the program text, fills ARGC/ARGV, evaluates
 * the BEGIN sequence, feeds every input record to the main sequence and
 * finishes through awk_exit(). */
2606 int awk_main(int argc, char **argv)
2613 static int from_file = FALSE;
2615 FILE *F, *stdfiles[3];
/* three NUL-separated names, consumed one per call by nextword() below */
2616 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2618 /* allocate global buffer */
2619 buf = xmalloc(MAXVARFMT+1);
2621 vhash = hash_init();
2622 ahash = hash_init();
2623 fdhash = hash_init();
2624 fnhash = hash_init();
2626 /* initialize variables */
/* vNames and vValues are consumed in step with nextword(); a value that
 * starts with '\377' means that no initial string is assigned */
2627 for (i=0; *vNames; i++) {
2628 V[i] = v = newvar(nextword(&vNames));
2629 if (*vValues != '\377')
2630 setvar_s(v, nextword(&vValues));
/* a '*' at the current vNames position, after the name was consumed,
 * flags the variable just created as VF_SPECIAL */
2634 if (*vNames == '*') {
2635 v->type |= VF_SPECIAL;
/* handle_special() is invoked explicitly for FS and RS */
2640 handle_special(V[FS]);
2641 handle_special(V[RS]);
2643 stdfiles[0] = stdin;
2644 stdfiles[1] = stdout;
2645 stdfiles[2] = stderr;
/* register stdin/stdout/stderr under their /dev names */
2646 for (i=0; i<3; i++) {
2647 rsm = newfile(nextword(&stdnames));
2648 rsm->F = stdfiles[i];
/* copy the process environment into the ENVIRON array; each entry is
 * located by its '=' (presumably name before it, value after; confirm) */
2651 for (envp=environ; *envp; envp++) {
2653 s1 = strchr(s, '=');
2658 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
/* accepted options: -F, -v, -f and -W, each taking an argument */
2663 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2666 setvar_s(V[FS], optarg);
2669 if (! is_assignment(optarg))
2674 F = afopen(programname = optarg, "r");
2676 /* one byte is reserved for some trick in next_token */
/* a seekable file is read with a single fread(); the loop further down
 * reads in pieces of 4094 bytes and grows the buffer as it goes.  In both
 * cases data is stored from s+1 onward, leaving s[0] free. */
2677 if (fseek(F, 0, SEEK_END) == 0) {
2679 s = (char *)xmalloc(flen+4);
2680 fseek(F, 0, SEEK_SET);
2681 i = 1 + fread(s+1, 1, flen, F);
2683 for (i=j=1; j>0; i+=j) {
2684 s = (char *)xrealloc(s, i+4096);
2685 j = fread(s+i, 1, 4094, F);
/* NOTE(review): if bb_error_msg() appends a newline on its own, the
 * trailing \n in this message yields an extra blank line; confirm */
2694 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
/* the program text is taken from the next command-line word (presumably
 * only when no program file was given; confirm) */
2705 programname="cmd. line";
2706 parse_program(argv[optind++]);
2710 /* fill in ARGV array */
/* ARGV[0] is "awk"; the remaining command-line words follow from index 1 */
2711 setvar_i(V[ARGC], argc - optind + 1);
2712 setari_u(V[ARGV], 0, "awk");
2713 for(i=optind; i < argc; i++)
2714 setari_u(V[ARGV], i+1-optind, argv[i]);
2716 evaluate(beginseq.first, &tv);
/* with neither a main nor an END sequence there is no input to read */
2717 if (! mainseq.first && ! endseq.first)
2718 awk_exit(EXIT_SUCCESS);
2720 /* input file could already be opened in BEGIN block */
2721 if (! iF) iF = next_input_file();
2723 /* passing through input files */
2727 setvar_i(V[FNR], 0);
/* the main sequence runs once for every record awk_getline() delivers */
2729 while ((c = awk_getline(iF, V[F0])) > 0) {
2734 evaluate(mainseq.first, &tv);
/* presumably reached when awk_getline() reported a failure; confirm */
2741 runtime_error(strerror(errno));
2743 iF = next_input_file();
2747 awk_exit(EXIT_SUCCESS);