1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
28 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
29 #define VF_ARRAY 0x0002 /* 1 = it's an array */
31 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
32 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
33 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
34 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
35 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
36 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
37 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
39 /* these flags are static, don't change them when value is changed */
40 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
43 typedef struct var_s {
44 unsigned short type; /* flags */
48 int aidx; /* func arg idx (for compilation stage) */
49 struct xhash_s *array; /* array ptr */
50 struct var_s *parent; /* for func args, ptr to actual parameter */
51 char **walker; /* list of array elements (for..in) */
55 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
56 typedef struct chain_s {
63 typedef struct func_s {
69 typedef struct rstream_s {
75 unsigned short is_pipe;
78 typedef struct hash_item_s {
80 struct var_s v; /* variable/array hash */
81 struct rstream_s rs; /* redirect streams hash */
82 struct func_s f; /* functions hash */
84 struct hash_item_s *next; /* next in chain */
85 char name[1]; /* really it's longer */
88 typedef struct xhash_s {
89 unsigned int nel; /* num of elements */
90 unsigned int csize; /* current hash size */
91 unsigned int nprime; /* next hash size in PRIMES[] */
92 unsigned int glen; /* summary length of item names */
93 struct hash_item_s **items;
97 typedef struct node_s {
99 unsigned short lineno;
118 /* Block of temporary variables */
119 typedef struct nvblock_s {
122 struct nvblock_s *prev;
123 struct nvblock_s *next;
127 typedef struct tsplitter_s {
132 /* simple token classes */
133 /* Order and hex values are very important!!! See next_token() */
134 #define TC_SEQSTART 1 /* ( */
135 #define TC_SEQTERM (1 << 1) /* ) */
136 #define TC_REGEXP (1 << 2) /* /.../ */
137 #define TC_OUTRDR (1 << 3) /* | > >> */
138 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
139 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
140 #define TC_BINOPX (1 << 6) /* two-opnd operator */
141 #define TC_IN (1 << 7)
142 #define TC_COMMA (1 << 8)
143 #define TC_PIPE (1 << 9) /* input redirection pipe */
144 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
145 #define TC_ARRTERM (1 << 11) /* ] */
146 #define TC_GRPSTART (1 << 12) /* { */
147 #define TC_GRPTERM (1 << 13) /* } */
148 #define TC_SEMICOL (1 << 14)
149 #define TC_NEWLINE (1 << 15)
150 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
151 #define TC_WHILE (1 << 17)
152 #define TC_ELSE (1 << 18)
153 #define TC_BUILTIN (1 << 19)
154 #define TC_GETLINE (1 << 20)
155 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
156 #define TC_BEGIN (1 << 22)
157 #define TC_END (1 << 23)
158 #define TC_EOF (1 << 24)
159 #define TC_VARIABLE (1 << 25)
160 #define TC_ARRAY (1 << 26)
161 #define TC_FUNCTION (1 << 27)
162 #define TC_STRING (1 << 28)
163 #define TC_NUMBER (1 << 29)
165 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
167 /* combined token classes */
168 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
169 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
170 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
171 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
173 #define TC_STATEMNT (TC_STATX | TC_WHILE)
174 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
176 /* word tokens, cannot mean something else if not expected */
177 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
178 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
180 /* discard newlines after these */
181 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
182 TC_BINOP | TC_OPTERM)
184 /* what can expression begin with */
185 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
186 /* what can group begin with */
187 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
189 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
190 /* operator is inserted between them */
191 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
192 TC_STRING | TC_NUMBER | TC_UOPPOST)
193 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
195 #define OF_RES1 0x010000
196 #define OF_RES2 0x020000
197 #define OF_STR1 0x040000
198 #define OF_STR2 0x080000
199 #define OF_NUM1 0x100000
200 #define OF_CHECKED 0x200000
202 /* combined operator flags */
205 #define xS (OF_RES2 | OF_STR2)
207 #define VV (OF_RES1 | OF_RES2)
208 #define Nx (OF_RES1 | OF_NUM1)
209 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
210 #define Sx (OF_RES1 | OF_STR1)
211 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
212 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
214 #define OPCLSMASK 0xFF00
215 #define OPNMASK 0x007F
217 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
218 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
219 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* P(x) places x (an operator priority, or the argument-info byte of a
 * builtin) into the top byte of a 32-bit info word.
 * The argument is parenthesized so that compound expressions expand
 * correctly, and the shift is performed on an unsigned 32-bit value because
 * values >= 0x80 would overflow a signed int when shifted left by 24.
 * PRIMASK selects the 7 priority bits; PRIMASK2 additionally drops the
 * lowest priority bit. */
#define P(x)     ((uint32_t)(x) << 24)
#define PRIMASK  0x7F000000
#define PRIMASK2 0x7E000000
225 /* Operation classes */
227 #define SHIFT_TIL_THIS 0x0600
228 #define RECUR_FROM_THIS 0x1000
231 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
232 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
234 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
235 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
236 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
238 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
239 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
240 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
241 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
242 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
243 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
244 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
245 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
248 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
252 /* simple builtins */
254 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
255 F_ti, F_le, F_sy, F_ff, F_cl
260 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
264 /* tokens and their corresponding info values */
266 #define NTC "\377" /* switch to next token class (tc<<1) */
269 #define OC_B OC_BUILTIN
271 static char * const tokenlist =
274 "\1/" NTC /* REGEXP */
275 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
276 "\2++" "\2--" NTC /* UOPPOST */
277 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
278 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
279 "\2*=" "\2/=" "\2%=" "\2^="
280 "\1+" "\1-" "\3**=" "\2**"
281 "\1/" "\1%" "\1^" "\1*"
282 "\2!=" "\2>=" "\2<=" "\1>"
283 "\1<" "\2!~" "\1~" "\2&&"
284 "\2||" "\1?" "\1:" NTC
288 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
294 "\2if" "\2do" "\3for" "\5break" /* STATX */
295 "\10continue" "\6delete" "\5print"
296 "\6printf" "\4next" "\10nextfile"
297 "\6return" "\4exit" NTC
301 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
302 "\3cos" "\3exp" "\3int" "\3log"
303 "\4rand" "\3sin" "\4sqrt" "\5srand"
304 "\6gensub" "\4gsub" "\5index" "\6length"
305 "\5match" "\5split" "\7sprintf" "\3sub"
306 "\6substr" "\7systime" "\10strftime"
307 "\7tolower" "\7toupper" NTC
309 "\4func" "\10function" NTC
314 static const uint32_t tokeninfo[] = {
319 xS|'a', xS|'w', xS|'|',
320 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
321 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
323 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
324 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
325 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
326 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
327 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
328 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
329 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
330 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
331 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
332 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
333 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
334 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
335 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
336 OC_COLON|xx|P(67)|':',
339 OC_PGETLINE|SV|P(37),
340 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
341 OC_UNARY|xV|P(19)|'!',
347 ST_IF, ST_DO, ST_FOR, OC_BREAK,
348 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
349 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
350 OC_RETURN|Vx, OC_EXIT|Nx,
354 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
355 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
356 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
357 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
358 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
359 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
360 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
367 /* internal variable names and their initial values */
368 /* asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
370 CONVFMT=0, OFMT, FS, OFS,
371 ORS, RS, RT, FILENAME,
372 SUBSEP, ARGIND, ARGC, ARGV,
375 ENVIRON, F0, _intvarcount_
378 static char * vNames =
379 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
380 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
381 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
383 "NR\0" "NF\0*" "IGNORECASE\0*"
384 "ENVIRON\0" "$\0*" "\0";
386 static char * vValues =
387 "%.6g\0" "%.6g\0" " \0" " \0"
388 "\n\0" "\n\0" "\0" "\0"
/* A new hash starts with FIRST_PRIME buckets; when it is rebuilt it takes
 * the next size from PRIMES[], and stops growing once all NPRIMES sizes
 * have been used. */
#define FIRST_PRIME 61	/* no trailing ';' so it is usable inside expressions */
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
399 extern char **environ;
401 static var * V[_intvarcount_];
402 static chain beginseq, mainseq, endseq, *seq;
403 static int nextrec, nextfile;
404 static node *break_ptr, *continue_ptr;
406 static xhash *vhash, *ahash, *fdhash, *fnhash;
407 static char *programname;
409 static int is_f0_split;
412 static tsplitter fsplitter, rsplitter;
428 /* function prototypes */
429 static void handle_special(var *);
430 static node *parse_expr(uint32_t);
431 static void chain_group(void);
432 static var *evaluate(node *, var *);
433 static rstream *next_input_file(void);
434 static int fmt_num(char *, int, const char *, double, int);
435 static int awk_exit(int) ATTRIBUTE_NORETURN;
437 /* ---- error handling ---- */
439 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
440 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
441 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
442 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
443 static const char EMSG_INV_FMT[] = "Invalid format specifier";
444 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
445 static const char EMSG_NOT_ARRAY[] = "Not an array";
446 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
447 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
448 #ifndef CONFIG_FEATURE_AWK_MATH
449 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
452 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
453 static void syntax_error(const char * const message)
455 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
458 #define runtime_error(x) syntax_error(x)
461 /* ---- hash stuff ---- */
463 static unsigned int hashidx(const char *name)
465 register unsigned int idx=0;
467 while (*name) idx = *name++ + (idx << 6) - idx;
471 /* create new hash */
472 static xhash *hash_init(void)
476 newhash = (xhash *)xcalloc(1, sizeof(xhash));
477 newhash->csize = FIRST_PRIME;
478 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
483 /* find item in hash, return ptr to data, NULL if not found */
484 static void *hash_search(xhash *hash, const char *name)
488 hi = hash->items [ hashidx(name) % hash->csize ];
490 if (strcmp(hi->name, name) == 0)
497 /* grow hash if it becomes too big */
498 static void hash_rebuild(xhash *hash)
500 unsigned int newsize, i, idx;
501 hash_item **newitems, *hi, *thi;
503 if (hash->nprime == NPRIMES)
506 newsize = PRIMES[hash->nprime++];
507 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
509 for (i=0; i<hash->csize; i++) {
514 idx = hashidx(thi->name) % newsize;
515 thi->next = newitems[idx];
521 hash->csize = newsize;
522 hash->items = newitems;
525 /* find item in hash, add it if necessary. Return ptr to data */
526 static void *hash_find(xhash *hash, const char *name)
532 hi = hash_search(hash, name);
534 if (++hash->nel / hash->csize > 10)
537 l = strlen(name) + 1;
538 hi = xcalloc(sizeof(hash_item) + l, 1);
539 memcpy(hi->name, name, l);
541 idx = hashidx(name) % hash->csize;
542 hi->next = hash->items[idx];
543 hash->items[idx] = hi;
/* Typed wrappers around hash_find(): look the name up in the given (or the
 * implied global) hash, adding the item if necessary, and cast the result
 * to the payload type stored in that hash.
 * Each expansion is fully parenthesized so that a following postfix
 * operator (e.g. newvar(s)->type) applies to the cast result rather than
 * to the void pointer returned by hash_find(). */
#define findvar(hash, name) ((var *)hash_find((hash), (name)))
#define newvar(name)        ((var *)hash_find(vhash, (name)))
#define newfile(name)       ((rstream *)hash_find(fdhash, (name)))
#define newfunc(name)       ((func *)hash_find(fnhash, (name)))
554 static void hash_remove(xhash *hash, const char *name)
556 hash_item *hi, **phi;
558 phi = &(hash->items[ hashidx(name) % hash->csize ]);
561 if (strcmp(hi->name, name) == 0) {
562 hash->glen -= (strlen(name) + 1);
572 /* ------ some useful functions ------ */
574 static void skip_spaces(char **s)
576 register char *p = *s;
578 while(*p == ' ' || *p == '\t' ||
579 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
585 static char *nextword(char **s)
587 register char *p = *s;
594 static char nextchar(char **s)
596 register char c, *pps;
600 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
601 if (c == '\\' && *s == pps) c = *((*s)++);
/* Return non-zero if c is a letter, a digit or an underscore (the characters
 * the tokenizer accepts inside a name), zero otherwise.
 * Callers pass plain char values, which may be negative; isalnum() is only
 * defined for unsigned char values and EOF, so convert before the call. */
static inline int isalnum_(int c)
{
	return isalnum((unsigned char)c) || c == '_';
}
610 static FILE *afopen(const char *path, const char *mode)
612 return (*path == '-' && *(path+1) == '\0') ? stdin : bb_xfopen(path, mode);
615 /* -------- working with variables (set/get/copy/etc) -------- */
617 static xhash *iamarray(var *v)
621 while (a->type & VF_CHILD)
624 if (! (a->type & VF_ARRAY)) {
626 a->x.array = hash_init();
631 static void clear_array(xhash *array)
636 for (i=0; i<array->csize; i++) {
637 hi = array->items[i];
641 free(thi->data.v.string);
644 array->items[i] = NULL;
646 array->glen = array->nel = 0;
649 /* clear a variable */
650 static var *clrvar(var *v)
652 if (!(v->type & VF_FSTR))
655 v->type &= VF_DONTTOUCH;
661 /* assign string value to variable */
662 static var *setvar_p(var *v, char *value)
671 /* same as setvar_p but make a copy of string */
672 static var *setvar_s(var *v, const char *value)
674 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
677 /* same as setvar_s but set USER flag */
678 static var *setvar_u(var *v, const char *value)
685 /* set array element to user string */
686 static void setari_u(var *a, int idx, const char *s)
689 static char sidx[12];
691 sprintf(sidx, "%d", idx);
692 v = findvar(iamarray(a), sidx);
696 /* assign numeric value to variable */
697 static var *setvar_i(var *v, double value)
700 v->type |= VF_NUMBER;
706 static char *getvar_s(var *v)
708 /* if v is numeric and has no cached string, convert it to string */
709 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
710 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
711 v->string = bb_xstrdup(buf);
712 v->type |= VF_CACHED;
714 return (v->string == NULL) ? "" : v->string;
717 static double getvar_i(var *v)
721 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
725 v->number = strtod(s, &s);
726 if (v->type & VF_USER) {
734 v->type |= VF_CACHED;
739 static var *copyvar(var *dest, const var *src)
743 dest->type |= (src->type & ~VF_DONTTOUCH);
744 dest->number = src->number;
746 dest->string = bb_xstrdup(src->string);
748 handle_special(dest);
752 static var *incvar(var *v)
754 return setvar_i(v, getvar_i(v)+1.);
757 /* return true if v is number or numeric string */
758 static int is_numeric(var *v)
761 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
764 /* return 1 when value of v corresponds to true, 0 otherwise */
765 static int istrue(var *v)
768 return (v->number == 0) ? 0 : 1;
770 return (v->string && *(v->string)) ? 1 : 0;
773 /* temporary variables allocator. Last allocated should be first freed */
774 static var *nvalloc(int n)
782 if ((cb->pos - cb->nv) + n <= cb->size) break;
787 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
793 if (pb) pb->next = cb;
799 while (v < cb->pos) {
808 static void nvfree(var *v)
812 if (v < cb->nv || v >= cb->pos)
813 runtime_error(EMSG_INTERNAL_ERROR);
815 for (p=v; p<cb->pos; p++) {
816 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817 clear_array(iamarray(p));
818 free(p->x.array->items);
821 if (p->type & VF_WALK)
828 while (cb->prev && cb->pos == cb->nv) {
833 /* ------- awk program text parsing ------- */
835 /* Parse the next token pointed to by global pos, place results into global t.
836 * If the token isn't expected, give up with a syntax error. Return the token class
838 static uint32_t next_token(uint32_t expected)
845 static int concat_inserted;
846 static uint32_t save_tclass, save_info;
847 static uint32_t ltclass = TC_OPTERM;
853 } else if (concat_inserted) {
855 concat_inserted = FALSE;
856 t.tclass = save_tclass;
867 while (*p != '\n' && *p != '\0') p++;
875 } else if (*p == '\"') {
879 if (*p == '\0' || *p == '\n')
880 syntax_error(EMSG_UNEXP_EOS);
881 *(s++) = nextchar(&p);
887 } else if ((expected & TC_REGEXP) && *p == '/') {
891 if (*p == '\0' || *p == '\n')
892 syntax_error(EMSG_UNEXP_EOS);
893 if ((*s++ = *p++) == '\\') {
895 *(s-1) = bb_process_escape_sequence((const char **)&p);
896 if (*pp == '\\') *s++ = '\\';
897 if (p == pp) *s++ = *p++;
904 } else if (*p == '.' || isdigit(*p)) {
906 t.number = strtod(p, &p);
908 syntax_error(EMSG_UNEXP_TOKEN);
912 /* search for something known */
922 /* if token class is expected, token
923 * matches and it's not a longer word,
924 * then this is what we are looking for
926 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
927 *tl == *p && strncmp(p, tl, l) == 0 &&
928 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
938 /* it's a name (var/array/function),
939 * otherwise it's something wrong
942 syntax_error(EMSG_UNEXP_TOKEN);
945 while(isalnum_(*(++p))) {
950 /* also consume whitespace between functionname and bracket */
951 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
964 /* skipping newlines in some cases */
965 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
968 /* insert concatenation operator when needed */
969 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
970 concat_inserted = TRUE;
974 t.info = OC_CONCAT | SS | P(35);
981 /* Are we ready for this? */
982 if (! (ltclass & expected))
983 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
984 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
989 static void rollback_token(void) { t.rollback = TRUE; }
991 static node *new_node(uint32_t info)
995 n = (node *)xcalloc(sizeof(node), 1);
1001 static node *mk_re_node(char *s, node *n, regex_t *re)
1003 n->info = OC_REGEXP;
1006 xregcomp(re, s, REG_EXTENDED);
1007 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1012 static node *condition(void)
1014 next_token(TC_SEQSTART);
1015 return parse_expr(TC_SEQTERM);
1018 /* parse expression terminated by given argument, return ptr
1019 * to built subtree. Terminator is eaten by parse_expr */
1020 static node *parse_expr(uint32_t iexp)
1029 sn.r.n = glptr = NULL;
1030 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1032 while (! ((tc = next_token(xtc)) & iexp)) {
1033 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1034 /* input redirection (<) attached to glptr node */
1035 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1037 xtc = TC_OPERAND | TC_UOPPRE;
1040 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1041 /* for binary and postfix-unary operators, jump back over
1042 * previous operators with higher priority */
1044 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1045 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1047 if ((t.info & OPCLSMASK) == OC_TERNARY)
1049 cn = vn->a.n->r.n = new_node(t.info);
1051 if (tc & TC_BINOP) {
1053 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1054 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1056 next_token(TC_GETLINE);
1057 /* give maximum priority to this pipe */
1058 cn->info &= ~PRIMASK;
1059 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1063 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1068 /* for operands and prefix-unary operators, attach them
1071 cn = vn->r.n = new_node(t.info);
1073 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1074 if (tc & (TC_OPERAND | TC_REGEXP)) {
1075 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1076 /* one should be very careful with switch on tclass -
1077 * only simple tclasses should be used! */
1082 if ((v = hash_search(ahash, t.string)) != NULL) {
1083 cn->info = OC_FNARG;
1084 cn->l.i = v->x.aidx;
1086 cn->l.v = newvar(t.string);
1088 if (tc & TC_ARRAY) {
1090 cn->r.n = parse_expr(TC_ARRTERM);
1097 v = cn->l.v = xcalloc(sizeof(var), 1);
1099 setvar_i(v, t.number);
1101 setvar_s(v, t.string);
1105 mk_re_node(t.string, cn,
1106 (regex_t *)xcalloc(sizeof(regex_t),2));
1111 cn->r.f = newfunc(t.string);
1112 cn->l.n = condition();
1116 cn = vn->r.n = parse_expr(TC_SEQTERM);
1122 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1126 cn->l.n = condition();
1135 /* add node to chain. Return ptr to alloc'd node */
1136 static node *chain_node(uint32_t info)
1141 seq->first = seq->last = new_node(0);
1143 if (seq->programname != programname) {
1144 seq->programname = programname;
1145 n = chain_node(OC_NEWSOURCE);
1146 n->l.s = bb_xstrdup(programname);
1151 seq->last = n->a.n = new_node(OC_DONE);
1156 static void chain_expr(uint32_t info)
1160 n = chain_node(info);
1161 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1162 if (t.tclass & TC_GRPTERM)
1166 static node *chain_loop(node *nn)
1168 node *n, *n2, *save_brk, *save_cont;
1170 save_brk = break_ptr;
1171 save_cont = continue_ptr;
1173 n = chain_node(OC_BR | Vx);
1174 continue_ptr = new_node(OC_EXEC);
1175 break_ptr = new_node(OC_EXEC);
1177 n2 = chain_node(OC_EXEC | Vx);
1180 continue_ptr->a.n = n2;
1181 break_ptr->a.n = n->r.n = seq->last;
1183 continue_ptr = save_cont;
1184 break_ptr = save_brk;
1189 /* parse group and attach it to chain */
1190 static void chain_group(void)
1196 c = next_token(TC_GRPSEQ);
1197 } while (c & TC_NEWLINE);
1199 if (c & TC_GRPSTART) {
1200 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1201 if (t.tclass & TC_NEWLINE) continue;
1205 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1207 chain_expr(OC_EXEC | Vx);
1208 } else { /* TC_STATEMNT */
1209 switch (t.info & OPCLSMASK) {
1211 n = chain_node(OC_BR | Vx);
1212 n->l.n = condition();
1214 n2 = chain_node(OC_EXEC);
1216 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1218 n2->a.n = seq->last;
1226 n = chain_loop(NULL);
1231 n2 = chain_node(OC_EXEC);
1232 n = chain_loop(NULL);
1234 next_token(TC_WHILE);
1235 n->l.n = condition();
1239 next_token(TC_SEQSTART);
1240 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1241 if (t.tclass & TC_SEQTERM) { /* for-in */
1242 if ((n2->info & OPCLSMASK) != OC_IN)
1243 syntax_error(EMSG_UNEXP_TOKEN);
1244 n = chain_node(OC_WALKINIT | VV);
1247 n = chain_loop(NULL);
1248 n->info = OC_WALKNEXT | Vx;
1250 } else { /* for(;;) */
1251 n = chain_node(OC_EXEC | Vx);
1253 n2 = parse_expr(TC_SEMICOL);
1254 n3 = parse_expr(TC_SEQTERM);
1264 n = chain_node(t.info);
1265 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1266 if (t.tclass & TC_OUTRDR) {
1268 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1270 if (t.tclass & TC_GRPTERM)
1275 n = chain_node(OC_EXEC);
1280 n = chain_node(OC_EXEC);
1281 n->a.n = continue_ptr;
1284 /* delete, next, nextfile, return, exit */
1292 static void parse_program(char *p)
1301 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1302 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1304 if (tclass & TC_OPTERM)
1308 if (tclass & TC_BEGIN) {
1312 } else if (tclass & TC_END) {
1316 } else if (tclass & TC_FUNCDECL) {
1317 next_token(TC_FUNCTION);
1319 f = newfunc(t.string);
1320 f->body.first = NULL;
1322 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1323 v = findvar(ahash, t.string);
1324 v->x.aidx = (f->nargs)++;
1326 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1333 } else if (tclass & TC_OPSEQ) {
1335 cn = chain_node(OC_TEST);
1336 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1337 if (t.tclass & TC_GRPSTART) {
1341 chain_node(OC_PRINT);
1343 cn->r.n = mainseq.last;
1345 } else /* if (tclass & TC_GRPSTART) */ {
1353 /* -------- program execution part -------- */
1355 static node *mk_splitter(char *s, tsplitter *spl)
1357 register regex_t *re, *ire;
1363 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1367 if (strlen(s) > 1) {
1368 mk_re_node(s, n, re);
1370 n->info = (uint32_t) *s;
1376 /* use node as a regular expression. Supplied with node ptr and regex_t
1377 * storage space. Return ptr to regex (if result points to preg, it should
1378 * be regfree'd manually later)
1380 static regex_t *as_regex(node *op, regex_t *preg)
1385 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1386 return icase ? op->r.ire : op->l.re;
1389 s = getvar_s(evaluate(op, v));
1390 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1396 /* gradually increasing buffer */
1397 static void qrealloc(char **b, int n, int *size)
1399 if (! *b || n >= *size)
1400 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1403 /* resize field storage space */
1404 static void fsrealloc(int size)
1406 static int maxfields = 0;
1409 if (size >= maxfields) {
1411 maxfields = size + 16;
1412 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1413 for (; i<maxfields; i++) {
1414 Fields[i].type = VF_SPECIAL;
1415 Fields[i].string = NULL;
1419 if (size < nfields) {
1420 for (i=size; i<nfields; i++) {
1427 static int awk_split(char *s, node *spl, char **slist)
1432 regmatch_t pmatch[2];
1434 /* in worst case, each char would be a separate field */
1435 *slist = s1 = bb_xstrndup(s, strlen(s) * 2 + 3);
1437 c[0] = c[1] = (char)spl->info;
1439 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1441 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1443 l = strcspn(s, c+2);
1444 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1445 pmatch[0].rm_so <= l) {
1446 l = pmatch[0].rm_so;
1447 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1449 pmatch[0].rm_eo = l;
1450 if (*(s+l)) pmatch[0].rm_eo++;
1456 s += pmatch[0].rm_eo;
1459 } else if (c[0] == '\0') { /* null split */
1465 } else if (c[0] != ' ') { /* single-character split */
1467 c[0] = toupper(c[0]);
1468 c[1] = tolower(c[1]);
1471 while ((s1 = strpbrk(s1, c))) {
1475 } else { /* space split */
1477 while (isspace(*s)) s++;
1480 while (*s && !isspace(*s))
1488 static void split_f0(void)
1490 static char *fstrings = NULL;
1500 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1503 for (i=0; i<n; i++) {
1504 Fields[i].string = nextword(&s);
1505 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1508 /* set NF manually to avoid side effects */
1510 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1511 V[NF]->number = nfields;
1514 /* perform additional actions when some internal variables changed */
1515 static void handle_special(var *v)
1519 int sl, l, len, i, bsize;
1521 if (! (v->type & VF_SPECIAL))
1525 n = (int)getvar_i(v);
1528 /* recalculate $0 */
1529 sep = getvar_s(V[OFS]);
1533 for (i=0; i<n; i++) {
1534 s = getvar_s(&Fields[i]);
1537 memcpy(b+len, sep, sl);
1540 qrealloc(&b, len+l+sl, &bsize);
1541 memcpy(b+len, s, l);
1544 if (b) b[len] = '\0';
1548 } else if (v == V[F0]) {
1549 is_f0_split = FALSE;
1551 } else if (v == V[FS]) {
1552 mk_splitter(getvar_s(v), &fsplitter);
1554 } else if (v == V[RS]) {
1555 mk_splitter(getvar_s(v), &rsplitter);
1557 } else if (v == V[IGNORECASE]) {
1561 n = getvar_i(V[NF]);
1562 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1563 /* right here v is invalid. Just to note... */
1567 /* step through func/builtin/etc arguments */
1568 static node *nextarg(node **pn)
1573 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1582 static void hashwalk_init(var *v, xhash *array)
1588 if (v->type & VF_WALK)
1592 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1593 *w = *(w+1) = (char *)(w + 2);
1594 for (i=0; i<array->csize; i++) {
1595 hi = array->items[i];
1597 strcpy(*w, hi->name);
1604 static int hashwalk_next(var *v)
1612 setvar_s(v, nextword(w+1));
1616 /* evaluate node, return 1 when result is true, 0 otherwise */
1617 static int ptest(node *pattern)
1620 return istrue(evaluate(pattern, &v));
1623 /* read next record from stream rsm into a variable v */
1624 static int awk_getline(rstream *rsm, var *v)
1627 regmatch_t pmatch[2];
1628 int a, p, pp=0, size;
1629 int fd, so, eo, r, rp;
1632 /* we're using our own buffer since we need access to accumulating
1635 fd = fileno(rsm->F);
1640 c = (char) rsplitter.n.info;
1643 if (! m) qrealloc(&m, 256, &size);
1649 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1650 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1651 b, 1, pmatch, 0) == 0) {
1652 so = pmatch[0].rm_so;
1653 eo = pmatch[0].rm_eo;
1657 } else if (c != '\0') {
1658 s = strchr(b+pp, c);
1659 if (! s) s = memchr(b+pp, '\0', p - pp);
1666 while (b[rp] == '\n')
1668 s = strstr(b+rp, "\n\n");
1671 while (b[eo] == '\n') eo++;
1679 memmove(m, (const void *)(m+a), p+1);
1684 qrealloc(&m, a+p+128, &size);
1687 p += safe_read(fd, b+p, size-p-1);
1691 setvar_i(V[ERRNO], errno);
1700 c = b[so]; b[so] = '\0';
1704 c = b[eo]; b[eo] = '\0';
1705 setvar_s(V[RT], b+so);
1717 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1721 const char *s=format;
1723 if (int_as_int && n == (int)n) {
1724 r = snprintf(b, size, "%d", (int)n);
1726 do { c = *s; } while (*s && *++s);
1727 if (strchr("diouxX", c)) {
1728 r = snprintf(b, size, format, (int)n);
1729 } else if (strchr("eEfgG", c)) {
1730 r = snprintf(b, size, format, n);
1732 runtime_error(EMSG_INV_FMT);
1739 /* formatted output into an allocated buffer, return ptr to buffer */
1740 static char *awk_printf(node *n)
1743 char *fmt, *s, *s1, *f;
1744 int i, j, incr, bsize;
1749 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1754 while (*f && (*f != '%' || *(++f) == '%'))
1756 while (*f && !isalpha(*f))
1759 incr = (f - s) + MAXVARFMT;
1760 qrealloc(&b, incr+i, &bsize);
1761 c = *f; if (c != '\0') f++;
1762 c1 = *f ; *f = '\0';
1763 arg = evaluate(nextarg(&n), v);
1766 if (c == 'c' || !c) {
1767 i += sprintf(b+i, s,
1768 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1770 } else if (c == 's') {
1772 qrealloc(&b, incr+i+strlen(s1), &bsize);
1773 i += sprintf(b+i, s, s1);
1776 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1780 /* if there was an error while sprintf, return value is negative */
1785 b = xrealloc(b, i+1);
1792 /* common substitution routine
1793 * replace the (nm)th substring of (src) that matches (rn) with (repl), store
1794 * result into (dest), return number of substitutions. If nm=0, replace
1795 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1796 * subexpression matching (\1-\9)
/* Regex substitution helper.
 *   rn    node converted to a regex through as_regex()
 *   repl  replacement text; '&' and (when ex is set) digits are special
 *   nm    match selector passed by callers (0, 1, or a computed number)
 *   src   variable holding the input string; NULL selects V[F0]
 *   dest  variable receiving the result; NULL selects V[F0]
 *   ex    nonzero enables digit references in repl
 * The output string is assembled in ds, with di as the write offset. */
1798 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1802 int c, i, j, di, rl, so, eo, nbs, n, dssize;
/* slot 0 is the whole match, slots 1-9 are subexpressions */
1803 regmatch_t pmatch[10];
1806 re = as_regex(rn, &sreg);
1807 if (! src) src = V[F0];
1808 if (! dest) dest = V[F0];
/* search repeatedly from sp; REG_NOTBOL is passed once sp no longer
 * points at the start of the source string */
1813 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1814 so = pmatch[0].rm_so;
1815 eo = pmatch[0].rm_eo;
/* copy everything up to the end of the match, reserving rl extra bytes
 * (rl is presumably strlen(repl) - assignment not visible) */
1817 qrealloc(&ds, di + eo + rl, &dssize);
1818 memcpy(ds + di, sp, eo);
/* walk the replacement text one character at a time */
1824 for (s = repl; *s; s++) {
/* '&' or, with ex set, a digit selects matched text to insert */
1830 if (c == '&' || (ex && c >= '0' && c <= '9')) {
/* NOTE(review): nbs looks like a count of preceding backslashes; this
 * steps the output offset back over some of them - confirm */
1831 di -= ((nbs + 3) >> 1);
/* append the text of sub-match j taken from the source string */
1840 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1841 qrealloc(&ds, di + rl + n, &dssize);
1842 memcpy(ds + di, sp + pmatch[j].rm_so, n);
/* copy a single byte from source to output; leave the loop once the
 * terminating NUL has been copied */
1853 if (! (ds[di++] = *sp++)) break;
/* append whatever is left of the source after the last match */
1857 qrealloc(&ds, di + strlen(sp), &dssize);
1858 strcpy(ds + di, sp);
/* release the regex only when as_regex() handed back the local sreg */
1860 if (re == &sreg) regfree(re);
/* Execute the builtin function described by node op; the value is stored
 * through res.
 * Up to four arguments are gathered into an[] (argument nodes), av[]
 * (evaluated values) and as[] (string values), as requested by flag bits of
 * op->info. Supplying fewer than (info >> 30) arguments raises
 * EMSG_TOO_FEW_ARGS. The function to run is selected by info & OPNMASK. */
1864 static var *exec_builtin(node *op, var *res)
1871 regmatch_t pmatch[2];
1873 static tsplitter tspl;
1882 isr = info = op->info;
/* optional arguments default to NULL; awk_sub treats a NULL variable as $0 */
1885 av[2] = av[3] = NULL;
/* when this loop finishes, i holds the number of arguments consumed */
1886 for (i=0 ; i<4 && op ; i++) {
1887 an[i] = nextarg(&op);
1888 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1889 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
/* info >> 30 is the minimum number of arguments for this builtin */
1894 if (nargs < (info >> 30))
1895 runtime_error(EMSG_TOO_FEW_ARGS);
1897 switch (info & OPNMASK) {
/* atan2(y, x): y is the first argument, av[0]. The argument loop leaves i
 * equal to the argument count, so av[i] named the slot after the last
 * supplied argument (NULL for the two-argument call), not y. */
1900 #ifdef CONFIG_FEATURE_AWK_MATH
1901 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
1903 runtime_error(EMSG_NO_MATH);
/* third argument: a regexp node is used as the splitter directly,
 * anything else is turned into one by mk_splitter() from its string value */
1909 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1910 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1915 n = awk_split(as[0], spl, &s);
/* refill the target array with elements 1..n */
1917 clear_array(iamarray(av[1]));
1918 for (i=1; i<=n; i++)
1919 setari_u(av[1], i, nextword(&s1));
/* the start position argument is 1-based; clamp the 0-based index to 0..l */
1926 i = getvar_i(av[1]) - 1;
1927 if (i>l) i=l; if (i<0) i=0;
/* without a third argument the length runs to l */
1928 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1931 strncpy(s, as[0]+i, n);
/* character-wise conversion through to_xxx on a private copy */
1943 s1 = s = bb_xstrdup(as[0]);
1945 *s1 = (*to_xxx)(*s1);
/* l = number of start positions at which as[1] could still fit in as[0] */
1954 l = strlen(as[0]) - ll;
1955 if (ll > 0 && l >= 0) {
/* strstr() search; the reported position is 1-based */
1957 s = strstr(as[0], as[1]);
1958 if (s) n = (s - as[0]) + 1;
1960 /* this piece of code is terribly slow and
1961 * really should be rewritten
1963 for (i=0; i<=l; i++) {
1964 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1976 tt = getvar_i(av[1]);
1979 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1980 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1986 re = as_regex(an[1], &sreg);
1987 n = regexec(re, as[0], 1, pmatch, 0);
1992 pmatch[0].rm_so = 0;
1993 pmatch[0].rm_eo = -1;
1995 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1996 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1997 setvar_i(res, pmatch[0].rm_so);
1998 if (re == &sreg) regfree(re);
2002 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2006 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2010 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2019 /* Evaluate node - the heart of the program. Supplied with a subtree
2020 * and a place to store the result; returns a pointer to the result. */
/* Shift a value right by 8 bits. evaluate() applies it to the masked node
 * class (opinfo & OPCLSMASK) in its switch and to the OC_* constants in the
 * case labels. */
2022 #define XC(n) ((n) >> 8)
/* Evaluate the node op and store the value through res; the existing header
 * comment states that a pointer to the result is returned.
 * The operands requested by the OF_* flags of opinfo are evaluated first,
 * then the code dispatches on the node class (opinfo & OPCLSMASK).
 * L, R and X are scratch slots for the left operand, the right operand and
 * auxiliary data; opn is the operation number within the class. */
2024 static var *evaluate(node *op, var *res)
2026 /* This procedure is recursive so we should count every byte */
/* function-argument storage; OC_FNARG and OC_DELETE index it with l.i */
2027 static var *fnargs = NULL;
/* reassigned by the "seed = ..." statement in the simple-builtins section */
2028 static unsigned int seed = 1;
/* scratch regex handed to as_regex() by OC_MATCH */
2029 static regex_t sreg;
2050 return setvar_s(res, NULL);
/* operation number: the OPNMASK bits of opinfo */
2057 opn = (short)(opinfo & OPNMASK);
2058 lineno = op->lineno;
2060 /* execute inevitable things */
/* OF_RES1 / OF_RES2: evaluate the left / right operand into v1 / v1+1;
 * OF_STR1 / OF_STR2: fetch their string values;
 * OF_NUM1: fetch the numeric value of the left operand */
2062 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2063 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2064 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2065 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2066 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2068 switch (XC(opinfo & OPCLSMASK)) {
2070 /* -- iterative node type -- */
2074 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2075 /* it's range pattern */
/* OF_CHECKED is kept on the node while inside a range: it is set once the
 * start pattern has matched and cleared when the end pattern matches */
2076 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2077 op->info |= OF_CHECKED;
2078 if (ptest(op1->r.n))
2079 op->info &= ~OF_CHECKED;
/* ordinary pattern: continue with a.n on a match, r.n otherwise */
2086 op = (ptest(op1)) ? op->a.n : op->r.n;
2090 /* just evaluate an expression, also used as unconditional jump */
2094 /* branch, used in if-else and various loops */
2096 op = istrue(L.v) ? op->a.n : op->r.n;
2099 /* initialize for-in loop */
2100 case XC( OC_WALKINIT ):
2101 hashwalk_init(L.v, iamarray(R.v));
2104 /* get next array item */
2105 case XC( OC_WALKNEXT ):
2106 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2109 case XC( OC_PRINT ):
2110 case XC( OC_PRINTF ):
/* output redirection: R.s names the target stream */
2113 X.rsm = newfile(R.s);
/* a failing popen() terminates the program */
2116 if((X.rsm->F = popen(R.s, "w")) == NULL)
2117 bb_perror_msg_and_die("popen");
/* opn 'w' opens the file in "w" mode, any other value in "a" mode */
2120 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2126 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* writes $0 (presumably when there are no arguments - condition not visible) */
2128 fputs(getvar_s(V[F0]), X.F);
2131 L.v = evaluate(nextarg(&op1), v1);
/* numbers are formatted with OFMT, integral values as plain integers */
2132 if (L.v->type & VF_NUMBER) {
2133 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2134 getvar_i(L.v), TRUE);
2137 fputs(getvar_s(L.v), X.F);
/* OFS goes between items while more arguments remain */
2140 if (op1) fputs(getvar_s(V[OFS]), X.F);
/* ORS is written after the items */
2143 fputs(getvar_s(V[ORS]), X.F);
2145 } else { /* OC_PRINTF */
2146 L.s = awk_printf(op1);
2153 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2154 X.info = op1->info & OPCLSMASK;
2155 if (X.info == OC_VAR) {
2157 } else if (X.info == OC_FNARG) {
2158 R.v = &fnargs[op1->l.i];
2160 runtime_error(EMSG_NOT_ARRAY);
/* remove the single element named by the evaluated subscript ... */
2165 L.s = getvar_s(evaluate(op1->r.n, v1));
2166 hash_remove(iamarray(R.v), L.s);
/* ... or empty the whole array (the selecting condition is not visible) */
2168 clear_array(iamarray(R.v));
2172 case XC( OC_NEWSOURCE ):
2173 programname = op->l.s;
2176 case XC( OC_RETURN ):
2180 case XC( OC_NEXTFILE ):
2191 /* -- recursive node type -- */
2199 case XC( OC_FNARG ):
2200 L.v = &fnargs[op->l.i];
/* with a subscript node present, look the element up in the array;
 * otherwise the variable itself is the result */
2203 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 when key L.s exists in the array R.v, else 0 */
2207 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2210 case XC( OC_REGEXP ):
/* a bare regexp is matched against $0 */
2212 L.s = getvar_s(V[F0]);
2215 case XC( OC_MATCH ):
2218 X.re = as_regex(op1, &sreg);
2219 R.i = regexec(X.re, L.s, 0, NULL, 0);
/* release the regex only when as_regex() handed back the local sreg */
2220 if (X.re == &sreg) regfree(X.re);
/* regexec() returns 0 on a match; opn '!' inverts the outcome */
2221 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2225 /* if source is a temporary string, just relink it to dest */
2226 if (R.v == v1+1 && R.v->string) {
2227 res = setvar_p(L.v, R.v->string);
/* otherwise the value is copied */
2230 res = copyvar(L.v, R.v);
2234 case XC( OC_TERNARY ):
/* the right child has to be an OC_COLON node holding both alternatives */
2235 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2236 runtime_error(EMSG_POSSIBLE_ERROR);
2237 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* function call: a function without a body is an error */
2241 if (! op->r.f->body.first)
2242 runtime_error(EMSG_UNDEF_FUNC);
/* allocate nargs+1 variables; X.v keeps the start, R.v walks through them */
2244 X.v = R.v = nvalloc(op->r.f->nargs+1);
2246 L.v = evaluate(nextarg(&op1), v1);
/* flag the slot VF_CHILD and point it at the caller's value */
2248 R.v->type |= VF_CHILD;
2249 R.v->x.parent = L.v;
/* true once nargs slots have been filled (the action taken is not visible) */
2250 if (++R.v - X.v >= op->r.f->nargs)
2258 res = evaluate(op->r.f->body.first, res);
2265 case XC( OC_GETLINE ):
2266 case XC( OC_PGETLINE ):
2268 X.rsm = newfile(L.s);
/* OC_PGETLINE reads from a command, the other form from a file */
2270 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2271 X.rsm->F = popen(L.s, "r");
2272 X.rsm->is_pipe = TRUE;
2274 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
/* with no stream open yet, move on to the next input file */
2278 if (! iF) iF = next_input_file();
2283 setvar_i(V[ERRNO], errno);
2291 L.i = awk_getline(X.rsm, R.v);
2301 /* simple builtins */
2302 case XC( OC_FBLTIN ):
/* rand() scaled by RAND_MAX */
2310 R.d = (double)rand() / (double)RAND_MAX;
2313 #ifdef CONFIG_FEATURE_AWK_MATH
2339 runtime_error(EMSG_NO_MATH);
/* new seed: the argument when one is given, otherwise the current time */
2345 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2355 L.s = getvar_s(V[F0]);
/* run the command through system() and keep its result shifted right by 8;
 * a NULL or empty command string yields 0 */
2361 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2369 X.rsm = newfile(L.s);
/* closing a stream: look it up by name in fdhash */
2378 X.rsm = (rstream *)hash_search(fdhash, L.s);
/* pipes are closed with pclose(), everything else with fclose(); then the
 * stream buffer is freed and the entry removed from fdhash */
2380 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2381 free(X.rsm->buffer);
2382 hash_remove(fdhash, L.s);
2385 setvar_i(V[ERRNO], errno);
2392 case XC( OC_BUILTIN ):
2393 res = exec_builtin(op, res);
2396 case XC( OC_SPRINTF ):
/* the buffer returned by awk_printf() is passed to setvar_p() */
2397 setvar_p(res, awk_printf(op1));
2400 case XC( OC_UNARY ):
2402 L.d = R.d = getvar_i(R.v);
/* logical negation of the operand */
2417 L.d = istrue(X.v) ? 0 : 1;
2428 case XC( OC_FIELD ):
2429 R.i = (int)getvar_i(R.v);
/* Fields[] is 0-based: field number R.i lives at index R.i-1 */
2437 res = &Fields[R.i-1];
2441 /* concatenation (" ") and index joining (",") */
2442 case XC( OC_CONCAT ):
2443 case XC( OC_COMMA ):
/* opn is reused here as the buffer size: both strings plus 2 bytes */
2444 opn = strlen(L.s) + strlen(R.s) + 2;
2445 X.s = (char *)xmalloc(opn);
/* index joining needs extra room for the SUBSEP string */
2447 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2448 L.s = getvar_s(V[SUBSEP]);
2449 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
/* short-circuit AND: the right side is tested only when the left is true */
2457 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
/* short-circuit OR: the right side is tested only when the left is false */
2461 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2464 case XC( OC_BINARY ):
2465 case XC( OC_REPLACE ):
2466 R.d = getvar_i(R.v);
2478 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2482 #ifdef CONFIG_FEATURE_AWK_MATH
2483 L.d = pow(L.d, R.d);
2485 runtime_error(EMSG_NO_MATH);
2489 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* remainder: subtract the truncated quotient times the divisor */
/* NOTE(review): the (int) cast is undefined when the quotient does not fit
 * in an int - confirm whether large operands matter here */
2490 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; otherwise the value is written to X.v */
2493 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2496 case XC( OC_COMPARE ):
/* numeric comparison only when both operands are numeric ... */
2497 if (is_numeric(L.v) && is_numeric(R.v)) {
2498 L.d = getvar_i(L.v) - getvar_i(R.v);
/* ... otherwise a string comparison, case-insensitive when icase is set */
2500 L.s = getvar_s(L.v);
2501 R.s = getvar_s(R.v);
2502 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* opn without bit 0 selects the relation; bit 0 chooses between R.i and
 * its negation for the stored 0/1 value */
2504 switch (opn & 0xfe) {
2515 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2519 runtime_error(EMSG_POSSIBLE_ERROR);
/* node classes up to SHIFT_TIL_THIS and from RECUR_FROM_THIS on get
 * separate handling here (the statements themselves are not visible) */
2521 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2523 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2533 /* -------- main & co. -------- */
/* Shutdown path: evaluates the END sequence, then pclose()s every open
 * pipe stream recorded in fdhash.
 * NOTE(review): r is presumably the exit status handed to exit(); the call
 * is not visible in this excerpt - confirm. */
2535 static int awk_exit(int r)
2544 evaluate(endseq.first, &tv);
2547 /* waiting for children */
/* visit every bucket of the stream hash */
2548 for (i=0; i<fdhash->csize; i++) {
2549 hi = fdhash->items[i];
/* only streams that are open and were marked as pipes need pclose() */
2551 if (hi->data.rs.F && hi->data.rs.is_pipe)
2552 pclose(hi->data.rs.F);
2560 /* if expr looks like "var=value", perform assignment and return 1,
2561 * otherwise return 0 */
/* expr is inspected on a private copy (exprc); the caller's string is const
 * and is not modified. */
2562 static int is_assignment(const char *expr)
2564 char *exprc, *s, *s0, *s1;
2566 exprc = bb_xstrdup(expr);
/* not an assignment unless the first character passes isalnum_() and the
 * text contains '='; s is left pointing at the '=' */
2567 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value is rewritten one character at a time through nextchar()
 * (presumably decoding escape sequences - confirm against nextchar) */
2575 *(s1++) = nextchar(&s);
/* create or look up the variable named by exprc and store the value s0 */
2578 setvar_u(newvar(exprc), s0);
2583 /* switch to next input file */
/* Closes the current input stream, then walks ARGV (indexed by ARGIND)
 * looking for the next operand to open. Operands accepted by
 * is_assignment() are not opened as files. FILENAME is set to the operand
 * that was chosen. */
2584 static rstream *next_input_file(void)
/* set to TRUE below once an operand has been taken as an input file */
2589 static int files_happen = FALSE;
/* drop the previous stream and reset its position counters */
2591 if (rsm.F) fclose(rsm.F);
2593 rsm.pos = rsm.adv = 0;
/* true when no ARGV entries remain after the current ARGIND */
2596 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* advance ARGIND and fetch ARGV[ARGIND] */
2602 ind = getvar_s(incvar(V[ARGIND]));
2603 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* empty operands and var=value operands are not opened */
2604 if (fname && *fname && !is_assignment(fname))
2605 F = afopen(fname, "r");
2609 files_happen = TRUE;
2610 setvar_s(V[FILENAME], fname);
/* Applet entry point. Visible steps, in order: allocate the global buffer
 * and the four hashes, create the predefined variables, register the three
 * standard streams, import the environment into ENVIRON, process the
 * options F, v, f and W, fill in ARGC/ARGV, evaluate the BEGIN sequence,
 * then feed every input record to the main sequence and finish through
 * awk_exit(). */
2615 int awk_main(int argc, char **argv)
2622 static int from_file = FALSE;
2624 FILE *F, *stdfiles[3];
/* three NUL-separated names, consumed one per nextword() call below */
2625 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2627 /* allocate global buffer */
2628 buf = xmalloc(MAXVARFMT+1);
2630 vhash = hash_init();
2631 ahash = hash_init();
2632 fdhash = hash_init();
2633 fnhash = hash_init();
2635 /* initialize variables */
/* one variable per word of vNames, stored in V[] in the same order */
2636 for (i=0; *vNames; i++) {
2637 V[i] = v = newvar(nextword(&vNames));
/* a '\377' at the current vValues position means no string is taken from it */
2638 if (*vValues != '\377')
2639 setvar_s(v, nextword(&vValues));
/* a '*' right after the name flags the variable just created as VF_SPECIAL */
2643 if (*vNames == '*') {
2644 v->type |= VF_SPECIAL;
/* run the special-variable handling once for the initial FS and RS */
2649 handle_special(V[FS]);
2650 handle_special(V[RS]);
/* register stdin, stdout and stderr under the names from stdnames */
2652 stdfiles[0] = stdin;
2653 stdfiles[1] = stdout;
2654 stdfiles[2] = stderr;
2655 for (i=0; i<3; i++) {
2656 rsm = newfile(nextword(&stdnames));
2657 rsm->F = stdfiles[i];
/* copy each environment entry into the ENVIRON array, working on a
 * duplicate of the entry; s1 starts at the '=' found by strchr() */
2660 for (envp=environ; *envp; envp++) {
2661 s = bb_xstrdup(*envp);
2662 s1 = strchr(s, '=');
2667 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
/* command-line options; each of F, v, f and W takes an argument */
2672 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2675 setvar_s(V[FS], optarg);
/* the option argument must be accepted by is_assignment() */
2678 if (! is_assignment(optarg))
/* the program file name also becomes programname */
2683 F = afopen(programname = optarg, "r");
2685 /* one byte is reserved for some trick in next_token */
/* seekable file: after the seek to the end, the text is read with a single
 * fread() into a buffer of flen+4 bytes, starting at s+1 */
2686 if (fseek(F, 0, SEEK_END) == 0) {
2688 s = (char *)xmalloc(flen+4);
2689 fseek(F, 0, SEEK_SET);
2690 i = 1 + fread(s+1, 1, flen, F);
/* chunked reading, growing the buffer each round until fread() returns 0
 * (presumably the non-seekable case - the else line is not visible) */
2692 for (i=j=1; j>0; i+=j) {
2693 s = (char *)xrealloc(s, i+4096);
2694 j = fread(s+i, 1, 4094, F);
/* NOTE(review): the message ends in "\n"; confirm bb_error_msg() does not
 * append a newline of its own */
2703 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
/* the next command-line word is parsed as the program text */
2714 programname="cmd. line";
2715 parse_program(argv[optind++]);
2719 /* fill in ARGV array */
/* ARGC counts the remaining operands plus ARGV[0], which is "awk" */
2720 setvar_i(V[ARGC], argc - optind + 1);
2721 setari_u(V[ARGV], 0, "awk");
2722 for(i=optind; i < argc; i++)
2723 setari_u(V[ARGV], i+1-optind, argv[i]);
/* evaluate the BEGIN sequence; with neither a main nor an END sequence
 * awk_exit() is called straight away */
2725 evaluate(beginseq.first, &tv);
2726 if (! mainseq.first && ! endseq.first)
2727 awk_exit(EXIT_SUCCESS);
2729 /* input file could already be opened in BEGIN block */
2730 if (! iF) iF = next_input_file();
2732 /* passing through input files */
/* FNR is reset to 0 here */
2736 setvar_i(V[FNR], 0);
/* each record is read into $0 and the main sequence is evaluated on it */
2738 while ((c = awk_getline(iF, V[F0])) > 0) {
2743 evaluate(mainseq.first, &tv);
/* reports strerror(errno) (the triggering condition is not visible) */
2750 runtime_error(strerror(errno));
2752 iF = next_input_file();
2756 awk_exit(EXIT_SUCCESS);