1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned short type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
68 unsigned short is_pipe;
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names */
86 struct hash_item_s **items;
90 typedef struct node_s {
92 unsigned short lineno;
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
164 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
171 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
175 TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
185 TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
198 #define xS (OF_RES2 | OF_STR2)
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
210 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
211 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place x (operator priority, or the builtin argument descriptor) into the
 * top byte of a 32-bit info word. The argument is parenthesized so that
 * expressions such as P(a | b) shift the whole value, and the shift is done
 * in uint32_t because values like 0x83 do not fit a signed int once shifted
 * left by 24 bits. */
#define P(x) ((uint32_t)(x) << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
218 /* Operation classes */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
224 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
225 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
227 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
228 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
229 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
231 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
232 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
233 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
234 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
235 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
236 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
237 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
238 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
241 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
245 /* simple builtins */
247 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
367 CONVFMT=0, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, ARGIND, ARGC, ARGV,
372 ENVIRON, F0, _intvarcount_
375 static const char vNames[] =
376 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
377 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
378 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
380 "NR\0" "NF\0*" "IGNORECASE\0*"
381 "ENVIRON\0" "$\0*" "\0";
383 static const char vValues[] =
384 "%.6g\0" "%.6g\0" " \0" " \0"
385 "\n\0" "\n\0" "\0" "\0"
389 /* hash size may grow to these values */
/* initial number of buckets of a freshly created hash (used by hash_init);
 * defined without a trailing semicolon so it can be used inside expressions */
#define FIRST_PRIME 61
/* bucket counts taken, in this order, each time hash_rebuild() grows a hash */
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
/* number of entries in PRIMES[]; computed from the array's own element size
 * so the count stays right if the element type is ever changed */
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
396 extern char **environ;
398 static var * V[_intvarcount_];
399 static chain beginseq, mainseq, endseq, *seq;
400 static int nextrec, nextfile;
401 static node *break_ptr, *continue_ptr;
403 static xhash *vhash, *ahash, *fdhash, *fnhash;
404 static const char *programname;
406 static int is_f0_split;
409 static tsplitter fsplitter, rsplitter;
425 /* function prototypes */
426 static void handle_special(var *);
427 static node *parse_expr(uint32_t);
428 static void chain_group(void);
429 static var *evaluate(node *, var *);
430 static rstream *next_input_file(void);
431 static int fmt_num(char *, int, const char *, double, int);
432 static int awk_exit(int) ATTRIBUTE_NORETURN;
434 /* ---- error handling ---- */
436 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
437 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
438 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
439 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
440 static const char EMSG_INV_FMT[] = "Invalid format specifier";
441 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
442 static const char EMSG_NOT_ARRAY[] = "Not an array";
443 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
444 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
445 #if !ENABLE_FEATURE_AWK_MATH
446 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
/* Reset the variable pointed to by vp: every byte of the structure
 * (sizeof(*vp) bytes) is overwritten with zero. */
449 static void zero_out_var(var * vp)
451 memset(vp, 0, sizeof(*vp));
/* Report a fatal error. The text handed to bb_error_msg_and_die() has the
 * form "<programname>:<lineno>: <message>". The function is declared
 * ATTRIBUTE_NORETURN, i.e. it is not expected to return to its caller. */
454 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
455 static void syntax_error(const char * const message)
457 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
460 #define runtime_error(x) syntax_error(x)
463 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name. For every character the running value becomes
 * idx * 63 + character, because (idx << 6) - idx == idx * 63. Callers in this
 * file reduce the result modulo the table size (hash->csize). */
465 static unsigned hashidx(const char *name)
469 while (*name) idx = *name++ + (idx << 6) - idx;
473 /* create new hash */
474 static xhash *hash_init(void)
478 newhash = xzalloc(sizeof(xhash));
479 newhash->csize = FIRST_PRIME;
480 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
485 /* find item in hash, return ptr to data, NULL if not found */
486 static void *hash_search(xhash *hash, const char *name)
490 hi = hash->items [ hashidx(name) % hash->csize ];
492 if (strcmp(hi->name, name) == 0)
499 /* grow hash if it becomes too big */
500 static void hash_rebuild(xhash *hash)
502 unsigned newsize, i, idx;
503 hash_item **newitems, *hi, *thi;
505 if (hash->nprime == NPRIMES)
508 newsize = PRIMES[hash->nprime++];
509 newitems = xzalloc(newsize * sizeof(hash_item *));
511 for (i=0; i<hash->csize; i++) {
516 idx = hashidx(thi->name) % newsize;
517 thi->next = newitems[idx];
523 hash->csize = newsize;
524 hash->items = newitems;
527 /* find item in hash, add it if necessary. Return ptr to data */
528 static void *hash_find(xhash *hash, const char *name)
534 hi = hash_search(hash, name);
536 if (++hash->nel / hash->csize > 10)
539 l = strlen(name) + 1;
540 hi = xzalloc(sizeof(hash_item) + l);
541 memcpy(hi->name, name, l);
543 idx = hashidx(name) % hash->csize;
544 hi->next = hash->items[idx];
545 hash->items[idx] = hi;
551 #define findvar(hash, name) ((var*) hash_find((hash) , (name)))
552 #define newvar(name) ((var*) hash_find(vhash , (name)))
553 #define newfile(name) ((rstream*)hash_find(fdhash ,(name)))
554 #define newfunc(name) ((func*) hash_find(fnhash , (name)))
556 static void hash_remove(xhash *hash, const char *name)
558 hash_item *hi, **phi;
560 phi = &(hash->items[ hashidx(name) % hash->csize ]);
563 if (strcmp(hi->name, name) == 0) {
564 hash->glen -= (strlen(name) + 1);
574 /* ------ some useful functions ------ */
576 static void skip_spaces(char **s)
580 while (*p == ' ' || *p == '\t' ||
581 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
587 static char *nextword(char **s)
591 while (*(*s)++) /* */;
596 static char nextchar(char **s)
602 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
603 if (c == '\\' && *s == pps) c = *((*s)++);
/* Return non-zero when c is an alphanumeric character or '_'.
 * c is converted to unsigned char before it reaches isalnum(): callers pass
 * plain char values, and handing a negative value other than EOF to a
 * <ctype.h> function is undefined behaviour. */
607 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
609 return (isalnum((unsigned char)c) || c == '_');
/* Open a file for awk: a path that is exactly "-" yields stdin (mode is
 * ignored in that case); any other path is passed to xfopen() together
 * with mode. */
612 static FILE *afopen(const char *path, const char *mode)
614 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
617 /* -------- working with variables (set/get/copy/etc) -------- */
619 static xhash *iamarray(var *v)
623 while (a->type & VF_CHILD)
626 if (! (a->type & VF_ARRAY)) {
628 a->x.array = hash_init();
633 static void clear_array(xhash *array)
638 for (i=0; i<array->csize; i++) {
639 hi = array->items[i];
643 free(thi->data.v.string);
646 array->items[i] = NULL;
648 array->glen = array->nel = 0;
651 /* clear a variable */
652 static var *clrvar(var *v)
654 if (!(v->type & VF_FSTR))
657 v->type &= VF_DONTTOUCH;
663 /* assign string value to variable */
664 static var *setvar_p(var *v, char *value)
673 /* same as setvar_p but make a copy of string */
674 static var *setvar_s(var *v, const char *value)
/* a NULL or empty value is not duplicated: setvar_p() receives NULL instead;
 * the return value is whatever setvar_p() returns */
676 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
679 /* same as setvar_s but set USER flag */
680 static var *setvar_u(var *v, const char *value)
687 /* set array element to user string */
688 static void setari_u(var *a, int idx, const char *s)
691 static char sidx[12];
693 sprintf(sidx, "%d", idx);
694 v = findvar(iamarray(a), sidx);
698 /* assign numeric value to variable */
699 static var *setvar_i(var *v, double value)
702 v->type |= VF_NUMBER;
/* Return the string value of v. The result is never NULL: a variable whose
 * string pointer is NULL is reported as "". */
708 static const char *getvar_s(var *v)
710 /* if v is numeric and has no cached string, convert it to string */
711 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format v->number into buf using the current value of CONVFMT as the
 * format; TRUE is passed as fmt_num()'s int_as_int argument */
712 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
/* keep a private copy and remember that the string form is now cached */
713 v->string = xstrdup(buf);
714 v->type |= VF_CACHED;
716 return (v->string == NULL) ? "" : v->string;
719 static double getvar_i(var *v)
723 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
727 v->number = strtod(s, &s);
728 if (v->type & VF_USER) {
736 v->type |= VF_CACHED;
741 static var *copyvar(var *dest, const var *src)
745 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
746 dest->number = src->number;
748 dest->string = xstrdup(src->string);
750 handle_special(dest);
/* Increment v: read its numeric value with getvar_i(), add 1 and store the
 * sum back through setvar_i(), whose return value is passed on. */
754 static var *incvar(var *v)
756 return setvar_i(v, getvar_i(v)+1.);
759 /* return true if v is number or numeric string */
760 static int is_numeric(var *v)
763 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
766 /* return 1 when value of v corresponds to true, 0 otherwise */
767 static int istrue(var *v)
770 return (v->number == 0) ? 0 : 1;
772 return (v->string && *(v->string)) ? 1 : 0;
775 /* temporary variables allocator. Last allocated should be first freed */
776 static var *nvalloc(int n)
784 if ((cb->pos - cb->nv) + n <= cb->size) break;
789 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
790 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
795 if (pb) pb->next = cb;
801 while (v < cb->pos) {
810 static void nvfree(var *v)
814 if (v < cb->nv || v >= cb->pos)
815 runtime_error(EMSG_INTERNAL_ERROR);
817 for (p=v; p<cb->pos; p++) {
818 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
819 clear_array(iamarray(p));
820 free(p->x.array->items);
823 if (p->type & VF_WALK)
830 while (cb->prev && cb->pos == cb->nv) {
835 /* ------- awk program text parsing ------- */
837 /* Parse the next token pointed to by global pos, place results into global t.
838 * If the token isn't expected, give up (syntax error). Return the token class
840 static uint32_t next_token(uint32_t expected)
842 static int concat_inserted;
843 static uint32_t save_tclass, save_info;
844 static uint32_t ltclass = TC_OPTERM;
855 } else if (concat_inserted) {
856 concat_inserted = FALSE;
857 t.tclass = save_tclass;
866 while (*p != '\n' && *p != '\0') p++;
874 } else if (*p == '\"') {
878 if (*p == '\0' || *p == '\n')
879 syntax_error(EMSG_UNEXP_EOS);
880 *(s++) = nextchar(&p);
886 } else if ((expected & TC_REGEXP) && *p == '/') {
890 if (*p == '\0' || *p == '\n')
891 syntax_error(EMSG_UNEXP_EOS);
892 if ((*s++ = *p++) == '\\') {
894 *(s-1) = bb_process_escape_sequence((const char **)&p);
895 if (*pp == '\\') *s++ = '\\';
896 if (p == pp) *s++ = *p++;
903 } else if (*p == '.' || isdigit(*p)) {
905 t.number = strtod(p, &p);
907 syntax_error(EMSG_UNEXP_TOKEN);
911 /* search for something known */
921 /* if token class is expected, token
922 * matches and it's not a longer word,
923 * then this is what we are looking for
925 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
926 *tl == *p && strncmp(p, tl, l) == 0 &&
927 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
937 /* it's a name (var/array/function),
938 * otherwise it's something wrong
941 syntax_error(EMSG_UNEXP_TOKEN);
944 while (isalnum_(*(++p))) {
949 /* also consume whitespace between functionname and bracket */
950 if (!(expected & TC_VARIABLE)) skip_spaces(&p);
963 /* skipping newlines in some cases */
964 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
967 /* insert concatenation operator when needed */
968 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
969 concat_inserted = TRUE;
973 t.info = OC_CONCAT | SS | P(35);
980 /* Are we ready for this? */
981 if (! (ltclass & expected))
982 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
983 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
988 static void rollback_token(void) { t.rollback = TRUE; }
990 static node *new_node(uint32_t info)
994 n = xzalloc(sizeof(node));
1000 static node *mk_re_node(const char *s, node *n, regex_t *re)
1002 n->info = OC_REGEXP;
1005 xregcomp(re, s, REG_EXTENDED);
1006 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression and return the subtree built by
 * parse_expr(). */
1011 static node *condition(void)
/* the next token must be the opening '(' (TC_SEQSTART) */
1013 next_token(TC_SEQSTART);
/* parse up to the closing ')' (TC_SEQTERM) */
1014 return parse_expr(TC_SEQTERM);
1017 /* parse expression terminated by given argument, return ptr
1018 * to built subtree. Terminator is eaten by parse_expr */
1019 static node *parse_expr(uint32_t iexp)
1028 sn.r.n = glptr = NULL;
1029 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1031 while (! ((tc = next_token(xtc)) & iexp)) {
1032 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1033 /* input redirection (<) attached to glptr node */
1034 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1036 xtc = TC_OPERAND | TC_UOPPRE;
1039 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1040 /* for binary and postfix-unary operators, jump back over
1041 * previous operators with higher priority */
1043 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1044 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1046 if ((t.info & OPCLSMASK) == OC_TERNARY)
1048 cn = vn->a.n->r.n = new_node(t.info);
1050 if (tc & TC_BINOP) {
1052 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1053 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1055 next_token(TC_GETLINE);
1056 /* give maximum priority to this pipe */
1057 cn->info &= ~PRIMASK;
1058 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1062 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1067 /* for operands and prefix-unary operators, attach them
1070 cn = vn->r.n = new_node(t.info);
1072 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1073 if (tc & (TC_OPERAND | TC_REGEXP)) {
1074 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1075 /* one should be very careful with switch on tclass -
1076 * only simple tclasses should be used! */
1081 if ((v = hash_search(ahash, t.string)) != NULL) {
1082 cn->info = OC_FNARG;
1083 cn->l.i = v->x.aidx;
1085 cn->l.v = newvar(t.string);
1087 if (tc & TC_ARRAY) {
1089 cn->r.n = parse_expr(TC_ARRTERM);
1096 v = cn->l.v = xzalloc(sizeof(var));
1098 setvar_i(v, t.number);
1100 setvar_s(v, t.string);
1104 mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1109 cn->r.f = newfunc(t.string);
1110 cn->l.n = condition();
1114 cn = vn->r.n = parse_expr(TC_SEQTERM);
1120 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1124 cn->l.n = condition();
1133 /* add node to chain. Return ptr to alloc'd node */
1134 static node *chain_node(uint32_t info)
1139 seq->first = seq->last = new_node(0);
1141 if (seq->programname != programname) {
1142 seq->programname = programname;
1143 n = chain_node(OC_NEWSOURCE);
1144 n->l.s = xstrdup(programname);
1149 seq->last = n->a.n = new_node(OC_DONE);
1154 static void chain_expr(uint32_t info)
1158 n = chain_node(info);
1159 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1160 if (t.tclass & TC_GRPTERM)
1164 static node *chain_loop(node *nn)
1166 node *n, *n2, *save_brk, *save_cont;
1168 save_brk = break_ptr;
1169 save_cont = continue_ptr;
1171 n = chain_node(OC_BR | Vx);
1172 continue_ptr = new_node(OC_EXEC);
1173 break_ptr = new_node(OC_EXEC);
1175 n2 = chain_node(OC_EXEC | Vx);
1178 continue_ptr->a.n = n2;
1179 break_ptr->a.n = n->r.n = seq->last;
1181 continue_ptr = save_cont;
1182 break_ptr = save_brk;
1187 /* parse group and attach it to chain */
1188 static void chain_group(void)
1194 c = next_token(TC_GRPSEQ);
1195 } while (c & TC_NEWLINE);
1197 if (c & TC_GRPSTART) {
1198 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1199 if (t.tclass & TC_NEWLINE) continue;
1203 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1205 chain_expr(OC_EXEC | Vx);
1206 } else { /* TC_STATEMNT */
1207 switch (t.info & OPCLSMASK) {
1209 n = chain_node(OC_BR | Vx);
1210 n->l.n = condition();
1212 n2 = chain_node(OC_EXEC);
1214 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1216 n2->a.n = seq->last;
1224 n = chain_loop(NULL);
1229 n2 = chain_node(OC_EXEC);
1230 n = chain_loop(NULL);
1232 next_token(TC_WHILE);
1233 n->l.n = condition();
1237 next_token(TC_SEQSTART);
1238 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1239 if (t.tclass & TC_SEQTERM) { /* for-in */
1240 if ((n2->info & OPCLSMASK) != OC_IN)
1241 syntax_error(EMSG_UNEXP_TOKEN);
1242 n = chain_node(OC_WALKINIT | VV);
1245 n = chain_loop(NULL);
1246 n->info = OC_WALKNEXT | Vx;
1248 } else { /* for (;;) */
1249 n = chain_node(OC_EXEC | Vx);
1251 n2 = parse_expr(TC_SEMICOL);
1252 n3 = parse_expr(TC_SEQTERM);
1262 n = chain_node(t.info);
1263 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1264 if (t.tclass & TC_OUTRDR) {
1266 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1268 if (t.tclass & TC_GRPTERM)
1273 n = chain_node(OC_EXEC);
1278 n = chain_node(OC_EXEC);
1279 n->a.n = continue_ptr;
1282 /* delete, next, nextfile, return, exit */
1289 static void parse_program(char *p)
1298 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1299 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1301 if (tclass & TC_OPTERM)
1305 if (tclass & TC_BEGIN) {
1309 } else if (tclass & TC_END) {
1313 } else if (tclass & TC_FUNCDECL) {
1314 next_token(TC_FUNCTION);
1316 f = newfunc(t.string);
1317 f->body.first = NULL;
1319 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1320 v = findvar(ahash, t.string);
1321 v->x.aidx = (f->nargs)++;
1323 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1330 } else if (tclass & TC_OPSEQ) {
1332 cn = chain_node(OC_TEST);
1333 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1334 if (t.tclass & TC_GRPSTART) {
1338 chain_node(OC_PRINT);
1340 cn->r.n = mainseq.last;
1342 } else /* if (tclass & TC_GRPSTART) */ {
1350 /* -------- program execution part -------- */
1352 static node *mk_splitter(const char *s, tsplitter *spl)
1360 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1364 if (strlen(s) > 1) {
1365 mk_re_node(s, n, re);
1367 n->info = (uint32_t) *s;
1373 /* use node as a regular expression. Supplied with node ptr and regex_t
1374 * storage space. Return ptr to regex (if the result points to preg, it
1375 * should be regfree'd manually later)
1377 static regex_t *as_regex(node *op, regex_t *preg)
1382 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1383 return icase ? op->r.ire : op->l.re;
1386 s = getvar_s(evaluate(op, v));
1387 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1393 /* gradually increasing buffer */
1394 static void qrealloc(char **b, int n, int *size)
/* reallocate only when there is no buffer yet or n has reached the recorded
 * capacity; the new capacity, n + n/2 + 80 bytes, is stored in *size and the
 * (possibly moved) buffer in *b */
1396 if (!*b || n >= *size)
1397 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1400 /* resize field storage space */
1401 static void fsrealloc(int size)
1403 static int maxfields; /* = 0;*/
1406 if (size >= maxfields) {
1408 maxfields = size + 16;
1409 Fields = xrealloc(Fields, maxfields * sizeof(var));
1410 for (; i < maxfields; i++) {
1411 Fields[i].type = VF_SPECIAL;
1412 Fields[i].string = NULL;
1416 if (size < nfields) {
1417 for (i = size; i < nfields; i++) {
1424 static int awk_split(const char *s, node *spl, char **slist)
1429 regmatch_t pmatch[2];
1431 /* in worst case, each char would be a separate field */
1432 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1435 c[0] = c[1] = (char)spl->info;
1437 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1439 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1441 l = strcspn(s, c+2);
1442 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1443 && pmatch[0].rm_so <= l
1445 l = pmatch[0].rm_so;
1446 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1448 pmatch[0].rm_eo = l;
1449 if (s[l]) pmatch[0].rm_eo++;
1455 s += pmatch[0].rm_eo;
1458 } else if (c[0] == '\0') { /* null split */
1464 } else if (c[0] != ' ') { /* single-character split */
1466 c[0] = toupper(c[0]);
1467 c[1] = tolower(c[1]);
1470 while ((s1 = strpbrk(s1, c))) {
1474 } else { /* space split */
1476 s = skip_whitespace(s);
1479 while (*s && !isspace(*s))
1487 static void split_f0(void)
1489 static char *fstrings = NULL;
1499 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1502 for (i = 0; i < n; i++) {
1503 Fields[i].string = nextword(&s);
1504 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1507 /* set NF manually to avoid side effects */
1509 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1510 V[NF]->number = nfields;
1513 /* perform additional actions when some internal variables changed */
1514 static void handle_special(var *v)
1518 const char *sep, *s;
1519 int sl, l, len, i, bsize;
1521 if (!(v->type & VF_SPECIAL))
1525 n = (int)getvar_i(v);
1528 /* recalculate $0 */
1529 sep = getvar_s(V[OFS]);
1533 for (i=0; i<n; i++) {
1534 s = getvar_s(&Fields[i]);
1537 memcpy(b+len, sep, sl);
1540 qrealloc(&b, len+l+sl, &bsize);
1541 memcpy(b+len, s, l);
1549 } else if (v == V[F0]) {
1550 is_f0_split = FALSE;
1552 } else if (v == V[FS]) {
1553 mk_splitter(getvar_s(v), &fsplitter);
1555 } else if (v == V[RS]) {
1556 mk_splitter(getvar_s(v), &rsplitter);
1558 } else if (v == V[IGNORECASE]) {
1562 n = getvar_i(V[NF]);
1563 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1564 /* right here v is invalid. Just to note... */
1568 /* step through func/builtin/etc arguments */
1569 static node *nextarg(node **pn)
1574 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1583 static void hashwalk_init(var *v, xhash *array)
1589 if (v->type & VF_WALK)
1593 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1594 *w = *(w+1) = (char *)(w + 2);
1595 for (i=0; i<array->csize; i++) {
1596 hi = array->items[i];
1598 strcpy(*w, hi->name);
1605 static int hashwalk_next(var *v)
1613 setvar_s(v, nextword(w+1));
1617 /* evaluate node, return 1 when result is true, 0 otherwise */
1618 static int ptest(node *pattern)
/* because v is static, every call hands the same storage object to
 * evaluate() as its second argument */
1620 static var v; /* static: to save stack space? */
/* the truth value of the evaluated pattern is decided by istrue() */
1622 return istrue(evaluate(pattern, &v));
1625 /* read next record from stream rsm into a variable v */
1626 static int awk_getline(rstream *rsm, var *v)
1629 regmatch_t pmatch[2];
1630 int a, p, pp=0, size;
1631 int fd, so, eo, r, rp;
1634 /* we're using our own buffer since we need access to accumulating
1637 fd = fileno(rsm->F);
1642 c = (char) rsplitter.n.info;
1645 if (! m) qrealloc(&m, 256, &size);
1651 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1652 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1653 b, 1, pmatch, 0) == 0) {
1654 so = pmatch[0].rm_so;
1655 eo = pmatch[0].rm_eo;
1659 } else if (c != '\0') {
1660 s = strchr(b+pp, c);
1661 if (! s) s = memchr(b+pp, '\0', p - pp);
1668 while (b[rp] == '\n')
1670 s = strstr(b+rp, "\n\n");
1673 while (b[eo] == '\n') eo++;
1681 memmove(m, (const void *)(m+a), p+1);
1686 qrealloc(&m, a+p+128, &size);
1689 p += safe_read(fd, b+p, size-p-1);
1693 setvar_i(V[ERRNO], errno);
1702 c = b[so]; b[so] = '\0';
1706 c = b[eo]; b[eo] = '\0';
1707 setvar_s(V[RT], b+so);
1719 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1723 const char *s = format;
1725 if (int_as_int && n == (int)n) {
1726 r = snprintf(b, size, "%d", (int)n);
1728 do { c = *s; } while (c && *++s);
1729 if (strchr("diouxX", c)) {
1730 r = snprintf(b, size, format, (int)n);
1731 } else if (strchr("eEfgG", c)) {
1732 r = snprintf(b, size, format, n);
1734 runtime_error(EMSG_INV_FMT);
1741 /* formatted output into an allocated buffer, return ptr to buffer */
1742 static char *awk_printf(node *n)
1747 int i, j, incr, bsize;
1752 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1757 while (*f && (*f != '%' || *(++f) == '%'))
1759 while (*f && !isalpha(*f)) {
1761 syntax_error("%*x formats are not supported");
1765 incr = (f - s) + MAXVARFMT;
1766 qrealloc(&b, incr + i, &bsize);
1771 arg = evaluate(nextarg(&n), v);
1774 if (c == 'c' || !c) {
1775 i += sprintf(b+i, s, is_numeric(arg) ?
1776 (char)getvar_i(arg) : *getvar_s(arg));
1778 } else if (c == 's') {
1780 qrealloc(&b, incr+i+strlen(s1), &bsize);
1781 i += sprintf(b+i, s, s1);
1784 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1788 /* if there was an error while sprintf, return value is negative */
1792 b = xrealloc(b, i + 1);
1799 /* common substitution routine
1800 * replace (nm) substring of (src) that match (n) with (repl), store
1801 * result into (dest), return number of substitutions. If nm=0, replace
1802 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1803 * subexpression matching (\1-\9)
/*
 * Parameters as used by the visible code:
 *   rn    node turned into a compiled regex by as_regex()
 *   repl  replacement text, scanned one character at a time
 *   nm    match selector (callers in this file pass 0, 1, or a value
 *         computed from an argument)
 *   src   variable supplying the input string, V[F0] when NULL
 *   dest  variable for the result, V[F0] when NULL
 *   ex    when non-zero, a digit in repl is treated like '&' and refers
 *         to a pmatch[] entry
 * The output is assembled in ds (grown with qrealloc()); di is the
 * write offset into it.
 */
1805 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1810 int c, i, j, di, rl, so, eo, nbs, n, dssize;
/* ten slots are passed to regexec() below */
1811 regmatch_t pmatch[10];
1814 re = as_regex(rn, &sreg);
1815 if (! src) src = V[F0];
1816 if (! dest) dest = V[F0];
/* REG_NOTBOL is passed whenever sp is no longer the start of the source
 * string, i.e. on every search after the scan has advanced */
1821 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1822 so = pmatch[0].rm_so;
1823 eo = pmatch[0].rm_eo;
/* copy everything up to the end of this match; room for rl more bytes
 * is reserved at the same time (rl is presumably the length of repl) */
1825 qrealloc(&ds, di + eo + rl, &dssize);
1826 memcpy(ds + di, sp, eo);
1832 for (s = repl; *s; s++) {
/* '&', or a digit when ex is set, inserts matched text.  di is first
 * moved back by an amount derived from nbs (presumably the number of
 * backslashes seen just before -- confirm) */
1838 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1839 di -= ((nbs + 3) >> 1);
/* append the text of pmatch[j] taken from the current scan position */
1848 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1849 qrealloc(&ds, di + rl + n, &dssize);
1850 memcpy(ds + di, sp + pmatch[j].rm_so, n);
/* copy one source character and stop when it was the terminator */
1861 if (! (ds[di++] = *sp++)) break;
/* append whatever is left of the source after the last match */
1865 qrealloc(&ds, di + strlen(sp), &dssize);
1866 strcpy(ds + di, sp);
/* release the pattern only when as_regex() handed back the scratch
 * regex it was given */
1868 if (re == &sreg) regfree(re);
/*
 * Run the built-in function described by node (op); the visible cases
 * store their result in (res) through setvar_i() or pass (res) on to
 * awk_sub().
 *
 * Argument handling: up to four arguments are taken from the list with
 * nextarg().  an[] keeps the argument nodes, av[] the evaluated values
 * and as[] their string forms; the latter two are filled only when the
 * matching bits of isr are set.  The top two bits of op->info
 * (info >> 30) give the minimum number of arguments, and
 * (info & OPNMASK) selects the function.
 */
1872 static var *exec_builtin(node *op, var *res)
1879 regmatch_t pmatch[2];
1881 static tsplitter tspl;
1890 isr = info = op->info;
/* the optional third and fourth values start out as NULL */
1893 av[2] = av[3] = NULL;
1894 for (i=0 ; i<4 && op ; i++) {
1895 an[i] = nextarg(&op);
1896 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1897 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1902 if (nargs < (info >> 30))
1903 runtime_error(EMSG_TOO_FEW_ARGS);
1905 switch (info & OPNMASK) {
/* NOTE(review): av[i] uses the counter left over from the argument loop
 * above (unless i is reset on a line not shown); a two-argument atan2
 * would be expected to read av[0] and av[1] -- looks like a typo,
 * confirm */
1908 #if ENABLE_FEATURE_AWK_MATH
1909 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1911 runtime_error(EMSG_NO_MATH);
/* splitter for the third argument: a regexp node is used as it is, any
 * other expression is evaluated and its string goes to mk_splitter() */
1917 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1918 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1923 n = awk_split(as[0], spl, &s);
/* the target array is emptied, then filled with elements 1..n */
1925 clear_array(iamarray(av[1]));
1926 for (i=1; i<=n; i++)
1927 setari_u(av[1], i, nextword(&s1));
/* the start position is converted to a zero-based offset and clamped to
 * the range 0..l; without a third argument the length runs to l */
1934 i = getvar_i(av[1]) - 1;
1935 if (i>l) i=l; if (i<0) i=0;
1936 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
/* NOTE(review): strncpy() does not terminate the copy when n bytes are
 * taken; termination presumably happens on a line not visible here */
1939 strncpy(s, as[0]+i, n);
/* bitwise helpers: operands are converted from double to long (unsigned
 * long for the right shift).
 * NOTE(review): shift counts are used unchecked -- confirm callers */
1945 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1949 setvar_i(res, ~(long)getvar_i(av[0]));
1953 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1957 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1961 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1965 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
/* case conversion works on a private copy, one character at a time */
1975 s1 = s = xstrdup(as[0]);
1977 *s1 = (*to_xxx)(*s1);
/* substring search: l is the last offset at which the needle can still
 * fit (ll is presumably the needle length); the result is 1-based */
1986 l = strlen(as[0]) - ll;
1987 if (ll > 0 && l >= 0) {
1989 s = strstr(as[0], as[1]);
1990 if (s) n = (s - as[0]) + 1;
1992 /* this piece of code is terribly slow and
1993 * really should be rewritten
1995 for (i=0; i<=l; i++) {
1996 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2008 tt = getvar_i(av[1]);
2011 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// the first argument, when given, is the strftime() format; otherwise
// the default format below is used
2012 i = strftime(buf, MAXVARFMT,
2013 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2020 re = as_regex(an[1], &sreg);
2021 n = regexec(re, as[0], 1, pmatch, 0);
// these two assignments give RSTART 0 and RLENGTH -1 below (presumably
// the no-match path -- the surrounding condition is not visible)
2026 pmatch[0].rm_so = 0;
2027 pmatch[0].rm_eo = -1;
2029 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2030 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2031 setvar_i(res, pmatch[0].rm_so);
2032 if (re == &sreg) regfree(re);
// three awk_sub() variants: the first takes its match selector from
// av[2], reads av[3] and writes to res with ex enabled; the other two
// edit av[2] in place with selector 0 and 1 and return the count
2036 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2040 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2044 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2053 /* Evaluate node - the heart of the program. Supplied with a subtree
2054 * and a place to store the result; returns a pointer to the result. */
/* XC() drops the low eight bits of an operation-class value; it is
 * applied both to the switch operand and to every case label below */
2056 #define XC(n) ((n) >> 8)
/*
 * Evaluate the node (op), using (res) as the place for the result, and
 * return a pointer to the resulting variable.  Work is dispatched on
 * the operation-class bits of opinfo; several classes continue with
 * another node by assigning to op, others call evaluate() recursively.
 */
2058 static var *evaluate(node *op, var *res)
2060 /* This procedure is recursive so we should count every byte */
2061 static var *fnargs = NULL;
2062 static unsigned seed = 1;
2063 static regex_t sreg;
/* NOTE(review): presumably the early exit for an empty node -- the
 * guarding condition is not visible here */
2085 return setvar_s(res, NULL);
2092 opn = (short)(opinfo & OPNMASK);
2093 lineno = op->lineno;
2095 /* execute inevitable things */
/* operand preparation requested by the OF_* flags: the left operand is
 * evaluated into v1, the right one into v1+1, then string/number views
 * are fetched as needed */
2097 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2098 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2099 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2100 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2101 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2103 switch (XC(opinfo & OPCLSMASK)) {
2105 /* -- iterative node type -- */
2109 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2110 /* it's range pattern */
/* OF_CHECKED on the node records that the range is open: it is set when
 * the left pattern matches (or was already set) and cleared again once
 * the right pattern matches */
2111 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2112 op->info |= OF_CHECKED;
2113 if (ptest(op1->r.n))
2114 op->info &= ~OF_CHECKED;
2121 op = (ptest(op1)) ? op->a.n : op->r.n;
2125 /* just evaluate an expression, also used as unconditional jump */
2129 /* branch, used in if-else and various loops */
2131 op = istrue(L.v) ? op->a.n : op->r.n;
2134 /* initialize for-in loop */
2135 case XC( OC_WALKINIT ):
2136 hashwalk_init(L.v, iamarray(R.v));
2139 /* get next array item */
2140 case XC( OC_WALKNEXT ):
2141 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2144 case XC( OC_PRINT ):
2145 case XC( OC_PRINTF ):
/* output redirection: R.s names the target; the stream entry comes from
 * newfile() and is opened with popen() or xfopen() ("w" or "a",
 * depending on opn) */
2148 X.rsm = newfile(R.s);
2151 X.rsm->F = popen(R.s, "w");
2152 if (X.rsm->F == NULL)
2153 bb_perror_msg_and_die("popen");
2156 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2162 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2164 fputs(getvar_s(V[F0]), X.F);
/* values whose primary type is number are formatted through OFMT with
 * int_as_int enabled; everything else is written as a string */
2167 L.v = evaluate(nextarg(&op1), v1);
2168 if (L.v->type & VF_NUMBER) {
2169 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2170 getvar_i(L.v), TRUE);
2173 fputs(getvar_s(L.v), X.F);
/* OFS is written while more arguments remain, ORS afterwards */
2176 if (op1) fputs(getvar_s(V[OFS]), X.F);
2179 fputs(getvar_s(V[ORS]), X.F);
2181 } else { /* OC_PRINTF */
2182 L.s = awk_printf(op1);
2189 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument; anything else
 * reaches runtime_error(EMSG_NOT_ARRAY) */
2190 X.info = op1->info & OPCLSMASK;
2191 if (X.info == OC_VAR) {
2193 } else if (X.info == OC_FNARG) {
2194 R.v = &fnargs[op1->l.i];
2196 runtime_error(EMSG_NOT_ARRAY);
/* either one element is removed by its evaluated index string, or the
 * whole array is cleared */
2201 L.s = getvar_s(evaluate(op1->r.n, v1));
2202 hash_remove(iamarray(R.v), L.s);
2204 clear_array(iamarray(R.v));
2208 case XC( OC_NEWSOURCE ):
2209 programname = op->l.s;
2212 case XC( OC_RETURN ):
2216 case XC( OC_NEXTFILE ):
2227 /* -- recursive node type -- */
2235 case XC( OC_FNARG ):
2236 L.v = &fnargs[op->l.i];
/* with an index expression present the element is looked up in the
 * array, otherwise the variable itself is the result */
2238 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2242 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2245 case XC( OC_REGEXP ):
2247 L.s = getvar_s(V[F0]);
2250 case XC( OC_MATCH ):
2253 X.re = as_regex(op1, &sreg);
2254 R.i = regexec(X.re, L.s, 0, NULL, 0);
2255 if (X.re == &sreg) regfree(X.re);
/* 1 on a match, inverted when the operator character is '!' */
2256 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2260 /* if source is a temporary string, just relink it to dest */
/* v1+1 is the temporary that received the right operand above, so its
 * string can be handed over instead of being copied */
2261 if (R.v == v1+1 && R.v->string) {
2262 res = setvar_p(L.v, R.v->string);
2265 res = copyvar(L.v, R.v);
2269 case XC( OC_TERNARY ):
2270 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2271 runtime_error(EMSG_POSSIBLE_ERROR);
2272 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* function call: a body must exist, then nargs+1 slots are allocated
 * and each evaluated argument is linked in as the parent of its slot */
2276 if (! op->r.f->body.first)
2277 runtime_error(EMSG_UNDEF_FUNC);
2279 X.v = R.v = nvalloc(op->r.f->nargs+1);
2281 L.v = evaluate(nextarg(&op1), v1);
2283 R.v->type |= VF_CHILD;
2284 R.v->x.parent = L.v;
2285 if (++R.v - X.v >= op->r.f->nargs)
2293 res = evaluate(op->r.f->body.first, res);
2300 case XC( OC_GETLINE ):
2301 case XC( OC_PGETLINE ):
2303 X.rsm = newfile(L.s);
/* the source is a command (popen, marked as a pipe) or a file */
2305 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2306 X.rsm->F = popen(L.s, "r");
2307 X.rsm->is_pipe = TRUE;
2309 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
2313 if (! iF) iF = next_input_file();
2318 setvar_i(V[ERRNO], errno);
2326 L.i = awk_getline(X.rsm, R.v);
2336 /* simple builtins */
2337 case XC( OC_FBLTIN ):
/* NOTE(review): rand() may return RAND_MAX, which makes this quotient
 * exactly 1.0 -- confirm whether the upper bound should be excluded */
2345 R.d = (double)rand() / (double)RAND_MAX;
2348 #if ENABLE_FEATURE_AWK_MATH
2374 runtime_error(EMSG_NO_MATH);
/* with an argument the seed is taken from it, otherwise from the clock */
2380 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2390 L.s = getvar_s(V[F0]);
/* NOTE(review): the value returned by system() is shifted right by 8,
 * presumably to extract the exit code from the wait status -- confirm
 * behaviour for abnormal termination */
2396 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2397 ? (system(L.s) >> 8) : 0;
2405 X.rsm = newfile(L.s);
/* closing: pipes go through pclose(), files through fclose(); the
 * stream buffer is freed and the entry removed from fdhash */
2414 X.rsm = (rstream *)hash_search(fdhash, L.s);
2416 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2417 free(X.rsm->buffer);
2418 hash_remove(fdhash, L.s);
2421 setvar_i(V[ERRNO], errno);
2428 case XC( OC_BUILTIN ):
2429 res = exec_builtin(op, res);
2432 case XC( OC_SPRINTF ):
2433 setvar_p(res, awk_printf(op1));
2436 case XC( OC_UNARY ):
2438 L.d = R.d = getvar_i(R.v);
2453 L.d = istrue(X.v) ? 0 : 1;
2464 case XC( OC_FIELD ):
2465 R.i = (int)getvar_i(R.v);
/* Fields[] is indexed from zero */
2473 res = &Fields[R.i-1];
2477 /* concatenation (" ") and index joining (",") */
2478 case XC( OC_CONCAT ):
2479 case XC( OC_COMMA ):
/* NOTE(review): opn is reused here as a byte count; its declared type
 * is not visible -- confirm it is wide enough for long strings */
2480 opn = strlen(L.s) + strlen(R.s) + 2;
2483 if ((opinfo & OPCLSMASK) == OC_COMMA) {
/* index joining also needs room for the SUBSEP string */
2484 L.s = getvar_s(V[SUBSEP]);
2485 X.s = xrealloc(X.s, opn + strlen(L.s));
/* the right operand is tested only when the left one does not already
 * decide the result */
2493 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2497 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2500 case XC( OC_BINARY ):
2501 case XC( OC_REPLACE ):
2502 R.d = getvar_i(R.v);
2514 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2518 #if ENABLE_FEATURE_AWK_MATH
2519 L.d = pow(L.d, R.d);
2521 runtime_error(EMSG_NO_MATH);
2525 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* NOTE(review): the quotient is truncated through int, so operands
 * whose quotient does not fit in an int are not handled -- confirm */
2526 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res, OC_REPLACE into X.v (the left operand
 * variable saved during operand preparation) */
2529 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2532 case XC( OC_COMPARE ):
/* numeric comparison when both sides are numeric, string comparison
 * otherwise (ignoring case when icase is set) */
2533 if (is_numeric(L.v) && is_numeric(R.v)) {
2534 L.d = getvar_i(L.v) - getvar_i(R.v);
2536 L.s = getvar_s(L.v);
2537 R.s = getvar_s(R.v);
2538 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* the low bit of opn selects whether the test result is negated */
2540 switch (opn & 0xfe) {
2551 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2555 runtime_error(EMSG_POSSIBLE_ERROR);
2557 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2559 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2569 /* -------- main & co. -------- */
/*
 * Finish the run with status (r): the endseq chain is evaluated, then
 * every stream in fdhash that is open and marked as a pipe is closed
 * with pclose().
 * NOTE(review): any guard around the endseq evaluation and the final
 * exit call are not visible here.
 */
2571 static int awk_exit(int r)
2582 evaluate(endseq.first, &tv);
2585 /* waiting for children */
/* visit every bucket of the stream hash */
2586 for (i = 0; i < fdhash->csize; i++) {
2587 hi = fdhash->items[i];
2589 if (hi->data.rs.F && hi->data.rs.is_pipe)
2590 pclose(hi->data.rs.F);
2598 /* if expr looks like "var=value", perform assignment and return 1,
2599 * otherwise return 0 */
/*
 * The work is done on a private copy of (expr).  The text is rejected
 * when its first character fails isalnum_() or when it contains no '='.
 */
2600 static int is_assignment(const char *expr)
2602 char *exprc, *s, *s0, *s1;
2604 exprc = xstrdup(expr);
2605 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value is rewritten in place, one character at a time, through
 * nextchar() */
2613 *(s1++) = nextchar(&s);
/* the variable named by the copy receives the value starting at s0
 * (presumably the name was cut off at '=' on a line not visible here) */
2616 setvar_u(newvar(exprc), s0);
2621 /* switch to next input file */
/*
 * Uses a single stream object (rsm): its current file is closed and its
 * pos/adv counters are reset before the ARGV array is consulted for the
 * next name.  ARGV entries that are empty or that is_assignment()
 * accepts as "var=value" are not opened.
 */
2622 static rstream *next_input_file(void)
2626 const char *fname, *ind;
/* remembers across calls that at least one file name was taken */
2627 static int files_happen = FALSE;
2629 if (rsm.F) fclose(rsm.F);
2631 rsm.pos = rsm.adv = 0;
/* no further ARGV entries once ARGIND+1 reaches ARGC */
2634 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* ARGIND is incremented and its string form indexes the ARGV array */
2640 ind = getvar_s(incvar(V[ARGIND]));
2641 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2642 if (fname && *fname && !is_assignment(fname))
2643 F = afopen(fname, "r");
2647 files_happen = TRUE;
2648 setvar_s(V[FILENAME], fname);
/*
 * Applet entry point.  Visible stages, in order: force the "C" numeric
 * locale, allocate the global buffer and the four hash tables, create
 * the built-in variables, register the standard streams, import the
 * environment, parse options, load the program (from the -f file or the
 * first parameter), fill ARGC/ARGV, evaluate the beginseq chain, then
 * feed every input record to the mainseq chain and finish through
 * awk_exit().
 */
2653 int awk_main(int argc, char **argv);
2654 int awk_main(int argc, char **argv)
2657 char *opt_F, *opt_W;
2658 llist_t *opt_v = NULL;
2663 char *vnames = (char *)vNames; /* cheat */
2664 char *vvalues = (char *)vValues;
2666 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2667 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2668 if (ENABLE_LOCALE_SUPPORT)
2669 setlocale(LC_NUMERIC, "C");
2673 /* allocate global buffer */
2674 buf = xmalloc(MAXVARFMT + 1);
2676 vhash = hash_init();
2677 ahash = hash_init();
2678 fdhash = hash_init();
2679 fnhash = hash_init();
2681 /* initialize variables */
/* vnames and vvalues are walked in parallel with nextword(); a value
 * entry starting with '\377' means no initial string is set, and a name
 * entry starting with '*' marks the variable VF_SPECIAL */
2682 for (i = 0; *vnames; i++) {
2683 V[i] = v = newvar(nextword(&vnames));
2684 if (*vvalues != '\377')
2685 setvar_s(v, nextword(&vvalues));
2689 if (*vnames == '*') {
2690 v->type |= VF_SPECIAL;
2695 handle_special(V[FS]);
2696 handle_special(V[RS]);
/* the standard streams are registered under their /dev names */
2698 newfile("/dev/stdin")->F = stdin;
2699 newfile("/dev/stdout")->F = stdout;
2700 newfile("/dev/stderr")->F = stderr;
2702 /* Huh, people report that sometimes environ is NULL. Oh well. */
/* each environment entry is copied and located at its '=' so that it
 * can be stored in the ENVIRON array */
2703 if (environ) for (envp = environ; *envp; envp++) {
2704 char *s = xstrdup(*envp);
2705 char *s1 = strchr(s, '=');
2708 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2712 opt_complementary = "v::";
2713 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2716 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2717 while (opt_v) { /* -v */
2718 if (!is_assignment(llist_pop(&opt_v)))
2721 if (opt & 0x4) { // -f
2722 char *s = s; /* die, gcc, die */
2723 FILE *from_file = afopen(programname, "r");
2724 /* one byte is reserved for some trick in next_token */
/* seekable file: the size comes from fseek()/ftell() and the text is
 * read in one call, starting at s + 1 */
2725 if (fseek(from_file, 0, SEEK_END) == 0) {
2726 flen = ftell(from_file);
2727 s = xmalloc(flen + 4);
2728 fseek(from_file, 0, SEEK_SET);
2729 i = 1 + fread(s + 1, 1, flen, from_file);
/* chunked read: the buffer grows by 4096 bytes per round and up to 4094
 * bytes are read each time, until fread() returns 0.
 * NOTE(review): if this loop is the alternative to the fseek branch
 * above, s still holds its self-initialised (indeterminate) value at
 * the first xrealloc() -- it looks like it should start as NULL;
 * confirm */
2731 for (i = j = 1; j > 0; i += j) {
2732 s = xrealloc(s, i + 4096);
2733 j = fread(s + i, 1, 4094, from_file);
2738 parse_program(s + 1);
2740 } else { // no -f: take program from 1st parameter
2743 programname = "cmd. line";
2744 parse_program(*argv++);
2747 if (opt & 0x8) // -W
2748 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2750 /* fill in ARGV array */
/* ARGV[0] is the fixed string "awk"; the remaining parameters follow */
2751 setvar_i(V[ARGC], argc + 1);
2752 setari_u(V[ARGV], 0, "awk");
2755 setari_u(V[ARGV], ++i, *argv++);
2757 evaluate(beginseq.first, &tv);
/* nothing more to do when neither the main nor the end chain exists */
2758 if (!mainseq.first && !endseq.first)
2759 awk_exit(EXIT_SUCCESS);
2761 /* input file could already be opened in BEGIN block */
2762 if (!iF) iF = next_input_file();
2764 /* passing through input files */
/* FNR restarts at 0 before the records of an input are read */
2767 setvar_i(V[FNR], 0);
2769 while ((i = awk_getline(iF, V[F0])) > 0) {
2773 evaluate(mainseq.first, &tv);
/* NOTE(review): presumably reached when awk_getline() reported an error
 * (negative result) -- the condition is not visible here */
2780 runtime_error(strerror(errno));
2782 iF = next_input_file();
2785 awk_exit(EXIT_SUCCESS);