1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
14 /* This is a NOEXEC applet. Be very careful! */
21 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
22 #define VF_ARRAY 0x0002 /* 1 = it's an array */
24 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
25 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
27 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
36 typedef struct var_s {
37 unsigned short type; /* flags */
41 int aidx; /* func arg idx (for compilation stage) */
42 struct xhash_s *array; /* array ptr */
43 struct var_s *parent; /* for func args, ptr to actual parameter */
44 char **walker; /* list of array elements (for..in) */
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
52 const char *programname;
56 typedef struct func_s {
62 typedef struct rstream_s {
68 unsigned short is_pipe;
71 typedef struct hash_item_s {
73 struct var_s v; /* variable/array hash */
74 struct rstream_s rs; /* redirect streams hash */
75 struct func_s f; /* functions hash */
77 struct hash_item_s *next; /* next in chain */
78 char name[1]; /* really it's longer */
81 typedef struct xhash_s {
82 unsigned nel; /* num of elements */
83 unsigned csize; /* current hash size */
84 unsigned nprime; /* next hash size in PRIMES[] */
85 unsigned glen; /* summary length of item names */
86 struct hash_item_s **items;
90 typedef struct node_s {
92 unsigned short lineno;
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
120 typedef struct tsplitter_s {
125 /* simple token classes */
126 /* Order and hex values are very important!!! See next_token() */
127 #define TC_SEQSTART 1 /* ( */
128 #define TC_SEQTERM (1 << 1) /* ) */
129 #define TC_REGEXP (1 << 2) /* /.../ */
130 #define TC_OUTRDR (1 << 3) /* | > >> */
131 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
132 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
133 #define TC_BINOPX (1 << 6) /* two-opnd operator */
134 #define TC_IN (1 << 7)
135 #define TC_COMMA (1 << 8)
136 #define TC_PIPE (1 << 9) /* input redirection pipe */
137 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
138 #define TC_ARRTERM (1 << 11) /* ] */
139 #define TC_GRPSTART (1 << 12) /* { */
140 #define TC_GRPTERM (1 << 13) /* } */
141 #define TC_SEMICOL (1 << 14)
142 #define TC_NEWLINE (1 << 15)
143 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
144 #define TC_WHILE (1 << 17)
145 #define TC_ELSE (1 << 18)
146 #define TC_BUILTIN (1 << 19)
147 #define TC_GETLINE (1 << 20)
148 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
149 #define TC_BEGIN (1 << 22)
150 #define TC_END (1 << 23)
151 #define TC_EOF (1 << 24)
152 #define TC_VARIABLE (1 << 25)
153 #define TC_ARRAY (1 << 26)
154 #define TC_FUNCTION (1 << 27)
155 #define TC_STRING (1 << 28)
156 #define TC_NUMBER (1 << 29)
158 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
160 /* combined token classes */
161 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
164 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
171 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173 /* discard newlines after these */
174 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
175 TC_BINOP | TC_OPTERM)
177 /* what can expression begin with */
178 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
185 TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188 #define OF_RES1 0x010000
189 #define OF_RES2 0x020000
190 #define OF_STR1 0x040000
191 #define OF_STR2 0x080000
192 #define OF_NUM1 0x100000
193 #define OF_CHECKED 0x200000
195 /* combined operator flags */
198 #define xS (OF_RES2 | OF_STR2)
200 #define VV (OF_RES1 | OF_RES2)
201 #define Nx (OF_RES1 | OF_NUM1)
202 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx (OF_RES1 | OF_STR1)
204 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK 0x007F
210 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
211 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
212 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place the priority / argument-descriptor byte x into bits 24..31 of a
 * token info word. The argument is parenthesized so that P(a + b) expands
 * as intended, and widened to uint32_t so that values >= 0x80 (for example
 * P(0x83)) are not shifted into the sign bit of a signed int. */
#define P(x) ((uint32_t)(x) << 24)
215 #define PRIMASK 0x7F000000
216 #define PRIMASK2 0x7E000000
218 /* Operation classes */
220 #define SHIFT_TIL_THIS 0x0600
221 #define RECUR_FROM_THIS 0x1000
224 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
225 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
227 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
228 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
229 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
231 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
232 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
233 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
234 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
235 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
236 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
237 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
238 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
241 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
245 /* simple builtins */
247 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
248 F_ti, F_le, F_sy, F_ff, F_cl
253 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
255 B_an, B_co, B_ls, B_or, B_rs, B_xo,
258 /* tokens and their corresponding info values */
260 #define NTC "\377" /* switch to next token class (tc<<1) */
263 #define OC_B OC_BUILTIN
265 static const char tokenlist[] =
268 "\1/" NTC /* REGEXP */
269 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
270 "\2++" "\2--" NTC /* UOPPOST */
271 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
272 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
282 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
288 "\2if" "\2do" "\3for" "\5break" /* STATX */
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
295 "\3and" "\5compl" "\6lshift" "\2or"
297 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime"
303 "\7tolower" "\7toupper" NTC
305 "\4func" "\10function" NTC
310 static const uint32_t tokeninfo[] = {
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
364 /* internal variable names and their initial values */
365 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
367 CONVFMT=0, OFMT, FS, OFS,
368 ORS, RS, RT, FILENAME,
369 SUBSEP, ARGIND, ARGC, ARGV,
372 ENVIRON, F0, _intvarcount_
375 static const char vNames[] =
376 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
377 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
378 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
380 "NR\0" "NF\0*" "IGNORECASE\0*"
381 "ENVIRON\0" "$\0*" "\0";
383 static const char vValues[] =
384 "%.6g\0" "%.6g\0" " \0" " \0"
385 "\n\0" "\n\0" "\0" "\0"
/* Hash table sizing: a new table starts with FIRST_PRIME buckets and may
 * grow through the sizes listed in PRIMES[]; NPRIMES is the number of
 * entries in that list.
 * FIRST_PRIME carries no trailing semicolon so that it can be used inside
 * expressions as well as in plain assignments. */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
396 extern char **environ;
398 static var * V[_intvarcount_];
399 static chain beginseq, mainseq, endseq, *seq;
400 static int nextrec, nextfile;
401 static node *break_ptr, *continue_ptr;
403 static xhash *vhash, *ahash, *fdhash, *fnhash;
404 static const char *programname;
406 static int is_f0_split;
409 static tsplitter fsplitter, rsplitter;
425 /* function prototypes */
426 static void handle_special(var *);
427 static node *parse_expr(uint32_t);
428 static void chain_group(void);
429 static var *evaluate(node *, var *);
430 static rstream *next_input_file(void);
431 static int fmt_num(char *, int, const char *, double, int);
432 static int awk_exit(int) ATTRIBUTE_NORETURN;
434 /* ---- error handling ---- */
436 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
437 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
438 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
439 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
440 static const char EMSG_INV_FMT[] = "Invalid format specifier";
441 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
442 static const char EMSG_NOT_ARRAY[] = "Not an array";
443 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
444 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
445 #if !ENABLE_FEATURE_AWK_MATH
446 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
449 static void zero_out_var(var * vp)
451 memset(vp, 0, sizeof(*vp));
454 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
455 static void syntax_error(const char * const message)
457 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
460 #define runtime_error(x) syntax_error(x)
463 /* ---- hash stuff ---- */
465 static unsigned hashidx(const char *name)
469 while (*name) idx = *name++ + (idx << 6) - idx;
473 /* create new hash */
474 static xhash *hash_init(void)
478 newhash = xzalloc(sizeof(xhash));
479 newhash->csize = FIRST_PRIME;
480 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
485 /* find item in hash, return ptr to data, NULL if not found */
486 static void *hash_search(xhash *hash, const char *name)
490 hi = hash->items [ hashidx(name) % hash->csize ];
492 if (strcmp(hi->name, name) == 0)
499 /* grow hash if it becomes too big */
500 static void hash_rebuild(xhash *hash)
502 unsigned newsize, i, idx;
503 hash_item **newitems, *hi, *thi;
505 if (hash->nprime == NPRIMES)
508 newsize = PRIMES[hash->nprime++];
509 newitems = xzalloc(newsize * sizeof(hash_item *));
511 for (i=0; i<hash->csize; i++) {
516 idx = hashidx(thi->name) % newsize;
517 thi->next = newitems[idx];
523 hash->csize = newsize;
524 hash->items = newitems;
527 /* find item in hash, add it if necessary. Return ptr to data */
528 static void *hash_find(xhash *hash, const char *name)
534 hi = hash_search(hash, name);
536 if (++hash->nel / hash->csize > 10)
539 l = strlen(name) + 1;
540 hi = xzalloc(sizeof(hash_item) + l);
541 memcpy(hi->name, name, l);
543 idx = hashidx(name) % hash->csize;
544 hi->next = hash->items[idx];
545 hash->items[idx] = hi;
/* Typed wrappers around hash_find() (find item, add it if necessary):
 * each one casts the returned data pointer to the value type of the hash
 * it is used with.
 *   findvar - caller-supplied hash, result as var*
 *   newvar  - global vhash,  result as var*
 *   newfile - global fdhash, result as rstream*
 *   newfunc - global fnhash, result as func*
 */
551 #define findvar(hash, name) ((var*) hash_find((hash) , (name)))
552 #define newvar(name) ((var*) hash_find(vhash , (name)))
553 #define newfile(name) ((rstream*)hash_find(fdhash ,(name)))
554 #define newfunc(name) ((func*) hash_find(fnhash , (name)))
556 static void hash_remove(xhash *hash, const char *name)
558 hash_item *hi, **phi;
560 phi = &(hash->items[ hashidx(name) % hash->csize ]);
563 if (strcmp(hi->name, name) == 0) {
564 hash->glen -= (strlen(name) + 1);
574 /* ------ some useful functions ------ */
576 static void skip_spaces(char **s)
580 while (*p == ' ' || *p == '\t' ||
581 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
587 static char *nextword(char **s)
591 while (*(*s)++) /* */;
596 static char nextchar(char **s)
602 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
603 if (c == '\\' && *s == pps) c = *((*s)++);
607 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
609 return (isalnum(c) || c == '_');
/* Open path with the given mode through xfopen(). The special name "-"
 * is not opened; stdin is returned for it instead. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return xfopen(path, mode);
}
617 /* -------- working with variables (set/get/copy/etc) -------- */
619 static xhash *iamarray(var *v)
623 while (a->type & VF_CHILD)
626 if (! (a->type & VF_ARRAY)) {
628 a->x.array = hash_init();
633 static void clear_array(xhash *array)
638 for (i=0; i<array->csize; i++) {
639 hi = array->items[i];
643 free(thi->data.v.string);
646 array->items[i] = NULL;
648 array->glen = array->nel = 0;
651 /* clear a variable */
652 static var *clrvar(var *v)
654 if (!(v->type & VF_FSTR))
657 v->type &= VF_DONTTOUCH;
663 /* assign string value to variable */
664 static var *setvar_p(var *v, char *value)
673 /* same as setvar_p but make a copy of string */
674 static var *setvar_s(var *v, const char *value)
676 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
679 /* same as setvar_s but set USER flag */
680 static var *setvar_u(var *v, const char *value)
687 /* set array element to user string */
688 static void setari_u(var *a, int idx, const char *s)
691 static char sidx[12];
693 sprintf(sidx, "%d", idx);
694 v = findvar(iamarray(a), sidx);
698 /* assign numeric value to variable */
699 static var *setvar_i(var *v, double value)
702 v->type |= VF_NUMBER;
708 static const char *getvar_s(var *v)
710 /* if v is numeric and has no cached string, convert it to string */
711 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
712 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
713 v->string = xstrdup(buf);
714 v->type |= VF_CACHED;
716 return (v->string == NULL) ? "" : v->string;
719 static double getvar_i(var *v)
723 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
727 v->number = strtod(s, &s);
728 if (v->type & VF_USER) {
736 v->type |= VF_CACHED;
741 static var *copyvar(var *dest, const var *src)
745 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
746 dest->number = src->number;
748 dest->string = xstrdup(src->string);
750 handle_special(dest);
754 static var *incvar(var *v)
756 return setvar_i(v, getvar_i(v)+1.);
759 /* return true if v is number or numeric string */
760 static int is_numeric(var *v)
763 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
766 /* return 1 when value of v corresponds to true, 0 otherwise */
767 static int istrue(var *v)
770 return (v->number == 0) ? 0 : 1;
772 return (v->string && *(v->string)) ? 1 : 0;
775 /* temporary variables allocator. Last allocated should be first freed */
776 static var *nvalloc(int n)
784 if ((cb->pos - cb->nv) + n <= cb->size) break;
789 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
790 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
795 if (pb) pb->next = cb;
801 while (v < cb->pos) {
810 static void nvfree(var *v)
814 if (v < cb->nv || v >= cb->pos)
815 runtime_error(EMSG_INTERNAL_ERROR);
817 for (p=v; p<cb->pos; p++) {
818 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
819 clear_array(iamarray(p));
820 free(p->x.array->items);
823 if (p->type & VF_WALK)
830 while (cb->prev && cb->pos == cb->nv) {
835 /* ------- awk program text parsing ------- */
837 /* Parse the next token pointed to by global pos, place the results into global t.
838 * If the token isn't expected, give up. Return the token class
840 static uint32_t next_token(uint32_t expected)
842 static int concat_inserted;
843 static uint32_t save_tclass, save_info;
844 static uint32_t ltclass = TC_OPTERM;
855 } else if (concat_inserted) {
856 concat_inserted = FALSE;
857 t.tclass = save_tclass;
866 while (*p != '\n' && *p != '\0') p++;
874 } else if (*p == '\"') {
878 if (*p == '\0' || *p == '\n')
879 syntax_error(EMSG_UNEXP_EOS);
880 *(s++) = nextchar(&p);
886 } else if ((expected & TC_REGEXP) && *p == '/') {
890 if (*p == '\0' || *p == '\n')
891 syntax_error(EMSG_UNEXP_EOS);
892 if ((*s++ = *p++) == '\\') {
894 *(s-1) = bb_process_escape_sequence((const char **)&p);
895 if (*pp == '\\') *s++ = '\\';
896 if (p == pp) *s++ = *p++;
903 } else if (*p == '.' || isdigit(*p)) {
905 t.number = strtod(p, &p);
907 syntax_error(EMSG_UNEXP_TOKEN);
911 /* search for something known */
921 /* if token class is expected, token
922 * matches and it's not a longer word,
923 * then this is what we are looking for
925 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
926 *tl == *p && strncmp(p, tl, l) == 0 &&
927 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
937 /* it's a name (var/array/function),
938 * otherwise it's something wrong
941 syntax_error(EMSG_UNEXP_TOKEN);
944 while (isalnum_(*(++p))) {
949 /* also consume whitespace between functionname and bracket */
950 if (!(expected & TC_VARIABLE)) skip_spaces(&p);
963 /* skipping newlines in some cases */
964 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
967 /* insert concatenation operator when needed */
968 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
969 concat_inserted = TRUE;
973 t.info = OC_CONCAT | SS | P(35);
980 /* Are we ready for this? */
981 if (! (ltclass & expected))
982 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
983 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
988 static void rollback_token(void) { t.rollback = TRUE; }
990 static node *new_node(uint32_t info)
994 n = xzalloc(sizeof(node));
1000 static node *mk_re_node(const char *s, node *n, regex_t *re)
1002 n->info = OC_REGEXP;
1005 xregcomp(re, s, REG_EXTENDED);
1006 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1011 static node *condition(void)
1013 next_token(TC_SEQSTART);
1014 return parse_expr(TC_SEQTERM);
1017 /* parse expression terminated by given argument, return ptr
1018 * to built subtree. Terminator is eaten by parse_expr */
1019 static node *parse_expr(uint32_t iexp)
1028 sn.r.n = glptr = NULL;
1029 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1031 while (! ((tc = next_token(xtc)) & iexp)) {
1032 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1033 /* input redirection (<) attached to glptr node */
1034 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1036 xtc = TC_OPERAND | TC_UOPPRE;
1039 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1040 /* for binary and postfix-unary operators, jump back over
1041 * previous operators with higher priority */
1043 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1044 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1046 if ((t.info & OPCLSMASK) == OC_TERNARY)
1048 cn = vn->a.n->r.n = new_node(t.info);
1050 if (tc & TC_BINOP) {
1052 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1053 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1055 next_token(TC_GETLINE);
1056 /* give maximum priority to this pipe */
1057 cn->info &= ~PRIMASK;
1058 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1062 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1067 /* for operands and prefix-unary operators, attach them
1070 cn = vn->r.n = new_node(t.info);
1072 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1073 if (tc & (TC_OPERAND | TC_REGEXP)) {
1074 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1075 /* one should be very careful with switch on tclass -
1076 * only simple tclasses should be used! */
1081 if ((v = hash_search(ahash, t.string)) != NULL) {
1082 cn->info = OC_FNARG;
1083 cn->l.i = v->x.aidx;
1085 cn->l.v = newvar(t.string);
1087 if (tc & TC_ARRAY) {
1089 cn->r.n = parse_expr(TC_ARRTERM);
1096 v = cn->l.v = xzalloc(sizeof(var));
1098 setvar_i(v, t.number);
1100 setvar_s(v, t.string);
1104 mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1109 cn->r.f = newfunc(t.string);
1110 cn->l.n = condition();
1114 cn = vn->r.n = parse_expr(TC_SEQTERM);
1120 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1124 cn->l.n = condition();
1133 /* add node to chain. Return ptr to alloc'd node */
1134 static node *chain_node(uint32_t info)
1139 seq->first = seq->last = new_node(0);
1141 if (seq->programname != programname) {
1142 seq->programname = programname;
1143 n = chain_node(OC_NEWSOURCE);
1144 n->l.s = xstrdup(programname);
1149 seq->last = n->a.n = new_node(OC_DONE);
1154 static void chain_expr(uint32_t info)
1158 n = chain_node(info);
1159 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1160 if (t.tclass & TC_GRPTERM)
1164 static node *chain_loop(node *nn)
1166 node *n, *n2, *save_brk, *save_cont;
1168 save_brk = break_ptr;
1169 save_cont = continue_ptr;
1171 n = chain_node(OC_BR | Vx);
1172 continue_ptr = new_node(OC_EXEC);
1173 break_ptr = new_node(OC_EXEC);
1175 n2 = chain_node(OC_EXEC | Vx);
1178 continue_ptr->a.n = n2;
1179 break_ptr->a.n = n->r.n = seq->last;
1181 continue_ptr = save_cont;
1182 break_ptr = save_brk;
1187 /* parse group and attach it to chain */
1188 static void chain_group(void)
1194 c = next_token(TC_GRPSEQ);
1195 } while (c & TC_NEWLINE);
1197 if (c & TC_GRPSTART) {
1198 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1199 if (t.tclass & TC_NEWLINE) continue;
1203 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1205 chain_expr(OC_EXEC | Vx);
1206 } else { /* TC_STATEMNT */
1207 switch (t.info & OPCLSMASK) {
1209 n = chain_node(OC_BR | Vx);
1210 n->l.n = condition();
1212 n2 = chain_node(OC_EXEC);
1214 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1216 n2->a.n = seq->last;
1224 n = chain_loop(NULL);
1229 n2 = chain_node(OC_EXEC);
1230 n = chain_loop(NULL);
1232 next_token(TC_WHILE);
1233 n->l.n = condition();
1237 next_token(TC_SEQSTART);
1238 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1239 if (t.tclass & TC_SEQTERM) { /* for-in */
1240 if ((n2->info & OPCLSMASK) != OC_IN)
1241 syntax_error(EMSG_UNEXP_TOKEN);
1242 n = chain_node(OC_WALKINIT | VV);
1245 n = chain_loop(NULL);
1246 n->info = OC_WALKNEXT | Vx;
1248 } else { /* for (;;) */
1249 n = chain_node(OC_EXEC | Vx);
1251 n2 = parse_expr(TC_SEMICOL);
1252 n3 = parse_expr(TC_SEQTERM);
1262 n = chain_node(t.info);
1263 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1264 if (t.tclass & TC_OUTRDR) {
1266 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1268 if (t.tclass & TC_GRPTERM)
1273 n = chain_node(OC_EXEC);
1278 n = chain_node(OC_EXEC);
1279 n->a.n = continue_ptr;
1282 /* delete, next, nextfile, return, exit */
1289 static void parse_program(char *p)
1298 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1299 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1301 if (tclass & TC_OPTERM)
1305 if (tclass & TC_BEGIN) {
1309 } else if (tclass & TC_END) {
1313 } else if (tclass & TC_FUNCDECL) {
1314 next_token(TC_FUNCTION);
1316 f = newfunc(t.string);
1317 f->body.first = NULL;
1319 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1320 v = findvar(ahash, t.string);
1321 v->x.aidx = (f->nargs)++;
1323 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1330 } else if (tclass & TC_OPSEQ) {
1332 cn = chain_node(OC_TEST);
1333 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1334 if (t.tclass & TC_GRPSTART) {
1338 chain_node(OC_PRINT);
1340 cn->r.n = mainseq.last;
1342 } else /* if (tclass & TC_GRPSTART) */ {
1350 /* -------- program execution part -------- */
1352 static node *mk_splitter(const char *s, tsplitter *spl)
1360 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1364 if (strlen(s) > 1) {
1365 mk_re_node(s, n, re);
1367 n->info = (uint32_t) *s;
1373 /* Use node as a regular expression. Supplied with node ptr and regex_t
1374 * storage space. Return ptr to regex (if the result points to preg, it
1375 * should be regfree'd manually later)
1377 static regex_t *as_regex(node *op, regex_t *preg)
1382 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1383 return icase ? op->r.ire : op->l.re;
1386 s = getvar_s(evaluate(op, v));
1387 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1393 /* gradually increasing buffer */
1394 static void qrealloc(char **b, int n, int *size)
1396 if (!*b || n >= *size)
1397 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1400 /* resize field storage space */
1401 static void fsrealloc(int size)
1403 static int maxfields; /* = 0;*/
1406 if (size >= maxfields) {
1408 maxfields = size + 16;
1409 Fields = xrealloc(Fields, maxfields * sizeof(var));
1410 for (; i < maxfields; i++) {
1411 Fields[i].type = VF_SPECIAL;
1412 Fields[i].string = NULL;
1416 if (size < nfields) {
1417 for (i = size; i < nfields; i++) {
1424 static int awk_split(const char *s, node *spl, char **slist)
1429 regmatch_t pmatch[2];
1431 /* in worst case, each char would be a separate field */
1432 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1435 c[0] = c[1] = (char)spl->info;
1437 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1439 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1441 l = strcspn(s, c+2);
1442 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1443 && pmatch[0].rm_so <= l
1445 l = pmatch[0].rm_so;
1446 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1448 pmatch[0].rm_eo = l;
1449 if (s[l]) pmatch[0].rm_eo++;
1455 s += pmatch[0].rm_eo;
1458 } else if (c[0] == '\0') { /* null split */
1464 } else if (c[0] != ' ') { /* single-character split */
1466 c[0] = toupper(c[0]);
1467 c[1] = tolower(c[1]);
1470 while ((s1 = strpbrk(s1, c))) {
1474 } else { /* space split */
1476 s = skip_whitespace(s);
1479 while (*s && !isspace(*s))
1487 static void split_f0(void)
1489 static char *fstrings = NULL;
1499 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1502 for (i = 0; i < n; i++) {
1503 Fields[i].string = nextword(&s);
1504 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1507 /* set NF manually to avoid side effects */
1509 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1510 V[NF]->number = nfields;
1513 /* perform additional actions when some internal variables changed */
1514 static void handle_special(var *v)
1518 const char *sep, *s;
1519 int sl, l, len, i, bsize;
1521 if (!(v->type & VF_SPECIAL))
1525 n = (int)getvar_i(v);
1528 /* recalculate $0 */
1529 sep = getvar_s(V[OFS]);
1533 for (i=0; i<n; i++) {
1534 s = getvar_s(&Fields[i]);
1537 memcpy(b+len, sep, sl);
1540 qrealloc(&b, len+l+sl, &bsize);
1541 memcpy(b+len, s, l);
1549 } else if (v == V[F0]) {
1550 is_f0_split = FALSE;
1552 } else if (v == V[FS]) {
1553 mk_splitter(getvar_s(v), &fsplitter);
1555 } else if (v == V[RS]) {
1556 mk_splitter(getvar_s(v), &rsplitter);
1558 } else if (v == V[IGNORECASE]) {
1562 n = getvar_i(V[NF]);
1563 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1564 /* right here v is invalid. Just to note... */
1568 /* step through func/builtin/etc arguments */
1569 static node *nextarg(node **pn)
1574 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1583 static void hashwalk_init(var *v, xhash *array)
1589 if (v->type & VF_WALK)
1593 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1594 *w = *(w+1) = (char *)(w + 2);
1595 for (i=0; i<array->csize; i++) {
1596 hi = array->items[i];
1598 strcpy(*w, hi->name);
1605 static int hashwalk_next(var *v)
1613 setvar_s(v, nextword(w+1));
1617 /* evaluate node, return 1 when result is true, 0 otherwise */
1618 static int ptest(node *pattern)
1620 static var v; /* static: to save stack space? */
1622 return istrue(evaluate(pattern, &v));
1625 /* read next record from stream rsm into a variable v */
1626 static int awk_getline(rstream *rsm, var *v)
1629 regmatch_t pmatch[2];
1630 int a, p, pp=0, size;
1631 int fd, so, eo, r, rp;
1634 /* we're using our own buffer since we need access to accumulating
1637 fd = fileno(rsm->F);
1642 c = (char) rsplitter.n.info;
1645 if (! m) qrealloc(&m, 256, &size);
1651 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1652 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1653 b, 1, pmatch, 0) == 0) {
1654 so = pmatch[0].rm_so;
1655 eo = pmatch[0].rm_eo;
1659 } else if (c != '\0') {
1660 s = strchr(b+pp, c);
1661 if (! s) s = memchr(b+pp, '\0', p - pp);
1668 while (b[rp] == '\n')
1670 s = strstr(b+rp, "\n\n");
1673 while (b[eo] == '\n') eo++;
1681 memmove(m, (const void *)(m+a), p+1);
1686 qrealloc(&m, a+p+128, &size);
1689 p += safe_read(fd, b+p, size-p-1);
1693 setvar_i(V[ERRNO], errno);
1702 c = b[so]; b[so] = '\0';
1706 c = b[eo]; b[eo] = '\0';
1707 setvar_s(V[RT], b+so);
1719 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1723 const char *s = format;
1725 if (int_as_int && n == (int)n) {
1726 r = snprintf(b, size, "%d", (int)n);
1728 do { c = *s; } while (c && *++s);
1729 if (strchr("diouxX", c)) {
1730 r = snprintf(b, size, format, (int)n);
1731 } else if (strchr("eEfgG", c)) {
1732 r = snprintf(b, size, format, n);
1734 runtime_error(EMSG_INV_FMT);
1741 /* formatted output into an allocated buffer, return ptr to buffer */
1742 static char *awk_printf(node *n)
1747 int i, j, incr, bsize;
1752 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1757 while (*f && (*f != '%' || *(++f) == '%'))
1759 while (*f && !isalpha(*f))
1762 incr = (f - s) + MAXVARFMT;
1763 qrealloc(&b, incr + i, &bsize);
1768 arg = evaluate(nextarg(&n), v);
1771 if (c == 'c' || !c) {
1772 i += sprintf(b+i, s, is_numeric(arg) ?
1773 (char)getvar_i(arg) : *getvar_s(arg));
1775 } else if (c == 's') {
1777 qrealloc(&b, incr+i+strlen(s1), &bsize);
1778 i += sprintf(b+i, s, s1);
1781 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1785 /* if there was an error while sprintf, return value is negative */
1789 b = xrealloc(b, i + 1);
1796 /* common substitution routine
1797 * replace (nm) substring of (src) that matches (rn) with (repl), store
1798 * result into (dest), return number of substitutions. If nm=0, replace
1799 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1800 * subexpression matching (\1-\9)
// Regex substitution helper.
//   rn   - node converted to a compiled regex by as_regex()
//   repl - replacement text, scanned character by character below
//   nm   - passed by callers as 0, 1 or a numeric argument value
//   src  - variable whose string is searched (V[F0] when NULL)
//   dest - variable that defaults to V[F0] when NULL
//   ex   - when non-zero, digits '0'..'9' in repl are treated like '&'
// The output is assembled in ds, with di as the current write offset.
1802 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1807 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// regexec() below is asked for up to 10 entries: whole match + 9 groups
1808 regmatch_t pmatch[10];
1811 re = as_regex(rn, &sreg);
1812 if (! src) src = V[F0];
1813 if (! dest) dest = V[F0];
// REG_NOTBOL is passed for every search that does not begin at the very
// start of the source string
1818 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
// so/eo: start/end offsets of the whole match, relative to sp
1819 so = pmatch[0].rm_so;
1820 eo = pmatch[0].rm_eo;
// copy everything from sp up to the end of the match into ds
1822 qrealloc(&ds, di + eo + rl, &dssize);
1823 memcpy(ds + di, sp, eo);
// walk the replacement text
1829 for (s = repl; *s; s++) {
// '&' (or a digit when ex is set) refers to matched text
1835 if (c == '&' || (ex && c >= '0' && c <= '9')) {
// NOTE(review): nbs presumably counts preceding backslashes; this rewinds
// di over part of them - confirm against the full loop body
1836 di -= ((nbs + 3) >> 1);
// insert the text of (sub)match j
1845 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1846 qrealloc(&ds, di + rl + n, &dssize);
1847 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// copy one source character through; stop once the NUL has been copied
1858 if (! (ds[di++] = *sp++)) break;
// append whatever is left of the source after the last match
1862 qrealloc(&ds, di + strlen(sp), &dssize);
1863 strcpy(ds + di, sp);
// free the regex only if it was compiled into the local scratch sreg
1865 if (re == &sreg) regfree(re);
// Execute the built-in function described by node op, storing its result
// through res. Up to four arguments are gathered first:
//   an[i] - the raw argument node
//   av[i] - its evaluated value (only when the flag bits request it)
//   as[i] - its string form (only when the flag bits request it)
1869 static var *exec_builtin(node *op, var *res)
1876 regmatch_t pmatch[2];
1878 static tsplitter tspl;
1887 isr = info = op->info;
1890 av[2] = av[3] = NULL;
1891 for (i=0 ; i<4 && op ; i++) {
1892 an[i] = nextarg(&op);
1893 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1894 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// info >> 30 is the minimum number of arguments this builtin accepts
1899 if (nargs < (info >> 30))
1900 runtime_error(EMSG_TOO_FEW_ARGS);
// the bits under OPNMASK select which builtin to run
1902 switch (info & OPNMASK) {
1905 #if ENABLE_FEATURE_AWK_MATH
// NOTE(review): the first operand is av[i], where i is whatever the
// argument loop above left behind; av[0] looks intended - confirm
1906 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1908 runtime_error(EMSG_NO_MATH);
// splitter: use the third argument directly when it is a regexp node,
// otherwise build one from its string value
1914 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1915 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1920 n = awk_split(as[0], spl, &s);
// refill the target array with the n pieces at indices 1..n
1922 clear_array(iamarray(av[1]));
1923 for (i=1; i<=n; i++)
1924 setari_u(av[1], i, nextword(&s1));
// convert the 1-based start position to a 0-based offset, clamped to [0, l]
1931 i = getvar_i(av[1]) - 1;
1932 if (i>l) i=l; if (i<0) i=0;
// without a third argument the length defaults to the rest, l - i
1933 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1936 strncpy(s, as[0]+i, n);
// bitwise builtins: operands are converted to long before the operation
1942 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1946 setvar_i(res, ~(long)getvar_i(av[0]));
1950 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1954 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
// right shift is done on unsigned long values
1958 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1962 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
// case conversion works on a private copy, via the to_xxx function pointer
1972 s1 = s = xstrdup(as[0]);
1974 *s1 = (*to_xxx)(*s1);
// substring search; ll is presumably strlen(as[1]) - TODO confirm.
// l is the last offset at which the needle could still fit.
1983 l = strlen(as[0]) - ll;
1984 if (ll > 0 && l >= 0) {
1986 s = strstr(as[0], as[1]);
// positions are reported 1-based
1987 if (s) n = (s - as[0]) + 1;
1989 /* this piece of code is terribly slow and
1990 * really should be rewritten
1992 for (i=0; i<=l; i++) {
1993 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2005 tt = getvar_i(av[1]);
2008 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// format the time into buf (at most MAXVARFMT bytes), using the caller's
// format when one was given and the default otherwise
2009 i = strftime(buf, MAXVARFMT,
2010 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
// regex match of as[0] against the second argument
2017 re = as_regex(an[1], &sreg);
2018 n = regexec(re, as[0], 1, pmatch, 0);
2023 pmatch[0].rm_so = 0;
2024 pmatch[0].rm_eo = -1;
// publish the match through the RSTART and RLENGTH variables
2026 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2027 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2028 setvar_i(res, pmatch[0].rm_so);
2029 if (re == &sreg) regfree(re);
// substitution variants, all implemented by awk_sub():
// nm from the third argument, result in res, ex = TRUE
2033 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
// nm = 0, in place on av[2]; the substitution count is the result
2037 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
// nm = 1, in place on av[2]; the substitution count is the result
2041 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2050 * Evaluate node - the heart of the program. Supplied with subtree
2051 * and place where to store result. returns ptr to result.
// XC() shifts an opcode-class value right by 8 bits; evaluate() applies it
// both to the masked opinfo and to every case label of its switch.
2053 #define XC(n) ((n) >> 8)
// Evaluate the node (sub)tree starting at op. res is the place offered for
// the result; the function calls itself for operands and function bodies.
2055 static var *evaluate(node *op, var *res)
2057 /* This procedure is recursive so we should count every byte */
/* fnargs: vector indexed by OC_FNARG nodes (function arguments)
 * seed:   value kept for the random-number builtins
 * sreg:   scratch regex_t handed to as_regex() */
2058 static var *fnargs = NULL;
2059 static unsigned seed = 1;
2060 static regex_t sreg;
2082 return setvar_s(res, NULL);
/* opn: the operation-specific bits of opinfo */
2089 opn = (short)(opinfo & OPNMASK);
/* remember the source line of the node being executed */
2090 lineno = op->lineno;
2092 /* execute inevitable things */
/* flag bits in opinfo request pre-evaluation of the left (OF_RES1) and
 * right (OF_RES2) operands and their string/number conversions */
2094 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2095 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2096 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2097 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2098 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2100 switch (XC(opinfo & OPCLSMASK)) {
2102 /* -- iterative node type -- */
2106 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2107 /* it's range pattern */
/* OF_CHECKED on the node records that the range is currently open: it is
 * set once the start pattern matches and cleared when the end matches */
2108 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2109 op->info |= OF_CHECKED;
2110 if (ptest(op1->r.n))
2111 op->info &= ~OF_CHECKED;
/* ordinary pattern: continue with a.n on match, r.n otherwise */
2118 op = (ptest(op1)) ? op->a.n : op->r.n;
2122 /* just evaluate an expression, also used as unconditional jump */
2126 /* branch, used in if-else and various loops */
2128 op = istrue(L.v) ? op->a.n : op->r.n;
2131 /* initialize for-in loop */
2132 case XC( OC_WALKINIT ):
2133 hashwalk_init(L.v, iamarray(R.v));
2136 /* get next array item */
2137 case XC( OC_WALKNEXT ):
2138 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2141 case XC( OC_PRINT ):
2142 case XC( OC_PRINTF ):
/* redirected output: look up/create the stream record named by R.s and
 * open it either as a pipe or as a file (write or append) */
2145 X.rsm = newfile(R.s);
2148 X.rsm->F = popen(R.s, "w");
2149 if (X.rsm->F == NULL)
2150 bb_perror_msg_and_die("popen");
2153 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2159 if ((opinfo & OPCLSMASK) == OC_PRINT) {
/* the whole record $0 is written here */
2161 fputs(getvar_s(V[F0]), X.F);
2164 L.v = evaluate(nextarg(&op1), v1);
/* numbers go through OFMT (integral values are printed as integers) */
2165 if (L.v->type & VF_NUMBER) {
2166 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2167 getvar_i(L.v), TRUE);
2170 fputs(getvar_s(L.v), X.F);
/* OFS between arguments, ORS after the last one */
2173 if (op1) fputs(getvar_s(V[OFS]), X.F);
2176 fputs(getvar_s(V[ORS]), X.F);
2178 } else { /* OC_PRINTF */
2179 L.s = awk_printf(op1);
2186 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2187 X.info = op1->info & OPCLSMASK;
2188 if (X.info == OC_VAR) {
2190 } else if (X.info == OC_FNARG) {
2191 R.v = &fnargs[op1->l.i];
2193 runtime_error(EMSG_NOT_ARRAY);
/* remove a single element (key from op1->r.n) ... */
2198 L.s = getvar_s(evaluate(op1->r.n, v1));
2199 hash_remove(iamarray(R.v), L.s);
/* ... or empty the whole array */
2201 clear_array(iamarray(R.v));
2205 case XC( OC_NEWSOURCE ):
2206 programname = op->l.s;
2209 case XC( OC_RETURN ):
2213 case XC( OC_NEXTFILE ):
2224 /* -- recursive node type -- */
2232 case XC( OC_FNARG ):
2233 L.v = &fnargs[op->l.i];
/* with a subscript node (r.n) the result is the array element named R.s,
 * otherwise the variable itself */
2235 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 if key L.s exists in array R.v, else 0 */
2239 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2242 case XC( OC_REGEXP ):
/* a bare regexp takes $0 as its subject string */
2244 L.s = getvar_s(V[F0]);
2247 case XC( OC_MATCH ):
2250 X.re = as_regex(op1, &sreg);
2251 R.i = regexec(X.re, L.s, 0, NULL, 0);
2252 if (X.re == &sreg) regfree(X.re);
/* 1 on match, 0 otherwise; the sense is inverted when opn is '!' */
2253 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2257 /* if source is a temporary string, just relink it to dest */
2258 if (R.v == v1+1 && R.v->string) {
2259 res = setvar_p(L.v, R.v->string);
2262 res = copyvar(L.v, R.v);
2266 case XC( OC_TERNARY ):
/* the right child must be the ':' node holding both alternatives */
2267 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2268 runtime_error(EMSG_POSSIBLE_ERROR);
2269 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* user function call: the function must have a body */
2273 if (! op->r.f->body.first)
2274 runtime_error(EMSG_UNDEF_FUNC);
/* allocate nargs+1 argument slots; X.v keeps the start, R.v walks them */
2276 X.v = R.v = nvalloc(op->r.f->nargs+1);
2278 L.v = evaluate(nextarg(&op1), v1);
/* mark the slot as a function argument and link it to the actual value */
2280 R.v->type |= VF_CHILD;
2281 R.v->x.parent = L.v;
2282 if (++R.v - X.v >= op->r.f->nargs)
2290 res = evaluate(op->r.f->body.first, res);
2297 case XC( OC_GETLINE ):
2298 case XC( OC_PGETLINE ):
2300 X.rsm = newfile(L.s);
/* source is a command for OC_PGETLINE, a plain file otherwise */
2302 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2303 X.rsm->F = popen(L.s, "r");
2304 X.rsm->is_pipe = TRUE;
2306 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
/* no source given: read from the current (or next) input file */
2310 if (! iF) iF = next_input_file();
2315 setvar_i(V[ERRNO], errno);
2323 L.i = awk_getline(X.rsm, R.v);
2333 /* simple builtins */
2334 case XC( OC_FBLTIN ):
/* random value: rand() scaled by RAND_MAX */
2342 R.d = (double)rand() / (double)RAND_MAX;
2345 #if ENABLE_FEATURE_AWK_MATH
2371 runtime_error(EMSG_NO_MATH);
/* new seed: the argument if one was given, else the current time */
2377 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2387 L.s = getvar_s(V[F0]);
/* run the command only when exec is allowed and the string is non-empty;
 * the value from system() is shifted right by 8 */
2393 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2394 ? (system(L.s) >> 8) : 0;
2402 X.rsm = newfile(L.s);
/* close a stream by name: pclose() for pipes, fclose() for files, then
 * drop its buffer and its entry in fdhash */
2411 X.rsm = (rstream *)hash_search(fdhash, L.s);
2413 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2414 free(X.rsm->buffer);
2415 hash_remove(fdhash, L.s);
2418 setvar_i(V[ERRNO], errno);
2425 case XC( OC_BUILTIN ):
2426 res = exec_builtin(op, res);
2429 case XC( OC_SPRINTF ):
2430 setvar_p(res, awk_printf(op1));
2433 case XC( OC_UNARY ):
2435 L.d = R.d = getvar_i(R.v);
/* logical negation */
2450 L.d = istrue(X.v) ? 0 : 1;
2461 case XC( OC_FIELD ):
2462 R.i = (int)getvar_i(R.v);
/* field numbers are 1-based, Fields[] is 0-based */
2470 res = &Fields[R.i-1];
2474 /* concatenation (" ") and index joining (",") */
2475 case XC( OC_CONCAT ):
2476 case XC( OC_COMMA ):
/* opn is reused here as the size needed for the joined string */
2477 opn = strlen(L.s) + strlen(R.s) + 2;
/* index joining also needs room for the SUBSEP string */
2480 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2481 L.s = getvar_s(V[SUBSEP]);
2482 X.s = xrealloc(X.s, opn + strlen(L.s));
/* logical AND: the right side is tested only if the left is true */
2490 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
/* logical OR: the right side is tested only if the left is false */
2494 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2497 case XC( OC_BINARY ):
2498 case XC( OC_REPLACE ):
2499 R.d = getvar_i(R.v);
2511 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2515 #if ENABLE_FEATURE_AWK_MATH
2516 L.d = pow(L.d, R.d);
2518 runtime_error(EMSG_NO_MATH);
/* remainder: subtract the truncated quotient times the divisor */
2522 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2523 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; OC_REPLACE stores back into X.v */
2526 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2529 case XC( OC_COMPARE ):
/* numeric comparison only when both sides are numeric, else string
 * comparison (case-insensitive when icase is set) */
2530 if (is_numeric(L.v) && is_numeric(R.v)) {
2531 L.d = getvar_i(L.v) - getvar_i(R.v);
2533 L.s = getvar_s(L.v);
2534 R.s = getvar_s(R.v);
2535 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* bit 0 of opn selects the sense of the test (see the result line below);
 * the remaining bits select the comparison */
2537 switch (opn & 0xfe) {
2548 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2552 runtime_error(EMSG_POSSIBLE_ERROR);
2554 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2556 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2566 /* -------- main & co. -------- */
/* Exit path called with the exit code r: evaluates the END rules
 * (endseq) and then pclose()s every open pipe stream in fdhash. */
2568 static int awk_exit(int r)
2579 evaluate(endseq.first, &tv);
2582 /* waiting for children */
/* visit every bucket of the stream hash */
2583 for (i = 0; i < fdhash->csize; i++) {
2584 hi = fdhash->items[i];
/* only open pipe streams are closed here */
2586 if (hi->data.rs.F && hi->data.rs.is_pipe)
2587 pclose(hi->data.rs.F);
2595 /* if expr looks like "var=value", perform assignment and return 1,
2596 * otherwise return 0 */
2597 static int is_assignment(const char *expr)
2599 char *exprc, *s, *s0, *s1;
/* work on a private, writable copy of expr */
2601 exprc = xstrdup(expr);
/* not an assignment if the first character fails isalnum_() or the
 * string contains no '=' */
2602 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value is rewritten character by character through nextchar()
 * (presumably decoding escape sequences - TODO confirm) */
2610 *(s1++) = nextchar(&s);
/* store the value s0 in the variable named by exprc */
2613 setvar_u(newvar(exprc), s0);
2618 /* switch to next input file.
 * Walks ARGV starting after ARGIND; "var=value" entries are consumed as
 * assignments by is_assignment() rather than opened. */
2619 static rstream *next_input_file(void)
2623 const char *fname, *ind;
/* set to TRUE below when a file is taken from ARGV */
2624 static int files_happen = FALSE;
/* close the previous input and reset its read position */
2626 if (rsm.F) fclose(rsm.F);
2628 rsm.pos = rsm.adv = 0;
/* no ARGV entries left after ARGIND */
2631 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* advance ARGIND and fetch the corresponding ARGV element */
2637 ind = getvar_s(incvar(V[ARGIND]));
2638 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* skip empty entries and entries that were handled as assignments */
2639 if (fname && *fname && !is_assignment(fname))
2640 F = afopen(fname, "r");
2644 files_happen = TRUE;
2645 setvar_s(V[FILENAME], fname);
/* Applet entry point: sets up the global tables and built-in variables,
 * parses the command line and the program text, runs the BEGIN rules,
 * then feeds every input record to the main rules and ends in awk_exit(). */
2650 int awk_main(int argc, char **argv);
2651 int awk_main(int argc, char **argv)
2654 char *opt_F, *opt_W;
2655 llist_t *opt_v = NULL;
2660 char *vnames = (char *)vNames; /* cheat */
2661 char *vvalues = (char *)vValues;
2663 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2664 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2665 if (ENABLE_LOCALE_SUPPORT)
2666 setlocale(LC_NUMERIC, "C");
2670 /* allocate global buffer */
2671 buf = xmalloc(MAXVARFMT + 1);
/* hash tables: vhash, ahash, fdhash (streams, see newfile/hash_search
 * users) and fnhash */
2673 vhash = hash_init();
2674 ahash = hash_init();
2675 fdhash = hash_init();
2676 fnhash = hash_init();
2678 /* initialize variables */
/* walk the packed name list; each name creates V[i] */
2679 for (i = 0; *vnames; i++) {
2680 V[i] = v = newvar(nextword(&vnames));
/* a value whose first byte is not '\377' is installed as a string */
2681 if (*vvalues != '\377')
2682 setvar_s(v, nextword(&vvalues));
/* a '*' in the name list flags the variable as VF_SPECIAL */
2686 if (*vnames == '*') {
2687 v->type |= VF_SPECIAL;
2692 handle_special(V[FS]);
2693 handle_special(V[RS]);
/* pre-register the three standard streams under their /dev names */
2695 newfile("/dev/stdin")->F = stdin;
2696 newfile("/dev/stdout")->F = stdout;
2697 newfile("/dev/stderr")->F = stderr;
/* copy the process environment into the ENVIRON array: each entry is
 * duplicated, the '=' is located, and s1 is stored under the key s */
2699 for (envp = environ; *envp; envp++) {
2700 char *s = xstrdup(*envp);
2701 char *s1 = strchr(s, '=');
2704 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
/* option bits in opt follow the order of "F:v:f:W:";
 * -v may be repeated and is collected in the list opt_v */
2708 opt_complementary = "v::";
2709 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2712 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2713 while (opt_v) { /* -v */
2714 if (!is_assignment(llist_pop(&opt_v)))
2717 if (opt & 0x4) { // -f
2718 char *s = s; /* die, gcc, die */
2719 FILE *from_file = afopen(programname, "r");
2720 /* one byte is reserved for some trick in next_token */
/* seekable file: size it with fseek/ftell and read it in one go,
 * storing the text from s + 1 onwards */
2721 if (fseek(from_file, 0, SEEK_END) == 0) {
2722 flen = ftell(from_file);
2723 s = xmalloc(flen + 4);
2724 fseek(from_file, 0, SEEK_SET);
2725 i = 1 + fread(s + 1, 1, flen, from_file);
/* otherwise read in chunks of up to 4094 bytes, growing the buffer
 * by 4096 before each read, until fread() returns nothing */
2727 for (i = j = 1; j > 0; i += j) {
2728 s = xrealloc(s, i + 4096);
2729 j = fread(s + i, 1, 4094, from_file);
2734 parse_program(s + 1);
2736 } else { // no -f: take program from 1st parameter
2739 programname = "cmd. line";
2740 parse_program(*argv++);
2743 if (opt & 0x8) // -W
2744 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2746 /* fill in ARGV array */
2747 setvar_i(V[ARGC], argc + 1);
2748 setari_u(V[ARGV], 0, "awk");
2751 setari_u(V[ARGV], ++i, *argv++);
/* run the BEGIN rules; with neither main nor END rules there is nothing
 * left to do, so no input is read */
2753 evaluate(beginseq.first, &tv);
2754 if (!mainseq.first && !endseq.first)
2755 awk_exit(EXIT_SUCCESS);
2757 /* input file could already be opened in BEGIN block */
2758 if (!iF) iF = next_input_file();
2760 /* passing through input files */
/* FNR is reset to 0 here */
2763 setvar_i(V[FNR], 0);
/* each record read into $0 is run through the main rules */
2765 while ((i = awk_getline(iF, V[F0])) > 0) {
2769 evaluate(mainseq.first, &tv);
2776 runtime_error(strerror(errno));
2778 iF = next_input_file();
2781 awk_exit(EXIT_SUCCESS);