1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
19 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
20 #define VF_ARRAY 0x0002 /* 1 = it's an array */
22 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
23 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
25 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
27 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
30 /* these flags are static, don't change them when value is changed */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34 typedef struct var_s {
35 unsigned short type; /* flags */
39 int aidx; /* func arg idx (for compilation stage) */
40 struct xhash_s *array; /* array ptr */
41 struct var_s *parent; /* for func args, ptr to actual parameter */
42 char **walker; /* list of array elements (for..in) */
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
50 const char *programname;
54 typedef struct func_s {
60 typedef struct rstream_s {
66 unsigned short is_pipe;
69 typedef struct hash_item_s {
71 struct var_s v; /* variable/array hash */
72 struct rstream_s rs; /* redirect streams hash */
73 struct func_s f; /* functions hash */
75 struct hash_item_s *next; /* next in chain */
76 char name[1]; /* really it's longer */
/* Header of a chained hash table; bucket heads live in items[]. */
79 typedef struct xhash_s {
80 unsigned nel; /* number of elements stored */
81 unsigned csize; /* current number of buckets (length of items[]) */
82 unsigned nprime; /* index into PRIMES[] of the next bucket count to grow to */
83 unsigned glen; /* total length of item names, each counted with its NUL */
84 struct hash_item_s **items; /* bucket array; each entry heads a chain linked via ->next */
88 typedef struct node_s {
90 unsigned short lineno;
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
113 struct nvblock_s *prev;
114 struct nvblock_s *next;
118 typedef struct tsplitter_s {
123 /* simple token classes */
124 /* Order and hex values are very important!!! See next_token() */
125 #define TC_SEQSTART 1 /* ( */
126 #define TC_SEQTERM (1 << 1) /* ) */
127 #define TC_REGEXP (1 << 2) /* /.../ */
128 #define TC_OUTRDR (1 << 3) /* | > >> */
129 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
130 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
131 #define TC_BINOPX (1 << 6) /* two-opnd operator */
132 #define TC_IN (1 << 7)
133 #define TC_COMMA (1 << 8)
134 #define TC_PIPE (1 << 9) /* input redirection pipe */
135 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
136 #define TC_ARRTERM (1 << 11) /* ] */
137 #define TC_GRPSTART (1 << 12) /* { */
138 #define TC_GRPTERM (1 << 13) /* } */
139 #define TC_SEMICOL (1 << 14)
140 #define TC_NEWLINE (1 << 15)
141 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
142 #define TC_WHILE (1 << 17)
143 #define TC_ELSE (1 << 18)
144 #define TC_BUILTIN (1 << 19)
145 #define TC_GETLINE (1 << 20)
146 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
147 #define TC_BEGIN (1 << 22)
148 #define TC_END (1 << 23)
149 #define TC_EOF (1 << 24)
150 #define TC_VARIABLE (1 << 25)
151 #define TC_ARRAY (1 << 26)
152 #define TC_FUNCTION (1 << 27)
153 #define TC_STRING (1 << 28)
154 #define TC_NUMBER (1 << 29)
156 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
158 /* combined token classes */
159 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
164 #define TC_STATEMNT (TC_STATX | TC_WHILE)
165 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
171 /* discard newlines after these */
172 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173 TC_BINOP | TC_OPTERM)
175 /* what can expression begin with */
176 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183 TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
186 #define OF_RES1 0x010000
187 #define OF_RES2 0x020000
188 #define OF_STR1 0x040000
189 #define OF_STR2 0x080000
190 #define OF_NUM1 0x100000
191 #define OF_CHECKED 0x200000
193 /* combined operator flags */
196 #define xS (OF_RES2 | OF_STR2)
198 #define VV (OF_RES1 | OF_RES2)
199 #define Nx (OF_RES1 | OF_NUM1)
200 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx (OF_RES1 | OF_STR1)
202 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
205 #define OPCLSMASK 0xFF00
206 #define OPNMASK 0x007F
208 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
209 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
210 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place a priority (or builtin argument descriptor) byte into bits 24..31 of
 * an info word.  The argument is parenthesized so that expressions such as
 * P(a|b) shift the whole value, and it is converted to uint32_t first so that
 * values with bit 7 set (e.g. P(0x83)) do not shift into the sign bit of int. */
#define P(x) ((uint32_t)(x) << 24)
213 #define PRIMASK 0x7F000000
214 #define PRIMASK2 0x7E000000
216 /* Operation classes */
218 #define SHIFT_TIL_THIS 0x0600
219 #define RECUR_FROM_THIS 0x1000
222 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
223 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
225 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
226 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
227 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
229 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
230 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
231 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
232 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
233 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
234 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
235 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
236 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
239 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
243 /* simple builtins */
245 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
246 F_ti, F_le, F_sy, F_ff, F_cl
251 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
253 B_an, B_co, B_ls, B_or, B_rs, B_xo,
256 /* tokens and their corresponding info values */
258 #define NTC "\377" /* switch to next token class (tc<<1) */
261 #define OC_B OC_BUILTIN
263 static const char tokenlist[] =
266 "\1/" NTC /* REGEXP */
267 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
268 "\2++" "\2--" NTC /* UOPPOST */
269 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
270 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
271 "\2*=" "\2/=" "\2%=" "\2^="
272 "\1+" "\1-" "\3**=" "\2**"
273 "\1/" "\1%" "\1^" "\1*"
274 "\2!=" "\2>=" "\2<=" "\1>"
275 "\1<" "\2!~" "\1~" "\2&&"
276 "\2||" "\1?" "\1:" NTC
280 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
286 "\2if" "\2do" "\3for" "\5break" /* STATX */
287 "\10continue" "\6delete" "\5print"
288 "\6printf" "\4next" "\10nextfile"
289 "\6return" "\4exit" NTC
293 "\3and" "\5compl" "\6lshift" "\2or"
295 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
296 "\3cos" "\3exp" "\3int" "\3log"
297 "\4rand" "\3sin" "\4sqrt" "\5srand"
298 "\6gensub" "\4gsub" "\5index" "\6length"
299 "\5match" "\5split" "\7sprintf" "\3sub"
300 "\6substr" "\7systime" "\10strftime"
301 "\7tolower" "\7toupper" NTC
303 "\4func" "\10function" NTC
308 static const uint32_t tokeninfo[] = {
312 xS|'a', xS|'w', xS|'|',
313 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
314 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
316 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
317 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
318 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
319 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
320 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
321 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
322 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
323 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
324 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
325 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
326 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
327 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
328 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
329 OC_COLON|xx|P(67)|':',
332 OC_PGETLINE|SV|P(37),
333 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
334 OC_UNARY|xV|P(19)|'!',
340 ST_IF, ST_DO, ST_FOR, OC_BREAK,
341 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
342 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
343 OC_RETURN|Vx, OC_EXIT|Nx,
347 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
348 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
349 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
350 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
351 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
352 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
353 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
354 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
355 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
362 /* internal variable names and their initial values */
363 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
365 CONVFMT=0, OFMT, FS, OFS,
366 ORS, RS, RT, FILENAME,
367 SUBSEP, ARGIND, ARGC, ARGV,
370 ENVIRON, F0, _intvarcount_
373 static const char vNames[] =
374 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
375 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
376 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
378 "NR\0" "NF\0*" "IGNORECASE\0*"
379 "ENVIRON\0" "$\0*" "\0";
381 static const char vValues[] =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
387 /* hash size may grow to these values */
/* Initial number of hash buckets.  No trailing semicolon: the macro must be
 * usable inside larger expressions, not only as a complete statement. */
#define FIRST_PRIME 61
389 static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
390 enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned) };
394 extern char **environ;
396 static var * V[_intvarcount_];
397 static chain beginseq, mainseq, endseq, *seq;
398 static int nextrec, nextfile;
399 static node *break_ptr, *continue_ptr;
401 static xhash *vhash, *ahash, *fdhash, *fnhash;
402 static const char *programname;
404 static int is_f0_split;
407 static tsplitter fsplitter, rsplitter;
423 /* function prototypes */
424 static void handle_special(var *);
425 static node *parse_expr(uint32_t);
426 static void chain_group(void);
427 static var *evaluate(node *, var *);
428 static rstream *next_input_file(void);
429 static int fmt_num(char *, int, const char *, double, int);
430 static int awk_exit(int) ATTRIBUTE_NORETURN;
432 /* ---- error handling ---- */
434 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
435 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
436 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
437 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
438 static const char EMSG_INV_FMT[] = "Invalid format specifier";
439 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
440 static const char EMSG_NOT_ARRAY[] = "Not an array";
441 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
442 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
443 #if !ENABLE_FEATURE_AWK_MATH
444 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
/* Reset a variable by clearing every byte of *vp (type flags included). */
447 static void zero_out_var(var * vp)
449 memset(vp, 0, sizeof(*vp));
/* Report `message` as "<programname>:<lineno>: <message>" through
 * bb_error_msg_and_die().  Declared ATTRIBUTE_NORETURN, so callers may rely
 * on control never coming back. */
452 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
453 static void syntax_error(const char * const message)
/* NOTE(review): `lineno` is not declared in the visible part of the file;
 * presumably it refers to the current token's line number - confirm. */
455 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
/* Run-time errors are reported through the same routine. */
458 #define runtime_error(x) syntax_error(x)
461 /* ---- hash stuff ---- */
/* Compute an unsigned hash of the NUL-terminated string `name`.  Callers
 * reduce the result modulo the bucket count themselves. */
463 static unsigned hashidx(const char *name)
/* Per character: idx = idx * 63 + ch, since (idx << 6) - idx == idx * 63.
 * NOTE(review): the initial value of idx is set on a line not shown here. */
467 while (*name) idx = *name++ + (idx << 6) - idx;
471 /* Create a new, empty hash table with FIRST_PRIME buckets and return it. */
472 static xhash *hash_init(void)
476 newhash = xzalloc(sizeof(xhash));
477 newhash->csize = FIRST_PRIME;
/* one chain-head pointer per bucket */
478 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
483 /* Find the item called `name` in `hash`; return ptr to its data, or NULL
 * if it is not present.  The table is never modified. */
484 static void *hash_search(xhash *hash, const char *name)
/* pick the bucket: string hash reduced modulo the current bucket count */
488 hi = hash->items [ hashidx(name) % hash->csize ];
/* items in a bucket are told apart by exact name comparison */
490 if (strcmp(hi->name, name) == 0)
497 /* Grow the hash to the next size listed in PRIMES[] and redistribute all
 * existing items over the new bucket array. */
498 static void hash_rebuild(xhash *hash)
500 unsigned newsize, i, idx;
501 hash_item **newitems, *hi, *thi;
/* every size in PRIMES[] has already been used */
503 if (hash->nprime == NPRIMES)
/* take the next size and advance the index for the following rebuild */
506 newsize = PRIMES[hash->nprime++];
507 newitems = xzalloc(newsize * sizeof(hash_item *));
/* walk every old bucket */
509 for (i=0; i<hash->csize; i++) {
/* re-hash the item for the new size and link it in front of whatever
 * the destination bucket already holds */
514 idx = hashidx(thi->name) % newsize;
515 thi->next = newitems[idx];
/* switch the table over to the new bucket array */
521 hash->csize = newsize;
522 hash->items = newitems;
525 /* Find item `name` in `hash`, adding it if necessary.  Return ptr to data. */
526 static void *hash_find(xhash *hash, const char *name)
532 hi = hash_search(hash, name);
/* count the new element; an average of more than 10 items per bucket
 * triggers the statement on the following (not shown) line -
 * presumably a call to hash_rebuild(), confirm */
534 if (++hash->nel / hash->csize > 10)
/* l includes the terminating NUL */
537 l = strlen(name) + 1;
/* the name is stored inline, directly after the fixed part of the item */
538 hi = xzalloc(sizeof(hash_item) + l);
539 memcpy(hi->name, name, l);
/* insert at the head of the bucket chain */
541 idx = hashidx(name) % hash->csize;
542 hi->next = hash->items[idx];
543 hash->items[idx] = hi;
549 #define findvar(hash, name) ((var*) hash_find((hash) , (name)))
550 #define newvar(name) ((var*) hash_find(vhash , (name)))
551 #define newfile(name) ((rstream*)hash_find(fdhash ,(name)))
552 #define newfunc(name) ((func*) hash_find(fnhash , (name)))
/* Remove the item called `name` from `hash`, if it is there. */
554 static void hash_remove(xhash *hash, const char *name)
556 hash_item *hi, **phi;
/* phi starts at the head pointer of the bucket `name` hashes to */
558 phi = &(hash->items[ hashidx(name) % hash->csize ]);
561 if (strcmp(hi->name, name) == 0) {
/* keep the summed name length (names are counted with their NUL) in step */
562 hash->glen -= (strlen(name) + 1);
572 /* ------ some useful functions ------ */
574 static void skip_spaces(char **s)
578 while (*p == ' ' || *p == '\t' ||
579 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
585 static char *nextword(char **s)
589 while (*(*s)++) /* */;
594 static char nextchar(char **s)
600 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
601 if (c == '\\' && *s == pps) c = *((*s)++);
/* Like isalnum(), but the underscore is accepted as well. */
605 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
607 return (isalnum(c) || c == '_');
/* Open `path` with `mode` via xfopen(); the exact path "-" yields stdin
 * instead (in which case `mode` is not used). */
610 static FILE *afopen(const char *path, const char *mode)
612 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
615 /* -------- working with variables (set/get/copy/etc) -------- */
617 static xhash *iamarray(var *v)
621 while (a->type & VF_CHILD)
624 if (! (a->type & VF_ARRAY)) {
626 a->x.array = hash_init();
631 static void clear_array(xhash *array)
636 for (i=0; i<array->csize; i++) {
637 hi = array->items[i];
641 free(thi->data.v.string);
644 array->items[i] = NULL;
646 array->glen = array->nel = 0;
649 /* clear a variable */
650 static var *clrvar(var *v)
652 if (!(v->type & VF_FSTR))
655 v->type &= VF_DONTTOUCH;
661 /* assign string value to variable */
662 static var *setvar_p(var *v, char *value)
671 /* Same as setvar_p, but stores a private copy of the string.  A NULL or
 * empty `value` is passed on as NULL; nothing is duplicated in that case. */
672 static var *setvar_s(var *v, const char *value)
674 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
677 /* same as setvar_s but set USER flag */
678 static var *setvar_u(var *v, const char *value)
685 /* Set array element a[idx] to user string `s`.  The element key is the
 * decimal text of idx. */
686 static void setari_u(var *a, int idx, const char *s)
/* 12 bytes hold "-2147483648" plus NUL - assumes int is at most 32 bits */
689 static char sidx[12];
691 sprintf(sidx, "%d", idx);
/* iamarray() yields the hash behind `a`; findvar() creates the element
 * if it does not exist yet */
692 v = findvar(iamarray(a), sidx);
696 /* assign numeric value to variable */
697 static var *setvar_i(var *v, double value)
700 v->type |= VF_NUMBER;
/* Return the string value of `v`; never returns NULL (a missing string is
 * reported as ""). */
706 static const char *getvar_s(var *v)
708 /* if v is numeric and has no cached string, convert it to string */
709 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
/* format with CONVFMT; TRUE selects the int_as_int mode of fmt_num() */
710 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
711 v->string = xstrdup(buf);
/* remember that the string form now exists */
712 v->type |= VF_CACHED;
714 return (v->string == NULL) ? "" : v->string;
717 static double getvar_i(var *v)
721 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
725 v->number = strtod(s, &s);
726 if (v->type & VF_USER) {
734 v->type |= VF_CACHED;
739 static var *copyvar(var *dest, const var *src)
743 dest->type |= (src->type & ~VF_DONTTOUCH);
744 dest->number = src->number;
746 dest->string = xstrdup(src->string);
748 handle_special(dest);
/* Add 1 to the numeric value of `v`, store it back with setvar_i() and
 * return that call's result. */
752 static var *incvar(var *v)
754 return setvar_i(v, getvar_i(v)+1.);
757 /* return true if v is number or numeric string */
758 static int is_numeric(var *v)
/* The XOR flips VF_DIRTY before masking, so the result is nonzero when
 * VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear.
 * NOTE(review): a statement between the signature and this return is not
 * visible here. */
761 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
764 /* return 1 when value of v corresponds to true, 0 otherwise */
765 static int istrue(var *v)
/* NOTE(review): the condition choosing between the two returns below is
 * on a line not shown here. */
/* numeric test: any value other than zero is true */
768 return (v->number == 0) ? 0 : 1;
/* string test: true only for a non-NULL, non-empty string */
770 return (v->string && *(v->string)) ? 1 : 0;
773 /* temporary variables allocator. Last allocated should be first freed */
774 static var *nvalloc(int n)
782 if ((cb->pos - cb->nv) + n <= cb->size) break;
787 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
793 if (pb) pb->next = cb;
799 while (v < cb->pos) {
808 static void nvfree(var *v)
812 if (v < cb->nv || v >= cb->pos)
813 runtime_error(EMSG_INTERNAL_ERROR);
815 for (p=v; p<cb->pos; p++) {
816 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817 clear_array(iamarray(p));
818 free(p->x.array->items);
821 if (p->type & VF_WALK)
828 while (cb->prev && cb->pos == cb->nv) {
833 /* ------- awk program text parsing ------- */
835 /* Parse the next token pointed to by global pos, place results into global t.
836 * If the token is not one of the expected classes, give up with a syntax error. Return token class
838 static uint32_t next_token(uint32_t expected)
840 static int concat_inserted;
841 static uint32_t save_tclass, save_info;
842 static uint32_t ltclass = TC_OPTERM;
853 } else if (concat_inserted) {
854 concat_inserted = FALSE;
855 t.tclass = save_tclass;
864 while (*p != '\n' && *p != '\0') p++;
872 } else if (*p == '\"') {
876 if (*p == '\0' || *p == '\n')
877 syntax_error(EMSG_UNEXP_EOS);
878 *(s++) = nextchar(&p);
884 } else if ((expected & TC_REGEXP) && *p == '/') {
888 if (*p == '\0' || *p == '\n')
889 syntax_error(EMSG_UNEXP_EOS);
890 if ((*s++ = *p++) == '\\') {
892 *(s-1) = bb_process_escape_sequence((const char **)&p);
893 if (*pp == '\\') *s++ = '\\';
894 if (p == pp) *s++ = *p++;
901 } else if (*p == '.' || isdigit(*p)) {
903 t.number = strtod(p, &p);
905 syntax_error(EMSG_UNEXP_TOKEN);
909 /* search for something known */
919 /* if token class is expected, token
920 * matches and it's not a longer word,
921 * then this is what we are looking for
923 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
924 *tl == *p && strncmp(p, tl, l) == 0 &&
925 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
935 /* it's a name (var/array/function),
936 * otherwise it's something wrong
939 syntax_error(EMSG_UNEXP_TOKEN);
942 while (isalnum_(*(++p))) {
947 /* also consume whitespace between functionname and bracket */
948 if (!(expected & TC_VARIABLE)) skip_spaces(&p);
961 /* skipping newlines in some cases */
962 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
965 /* insert concatenation operator when needed */
966 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
967 concat_inserted = TRUE;
971 t.info = OC_CONCAT | SS | P(35);
978 /* Are we ready for this? */
979 if (! (ltclass & expected))
980 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
981 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
986 static void rollback_token(void) { t.rollback = TRUE; }
988 static node *new_node(uint32_t info)
992 n = xzalloc(sizeof(node));
/* Turn node `n` into an OC_REGEXP node for pattern `s`.  `re` must point to
 * room for two regex_t objects: re[0] receives the case-sensitive
 * compilation and re[1] the REG_ICASE one. */
998 static node *mk_re_node(const char *s, node *n, regex_t *re)
1000 n->info = OC_REGEXP;
1003 xregcomp(re, s, REG_EXTENDED);
1004 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: require the "(" token, then parse up to
 * the matching ")" and return the resulting subtree. */
1009 static node *condition(void)
1011 next_token(TC_SEQSTART);
1012 return parse_expr(TC_SEQTERM);
1015 /* parse expression terminated by given argument, return ptr
1016 * to built subtree. Terminator is eaten by parse_expr */
1017 static node *parse_expr(uint32_t iexp)
1026 sn.r.n = glptr = NULL;
1027 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1029 while (! ((tc = next_token(xtc)) & iexp)) {
1030 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1031 /* input redirection (<) attached to glptr node */
1032 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1034 xtc = TC_OPERAND | TC_UOPPRE;
1037 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1038 /* for binary and postfix-unary operators, jump back over
1039 * previous operators with higher priority */
1041 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1042 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1044 if ((t.info & OPCLSMASK) == OC_TERNARY)
1046 cn = vn->a.n->r.n = new_node(t.info);
1048 if (tc & TC_BINOP) {
1050 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1051 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1053 next_token(TC_GETLINE);
1054 /* give maximum priority to this pipe */
1055 cn->info &= ~PRIMASK;
1056 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1060 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1065 /* for operands and prefix-unary operators, attach them
1068 cn = vn->r.n = new_node(t.info);
1070 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1071 if (tc & (TC_OPERAND | TC_REGEXP)) {
1072 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1073 /* one should be very careful with switch on tclass -
1074 * only simple tclasses should be used! */
1079 if ((v = hash_search(ahash, t.string)) != NULL) {
1080 cn->info = OC_FNARG;
1081 cn->l.i = v->x.aidx;
1083 cn->l.v = newvar(t.string);
1085 if (tc & TC_ARRAY) {
1087 cn->r.n = parse_expr(TC_ARRTERM);
1094 v = cn->l.v = xzalloc(sizeof(var));
1096 setvar_i(v, t.number);
1098 setvar_s(v, t.string);
1102 mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1107 cn->r.f = newfunc(t.string);
1108 cn->l.n = condition();
1112 cn = vn->r.n = parse_expr(TC_SEQTERM);
1118 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1122 cn->l.n = condition();
1131 /* add node to chain. Return ptr to alloc'd node */
1132 static node *chain_node(uint32_t info)
1137 seq->first = seq->last = new_node(0);
1139 if (seq->programname != programname) {
1140 seq->programname = programname;
1141 n = chain_node(OC_NEWSOURCE);
1142 n->l.s = xstrdup(programname);
1147 seq->last = n->a.n = new_node(OC_DONE);
1152 static void chain_expr(uint32_t info)
1156 n = chain_node(info);
1157 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1158 if (t.tclass & TC_GRPTERM)
1162 static node *chain_loop(node *nn)
1164 node *n, *n2, *save_brk, *save_cont;
1166 save_brk = break_ptr;
1167 save_cont = continue_ptr;
1169 n = chain_node(OC_BR | Vx);
1170 continue_ptr = new_node(OC_EXEC);
1171 break_ptr = new_node(OC_EXEC);
1173 n2 = chain_node(OC_EXEC | Vx);
1176 continue_ptr->a.n = n2;
1177 break_ptr->a.n = n->r.n = seq->last;
1179 continue_ptr = save_cont;
1180 break_ptr = save_brk;
1185 /* parse group and attach it to chain */
1186 static void chain_group(void)
1192 c = next_token(TC_GRPSEQ);
1193 } while (c & TC_NEWLINE);
1195 if (c & TC_GRPSTART) {
1196 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1197 if (t.tclass & TC_NEWLINE) continue;
1201 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1203 chain_expr(OC_EXEC | Vx);
1204 } else { /* TC_STATEMNT */
1205 switch (t.info & OPCLSMASK) {
1207 n = chain_node(OC_BR | Vx);
1208 n->l.n = condition();
1210 n2 = chain_node(OC_EXEC);
1212 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1214 n2->a.n = seq->last;
1222 n = chain_loop(NULL);
1227 n2 = chain_node(OC_EXEC);
1228 n = chain_loop(NULL);
1230 next_token(TC_WHILE);
1231 n->l.n = condition();
1235 next_token(TC_SEQSTART);
1236 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1237 if (t.tclass & TC_SEQTERM) { /* for-in */
1238 if ((n2->info & OPCLSMASK) != OC_IN)
1239 syntax_error(EMSG_UNEXP_TOKEN);
1240 n = chain_node(OC_WALKINIT | VV);
1243 n = chain_loop(NULL);
1244 n->info = OC_WALKNEXT | Vx;
1246 } else { /* for (;;) */
1247 n = chain_node(OC_EXEC | Vx);
1249 n2 = parse_expr(TC_SEMICOL);
1250 n3 = parse_expr(TC_SEQTERM);
1260 n = chain_node(t.info);
1261 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1262 if (t.tclass & TC_OUTRDR) {
1264 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266 if (t.tclass & TC_GRPTERM)
1271 n = chain_node(OC_EXEC);
1276 n = chain_node(OC_EXEC);
1277 n->a.n = continue_ptr;
1280 /* delete, next, nextfile, return, exit */
1287 static void parse_program(char *p)
1296 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1297 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1299 if (tclass & TC_OPTERM)
1303 if (tclass & TC_BEGIN) {
1307 } else if (tclass & TC_END) {
1311 } else if (tclass & TC_FUNCDECL) {
1312 next_token(TC_FUNCTION);
1314 f = newfunc(t.string);
1315 f->body.first = NULL;
1317 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1318 v = findvar(ahash, t.string);
1319 v->x.aidx = (f->nargs)++;
1321 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1328 } else if (tclass & TC_OPSEQ) {
1330 cn = chain_node(OC_TEST);
1331 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1332 if (t.tclass & TC_GRPSTART) {
1336 chain_node(OC_PRINT);
1338 cn->r.n = mainseq.last;
1340 } else /* if (tclass & TC_GRPSTART) */ {
1348 /* -------- program execution part -------- */
1350 static node *mk_splitter(const char *s, tsplitter *spl)
1358 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1362 if (strlen(s) > 1) {
1363 mk_re_node(s, n, re);
1365 n->info = (uint32_t) *s;
1371 /* use node as a regular expression. Supplied with node ptr and regex_t
1372 * storage space. Return ptr to regex (if result points to preg, it should
1373 * be regfree'd manually later)
1375 static regex_t *as_regex(node *op, regex_t *preg)
1380 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1381 return icase ? op->r.ire : op->l.re;
1384 s = getvar_s(evaluate(op, v));
1385 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1391 /* Gradually growing buffer: make sure *b exists and is larger than n.
 * When *b is NULL or n >= *size, the buffer is reallocated to
 * n + n/2 + 80 and that new capacity is written to *size; otherwise
 * nothing changes. */
1392 static void qrealloc(char **b, int n, int *size)
1394 if (!*b || n >= *size)
1395 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1398 /* resize field storage space */
1399 static void fsrealloc(int size)
1401 static int maxfields; /* = 0;*/
1404 if (size >= maxfields) {
1406 maxfields = size + 16;
1407 Fields = xrealloc(Fields, maxfields * sizeof(var));
1408 for (; i < maxfields; i++) {
1409 Fields[i].type = VF_SPECIAL;
1410 Fields[i].string = NULL;
1414 if (size < nfields) {
1415 for (i = size; i < nfields; i++) {
1422 static int awk_split(const char *s, node *spl, char **slist)
1427 regmatch_t pmatch[2];
1429 /* in worst case, each char would be a separate field */
1430 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1433 c[0] = c[1] = (char)spl->info;
1435 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1437 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1439 l = strcspn(s, c+2);
1440 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1441 && pmatch[0].rm_so <= l
1443 l = pmatch[0].rm_so;
1444 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1446 pmatch[0].rm_eo = l;
1447 if (s[l]) pmatch[0].rm_eo++;
1453 s += pmatch[0].rm_eo;
1456 } else if (c[0] == '\0') { /* null split */
1462 } else if (c[0] != ' ') { /* single-character split */
1464 c[0] = toupper(c[0]);
1465 c[1] = tolower(c[1]);
1468 while ((s1 = strpbrk(s1, c))) {
1472 } else { /* space split */
1474 s = skip_whitespace(s);
1477 while (*s && !isspace(*s))
1485 static void split_f0(void)
1487 static char *fstrings = NULL;
1497 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1500 for (i = 0; i < n; i++) {
1501 Fields[i].string = nextword(&s);
1502 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1505 /* set NF manually to avoid side effects */
1507 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1508 V[NF]->number = nfields;
1511 /* perform additional actions when some internal variables changed */
1512 static void handle_special(var *v)
1516 const char *sep, *s;
1517 int sl, l, len, i, bsize;
1519 if (!(v->type & VF_SPECIAL))
1523 n = (int)getvar_i(v);
1526 /* recalculate $0 */
1527 sep = getvar_s(V[OFS]);
1531 for (i=0; i<n; i++) {
1532 s = getvar_s(&Fields[i]);
1535 memcpy(b+len, sep, sl);
1538 qrealloc(&b, len+l+sl, &bsize);
1539 memcpy(b+len, s, l);
1547 } else if (v == V[F0]) {
1548 is_f0_split = FALSE;
1550 } else if (v == V[FS]) {
1551 mk_splitter(getvar_s(v), &fsplitter);
1553 } else if (v == V[RS]) {
1554 mk_splitter(getvar_s(v), &rsplitter);
1556 } else if (v == V[IGNORECASE]) {
1560 n = getvar_i(V[NF]);
1561 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1562 /* right here v is invalid. Just to note... */
1566 /* step through func/builtin/etc arguments */
1567 static node *nextarg(node **pn)
1572 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1581 static void hashwalk_init(var *v, xhash *array)
1587 if (v->type & VF_WALK)
1591 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1592 *w = *(w+1) = (char *)(w + 2);
1593 for (i=0; i<array->csize; i++) {
1594 hi = array->items[i];
1596 strcpy(*w, hi->name);
1603 static int hashwalk_next(var *v)
1611 setvar_s(v, nextword(w+1));
1615 /* Evaluate the node `pattern`; return 1 when the result is true (as
 * decided by istrue()), 0 otherwise. */
1616 static int ptest(node *pattern)
/* the same scratch variable is handed to evaluate() on every call */
1618 static var v; /* static: to save stack space? */
1620 return istrue(evaluate(pattern, &v));
1623 /* read next record from stream rsm into a variable v */
1624 static int awk_getline(rstream *rsm, var *v)
1627 regmatch_t pmatch[2];
1628 int a, p, pp=0, size;
1629 int fd, so, eo, r, rp;
1632 /* we're using our own buffer since we need access to accumulating
1635 fd = fileno(rsm->F);
1640 c = (char) rsplitter.n.info;
1643 if (! m) qrealloc(&m, 256, &size);
1649 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1650 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1651 b, 1, pmatch, 0) == 0) {
1652 so = pmatch[0].rm_so;
1653 eo = pmatch[0].rm_eo;
1657 } else if (c != '\0') {
1658 s = strchr(b+pp, c);
1659 if (! s) s = memchr(b+pp, '\0', p - pp);
1666 while (b[rp] == '\n')
1668 s = strstr(b+rp, "\n\n");
1671 while (b[eo] == '\n') eo++;
1679 memmove(m, (const void *)(m+a), p+1);
1684 qrealloc(&m, a+p+128, &size);
1687 p += safe_read(fd, b+p, size-p-1);
1691 setvar_i(V[ERRNO], errno);
1700 c = b[so]; b[so] = '\0';
1704 c = b[eo]; b[eo] = '\0';
1705 setvar_s(V[RT], b+so);
/* Format number `n` into buffer `b` of `size` bytes.
 * If int_as_int is nonzero and n survives a round trip through int, it is
 * printed with "%d" and `format` is ignored.  Otherwise `format` is used,
 * with its last character deciding whether n is passed as int or double. */
1717 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1721 const char *s = format;
1723 if (int_as_int && n == (int)n) {
1724 r = snprintf(b, size, "%d", (int)n);
/* leave c holding the last character of format (NUL for an empty format) */
1726 do { c = *s; } while (c && *++s);
/* integer conversions: pass n converted to int.
 * NOTE(review): strchr() also matches the terminating NUL, so an empty
 * format (c == 0) takes this branch. */
1727 if (strchr("diouxX", c)) {
1728 r = snprintf(b, size, format, (int)n);
/* floating-point conversions: pass n unchanged */
1729 } else if (strchr("eEfgG", c)) {
1730 r = snprintf(b, size, format, n);
/* any other final character is rejected */
1732 runtime_error(EMSG_INV_FMT);
1739 /* formatted output into an allocated buffer, return ptr to buffer */
1740 static char *awk_printf(node *n)
1745 int i, j, incr, bsize;
1750 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1755 while (*f && (*f != '%' || *(++f) == '%'))
1757 while (*f && !isalpha(*f))
1760 incr = (f - s) + MAXVARFMT;
1761 qrealloc(&b, incr + i, &bsize);
1766 arg = evaluate(nextarg(&n), v);
1769 if (c == 'c' || !c) {
1770 i += sprintf(b+i, s, is_numeric(arg) ?
1771 (char)getvar_i(arg) : *getvar_s(arg));
1773 } else if (c == 's') {
1775 qrealloc(&b, incr+i+strlen(s1), &bsize);
1776 i += sprintf(b+i, s, s1);
1779 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1783 /* if there was an error while sprintf, return value is negative */
1787 b = xrealloc(b, i + 1);
1794 /* common substitution routine
1795 * replace the (nm)th substring of (src) that matches (rn) with (repl), store
1796 * the result into (dest), return the number of substitutions. If nm=0, replace
1797 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1798 * subexpression matching (\1-\9)
/*
 * Visible mechanics: the pattern node (rn) is turned into a regex through
 * as_regex(), the source string is searched repeatedly with regexec(), and
 * the result is assembled in the growable buffer (ds) at offset (di).
 * NOTE(review): the numeric line prefixes skip values, so the declarations
 * of re/sreg/sp/ds/s, the match counting and the return are not visible.
 */
1800 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1805 int c, i, j, di, rl, so, eo, nbs, n, dssize;
/* room for the whole match plus nine subexpressions */
1806 regmatch_t pmatch[10];
1809 re = as_regex(rn, &sreg);
/* a missing source or destination falls back to V[F0] */
1810 if (! src) src = V[F0];
1811 if (! dest) dest = V[F0];
/* only the very first search (sp still at the start of the source string)
 * may match at beginning-of-line; later ones pass REG_NOTBOL */
1816 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1817 so = pmatch[0].rm_so;
1818 eo = pmatch[0].rm_eo;
/* copy everything up to the end of the match into the output */
1820 qrealloc(&ds, di + eo + rl, &dssize);
1821 memcpy(ds + di, sp, eo);
/* walk the replacement text */
1827 for (s = repl; *s; s++) {
/* '&', or a digit when (ex) is set, refers to matched text.
 * presumably (nbs) counts preceding backslashes and the subtraction
 * drops the ones used for escaping - confirm against the full source */
1833 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1834 di -= ((nbs + 3) >> 1);
/* append the text of match/subexpression (j) */
1843 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1844 qrealloc(&ds, di + rl + n, &dssize);
1845 memcpy(ds + di, sp + pmatch[j].rm_so, n);
/* copy a single character through; stop once the terminating NUL is copied */
1856 if (! (ds[di++] = *sp++)) break;
/* append the unmatched tail.
 * NOTE(review): strcpy writes strlen(sp)+1 bytes - presumably qrealloc
 * keeps spare room for the terminator; confirm */
1860 qrealloc(&ds, di + strlen(sp), &dssize);
1861 strcpy(ds + di, sp);
/* release the regex only when as_regex() handed back the scratch (sreg) */
1863 if (re == &sreg) regfree(re);
/*
 * Execute the builtin selected by (op->info & OPNMASK), storing into (res).
 * Up to four arguments are collected first: an[] holds the argument nodes,
 * av[] their evaluated values and as[] their string forms, depending on
 * bits of (isr). The top two bits of (info) give the minimum argument count.
 * NOTE(review): the numeric line prefixes skip values, so the case labels,
 * break statements and most declarations are not visible here; the notes
 * below describe only the statements that are shown.
 */
1867 static var *exec_builtin(node *op, var *res)
1874 regmatch_t pmatch[2];
1876 static tsplitter tspl;
1885 isr = info = op->info;
1888 av[2] = av[3] = NULL;
1889 for (i=0 ; i<4 && op ; i++) {
1890 an[i] = nextarg(&op);
1891 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1892 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1897 if (nargs < (info >> 30))
1898 runtime_error(EMSG_TOO_FEW_ARGS);
1900 switch (info & OPNMASK) {
1903 #if ENABLE_FEATURE_AWK_MATH
/* NOTE(review): (i) was last used as the argument-loop counter above, so
 * unless it is reassigned in a line not shown, av[i] is not the first
 * argument - this most likely should be av[0]; confirm */
1904 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1906 runtime_error(EMSG_NO_MATH);
/* splitter: a regexp node is used as is, anything else is evaluated to a
 * string and converted with mk_splitter() */
1912 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1913 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1918 n = awk_split(as[0], spl, &s);
/* the target array is emptied, then filled with elements 1..n */
1920 clear_array(iamarray(av[1]));
1921 for (i=1; i<=n; i++)
1922 setari_u(av[1], i, nextword(&s1));
/* substring: convert the 1-based start to 0-based and clamp it to [0, l] */
1929 i = getvar_i(av[1]) - 1;
1930 if (i>l) i=l; if (i<0) i=0;
/* length defaults to the rest of the string when no third argument is given */
1931 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
/* NOTE(review): strncpy does not terminate the copy by itself - presumably
 * s[n] is set in a line not shown; confirm */
1934 strncpy(s, as[0]+i, n);
/* bitwise builtins operate on the arguments converted to long.
 * NOTE(review): shifting by a negative or too large count, or left-shifting
 * a negative value, is undefined in C */
1940 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1944 setvar_i(res, ~(long)getvar_i(av[0]));
1948 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1952 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1956 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1960 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
/* case conversion works on a private copy, one character at a time,
 * through the function pointer (to_xxx) */
1970 s1 = s = xstrdup(as[0]);
1972 *s1 = (*to_xxx)(*s1);
/* substring search: (l) is the last offset at which the needle can still fit */
1981 l = strlen(as[0]) - ll;
1982 if (ll > 0 && l >= 0) {
1984 s = strstr(as[0], as[1]);
/* result is the 1-based offset of the first occurrence */
1985 if (s) n = (s - as[0]) + 1;
1987 /* this piece of code is terribly slow and
1988 * really should be rewritten
// case-insensitive variant: try every start offset in turn
1990 for (i=0; i<=l; i++) {
1991 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2003 tt = getvar_i(av[1]);
//s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// the first argument, when present, overrides the default time format
2007 i = strftime(buf, MAXVARFMT,
2008 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
// regex match of as[0] against the pattern given as second argument
2015 re = as_regex(an[1], &sreg);
2016 n = regexec(re, as[0], 1, pmatch, 0);
// these offsets make RSTART 0 and RLENGTH -1 below
// (presumably the no-match path; the guarding condition is not shown)
2021 pmatch[0].rm_so = 0;
2022 pmatch[0].rm_eo = -1;
2024 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2025 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2026 setvar_i(res, pmatch[0].rm_so);
// release the regex only when as_regex() handed back the scratch (sreg)
2027 if (re == &sreg) regfree(re);
// substitution with explicit count and source, result stored in (res),
// subexpression references enabled
2031 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
// nm == 0: in-place substitution on av[2], result is awk_sub's return value
2035 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
// nm == 1: same, in place on av[2]
2039 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2048 * Evaluate node - the heart of the program. Supplied with subtree
2049 * and place where to store result. returns ptr to result.
/* XC() drops the low 8 bits so node class values can serve as case labels */
2051 #define XC(n) ((n) >> 8)
/*
 * Evaluate the node (op); (res) is where a result may be stored. Operands
 * are pre-evaluated according to OF_* flags in the node's info word, then
 * the node class selects the action in one large switch.
 * NOTE(review): the numeric line prefixes skip values, so many case labels,
 * break statements, declarations (op1, v1, L, R, X, opinfo, opn, ...) and
 * the return are not visible here; the notes below describe only the
 * statements that are shown.
 */
2053 static var *evaluate(node *op, var *res)
2055 /* This procedure is recursive so we should count every byte */
/* static storage: these keep their values across calls */
2056 static var *fnargs = NULL;
2057 static unsigned seed = 1;
2058 static regex_t sreg;
2080 return setvar_s(res, NULL);
2087 opn = (short)(opinfo & OPNMASK);
2088 lineno = op->lineno;
2090 /* execute inevitable things */
/* left operand goes to scratch slot v1, right operand to v1+1; string and
 * numeric views are fetched only when the matching flag asks for them */
2092 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2093 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2094 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2095 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2096 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2098 switch (XC(opinfo & OPCLSMASK)) {
2100 /* -- iterative node type -- */
2104 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2105 /* it's range pattern */
/* OF_CHECKED in op->info remembers that the range is open: it is set
 * once the left pattern matches and cleared when the right one does */
2106 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2107 op->info |= OF_CHECKED;
2108 if (ptest(op1->r.n))
2109 op->info &= ~OF_CHECKED;
/* plain pattern: follow a.n on match, r.n otherwise */
2116 op = (ptest(op1)) ? op->a.n : op->r.n;
2120 /* just evaluate an expression, also used as unconditional jump */
2124 /* branch, used in if-else and various loops */
2126 op = istrue(L.v) ? op->a.n : op->r.n;
2129 /* initialize for-in loop */
2130 case XC( OC_WALKINIT ):
2131 hashwalk_init(L.v, iamarray(R.v));
2134 /* get next array item */
2135 case XC( OC_WALKNEXT ):
2136 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2139 case XC( OC_PRINT ):
2140 case XC( OC_PRINTF ):
/* output redirection: look up (or create) the stream entry named R.s */
2143 X.rsm = newfile(R.s);
/* pipe target: failure to start the command is fatal */
2146 if((X.rsm->F = popen(R.s, "w")) == NULL)
2147 bb_perror_msg_and_die("popen");
/* file target: 'w' truncates, anything else appends */
2150 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2156 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2158 fputs(getvar_s(V[F0]), X.F);
2161 L.v = evaluate(nextarg(&op1), v1);
/* numbers are formatted through OFMT; integral values print as "%d"
 * because int_as_int is TRUE */
2162 if (L.v->type & VF_NUMBER) {
2163 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2164 getvar_i(L.v), TRUE);
2167 fputs(getvar_s(L.v), X.F);
/* OFS goes between arguments, ORS after the last one */
2170 if (op1) fputs(getvar_s(V[OFS]), X.F);
2173 fputs(getvar_s(V[ORS]), X.F);
2175 } else { /* OC_PRINTF */
2176 L.s = awk_printf(op1);
2183 case XC( OC_DELETE ):
/* the operand must be a variable or a function argument */
2184 X.info = op1->info & OPCLSMASK;
2185 if (X.info == OC_VAR) {
2187 } else if (X.info == OC_FNARG) {
2188 R.v = &fnargs[op1->l.i];
2190 runtime_error(EMSG_NOT_ARRAY);
/* with a subscript a single element is removed, otherwise the whole
 * array is cleared (the selecting condition is not shown) */
2195 L.s = getvar_s(evaluate(op1->r.n, v1));
2196 hash_remove(iamarray(R.v), L.s);
2198 clear_array(iamarray(R.v));
2202 case XC( OC_NEWSOURCE ):
2203 programname = op->l.s;
2206 case XC( OC_RETURN ):
2210 case XC( OC_NEXTFILE ):
2221 /* -- recursive node type -- */
2229 case XC( OC_FNARG ):
2230 L.v = &fnargs[op->l.i];
/* with a subscript node present the result is the array element, else
 * the variable itself */
2232 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
/* membership test: 1 when key L.s exists in array R.v */
2236 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2239 case XC( OC_REGEXP ):
2241 L.s = getvar_s(V[F0]);
2244 case XC( OC_MATCH ):
2247 X.re = as_regex(op1, &sreg);
2248 R.i = regexec(X.re, L.s, 0, NULL, 0);
/* release the regex only when as_regex() handed back the scratch (sreg) */
2249 if (X.re == &sreg) regfree(X.re);
/* 1 on match, inverted when the operator is '!' */
2250 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2254 /* if source is a temporary string, just relink it to dest */
/* v1+1 is the scratch slot the right operand was evaluated into above */
2255 if (R.v == v1+1 && R.v->string) {
2256 res = setvar_p(L.v, R.v->string);
2259 res = copyvar(L.v, R.v);
2263 case XC( OC_TERNARY ):
/* the right child must be a ':' node carrying both alternatives */
2264 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2265 runtime_error(EMSG_POSSIBLE_ERROR);
2266 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
/* function call: refuse functions that were never given a body */
2270 if (! op->r.f->body.first)
2271 runtime_error(EMSG_UNDEF_FUNC);
/* allocate one slot more than the declared argument count */
2273 X.v = R.v = nvalloc(op->r.f->nargs+1);
2275 L.v = evaluate(nextarg(&op1), v1);
/* mark the slot as a function argument and link it to the actual
 * parameter it was built from */
2277 R.v->type |= VF_CHILD;
2278 R.v->x.parent = L.v;
/* stop once the declared number of arguments has been filled */
2279 if (++R.v - X.v >= op->r.f->nargs)
2287 res = evaluate(op->r.f->body.first, res);
2294 case XC( OC_GETLINE ):
2295 case XC( OC_PGETLINE ):
2297 X.rsm = newfile(L.s);
2299 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2300 X.rsm->F = popen(L.s, "r");
2301 X.rsm->is_pipe = TRUE;
2303 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
/* no explicit source: read from the current main input, opening it first
 * if necessary */
2307 if (! iF) iF = next_input_file();
2312 setvar_i(V[ERRNO], errno);
2320 L.i = awk_getline(X.rsm, R.v);
2330 /* simple builtins */
2331 case XC( OC_FBLTIN ):
/* NOTE(review): rand() may return RAND_MAX, so this can yield exactly 1.0;
 * awk's rand() is specified as strictly below 1 - confirm intended range */
2339 R.d = (double)rand() / (double)RAND_MAX;
2342 #if ENABLE_FEATURE_AWK_MATH
2368 runtime_error(EMSG_NO_MATH);
/* with an argument the seed is taken from it, otherwise from the clock */
2374 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2384 L.s = getvar_s(V[F0]);
/* commands run only when ENABLE_FEATURE_ALLOW_EXEC is set and the string
 * is non-empty; presumably >> 8 extracts the exit status - confirm */
2390 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2391 ? (system(L.s) >> 8) : 0;
2399 X.rsm = newfile(L.s);
/* closing a stream: pipes need pclose, then the entry's buffer is freed
 * and the entry is removed from fdhash */
2408 X.rsm = (rstream *)hash_search(fdhash, L.s);
2410 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2411 free(X.rsm->buffer);
2412 hash_remove(fdhash, L.s);
2415 setvar_i(V[ERRNO], errno);
2422 case XC( OC_BUILTIN ):
2423 res = exec_builtin(op, res);
2426 case XC( OC_SPRINTF ):
2427 setvar_p(res, awk_printf(op1));
2430 case XC( OC_UNARY ):
2432 L.d = R.d = getvar_i(R.v);
2447 L.d = istrue(X.v) ? 0 : 1;
2458 case XC( OC_FIELD ):
2459 R.i = (int)getvar_i(R.v);
/* field numbers are 1-based, the Fields array is 0-based */
2467 res = &Fields[R.i-1];
2471 /* concatenation (" ") and index joining (",") */
2472 case XC( OC_CONCAT ):
2473 case XC( OC_COMMA ):
/* NOTE(review): opn is reused here as a byte count; it was assigned through
 * a (short) cast above and its declared type is not visible - if it is a
 * short, long operands could overflow it; confirm */
2474 opn = strlen(L.s) + strlen(R.s) + 2;
/* index joining additionally needs room for SUBSEP */
2477 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2478 L.s = getvar_s(V[SUBSEP]);
2479 X.s = xrealloc(X.s, opn + strlen(L.s));
/* short-circuit forms: the right side is tested only when needed */
2487 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2491 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2494 case XC( OC_BINARY ):
2495 case XC( OC_REPLACE ):
2496 R.d = getvar_i(R.v);
2508 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2512 #if ENABLE_FEATURE_AWK_MATH
2513 L.d = pow(L.d, R.d);
2515 runtime_error(EMSG_NO_MATH);
2519 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
/* remainder computed by truncating the quotient.
 * NOTE(review): the (int) cast is out of range for large quotients */
2520 L.d -= (int)(L.d / R.d) * R.d;
/* plain binary operators store into (res); the OC_REPLACE form stores
 * into the left operand X.v */
2523 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2526 case XC( OC_COMPARE ):
/* numeric comparison when both sides are numeric, string comparison
 * (optionally case-insensitive) otherwise; L.d holds the signed difference */
2527 if (is_numeric(L.v) && is_numeric(R.v)) {
2528 L.d = getvar_i(L.v) - getvar_i(R.v);
2530 L.s = getvar_s(L.v);
2531 R.s = getvar_s(R.v);
2532 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
/* the low bit of opn selects whether the outcome R.i is used as is or negated */
2534 switch (opn & 0xfe) {
2545 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2549 runtime_error(EMSG_POSSIBLE_ERROR);
/* class-range checks against SHIFT_TIL_THIS / RECUR_FROM_THIS; the
 * statements they guard are not shown */
2551 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2553 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2563 /* -------- main & co. -------- */
/*
 * Shutdown path taking the exit code (r): the visible statements evaluate
 * the END sequence and then visit every bucket of fdhash, calling pclose()
 * on each open stream that is flagged as a pipe.
 * NOTE(review): the numeric line prefixes skip values, so the guard around
 * the END evaluation, the walk along each bucket's chain and the final
 * exit call are not visible here.
 */
2565 static int awk_exit(int r)
2576 evaluate(endseq.first, &tv);
2579 /* waiting for children */
2580 for (i = 0; i < fdhash->csize; i++) {
2581 hi = fdhash->items[i];
/* only pipe streams are closed here; pclose() waits for the child */
2583 if (hi->data.rs.F && hi->data.rs.is_pipe)
2584 pclose(hi->data.rs.F);
2592 /* if expr looks like "var=value", perform assignment and return 1,
2593 * otherwise return 0 */
/* NOTE(review): the numeric line prefixes skip values, so the splitting of
 * the copy at '=', the loop around nextchar() and the returns are not
 * visible here. */
2594 static int is_assignment(const char *expr)
2596 char *exprc, *s, *s0, *s1;
/* work on a private copy so that expr itself is never modified */
2598 exprc = xstrdup(expr);
/* rejected unless the first character passes isalnum_() and a '=' exists */
2599 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
/* the value is rewritten in place through nextchar()
 * (presumably this resolves escape sequences - confirm) */
2607 *(s1++) = nextchar(&s);
/* create/find the variable named by the copy and store the value with
 * setvar_u(); presumably the name was cut at '=' in a line not shown */
2610 setvar_u(newvar(exprc), s0);
2615 /* switch to next input file */
/*
 * Closes the stream currently held in (rsm), resets its position state and
 * walks ARGV, starting after ARGIND, for the next operand to open.
 * NOTE(review): the numeric line prefixes skip values, so the loop framing,
 * the declarations of rsm/F and the returns are not visible here.
 */
2616 static rstream *next_input_file(void)
2620 const char *fname, *ind;
/* static: remembers across calls that an input operand has been seen */
2621 static int files_happen = FALSE;
2623 if (rsm.F) fclose(rsm.F);
2625 rsm.pos = rsm.adv = 0;
/* no operands left once ARGIND+1 reaches ARGC */
2628 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
/* advance ARGIND and fetch ARGV[ARGIND] */
2634 ind = getvar_s(incvar(V[ARGIND]));
2635 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
/* empty operands are skipped; "var=value" operands are applied as
 * assignments by is_assignment() instead of being opened */
2636 if (fname && *fname && !is_assignment(fname))
2637 F = afopen(fname, "r");
2641 files_happen = TRUE;
2642 setvar_s(V[FILENAME], fname);
/*
 * Applet entry point. Visible phases: set up hashes and builtin variables,
 * import the environment, parse options, load the program (from -f or the
 * first operand), fill ARGV, run BEGIN, then feed every input record
 * through the main sequence and finish via awk_exit().
 * NOTE(review): the numeric line prefixes skip values, so several
 * declarations, loop bodies and the tail of the function are not visible
 * here; the notes below describe only the statements that are shown.
 */
2647 int awk_main(int argc, char **argv);
2648 int awk_main(int argc, char **argv)
2651 char *opt_F, *opt_v, *opt_W;
/* the casts drop whatever qualifiers vNames/vValues carry so that
 * nextword() can advance through them */
2656 char *vnames = (char *)vNames; /* cheat */
2657 char *vvalues = (char *)vValues;
2659 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2660 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2661 if (ENABLE_LOCALE_SUPPORT)
2662 setlocale(LC_NUMERIC, "C");
2666 /* allocate global buffer */
2667 buf = xmalloc(MAXVARFMT + 1);
/* one hash each for variables, arrays, streams and functions
 * (meanings inferred from the names - confirm) */
2669 vhash = hash_init();
2670 ahash = hash_init();
2671 fdhash = hash_init();
2672 fnhash = hash_init();
2674 /* initialize variables */
/* names and initial values come from two packed word lists; a '\377'
 * in the value list means no string value is set here, and a '*' seen
 * in the name list marks the variable VF_SPECIAL */
2675 for (i = 0; *vnames; i++) {
2676 V[i] = v = newvar(nextword(&vnames));
2677 if (*vvalues != '\377')
2678 setvar_s(v, nextword(&vvalues));
2682 if (*vnames == '*') {
2683 v->type |= VF_SPECIAL;
2688 handle_special(V[FS]);
2689 handle_special(V[RS]);
/* pre-register the standard streams under their /dev names */
2691 newfile("/dev/stdin")->F = stdin;
2692 newfile("/dev/stdout")->F = stdout;
2693 newfile("/dev/stderr")->F = stderr;
/* copy the process environment into the ENVIRON array.
 * NOTE(review): strchr() returns NULL for an entry without '='; whether
 * that is checked, and whether (s) is freed, is not visible here */
2695 for (envp = environ; *envp; envp++) {
2696 char *s = xstrdup(*envp);
2697 char *s1 = strchr(s, '=');
2700 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2705 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2708 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2709 if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2710 if (opt & 0x4) { // -f
/* self-initialisation only silences a compiler warning */
2711 char *s = s; /* die, gcc, die */
2712 FILE *from_file = afopen(programname, "r");
2713 /* one byte is reserved for some trick in next_token */
/* seekable input: size it with fseek/ftell and read it in one go.
 * NOTE(review): a negative ftell() result and a short fread() are not
 * checked in the lines shown */
2714 if (fseek(from_file, 0, SEEK_END) == 0) {
2715 flen = ftell(from_file);
2716 s = xmalloc(flen + 4);
2717 fseek(from_file, 0, SEEK_SET);
2718 i = 1 + fread(s + 1, 1, flen, from_file);
/* non-seekable input: grow the buffer and read chunk by chunk until
 * fread() returns nothing */
2720 for (i = j = 1; j > 0; i += j) {
2721 s = xrealloc(s, i + 4096);
2722 j = fread(s + i, 1, 4094, from_file);
/* the program text starts one byte into the buffer (reserved byte above) */
2727 parse_program(s + 1);
2729 } else { // no -f: take program from 1st parameter
2732 programname = "cmd. line";
2733 parse_program(*argv++);
2736 if (opt & 0x8) // -W
2737 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2739 /* fill in ARGV array */
/* presumably argc was reduced to the operand count earlier, the +1
 * accounting for ARGV[0] - confirm */
2740 setvar_i(V[ARGC], argc + 1);
2741 setari_u(V[ARGV], 0, "awk");
2744 setari_u(V[ARGV], ++i, *argv++);
/* run BEGIN; with neither main rules nor END there is nothing left to do */
2746 evaluate(beginseq.first, &tv);
2747 if (!mainseq.first && !endseq.first)
2748 awk_exit(EXIT_SUCCESS);
2750 /* input file could already be opened in BEGIN block */
2751 if (!iF) iF = next_input_file();
2753 /* passing through input files */
2756 setvar_i(V[FNR], 0);
/* each record read into $0 is run through the main sequence */
2758 while ((i = awk_getline(iF, V[F0])) > 0) {
2762 evaluate(mainseq.first, &tv);
/* NOTE(review): if runtime_error() treats its argument as a format string,
 * passing strerror() text directly is unsafe - confirm */
2769 runtime_error(strerror(errno));
2771 iF = next_input_file();
2774 awk_exit(EXIT_SUCCESS);