1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
19 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
20 #define VF_ARRAY 0x0002 /* 1 = it's an array */
22 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
23 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
25 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR 0x1000 /* 1 = string points to fstring buffer */
27 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
30 /* these flags are static, don't change them when value is changed */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34 typedef struct var_s {
35 unsigned short type; /* flags */
39 int aidx; /* func arg idx (for compilation stage) */
40 struct xhash_s *array; /* array ptr */
41 struct var_s *parent; /* for func args, ptr to actual parameter */
42 char **walker; /* list of array elements (for..in) */
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
54 typedef struct func_s {
60 typedef struct rstream_s {
66 unsigned short is_pipe;
69 typedef struct hash_item_s {
71 struct var_s v; /* variable/array hash */
72 struct rstream_s rs; /* redirect streams hash */
73 struct func_s f; /* functions hash */
75 struct hash_item_s *next; /* next in chain */
76 char name[1]; /* really it's longer */
79 typedef struct xhash_s {
80 unsigned nel; /* num of elements */
81 unsigned csize; /* current hash size */
82 unsigned nprime; /* next hash size in PRIMES[] */
83 unsigned glen; /* summary length of item names */
84 struct hash_item_s **items;
88 typedef struct node_s {
90 unsigned short lineno;
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
113 struct nvblock_s *prev;
114 struct nvblock_s *next;
118 typedef struct tsplitter_s {
123 /* simple token classes */
124 /* Order and hex values are very important!!! See next_token() */
125 #define TC_SEQSTART 1 /* ( */
126 #define TC_SEQTERM (1 << 1) /* ) */
127 #define TC_REGEXP (1 << 2) /* /.../ */
128 #define TC_OUTRDR (1 << 3) /* | > >> */
129 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
130 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
131 #define TC_BINOPX (1 << 6) /* two-opnd operator */
132 #define TC_IN (1 << 7)
133 #define TC_COMMA (1 << 8)
134 #define TC_PIPE (1 << 9) /* input redirection pipe */
135 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
136 #define TC_ARRTERM (1 << 11) /* ] */
137 #define TC_GRPSTART (1 << 12) /* { */
138 #define TC_GRPTERM (1 << 13) /* } */
139 #define TC_SEMICOL (1 << 14)
140 #define TC_NEWLINE (1 << 15)
141 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
142 #define TC_WHILE (1 << 17)
143 #define TC_ELSE (1 << 18)
144 #define TC_BUILTIN (1 << 19)
145 #define TC_GETLINE (1 << 20)
146 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
147 #define TC_BEGIN (1 << 22)
148 #define TC_END (1 << 23)
149 #define TC_EOF (1 << 24)
150 #define TC_VARIABLE (1 << 25)
151 #define TC_ARRAY (1 << 26)
152 #define TC_FUNCTION (1 << 27)
153 #define TC_STRING (1 << 28)
154 #define TC_NUMBER (1 << 29)
156 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
158 /* combined token classes */
159 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
164 #define TC_STATEMNT (TC_STATX | TC_WHILE)
165 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
171 /* discard newlines after these */
172 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173 TC_BINOP | TC_OPTERM)
175 /* what can expression begin with */
176 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183 TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
186 #define OF_RES1 0x010000
187 #define OF_RES2 0x020000
188 #define OF_STR1 0x040000
189 #define OF_STR2 0x080000
190 #define OF_NUM1 0x100000
191 #define OF_CHECKED 0x200000
193 /* combined operator flags */
196 #define xS (OF_RES2 | OF_STR2)
198 #define VV (OF_RES1 | OF_RES2)
199 #define Nx (OF_RES1 | OF_NUM1)
200 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx (OF_RES1 | OF_STR1)
202 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
205 #define OPCLSMASK 0xFF00
206 #define OPNMASK 0x007F
208 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
209 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
210 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
212 #define P(x) (x << 24)
213 #define PRIMASK 0x7F000000
214 #define PRIMASK2 0x7E000000
216 /* Operation classes */
218 #define SHIFT_TIL_THIS 0x0600
219 #define RECUR_FROM_THIS 0x1000
222 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
223 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
225 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
226 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
227 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
229 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
230 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
231 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
232 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
233 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
234 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
235 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
236 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
239 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
243 /* simple builtins */
245 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
246 F_ti, F_le, F_sy, F_ff, F_cl
251 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
253 B_an, B_co, B_ls, B_or, B_rs, B_xo,
256 /* tokens and their corresponding info values */
258 #define NTC "\377" /* switch to next token class (tc<<1) */
261 #define OC_B OC_BUILTIN
263 static const char tokenlist[] =
266 "\1/" NTC /* REGEXP */
267 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
268 "\2++" "\2--" NTC /* UOPPOST */
269 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
270 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
271 "\2*=" "\2/=" "\2%=" "\2^="
272 "\1+" "\1-" "\3**=" "\2**"
273 "\1/" "\1%" "\1^" "\1*"
274 "\2!=" "\2>=" "\2<=" "\1>"
275 "\1<" "\2!~" "\1~" "\2&&"
276 "\2||" "\1?" "\1:" NTC
280 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
286 "\2if" "\2do" "\3for" "\5break" /* STATX */
287 "\10continue" "\6delete" "\5print"
288 "\6printf" "\4next" "\10nextfile"
289 "\6return" "\4exit" NTC
293 "\3and" "\5compl" "\6lshift" "\2or"
295 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
296 "\3cos" "\3exp" "\3int" "\3log"
297 "\4rand" "\3sin" "\4sqrt" "\5srand"
298 "\6gensub" "\4gsub" "\5index" "\6length"
299 "\5match" "\5split" "\7sprintf" "\3sub"
300 "\6substr" "\7systime" "\10strftime"
301 "\7tolower" "\7toupper" NTC
303 "\4func" "\10function" NTC
308 static const uint32_t tokeninfo[] = {
312 xS|'a', xS|'w', xS|'|',
313 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
314 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
316 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
317 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
318 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
319 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
320 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
321 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
322 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
323 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
324 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
325 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
326 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
327 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
328 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
329 OC_COLON|xx|P(67)|':',
332 OC_PGETLINE|SV|P(37),
333 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
334 OC_UNARY|xV|P(19)|'!',
340 ST_IF, ST_DO, ST_FOR, OC_BREAK,
341 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
342 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
343 OC_RETURN|Vx, OC_EXIT|Nx,
347 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
348 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
349 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
350 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
351 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
352 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
353 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
354 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
355 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
362 /* internal variable names and their initial values */
363 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
365 CONVFMT=0, OFMT, FS, OFS,
366 ORS, RS, RT, FILENAME,
367 SUBSEP, ARGIND, ARGC, ARGV,
370 ENVIRON, F0, _intvarcount_
373 static const char vNames[] =
374 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
375 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
376 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
378 "NR\0" "NF\0*" "IGNORECASE\0*"
379 "ENVIRON\0" "$\0*" "\0";
381 static const char vValues[] =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
/* Hash table sizing: a new table starts with FIRST_PRIME buckets and may
 * later grow through the successive sizes listed in PRIMES[].
 * FIRST_PRIME must expand to a plain constant (no trailing semicolon) so it
 * can be used inside expressions as well as in simple assignments. */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
/* number of entries in PRIMES[] */
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
394 extern char **environ;
396 static var * V[_intvarcount_];
397 static chain beginseq, mainseq, endseq, *seq;
398 static int nextrec, nextfile;
399 static node *break_ptr, *continue_ptr;
401 static xhash *vhash, *ahash, *fdhash, *fnhash;
402 static char *programname;
404 static int is_f0_split;
407 static tsplitter fsplitter, rsplitter;
423 /* function prototypes */
424 static void handle_special(var *);
425 static node *parse_expr(uint32_t);
426 static void chain_group(void);
427 static var *evaluate(node *, var *);
428 static rstream *next_input_file(void);
429 static int fmt_num(char *, int, const char *, double, int);
430 static int awk_exit(int) ATTRIBUTE_NORETURN;
432 /* ---- error handling ---- */
434 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
435 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
436 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
437 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
438 static const char EMSG_INV_FMT[] = "Invalid format specifier";
439 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
440 static const char EMSG_NOT_ARRAY[] = "Not an array";
441 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
442 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
443 #if !ENABLE_FEATURE_AWK_MATH
444 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
447 static void zero_out_var(var * vp)
449 memset(vp, 0, sizeof(*vp));
452 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
453 static void syntax_error(const char * const message)
455 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
458 #define runtime_error(x) syntax_error(x)
461 /* ---- hash stuff ---- */
463 static unsigned hashidx(const char *name)
467 while (*name) idx = *name++ + (idx << 6) - idx;
471 /* create new hash */
472 static xhash *hash_init(void)
476 newhash = xzalloc(sizeof(xhash));
477 newhash->csize = FIRST_PRIME;
478 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
483 /* find item in hash, return ptr to data, NULL if not found */
484 static void *hash_search(xhash *hash, const char *name)
488 hi = hash->items [ hashidx(name) % hash->csize ];
490 if (strcmp(hi->name, name) == 0)
497 /* grow hash if it becomes too big */
498 static void hash_rebuild(xhash *hash)
500 unsigned newsize, i, idx;
501 hash_item **newitems, *hi, *thi;
503 if (hash->nprime == NPRIMES)
506 newsize = PRIMES[hash->nprime++];
507 newitems = xzalloc(newsize * sizeof(hash_item *));
509 for (i=0; i<hash->csize; i++) {
514 idx = hashidx(thi->name) % newsize;
515 thi->next = newitems[idx];
521 hash->csize = newsize;
522 hash->items = newitems;
525 /* find item in hash, add it if necessary. Return ptr to data */
526 static void *hash_find(xhash *hash, const char *name)
532 hi = hash_search(hash, name);
534 if (++hash->nel / hash->csize > 10)
537 l = strlen(name) + 1;
538 hi = xzalloc(sizeof(hash_item) + l);
539 memcpy(hi->name, name, l);
541 idx = hashidx(name) % hash->csize;
542 hi->next = hash->items[idx];
543 hash->items[idx] = hi;
549 #define findvar(hash, name) ((var*) hash_find((hash) , (name)))
550 #define newvar(name) ((var*) hash_find(vhash , (name)))
551 #define newfile(name) ((rstream*)hash_find(fdhash ,(name)))
552 #define newfunc(name) ((func*) hash_find(fnhash , (name)))
554 static void hash_remove(xhash *hash, const char *name)
556 hash_item *hi, **phi;
558 phi = &(hash->items[ hashidx(name) % hash->csize ]);
561 if (strcmp(hi->name, name) == 0) {
562 hash->glen -= (strlen(name) + 1);
572 /* ------ some useful functions ------ */
574 static void skip_spaces(char **s)
578 while (*p == ' ' || *p == '\t' ||
579 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
585 static char *nextword(char **s)
589 while (*(*s)++) /* */;
594 static char nextchar(char **s)
600 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
601 if (c == '\\' && *s == pps) c = *((*s)++);
605 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
607 return (isalnum(c) || c == '_');
610 static FILE *afopen(const char *path, const char *mode)
612 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
615 /* -------- working with variables (set/get/copy/etc) -------- */
617 static xhash *iamarray(var *v)
621 while (a->type & VF_CHILD)
624 if (! (a->type & VF_ARRAY)) {
626 a->x.array = hash_init();
631 static void clear_array(xhash *array)
636 for (i=0; i<array->csize; i++) {
637 hi = array->items[i];
641 free(thi->data.v.string);
644 array->items[i] = NULL;
646 array->glen = array->nel = 0;
649 /* clear a variable */
650 static var *clrvar(var *v)
652 if (!(v->type & VF_FSTR))
655 v->type &= VF_DONTTOUCH;
661 /* assign string value to variable */
662 static var *setvar_p(var *v, char *value)
671 /* same as setvar_p but make a copy of string */
672 static var *setvar_s(var *v, const char *value)
674 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
677 /* same as setvar_s but set USER flag */
678 static var *setvar_u(var *v, const char *value)
685 /* set array element to user string */
686 static void setari_u(var *a, int idx, const char *s)
689 static char sidx[12];
691 sprintf(sidx, "%d", idx);
692 v = findvar(iamarray(a), sidx);
696 /* assign numeric value to variable */
697 static var *setvar_i(var *v, double value)
700 v->type |= VF_NUMBER;
706 static char *getvar_s(var *v)
708 /* if v is numeric and has no cached string, convert it to string */
709 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
710 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
711 v->string = xstrdup(buf);
712 v->type |= VF_CACHED;
714 return (v->string == NULL) ? "" : v->string;
717 static double getvar_i(var *v)
721 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
725 v->number = strtod(s, &s);
726 if (v->type & VF_USER) {
734 v->type |= VF_CACHED;
739 static var *copyvar(var *dest, const var *src)
743 dest->type |= (src->type & ~VF_DONTTOUCH);
744 dest->number = src->number;
746 dest->string = xstrdup(src->string);
748 handle_special(dest);
752 static var *incvar(var *v)
754 return setvar_i(v, getvar_i(v)+1.);
757 /* return true if v is number or numeric string */
758 static int is_numeric(var *v)
761 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
764 /* return 1 when value of v corresponds to true, 0 otherwise */
765 static int istrue(var *v)
768 return (v->number == 0) ? 0 : 1;
770 return (v->string && *(v->string)) ? 1 : 0;
773 /* temporary variables allocator. Last allocated should be first freed */
774 static var *nvalloc(int n)
782 if ((cb->pos - cb->nv) + n <= cb->size) break;
787 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
793 if (pb) pb->next = cb;
799 while (v < cb->pos) {
808 static void nvfree(var *v)
812 if (v < cb->nv || v >= cb->pos)
813 runtime_error(EMSG_INTERNAL_ERROR);
815 for (p=v; p<cb->pos; p++) {
816 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817 clear_array(iamarray(p));
818 free(p->x.array->items);
821 if (p->type & VF_WALK)
828 while (cb->prev && cb->pos == cb->nv) {
833 /* ------- awk program text parsing ------- */
835 /* Parse the next token pointed to by global pos, place results into global t.
836 * If the token isn't expected, give up (syntax error). Return the token class
838 static uint32_t next_token(uint32_t expected)
840 static int concat_inserted;
841 static uint32_t save_tclass, save_info;
842 static uint32_t ltclass = TC_OPTERM;
854 } else if (concat_inserted) {
856 concat_inserted = FALSE;
857 t.tclass = save_tclass;
868 while (*p != '\n' && *p != '\0') p++;
876 } else if (*p == '\"') {
880 if (*p == '\0' || *p == '\n')
881 syntax_error(EMSG_UNEXP_EOS);
882 *(s++) = nextchar(&p);
888 } else if ((expected & TC_REGEXP) && *p == '/') {
892 if (*p == '\0' || *p == '\n')
893 syntax_error(EMSG_UNEXP_EOS);
894 if ((*s++ = *p++) == '\\') {
896 *(s-1) = bb_process_escape_sequence((const char **)&p);
897 if (*pp == '\\') *s++ = '\\';
898 if (p == pp) *s++ = *p++;
905 } else if (*p == '.' || isdigit(*p)) {
907 t.number = strtod(p, &p);
909 syntax_error(EMSG_UNEXP_TOKEN);
913 /* search for something known */
923 /* if token class is expected, token
924 * matches and it's not a longer word,
925 * then this is what we are looking for
927 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
928 *tl == *p && strncmp(p, tl, l) == 0 &&
929 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
939 /* it's a name (var/array/function),
940 * otherwise it's something wrong
943 syntax_error(EMSG_UNEXP_TOKEN);
946 while (isalnum_(*(++p))) {
951 /* also consume whitespace between functionname and bracket */
952 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
965 /* skipping newlines in some cases */
966 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
969 /* insert concatenation operator when needed */
970 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
971 concat_inserted = TRUE;
975 t.info = OC_CONCAT | SS | P(35);
982 /* Are we ready for this? */
983 if (! (ltclass & expected))
984 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
985 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
990 static void rollback_token(void) { t.rollback = TRUE; }
992 static node *new_node(uint32_t info)
996 n = xzalloc(sizeof(node));
1002 static node *mk_re_node(char *s, node *n, regex_t *re)
1004 n->info = OC_REGEXP;
1007 xregcomp(re, s, REG_EXTENDED);
1008 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1013 static node *condition(void)
1015 next_token(TC_SEQSTART);
1016 return parse_expr(TC_SEQTERM);
1019 /* parse expression terminated by given argument, return ptr
1020 * to built subtree. Terminator is eaten by parse_expr */
1021 static node *parse_expr(uint32_t iexp)
1030 sn.r.n = glptr = NULL;
1031 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1033 while (! ((tc = next_token(xtc)) & iexp)) {
1034 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1035 /* input redirection (<) attached to glptr node */
1036 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1038 xtc = TC_OPERAND | TC_UOPPRE;
1041 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1042 /* for binary and postfix-unary operators, jump back over
1043 * previous operators with higher priority */
1045 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1046 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1048 if ((t.info & OPCLSMASK) == OC_TERNARY)
1050 cn = vn->a.n->r.n = new_node(t.info);
1052 if (tc & TC_BINOP) {
1054 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1055 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1057 next_token(TC_GETLINE);
1058 /* give maximum priority to this pipe */
1059 cn->info &= ~PRIMASK;
1060 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1064 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1069 /* for operands and prefix-unary operators, attach them
1072 cn = vn->r.n = new_node(t.info);
1074 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1075 if (tc & (TC_OPERAND | TC_REGEXP)) {
1076 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1077 /* one should be very careful with switch on tclass -
1078 * only simple tclasses should be used! */
1083 if ((v = hash_search(ahash, t.string)) != NULL) {
1084 cn->info = OC_FNARG;
1085 cn->l.i = v->x.aidx;
1087 cn->l.v = newvar(t.string);
1089 if (tc & TC_ARRAY) {
1091 cn->r.n = parse_expr(TC_ARRTERM);
1098 v = cn->l.v = xzalloc(sizeof(var));
1100 setvar_i(v, t.number);
1102 setvar_s(v, t.string);
1106 mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1111 cn->r.f = newfunc(t.string);
1112 cn->l.n = condition();
1116 cn = vn->r.n = parse_expr(TC_SEQTERM);
1122 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1126 cn->l.n = condition();
1135 /* add node to chain. Return ptr to alloc'd node */
1136 static node *chain_node(uint32_t info)
1141 seq->first = seq->last = new_node(0);
1143 if (seq->programname != programname) {
1144 seq->programname = programname;
1145 n = chain_node(OC_NEWSOURCE);
1146 n->l.s = xstrdup(programname);
1151 seq->last = n->a.n = new_node(OC_DONE);
1156 static void chain_expr(uint32_t info)
1160 n = chain_node(info);
1161 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1162 if (t.tclass & TC_GRPTERM)
1166 static node *chain_loop(node *nn)
1168 node *n, *n2, *save_brk, *save_cont;
1170 save_brk = break_ptr;
1171 save_cont = continue_ptr;
1173 n = chain_node(OC_BR | Vx);
1174 continue_ptr = new_node(OC_EXEC);
1175 break_ptr = new_node(OC_EXEC);
1177 n2 = chain_node(OC_EXEC | Vx);
1180 continue_ptr->a.n = n2;
1181 break_ptr->a.n = n->r.n = seq->last;
1183 continue_ptr = save_cont;
1184 break_ptr = save_brk;
1189 /* parse group and attach it to chain */
1190 static void chain_group(void)
1196 c = next_token(TC_GRPSEQ);
1197 } while (c & TC_NEWLINE);
1199 if (c & TC_GRPSTART) {
1200 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1201 if (t.tclass & TC_NEWLINE) continue;
1205 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1207 chain_expr(OC_EXEC | Vx);
1208 } else { /* TC_STATEMNT */
1209 switch (t.info & OPCLSMASK) {
1211 n = chain_node(OC_BR | Vx);
1212 n->l.n = condition();
1214 n2 = chain_node(OC_EXEC);
1216 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1218 n2->a.n = seq->last;
1226 n = chain_loop(NULL);
1231 n2 = chain_node(OC_EXEC);
1232 n = chain_loop(NULL);
1234 next_token(TC_WHILE);
1235 n->l.n = condition();
1239 next_token(TC_SEQSTART);
1240 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1241 if (t.tclass & TC_SEQTERM) { /* for-in */
1242 if ((n2->info & OPCLSMASK) != OC_IN)
1243 syntax_error(EMSG_UNEXP_TOKEN);
1244 n = chain_node(OC_WALKINIT | VV);
1247 n = chain_loop(NULL);
1248 n->info = OC_WALKNEXT | Vx;
1250 } else { /* for (;;) */
1251 n = chain_node(OC_EXEC | Vx);
1253 n2 = parse_expr(TC_SEMICOL);
1254 n3 = parse_expr(TC_SEQTERM);
1264 n = chain_node(t.info);
1265 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1266 if (t.tclass & TC_OUTRDR) {
1268 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1270 if (t.tclass & TC_GRPTERM)
1275 n = chain_node(OC_EXEC);
1280 n = chain_node(OC_EXEC);
1281 n->a.n = continue_ptr;
1284 /* delete, next, nextfile, return, exit */
1291 static void parse_program(char *p)
1300 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1301 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1303 if (tclass & TC_OPTERM)
1307 if (tclass & TC_BEGIN) {
1311 } else if (tclass & TC_END) {
1315 } else if (tclass & TC_FUNCDECL) {
1316 next_token(TC_FUNCTION);
1318 f = newfunc(t.string);
1319 f->body.first = NULL;
1321 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1322 v = findvar(ahash, t.string);
1323 v->x.aidx = (f->nargs)++;
1325 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1332 } else if (tclass & TC_OPSEQ) {
1334 cn = chain_node(OC_TEST);
1335 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1336 if (t.tclass & TC_GRPSTART) {
1340 chain_node(OC_PRINT);
1342 cn->r.n = mainseq.last;
1344 } else /* if (tclass & TC_GRPSTART) */ {
1352 /* -------- program execution part -------- */
1354 static node *mk_splitter(char *s, tsplitter *spl)
1362 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1366 if (strlen(s) > 1) {
1367 mk_re_node(s, n, re);
1369 n->info = (uint32_t) *s;
1375 /* use node as a regular expression. Supplied with node ptr and regex_t
1376 * storage space. Return ptr to regex (if result points to preg, it should
1377 * be later regfree'd manually
1379 static regex_t *as_regex(node *op, regex_t *preg)
1384 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1385 return icase ? op->r.ire : op->l.re;
1388 s = getvar_s(evaluate(op, v));
1389 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1395 /* gradually increasing buffer */
1396 static void qrealloc(char **b, int n, int *size)
1398 if (! *b || n >= *size)
1399 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1402 /* resize field storage space */
1403 static void fsrealloc(int size)
1405 static int maxfields = 0;
1408 if (size >= maxfields) {
1410 maxfields = size + 16;
1411 Fields = xrealloc(Fields, maxfields * sizeof(var));
1412 for (; i < maxfields; i++) {
1413 Fields[i].type = VF_SPECIAL;
1414 Fields[i].string = NULL;
1418 if (size < nfields) {
1419 for (i=size; i<nfields; i++) {
1426 static int awk_split(char *s, node *spl, char **slist)
1431 regmatch_t pmatch[2];
1433 /* in worst case, each char would be a separate field */
1434 *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1436 c[0] = c[1] = (char)spl->info;
1438 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1440 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1442 l = strcspn(s, c+2);
1443 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1444 pmatch[0].rm_so <= l) {
1445 l = pmatch[0].rm_so;
1446 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1448 pmatch[0].rm_eo = l;
1449 if (s[l]) pmatch[0].rm_eo++;
1455 s += pmatch[0].rm_eo;
1458 } else if (c[0] == '\0') { /* null split */
1464 } else if (c[0] != ' ') { /* single-character split */
1466 c[0] = toupper(c[0]);
1467 c[1] = tolower(c[1]);
1470 while ((s1 = strpbrk(s1, c))) {
1474 } else { /* space split */
1476 s = skip_whitespace(s);
1479 while (*s && !isspace(*s))
1487 static void split_f0(void)
1489 static char *fstrings = NULL;
1499 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1502 for (i = 0; i < n; i++) {
1503 Fields[i].string = nextword(&s);
1504 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1507 /* set NF manually to avoid side effects */
1509 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1510 V[NF]->number = nfields;
1513 /* perform additional actions when some internal variables changed */
1514 static void handle_special(var *v)
1518 int sl, l, len, i, bsize;
1520 if (! (v->type & VF_SPECIAL))
1524 n = (int)getvar_i(v);
1527 /* recalculate $0 */
1528 sep = getvar_s(V[OFS]);
1532 for (i=0; i<n; i++) {
1533 s = getvar_s(&Fields[i]);
1536 memcpy(b+len, sep, sl);
1539 qrealloc(&b, len+l+sl, &bsize);
1540 memcpy(b+len, s, l);
1543 if (b) b[len] = '\0';
1547 } else if (v == V[F0]) {
1548 is_f0_split = FALSE;
1550 } else if (v == V[FS]) {
1551 mk_splitter(getvar_s(v), &fsplitter);
1553 } else if (v == V[RS]) {
1554 mk_splitter(getvar_s(v), &rsplitter);
1556 } else if (v == V[IGNORECASE]) {
1560 n = getvar_i(V[NF]);
1561 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1562 /* right here v is invalid. Just to note... */
1566 /* step through func/builtin/etc arguments */
1567 static node *nextarg(node **pn)
1572 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1581 static void hashwalk_init(var *v, xhash *array)
1587 if (v->type & VF_WALK)
1591 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1592 *w = *(w+1) = (char *)(w + 2);
1593 for (i=0; i<array->csize; i++) {
1594 hi = array->items[i];
1596 strcpy(*w, hi->name);
1603 static int hashwalk_next(var *v)
1611 setvar_s(v, nextword(w+1));
1615 /* evaluate node, return 1 when result is true, 0 otherwise */
1616 static int ptest(node *pattern)
1618 static var v; /* static: to save stack space? */
1620 return istrue(evaluate(pattern, &v));
1623 /* read next record from stream rsm into a variable v */
1624 static int awk_getline(rstream *rsm, var *v)
1627 regmatch_t pmatch[2];
1628 int a, p, pp=0, size;
1629 int fd, so, eo, r, rp;
1632 /* we're using our own buffer since we need access to accumulating
1635 fd = fileno(rsm->F);
1640 c = (char) rsplitter.n.info;
1643 if (! m) qrealloc(&m, 256, &size);
1649 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1650 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1651 b, 1, pmatch, 0) == 0) {
1652 so = pmatch[0].rm_so;
1653 eo = pmatch[0].rm_eo;
1657 } else if (c != '\0') {
1658 s = strchr(b+pp, c);
1659 if (! s) s = memchr(b+pp, '\0', p - pp);
1666 while (b[rp] == '\n')
1668 s = strstr(b+rp, "\n\n");
1671 while (b[eo] == '\n') eo++;
1679 memmove(m, (const void *)(m+a), p+1);
1684 qrealloc(&m, a+p+128, &size);
1687 p += safe_read(fd, b+p, size-p-1);
1691 setvar_i(V[ERRNO], errno);
1700 c = b[so]; b[so] = '\0';
1704 c = b[eo]; b[eo] = '\0';
1705 setvar_s(V[RT], b+so);
1717 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1721 const char *s = format;
1723 if (int_as_int && n == (int)n) {
1724 r = snprintf(b, size, "%d", (int)n);
1726 do { c = *s; } while (c && *++s);
1727 if (strchr("diouxX", c)) {
1728 r = snprintf(b, size, format, (int)n);
1729 } else if (strchr("eEfgG", c)) {
1730 r = snprintf(b, size, format, n);
1732 runtime_error(EMSG_INV_FMT);
1739 /* formatted output into an allocated buffer, return ptr to buffer */
1740 static char *awk_printf(node *n)
1743 char *fmt, *s, *s1, *f;
1744 int i, j, incr, bsize;
1749 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1754 while (*f && (*f != '%' || *(++f) == '%'))
1756 while (*f && !isalpha(*f))
1759 incr = (f - s) + MAXVARFMT;
1760 qrealloc(&b, incr + i, &bsize);
1765 arg = evaluate(nextarg(&n), v);
1768 if (c == 'c' || !c) {
1769 i += sprintf(b+i, s, is_numeric(arg) ?
1770 (char)getvar_i(arg) : *getvar_s(arg));
1772 } else if (c == 's') {
1774 qrealloc(&b, incr+i+strlen(s1), &bsize);
1775 i += sprintf(b+i, s, s1);
1778 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1782 /* if there was an error while sprintf, return value is negative */
1787 b = xrealloc(b, i + 1);
1794 /* common substitution routine:
1795 * replace match number (nm) of (rn) in (src) with (repl), store
1796 * the result into (dest), return the number of substitutions. If nm=0, replace
1797 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1798 * subexpression matching (\1-\9)
1800 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1804 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1805 regmatch_t pmatch[10];
1808 re = as_regex(rn, &sreg);
1809 if (! src) src = V[F0];
1810 if (! dest) dest = V[F0];
1815 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1816 so = pmatch[0].rm_so;
1817 eo = pmatch[0].rm_eo;
1819 qrealloc(&ds, di + eo + rl, &dssize);
1820 memcpy(ds + di, sp, eo);
1826 for (s = repl; *s; s++) {
1832 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1833 di -= ((nbs + 3) >> 1);
1842 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1843 qrealloc(&ds, di + rl + n, &dssize);
1844 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1855 if (! (ds[di++] = *sp++)) break;
1859 qrealloc(&ds, di + strlen(sp), &dssize);
1860 strcpy(ds + di, sp);
1862 if (re == &sreg) regfree(re);
// exec_builtin: run the builtin function described by node (op) and store
// its value in (res). The builtin is selected by (info & OPNMASK).
// NOTE(review): this listing omits some source lines, including every
// "case" label, so the builtin names given below are inferred from the
// visible statements and should be confirmed against the full source.
1866 static var *exec_builtin(node *op, var *res)
1873 regmatch_t pmatch[2];
1875 static tsplitter tspl;
1884 isr = info = op->info;
// Collect up to four arguments: an[] holds the nodes, av[] the evaluated
// values (when isr has 0x09000000 bits set) and as[] their string forms
// (when isr has the 0x08000000 bit set).
1887 av[2] = av[3] = NULL;
1888 for (i=0 ; i<4 && op ; i++) {
1889 an[i] = nextarg(&op);
1890 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1891 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// The top two bits of info hold the minimum number of arguments.
1896 if (nargs < (info >> 30))
1897 runtime_error(EMSG_TOO_FEW_ARGS);
1899 switch (info & OPNMASK) {
1902 #if ENABLE_FEATURE_AWK_MATH
// NOTE(review): the first operand is av[i], not av[0]. Unless i is reset
// on an omitted line, i still holds its value from the argument loop
// above - this looks like it should be av[0]; verify.
1903 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1905 runtime_error(EMSG_NO_MATH);
// Splitting: a regexp node is used directly as the splitter, anything
// else is evaluated to a string and turned into one by mk_splitter().
1911 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1912 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1917 n = awk_split(as[0], spl, &s);
// The target array is emptied, then filled with elements 1..n.
1919 clear_array(iamarray(av[1]));
1920 for (i=1; i<=n; i++)
1921 setari_u(av[1], i, nextword(&s1));
// Substring: convert the 1-based start to a 0-based offset clamped to
// [0, l]; the length defaults to the rest of the string.
1928 i = getvar_i(av[1]) - 1;
1929 if (i>l) i=l; if (i<0) i=0;
1930 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1933 strncpy(s, as[0]+i, n);
// Bitwise builtins operate on the arguments converted to long.
1939 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1943 setvar_i(res, ~(long)getvar_i(av[0]));
1947 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1951 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
// The right shift is done on unsigned long operands.
1955 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1959 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
// Case conversion: map every character of a copy through to_xxx.
1969 s1 = s = xstrdup(as[0]);
1971 *s1 = (*to_xxx)(*s1);
// Substring search: l is the last offset at which as[1] could still fit.
1980 l = strlen(as[0]) - ll;
1981 if (ll > 0 && l >= 0) {
// The result is the 1-based position of the first occurrence.
1983 s = strstr(as[0], as[1]);
1984 if (s) n = (s - as[0]) + 1;
1986 /* this piece of code is terribly slow and
1987 * really should be rewritten
// Case-insensitive variant: try every offset with strncasecmp().
1989 for (i=0; i<=l; i++) {
1990 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting: falls back to a default format without arguments.
2002 tt = getvar_i(av[1]);
2005 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2006 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
// Regex match: RSTART and RLENGTH are set from pmatch[0]; the 0 / -1
// assignments presumably belong to the no-match branch (not visible).
2012 re = as_regex(an[1], &sreg);
2013 n = regexec(re, as[0], 1, pmatch, 0);
2018 pmatch[0].rm_so = 0;
2019 pmatch[0].rm_eo = -1;
2021 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2022 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2023 setvar_i(res, pmatch[0].rm_so);
2024 if (re == &sreg) regfree(re);
// Substitution builtins, all implemented by awk_sub(): the first stores
// the new string in res with subexpressions enabled; the other two modify
// av[2] in place and return the substitution count (nm=0 and nm=1).
2028 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2032 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2036 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2045 * Evaluate node - the heart of the program. Supplied with subtree
2046 * and place where to store result. returns ptr to result.
// XC() drops the low 8 bits of an opcode class so it can be used as a
// compact case label; the switch below applies it to (opinfo & OPCLSMASK).
2048 #define XC(n) ((n) >> 8)
// NOTE(review): this listing omits some source lines (the inner line
// numbers have gaps), among them many "case" labels, braces and "break"
// statements; the comments below describe only the visible statements.
2050 static var *evaluate(node *op, var *res)
2052 /* This procedure is recursive so we should count every byte */
// State kept across calls: current function arguments, the rand() seed,
// and a scratch regex buffer handed to as_regex().
2053 static var *fnargs = NULL;
2054 static unsigned seed = 1;
2055 static regex_t sreg;
2076 return setvar_s(res, NULL);
2083 opn = (short)(opinfo & OPNMASK);
2084 lineno = op->lineno;
2086 /* execute inevitable things */
// The OF_* flags in opinfo say which operands to pre-evaluate and in
// which form (var, string or number) the opcode needs them.
2088 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2089 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2090 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2091 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2092 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2094 switch (XC(opinfo & OPCLSMASK)) {
2096 /* -- iterative node type -- */
2100 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2101 /* it's range pattern */
// OF_CHECKED is stored on the node while the range is active: set once
// the start pattern matches, cleared when the end pattern matches.
2102 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2103 op->info |= OF_CHECKED;
2104 if (ptest(op1->r.n))
2105 op->info &= ~OF_CHECKED;
2112 op = (ptest(op1)) ? op->a.n : op->r.n;
2116 /* just evaluate an expression, also used as unconditional jump */
2120 /* branch, used in if-else and various loops */
2122 op = istrue(L.v) ? op->a.n : op->r.n;
2125 /* initialize for-in loop */
2126 case XC( OC_WALKINIT ):
2127 hashwalk_init(L.v, iamarray(R.v));
2130 /* get next array item */
2131 case XC( OC_WALKNEXT ):
2132 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2135 case XC( OC_PRINT ):
2136 case XC( OC_PRINTF ):
// Output redirection: the stream is looked up (or created) by name and
// opened either as a pipe or as a file in "w"/"a" mode depending on opn.
2139 X.rsm = newfile(R.s);
2142 if((X.rsm->F = popen(R.s, "w")) == NULL)
2143 bb_perror_msg_and_die("popen");
2146 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2152 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2154 fputs(getvar_s(V[F0]), X.F);
2157 L.v = evaluate(nextarg(&op1), v1);
// Numbers are formatted with OFMT, integral values as plain integers.
2158 if (L.v->type & VF_NUMBER) {
2159 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2160 getvar_i(L.v), TRUE);
2163 fputs(getvar_s(L.v), X.F);
// OFS goes between arguments, ORS after the last one.
2166 if (op1) fputs(getvar_s(V[OFS]), X.F);
2169 fputs(getvar_s(V[ORS]), X.F);
2171 } else { /* OC_PRINTF */
2172 L.s = awk_printf(op1);
2179 case XC( OC_DELETE ):
// The operand must be a variable or a function argument.
2180 X.info = op1->info & OPCLSMASK;
2181 if (X.info == OC_VAR) {
2183 } else if (X.info == OC_FNARG) {
2184 R.v = &fnargs[op1->l.i];
2186 runtime_error(EMSG_NOT_ARRAY);
// Either a single element is removed or the whole array is cleared.
2191 L.s = getvar_s(evaluate(op1->r.n, v1));
2192 hash_remove(iamarray(R.v), L.s);
2194 clear_array(iamarray(R.v));
2198 case XC( OC_NEWSOURCE ):
2199 programname = op->l.s;
2202 case XC( OC_RETURN ):
2206 case XC( OC_NEXTFILE ):
2217 /* -- recursive node type -- */
2225 case XC( OC_FNARG ):
2226 L.v = &fnargs[op->l.i];
// With a subscript node present the result is the array element keyed by
// R.s, otherwise the variable itself.
2229 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 if key L.s exists in array R.v, else 0.
2233 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2236 case XC( OC_REGEXP ):
2238 L.s = getvar_s(V[F0]);
2241 case XC( OC_MATCH ):
// The match result is inverted when the operator is '!'.
2244 X.re = as_regex(op1, &sreg);
2245 R.i = regexec(X.re, L.s, 0, NULL, 0);
2246 if (X.re == &sreg) regfree(X.re);
2247 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2251 /* if source is a temporary string, just relink it to dest */
2252 if (R.v == v1+1 && R.v->string) {
2253 res = setvar_p(L.v, R.v->string);
2256 res = copyvar(L.v, R.v);
2260 case XC( OC_TERNARY ):
// The right child must be the ':' node holding both alternatives.
2261 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2262 runtime_error(EMSG_POSSIBLE_ERROR);
2263 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
// Function call: a function without a body is an error.
2267 if (! op->r.f->body.first)
2268 runtime_error(EMSG_UNDEF_FUNC);
// Allocate nargs+1 variables; each supplied argument is evaluated and
// recorded as the parent of its VF_CHILD slot.
2270 X.v = R.v = nvalloc(op->r.f->nargs+1);
2272 L.v = evaluate(nextarg(&op1), v1);
2274 R.v->type |= VF_CHILD;
2275 R.v->x.parent = L.v;
2276 if (++R.v - X.v >= op->r.f->nargs)
2284 res = evaluate(op->r.f->body.first, res);
2291 case XC( OC_GETLINE ):
2292 case XC( OC_PGETLINE ):
// Input redirection: pipes use popen(), files plain fopen() so that a
// failure is not fatal (see the ERRNO assignment below).
2294 X.rsm = newfile(L.s);
2296 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2297 X.rsm->F = popen(L.s, "r");
2298 X.rsm->is_pipe = TRUE;
2300 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
2304 if (! iF) iF = next_input_file();
2309 setvar_i(V[ERRNO], errno);
2317 L.i = awk_getline(X.rsm, R.v);
2327 /* simple builtins */
2328 case XC( OC_FBLTIN ):
// Random number scaled by RAND_MAX.
2336 R.d = (double)rand() / (double)RAND_MAX;
2339 #if ENABLE_FEATURE_AWK_MATH
2365 runtime_error(EMSG_NO_MATH);
// New seed: the argument if one was given, otherwise the current time.
2371 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2381 L.s = getvar_s(V[F0]);
// Shell command: yields system()'s result shifted right by 8, or 0 when
// exec support is compiled out or the command string is empty.
2387 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2388 ? (system(L.s) >> 8) : 0;
2396 X.rsm = newfile(L.s);
// Closing a stream: pclose() or fclose() depending on is_pipe, then the
// buffer is freed and the entry removed from fdhash.
2405 X.rsm = (rstream *)hash_search(fdhash, L.s);
2407 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2408 free(X.rsm->buffer);
2409 hash_remove(fdhash, L.s);
2412 setvar_i(V[ERRNO], errno);
2419 case XC( OC_BUILTIN ):
2420 res = exec_builtin(op, res);
2423 case XC( OC_SPRINTF ):
2424 setvar_p(res, awk_printf(op1));
2427 case XC( OC_UNARY ):
2429 L.d = R.d = getvar_i(R.v);
2444 L.d = istrue(X.v) ? 0 : 1;
2455 case XC( OC_FIELD ):
// The field number is the operand truncated to int; Fields[] is 0-based.
2456 R.i = (int)getvar_i(R.v);
2464 res = &Fields[R.i-1];
2468 /* concatenation (" ") and index joining (",") */
2469 case XC( OC_CONCAT ):
2470 case XC( OC_COMMA ):
// opn is reused here as the byte count needed for the joined string.
2471 opn = strlen(L.s) + strlen(R.s) + 2;
// For "," the SUBSEP string is inserted, so extra room is reserved.
2474 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2475 L.s = getvar_s(V[SUBSEP]);
2476 X.s = xrealloc(X.s, opn + strlen(L.s));
// Short-circuit logic: the right operand is tested only when needed.
2484 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2488 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2491 case XC( OC_BINARY ):
2492 case XC( OC_REPLACE ):
2493 R.d = getvar_i(R.v);
2505 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2509 #if ENABLE_FEATURE_AWK_MATH
2510 L.d = pow(L.d, R.d);
2512 runtime_error(EMSG_NO_MATH);
2516 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
// Remainder computed via a truncated quotient.
// NOTE(review): the (int) cast limits the quotient to the int range.
2517 L.d -= (int)(L.d / R.d) * R.d;
// OC_BINARY stores into res; otherwise the result is written back to X.v.
2520 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2523 case XC( OC_COMPARE ):
// Numeric comparison only when both operands are numeric; otherwise the
// strings are compared, ignoring case when icase is set.
2524 if (is_numeric(L.v) && is_numeric(R.v)) {
2525 L.d = getvar_i(L.v) - getvar_i(R.v);
2527 L.s = getvar_s(L.v);
2528 R.s = getvar_s(R.v);
2529 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
// The low bit of opn selects whether the test result is negated.
2531 switch (opn & 0xfe) {
2542 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2546 runtime_error(EMSG_POSSIBLE_ERROR);
// Presumably decides whether to continue with the next node (iterative
// classes) or return the result (recursive classes); the statements
// guarded by these tests are not visible in this listing.
2548 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2550 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2560 /* -------- main & co. -------- */
// awk_exit: final processing before the program terminates with status (r).
// NOTE(review): this listing omits some source lines; the condition under
// which the END sequence runs and the actual exit call are not visible.
2562 static int awk_exit(int r)
// Run the END actions.
2573 evaluate(endseq.first, &tv);
2576 /* waiting for children */
// Walk every bucket of the stream hash and pclose() each open pipe.
2577 for (i = 0; i < fdhash->csize; i++) {
2578 hi = fdhash->items[i];
2580 if (hi->data.rs.F && hi->data.rs.is_pipe)
2581 pclose(hi->data.rs.F);
2589 /* if expr looks like "var=value", perform assignment and return 1,
2590 * otherwise return 0 */
// NOTE(review): this listing omits some source lines, so the early-return
// body, the loop around nextchar() and the cleanup are not visible.
2591 static int is_assignment(const char *expr)
2593 char *exprc, *s, *s0, *s1;
// Work on a private copy so the text can be modified in place.
2595 exprc = xstrdup(expr);
// Not an assignment unless it starts with a name character and has '='.
2596 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
// The value is rewritten through nextchar() - presumably to process
// escape sequences; confirm against nextchar()'s definition.
2604 *(s1++) = nextchar(&s);
// exprc now names the variable; s0 is the processed value.
2607 setvar_u(newvar(exprc), s0);
2612 /* switch to next input file */
// Walks ARGV starting after ARGIND; entries that are empty or that
// is_assignment() accepts as "var=value" are not opened as files.
// NOTE(review): this listing omits some source lines, so the loop
// structure, the stdin fallback and the return value are not visible.
2613 static rstream *next_input_file(void)
// Remembers whether any file operand has been seen so far.
2618 static int files_happen = FALSE;
// Close the previous input and reset the read buffer offsets.
2620 if (rsm.F) fclose(rsm.F);
2622 rsm.pos = rsm.adv = 0;
// Stop once the next index would reach ARGC.
2625 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
// Increment ARGIND and fetch the corresponding ARGV element.
2631 ind = getvar_s(incvar(V[ARGIND]));
2632 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2633 if (fname && *fname && !is_assignment(fname))
2634 F = afopen(fname, "r");
2638 files_happen = TRUE;
2639 setvar_s(V[FILENAME], fname);
// awk_main: applet entry point. Sets up the global tables and built-in
// variables, parses the program (from -f file or the first operand), runs
// the BEGIN sequence, then feeds every input record to the main sequence.
// NOTE(review): this listing omits some source lines (the inner line
// numbers have gaps); the comments below describe only what is visible.
2644 int awk_main(int argc, char **argv)
2647 char *opt_F, *opt_v, *opt_W;
2652 char *vnames = (char *)vNames; /* cheat */
2653 char *vvalues = (char *)vValues;
2657 /* allocate global buffer */
2658 buf = xmalloc(MAXVARFMT + 1);
// Hash tables for variables, arrays, open streams and functions.
2660 vhash = hash_init();
2661 ahash = hash_init();
2662 fdhash = hash_init();
2663 fnhash = hash_init();
2665 /* initialize variables */
// vNames/vValues are parallel word lists: a '\377' in the values list
// means "no initial string", a '*' in the names list marks VF_SPECIAL.
2666 for (i = 0; *vnames; i++) {
2667 V[i] = v = newvar(nextword(&vnames));
2668 if (*vvalues != '\377')
2669 setvar_s(v, nextword(&vvalues));
2673 if (*vnames == '*') {
2674 v->type |= VF_SPECIAL;
2679 handle_special(V[FS]);
2680 handle_special(V[RS]);
// Pre-register the standard streams under their /dev names.
2682 newfile("/dev/stdin")->F = stdin;
2683 newfile("/dev/stdout")->F = stdout;
2684 newfile("/dev/stderr")->F = stderr;
// Populate ENVIRON from the process environment ("NAME=value" entries).
2686 for (envp = environ; *envp; envp++) {
2687 char *s = xstrdup(*envp);
2688 char *s1 = strchr(s, '=');
2691 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
// Option bits in opt follow the order of the option string below.
2696 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2699 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2700 if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2701 if (opt & 0x4) { // -f
// Self-initialisation only silences a compiler warning.
2702 char *s = s; /* die, gcc, die */
2703 FILE *from_file = afopen(programname, "r");
2704 /* one byte is reserved for some trick in next_token */
// Seekable file: size it and read it in one call, starting at s + 1.
// NOTE(review): the ftell() result is used without an error check.
2705 if (fseek(from_file, 0, SEEK_END) == 0) {
2706 flen = ftell(from_file);
2707 s = xmalloc(flen + 4);
2708 fseek(from_file, 0, SEEK_SET);
2709 i = 1 + fread(s + 1, 1, flen, from_file);
// Non-seekable input: grow the buffer and read in chunks until fread()
// returns 0.
2711 for (i = j = 1; j > 0; i += j) {
2712 s = xrealloc(s, i + 4096);
2713 j = fread(s + i, 1, 4094, from_file);
2718 parse_program(s + 1);
2720 } else { // no -f: take program from 1st parameter
2723 programname = "cmd. line";
2724 parse_program(*argv++);
2727 if (opt & 0x8) // -W
2728 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2730 /* fill in ARGV array */
2731 setvar_i(V[ARGC], argc + 1);
2732 setari_u(V[ARGV], 0, "awk");
2735 setari_u(V[ARGV], ++i, *argv++);
// Run BEGIN; with neither main nor END actions there is no input to read.
2737 evaluate(beginseq.first, &tv);
2738 if (!mainseq.first && !endseq.first)
2739 awk_exit(EXIT_SUCCESS);
2741 /* input file could already be opened in BEGIN block */
2742 if (!iF) iF = next_input_file();
2744 /* passing through input files */
// FNR restarts at 0 for each input file.
2747 setvar_i(V[FNR], 0);
// Each record read into $0 is run through the main pattern-action chain.
2749 while ((i = awk_getline(iF, V[F0])) > 0) {
2753 evaluate(mainseq.first, &tv);
// Presumably reached when awk_getline() reports a read error.
2760 runtime_error(strerror(errno));
2762 iF = next_input_file();
2765 awk_exit(EXIT_SUCCESS);