1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
13 extern char **environ;
15 /* This is a NOEXEC applet. Be very careful! */
22 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
23 #define VF_ARRAY 0x0002 /* 1 = it's an array */
25 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
26 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
27 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
28 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
29 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
30 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
31 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
33 /* these flags are static, don't change them when value is changed */
34 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
37 typedef struct var_s {
38 unsigned short type; /* flags */
42 int aidx; /* func arg idx (for compilation stage) */
43 struct xhash_s *array; /* array ptr */
44 struct var_s *parent; /* for func args, ptr to actual parameter */
45 char **walker; /* list of array elements (for..in) */
49 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50 typedef struct chain_s {
53 const char *programname;
57 typedef struct func_s {
63 typedef struct rstream_s {
69 unsigned short is_pipe;
72 typedef struct hash_item_s {
74 struct var_s v; /* variable/array hash */
75 struct rstream_s rs; /* redirect streams hash */
76 struct func_s f; /* functions hash */
78 struct hash_item_s *next; /* next in chain */
79 char name[1]; /* really it's longer */
82 typedef struct xhash_s {
83 unsigned nel; /* num of elements */
84 unsigned csize; /* current hash size */
85 unsigned nprime; /* next hash size in PRIMES[] */
86 unsigned glen; /* summary length of item names */
87 struct hash_item_s **items;
91 typedef struct node_s {
112 /* Block of temporary variables */
113 typedef struct nvblock_s {
116 struct nvblock_s *prev;
117 struct nvblock_s *next;
121 typedef struct tsplitter_s {
126 /* simple token classes */
127 /* Order and hex values are very important!!! See next_token() */
128 #define TC_SEQSTART 1 /* ( */
129 #define TC_SEQTERM (1 << 1) /* ) */
130 #define TC_REGEXP (1 << 2) /* /.../ */
131 #define TC_OUTRDR (1 << 3) /* | > >> */
132 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
133 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
134 #define TC_BINOPX (1 << 6) /* two-opnd operator */
135 #define TC_IN (1 << 7)
136 #define TC_COMMA (1 << 8)
137 #define TC_PIPE (1 << 9) /* input redirection pipe */
138 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
139 #define TC_ARRTERM (1 << 11) /* ] */
140 #define TC_GRPSTART (1 << 12) /* { */
141 #define TC_GRPTERM (1 << 13) /* } */
142 #define TC_SEMICOL (1 << 14)
143 #define TC_NEWLINE (1 << 15)
144 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
145 #define TC_WHILE (1 << 17)
146 #define TC_ELSE (1 << 18)
147 #define TC_BUILTIN (1 << 19)
148 #define TC_GETLINE (1 << 20)
149 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
150 #define TC_BEGIN (1 << 22)
151 #define TC_END (1 << 23)
152 #define TC_EOF (1 << 24)
153 #define TC_VARIABLE (1 << 25)
154 #define TC_ARRAY (1 << 26)
155 #define TC_FUNCTION (1 << 27)
156 #define TC_STRING (1 << 28)
157 #define TC_NUMBER (1 << 29)
159 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
161 /* combined token classes */
162 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
165 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
167 #define TC_STATEMNT (TC_STATX | TC_WHILE)
168 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
170 /* word tokens, cannot mean something else if not expected */
171 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
172 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
174 /* discard newlines after these */
175 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
176 | TC_BINOP | TC_OPTERM)
178 /* what can expression begin with */
179 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
180 /* what can group begin with */
181 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
183 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184 /* operator is inserted between them */
185 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
186 | TC_STRING | TC_NUMBER | TC_UOPPOST)
187 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
189 #define OF_RES1 0x010000
190 #define OF_RES2 0x020000
191 #define OF_STR1 0x040000
192 #define OF_STR2 0x080000
193 #define OF_NUM1 0x100000
194 #define OF_CHECKED 0x200000
196 /* combined operator flags */
199 #define xS (OF_RES2 | OF_STR2)
201 #define VV (OF_RES1 | OF_RES2)
202 #define Nx (OF_RES1 | OF_NUM1)
203 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
204 #define Sx (OF_RES1 | OF_STR1)
205 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
206 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
208 #define OPCLSMASK 0xFF00
209 #define OPNMASK 0x007F
211 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
212 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
213 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place a priority (or, for builtins, argument-descriptor) byte into
 * bits 24..31 of an info word. The argument is parenthesized so compound
 * expressions are shifted as a whole, and widened to uint32_t so that
 * values >= 0x80 (e.g. P(0x83)) do not overflow a signed int when shifted. */
#define P(x) ((uint32_t)(x) << 24)
216 #define PRIMASK 0x7F000000
217 #define PRIMASK2 0x7E000000
219 /* Operation classes */
221 #define SHIFT_TIL_THIS 0x0600
222 #define RECUR_FROM_THIS 0x1000
225 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
226 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
228 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
229 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
230 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
232 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
233 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
234 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
235 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
236 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
237 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
238 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
239 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
242 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
246 /* simple builtins */
248 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
249 F_ti, F_le, F_sy, F_ff, F_cl
254 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
256 B_an, B_co, B_ls, B_or, B_rs, B_xo,
259 /* tokens and their corresponding info values */
261 #define NTC "\377" /* switch to next token class (tc<<1) */
264 #define OC_B OC_BUILTIN
266 static const char tokenlist[] =
269 "\1/" NTC /* REGEXP */
270 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
271 "\2++" "\2--" NTC /* UOPPOST */
272 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
273 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
274 "\2*=" "\2/=" "\2%=" "\2^="
275 "\1+" "\1-" "\3**=" "\2**"
276 "\1/" "\1%" "\1^" "\1*"
277 "\2!=" "\2>=" "\2<=" "\1>"
278 "\1<" "\2!~" "\1~" "\2&&"
279 "\2||" "\1?" "\1:" NTC
283 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
289 "\2if" "\2do" "\3for" "\5break" /* STATX */
290 "\10continue" "\6delete" "\5print"
291 "\6printf" "\4next" "\10nextfile"
292 "\6return" "\4exit" NTC
296 "\3and" "\5compl" "\6lshift" "\2or"
298 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
299 "\3cos" "\3exp" "\3int" "\3log"
300 "\4rand" "\3sin" "\4sqrt" "\5srand"
301 "\6gensub" "\4gsub" "\5index" "\6length"
302 "\5match" "\5split" "\7sprintf" "\3sub"
303 "\6substr" "\7systime" "\10strftime"
304 "\7tolower" "\7toupper" NTC
306 "\4func" "\10function" NTC
311 static const uint32_t tokeninfo[] = {
315 xS|'a', xS|'w', xS|'|',
316 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
317 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
319 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
320 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
321 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
322 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
323 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
324 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
325 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
326 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
327 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
328 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
329 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
330 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
331 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
332 OC_COLON|xx|P(67)|':',
335 OC_PGETLINE|SV|P(37),
336 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
337 OC_UNARY|xV|P(19)|'!',
343 ST_IF, ST_DO, ST_FOR, OC_BREAK,
344 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
345 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
346 OC_RETURN|Vx, OC_EXIT|Nx,
350 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
357 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
358 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
365 /* internal variable names and their initial values */
366 /* an asterisk marks SPECIAL vars; $ is simply the unnamed Field0 */
368 CONVFMT, OFMT, FS, OFS,
369 ORS, RS, RT, FILENAME,
370 SUBSEP, ARGIND, ARGC, ARGV,
373 ENVIRON, F0, NUM_INTERNAL_VARS
376 static const char vNames[] =
377 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
378 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
379 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
381 "NR\0" "NF\0*" "IGNORECASE\0*"
382 "ENVIRON\0" "$\0*" "\0";
384 static const char vValues[] =
385 "%.6g\0" "%.6g\0" " \0" " \0"
386 "\n\0" "\n\0" "\0" "\0"
390 /* hash size may grow to these values */
/* Initial bucket count of a freshly created hash (used by hash_init()).
 * No trailing semicolon: the macro must expand to a plain constant so it
 * can appear inside expressions, not only as the end of a statement. */
#define FIRST_PRIME 61
392 static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
393 enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
397 static var *intvar[NUM_INTERNAL_VARS];
398 static chain beginseq, mainseq, endseq, *seq;
399 static int nextrec, nextfile;
400 static node *break_ptr, *continue_ptr;
402 static xhash *vhash, *ahash, *fdhash, *fnhash;
403 static const char *programname;
405 static int is_f0_split;
408 static tsplitter fsplitter, rsplitter;
423 /* It had an even better name: 't'. Whoever knows what it is, please rename it! */
424 /* (actually it looks like unrelated stuff lumped together...) */
426 /* function prototypes */
427 static void handle_special(var *);
428 static node *parse_expr(uint32_t);
429 static void chain_group(void);
430 static var *evaluate(node *, var *);
431 static rstream *next_input_file(void);
432 static int fmt_num(char *, int, const char *, double, int);
433 static int awk_exit(int) ATTRIBUTE_NORETURN;
435 /* ---- error handling ---- */
437 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
438 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
439 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
440 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
441 static const char EMSG_INV_FMT[] = "Invalid format specifier";
442 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
443 static const char EMSG_NOT_ARRAY[] = "Not an array";
444 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
445 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
446 #if !ENABLE_FEATURE_AWK_MATH
447 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
/* Reset the var that vp points to by overwriting the whole structure
 * with zero bytes. */
450 static void zero_out_var(var * vp)
452 memset(vp, 0, sizeof(*vp));
/* Report an error as "programname:lineno: message" by handing the three
 * values to bb_error_msg_and_die(). The prototype is marked
 * ATTRIBUTE_NORETURN, i.e. callers are told control does not come back. */
455 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
456 static void syntax_error(const char * const message)
458 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
461 #define runtime_error(x) syntax_error(x)
464 /* ---- hash stuff ---- */
/* Hash a NUL-terminated name. Each character c is folded into the running
 * value as idx = c + idx * 63, written as (idx << 6) - idx.
 * The declaration of idx and the return statement are not part of this
 * listing. */
466 static unsigned hashidx(const char *name)
470 while (*name) idx = *name++ + (idx << 6) - idx;
/* Allocate an xhash with xzalloc(), set its bucket count (csize) to
 * FIRST_PRIME and allocate a bucket array of csize hash_item pointers.
 * The return statement is not part of this listing. */
474 /* create new hash */
475 static xhash *hash_init(void)
479 newhash = xzalloc(sizeof(xhash));
480 newhash->csize = FIRST_PRIME;
481 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
/* Lookup starts at bucket hashidx(name) % csize and compares item names
 * with strcmp(). The loop over the bucket chain and the return statements
 * are not part of this listing. */
486 /* find item in hash, return ptr to data, NULL if not found */
487 static void *hash_search(xhash *hash, const char *name)
491 hi = hash->items [ hashidx(name) % hash->csize ];
493 if (strcmp(hi->name, name) == 0)
/* Visible steps: take the next size from PRIMES[] (advancing hash->nprime),
 * allocate a new bucket array, put items at the head of bucket
 * hashidx(name) % newsize, then install newsize and newitems in the hash.
 * The branch taken when nprime == NPRIMES is not shown (presumably an
 * early return because no larger size exists - confirm). */
500 /* grow hash if it becomes too big */
501 static void hash_rebuild(xhash *hash)
503 unsigned newsize, i, idx;
504 hash_item **newitems, *hi, *thi;
506 if (hash->nprime == NPRIMES)
509 newsize = PRIMES[hash->nprime++];
510 newitems = xzalloc(newsize * sizeof(hash_item *));
512 for (i = 0; i < hash->csize; i++) {
517 idx = hashidx(thi->name) % newsize;
518 thi->next = newitems[idx];
524 hash->csize = newsize;
525 hash->items = newitems;
/* Looks the name up with hash_search() first. The visible insertion path
 * increments nel, tests the load factor (nel / csize > 10; the action is
 * not shown, presumably hash_rebuild()), allocates a hash_item with room
 * for the name plus its NUL, copies the name in and links the item at the
 * head of bucket hashidx(name) % csize. */
528 /* find item in hash, add it if necessary. Return ptr to data */
529 static void *hash_find(xhash *hash, const char *name)
535 hi = hash_search(hash, name);
537 if (++hash->nel / hash->csize > 10)
540 l = strlen(name) + 1;
541 hi = xzalloc(sizeof(hash_item) + l);
542 memcpy(hi->name, name, l);
544 idx = hashidx(name) % hash->csize;
545 hi->next = hash->items[idx];
546 hash->items[idx] = hi;
/* Typed wrappers around hash_find(). findvar() takes the hash explicitly;
 * newvar(), newfile() and newfunc() always use vhash, fdhash and fnhash
 * and cast the result to a var, rstream or func pointer respectively. */
552 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
553 #define newvar(name) ((var*) hash_find(vhash, (name)))
554 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
555 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Starts at the link of bucket hashidx(name) % csize (phi points at the
 * link itself) and, on a strcmp() match, subtracts strlen(name) + 1 from
 * glen. The chain walk, unlinking and freeing are not part of this
 * listing. */
557 static void hash_remove(xhash *hash, const char *name)
559 hash_item *hi, **phi;
561 phi = &(hash->items[hashidx(name) % hash->csize]);
564 if (strcmp(hi->name, name) == 0) {
565 hash->glen -= (strlen(name) + 1);
575 /* ------ some useful functions ------ */
/* Visible tests: a backslash directly followed by a newline gets its own
 * branch (body not shown), and any character other than space or tab
 * reaches the else-branch (body not shown, presumably ends the scan).
 * The loop and the write-back to *s are not part of this listing. */
577 static void skip_spaces(char **s)
582 if (*p == '\\' && p[1] == '\n') {
585 } else if (*p != ' ' && *p != '\t') {
/* Advances *s past the current NUL-terminated word, leaving it just after
 * the terminator. The returned pointer is set in lines not shown. */
593 static char *nextword(char **s)
597 while (*(*s)++) /* */;
/* A backslash in c is handed to bb_process_escape_sequence(). If the result
 * is still a backslash and *s equals pps (assigned in a line not shown),
 * the next character is taken as-is and *s is advanced past it.
 * Reading of c and the return statement are not part of this listing. */
602 static char nextchar(char **s)
608 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
609 if (c == '\\' && *s == pps) c = *((*s)++);
/* Non-zero when c is alphanumeric according to isalnum() or is '_'. */
613 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
615 return (isalnum(c) || c == '_');
/* Returns stdin when path is exactly "-"; any other path is opened with
 * xfopen(path, mode). */
618 static FILE *afopen(const char *path, const char *mode)
620 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
623 /* -------- working with variables (set/get/copy/etc) -------- */
/* Loops while the current var has VF_CHILD set (loop body not shown,
 * presumably following x.parent). If the resulting var lacks VF_ARRAY, a
 * new hash from hash_init() is stored in x.array. Setting the flag and the
 * return statement are not part of this listing. */
625 static xhash *iamarray(var *v)
629 while (a->type & VF_CHILD)
632 if (!(a->type & VF_ARRAY)) {
634 a->x.array = hash_init();
/* Visits all csize buckets: the string of a stored var (data.v.string) is
 * freed, each bucket head is reset to NULL, and finally glen and nel are
 * set to 0. The chain walk and freeing of the items themselves are not
 * part of this listing. */
639 static void clear_array(xhash *array)
644 for (i = 0; i < array->csize; i++) {
645 hi = array->items[i];
649 free(thi->data.v.string);
652 array->items[i] = NULL;
654 array->glen = array->nel = 0;
/* When VF_FSTR is not set an action is taken (not shown, presumably
 * freeing the string). Afterwards only the VF_DONTTOUCH flags are kept in
 * v->type. Remaining statements are not part of this listing. */
657 /* clear a variable */
658 static var *clrvar(var *v)
660 if (!(v->type & VF_FSTR))
663 v->type &= VF_DONTTOUCH;
/* Takes a non-const pointer; setvar_s() passes it either a fresh xstrdup()
 * copy or NULL. The body is not part of this listing. */
669 /* assign string value to variable */
670 static var *setvar_p(var *v, char *value)
/* A NULL or empty value is passed on to setvar_p() as NULL; any other
 * value is duplicated with xstrdup() first. Returns what setvar_p()
 * returns. */
678 /* same as setvar_p but make a copy of string */
679 static var *setvar_s(var *v, const char *value)
681 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
/* Only the signature is visible in this listing; the body is missing. */
684 /* same as setvar_s but set USER flag */
685 static var *setvar_u(var *v, const char *value)
/* Formats idx as a decimal string into sidx and obtains the element with
 * that key from the array behind a via findvar(iamarray(a), sidx).
 * The assignment of s to the element is not part of this listing. */
692 /* set array element to user string */
693 static void setari_u(var *a, int idx, const char *s)
695 char sidx[sizeof(int)*3 + 1];
698 sprintf(sidx, "%d", idx);
699 v = findvar(iamarray(a), sidx);
/* The only visible statement sets VF_NUMBER in v->type. Storing the value
 * and the return statement are not part of this listing. */
703 /* assign numeric value to variable */
704 static var *setvar_i(var *v, double value)
707 v->type |= VF_NUMBER;
/* Returns the string value of v and never NULL: a NULL string is returned
 * as "". A number without a cached string is first formatted by fmt_num()
 * with the value of the CONVFMT variable as format; the xstrdup() copy is
 * stored in v->string and VF_CACHED is set. */
713 static const char *getvar_s(var *v)
715 /* if v is numeric and has no cached string, convert it to string */
716 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
717 fmt_num(buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
718 v->string = xstrdup(buf);
719 v->type |= VF_CACHED;
721 return (v->string == NULL) ? "" : v->string;
/* When neither VF_NUMBER nor VF_CACHED is set, the string s is parsed with
 * strtod() into v->number, VF_USER values get extra handling (not shown),
 * and VF_CACHED is set. How s is obtained and the return statement are not
 * part of this listing. */
724 static double getvar_i(var *v)
728 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
732 v->number = strtod(s, &s);
733 if (v->type & VF_USER) {
741 v->type |= VF_CACHED;
/* Visible steps: OR into dest->type every flag of src except VF_DONTTOUCH
 * and VF_FSTR, copy the number, duplicate the string with xstrdup() (any
 * guarding condition is not shown), then run handle_special(dest).
 * Preparation of dest and the return statement are not part of this
 * listing. */
746 static var *copyvar(var *dest, const var *src)
750 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
751 dest->number = src->number;
753 dest->string = xstrdup(src->string);
755 handle_special(dest);
/* Adds 1 to the numeric value of v: reads it with getvar_i(), stores the
 * sum with setvar_i() and returns that call's result. */
759 static var *incvar(var *v)
761 return setvar_i(v, getvar_i(v)+1.);
/* The XOR flips the VF_DIRTY bit before masking, so the result is non-zero
 * when VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear.
 * A statement between the signature and the return is not part of this
 * listing. */
764 /* return true if v is number or numeric string */
765 static int is_numeric(var *v)
768 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
/* Two return paths are visible: a numeric one (0 exactly when v->number is
 * 0) and a string one (1 exactly when the string is non-NULL and
 * non-empty). The condition choosing between them is not part of this
 * listing. */
771 /* return 1 when value of v corresponds to true, 0 otherwise */
772 static int istrue(var *v)
775 return (v->number == 0) ? 0 : 1;
777 return (v->string && *(v->string)) ? 1 : 0;
/* Visible steps: the search stops at a block cb that still has room for n
 * vars past cb->pos; otherwise a new block holding max(n, MINNVBLOCK) vars
 * is xmalloc'd and linked as pb->next when pb exists; a loop then runs over
 * vars below cb->pos (body not shown). Bookkeeping of pos and the return
 * statement are not part of this listing. */
780 /* temporary variables allocator. Last allocated should be first freed */
781 static var *nvalloc(int n)
789 if ((cb->pos - cb->nv) + n <= cb->size) break;
794 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
795 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
800 if (pb) pb->next = cb;
806 while (v < cb->pos) {
/* Releases temporaries from v up to cb->pos. A v outside [cb->nv, cb->pos)
 * raises runtime_error(EMSG_INTERNAL_ERROR). For each var that has VF_ARRAY
 * but not VF_CHILD the array is cleared and its bucket array freed; vars
 * with VF_WALK get extra cleanup (not shown). The final loop runs while the
 * current block is empty and has a predecessor (body not shown). */
815 static void nvfree(var *v)
819 if (v < cb->nv || v >= cb->pos)
820 runtime_error(EMSG_INTERNAL_ERROR);
822 for (p = v; p < cb->pos; p++) {
823 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
824 clear_array(iamarray(p));
825 free(p->x.array->items);
828 if (p->type & VF_WALK)
835 while (cb->prev && cb->pos == cb->nv) {
840 /* ------- awk program text parsing ------- */
/* Tokenizer; many statements of the body are missing from this listing.
 * Visible behaviour:
 *  - a pending rollback or a token saved after an inserted concatenation
 *    is delivered before new input is scanned;
 *  - one branch scans forward to the next newline or NUL;
 *  - a double-quoted string is decoded in place with nextchar(); NUL or
 *    newline inside it raises EMSG_UNEXP_EOS;
 *  - a slash starts a regexp only when TC_REGEXP is expected;
 *  - a leading '.' or digit is parsed with strtod();
 *  - otherwise the token list is searched, and failing that the text is
 *    treated as a name made of isalnum_() characters;
 *  - a newline following a class in TC_NOTERM is skipped;
 *  - when the previous class is in TC_CONCAT1, the new one in TC_CONCAT2
 *    and TC_BINOP is expected, an OC_CONCAT token is synthesized and the
 *    real token info is saved;
 *  - a resulting class outside 'expected' raises a syntax error
 *    (EMSG_UNEXP_EOS for newline or EOF, else EMSG_UNEXP_TOKEN). */
842 /* Parse next token pointed by global pos, place results into global ttt.
843 * If token isn't expected, give away. Return token class
845 static uint32_t next_token(uint32_t expected)
847 static int concat_inserted;
848 static uint32_t save_tclass, save_info;
849 static uint32_t ltclass = TC_OPTERM;
858 ttt.rollback = FALSE;
860 } else if (concat_inserted) {
861 concat_inserted = FALSE;
862 ttt.tclass = save_tclass;
863 ttt.info = save_info;
871 while (*p != '\n' && *p != '\0')
880 } else if (*p == '\"') {
882 ttt.string = s = ++p;
884 if (*p == '\0' || *p == '\n')
885 syntax_error(EMSG_UNEXP_EOS);
886 *(s++) = nextchar(&p);
892 } else if ((expected & TC_REGEXP) && *p == '/') {
894 ttt.string = s = ++p;
896 if (*p == '\0' || *p == '\n')
897 syntax_error(EMSG_UNEXP_EOS);
901 *(s-1) = bb_process_escape_sequence((const char **)&p);
912 } else if (*p == '.' || isdigit(*p)) {
914 ttt.number = strtod(p, &p);
916 syntax_error(EMSG_UNEXP_TOKEN);
920 /* search for something known */
930 /* if token class is expected, token
931 * matches and it's not a longer word,
932 * then this is what we are looking for
934 if ((tc & (expected | TC_WORD | TC_NEWLINE))
935 && *tl == *p && strncmp(p, tl, l) == 0
936 && !((tc & TC_WORD) && isalnum_(p[l]))
947 /* it's a name (var/array/function),
948 * otherwise it's something wrong
951 syntax_error(EMSG_UNEXP_TOKEN);
954 while (isalnum_(*(++p))) {
959 /* also consume whitespace between functionname and bracket */
960 if (!(expected & TC_VARIABLE))
974 /* skipping newlines in some cases */
975 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
978 /* insert concatenation operator when needed */
979 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
980 concat_inserted = TRUE;
982 save_info = ttt.info;
984 ttt.info = OC_CONCAT | SS | P(35);
989 ltclass = ttt.tclass;
991 /* Are we ready for this? */
992 if (!(ltclass & expected))
993 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
994 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
/* Sets ttt.rollback; next_token() tests and clears this flag at its
 * start. */
999 static void rollback_token(void)
1001 ttt.rollback = TRUE;
/* Allocates one node with xzalloc(). Storing info in it and the return
 * statement are not part of this listing. */
1004 static node *new_node(uint32_t info)
1008 n = xzalloc(sizeof(node));
/* Turns n into an OC_REGEXP node and compiles s twice into the two-element
 * regex_t storage at re: re[0] with REG_EXTENDED, re[1] additionally with
 * REG_ICASE. Attaching re to the node and the return statement are not
 * part of this listing. */
1014 static node *mk_re_node(const char *s, node *n, regex_t *re)
1016 n->info = OC_REGEXP;
1019 xregcomp(re, s, REG_EXTENDED);
1020 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parses a parenthesized expression: requires a TC_SEQSTART token, then
 * returns the tree built by parse_expr() up to the matching TC_SEQTERM. */
1025 static node *condition(void)
1027 next_token(TC_SEQSTART);
1028 return parse_expr(TC_SEQTERM);
/* Expression parser; a large part of the body is missing from this
 * listing. Visible behaviour: tokens are read until one belongs to iexp,
 * with xtc holding the classes acceptable next. Three cases are visible:
 *  - a '<' comparison token while glptr is set is turned into an input
 *    redirection node attached to glptr;
 *  - binary and postfix operators first climb back over operators
 *    according to the PRIMASK / PRIMASK2 priority comparison, then a new
 *    node is attached; the getline pipe additionally requires a
 *    TC_GETLINE token and has its priority bits cleared;
 *  - operands and prefix operators are attached as right child; names are
 *    resolved against ahash (function argument -> OC_FNARG with its index)
 *    or created with newvar(); numbers and strings get a fresh var;
 *    regexps go through mk_re_node(); function calls and some other
 *    operands parse their argument list via condition(). */
1031 /* parse expression terminated by given argument, return ptr
1032 * to built subtree. Terminator is eaten by parse_expr */
1033 static node *parse_expr(uint32_t iexp)
1042 sn.r.n = glptr = NULL;
1043 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1045 while (!((tc = next_token(xtc)) & iexp)) {
1046 if (glptr && (ttt.info == (OC_COMPARE | VV | P(39) | 2))) {
1047 /* input redirection (<) attached to glptr node */
1048 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1050 xtc = TC_OPERAND | TC_UOPPRE;
1053 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1054 /* for binary and postfix-unary operators, jump back over
1055 * previous operators with higher priority */
1057 while ( ((ttt.info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1058 || ((ttt.info == vn->info) && ((ttt.info & OPCLSMASK) == OC_COLON)) )
1060 if ((ttt.info & OPCLSMASK) == OC_TERNARY)
1062 cn = vn->a.n->r.n = new_node(ttt.info);
1064 if (tc & TC_BINOP) {
1066 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1067 if ((ttt.info & OPCLSMASK) == OC_PGETLINE) {
1069 next_token(TC_GETLINE);
1070 /* give maximum priority to this pipe */
1071 cn->info &= ~PRIMASK;
1072 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1076 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1081 /* for operands and prefix-unary operators, attach them
1084 cn = vn->r.n = new_node(ttt.info);
1086 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1087 if (tc & (TC_OPERAND | TC_REGEXP)) {
1088 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1089 /* one should be very careful with switch on tclass -
1090 * only simple tclasses should be used! */
1095 v = hash_search(ahash, ttt.string);
1097 cn->info = OC_FNARG;
1098 cn->l.i = v->x.aidx;
1100 cn->l.v = newvar(ttt.string);
1102 if (tc & TC_ARRAY) {
1104 cn->r.n = parse_expr(TC_ARRTERM);
1111 v = cn->l.v = xzalloc(sizeof(var));
1113 setvar_i(v, ttt.number);
1115 setvar_s(v, ttt.string);
1119 mk_re_node(ttt.string, cn, xzalloc(sizeof(regex_t)*2));
1124 cn->r.f = newfunc(ttt.string);
1125 cn->l.n = condition();
1129 cn = vn->r.n = parse_expr(TC_SEQTERM);
1135 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1139 cn->l.n = condition();
/* Works on the current chain 'seq'. Visible steps: an empty chain gets a
 * placeholder node as first and last; when the chain's recorded
 * programname differs from the global one it is updated and an
 * OC_NEWSOURCE node carrying a copy of the name is chained (recursive
 * call); the chain is then terminated by a fresh OC_DONE node that becomes
 * seq->last. The guarding conditions and the return statement are not part
 * of this listing. */
1148 /* add node to chain. Return ptr to alloc'd node */
1149 static node *chain_node(uint32_t info)
1154 seq->first = seq->last = new_node(0);
1156 if (seq->programname != programname) {
1157 seq->programname = programname;
1158 n = chain_node(OC_NEWSOURCE);
1159 n->l.s = xstrdup(programname);
1164 seq->last = n->a.n = new_node(OC_DONE);
/* Chains a node of the given kind whose left child is an expression parsed
 * up to a statement terminator or closing brace. A terminating TC_GRPTERM
 * triggers an action that is not shown (presumably rollback_token()). */
1169 static void chain_expr(uint32_t info)
1173 n = chain_node(info);
1174 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1175 if (ttt.tclass & TC_GRPTERM)
/* Builds the skeleton of a loop. The enclosing break_ptr and continue_ptr
 * are saved and restored around the construction; inside, fresh OC_EXEC
 * nodes serve as jump targets: continue leads to n2, break (and the
 * branch node's r.n) leads to the chain end reached after the body.
 * Parsing of the body, the use of nn and the return statement are not part
 * of this listing. */
1179 static node *chain_loop(node *nn)
1181 node *n, *n2, *save_brk, *save_cont;
1183 save_brk = break_ptr;
1184 save_cont = continue_ptr;
1186 n = chain_node(OC_BR | Vx);
1187 continue_ptr = new_node(OC_EXEC);
1188 break_ptr = new_node(OC_EXEC);
1190 n2 = chain_node(OC_EXEC | Vx);
1193 continue_ptr->a.n = n2;
1194 break_ptr->a.n = n->r.n = seq->last;
1196 continue_ptr = save_cont;
1197 break_ptr = save_brk;
/* Statement/group parser; the case labels and many statements are missing
 * from this listing. Visible behaviour: leading newlines are skipped; a
 * TC_GRPSTART token opens a brace group that is consumed up to TC_GRPTERM;
 * an expression start is chained as OC_EXEC via chain_expr(); control
 * statements are dispatched on (ttt.info & OPCLSMASK). Fragments of the
 * if/else, while, do-while, for (both the for-in form, which requires an
 * OC_IN expression, and the three-part form), print with output
 * redirection, break/continue and the remaining statements (handed to
 * chain_expr()) are visible. */
1202 /* parse group and attach it to chain */
1203 static void chain_group(void)
1209 c = next_token(TC_GRPSEQ);
1210 } while (c & TC_NEWLINE);
1212 if (c & TC_GRPSTART) {
1213 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1214 if (ttt.tclass & TC_NEWLINE) continue;
1218 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1220 chain_expr(OC_EXEC | Vx);
1221 } else { /* TC_STATEMNT */
1222 switch (ttt.info & OPCLSMASK) {
1224 n = chain_node(OC_BR | Vx);
1225 n->l.n = condition();
1227 n2 = chain_node(OC_EXEC);
1229 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1231 n2->a.n = seq->last;
1239 n = chain_loop(NULL);
1244 n2 = chain_node(OC_EXEC);
1245 n = chain_loop(NULL);
1247 next_token(TC_WHILE);
1248 n->l.n = condition();
1252 next_token(TC_SEQSTART);
1253 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1254 if (ttt.tclass & TC_SEQTERM) { /* for-in */
1255 if ((n2->info & OPCLSMASK) != OC_IN)
1256 syntax_error(EMSG_UNEXP_TOKEN);
1257 n = chain_node(OC_WALKINIT | VV);
1260 n = chain_loop(NULL);
1261 n->info = OC_WALKNEXT | Vx;
1263 } else { /* for (;;) */
1264 n = chain_node(OC_EXEC | Vx);
1266 n2 = parse_expr(TC_SEMICOL);
1267 n3 = parse_expr(TC_SEQTERM);
1277 n = chain_node(ttt.info);
1278 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1279 if (ttt.tclass & TC_OUTRDR) {
1280 n->info |= ttt.info;
1281 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1283 if (ttt.tclass & TC_GRPTERM)
1288 n = chain_node(OC_EXEC);
1293 n = chain_node(OC_EXEC);
1294 n->a.n = continue_ptr;
1297 /* delete, next, nextfile, return, exit */
1299 chain_expr(ttt.info);
/* Top-level parser loop: reads tokens until TC_EOF. Visible cases:
 * statement terminators, BEGIN, END, function declarations (the name is
 * registered with newfunc(), each parameter is entered into ahash with its
 * positional index and counted in f->nargs), pattern rules (an OC_TEST
 * node whose left child is the pattern; a following group is parsed,
 * otherwise an OC_PRINT node is chained) and bare groups. Selection of the
 * target chain and several statements are not part of this listing. */
1304 static void parse_program(char *p)
1313 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1314 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1316 if (tclass & TC_OPTERM)
1320 if (tclass & TC_BEGIN) {
1324 } else if (tclass & TC_END) {
1328 } else if (tclass & TC_FUNCDECL) {
1329 next_token(TC_FUNCTION);
1331 f = newfunc(ttt.string);
1332 f->body.first = NULL;
1334 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1335 v = findvar(ahash, ttt.string);
1336 v->x.aidx = (f->nargs)++;
1338 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1345 } else if (tclass & TC_OPSEQ) {
1347 cn = chain_node(OC_TEST);
1348 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1349 if (ttt.tclass & TC_GRPSTART) {
1353 chain_node(OC_PRINT);
1355 cn->r.n = mainseq.last;
1357 } else /* if (tclass & TC_GRPSTART) */ {
1365 /* -------- program execution part -------- */
/* Prepares the splitter node for separator s. A node that already is
 * OC_REGEXP gets extra handling first (not shown). A separator longer than
 * one character is compiled as a regex via mk_re_node(); in the other
 * visible branch the first character of s itself is stored in n->info. */
1367 static node *mk_splitter(const char *s, tsplitter *spl)
1375 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1379 if (strlen(s) > 1) {
1380 mk_re_node(s, n, re);
1382 n->info = (uint32_t) *s;
/* For an OC_REGEXP node one of the two precompiled regexes is returned,
 * chosen by icase. Any other node is evaluated, its string value compiled
 * into preg with xregcomp() (REG_ICASE added when icase is set). The final
 * return statement is not part of this listing. */
1388 /* use node as a regular expression. Supplied with node ptr and regex_t
1389 * storage space. Return ptr to regex (if result points to preg, it should
1390 * be later regfree'd manually
1392 static regex_t *as_regex(node *op, regex_t *preg)
1397 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1398 return icase ? op->r.ire : op->l.re;
1401 s = getvar_s(evaluate(op, v));
1402 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
/* Ensures the buffer *b can be indexed up to n: when *b is NULL or n has
 * reached *size, the buffer is xrealloc'd to n + n/2 + 80 bytes and the new
 * capacity is written to *size. */
1407 /* gradually increasing buffer */
1408 static void qrealloc(char **b, int n, int *size)
1410 if (!*b || n >= *size)
1411 *b = xrealloc(*b, *size = n + (n>>1) + 80);
/* Grows the Fields array to size + 16 entries once size reaches the
 * remembered capacity (maxfields); new entries start as VF_SPECIAL with a
 * NULL string. When size is below nfields, the fields from size up to
 * nfields are processed in a loop whose body is not shown. Updating
 * nfields is not part of this listing. */
1414 /* resize field storage space */
1415 static void fsrealloc(int size)
1417 static int maxfields; /* = 0;*/
1420 if (size >= maxfields) {
1422 maxfields = size + 16;
1423 Fields = xrealloc(Fields, maxfields * sizeof(var));
1424 for (; i < maxfields; i++) {
1425 Fields[i].type = VF_SPECIAL;
1426 Fields[i].string = NULL;
1430 if (size < nfields) {
1431 for (i = size; i < nfields; i++) {
1438 static int awk_split(const char *s, node *spl, char **slist)
1438 static int awk_split(const char *s, node *spl, char **slist)
/* Interior of awk_split() (its signature is the line directly above).
 * The output buffer *slist is allocated with 2 * strlen(s) + 3 bytes.
 * Four splitting modes are visible: regex (OC_REGEXP splitter), null
 * separator, a single character other than space (upper- and lower-case
 * forms are prepared and strpbrk() is used), and whitespace splitting.
 * Most statements of each mode and the return value are not part of this
 * listing. */
1443 regmatch_t pmatch[2];
1445 /* in worst case, each char would be a separate field */
1446 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1449 c[0] = c[1] = (char)spl->info;
1451 if (*getvar_s(intvar[RS]) == '\0')
1454 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1456 l = strcspn(s, c+2);
1457 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1458 && pmatch[0].rm_so <= l
1460 l = pmatch[0].rm_so;
1461 if (pmatch[0].rm_eo == 0) {
1466 pmatch[0].rm_eo = l;
1467 if (s[l]) pmatch[0].rm_eo++;
1473 s += pmatch[0].rm_eo;
1476 } else if (c[0] == '\0') { /* null split */
1482 } else if (c[0] != ' ') { /* single-character split */
1484 c[0] = toupper(c[0]);
1485 c[1] = tolower(c[1]);
1488 while ((s1 = strpbrk(s1, c))) {
1492 } else { /* space split */
1494 s = skip_whitespace(s);
1497 while (*s && !isspace(*s))
/* Splits the value of $0 (intvar[F0]) with the field splitter into the
 * static buffer fstrings, points each Fields[i].string at its word inside
 * that buffer and marks it VF_FSTR | VF_USER | VF_DIRTY. NF is then
 * written directly (type and number) rather than through setvar_i().
 * Guarding conditions and resizing are not part of this listing. */
1505 static void split_f0(void)
1507 static char *fstrings = NULL;
1518 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1521 for (i = 0; i < n; i++) {
1522 Fields[i].string = nextword(&s);
1523 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1526 /* set NF manually to avoid side effects */
1528 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1529 intvar[NF]->number = nfields;
/* Does nothing visible for vars without VF_SPECIAL (the action on that test
 * is not shown). Otherwise dispatches on the identity of v:
 *  - NF: $0 is rebuilt by joining n fields with the OFS string into a
 *    qrealloc-grown buffer that is handed to setvar_p();
 *  - $0: is_f0_split is cleared;
 *  - FS / RS: the field / record splitter is rebuilt with mk_splitter();
 *  - IGNORECASE: body not shown;
 *  - final visible lines: NF is raised to at least (v - Fields) + 1,
 *    presumably for a field variable; see the original note that v is
 *    invalid afterwards. */
1532 /* perform additional actions when some internal variables changed */
1533 static void handle_special(var *v)
1537 const char *sep, *s;
1538 int sl, l, len, i, bsize;
1540 if (!(v->type & VF_SPECIAL))
1543 if (v == intvar[NF]) {
1544 n = (int)getvar_i(v);
1547 /* recalculate $0 */
1548 sep = getvar_s(intvar[OFS]);
1552 for (i = 0; i < n; i++) {
1553 s = getvar_s(&Fields[i]);
1556 memcpy(b+len, sep, sl);
1559 qrealloc(&b, len+l+sl, &bsize);
1560 memcpy(b+len, s, l);
1565 setvar_p(intvar[F0], b);
1568 } else if (v == intvar[F0]) {
1569 is_f0_split = FALSE;
1571 } else if (v == intvar[FS]) {
1572 mk_splitter(getvar_s(v), &fsplitter);
1574 } else if (v == intvar[RS]) {
1575 mk_splitter(getvar_s(v), &rsplitter);
1577 } else if (v == intvar[IGNORECASE]) {
1581 n = getvar_i(intvar[NF]);
1582 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1583 /* right here v is invalid. Just to note... */
/* Argument lists are chains of OC_COMMA nodes: the visible test checks
 * whether the current node n is non-NULL and of class OC_COMMA. What is
 * returned and how *pn advances is not part of this listing. */
1587 /* step through func/builtin/etc arguments */
1588 static node *nextarg(node **pn)
1593 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
/* Prepares the key list used to iterate over an array. A var that already
 * has VF_WALK gets extra handling (not shown). The walker block is one
 * allocation: two char pointers followed by room for all key names
 * (array->glen bytes); both pointers start at the name area and every item
 * name is copied there with strcpy(). Advancing the write pointer and
 * setting VF_WALK are not part of this listing. */
1602 static void hashwalk_init(var *v, xhash *array)
1608 if (v->type & VF_WALK)
1612 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1613 w[0] = w[1] = (char *)(w + 2);
1614 for (i = 0; i < array->csize; i++) {
1615 hi = array->items[i];
1617 strcpy(*w, hi->name);
/* Assigns the next stored key, taken with nextword() from the second
 * walker pointer, to v. The end-of-list test and the return values are not
 * part of this listing. */
1624 static int hashwalk_next(var *v)
1632 setvar_s(v, nextword(w+1));
/* Evaluates the pattern into a function-static scratch var and converts
 * the result to 0 or 1 with istrue(). */
1636 /* evaluate node, return 1 when result is true, 0 otherwise */
1637 static int ptest(node *pattern)
1639 static var v; /* static: to save stack space? */
1641 return istrue(evaluate(pattern, &v));
/* Record reader; many statements are missing from this listing. Visible
 * behaviour: data is read from the stream's file descriptor with
 * safe_read() into a private buffer grown by qrealloc(). The record end is
 * located in one of three ways: by regexec() when the record splitter is
 * OC_REGEXP, by strchr() (falling back to a search for NUL) when the
 * separator character is non-zero, and otherwise by looking for an empty
 * line ("\n\n") after skipping leading newlines. errno is stored in the
 * ERRNO variable, and the text between so and eo is stored in RT.
 * Return values are not part of this listing. */
1644 /* read next record from stream rsm into a variable v */
1645 static int awk_getline(rstream *rsm, var *v)
1648 regmatch_t pmatch[2];
1649 int a, p, pp=0, size;
1650 int fd, so, eo, r, rp;
1653 /* we're using our own buffer since we need access to accumulating
1656 fd = fileno(rsm->F);
1661 c = (char) rsplitter.n.info;
1664 if (! m) qrealloc(&m, 256, &size);
1670 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1671 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1672 b, 1, pmatch, 0) == 0) {
1673 so = pmatch[0].rm_so;
1674 eo = pmatch[0].rm_eo;
1678 } else if (c != '\0') {
1679 s = strchr(b+pp, c);
1680 if (! s) s = memchr(b+pp, '\0', p - pp);
1687 while (b[rp] == '\n')
1689 s = strstr(b+rp, "\n\n");
1692 while (b[eo] == '\n') eo++;
1700 memmove(m, (const void *)(m+a), p+1);
1705 qrealloc(&m, a+p+128, &size);
1708 p += safe_read(fd, b+p, size-p-1);
1712 setvar_i(intvar[ERRNO], errno);
1721 c = b[so]; b[so] = '\0';
1725 c = b[eo]; b[eo] = '\0';
1726 setvar_s(intvar[RT], b+so);
/* Formats n into b (at most size bytes, through snprintf()). With
 * int_as_int set and n equal to its int conversion, "%d" is used.
 * Otherwise the last character of format selects the argument type:
 * one of "diouxX" passes n converted to int, one of "eEfgG" passes the
 * double; anything else raises runtime_error(EMSG_INV_FMT).
 * NOTE(review): the (int)n conversions are undefined for values outside
 * the int range - confirm how callers bound n.
 * The return statement is not part of this listing. */
1738 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1742 const char *s = format;
1744 if (int_as_int && n == (int)n) {
1745 r = snprintf(b, size, "%d", (int)n);
1747 do { c = *s; } while (c && *++s);
1748 if (strchr("diouxX", c)) {
1749 r = snprintf(b, size, format, (int)n);
1750 } else if (strchr("eEfgG", c)) {
1751 r = snprintf(b, size, format, n);
1753 runtime_error(EMSG_INV_FMT);
/* The first argument is evaluated and copied as the format string. The
 * format is then scanned one conversion at a time ("%%" is skipped as a
 * literal); a rejected form raises the "%*x formats are not supported"
 * error (the test itself is not shown). For every conversion the output
 * buffer is grown with qrealloc() and the next argument evaluated:
 * 'c' (or a missing conversion char) prints a character taken from the
 * numeric or string value, 's' prints the string value, everything else
 * goes through fmt_num(). The buffer is finally trimmed to i + 1 bytes.
 * Several statements are not part of this listing. */
1760 /* formatted output into an allocated buffer, return ptr to buffer */
1761 static char *awk_printf(node *n)
1766 int i, j, incr, bsize;
1771 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1776 while (*f && (*f != '%' || *(++f) == '%'))
1778 while (*f && !isalpha(*f)) {
1780 syntax_error("%*x formats are not supported");
1784 incr = (f - s) + MAXVARFMT;
1785 qrealloc(&b, incr + i, &bsize);
1790 arg = evaluate(nextarg(&n), v);
1793 if (c == 'c' || !c) {
1794 i += sprintf(b+i, s, is_numeric(arg) ?
1795 (char)getvar_i(arg) : *getvar_s(arg));
1796 } else if (c == 's') {
1798 qrealloc(&b, incr+i+strlen(s1), &bsize);
1799 i += sprintf(b+i, s, s1);
1801 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1805 /* if there was an error while sprintf, return value is negative */
1809 b = xrealloc(b, i + 1);
1816 /* common substitution routine
1817 * replace (nm) substring of (src) that match (n) with (repl), store
1818 * result into (dest), return number of substitutions. If nm=0, replace
1819 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1820 * subexpression matching (\1-\9)
// NOTE(review): only a sample of this function's lines is visible in this
// listing; several declarations, branches and the return are missing.
1822 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1827 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// Ten slots are handed to regexec below: the whole match plus sub-matches.
1828 regmatch_t pmatch[10];
// Obtain a compiled regex for rn; sreg is offered to as_regex as storage
// (see the matching regfree at the end).
1831 re = as_regex(rn, &sreg);
// A NULL src or dest selects intvar[F0].
1832 if (! src) src = intvar[F0];
1833 if (! dest) dest = intvar[F0];
// Match repeatedly starting at sp. Once sp no longer points at the start of
// the source string, REG_NOTBOL is passed so that a beginning-of-line anchor
// does not match there.
1838 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
// so/eo: start and end offsets of the whole match, relative to sp.
1839 so = pmatch[0].rm_so;
1840 eo = pmatch[0].rm_eo;
// Grow the output to hold eo more bytes plus rl (presumably the replacement
// length), then copy the first eo bytes at sp into it.
1842 qrealloc(&ds, di + eo + rl, &dssize);
1843 memcpy(ds + di, sp, eo);
// Walk the replacement text one character at a time.
1849 for (s = repl; *s; s++) {
// '&' always, and a digit when ex is set, selects matched text to insert.
1855 if (c == '&' || (ex && c >= '0' && c <= '9')) {
// Move the output index back by (nbs + 3) / 2; nbs presumably counts the
// backslashes seen just before this character -- TODO confirm.
1856 di -= ((nbs + 3) >> 1);
// Copy the text of match slot j from the source into the output, growing the
// buffer first.
1865 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1866 qrealloc(&ds, di + rl + n, &dssize);
1867 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// Copy a single source character and stop if it was the terminating NUL.
1878 if (! (ds[di++] = *sp++)) break;
// Append whatever remains of the source string.
1882 qrealloc(&ds, di + strlen(sp), &dssize);
1883 strcpy(ds + di, sp);
// Release the regex only when as_regex returned the sreg storage.
1885 if (re == &sreg) regfree(re);
// exec_builtin: run the builtin selected by op->info and store its result
// through res.
// NOTE(review): only a sample of this function's lines is visible in this
// listing. The case labels are missing, so the builtin each fragment belongs
// to is named only where the code itself makes it evident.
1889 static var *exec_builtin(node *op, var *res)
1896 regmatch_t pmatch[2];
1898 static tsplitter tspl;
1907 isr = info = op->info;
1910 av[2] = av[3] = NULL;
// Collect up to four arguments. Bits of isr decide whether an argument is
// evaluated (mask 0x09000000) and whether its string value is fetched
// (mask 0x08000000).
1911 for (i = 0; i < 4 && op; i++) {
1912 an[i] = nextarg(&op);
1913 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1914 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// info >> 30 is the minimum number of arguments this builtin accepts.
1919 if (nargs < (info >> 30))
1920 runtime_error(EMSG_TOO_FEW_ARGS);
1922 switch (info & OPNMASK) {
// Two-argument arctangent; without ENABLE_FEATURE_AWK_MATH the other
// preprocessor branch presumably reports EMSG_NO_MATH instead.
// NOTE(review): the first operand is av[i], where i is whatever the argument
// loop above left behind, not av[0]. This looks like a typo for av[0] --
// confirm and fix.
1925 #if ENABLE_FEATURE_AWK_MATH
1926 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1928 runtime_error(EMSG_NO_MATH);
// Splitter selection: a regexp node in the third argument is used as is,
// anything else is evaluated and converted with mk_splitter.
1934 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1935 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
// Split as[0] into n pieces, empty the array in av[1], then store the pieces
// under indices 1..n, one nextword() result per element.
1940 n = awk_split(as[0], spl, &s);
1942 clear_array(iamarray(av[1]));
1943 for (i=1; i<=n; i++)
1944 setari_u(av[1], i, nextword(&s1));
// Substring extraction: i is the start from av[1] turned zero-based; n is
// av[2] when more than two arguments were given, otherwise l - i (l is
// presumably the length of as[0] -- its assignment is not visible).
1951 i = getvar_i(av[1]) - 1;
1954 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1957 strncpy(s, as[0]+i, n);
// Bitwise builtins: operands are converted from their numeric value to long
// (unsigned long for the right shift) before the operation.
1963 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1967 setvar_i(res, ~(long)getvar_i(av[0]));
1971 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1975 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1979 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1983 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
// Character mapping: work on a private copy of as[0] and pass characters
// through the to_xxx function pointer.
1993 s1 = s = xstrdup(as[0]);
1995 *s1 = (*to_xxx)(*s1);
// Substring search: l is strlen(as[0]) minus ll (presumably the length of
// as[1]); searching is attempted only for a non-empty needle that fits.
2004 l = strlen(as[0]) - ll;
2005 if (ll > 0 && l >= 0) {
// Case-sensitive search; a hit yields the 1-based position of the match.
2007 s = strstr(as[0], as[1]);
2008 if (s) n = (s - as[0]) + 1;
2010 /* this piece of code is terribly slow and
2011 * really should be rewritten
// Case-insensitive search: compare ll characters at every start offset.
2013 for (i=0; i<=l; i++) {
2014 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting into the global buf; the built-in default format is used
// when no argument was supplied.
2026 tt = getvar_i(av[1]);
2029 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2030 i = strftime(buf, MAXVARFMT,
2031 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
// Regex match of as[0] against the pattern in an[1].
2038 re = as_regex(an[1], &sreg);
2039 n = regexec(re, as[0], 1, pmatch, 0);
// With rm_so = 0 and rm_eo = -1 the values stored below become 0 and -1
// (presumably the no-match path; its condition is not visible here).
2044 pmatch[0].rm_so = 0;
2045 pmatch[0].rm_eo = -1;
// Publish the match start and length in RSTART and RLENGTH; the start is
// also the result.
2047 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2048 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2049 setvar_i(res, pmatch[0].rm_so);
2050 if (re == &sreg) regfree(re);
// Substitution with sub-expression references enabled (ex = TRUE): av[2]
// gives the match number, av[3] is the source, and the text goes to res.
2054 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
// Substitution of every match (nm = 0) in av[2], in place; the result is the
// count returned by awk_sub.
2058 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
// Same, but nm = 1 is passed instead of 0.
2062 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2071 * Evaluate node - the heart of the program. Supplied with subtree
2072 * and place where to store result. returns ptr to result.
// XC() shifts a node-class value right by 8 bits; evaluate() applies it to
// both the switch operand and every case label.
2074 #define XC(n) ((n) >> 8)
// evaluate: execute the node tree starting at op. res is the place offered
// for storing a result; the returned pointer designates the result.
// NOTE(review): only a sample of this function's lines is visible in this
// listing, so the comments below describe the visible fragments only.
2076 static var *evaluate(node *op, var *res)
2078 /* This procedure is recursive so we should count every byte */
2079 static var *fnargs = NULL;
2080 static unsigned seed = 1;
2081 static regex_t sreg;
2103 return setvar_s(res, NULL);
// opn is the part of opinfo selected by OPNMASK; lineno follows the node
// currently being executed.
2109 opn = (short)(opinfo & OPNMASK);
2110 lineno = op->lineno;
2112 /* execute inevitable things */
// OF_* bits in opinfo request operand preparation: evaluate op1 (RES1) or
// op->r.n (RES2), fetch string values (STR1/STR2), fetch the numeric value of
// the first operand (NUM1).
2114 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2115 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2116 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2117 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2118 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2120 switch (XC(opinfo & OPCLSMASK)) {
2122 /* -- iterative node type -- */
2126 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2127 /* it's range pattern */
// OF_CHECKED stored in the node marks the range as open: it is set once the
// first pattern matches and cleared again when the second one matches.
2128 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2129 op->info |= OF_CHECKED;
2130 if (ptest(op1->r.n))
2131 op->info &= ~OF_CHECKED;
// Plain pattern: continue with op->a.n when it holds, with op->r.n otherwise.
2138 op = (ptest(op1)) ? op->a.n : op->r.n;
2142 /* just evaluate an expression, also used as unconditional jump */
2146 /* branch, used in if-else and various loops */
2148 op = istrue(L.v) ? op->a.n : op->r.n;
2151 /* initialize for-in loop */
2152 case XC( OC_WALKINIT ):
2153 hashwalk_init(L.v, iamarray(R.v));
2156 /* get next array item */
2157 case XC( OC_WALKNEXT ):
2158 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2161 case XC( OC_PRINT ):
2162 case XC( OC_PRINTF ):
// Output redirection: R.s names the target stream. A pipe is started with
// popen (failure is fatal); a file is opened for writing when opn is 'w' and
// for appending otherwise.
2165 X.rsm = newfile(R.s);
2168 X.rsm->F = popen(R.s, "w");
2169 if (X.rsm->F == NULL)
2170 bb_perror_msg_and_die("popen");
2173 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2179 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// Writes the value of intvar[F0] (presumably the no-argument case).
2181 fputs(getvar_s(intvar[F0]), X.F);
// Each argument is evaluated in turn. Values typed as numbers are formatted
// through fmt_num with the OFMT format and int_as_int enabled; other values
// are written as strings.
2184 L.v = evaluate(nextarg(&op1), v1);
2185 if (L.v->type & VF_NUMBER) {
2186 fmt_num(buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2187 getvar_i(L.v), TRUE);
2190 fputs(getvar_s(L.v), X.F);
// OFS goes between arguments (only while more remain), ORS is written last.
2193 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2196 fputs(getvar_s(intvar[ORS]), X.F);
2198 } else { /* OC_PRINTF */
2199 L.s = awk_printf(op1);
2206 case XC( OC_DELETE ):
// The operand must be a variable or a function argument; anything else is
// reported as EMSG_NOT_ARRAY.
2207 X.info = op1->info & OPCLSMASK;
2208 if (X.info == OC_VAR) {
2210 } else if (X.info == OC_FNARG) {
2211 R.v = &fnargs[op1->l.i];
2213 runtime_error(EMSG_NOT_ARRAY);
// Either one element (keyed by the evaluated subscript) is removed, or the
// whole array is cleared; the selecting condition is not visible here.
2218 L.s = getvar_s(evaluate(op1->r.n, v1));
2219 hash_remove(iamarray(R.v), L.s);
2221 clear_array(iamarray(R.v));
2225 case XC( OC_NEWSOURCE ):
2226 programname = op->l.s;
2229 case XC( OC_RETURN ):
2233 case XC( OC_NEXTFILE ):
2244 /* -- recursive node type -- */
2248 if (L.v == intvar[NF])
2252 case XC( OC_FNARG ):
// Function arguments live in the fnargs array, indexed by op->l.i.
2253 L.v = &fnargs[op->l.i];
// With a subscript node present the result is the array element named by
// R.s, otherwise the variable itself.
2255 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when key L.s exists in the array R.v, else 0.
2259 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2262 case XC( OC_REGEXP ):
2264 L.s = getvar_s(intvar[F0]);
2267 case XC( OC_MATCH ):
// Match L.s against the regex for op1; the 0/1 outcome is inverted when opn
// is '!'.
2270 X.re = as_regex(op1, &sreg);
2271 R.i = regexec(X.re, L.s, 0, NULL, 0);
2272 if (X.re == &sreg) regfree(X.re);
2273 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2277 /* if source is a temporary string, just relink it to dest */
2278 if (R.v == v1+1 && R.v->string) {
2279 res = setvar_p(L.v, R.v->string);
2282 res = copyvar(L.v, R.v);
2286 case XC( OC_TERNARY ):
// The right child must be an OC_COLON node; its l.n / r.n subtrees are the
// two alternatives.
2287 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2288 runtime_error(EMSG_POSSIBLE_ERROR);
2289 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
// Function call: a function without a body is an error.
2293 if (!op->r.f->body.first)
2294 runtime_error(EMSG_UNDEF_FUNC);
// Allocate nargs + 1 argument slots. Each actual argument is evaluated, and
// its slot is flagged VF_CHILD with x.parent pointing at the evaluated value.
2296 X.v = R.v = nvalloc(op->r.f->nargs+1);
2298 L.v = evaluate(nextarg(&op1), v1);
2300 R.v->type |= VF_CHILD;
2301 R.v->x.parent = L.v;
2302 if (++R.v - X.v >= op->r.f->nargs)
2310 res = evaluate(op->r.f->body.first, res);
2317 case XC( OC_GETLINE ):
2318 case XC( OC_PGETLINE ):
// Input source named by L.s: popen for OC_PGETLINE (marking the stream as a
// pipe), plain fopen otherwise.
2320 X.rsm = newfile(L.s);
2322 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2323 X.rsm->F = popen(L.s, "r");
2324 X.rsm->is_pipe = TRUE;
2326 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
// Uses the current input file iF, opening the next one on demand.
2330 if (!iF) iF = next_input_file();
2335 setvar_i(intvar[ERRNO], errno);
2343 L.i = awk_getline(X.rsm, R.v);
2346 incvar(intvar[FNR]);
2353 /* simple builtins */
2354 case XC( OC_FBLTIN ):
// rand() scaled by RAND_MAX.
2362 R.d = (double)rand() / (double)RAND_MAX;
2364 #if ENABLE_FEATURE_AWK_MATH
2390 runtime_error(EMSG_NO_MATH);
// New seed: the argument when one was given, otherwise the current time.
2395 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2405 L.s = getvar_s(intvar[F0]);
// system(): executed only when ENABLE_FEATURE_ALLOW_EXEC is on and the
// command is non-empty; the result is the return value of system() shifted
// right by 8 bits, or 0 when nothing was run.
2411 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2412 ? (system(L.s) >> 8) : 0;
2420 X.rsm = newfile(L.s);
// Closing a stream: look it up in fdhash by name, close it with pclose or
// fclose depending on is_pipe, free its buffer and drop the hash entry.
2429 X.rsm = (rstream *)hash_search(fdhash, L.s);
2431 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2432 free(X.rsm->buffer);
2433 hash_remove(fdhash, L.s);
2436 setvar_i(intvar[ERRNO], errno);
2443 case XC( OC_BUILTIN ):
2444 res = exec_builtin(op, res);
2447 case XC( OC_SPRINTF ):
2448 setvar_p(res, awk_printf(op1));
2451 case XC( OC_UNARY ):
2453 L.d = R.d = getvar_i(R.v);
2468 L.d = istrue(X.v) ? 0 : 1;
2479 case XC( OC_FIELD ):
// R.i is the field number; fields are stored zero-based in Fields[].
2480 R.i = (int)getvar_i(R.v);
2487 res = &Fields[R.i - 1];
2491 /* concatenation (" ") and index joining (",") */
2492 case XC( OC_CONCAT ):
2493 case XC( OC_COMMA ):
// Size of both strings plus 2; for OC_COMMA the buffer additionally gets
// room for the SUBSEP string.
2494 opn = strlen(L.s) + strlen(R.s) + 2;
2497 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2498 L.s = getvar_s(intvar[SUBSEP]);
2499 X.s = xrealloc(X.s, opn + strlen(L.s));
// Short-circuit logic: the right operand is tested only when the left one
// does not already decide the outcome.
2507 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2511 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2514 case XC( OC_BINARY ):
2515 case XC( OC_REPLACE ):
2516 R.d = getvar_i(R.v);
2528 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2532 #if ENABLE_FEATURE_AWK_MATH
2533 L.d = pow(L.d, R.d);
2535 runtime_error(EMSG_NO_MATH);
// Remainder computed as L.d minus the int-truncated quotient times R.d.
2539 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2540 L.d -= (int)(L.d / R.d) * R.d;
// OC_BINARY stores into res, OC_REPLACE stores into X.v instead.
2543 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2546 case XC( OC_COMPARE ):
// Numeric comparison when both sides are numeric; otherwise a string
// comparison that ignores case when icase is set. L.d receives the
// difference or the comparison result.
2547 if (is_numeric(L.v) && is_numeric(R.v)) {
2548 L.d = getvar_i(L.v) - getvar_i(R.v);
2550 L.s = getvar_s(L.v);
2551 R.s = getvar_s(R.v);
2552 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
// Bits above the lowest one in opn select the comparison; the lowest bit
// selects whether the outcome is used directly or negated.
2554 switch (opn & 0xfe) {
2565 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2569 runtime_error(EMSG_POSSIBLE_ERROR);
2571 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2573 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2583 /* -------- main & co. -------- */
// awk_exit: shutdown path taking the exit code r.
// NOTE(review): only a sample of this function's lines is visible in this
// listing; how r is finally used is not shown.
2585 static int awk_exit(int r)
// Evaluate the endseq chain (the END actions, going by the chain typedef
// comment earlier in the file).
2596 evaluate(endseq.first, &tv);
2599 /* waiting for children */
// Visit every bucket of fdhash and pclose each stream that is open and
// marked as a pipe.
2600 for (i = 0; i < fdhash->csize; i++) {
2601 hi = fdhash->items[i];
2603 if (hi->data.rs.F && hi->data.rs.is_pipe)
2604 pclose(hi->data.rs.F);
2612 /* if expr looks like "var=value", perform assignment and return 1,
2613 * otherwise return 0 */
// NOTE(review): only a sample of this function's lines is visible in this
// listing; the return statements and the setup of s0/s1 are missing.
2614 static int is_assignment(const char *expr)
2616 char *exprc, *s, *s0, *s1;
// Work on a private copy of expr (presumably so it can be edited in place).
2618 exprc = xstrdup(expr);
// Not an assignment when the first character fails isalnum_() or when the
// text contains no '='. On success s points at the '='.
2619 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
// The value text is rewritten through s1, one nextchar() result at a time
// (presumably decoding escape sequences -- nextchar is defined elsewhere).
2627 *(s1++) = nextchar(&s);
// Assign the value at s0 to the variable named by exprc.
2630 setvar_u(newvar(exprc), s0);
2635 /* switch to next input file */
// NOTE(review): only a sample of this function's lines is visible in this
// listing; the loop structure, the stdin fallback (if any) and the return are
// missing.
2636 static rstream *next_input_file(void)
// files_happen is static, so it persists across calls; it is set below once
// a file name has been accepted.
2639 static int files_happen = FALSE;
2642 const char *fname, *ind;
// Close the previously used stream, if any, and reset the read positions.
2644 if (rsm.F) fclose(rsm.F);
2646 rsm.pos = rsm.adv = 0;
// No further arguments are left once ARGIND + 1 reaches ARGC.
2649 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// Advance ARGIND and fetch the matching ARGV element. Empty entries and
// entries that is_assignment() consumes are not opened; anything else is
// opened for reading.
2655 ind = getvar_s(incvar(intvar[ARGIND]));
2656 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2657 if (fname && *fname && !is_assignment(fname))
2658 F = afopen(fname, "r");
// Record that an input file was seen and publish its name as FILENAME.
2662 files_happen = TRUE;
2663 setvar_s(intvar[FILENAME], fname);
// awk_main: applet entry point -- sets up the interpreter state, parses the
// command line and the program text, then runs the BEGIN, main and END
// sequences.
// NOTE(review): only a sample of this function's lines is visible in this
// listing; several declarations, loop headers and branches are missing.
2668 int awk_main(int argc, char **argv);
2669 int awk_main(int argc, char **argv)
2672 char *opt_F, *opt_W;
2673 llist_t *opt_v = NULL;
// Cursors over the vNames / vValues tables, consumed by nextword() below.
2678 char *vnames = (char *)vNames; /* cheat */
2679 char *vvalues = (char *)vValues;
2681 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2682 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2683 if (ENABLE_LOCALE_SUPPORT)
2684 setlocale(LC_NUMERIC, "C");
2688 /* allocate global buffer */
2689 buf = xmalloc(MAXVARFMT + 1);
// Create the four global hash tables.
2691 vhash = hash_init();
2692 ahash = hash_init();
2693 fdhash = hash_init();
2694 fnhash = hash_init();
2696 /* initialize variables */
// One intvar[] slot per name in vnames. A value entry starting with the byte
// 0377 is not assigned as a string (the alternative branch is not visible),
// and a '*' in front of the following name marks the variable VF_SPECIAL.
2697 for (i = 0; *vnames; i++) {
2698 intvar[i] = v = newvar(nextword(&vnames));
2699 if (*vvalues != '\377')
2700 setvar_s(v, nextword(&vvalues));
2704 if (*vnames == '*') {
2705 v->type |= VF_SPECIAL;
// Apply the special handling for the initial FS and RS values.
2710 handle_special(intvar[FS]);
2711 handle_special(intvar[RS]);
// Pre-register the three standard streams under their /dev names.
2713 newfile("/dev/stdin")->F = stdin;
2714 newfile("/dev/stdout")->F = stdout;
2715 newfile("/dev/stderr")->F = stderr;
2717 /* Huh, people report that sometimes environ is NULL. Oh well. */
// Copy every environment entry into the ENVIRON array: s is a private copy
// used for the key and s1 locates the '=' (the step that splits key from
// value is not visible here).
2718 if (environ) for (envp = environ; *envp; envp++) {
2719 char *s = xstrdup(*envp);
2720 char *s1 = strchr(s, '=');
2723 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1);
// Option parsing for -F, -v, -f and -W; -v values are collected in the
// opt_v list and -f stores its argument in programname.
2727 opt_complementary = "v::";
2728 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2732 setvar_s(intvar[FS], opt_F); // -F
// Each -v argument has to be accepted by is_assignment().
2733 while (opt_v) { /* -v */
2734 if (!is_assignment(llist_pop(&opt_v)))
2737 if (opt & 0x4) { // -f
// Self-initialisation, per the original remark, only to quiet the compiler.
2738 char *s = s; /* die, gcc, die */
2739 FILE *from_file = afopen(programname, "r");
2740 /* one byte is reserved for some trick in next_token */
// Seekable input: take the size from ftell and read the whole file in one
// fread, leaving s[0] free.
2741 if (fseek(from_file, 0, SEEK_END) == 0) {
2742 flen = ftell(from_file);
2743 s = xmalloc(flen + 4);
2744 fseek(from_file, 0, SEEK_SET);
2745 i = 1 + fread(s + 1, 1, flen, from_file);
// Chunked reading: grow the buffer and read up to 4094 bytes per round until
// fread returns 0 (presumably the path taken when fseek fails; the else is
// not visible here).
2747 for (i = j = 1; j > 0; i += j) {
2748 s = xrealloc(s, i + 4096);
2749 j = fread(s + i, 1, 4094, from_file);
// The program text starts at s + 1.
2754 parse_program(s + 1);
2756 } else { // no -f: take program from 1st parameter
2759 programname = "cmd. line";
2760 parse_program(*argv++);
2763 if (opt & 0x8) // -W
2764 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2766 /* fill in ARGV array */
// ARGC is argc + 1, ARGV[0] is the fixed string "awk", and the remaining
// argv entries follow from index 1.
2767 setvar_i(intvar[ARGC], argc + 1);
2768 setari_u(intvar[ARGV], 0, "awk");
2771 setari_u(intvar[ARGV], ++i, *argv++);
// Run the BEGIN sequence; with neither main nor END actions there is nothing
// left to do.
2773 evaluate(beginseq.first, &tv);
2774 if (!mainseq.first && !endseq.first)
2775 awk_exit(EXIT_SUCCESS);
2777 /* input file could already be opened in BEGIN block */
2778 if (!iF) iF = next_input_file();
2780 /* passing through input files */
// Per input file: FNR restarts at 0; each record read into $0 increments FNR
// and runs the main sequence.
2783 setvar_i(intvar[FNR], 0);
2785 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2788 incvar(intvar[FNR]);
2789 evaluate(mainseq.first, &tv);
// Reports the errno text as a runtime error (presumably when awk_getline
// returned a negative value; the condition is not visible here).
2796 runtime_error(strerror(errno));
2798 iF = next_input_file();
2801 awk_exit(EXIT_SUCCESS);