1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
13 extern char **environ;
15 /* This is a NOEXEC applet. Be very careful! */
22 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
23 #define VF_ARRAY 0x0002 /* 1 = it's an array */
25 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
26 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
27 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
28 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
29 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
30 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
31 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
33 /* these flags are static, don't change them when value is changed */
34 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
37 typedef struct var_s {
38 unsigned short type; /* flags */
42 int aidx; /* func arg idx (for compilation stage) */
43 struct xhash_s *array; /* array ptr */
44 struct var_s *parent; /* for func args, ptr to actual parameter */
45 char **walker; /* list of array elements (for..in) */
49 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50 typedef struct chain_s {
53 const char *programname;
57 typedef struct func_s {
63 typedef struct rstream_s {
69 unsigned short is_pipe;
72 typedef struct hash_item_s {
74 struct var_s v; /* variable/array hash */
75 struct rstream_s rs; /* redirect streams hash */
76 struct func_s f; /* functions hash */
78 struct hash_item_s *next; /* next in chain */
79 char name[1]; /* really it's longer */
82 typedef struct xhash_s {
83 unsigned nel; /* num of elements */
84 unsigned csize; /* current hash size */
85 unsigned nprime; /* next hash size in PRIMES[] */
86 unsigned glen; /* summary length of item names */
87 struct hash_item_s **items;
91 typedef struct node_s {
112 /* Block of temporary variables */
113 typedef struct nvblock_s {
116 struct nvblock_s *prev;
117 struct nvblock_s *next;
121 typedef struct tsplitter_s {
126 /* simple token classes */
127 /* Order and hex values are very important!!! See next_token() */
128 #define TC_SEQSTART 1 /* ( */
129 #define TC_SEQTERM (1 << 1) /* ) */
130 #define TC_REGEXP (1 << 2) /* /.../ */
131 #define TC_OUTRDR (1 << 3) /* | > >> */
132 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
133 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
134 #define TC_BINOPX (1 << 6) /* two-opnd operator */
135 #define TC_IN (1 << 7)
136 #define TC_COMMA (1 << 8)
137 #define TC_PIPE (1 << 9) /* input redirection pipe */
138 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
139 #define TC_ARRTERM (1 << 11) /* ] */
140 #define TC_GRPSTART (1 << 12) /* { */
141 #define TC_GRPTERM (1 << 13) /* } */
142 #define TC_SEMICOL (1 << 14)
143 #define TC_NEWLINE (1 << 15)
144 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
145 #define TC_WHILE (1 << 17)
146 #define TC_ELSE (1 << 18)
147 #define TC_BUILTIN (1 << 19)
148 #define TC_GETLINE (1 << 20)
149 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
150 #define TC_BEGIN (1 << 22)
151 #define TC_END (1 << 23)
152 #define TC_EOF (1 << 24)
153 #define TC_VARIABLE (1 << 25)
154 #define TC_ARRAY (1 << 26)
155 #define TC_FUNCTION (1 << 27)
156 #define TC_STRING (1 << 28)
157 #define TC_NUMBER (1 << 29)
159 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
161 /* combined token classes */
162 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
165 TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
167 #define TC_STATEMNT (TC_STATX | TC_WHILE)
168 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
170 /* word tokens, cannot mean something else if not expected */
171 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
172 TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
174 /* discard newlines after these */
175 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
176 TC_BINOP | TC_OPTERM)
178 /* what can expression begin with */
179 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
180 /* what can group begin with */
181 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
183 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184 /* operator is inserted between them */
185 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
186 TC_STRING | TC_NUMBER | TC_UOPPOST)
187 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
189 #define OF_RES1 0x010000
190 #define OF_RES2 0x020000
191 #define OF_STR1 0x040000
192 #define OF_STR2 0x080000
193 #define OF_NUM1 0x100000
194 #define OF_CHECKED 0x200000
196 /* combined operator flags */
199 #define xS (OF_RES2 | OF_STR2)
201 #define VV (OF_RES1 | OF_RES2)
202 #define Nx (OF_RES1 | OF_NUM1)
203 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
204 #define Sx (OF_RES1 | OF_STR1)
205 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
206 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
208 #define OPCLSMASK 0xFF00
209 #define OPNMASK 0x007F
211 /* The operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
212 * For builtins this byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
213 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place a priority value (or, for builtins, the argument descriptor byte)
 * into the highest byte of an info word.
 * The argument is parenthesized so that expression arguments expand
 * correctly, and the shift is performed on uint32_t because descriptor
 * values such as 0x83 would otherwise be shifted into the sign bit of a
 * signed int. */
#define P(x) ((uint32_t)(x) << 24)
216 #define PRIMASK 0x7F000000
217 #define PRIMASK2 0x7E000000
219 /* Operation classes */
221 #define SHIFT_TIL_THIS 0x0600
222 #define RECUR_FROM_THIS 0x1000
225 OC_DELETE=0x0100, OC_EXEC=0x0200, OC_NEWSOURCE=0x0300,
226 OC_PRINT=0x0400, OC_PRINTF=0x0500, OC_WALKINIT=0x0600,
228 OC_BR=0x0700, OC_BREAK=0x0800, OC_CONTINUE=0x0900,
229 OC_EXIT=0x0a00, OC_NEXT=0x0b00, OC_NEXTFILE=0x0c00,
230 OC_TEST=0x0d00, OC_WALKNEXT=0x0e00,
232 OC_BINARY=0x1000, OC_BUILTIN=0x1100, OC_COLON=0x1200,
233 OC_COMMA=0x1300, OC_COMPARE=0x1400, OC_CONCAT=0x1500,
234 OC_FBLTIN=0x1600, OC_FIELD=0x1700, OC_FNARG=0x1800,
235 OC_FUNC=0x1900, OC_GETLINE=0x1a00, OC_IN=0x1b00,
236 OC_LAND=0x1c00, OC_LOR=0x1d00, OC_MATCH=0x1e00,
237 OC_MOVE=0x1f00, OC_PGETLINE=0x2000, OC_REGEXP=0x2100,
238 OC_REPLACE=0x2200, OC_RETURN=0x2300, OC_SPRINTF=0x2400,
239 OC_TERNARY=0x2500, OC_UNARY=0x2600, OC_VAR=0x2700,
242 ST_IF=0x3000, ST_DO=0x3100, ST_FOR=0x3200,
246 /* simple builtins */
248 F_in=0, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
249 F_ti, F_le, F_sy, F_ff, F_cl
254 B_a2=0, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
256 B_an, B_co, B_ls, B_or, B_rs, B_xo,
259 /* tokens and their corresponding info values */
261 #define NTC "\377" /* switch to next token class (tc<<1) */
264 #define OC_B OC_BUILTIN
266 static const char tokenlist[] =
269 "\1/" NTC /* REGEXP */
270 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
271 "\2++" "\2--" NTC /* UOPPOST */
272 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
273 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
274 "\2*=" "\2/=" "\2%=" "\2^="
275 "\1+" "\1-" "\3**=" "\2**"
276 "\1/" "\1%" "\1^" "\1*"
277 "\2!=" "\2>=" "\2<=" "\1>"
278 "\1<" "\2!~" "\1~" "\2&&"
279 "\2||" "\1?" "\1:" NTC
283 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
289 "\2if" "\2do" "\3for" "\5break" /* STATX */
290 "\10continue" "\6delete" "\5print"
291 "\6printf" "\4next" "\10nextfile"
292 "\6return" "\4exit" NTC
296 "\3and" "\5compl" "\6lshift" "\2or"
298 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
299 "\3cos" "\3exp" "\3int" "\3log"
300 "\4rand" "\3sin" "\4sqrt" "\5srand"
301 "\6gensub" "\4gsub" "\5index" "\6length"
302 "\5match" "\5split" "\7sprintf" "\3sub"
303 "\6substr" "\7systime" "\10strftime"
304 "\7tolower" "\7toupper" NTC
306 "\4func" "\10function" NTC
311 static const uint32_t tokeninfo[] = {
315 xS|'a', xS|'w', xS|'|',
316 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
317 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
319 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
320 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
321 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
322 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
323 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
324 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
325 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
326 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
327 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
328 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
329 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
330 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
331 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
332 OC_COLON|xx|P(67)|':',
335 OC_PGETLINE|SV|P(37),
336 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
337 OC_UNARY|xV|P(19)|'!',
343 ST_IF, ST_DO, ST_FOR, OC_BREAK,
344 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
345 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
346 OC_RETURN|Vx, OC_EXIT|Nx,
350 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
357 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
358 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
365 /* internal variable names and their initial values */
366 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
368 CONVFMT=0, OFMT, FS, OFS,
369 ORS, RS, RT, FILENAME,
370 SUBSEP, ARGIND, ARGC, ARGV,
373 ENVIRON, F0, _intvarcount_
376 static const char vNames[] =
377 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
378 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
379 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
381 "NR\0" "NF\0*" "IGNORECASE\0*"
382 "ENVIRON\0" "$\0*" "\0";
384 static const char vValues[] =
385 "%.6g\0" "%.6g\0" " \0" " \0"
386 "\n\0" "\n\0" "\0" "\0"
390 /* hash size may grow to these values */
/* Initial number of hash buckets, assigned to csize by hash_init().
 * No trailing semicolon: the macro must be usable inside expressions. */
#define FIRST_PRIME 61
/* Successive bucket counts taken by hash_rebuild(), indexed by nprime */
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
/* Number of entries in PRIMES[]; hash_rebuild() compares nprime against it
 * before picking the next size */
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
397 static var * V[_intvarcount_];
398 static chain beginseq, mainseq, endseq, *seq;
399 static int nextrec, nextfile;
400 static node *break_ptr, *continue_ptr;
402 static xhash *vhash, *ahash, *fdhash, *fnhash;
403 static const char *programname;
405 static int is_f0_split;
408 static tsplitter fsplitter, rsplitter;
423 /* It had an even better name: 't'. Whoever knows what it is, please rename it! */
425 /* function prototypes */
426 static void handle_special(var *);
427 static node *parse_expr(uint32_t);
428 static void chain_group(void);
429 static var *evaluate(node *, var *);
430 static rstream *next_input_file(void);
431 static int fmt_num(char *, int, const char *, double, int);
432 static int awk_exit(int) ATTRIBUTE_NORETURN;
434 /* ---- error handling ---- */
436 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
437 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
438 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
439 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
440 static const char EMSG_INV_FMT[] = "Invalid format specifier";
441 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
442 static const char EMSG_NOT_ARRAY[] = "Not an array";
443 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
444 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
445 #if !ENABLE_FEATURE_AWK_MATH
446 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
449 static void zero_out_var(var * vp)
451 memset(vp, 0, sizeof(*vp));
454 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
455 static void syntax_error(const char * const message)
457 bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
460 #define runtime_error(x) syntax_error(x)
463 /* ---- hash stuff ---- */
465 static unsigned hashidx(const char *name)
469 while (*name) idx = *name++ + (idx << 6) - idx;
473 /* create new hash */
474 static xhash *hash_init(void)
478 newhash = xzalloc(sizeof(xhash));
479 newhash->csize = FIRST_PRIME;
480 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
485 /* find item in hash, return ptr to data, NULL if not found */
486 static void *hash_search(xhash *hash, const char *name)
490 hi = hash->items [ hashidx(name) % hash->csize ];
492 if (strcmp(hi->name, name) == 0)
499 /* grow hash if it becomes too big */
500 static void hash_rebuild(xhash *hash)
502 unsigned newsize, i, idx;
503 hash_item **newitems, *hi, *thi;
505 if (hash->nprime == NPRIMES)
508 newsize = PRIMES[hash->nprime++];
509 newitems = xzalloc(newsize * sizeof(hash_item *));
511 for (i=0; i<hash->csize; i++) {
516 idx = hashidx(thi->name) % newsize;
517 thi->next = newitems[idx];
523 hash->csize = newsize;
524 hash->items = newitems;
527 /* find item in hash, add it if necessary. Return ptr to data */
528 static void *hash_find(xhash *hash, const char *name)
534 hi = hash_search(hash, name);
536 if (++hash->nel / hash->csize > 10)
539 l = strlen(name) + 1;
540 hi = xzalloc(sizeof(hash_item) + l);
541 memcpy(hi->name, name, l);
543 idx = hashidx(name) % hash->csize;
544 hi->next = hash->items[idx];
545 hash->items[idx] = hi;
551 #define findvar(hash, name) ((var*) hash_find((hash) , (name)))
552 #define newvar(name) ((var*) hash_find(vhash , (name)))
553 #define newfile(name) ((rstream*)hash_find(fdhash ,(name)))
554 #define newfunc(name) ((func*) hash_find(fnhash , (name)))
556 static void hash_remove(xhash *hash, const char *name)
558 hash_item *hi, **phi;
560 phi = &(hash->items[ hashidx(name) % hash->csize ]);
563 if (strcmp(hi->name, name) == 0) {
564 hash->glen -= (strlen(name) + 1);
574 /* ------ some useful functions ------ */
576 static void skip_spaces(char **s)
581 if (*p == '\\' && p[1] == '\n') {
584 } else if (*p != ' ' && *p != '\t') {
592 static char *nextword(char **s)
596 while (*(*s)++) /* */;
601 static char nextchar(char **s)
607 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
608 if (c == '\\' && *s == pps) c = *((*s)++);
612 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
614 return (isalnum(c) || c == '_');
617 static FILE *afopen(const char *path, const char *mode)
619 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
622 /* -------- working with variables (set/get/copy/etc) -------- */
624 static xhash *iamarray(var *v)
628 while (a->type & VF_CHILD)
631 if (!(a->type & VF_ARRAY)) {
633 a->x.array = hash_init();
638 static void clear_array(xhash *array)
643 for (i = 0; i < array->csize; i++) {
644 hi = array->items[i];
648 free(thi->data.v.string);
651 array->items[i] = NULL;
653 array->glen = array->nel = 0;
656 /* clear a variable */
657 static var *clrvar(var *v)
659 if (!(v->type & VF_FSTR))
662 v->type &= VF_DONTTOUCH;
668 /* assign string value to variable */
669 static var *setvar_p(var *v, char *value)
678 /* same as setvar_p but make a copy of string */
679 static var *setvar_s(var *v, const char *value)
681 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
684 /* same as setvar_s but set USER flag */
685 static var *setvar_u(var *v, const char *value)
692 /* set array element to user string */
693 static void setari_u(var *a, int idx, const char *s)
696 static char sidx[12];
698 sprintf(sidx, "%d", idx);
699 v = findvar(iamarray(a), sidx);
703 /* assign numeric value to variable */
704 static var *setvar_i(var *v, double value)
707 v->type |= VF_NUMBER;
713 static const char *getvar_s(var *v)
715 /* if v is numeric and has no cached string, convert it to string */
716 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
717 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
718 v->string = xstrdup(buf);
719 v->type |= VF_CACHED;
721 return (v->string == NULL) ? "" : v->string;
724 static double getvar_i(var *v)
728 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
732 v->number = strtod(s, &s);
733 if (v->type & VF_USER) {
741 v->type |= VF_CACHED;
746 static var *copyvar(var *dest, const var *src)
750 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
751 dest->number = src->number;
753 dest->string = xstrdup(src->string);
755 handle_special(dest);
759 static var *incvar(var *v)
761 return setvar_i(v, getvar_i(v)+1.);
764 /* return true if v is number or numeric string */
765 static int is_numeric(var *v)
768 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
771 /* return 1 when value of v corresponds to true, 0 otherwise */
772 static int istrue(var *v)
775 return (v->number == 0) ? 0 : 1;
777 return (v->string && *(v->string)) ? 1 : 0;
780 /* temporary variables allocator. Last allocated should be first freed */
781 static var *nvalloc(int n)
789 if ((cb->pos - cb->nv) + n <= cb->size) break;
794 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
795 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
800 if (pb) pb->next = cb;
806 while (v < cb->pos) {
815 static void nvfree(var *v)
819 if (v < cb->nv || v >= cb->pos)
820 runtime_error(EMSG_INTERNAL_ERROR);
822 for (p=v; p<cb->pos; p++) {
823 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
824 clear_array(iamarray(p));
825 free(p->x.array->items);
828 if (p->type & VF_WALK)
835 while (cb->prev && cb->pos == cb->nv) {
840 /* ------- awk program text parsing ------- */
842 /* Parse the next token pointed to by global pos, place the results into global ttt.
843 * If the token isn't expected, give up. Return the token class
845 static uint32_t next_token(uint32_t expected)
847 static int concat_inserted;
848 static uint32_t save_tclass, save_info;
849 static uint32_t ltclass = TC_OPTERM;
858 ttt.rollback = FALSE;
860 } else if (concat_inserted) {
861 concat_inserted = FALSE;
862 ttt.tclass = save_tclass;
863 ttt.info = save_info;
871 while (*p != '\n' && *p != '\0') p++;
879 } else if (*p == '\"') {
881 ttt.string = s = ++p;
883 if (*p == '\0' || *p == '\n')
884 syntax_error(EMSG_UNEXP_EOS);
885 *(s++) = nextchar(&p);
891 } else if ((expected & TC_REGEXP) && *p == '/') {
893 ttt.string = s = ++p;
895 if (*p == '\0' || *p == '\n')
896 syntax_error(EMSG_UNEXP_EOS);
897 if ((*s++ = *p++) == '\\') {
899 *(s-1) = bb_process_escape_sequence((const char **)&p);
900 if (*pp == '\\') *s++ = '\\';
901 if (p == pp) *s++ = *p++;
908 } else if (*p == '.' || isdigit(*p)) {
910 ttt.number = strtod(p, &p);
912 syntax_error(EMSG_UNEXP_TOKEN);
916 /* search for something known */
926 /* if token class is expected, token
927 * matches and it's not a longer word,
928 * then this is what we are looking for
930 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
931 *tl == *p && strncmp(p, tl, l) == 0 &&
932 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
942 /* it's a name (var/array/function),
943 * otherwise it's something wrong
946 syntax_error(EMSG_UNEXP_TOKEN);
949 while (isalnum_(*(++p))) {
954 /* also consume whitespace between functionname and bracket */
955 if (!(expected & TC_VARIABLE)) skip_spaces(&p);
968 /* skipping newlines in some cases */
969 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
972 /* insert concatenation operator when needed */
973 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
974 concat_inserted = TRUE;
976 save_info = ttt.info;
978 ttt.info = OC_CONCAT | SS | P(35);
983 ltclass = ttt.tclass;
985 /* Are we ready for this? */
986 if (! (ltclass & expected))
987 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
988 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
993 static void rollback_token(void)
998 static node *new_node(uint32_t info)
1002 n = xzalloc(sizeof(node));
1008 static node *mk_re_node(const char *s, node *n, regex_t *re)
1010 n->info = OC_REGEXP;
1013 xregcomp(re, s, REG_EXTENDED);
1014 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1019 static node *condition(void)
1021 next_token(TC_SEQSTART);
1022 return parse_expr(TC_SEQTERM);
1025 /* parse expression terminated by given argument, return ptr
1026 * to built subtree. Terminator is eaten by parse_expr */
1027 static node *parse_expr(uint32_t iexp)
1036 sn.r.n = glptr = NULL;
1037 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1039 while (!((tc = next_token(xtc)) & iexp)) {
1040 if (glptr && (ttt.info == (OC_COMPARE|VV|P(39)|2))) {
1041 /* input redirection (<) attached to glptr node */
1042 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1044 xtc = TC_OPERAND | TC_UOPPRE;
1047 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1048 /* for binary and postfix-unary operators, jump back over
1049 * previous operators with higher priority */
1051 while ( ((ttt.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1052 ((ttt.info == vn->info) && ((ttt.info & OPCLSMASK) == OC_COLON)) )
1054 if ((ttt.info & OPCLSMASK) == OC_TERNARY)
1056 cn = vn->a.n->r.n = new_node(ttt.info);
1058 if (tc & TC_BINOP) {
1060 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1061 if ((ttt.info & OPCLSMASK) == OC_PGETLINE) {
1063 next_token(TC_GETLINE);
1064 /* give maximum priority to this pipe */
1065 cn->info &= ~PRIMASK;
1066 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1070 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1075 /* for operands and prefix-unary operators, attach them
1078 cn = vn->r.n = new_node(ttt.info);
1080 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1081 if (tc & (TC_OPERAND | TC_REGEXP)) {
1082 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1083 /* one should be very careful with switch on tclass -
1084 * only simple tclasses should be used! */
1089 if ((v = hash_search(ahash, ttt.string)) != NULL) {
1090 cn->info = OC_FNARG;
1091 cn->l.i = v->x.aidx;
1093 cn->l.v = newvar(ttt.string);
1095 if (tc & TC_ARRAY) {
1097 cn->r.n = parse_expr(TC_ARRTERM);
1104 v = cn->l.v = xzalloc(sizeof(var));
1106 setvar_i(v, ttt.number);
1108 setvar_s(v, ttt.string);
1112 mk_re_node(ttt.string, cn, xzalloc(sizeof(regex_t)*2));
1117 cn->r.f = newfunc(ttt.string);
1118 cn->l.n = condition();
1122 cn = vn->r.n = parse_expr(TC_SEQTERM);
1128 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1132 cn->l.n = condition();
1141 /* add node to chain. Return ptr to alloc'd node */
1142 static node *chain_node(uint32_t info)
1147 seq->first = seq->last = new_node(0);
1149 if (seq->programname != programname) {
1150 seq->programname = programname;
1151 n = chain_node(OC_NEWSOURCE);
1152 n->l.s = xstrdup(programname);
1157 seq->last = n->a.n = new_node(OC_DONE);
1162 static void chain_expr(uint32_t info)
1166 n = chain_node(info);
1167 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1168 if (ttt.tclass & TC_GRPTERM)
1172 static node *chain_loop(node *nn)
1174 node *n, *n2, *save_brk, *save_cont;
1176 save_brk = break_ptr;
1177 save_cont = continue_ptr;
1179 n = chain_node(OC_BR | Vx);
1180 continue_ptr = new_node(OC_EXEC);
1181 break_ptr = new_node(OC_EXEC);
1183 n2 = chain_node(OC_EXEC | Vx);
1186 continue_ptr->a.n = n2;
1187 break_ptr->a.n = n->r.n = seq->last;
1189 continue_ptr = save_cont;
1190 break_ptr = save_brk;
1195 /* parse group and attach it to chain */
1196 static void chain_group(void)
1202 c = next_token(TC_GRPSEQ);
1203 } while (c & TC_NEWLINE);
1205 if (c & TC_GRPSTART) {
1206 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1207 if (ttt.tclass & TC_NEWLINE) continue;
1211 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1213 chain_expr(OC_EXEC | Vx);
1214 } else { /* TC_STATEMNT */
1215 switch (ttt.info & OPCLSMASK) {
1217 n = chain_node(OC_BR | Vx);
1218 n->l.n = condition();
1220 n2 = chain_node(OC_EXEC);
1222 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1224 n2->a.n = seq->last;
1232 n = chain_loop(NULL);
1237 n2 = chain_node(OC_EXEC);
1238 n = chain_loop(NULL);
1240 next_token(TC_WHILE);
1241 n->l.n = condition();
1245 next_token(TC_SEQSTART);
1246 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1247 if (ttt.tclass & TC_SEQTERM) { /* for-in */
1248 if ((n2->info & OPCLSMASK) != OC_IN)
1249 syntax_error(EMSG_UNEXP_TOKEN);
1250 n = chain_node(OC_WALKINIT | VV);
1253 n = chain_loop(NULL);
1254 n->info = OC_WALKNEXT | Vx;
1256 } else { /* for (;;) */
1257 n = chain_node(OC_EXEC | Vx);
1259 n2 = parse_expr(TC_SEMICOL);
1260 n3 = parse_expr(TC_SEQTERM);
1270 n = chain_node(ttt.info);
1271 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1272 if (ttt.tclass & TC_OUTRDR) {
1273 n->info |= ttt.info;
1274 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1276 if (ttt.tclass & TC_GRPTERM)
1281 n = chain_node(OC_EXEC);
1286 n = chain_node(OC_EXEC);
1287 n->a.n = continue_ptr;
1290 /* delete, next, nextfile, return, exit */
1292 chain_expr(ttt.info);
1297 static void parse_program(char *p)
1306 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1307 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1309 if (tclass & TC_OPTERM)
1313 if (tclass & TC_BEGIN) {
1317 } else if (tclass & TC_END) {
1321 } else if (tclass & TC_FUNCDECL) {
1322 next_token(TC_FUNCTION);
1324 f = newfunc(ttt.string);
1325 f->body.first = NULL;
1327 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1328 v = findvar(ahash, ttt.string);
1329 v->x.aidx = (f->nargs)++;
1331 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1338 } else if (tclass & TC_OPSEQ) {
1340 cn = chain_node(OC_TEST);
1341 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1342 if (ttt.tclass & TC_GRPSTART) {
1346 chain_node(OC_PRINT);
1348 cn->r.n = mainseq.last;
1350 } else /* if (tclass & TC_GRPSTART) */ {
1358 /* -------- program execution part -------- */
1360 static node *mk_splitter(const char *s, tsplitter *spl)
1368 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1372 if (strlen(s) > 1) {
1373 mk_re_node(s, n, re);
1375 n->info = (uint32_t) *s;
1381 /* use node as a regular expression. Supplied with node ptr and regex_t
1382 * storage space. Return ptr to regex (if result points to preg, it should
1383 * be regfree'd manually later)
1385 static regex_t *as_regex(node *op, regex_t *preg)
1390 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1391 return icase ? op->r.ire : op->l.re;
1394 s = getvar_s(evaluate(op, v));
1395 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1401 /* gradually increasing buffer */
1402 static void qrealloc(char **b, int n, int *size)
1404 if (!*b || n >= *size)
1405 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1408 /* resize field storage space */
1409 static void fsrealloc(int size)
1411 static int maxfields; /* = 0;*/
1414 if (size >= maxfields) {
1416 maxfields = size + 16;
1417 Fields = xrealloc(Fields, maxfields * sizeof(var));
1418 for (; i < maxfields; i++) {
1419 Fields[i].type = VF_SPECIAL;
1420 Fields[i].string = NULL;
1424 if (size < nfields) {
1425 for (i = size; i < nfields; i++) {
1432 static int awk_split(const char *s, node *spl, char **slist)
1437 regmatch_t pmatch[2];
1439 /* in worst case, each char would be a separate field */
1440 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1443 c[0] = c[1] = (char)spl->info;
1445 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1447 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1449 l = strcspn(s, c+2);
1450 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1451 && pmatch[0].rm_so <= l
1453 l = pmatch[0].rm_so;
1454 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1456 pmatch[0].rm_eo = l;
1457 if (s[l]) pmatch[0].rm_eo++;
1463 s += pmatch[0].rm_eo;
1466 } else if (c[0] == '\0') { /* null split */
1472 } else if (c[0] != ' ') { /* single-character split */
1474 c[0] = toupper(c[0]);
1475 c[1] = tolower(c[1]);
1478 while ((s1 = strpbrk(s1, c))) {
1482 } else { /* space split */
1484 s = skip_whitespace(s);
1487 while (*s && !isspace(*s))
1495 static void split_f0(void)
1497 static char *fstrings = NULL;
1507 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1510 for (i = 0; i < n; i++) {
1511 Fields[i].string = nextword(&s);
1512 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1515 /* set NF manually to avoid side effects */
1517 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1518 V[NF]->number = nfields;
1521 /* perform additional actions when some internal variables changed */
1522 static void handle_special(var *v)
1526 const char *sep, *s;
1527 int sl, l, len, i, bsize;
1529 if (!(v->type & VF_SPECIAL))
1533 n = (int)getvar_i(v);
1536 /* recalculate $0 */
1537 sep = getvar_s(V[OFS]);
1541 for (i=0; i<n; i++) {
1542 s = getvar_s(&Fields[i]);
1545 memcpy(b+len, sep, sl);
1548 qrealloc(&b, len+l+sl, &bsize);
1549 memcpy(b+len, s, l);
1557 } else if (v == V[F0]) {
1558 is_f0_split = FALSE;
1560 } else if (v == V[FS]) {
1561 mk_splitter(getvar_s(v), &fsplitter);
1563 } else if (v == V[RS]) {
1564 mk_splitter(getvar_s(v), &rsplitter);
1566 } else if (v == V[IGNORECASE]) {
1570 n = getvar_i(V[NF]);
1571 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1572 /* right here v is invalid. Just to note... */
1576 /* step through func/builtin/etc arguments */
1577 static node *nextarg(node **pn)
1582 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1591 static void hashwalk_init(var *v, xhash *array)
1597 if (v->type & VF_WALK)
1601 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1602 *w = *(w+1) = (char *)(w + 2);
1603 for (i=0; i<array->csize; i++) {
1604 hi = array->items[i];
1606 strcpy(*w, hi->name);
1613 static int hashwalk_next(var *v)
1621 setvar_s(v, nextword(w+1));
1625 /* evaluate node, return 1 when result is true, 0 otherwise */
1626 static int ptest(node *pattern)
1628 static var v; /* static: to save stack space? */
1630 return istrue(evaluate(pattern, &v));
1633 /* read next record from stream rsm into a variable v */
1634 static int awk_getline(rstream *rsm, var *v)
1637 regmatch_t pmatch[2];
1638 int a, p, pp=0, size;
1639 int fd, so, eo, r, rp;
1642 /* we're using our own buffer since we need access to accumulating
1645 fd = fileno(rsm->F);
1650 c = (char) rsplitter.n.info;
1653 if (! m) qrealloc(&m, 256, &size);
1659 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1660 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1661 b, 1, pmatch, 0) == 0) {
1662 so = pmatch[0].rm_so;
1663 eo = pmatch[0].rm_eo;
1667 } else if (c != '\0') {
1668 s = strchr(b+pp, c);
1669 if (! s) s = memchr(b+pp, '\0', p - pp);
1676 while (b[rp] == '\n')
1678 s = strstr(b+rp, "\n\n");
1681 while (b[eo] == '\n') eo++;
1689 memmove(m, (const void *)(m+a), p+1);
1694 qrealloc(&m, a+p+128, &size);
1697 p += safe_read(fd, b+p, size-p-1);
1701 setvar_i(V[ERRNO], errno);
1710 c = b[so]; b[so] = '\0';
1714 c = b[eo]; b[eo] = '\0';
1715 setvar_s(V[RT], b+so);
1727 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1731 const char *s = format;
1733 if (int_as_int && n == (int)n) {
1734 r = snprintf(b, size, "%d", (int)n);
1736 do { c = *s; } while (c && *++s);
1737 if (strchr("diouxX", c)) {
1738 r = snprintf(b, size, format, (int)n);
1739 } else if (strchr("eEfgG", c)) {
1740 r = snprintf(b, size, format, n);
1742 runtime_error(EMSG_INV_FMT);
1749 /* formatted output into an allocated buffer, return ptr to buffer */
1750 static char *awk_printf(node *n)
1755 int i, j, incr, bsize;
1760 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1765 while (*f && (*f != '%' || *(++f) == '%'))
1767 while (*f && !isalpha(*f)) {
1769 syntax_error("%*x formats are not supported");
1773 incr = (f - s) + MAXVARFMT;
1774 qrealloc(&b, incr + i, &bsize);
1779 arg = evaluate(nextarg(&n), v);
1782 if (c == 'c' || !c) {
1783 i += sprintf(b+i, s, is_numeric(arg) ?
1784 (char)getvar_i(arg) : *getvar_s(arg));
1786 } else if (c == 's') {
1788 qrealloc(&b, incr+i+strlen(s1), &bsize);
1789 i += sprintf(b+i, s, s1);
1792 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1796 /* if there was an error while sprintf, return value is negative */
1800 b = xrealloc(b, i + 1);
1807 /* common substitution routine
1808 * replace (nm)th substring of (src) that matches (rn) with (repl), store
1809 * result into (dest), return number of substitutions. If nm=0, replace
1810 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
1811 * subexpression matching (\1-\9)
1813 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1818 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1819 regmatch_t pmatch[10];
1822 re = as_regex(rn, &sreg);
1823 if (! src) src = V[F0];
1824 if (! dest) dest = V[F0];
1829 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1830 so = pmatch[0].rm_so;
1831 eo = pmatch[0].rm_eo;
1833 qrealloc(&ds, di + eo + rl, &dssize);
1834 memcpy(ds + di, sp, eo);
1840 for (s = repl; *s; s++) {
1846 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1847 di -= ((nbs + 3) >> 1);
1856 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1857 qrealloc(&ds, di + rl + n, &dssize);
1858 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1869 if (! (ds[di++] = *sp++)) break;
1873 qrealloc(&ds, di + strlen(sp), &dssize);
1874 strcpy(ds + di, sp);
1876 if (re == &sreg) regfree(re);
// exec_builtin: run the builtin function selected by (op->info & OPNMASK); results are written through res.
// NOTE(review): this listing skips source lines, so the case labels, most declarations, braces and
// the return statement are not visible here. Comments below name a builtin only where the visible
// statement makes it evident.
1880 static var *exec_builtin(node *op, var *res)
1887 regmatch_t pmatch[2];
1889 static tsplitter tspl;
1898 isr = info = op->info;
1901 av[2] = av[3] = NULL;
// Gather up to four arguments: an[] holds the nodes, av[] the evaluated values, as[] their strings.
// Bits of isr decide whether an argument is evaluated and whether its string value is fetched.
// presumably isr is shifted once per argument in an elided line - confirm
1902 for (i=0 ; i<4 && op ; i++) {
1903 an[i] = nextarg(&op);
1904 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1905 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// The top two bits of info give the minimum number of arguments.
1910 if (nargs < (info >> 30))
1911 runtime_error(EMSG_TOO_FEW_ARGS);
1913 switch (info & OPNMASK) {
1916 #if ENABLE_FEATURE_AWK_MATH
// NOTE(review): av[i] is indexed with the counter left over from the argument loop above;
// av[0] looks intended for the first operand of atan2 - confirm against the elided lines.
1917 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1919 runtime_error(EMSG_NO_MATH);
// Splitting: a regexp node as third argument is used directly, otherwise a splitter is built
// from the string value of that argument.
1925 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1926 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1931 n = awk_split(as[0], spl, &s);
// Empty the destination array, then store the n pieces under indices 1..n.
1933 clear_array(iamarray(av[1]));
1934 for (i=1; i<=n; i++)
1935 setari_u(av[1], i, nextword(&s1));
// Substring: convert the 1-based start to 0-based and clamp it to [0, l].
1942 i = getvar_i(av[1]) - 1;
1943 if (i>l) i=l; if (i<0) i=0;
// The length defaults to the rest of the string when no third argument is given.
1944 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
// NOTE(review): strncpy does not terminate the copy when n characters are taken;
// presumably the terminator is stored in an elided line - confirm.
1947 strncpy(s, as[0]+i, n);
// Bitwise operations; operands are converted from double through long.
// In order: and, complement, left shift, or, unsigned right shift, xor.
1953 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1957 setvar_i(res, ~(long)getvar_i(av[0]));
1961 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1965 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1969 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1973 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
// Case conversion: copy the string and map every character through to_xxx.
1983 s1 = s = xstrdup(as[0]);
1985 *s1 = (*to_xxx)(*s1);
// Substring search: l is the last offset at which a needle of length ll can still fit.
// presumably ll is strlen(as[1]) - its assignment is elided
1994 l = strlen(as[0]) - ll;
1995 if (ll > 0 && l >= 0) {
// Plain search; the result is the 1-based position of the first occurrence.
1997 s = strstr(as[0], as[1]);
1998 if (s) n = (s - as[0]) + 1;
2000 /* this piece of code is terribly slow and
2001 * really should be rewritten
// Case-insensitive search: compare at every offset in turn.
2003 for (i=0; i<=l; i++) {
2004 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting: the optional second argument is the timestamp, the optional first one the format.
2016 tt = getvar_i(av[1]);
2019 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2020 i = strftime(buf, MAXVARFMT,
2021 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
// Regex match of as[0] against the second argument.
2028 re = as_regex(an[1], &sreg);
2029 n = regexec(re, as[0], 1, pmatch, 0);
// presumably the no-match path: these offsets yield RSTART 0 and RLENGTH -1 below
2034 pmatch[0].rm_so = 0;
2035 pmatch[0].rm_eo = -1;
2037 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2038 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2039 setvar_i(res, pmatch[0].rm_so);
// Release the scratch regex only if as_regex actually used it.
2040 if (re == &sreg) regfree(re);
// Substitution family, all implemented by awk_sub:
// first call - occurrence number taken from av[2], source av[3], result stored in res, subexpressions enabled;
// second call - nm is 0 (all matches), in place on av[2]; third call - nm is 1, in place on av[2].
2044 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2048 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2052 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2061 * Evaluate node - the heart of the program. Supplied with subtree
2062 * and place where to store result. returns ptr to result.
// XC shifts a class value right by 8 bits so that it can be used as a case label below.
2064 #define XC(n) ((n) >> 8)
// NOTE(review): this listing skips source lines, so the opening of the comment above, the
// declarations of L, R, X, v1, op1, opinfo and opn, many case labels, braces, break statements
// and the final return are not visible here.
2066 static var *evaluate(node *op, var *res)
2068 /* This procedure is recursive so we should count every byte */
// Statics shared by all recursion levels: the argument vector used by OC_FNARG nodes,
// the seed value, and a scratch regex_t handed to as_regex.
2069 static var *fnargs = NULL;
2070 static unsigned seed = 1;
2071 static regex_t sreg;
// presumably the early exit taken when op is NULL - the test is elided
2093 return setvar_s(res, NULL);
2099 opn = (short)(opinfo & OPNMASK);
2100 lineno = op->lineno;
2102 /* execute inevitable things */
// The OF_* flags in opinfo request that operands be evaluated and converted up front:
// RES1/RES2 evaluate the left/right operand, STR1/STR2 fetch string values, NUM1 the left number.
2104 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2105 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2106 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2107 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2108 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
// Dispatch on the node class.
2110 switch (XC(opinfo & OPCLSMASK)) {
2112 /* -- iterative node type -- */
2116 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2117 /* it's range pattern */
// OF_CHECKED is kept on the node while inside a range: it is set once the start pattern
// has matched and cleared again when the end pattern matches.
2118 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2119 op->info |= OF_CHECKED;
2120 if (ptest(op1->r.n))
2121 op->info &= ~OF_CHECKED;
// Ordinary pattern: continue with a.n when it matches, with r.n otherwise.
2128 op = (ptest(op1)) ? op->a.n : op->r.n;
2132 /* just evaluate an expression, also used as unconditional jump */
2136 /* branch, used in if-else and various loops */
2138 op = istrue(L.v) ? op->a.n : op->r.n;
2141 /* initialize for-in loop */
2142 case XC( OC_WALKINIT ):
2143 hashwalk_init(L.v, iamarray(R.v));
2146 /* get next array item */
2147 case XC( OC_WALKNEXT ):
2148 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2151 case XC( OC_PRINT ):
2152 case XC( OC_PRINTF ):
// Output redirection: look up the stream record for the target named by R.s and open it if needed.
// presumably the first open below is the pipe case and the second the file case (write or append)
2155 X.rsm = newfile(R.s);
2158 X.rsm->F = popen(R.s, "w");
2159 if (X.rsm->F == NULL)
2160 bb_perror_msg_and_die("popen");
2163 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2169 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// presumably print without arguments: the whole record V[F0] is written
2171 fputs(getvar_s(V[F0]), X.F);
// Print each argument: numbers are formatted through OFMT (integral values as integers),
// everything else is written as its string value.
2174 L.v = evaluate(nextarg(&op1), v1);
2175 if (L.v->type & VF_NUMBER) {
2176 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2177 getvar_i(L.v), TRUE);
2180 fputs(getvar_s(L.v), X.F);
// OFS is written between arguments, ORS after the last one.
2183 if (op1) fputs(getvar_s(V[OFS]), X.F);
2186 fputs(getvar_s(V[ORS]), X.F);
2188 } else { /* OC_PRINTF */
2189 L.s = awk_printf(op1);
2196 case XC( OC_DELETE ):
// The operand must be a variable or a function argument; anything else is not an array.
2197 X.info = op1->info & OPCLSMASK;
2198 if (X.info == OC_VAR) {
2200 } else if (X.info == OC_FNARG) {
2201 R.v = &fnargs[op1->l.i];
2203 runtime_error(EMSG_NOT_ARRAY);
// Either remove the single element named by the index expression, or empty the whole array.
2208 L.s = getvar_s(evaluate(op1->r.n, v1));
2209 hash_remove(iamarray(R.v), L.s);
2211 clear_array(iamarray(R.v));
2215 case XC( OC_NEWSOURCE ):
2216 programname = op->l.s;
2219 case XC( OC_RETURN ):
2223 case XC( OC_NEXTFILE ):
2234 /* -- recursive node type -- */
2242 case XC( OC_FNARG ):
2243 L.v = &fnargs[op->l.i];
// With an index expression present the result is the array element, otherwise the variable itself.
2245 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when the key L.s exists in the array R.v, else 0.
2249 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2252 case XC( OC_REGEXP ):
// A bare regexp is matched against V[F0].
2254 L.s = getvar_s(V[F0]);
2257 case XC( OC_MATCH ):
// Match L.s against the regex; the result is inverted when opn is the exclamation mark.
2260 X.re = as_regex(op1, &sreg);
2261 R.i = regexec(X.re, L.s, 0, NULL, 0);
2262 if (X.re == &sreg) regfree(X.re);
2263 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2267 /* if source is a temporary string, just relink it to dest */
2268 if (R.v == v1+1 && R.v->string) {
2269 res = setvar_p(L.v, R.v->string);
2272 res = copyvar(L.v, R.v);
2276 case XC( OC_TERNARY ):
// The right child must be a colon node holding the two alternatives.
2277 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2278 runtime_error(EMSG_POSSIBLE_ERROR);
2279 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
// Function call: the function must have a body.
2283 if (! op->r.f->body.first)
2284 runtime_error(EMSG_UNDEF_FUNC);
// Allocate nargs+1 variables for the arguments; X.v keeps the start, R.v walks through them.
2286 X.v = R.v = nvalloc(op->r.f->nargs+1);
2288 L.v = evaluate(nextarg(&op1), v1);
// Each argument slot is marked as a child and linked to the value it was created from.
2290 R.v->type |= VF_CHILD;
2291 R.v->x.parent = L.v;
2292 if (++R.v - X.v >= op->r.f->nargs)
2300 res = evaluate(op->r.f->body.first, res);
2307 case XC( OC_GETLINE ):
2308 case XC( OC_PGETLINE ):
// With a source operand: find the stream record for L.s and open it as a pipe or a file.
2310 X.rsm = newfile(L.s);
2312 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2313 X.rsm->F = popen(L.s, "r");
2314 X.rsm->is_pipe = TRUE;
2316 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
// Without one (presumably): read from the current main input, opening the next file if needed.
2320 if (! iF) iF = next_input_file();
2325 setvar_i(V[ERRNO], errno);
2333 L.i = awk_getline(X.rsm, R.v);
2343 /* simple builtins */
2344 case XC( OC_FBLTIN ):
// NOTE(review): rand() may return RAND_MAX, so this quotient can be exactly 1.0 - confirm
// whether the upper bound is meant to be exclusive.
2352 R.d = (double)rand() / (double)RAND_MAX;
2354 #if ENABLE_FEATURE_AWK_MATH
2380 runtime_error(EMSG_NO_MATH);
// New seed: the numeric argument when one is given, otherwise the current time.
2385 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2395 L.s = getvar_s(V[F0]);
// Run a command when the feature is enabled and the command is non-empty; the value
// returned by system() is shifted right by 8 bits.
2401 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2402 ? (system(L.s) >> 8) : 0;
2410 X.rsm = newfile(L.s);
// Close: look the stream up by name, close it with pclose or fclose depending on is_pipe,
// free its buffer and drop the record from fdhash.
2419 X.rsm = (rstream *)hash_search(fdhash, L.s);
2421 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2422 free(X.rsm->buffer);
2423 hash_remove(fdhash, L.s);
2426 setvar_i(V[ERRNO], errno);
2433 case XC( OC_BUILTIN ):
2434 res = exec_builtin(op, res);
2437 case XC( OC_SPRINTF ):
2438 setvar_p(res, awk_printf(op1));
2441 case XC( OC_UNARY ):
2443 L.d = R.d = getvar_i(R.v);
// Logical negation.
2458 L.d = istrue(X.v) ? 0 : 1;
2469 case XC( OC_FIELD ):
// The field number is the operand truncated to int; Fields[] is indexed from zero.
2470 R.i = (int)getvar_i(R.v);
2478 res = &Fields[R.i-1];
2482 /* concatenation (" ") and index joining (",") */
2483 case XC( OC_CONCAT ):
2484 case XC( OC_COMMA ):
// opn is reused here as the size needed for both strings plus two bytes.
2485 opn = strlen(L.s) + strlen(R.s) + 2;
// For the comma form the buffer is enlarged by the length of SUBSEP.
2488 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2489 L.s = getvar_s(V[SUBSEP]);
2490 X.s = xrealloc(X.s, opn + strlen(L.s));
// Short-circuit and / or: the right side is only tested when the left side does not decide the result.
2498 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2502 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2505 case XC( OC_BINARY ):
2506 case XC( OC_REPLACE ):
2507 R.d = getvar_i(R.v);
2519 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2523 #if ENABLE_FEATURE_AWK_MATH
2524 L.d = pow(L.d, R.d);
2526 runtime_error(EMSG_NO_MATH);
2530 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
// NOTE(review): the quotient is truncated through int, so a quotient outside the int range
// is not handled; fmod may be the safer choice - confirm.
2531 L.d -= (int)(L.d / R.d) * R.d;
// OC_BINARY stores into res; OC_REPLACE stores back into the left operand X.v.
2534 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2537 case XC( OC_COMPARE ):
// Compare numerically when both sides are numeric, otherwise as strings (ignoring case when icase is set).
// L.d ends up negative, zero or positive.
2538 if (is_numeric(L.v) && is_numeric(R.v)) {
2539 L.d = getvar_i(L.v) - getvar_i(R.v);
2541 L.s = getvar_s(L.v);
2542 R.s = getvar_s(R.v);
2543 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
// The upper bits of opn select the relation; the lowest bit selects whether R.i is used as is or negated.
2545 switch (opn & 0xfe) {
2556 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2560 runtime_error(EMSG_POSSIBLE_ERROR);
// The bodies of the two class-range tests below are elided in this listing.
2562 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2564 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2574 /* -------- main & co. -------- */
// awk_exit: run the END sequence, then pclose every pipe stream still recorded in fdhash.
// NOTE(review): this listing skips source lines; the declarations, the walk along each
// bucket chain and the use of r are not visible here.
2576 static int awk_exit(int r)
// Evaluate the END sequence.
2587 evaluate(endseq.first, &tv);
2590 /* waiting for children */
// Visit every bucket of the stream hash.
2591 for (i = 0; i < fdhash->csize; i++) {
2592 hi = fdhash->items[i];
// Only streams that are open and were opened as pipes are closed with pclose.
2594 if (hi->data.rs.F && hi->data.rs.is_pipe)
2595 pclose(hi->data.rs.F);
2603 /* if expr looks like "var=value", perform assignment and return 1,
2604 * otherwise return 0 */
// NOTE(review): this listing skips source lines; the return statements, the loop around
// nextchar and the release of exprc are not visible here.
2605 static int is_assignment(const char *expr)
2607 char *exprc, *s, *s0, *s1;
// Work on a private copy of expr.
2609 exprc = xstrdup(expr);
// Not an assignment unless the first character passes isalnum_ and an equals sign is present.
2610 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
// Rewrite the value in place, one character per nextchar call.
// presumably nextchar decodes escape sequences - confirm
2618 *(s1++) = nextchar(&s);
// Create or look up the variable named by the text before the equals sign and assign the value.
// presumably the equals sign was overwritten with NUL in an elided line so that exprc is just the name
2621 setvar_u(newvar(exprc), s0);
2626 /* switch to next input file */
// NOTE(review): this listing skips source lines; the declarations of rsm and F, the loop
// structure and the return statements are not visible here.
2627 static rstream *next_input_file(void)
2631 const char *fname, *ind;
// Remembers across calls whether a file has been taken from ARGV.
2632 static int files_happen = FALSE;
// Close the previous input stream and reset its read positions.
2634 if (rsm.F) fclose(rsm.F);
2636 rsm.pos = rsm.adv = 0;
// True when ARGV has no further entries after the current ARGIND.
// presumably stdin is used here if no file has been seen so far - the body is elided
2639 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
// Advance ARGIND and fetch the corresponding ARGV element.
2645 ind = getvar_s(incvar(V[ARGIND]));
2646 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
// Empty entries and var=value entries are not opened; is_assignment performs the assignment itself.
2647 if (fname && *fname && !is_assignment(fname))
2648 F = afopen(fname, "r");
// Record that a file has been taken and publish its name in FILENAME.
2652 files_happen = TRUE;
2653 setvar_s(V[FILENAME], fname);
// awk_main: applet entry point - sets up the interpreter state, parses options and the program,
// runs the BEGIN sequence, then feeds every input record to the main sequence.
// NOTE(review): this listing skips source lines; several declarations, braces, loops and
// error paths are not visible here.
2658 int awk_main(int argc, char **argv);
2659 int awk_main(int argc, char **argv)
2662 char *opt_F, *opt_W;
2663 llist_t *opt_v = NULL;
// vNames and vValues are walked with nextword below as packed lists of variable names and initial values.
2668 char *vnames = (char *)vNames; /* cheat */
2669 char *vvalues = (char *)vValues;
2671 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2672 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2673 if (ENABLE_LOCALE_SUPPORT)
2674 setlocale(LC_NUMERIC, "C");
2678 /* allocate global buffer */
2679 buf = xmalloc(MAXVARFMT + 1);
// Four hash tables are created; fdhash is the one that holds stream records elsewhere in this file.
2681 vhash = hash_init();
2682 ahash = hash_init();
2683 fdhash = hash_init();
2684 fnhash = hash_init();
2686 /* initialize variables */
// Create each predefined variable and remember it in V[i].
2687 for (i = 0; *vnames; i++) {
2688 V[i] = v = newvar(nextword(&vnames));
// A value entry starting with byte 0377 is not assigned through setvar_s (the alternative is elided).
2689 if (*vvalues != '\377')
2690 setvar_s(v, nextword(&vvalues));
// A star in the name list marks the variable as special.
2694 if (*vnames == '*') {
2695 v->type |= VF_SPECIAL;
// Apply the special handling for the initial FS and RS values.
2700 handle_special(V[FS]);
2701 handle_special(V[RS]);
// Pre-register the three standard streams under their device names.
2703 newfile("/dev/stdin")->F = stdin;
2704 newfile("/dev/stdout")->F = stdout;
2705 newfile("/dev/stderr")->F = stderr;
2707 /* Huh, people report that sometimes environ is NULL. Oh well. */
// Copy every environment entry into the ENVIRON array.
// presumably the copy is split at the equals sign in elided lines, giving key s and value s1
2708 if (environ) for (envp = environ; *envp; envp++) {
2709 char *s = xstrdup(*envp);
2710 char *s1 = strchr(s, '=');
2713 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
// Option bits as used below: 0x1 is -F, 0x4 is -f, 0x8 is -W; opt_v collects the -v arguments.
2717 opt_complementary = "v::";
2718 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2721 if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
// Each -v argument must have the var=value form (the failure branch is elided).
2722 while (opt_v) { /* -v */
2723 if (!is_assignment(llist_pop(&opt_v)))
2726 if (opt & 0x4) { // -f
// The self-initialisation apparently exists only to silence a compiler warning.
2727 char *s = s; /* die, gcc, die */
2728 FILE *from_file = afopen(programname, "r");
2729 /* one byte is reserved for some trick in next_token */
// Seekable file: size it, allocate once and read the whole program starting at offset 1.
// NOTE(review): the ftell result is used without an error check - confirm.
2730 if (fseek(from_file, 0, SEEK_END) == 0) {
2731 flen = ftell(from_file);
2732 s = xmalloc(flen + 4);
2733 fseek(from_file, 0, SEEK_SET);
2734 i = 1 + fread(s + 1, 1, flen, from_file);
// Otherwise grow the buffer in 4096-byte steps and read up to 4094 bytes per round until fread returns 0.
2736 for (i = j = 1; j > 0; i += j) {
2737 s = xrealloc(s, i + 4096);
2738 j = fread(s + i, 1, 4094, from_file);
// The program text starts one byte into the buffer.
2743 parse_program(s + 1);
2745 } else { // no -f: take program from 1st parameter
2748 programname = "cmd. line";
2749 parse_program(*argv++);
2752 if (opt & 0x8) // -W
2753 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2755 /* fill in ARGV array */
// presumably argc was reduced by the consumed options in elided lines - confirm
2756 setvar_i(V[ARGC], argc + 1);
2757 setari_u(V[ARGV], 0, "awk");
2760 setari_u(V[ARGV], ++i, *argv++);
// Run the BEGIN sequence; with neither main rules nor END rules there is nothing left to do.
2762 evaluate(beginseq.first, &tv);
2763 if (!mainseq.first && !endseq.first)
2764 awk_exit(EXIT_SUCCESS);
2766 /* input file could already be opened in BEGIN block */
2767 if (!iF) iF = next_input_file();
2769 /* passing through input files */
// FNR is reset to 0 here; presumably once per input file - the enclosing loop is elided.
2772 setvar_i(V[FNR], 0);
// Read records into V[F0] and run the main sequence on each one.
2774 while ((i = awk_getline(iF, V[F0])) > 0) {
2778 evaluate(mainseq.first, &tv);
// presumably reached when awk_getline reported an error - the test is elided
2785 runtime_error(strerror(errno));
2787 iF = next_input_file();
2790 awk_exit(EXIT_SUCCESS);