1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
13 extern char **environ;
15 /* This is a NOEXEC applet. Be very careful! */
22 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
23 #define VF_ARRAY 0x0002 /* 1 = it's an array */
25 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
26 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
27 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
28 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
29 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
30 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
31 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
33 /* these flags are static, don't change them when value is changed */
34 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
37 typedef struct var_s {
38 unsigned type; /* flags */
42 int aidx; /* func arg idx (for compilation stage) */
43 struct xhash_s *array; /* array ptr */
44 struct var_s *parent; /* for func args, ptr to actual parameter */
45 char **walker; /* list of array elements (for..in) */
49 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50 typedef struct chain_s {
53 const char *programname;
57 typedef struct func_s {
63 typedef struct rstream_s {
72 typedef struct hash_item_s {
74 struct var_s v; /* variable/array hash */
75 struct rstream_s rs; /* redirect streams hash */
76 struct func_s f; /* functions hash */
78 struct hash_item_s *next; /* next in chain */
79 char name[1]; /* really it's longer */
82 typedef struct xhash_s {
83 unsigned nel; /* num of elements */
84 unsigned csize; /* current hash size */
85 unsigned nprime; /* next hash size in PRIMES[] */
86 unsigned glen; /* summary length of item names */
87 struct hash_item_s **items;
91 typedef struct node_s {
112 /* Block of temporary variables */
113 typedef struct nvblock_s {
116 struct nvblock_s *prev;
117 struct nvblock_s *next;
121 typedef struct tsplitter_s {
126 /* simple token classes */
127 /* Order and hex values are very important!!! See next_token() */
128 #define TC_SEQSTART 1 /* ( */
129 #define TC_SEQTERM (1 << 1) /* ) */
130 #define TC_REGEXP (1 << 2) /* /.../ */
131 #define TC_OUTRDR (1 << 3) /* | > >> */
132 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
133 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
134 #define TC_BINOPX (1 << 6) /* two-opnd operator */
135 #define TC_IN (1 << 7)
136 #define TC_COMMA (1 << 8)
137 #define TC_PIPE (1 << 9) /* input redirection pipe */
138 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
139 #define TC_ARRTERM (1 << 11) /* ] */
140 #define TC_GRPSTART (1 << 12) /* { */
141 #define TC_GRPTERM (1 << 13) /* } */
142 #define TC_SEMICOL (1 << 14)
143 #define TC_NEWLINE (1 << 15)
144 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
145 #define TC_WHILE (1 << 17)
146 #define TC_ELSE (1 << 18)
147 #define TC_BUILTIN (1 << 19)
148 #define TC_GETLINE (1 << 20)
149 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
150 #define TC_BEGIN (1 << 22)
151 #define TC_END (1 << 23)
152 #define TC_EOF (1 << 24)
153 #define TC_VARIABLE (1 << 25)
154 #define TC_ARRAY (1 << 26)
155 #define TC_FUNCTION (1 << 27)
156 #define TC_STRING (1 << 28)
157 #define TC_NUMBER (1 << 29)
159 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
161 /* combined token classes */
162 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
165 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
167 #define TC_STATEMNT (TC_STATX | TC_WHILE)
168 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
170 /* word tokens, cannot mean something else if not expected */
171 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
172 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
174 /* discard newlines after these */
175 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
176 | TC_BINOP | TC_OPTERM)
178 /* what can expression begin with */
179 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
180 /* what can group begin with */
181 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
183 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184 /* operator is inserted between them */
185 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
186 | TC_STRING | TC_NUMBER | TC_UOPPOST)
187 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
189 #define OF_RES1 0x010000
190 #define OF_RES2 0x020000
191 #define OF_STR1 0x040000
192 #define OF_STR2 0x080000
193 #define OF_NUM1 0x100000
194 #define OF_CHECKED 0x200000
196 /* combined operator flags */
199 #define xS (OF_RES2 | OF_STR2)
201 #define VV (OF_RES1 | OF_RES2)
202 #define Nx (OF_RES1 | OF_NUM1)
203 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
204 #define Sx (OF_RES1 | OF_STR1)
205 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
206 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
208 #define OPCLSMASK 0xFF00
209 #define OPNMASK 0x007F
211 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
212 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
213 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* P(x) moves x into the top byte of a token info word (bits 24..31).
 * The argument is parenthesized so that expressions such as P(a | b) shift
 * the whole value, and it is widened to uint32_t before shifting: arguments
 * with bit 7 set, e.g. P(0x83), would otherwise be shifted into the sign bit
 * of a plain int, which is undefined behavior.
 * PRIMASK / PRIMASK2 select that byte without its top bit; PRIMASK2
 * additionally drops the lowest bit of the byte. */
#define P(x) ((uint32_t)(x) << 24)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000
219 /* Operation classes */
221 #define SHIFT_TIL_THIS 0x0600
222 #define RECUR_FROM_THIS 0x1000
225 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
226 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
228 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
229 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
230 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
232 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
233 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
234 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
235 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
236 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
237 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
238 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
239 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
242 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
246 /* simple builtins */
248 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
249 F_ti, F_le, F_sy, F_ff, F_cl
254 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
256 B_an, B_co, B_ls, B_or, B_rs, B_xo,
259 /* tokens and their corresponding info values */
261 #define NTC "\377" /* switch to next token class (tc<<1) */
264 #define OC_B OC_BUILTIN
266 static const char tokenlist[] =
269 "\1/" NTC /* REGEXP */
270 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
271 "\2++" "\2--" NTC /* UOPPOST */
272 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
273 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
274 "\2*=" "\2/=" "\2%=" "\2^="
275 "\1+" "\1-" "\3**=" "\2**"
276 "\1/" "\1%" "\1^" "\1*"
277 "\2!=" "\2>=" "\2<=" "\1>"
278 "\1<" "\2!~" "\1~" "\2&&"
279 "\2||" "\1?" "\1:" NTC
283 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
289 "\2if" "\2do" "\3for" "\5break" /* STATX */
290 "\10continue" "\6delete" "\5print"
291 "\6printf" "\4next" "\10nextfile"
292 "\6return" "\4exit" NTC
296 "\3and" "\5compl" "\6lshift" "\2or"
298 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
299 "\3cos" "\3exp" "\3int" "\3log"
300 "\4rand" "\3sin" "\4sqrt" "\5srand"
301 "\6gensub" "\4gsub" "\5index" "\6length"
302 "\5match" "\5split" "\7sprintf" "\3sub"
303 "\6substr" "\7systime" "\10strftime"
304 "\7tolower" "\7toupper" NTC
306 "\4func" "\10function" NTC
311 static const uint32_t tokeninfo[] = {
315 xS|'a', xS|'w', xS|'|',
316 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
317 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
319 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
320 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
321 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
322 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
323 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
324 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
325 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
326 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
327 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
328 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
329 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
330 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
331 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
332 OC_COLON|xx|P(67)|':',
335 OC_PGETLINE|SV|P(37),
336 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
337 OC_UNARY|xV|P(19)|'!',
343 ST_IF, ST_DO, ST_FOR, OC_BREAK,
344 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
345 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
346 OC_RETURN|Vx, OC_EXIT|Nx,
350 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
357 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
358 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
365 /* internal variable names and their initial values */
366 /* asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
368 CONVFMT, OFMT, FS, OFS,
369 ORS, RS, RT, FILENAME,
370 SUBSEP, ARGIND, ARGC, ARGV,
373 ENVIRON, F0, NUM_INTERNAL_VARS
376 static const char vNames[] =
377 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
378 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
379 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
381 "NR\0" "NF\0*" "IGNORECASE\0*"
382 "ENVIRON\0" "$\0*" "\0";
384 static const char vValues[] =
385 "%.6g\0" "%.6g\0" " \0" " \0"
386 "\n\0" "\n\0" "\0" "\0"
/* Hash table sizing: a new table starts with FIRST_PRIME buckets
 * (see hash_init) and hash_rebuild moves it, one step at a time,
 * to the successive sizes listed in PRIMES[].
 * FIRST_PRIME must not end in a semicolon, so that it can be used
 * inside any expression and not only as a complete statement. */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
396 /* Globals. Split in two parts so that first one is addressed
397 * with (mostly short) negative offsets */
399 chain beginseq, mainseq, endseq, *seq;
400 node *break_ptr, *continue_ptr;
402 xhash *vhash, *ahash, *fdhash, *fnhash;
403 const char *g_progname;
406 int maxfields; /* used in fsrealloc() only */
415 smallint is_f0_split;
418 uint32_t t_info; /* often used */
424 var *intvar[NUM_INTERNAL_VARS]; /* often used */
426 /* former statics from various functions */
427 char *split_f0__fstrings;
429 uint32_t next_token__save_tclass;
430 uint32_t next_token__save_info;
431 uint32_t next_token__ltclass;
432 smallint next_token__concat_inserted;
434 smallint next_input_file__files_happen;
435 rstream next_input_file__rsm;
437 var *evaluate__fnargs;
438 unsigned evaluate__seed;
439 regex_t evaluate__sreg;
443 tsplitter exec_builtin__tspl;
445 /* biggest and least used members go last */
447 tsplitter fsplitter, rsplitter;
449 #define G1 (ptr_to_globals[-1])
450 #define G (*(struct globals2 *const)ptr_to_globals)
451 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
452 /* char G1size[sizeof(G1)]; - 0x6c */
453 /* char Gsize[sizeof(G)]; - 0x1cc */
454 /* Trying to keep most of members accessible with short offsets: */
455 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
456 #define beginseq (G1.beginseq )
457 #define mainseq (G1.mainseq )
458 #define endseq (G1.endseq )
459 #define seq (G1.seq )
460 #define break_ptr (G1.break_ptr )
461 #define continue_ptr (G1.continue_ptr)
463 #define vhash (G1.vhash )
464 #define ahash (G1.ahash )
465 #define fdhash (G1.fdhash )
466 #define fnhash (G1.fnhash )
467 #define g_progname (G1.g_progname )
468 #define g_lineno (G1.g_lineno )
469 #define nfields (G1.nfields )
470 #define maxfields (G1.maxfields )
471 #define Fields (G1.Fields )
472 #define g_cb (G1.g_cb )
473 #define g_pos (G1.g_pos )
474 #define g_buf (G1.g_buf )
475 #define icase (G1.icase )
476 #define exiting (G1.exiting )
477 #define nextrec (G1.nextrec )
478 #define nextfile (G1.nextfile )
479 #define is_f0_split (G1.is_f0_split )
480 #define t_info (G.t_info )
481 #define t_tclass (G.t_tclass )
482 #define t_string (G.t_string )
483 #define t_double (G.t_double )
484 #define t_lineno (G.t_lineno )
485 #define t_rollback (G.t_rollback )
486 #define intvar (G.intvar )
487 #define fsplitter (G.fsplitter )
488 #define rsplitter (G.rsplitter )
489 #define INIT_G() do { \
490 PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \
491 G.next_token__ltclass = TC_OPTERM; \
492 G.evaluate__seed = 1; \
496 /* function prototypes */
497 static void handle_special(var *);
498 static node *parse_expr(uint32_t);
499 static void chain_group(void);
500 static var *evaluate(node *, var *);
501 static rstream *next_input_file(void);
502 static int fmt_num(char *, int, const char *, double, int);
503 static int awk_exit(int) ATTRIBUTE_NORETURN;
505 /* ---- error handling ---- */
507 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
508 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
509 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
510 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
511 static const char EMSG_INV_FMT[] = "Invalid format specifier";
512 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
513 static const char EMSG_NOT_ARRAY[] = "Not an array";
514 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
515 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
516 #if !ENABLE_FEATURE_AWK_MATH
517 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
520 static void zero_out_var(var * vp)
522 memset(vp, 0, sizeof(*vp));
525 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
526 static void syntax_error(const char * const message)
528 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
531 /* ---- hash stuff ---- */
533 static unsigned hashidx(const char *name)
537 while (*name) idx = *name++ + (idx << 6) - idx;
541 /* create new hash */
542 static xhash *hash_init(void)
546 newhash = xzalloc(sizeof(xhash));
547 newhash->csize = FIRST_PRIME;
548 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
558 hi = hash->items [ hashidx(name) % hash->csize ];
560 if (strcmp(hi->name, name) == 0)
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
570 unsigned newsize, i, idx;
571 hash_item **newitems, *hi, *thi;
573 if (hash->nprime == ARRAY_SIZE(PRIMES))
576 newsize = PRIMES[hash->nprime++];
577 newitems = xzalloc(newsize * sizeof(hash_item *));
579 for (i = 0; i < hash->csize; i++) {
584 idx = hashidx(thi->name) % newsize;
585 thi->next = newitems[idx];
591 hash->csize = newsize;
592 hash->items = newitems;
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
602 hi = hash_search(hash, name);
604 if (++hash->nel / hash->csize > 10)
607 l = strlen(name) + 1;
608 hi = xzalloc(sizeof(hash_item) + l);
609 memcpy(hi->name, name, l);
611 idx = hashidx(name) % hash->csize;
612 hi->next = hash->items[idx];
613 hash->items[idx] = hi;
619 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
620 #define newvar(name) ((var*) hash_find(vhash, (name)))
621 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
624 static void hash_remove(xhash *hash, const char *name)
626 hash_item *hi, **phi;
628 phi = &(hash->items[hashidx(name) % hash->csize]);
631 if (strcmp(hi->name, name) == 0) {
632 hash->glen -= (strlen(name) + 1);
642 /* ------ some useful functions ------ */
644 static void skip_spaces(char **s)
649 if (*p == '\\' && p[1] == '\n') {
652 } else if (*p != ' ' && *p != '\t') {
660 static char *nextword(char **s)
664 while (*(*s)++) /* */;
669 static char nextchar(char **s)
675 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
676 if (c == '\\' && *s == pps) c = *((*s)++);
680 static int ALWAYS_INLINE isalnum_(int c)
682 return (isalnum(c) || c == '_');
/* Open path with the given mode through xfopen(), except that a path of
 * exactly "-" yields stdin instead of opening anything. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;
	return xfopen(path, mode);
}
690 /* -------- working with variables (set/get/copy/etc) -------- */
692 static xhash *iamarray(var *v)
696 while (a->type & VF_CHILD)
699 if (!(a->type & VF_ARRAY)) {
701 a->x.array = hash_init();
706 static void clear_array(xhash *array)
711 for (i = 0; i < array->csize; i++) {
712 hi = array->items[i];
716 free(thi->data.v.string);
719 array->items[i] = NULL;
721 array->glen = array->nel = 0;
724 /* clear a variable */
725 static var *clrvar(var *v)
727 if (!(v->type & VF_FSTR))
730 v->type &= VF_DONTTOUCH;
736 /* assign string value to variable */
737 static var *setvar_p(var *v, char *value)
745 /* same as setvar_p but make a copy of string */
746 static var *setvar_s(var *v, const char *value)
748 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
751 /* same as setvar_s but set USER flag */
752 static var *setvar_u(var *v, const char *value)
759 /* set array element to user string */
760 static void setari_u(var *a, int idx, const char *s)
762 char sidx[sizeof(int)*3 + 1];
765 sprintf(sidx, "%d", idx);
766 v = findvar(iamarray(a), sidx);
770 /* assign numeric value to variable */
771 static var *setvar_i(var *v, double value)
774 v->type |= VF_NUMBER;
780 static const char *getvar_s(var *v)
782 /* if v is numeric and has no cached string, convert it to string */
783 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
784 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
785 v->string = xstrdup(g_buf);
786 v->type |= VF_CACHED;
788 return (v->string == NULL) ? "" : v->string;
791 static double getvar_i(var *v)
795 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
799 v->number = strtod(s, &s);
800 if (v->type & VF_USER) {
808 v->type |= VF_CACHED;
813 static var *copyvar(var *dest, const var *src)
817 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
818 dest->number = src->number;
820 dest->string = xstrdup(src->string);
822 handle_special(dest);
826 static var *incvar(var *v)
828 return setvar_i(v, getvar_i(v)+1.);
831 /* return true if v is number or numeric string */
832 static int is_numeric(var *v)
835 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
838 /* return 1 when value of v corresponds to true, 0 otherwise */
839 static int istrue(var *v)
842 return (v->number == 0) ? 0 : 1;
843 return (v->string && *(v->string)) ? 1 : 0;
846 /* temporary variables allocator. Last allocated should be first freed */
847 static var *nvalloc(int n)
855 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
860 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
861 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
863 g_cb->pos = g_cb->nv;
866 if (pb) pb->next = g_cb;
872 while (v < g_cb->pos) {
881 static void nvfree(var *v)
885 if (v < g_cb->nv || v >= g_cb->pos)
886 syntax_error(EMSG_INTERNAL_ERROR);
888 for (p = v; p < g_cb->pos; p++) {
889 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
890 clear_array(iamarray(p));
891 free(p->x.array->items);
894 if (p->type & VF_WALK)
901 while (g_cb->prev && g_cb->pos == g_cb->nv) {
906 /* ------- awk program text parsing ------- */
908 /* Parse the next token pointed to by global pos, place results into the
909 * global t_* variables. If the token isn't expected, give up. Return the token class
911 static uint32_t next_token(uint32_t expected)
913 #define concat_inserted (G.next_token__concat_inserted)
914 #define save_tclass (G.next_token__save_tclass)
915 #define save_info (G.next_token__save_info)
916 /* Initialized to TC_OPTERM: */
917 #define ltclass (G.next_token__ltclass)
928 } else if (concat_inserted) {
929 concat_inserted = FALSE;
930 t_tclass = save_tclass;
939 while (*p != '\n' && *p != '\0')
948 } else if (*p == '\"') {
952 if (*p == '\0' || *p == '\n')
953 syntax_error(EMSG_UNEXP_EOS);
954 *(s++) = nextchar(&p);
960 } else if ((expected & TC_REGEXP) && *p == '/') {
964 if (*p == '\0' || *p == '\n')
965 syntax_error(EMSG_UNEXP_EOS);
969 *(s-1) = bb_process_escape_sequence((const char **)&p);
980 } else if (*p == '.' || isdigit(*p)) {
982 t_double = strtod(p, &p);
984 syntax_error(EMSG_UNEXP_TOKEN);
988 /* search for something known */
998 /* if token class is expected, token
999 * matches and it's not a longer word,
1000 * then this is what we are looking for
1002 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1003 && *tl == *p && strncmp(p, tl, l) == 0
1004 && !((tc & TC_WORD) && isalnum_(p[l]))
1015 /* it's a name (var/array/function),
1016 * otherwise it's something wrong
1019 syntax_error(EMSG_UNEXP_TOKEN);
1022 while (isalnum_(*(++p))) {
1027 /* also consume whitespace between functionname and bracket */
1028 if (!(expected & TC_VARIABLE))
1042 /* skipping newlines in some cases */
1043 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1046 /* insert concatenation operator when needed */
1047 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1048 concat_inserted = TRUE;
1052 t_info = OC_CONCAT | SS | P(35);
1059 /* Are we ready for this? */
1060 if (!(ltclass & expected))
1061 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1062 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1065 #undef concat_inserted
1071 static void rollback_token(void)
1076 static node *new_node(uint32_t info)
1080 n = xzalloc(sizeof(node));
1082 n->lineno = g_lineno;
1086 static node *mk_re_node(const char *s, node *n, regex_t *re)
1088 n->info = OC_REGEXP;
1091 xregcomp(re, s, REG_EXTENDED);
1092 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1097 static node *condition(void)
1099 next_token(TC_SEQSTART);
1100 return parse_expr(TC_SEQTERM);
1103 /* parse expression terminated by given argument, return ptr
1104 * to built subtree. Terminator is eaten by parse_expr */
1105 static node *parse_expr(uint32_t iexp)
1114 sn.r.n = glptr = NULL;
1115 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1117 while (!((tc = next_token(xtc)) & iexp)) {
1118 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1119 /* input redirection (<) attached to glptr node */
1120 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1122 xtc = TC_OPERAND | TC_UOPPRE;
1125 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1126 /* for binary and postfix-unary operators, jump back over
1127 * previous operators with higher priority */
1129 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1130 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1132 if ((t_info & OPCLSMASK) == OC_TERNARY)
1134 cn = vn->a.n->r.n = new_node(t_info);
1136 if (tc & TC_BINOP) {
1138 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1139 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1141 next_token(TC_GETLINE);
1142 /* give maximum priority to this pipe */
1143 cn->info &= ~PRIMASK;
1144 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1148 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1153 /* for operands and prefix-unary operators, attach them
1156 cn = vn->r.n = new_node(t_info);
1158 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1159 if (tc & (TC_OPERAND | TC_REGEXP)) {
1160 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1161 /* one should be very careful with switch on tclass -
1162 * only simple tclasses should be used! */
1167 v = hash_search(ahash, t_string);
1169 cn->info = OC_FNARG;
1170 cn->l.i = v->x.aidx;
1172 cn->l.v = newvar(t_string);
1174 if (tc & TC_ARRAY) {
1176 cn->r.n = parse_expr(TC_ARRTERM);
1183 v = cn->l.v = xzalloc(sizeof(var));
1185 setvar_i(v, t_double);
1187 setvar_s(v, t_string);
1191 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1196 cn->r.f = newfunc(t_string);
1197 cn->l.n = condition();
1201 cn = vn->r.n = parse_expr(TC_SEQTERM);
1207 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1211 cn->l.n = condition();
1220 /* add node to chain. Return ptr to alloc'd node */
1221 static node *chain_node(uint32_t info)
1226 seq->first = seq->last = new_node(0);
1228 if (seq->programname != g_progname) {
1229 seq->programname = g_progname;
1230 n = chain_node(OC_NEWSOURCE);
1231 n->l.s = xstrdup(g_progname);
1236 seq->last = n->a.n = new_node(OC_DONE);
1241 static void chain_expr(uint32_t info)
1245 n = chain_node(info);
1246 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1247 if (t_tclass & TC_GRPTERM)
1251 static node *chain_loop(node *nn)
1253 node *n, *n2, *save_brk, *save_cont;
1255 save_brk = break_ptr;
1256 save_cont = continue_ptr;
1258 n = chain_node(OC_BR | Vx);
1259 continue_ptr = new_node(OC_EXEC);
1260 break_ptr = new_node(OC_EXEC);
1262 n2 = chain_node(OC_EXEC | Vx);
1265 continue_ptr->a.n = n2;
1266 break_ptr->a.n = n->r.n = seq->last;
1268 continue_ptr = save_cont;
1269 break_ptr = save_brk;
1274 /* parse group and attach it to chain */
1275 static void chain_group(void)
1281 c = next_token(TC_GRPSEQ);
1282 } while (c & TC_NEWLINE);
1284 if (c & TC_GRPSTART) {
1285 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1286 if (t_tclass & TC_NEWLINE) continue;
1290 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1292 chain_expr(OC_EXEC | Vx);
1293 } else { /* TC_STATEMNT */
1294 switch (t_info & OPCLSMASK) {
1296 n = chain_node(OC_BR | Vx);
1297 n->l.n = condition();
1299 n2 = chain_node(OC_EXEC);
1301 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1303 n2->a.n = seq->last;
1311 n = chain_loop(NULL);
1316 n2 = chain_node(OC_EXEC);
1317 n = chain_loop(NULL);
1319 next_token(TC_WHILE);
1320 n->l.n = condition();
1324 next_token(TC_SEQSTART);
1325 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1326 if (t_tclass & TC_SEQTERM) { /* for-in */
1327 if ((n2->info & OPCLSMASK) != OC_IN)
1328 syntax_error(EMSG_UNEXP_TOKEN);
1329 n = chain_node(OC_WALKINIT | VV);
1332 n = chain_loop(NULL);
1333 n->info = OC_WALKNEXT | Vx;
1335 } else { /* for (;;) */
1336 n = chain_node(OC_EXEC | Vx);
1338 n2 = parse_expr(TC_SEMICOL);
1339 n3 = parse_expr(TC_SEQTERM);
1349 n = chain_node(t_info);
1350 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1351 if (t_tclass & TC_OUTRDR) {
1353 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1355 if (t_tclass & TC_GRPTERM)
1360 n = chain_node(OC_EXEC);
1365 n = chain_node(OC_EXEC);
1366 n->a.n = continue_ptr;
1369 /* delete, next, nextfile, return, exit */
1376 static void parse_program(char *p)
1385 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1386 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1388 if (tclass & TC_OPTERM)
1392 if (tclass & TC_BEGIN) {
1396 } else if (tclass & TC_END) {
1400 } else if (tclass & TC_FUNCDECL) {
1401 next_token(TC_FUNCTION);
1403 f = newfunc(t_string);
1404 f->body.first = NULL;
1406 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1407 v = findvar(ahash, t_string);
1408 v->x.aidx = (f->nargs)++;
1410 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1417 } else if (tclass & TC_OPSEQ) {
1419 cn = chain_node(OC_TEST);
1420 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1421 if (t_tclass & TC_GRPSTART) {
1425 chain_node(OC_PRINT);
1427 cn->r.n = mainseq.last;
1429 } else /* if (tclass & TC_GRPSTART) */ {
1437 /* -------- program execution part -------- */
1439 static node *mk_splitter(const char *s, tsplitter *spl)
1447 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1449 regfree(ire); // TODO: nuke ire, use re+1?
1451 if (strlen(s) > 1) {
1452 mk_re_node(s, n, re);
1454 n->info = (uint32_t) *s;
1460 /* Use node as a regular expression. Supplied with node ptr and regex_t
1461 * storage space. Return ptr to regex (if the result points to preg, it should
1462 * be regfree'd manually later).
1464 static regex_t *as_regex(node *op, regex_t *preg)
1469 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1470 return icase ? op->r.ire : op->l.re;
1473 s = getvar_s(evaluate(op, v));
1474 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1479 /* gradually increasing buffer */
1480 static void qrealloc(char **b, int n, int *size)
1482 if (!*b || n >= *size)
1483 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1486 /* resize field storage space */
1487 static void fsrealloc(int size)
1491 if (size >= maxfields) {
1493 maxfields = size + 16;
1494 Fields = xrealloc(Fields, maxfields * sizeof(var));
1495 for (; i < maxfields; i++) {
1496 Fields[i].type = VF_SPECIAL;
1497 Fields[i].string = NULL;
1501 if (size < nfields) {
1502 for (i = size; i < nfields; i++) {
1509 static int awk_split(const char *s, node *spl, char **slist)
1514 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1516 /* in worst case, each char would be a separate field */
1517 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1520 c[0] = c[1] = (char)spl->info;
1522 if (*getvar_s(intvar[RS]) == '\0')
1525 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1527 return n; /* "": zero fields */
1528 n++; /* at least one field will be there */
1530 l = strcspn(s, c+2); /* len till next NUL or \n */
1531 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1532 && pmatch[0].rm_so <= l
1534 l = pmatch[0].rm_so;
1535 if (pmatch[0].rm_eo == 0) {
1539 n++; /* we saw yet another delimiter */
1541 pmatch[0].rm_eo = l;
1542 if (s[l]) pmatch[0].rm_eo++;
1547 s += pmatch[0].rm_eo;
1551 if (c[0] == '\0') { /* null split */
1559 if (c[0] != ' ') { /* single-character split */
1561 c[0] = toupper(c[0]);
1562 c[1] = tolower(c[1]);
1565 while ((s1 = strpbrk(s1, c))) {
1573 s = skip_whitespace(s);
1576 while (*s && !isspace(*s))
1583 static void split_f0(void)
1585 /* static char *fstrings; */
1586 #define fstrings (G.split_f0__fstrings)
1597 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1600 for (i = 0; i < n; i++) {
1601 Fields[i].string = nextword(&s);
1602 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1605 /* set NF manually to avoid side effects */
1607 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1608 intvar[NF]->number = nfields;
1612 /* perform additional actions when some internal variables changed */
1613 static void handle_special(var *v)
1617 const char *sep, *s;
1618 int sl, l, len, i, bsize;
1620 if (!(v->type & VF_SPECIAL))
1623 if (v == intvar[NF]) {
1624 n = (int)getvar_i(v);
1627 /* recalculate $0 */
1628 sep = getvar_s(intvar[OFS]);
1632 for (i = 0; i < n; i++) {
1633 s = getvar_s(&Fields[i]);
1636 memcpy(b+len, sep, sl);
1639 qrealloc(&b, len+l+sl, &bsize);
1640 memcpy(b+len, s, l);
1645 setvar_p(intvar[F0], b);
1648 } else if (v == intvar[F0]) {
1649 is_f0_split = FALSE;
1651 } else if (v == intvar[FS]) {
1652 mk_splitter(getvar_s(v), &fsplitter);
1654 } else if (v == intvar[RS]) {
1655 mk_splitter(getvar_s(v), &rsplitter);
1657 } else if (v == intvar[IGNORECASE]) {
1661 n = getvar_i(intvar[NF]);
1662 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1663 /* right here v is invalid. Just to note... */
1667 /* step through func/builtin/etc arguments */
1668 static node *nextarg(node **pn)
1673 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1682 static void hashwalk_init(var *v, xhash *array)
1688 if (v->type & VF_WALK)
1692 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1693 w[0] = w[1] = (char *)(w + 2);
1694 for (i = 0; i < array->csize; i++) {
1695 hi = array->items[i];
1697 strcpy(*w, hi->name);
1704 static int hashwalk_next(var *v)
1712 setvar_s(v, nextword(w+1));
1716 /* evaluate node, return 1 when result is true, 0 otherwise */
1717 static int ptest(node *pattern)
1719 /* ptest__v is "static": to save stack space? */
1720 return istrue(evaluate(pattern, &G.ptest__v));
1723 /* Read next record from stream rsm into a variable v.
 *
 * Data is read straight from the stream's file descriptor (fileno() +
 * safe_read()) into a buffer that is grown with qrealloc() as needed.
 * How the end of a record is located depends on rsplitter:
 *   - OC_REGEXP splitter: first regexec() match (the ire variant of the
 *     compiled regex is used when icase is set)
 *   - non-NUL separator character c: strchr(), falling back to the first
 *     NUL byte in the data read so far
 *   - otherwise: newlines are skipped and a "\n\n" sequence is searched for
 * RT receives the text at the separator position (b+so); ERRNO receives
 * errno on the visible error path.
 * Callers treat a return value greater than 0 as "a record was read".
 */
1724 static int awk_getline(rstream *rsm, var *v)
1727 regmatch_t pmatch[2];
1728 int a, p, pp=0, size;
1729 int fd, so, eo, r, rp;
1732 /* we're using our own buffer since we need access to accumulating
1735 fd = fileno(rsm->F);
1740 c = (char) rsplitter.n.info;
1743 if (!m) qrealloc(&m, 256, &size);
1749 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1750 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1751 b, 1, pmatch, 0) == 0) {
1752 so = pmatch[0].rm_so;
1753 eo = pmatch[0].rm_eo;
1757 } else if (c != '\0') {
1758 s = strchr(b+pp, c);
1759 if (!s) s = memchr(b+pp, '\0', p - pp);
1766 while (b[rp] == '\n')
1768 s = strstr(b+rp, "\n\n");
1771 while (b[eo] == '\n') eo++;
1779 memmove(m, (const void *)(m+a), p+1);
1784 qrealloc(&m, a+p+128, &size);
1787 p += safe_read(fd, b+p, size-p-1);
1791 setvar_i(intvar[ERRNO], errno);
1800 c = b[so]; b[so] = '\0';
1804 c = b[eo]; b[eo] = '\0';
1805 setvar_s(intvar[RT], b+so);
// Format number n into b (at most size bytes; all output goes through
// snprintf).
//  - int_as_int set and n equal to its own (int) conversion: printed
//    with "%d", format is ignored.
//  - otherwise the last character of format picks the conversion:
//    one of "diouxX" prints (int)n, one of "eEfgG" prints n as a double,
//    anything else raises EMSG_INV_FMT.
// NOTE(review): the value handed back is presumably snprintf's result r;
// awk_printf() adds it to its output offset - confirm.
1817 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1821 const char *s = format;
1823 if (int_as_int && n == (int)n) {
1824 r = snprintf(b, size, "%d", (int)n);
// leave c holding the last character of the format string
1826 do { c = *s; } while (c && *++s);
1827 if (strchr("diouxX", c)) {
1828 r = snprintf(b, size, format, (int)n);
1829 } else if (strchr("eEfgG", c)) {
1830 r = snprintf(b, size, format, n);
1832 syntax_error(EMSG_INV_FMT);
1839 /* Formatted output into an allocated buffer, return ptr to buffer.
 *
 * The first argument in list n is evaluated and duplicated as the format
 * string; further arguments are pulled with nextarg() as conversions need
 * them. Literal text and "%%" are skipped over; the flag-scanning loop can
 * raise the "formats are not supported" syntax error.
 * Conversions, selected by c:
 *   'c' or 0      - numeric argument: the character with that code,
 *                   otherwise the first character of the argument string
 *   's'           - the buffer is grown by the string's length first
 *   anything else - delegated to fmt_num()
 * The buffer is finally resized to i + 1 bytes with xrealloc().
 */
1840 static char *awk_printf(node *n)
1845 int i, j, incr, bsize;
1850 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
/* advance f past literal text; "%%" does not start a conversion */
1855 while (*f && (*f != '%' || *(++f) == '%'))
1857 while (*f && !isalpha(*f)) {
1859 syntax_error("%*x formats are not supported");
/* reserve room for this format piece plus MAXVARFMT bytes of output */
1863 incr = (f - s) + MAXVARFMT;
1864 qrealloc(&b, incr + i, &bsize);
1869 arg = evaluate(nextarg(&n), v);
1872 if (c == 'c' || !c) {
1873 i += sprintf(b+i, s, is_numeric(arg) ?
1874 (char)getvar_i(arg) : *getvar_s(arg));
1875 } else if (c == 's') {
1877 qrealloc(&b, incr+i+strlen(s1), &bsize);
1878 i += sprintf(b+i, s, s1);
1880 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1884 /* if there was an error while sprintf, return value is negative */
1888 b = xrealloc(b, i + 1);
/* Parameters, as used in the body below:
 *   rn        - regex node, turned into a regex_t with as_regex()
 *   repl      - replacement text; '&' and, when ex is set, the digits 0-9
 *               are treated specially and copy text located via pmatch[]
 *   nm        - which match to replace (see the original note below)
 *   src, dest - source and destination variables; NULL selects $0
 * Up to 10 (sub)matches are requested from regexec(); every search except
 * one starting at the beginning of the source string passes REG_NOTBOL.
 */
1895 /* common substitution routine
1896 * replace (nm) substring of (src) that match (n) with (repl), store
1897 * result into (dest), return number of substitutions. If nm=0, replace
1898 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1899 * subexpression matching (\1-\9)
1901 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1906 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1907 regmatch_t pmatch[10];
1910 re = as_regex(rn, &sreg);
1911 if (!src) src = intvar[F0];
1912 if (!dest) dest = intvar[F0];
1917 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1918 so = pmatch[0].rm_so;
1919 eo = pmatch[0].rm_eo;
1921 qrealloc(&ds, di + eo + rl, &dssize);
1922 memcpy(ds + di, sp, eo);
1928 for (s = repl; *s; s++) {
1934 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1935 di -= ((nbs + 3) >> 1);
1944 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1945 qrealloc(&ds, di + rl + n, &dssize);
1946 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1958 if (!ds[di++]) break;
// append whatever is left of the source after the last processed match
1962 qrealloc(&ds, di + strlen(sp), &dssize);
1963 strcpy(ds + di, sp);
// free the regex only when as_regex() handed back our own sreg
1965 if (re == &sreg) regfree(re);
// Execute the built-in function described by node op; results are stored
// through res.
// Up to four arguments are collected with nextarg(): an[] holds the argument
// nodes, av[] the evaluated values (when isr has a 0x09000000 bit set) and
// as[] their string form (when isr has the 0x08000000 bit set).
// info >> 30 is the minimum argument count; fewer arguments raise
// EMSG_TOO_FEW_ARGS. The operation itself is selected by info & OPNMASK.
1969 static var *exec_builtin(node *op, var *res)
1971 #define tspl (G.exec_builtin__tspl)
1978 regmatch_t pmatch[2];
1988 isr = info = op->info;
1991 av[2] = av[3] = NULL;
1992 for (i = 0; i < 4 && op; i++) {
1993 an[i] = nextarg(&op);
1994 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1995 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2000 if (nargs < (info >> 30))
2001 syntax_error(EMSG_TOO_FEW_ARGS);
2003 switch (info & OPNMASK) {
2006 #if ENABLE_FEATURE_AWK_MATH
// NOTE(review): av[i] indexes with the counter left over from the argument
// loop above rather than a fixed slot; av[0] looks intended - confirm.
2007 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2009 syntax_error(EMSG_NO_MATH);
// a regexp node is used as the splitter directly; any other third argument
// is evaluated and turned into a splitter with mk_splitter()
2015 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2016 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2021 n = awk_split(as[0], spl, &s);
2023 clear_array(iamarray(av[1]));
2024 for (i=1; i<=n; i++)
2025 setari_u(av[1], i, nextword(&s1));
// convert the 1-based start position to a 0-based offset
2032 i = getvar_i(av[1]) - 1;
2035 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2038 strncpy(s, as[0]+i, n);
2044 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2048 setvar_i(res, ~(long)getvar_i(av[0]));
2052 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2056 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2060 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2064 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2074 s1 = s = xstrdup(as[0]);
2076 *s1 = (*to_xxx)(*s1);
2085 l = strlen(as[0]) - ll;
2086 if (ll > 0 && l >= 0) {
2088 s = strstr(as[0], as[1]);
2089 if (s) n = (s - as[0]) + 1;
2091 /* this piece of code is terribly slow and
2092 * really should be rewritten
2094 for (i=0; i<=l; i++) {
2095 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2107 tt = getvar_i(av[1]);
2110 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2111 i = strftime(g_buf, MAXVARFMT,
2112 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2115 setvar_s(res, g_buf);
2119 re = as_regex(an[1], &sreg);
2120 n = regexec(re, as[0], 1, pmatch, 0);
2125 pmatch[0].rm_so = 0;
2126 pmatch[0].rm_eo = -1;
// RSTART and RLENGTH are published as ordinary variables
2128 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2129 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2130 setvar_i(res, pmatch[0].rm_so);
2131 if (re == &sreg) regfree(re);
2135 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2139 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2143 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
// evaluate(op, res): op is the subtree to evaluate, res the place where the
// result may be stored; the value handed back points at the result.
// For each node, op->lineno is copied into g_lineno, the operands selected
// by the OF_RES1 / OF_RES2 / OF_STR1 / OF_STR2 / OF_NUM1 bits of opinfo are
// pre-computed into L and R, and the node class (opinfo & OPCLSMASK) is
// dispatched through XC(), which shifts the class down by 8 bits.
2153 * Evaluate node - the heart of the program. Supplied with subtree
2154 * and place where to store result. returns ptr to result.
2156 #define XC(n) ((n) >> 8)
2158 static var *evaluate(node *op, var *res)
2160 /* This procedure is recursive so we should count every byte */
2161 #define fnargs (G.evaluate__fnargs)
2162 /* seed is initialized to 1 */
2163 #define seed (G.evaluate__seed)
2164 #define sreg (G.evaluate__sreg)
2186 return setvar_s(res, NULL);
2192 opn = (opinfo & OPNMASK);
2193 g_lineno = op->lineno;
2195 /* execute inevitable things */
2197 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2198 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2199 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2200 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2201 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2203 switch (XC(opinfo & OPCLSMASK)) {
2205 /* -- iterative node type -- */
2209 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2210 /* it's range pattern */
2211 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2212 op->info |= OF_CHECKED;
2213 if (ptest(op1->r.n))
2214 op->info &= ~OF_CHECKED;
2221 op = (ptest(op1)) ? op->a.n : op->r.n;
2225 /* just evaluate an expression, also used as unconditional jump */
2229 /* branch, used in if-else and various loops */
2231 op = istrue(L.v) ? op->a.n : op->r.n;
2234 /* initialize for-in loop */
2235 case XC( OC_WALKINIT ):
2236 hashwalk_init(L.v, iamarray(R.v));
2239 /* get next array item */
2240 case XC( OC_WALKNEXT ):
2241 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2244 case XC( OC_PRINT ):
2245 case XC( OC_PRINTF ):
2248 X.rsm = newfile(R.s);
2251 X.rsm->F = popen(R.s, "w");
2252 if (X.rsm->F == NULL)
2253 bb_perror_msg_and_die("popen");
2256 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2262 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2264 fputs(getvar_s(intvar[F0]), X.F);
2267 L.v = evaluate(nextarg(&op1), v1);
2268 if (L.v->type & VF_NUMBER) {
2269 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2270 getvar_i(L.v), TRUE);
2273 fputs(getvar_s(L.v), X.F);
2276 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2279 fputs(getvar_s(intvar[ORS]), X.F);
2281 } else { /* OC_PRINTF */
2282 L.s = awk_printf(op1);
2289 case XC( OC_DELETE ):
2290 X.info = op1->info & OPCLSMASK;
2291 if (X.info == OC_VAR) {
2293 } else if (X.info == OC_FNARG) {
2294 R.v = &fnargs[op1->l.i];
2296 syntax_error(EMSG_NOT_ARRAY);
2301 L.s = getvar_s(evaluate(op1->r.n, v1));
2302 hash_remove(iamarray(R.v), L.s);
2304 clear_array(iamarray(R.v));
2308 case XC( OC_NEWSOURCE ):
2309 g_progname = op->l.s;
2312 case XC( OC_RETURN ):
2316 case XC( OC_NEXTFILE ):
2327 /* -- recursive node type -- */
2331 if (L.v == intvar[NF])
2335 case XC( OC_FNARG ):
2336 L.v = &fnargs[op->l.i];
2338 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2342 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2345 case XC( OC_REGEXP ):
2347 L.s = getvar_s(intvar[F0]);
2350 case XC( OC_MATCH ):
2353 X.re = as_regex(op1, &sreg);
2354 R.i = regexec(X.re, L.s, 0, NULL, 0);
2355 if (X.re == &sreg) regfree(X.re);
2356 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2360 /* if source is a temporary string, just relink it to dest */
2361 if (R.v == v1+1 && R.v->string) {
2362 res = setvar_p(L.v, R.v->string);
2365 res = copyvar(L.v, R.v);
2369 case XC( OC_TERNARY ):
2370 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2371 syntax_error(EMSG_POSSIBLE_ERROR);
2372 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2376 if (!op->r.f->body.first)
2377 syntax_error(EMSG_UNDEF_FUNC);
2379 X.v = R.v = nvalloc(op->r.f->nargs+1);
2381 L.v = evaluate(nextarg(&op1), v1);
2383 R.v->type |= VF_CHILD;
2384 R.v->x.parent = L.v;
2385 if (++R.v - X.v >= op->r.f->nargs)
2393 res = evaluate(op->r.f->body.first, res);
2400 case XC( OC_GETLINE ):
2401 case XC( OC_PGETLINE ):
2403 X.rsm = newfile(L.s);
2405 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2406 X.rsm->F = popen(L.s, "r");
2407 X.rsm->is_pipe = TRUE;
2409 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
2413 if (!iF) iF = next_input_file();
2418 setvar_i(intvar[ERRNO], errno);
2426 L.i = awk_getline(X.rsm, R.v);
2429 incvar(intvar[FNR]);
2436 /* simple builtins */
2437 case XC( OC_FBLTIN ):
2445 R.d = (double)rand() / (double)RAND_MAX;
2447 #if ENABLE_FEATURE_AWK_MATH
2473 syntax_error(EMSG_NO_MATH);
2478 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2488 L.s = getvar_s(intvar[F0]);
2494 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2495 ? (system(L.s) >> 8) : 0;
2503 X.rsm = newfile(L.s);
2512 X.rsm = (rstream *)hash_search(fdhash, L.s);
2514 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2515 free(X.rsm->buffer);
2516 hash_remove(fdhash, L.s);
2519 setvar_i(intvar[ERRNO], errno);
2526 case XC( OC_BUILTIN ):
2527 res = exec_builtin(op, res);
2530 case XC( OC_SPRINTF ):
2531 setvar_p(res, awk_printf(op1));
2534 case XC( OC_UNARY ):
2536 L.d = R.d = getvar_i(R.v);
2551 L.d = istrue(X.v) ? 0 : 1;
2562 case XC( OC_FIELD ):
2563 R.i = (int)getvar_i(R.v);
2570 res = &Fields[R.i - 1];
2574 /* concatenation (" ") and index joining (",") */
2575 case XC( OC_CONCAT ):
2576 case XC( OC_COMMA ):
2577 opn = strlen(L.s) + strlen(R.s) + 2;
2580 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2581 L.s = getvar_s(intvar[SUBSEP]);
2582 X.s = xrealloc(X.s, opn + strlen(L.s));
2590 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2594 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2597 case XC( OC_BINARY ):
2598 case XC( OC_REPLACE ):
2599 R.d = getvar_i(R.v);
2611 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2615 #if ENABLE_FEATURE_AWK_MATH
2616 L.d = pow(L.d, R.d);
2618 syntax_error(EMSG_NO_MATH);
2622 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
/* NOTE(review): the quotient is truncated through int, so the remainder is
 * only meaningful while L.d / R.d fits in an int - confirm whether that
 * limit is acceptable here */
2623 L.d -= (int)(L.d / R.d) * R.d;
/* OC_BINARY stores into res; OC_REPLACE stores back into the left operand */
2626 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2629 case XC( OC_COMPARE ):
/* numeric comparison only when both sides are numeric, else string
 * comparison (case-insensitive when icase is set) */
2630 if (is_numeric(L.v) && is_numeric(R.v)) {
2631 L.d = getvar_i(L.v) - getvar_i(R.v);
2633 L.s = getvar_s(L.v);
2634 R.s = getvar_s(R.v);
2635 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2637 switch (opn & 0xfe) {
2648 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2652 syntax_error(EMSG_POSSIBLE_ERROR);
2654 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2656 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2669 /* -------- main & co. -------- */
/* Shut the interpreter down: the END sequence (endseq.first) is evaluated,
 * then every bucket of fdhash is walked and each stream that is open and
 * marked as a pipe is closed with pclose().
 * r - presumably the process exit status - TODO confirm.
 */
2671 static int awk_exit(int r)
2682 evaluate(endseq.first, &tv);
2685 /* waiting for children */
2686 for (i = 0; i < fdhash->csize; i++) {
2687 hi = fdhash->items[i];
2689 if (hi->data.rs.F && hi->data.rs.is_pipe)
2690 pclose(hi->data.rs.F);
2698 /* If expr looks like "var=value", perform the assignment and return 1,
2699 * otherwise return 0.
 *
 * Works on a private copy of expr (xstrdup). The expression is rejected
 * when its first character fails isalnum_() or when it contains no '='.
 * The value part is rewritten character by character through nextchar()
 * and then assigned with setvar_u() to the variable obtained from
 * newvar(exprc).
 */
2700 static int is_assignment(const char *expr)
2702 char *exprc, *s, *s0, *s1;
2704 exprc = xstrdup(expr);
2705 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2713 *(s1++) = nextchar(&s);
2716 setvar_u(newvar(exprc), s0);
2721 /* Switch to next input file.
 *
 * The previously opened stream, if any, is closed and its pos/adv counters
 * are reset. ARGIND+1 >= ARGC is tested to detect that no arguments are
 * left. Otherwise ARGIND is incremented and the matching ARGV element is
 * looked up; it is opened for reading with afopen() only when it is
 * non-empty and is_assignment() does not accept it as "var=value".
 * files_happen is set and FILENAME is updated with the chosen name.
 */
2722 static rstream *next_input_file(void)
2724 #define rsm (G.next_input_file__rsm)
2725 #define files_happen (G.next_input_file__files_happen)
2728 const char *fname, *ind;
2730 if (rsm.F) fclose(rsm.F);
2732 rsm.pos = rsm.adv = 0;
2735 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2741 ind = getvar_s(incvar(intvar[ARGIND]));
2742 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2743 if (fname && *fname && !is_assignment(fname))
2744 F = afopen(fname, "r");
2748 files_happen = TRUE;
2749 setvar_s(intvar[FILENAME], fname);
2756 int awk_main(int argc, char **argv);
2757 int awk_main(int argc, char **argv)
2760 char *opt_F, *opt_W;
2761 llist_t *opt_v = NULL;
2766 char *vnames = (char *)vNames; /* cheat */
2767 char *vvalues = (char *)vValues;
2771 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2772 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2773 if (ENABLE_LOCALE_SUPPORT)
2774 setlocale(LC_NUMERIC, "C");
2778 /* allocate global buffer */
2779 g_buf = xmalloc(MAXVARFMT + 1);
2781 vhash = hash_init();
2782 ahash = hash_init();
2783 fdhash = hash_init();
2784 fnhash = hash_init();
2786 /* initialize variables */
2787 for (i = 0; *vnames; i++) {
2788 intvar[i] = v = newvar(nextword(&vnames));
2789 if (*vvalues != '\377')
2790 setvar_s(v, nextword(&vvalues));
2794 if (*vnames == '*') {
2795 v->type |= VF_SPECIAL;
2800 handle_special(intvar[FS]);
2801 handle_special(intvar[RS]);
2803 newfile("/dev/stdin")->F = stdin;
2804 newfile("/dev/stdout")->F = stdout;
2805 newfile("/dev/stderr")->F = stderr;
2807 /* Huh, people report that sometimes environ is NULL. Oh well. */
2808 if (environ) for (envp = environ; *envp; envp++) {
2809 /* environ is writable, thus we don't strdup it needlessly */
2811 char *s1 = strchr(s, '=');
2814 /* Both findvar and setvar_u take const char*
2815 * as 2nd arg -> environment is not trashed */
2816 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2820 opt_complementary = "v::";
2821 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2825 setvar_s(intvar[FS], opt_F); // -F
2826 while (opt_v) { /* -v */
2827 if (!is_assignment(llist_pop(&opt_v)))
2830 if (opt & 0x4) { // -f
2831 char *s = s; /* die, gcc, die */
2832 FILE *from_file = afopen(g_progname, "r");
2833 /* one byte is reserved for some trick in next_token */
2834 if (fseek(from_file, 0, SEEK_END) == 0) {
2835 flen = ftell(from_file);
2836 s = xmalloc(flen + 4);
2837 fseek(from_file, 0, SEEK_SET);
2838 i = 1 + fread(s + 1, 1, flen, from_file);
2840 for (i = j = 1; j > 0; i += j) {
2841 s = xrealloc(s, i + 4096);
2842 j = fread(s + i, 1, 4094, from_file);
2847 parse_program(s + 1);
2849 } else { // no -f: take program from 1st parameter
2852 g_progname = "cmd. line";
2853 parse_program(*argv++);
2856 if (opt & 0x8) // -W
2857 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2859 /* fill in ARGV array */
2860 setvar_i(intvar[ARGC], argc + 1);
2861 setari_u(intvar[ARGV], 0, "awk");
2864 setari_u(intvar[ARGV], ++i, *argv++);
2866 evaluate(beginseq.first, &tv);
2867 if (!mainseq.first && !endseq.first)
2868 awk_exit(EXIT_SUCCESS);
2870 /* input file could already be opened in BEGIN block */
2871 if (!iF) iF = next_input_file();
2873 /* passing through input files */
2876 setvar_i(intvar[FNR], 0);
2878 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2881 incvar(intvar[FNR]);
2882 evaluate(mainseq.first, &tv);
2889 syntax_error(strerror(errno));
2891 iF = next_input_file();
2894 awk_exit(EXIT_SUCCESS);