1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
13 extern char **environ;
15 /* This is a NOEXEC applet. Be very careful! */
22 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
23 #define VF_ARRAY 0x0002 /* 1 = it's an array */
25 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
26 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
27 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
28 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
29 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
30 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
31 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
33 /* these flags are static, don't change them when value is changed */
34 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
37 typedef struct var_s {
38 unsigned type; /* flags */
42 int aidx; /* func arg idx (for compilation stage) */
43 struct xhash_s *array; /* array ptr */
44 struct var_s *parent; /* for func args, ptr to actual parameter */
45 char **walker; /* list of array elements (for..in) */
49 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50 typedef struct chain_s {
53 const char *programname;
57 typedef struct func_s {
63 typedef struct rstream_s {
72 typedef struct hash_item_s {
74 struct var_s v; /* variable/array hash */
75 struct rstream_s rs; /* redirect streams hash */
76 struct func_s f; /* functions hash */
78 struct hash_item_s *next; /* next in chain */
79 char name[1]; /* really it's longer */
82 typedef struct xhash_s {
83 unsigned nel; /* num of elements */
84 unsigned csize; /* current hash size */
85 unsigned nprime; /* next hash size in PRIMES[] */
86 unsigned glen; /* summary length of item names */
87 struct hash_item_s **items;
91 typedef struct node_s {
112 /* Block of temporary variables */
113 typedef struct nvblock_s {
116 struct nvblock_s *prev;
117 struct nvblock_s *next;
121 typedef struct tsplitter_s {
126 /* simple token classes */
127 /* Order and hex values are very important!!! See next_token() */
128 #define TC_SEQSTART 1 /* ( */
129 #define TC_SEQTERM (1 << 1) /* ) */
130 #define TC_REGEXP (1 << 2) /* /.../ */
131 #define TC_OUTRDR (1 << 3) /* | > >> */
132 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
133 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
134 #define TC_BINOPX (1 << 6) /* two-opnd operator */
135 #define TC_IN (1 << 7)
136 #define TC_COMMA (1 << 8)
137 #define TC_PIPE (1 << 9) /* input redirection pipe */
138 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
139 #define TC_ARRTERM (1 << 11) /* ] */
140 #define TC_GRPSTART (1 << 12) /* { */
141 #define TC_GRPTERM (1 << 13) /* } */
142 #define TC_SEMICOL (1 << 14)
143 #define TC_NEWLINE (1 << 15)
144 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
145 #define TC_WHILE (1 << 17)
146 #define TC_ELSE (1 << 18)
147 #define TC_BUILTIN (1 << 19)
148 #define TC_GETLINE (1 << 20)
149 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
150 #define TC_BEGIN (1 << 22)
151 #define TC_END (1 << 23)
152 #define TC_EOF (1 << 24)
153 #define TC_VARIABLE (1 << 25)
154 #define TC_ARRAY (1 << 26)
155 #define TC_FUNCTION (1 << 27)
156 #define TC_STRING (1 << 28)
157 #define TC_NUMBER (1 << 29)
159 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
161 /* combined token classes */
162 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
165 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
167 #define TC_STATEMNT (TC_STATX | TC_WHILE)
168 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
170 /* word tokens, cannot mean something else if not expected */
171 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
172 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
174 /* discard newlines after these */
175 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
176 | TC_BINOP | TC_OPTERM)
178 /* what can expression begin with */
179 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
180 /* what can group begin with */
181 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
183 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184 /* operator is inserted between them */
185 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
186 | TC_STRING | TC_NUMBER | TC_UOPPOST)
187 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
189 #define OF_RES1 0x010000
190 #define OF_RES2 0x020000
191 #define OF_STR1 0x040000
192 #define OF_STR2 0x080000
193 #define OF_NUM1 0x100000
194 #define OF_CHECKED 0x200000
196 /* combined operator flags */
199 #define xS (OF_RES2 | OF_STR2)
201 #define VV (OF_RES1 | OF_RES2)
202 #define Nx (OF_RES1 | OF_NUM1)
203 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
204 #define Sx (OF_RES1 | OF_STR1)
205 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
206 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
208 #define OPCLSMASK 0xFF00
209 #define OPNMASK 0x007F
211 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
212 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
213 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place priority/arg-spec byte x in bits 24..31 of an info word.
 * The argument is parenthesized so compound expressions expand correctly,
 * and widened to uint32_t so values with the top bit set (e.g. 0x83) do
 * not shift into the sign bit of a signed int. */
#define P(x) ((uint32_t)(x) << 24)
216 #define PRIMASK 0x7F000000
217 #define PRIMASK2 0x7E000000
219 /* Operation classes */
221 #define SHIFT_TIL_THIS 0x0600
222 #define RECUR_FROM_THIS 0x1000
225 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
226 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
228 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
229 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
230 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
232 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
233 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
234 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
235 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
236 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
237 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
238 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
239 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
242 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
246 /* simple builtins */
248 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
249 F_ti, F_le, F_sy, F_ff, F_cl
254 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
256 B_an, B_co, B_ls, B_or, B_rs, B_xo,
259 /* tokens and their corresponding info values */
261 #define NTC "\377" /* switch to next token class (tc<<1) */
264 #define OC_B OC_BUILTIN
266 static const char tokenlist[] =
269 "\1/" NTC /* REGEXP */
270 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
271 "\2++" "\2--" NTC /* UOPPOST */
272 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
273 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
274 "\2*=" "\2/=" "\2%=" "\2^="
275 "\1+" "\1-" "\3**=" "\2**"
276 "\1/" "\1%" "\1^" "\1*"
277 "\2!=" "\2>=" "\2<=" "\1>"
278 "\1<" "\2!~" "\1~" "\2&&"
279 "\2||" "\1?" "\1:" NTC
283 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
289 "\2if" "\2do" "\3for" "\5break" /* STATX */
290 "\10continue" "\6delete" "\5print"
291 "\6printf" "\4next" "\10nextfile"
292 "\6return" "\4exit" NTC
296 "\3and" "\5compl" "\6lshift" "\2or"
298 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
299 "\3cos" "\3exp" "\3int" "\3log"
300 "\4rand" "\3sin" "\4sqrt" "\5srand"
301 "\6gensub" "\4gsub" "\5index" "\6length"
302 "\5match" "\5split" "\7sprintf" "\3sub"
303 "\6substr" "\7systime" "\10strftime"
304 "\7tolower" "\7toupper" NTC
306 "\4func" "\10function" NTC
311 static const uint32_t tokeninfo[] = {
315 xS|'a', xS|'w', xS|'|',
316 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
317 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
319 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
320 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
321 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
322 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
323 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
324 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
325 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
326 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
327 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
328 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
329 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
330 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
331 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
332 OC_COLON|xx|P(67)|':',
335 OC_PGETLINE|SV|P(37),
336 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
337 OC_UNARY|xV|P(19)|'!',
343 ST_IF, ST_DO, ST_FOR, OC_BREAK,
344 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
345 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
346 OC_RETURN|Vx, OC_EXIT|Nx,
350 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
357 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
358 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
365 /* internal variable names and their initial values */
366 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
368 CONVFMT, OFMT, FS, OFS,
369 ORS, RS, RT, FILENAME,
370 SUBSEP, ARGIND, ARGC, ARGV,
373 ENVIRON, F0, NUM_INTERNAL_VARS
376 static const char vNames[] =
377 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
378 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
379 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
381 "NR\0" "NF\0*" "IGNORECASE\0*"
382 "ENVIRON\0" "$\0*" "\0";
384 static const char vValues[] =
385 "%.6g\0" "%.6g\0" " \0" " \0"
386 "\n\0" "\n\0" "\0" "\0"
390 /* hash size may grow to these values */
/* initial hash table size (no trailing ';' so it can be used in expressions) */
#define FIRST_PRIME 61
392 static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
396 /* Globals. Split in two parts so that first one is addressed
397 * with (mostly short) negative offsets */
399 chain beginseq, mainseq, endseq, *seq;
400 node *break_ptr, *continue_ptr;
402 xhash *vhash, *ahash, *fdhash, *fnhash;
403 const char *g_progname;
406 int maxfields; /* used in fsrealloc() only */
415 smallint is_f0_split;
418 uint32_t t_info; /* often used */
424 var *intvar[NUM_INTERNAL_VARS]; /* often used */
426 /* former statics from various functions */
427 char *split_f0__fstrings;
429 uint32_t next_token__save_tclass;
430 uint32_t next_token__save_info;
431 uint32_t next_token__ltclass;
432 smallint next_token__concat_inserted;
434 smallint next_input_file__files_happen;
435 rstream next_input_file__rsm;
437 var *evaluate__fnargs;
438 unsigned evaluate__seed;
439 regex_t evaluate__sreg;
443 tsplitter exec_builtin__tspl;
445 /* biggest and least used members go last */
447 tsplitter fsplitter, rsplitter;
449 #define G1 (ptr_to_globals[-1])
450 #define G (*(struct globals2 *const)ptr_to_globals)
451 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
452 /* char G1size[sizeof(G1)]; - 0x6c */
453 /* char Gsize[sizeof(G)]; - 0x1cc */
454 /* Trying to keep most of members accessible with short offsets: */
455 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
456 #define beginseq (G1.beginseq )
457 #define mainseq (G1.mainseq )
458 #define endseq (G1.endseq )
459 #define seq (G1.seq )
460 #define break_ptr (G1.break_ptr )
461 #define continue_ptr (G1.continue_ptr)
463 #define vhash (G1.vhash )
464 #define ahash (G1.ahash )
465 #define fdhash (G1.fdhash )
466 #define fnhash (G1.fnhash )
467 #define g_progname (G1.g_progname )
468 #define g_lineno (G1.g_lineno )
469 #define nfields (G1.nfields )
470 #define maxfields (G1.maxfields )
471 #define Fields (G1.Fields )
472 #define g_cb (G1.g_cb )
473 #define g_pos (G1.g_pos )
474 #define g_buf (G1.g_buf )
475 #define icase (G1.icase )
476 #define exiting (G1.exiting )
477 #define nextrec (G1.nextrec )
478 #define nextfile (G1.nextfile )
479 #define is_f0_split (G1.is_f0_split )
480 #define t_info (G.t_info )
481 #define t_tclass (G.t_tclass )
482 #define t_string (G.t_string )
483 #define t_double (G.t_double )
484 #define t_lineno (G.t_lineno )
485 #define t_rollback (G.t_rollback )
486 #define intvar (G.intvar )
487 #define fsplitter (G.fsplitter )
488 #define rsplitter (G.rsplitter )
489 #define INIT_G() do { \
490 PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \
491 G.next_token__ltclass = TC_OPTERM; \
492 G.evaluate__seed = 1; \
496 /* function prototypes */
497 static void handle_special(var *);
498 static node *parse_expr(uint32_t);
499 static void chain_group(void);
500 static var *evaluate(node *, var *);
501 static rstream *next_input_file(void);
502 static int fmt_num(char *, int, const char *, double, int);
503 static int awk_exit(int) ATTRIBUTE_NORETURN;
505 /* ---- error handling ---- */
507 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
508 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
509 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
510 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
511 static const char EMSG_INV_FMT[] = "Invalid format specifier";
512 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
513 static const char EMSG_NOT_ARRAY[] = "Not an array";
514 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
515 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
516 #if !ENABLE_FEATURE_AWK_MATH
517 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
520 static void zero_out_var(var * vp)
522 memset(vp, 0, sizeof(*vp));
525 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
526 static void syntax_error(const char * const message)
528 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
531 /* ---- hash stuff ---- */
533 static unsigned hashidx(const char *name)
537 while (*name) idx = *name++ + (idx << 6) - idx;
541 /* create new hash */
542 static xhash *hash_init(void)
546 newhash = xzalloc(sizeof(xhash));
547 newhash->csize = FIRST_PRIME;
548 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
558 hi = hash->items [ hashidx(name) % hash->csize ];
560 if (strcmp(hi->name, name) == 0)
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
570 unsigned newsize, i, idx;
571 hash_item **newitems, *hi, *thi;
573 if (hash->nprime == ARRAY_SIZE(PRIMES))
576 newsize = PRIMES[hash->nprime++];
577 newitems = xzalloc(newsize * sizeof(hash_item *));
579 for (i = 0; i < hash->csize; i++) {
584 idx = hashidx(thi->name) % newsize;
585 thi->next = newitems[idx];
591 hash->csize = newsize;
592 hash->items = newitems;
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
602 hi = hash_search(hash, name);
604 if (++hash->nel / hash->csize > 10)
607 l = strlen(name) + 1;
608 hi = xzalloc(sizeof(hash_item) + l);
609 memcpy(hi->name, name, l);
611 idx = hashidx(name) % hash->csize;
612 hi->next = hash->items[idx];
613 hash->items[idx] = hi;
619 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
620 #define newvar(name) ((var*) hash_find(vhash, (name)))
621 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
624 static void hash_remove(xhash *hash, const char *name)
626 hash_item *hi, **phi;
628 phi = &(hash->items[hashidx(name) % hash->csize]);
631 if (strcmp(hi->name, name) == 0) {
632 hash->glen -= (strlen(name) + 1);
642 /* ------ some useful functions ------ */
644 static void skip_spaces(char **s)
649 if (*p == '\\' && p[1] == '\n') {
652 } else if (*p != ' ' && *p != '\t') {
660 static char *nextword(char **s)
664 while (*(*s)++) /* */;
669 static char nextchar(char **s)
675 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
676 if (c == '\\' && *s == pps) c = *((*s)++);
680 static int ALWAYS_INLINE isalnum_(int c)
682 return (isalnum(c) || c == '_');
/* Open path with the given mode via xfopen(); the single-character
 * name "-" yields stdin instead (mode is not consulted in that case). */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return xfopen(path, mode);
}
690 /* -------- working with variables (set/get/copy/etc) -------- */
692 static xhash *iamarray(var *v)
696 while (a->type & VF_CHILD)
699 if (!(a->type & VF_ARRAY)) {
701 a->x.array = hash_init();
706 static void clear_array(xhash *array)
711 for (i = 0; i < array->csize; i++) {
712 hi = array->items[i];
716 free(thi->data.v.string);
719 array->items[i] = NULL;
721 array->glen = array->nel = 0;
724 /* clear a variable */
725 static var *clrvar(var *v)
727 if (!(v->type & VF_FSTR))
730 v->type &= VF_DONTTOUCH;
736 /* assign string value to variable */
737 static var *setvar_p(var *v, char *value)
745 /* same as setvar_p but make a copy of string */
746 static var *setvar_s(var *v, const char *value)
748 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
751 /* same as setvar_s but set USER flag */
752 static var *setvar_u(var *v, const char *value)
759 /* set array element to user string */
760 static void setari_u(var *a, int idx, const char *s)
762 char sidx[sizeof(int)*3 + 1];
765 sprintf(sidx, "%d", idx);
766 v = findvar(iamarray(a), sidx);
770 /* assign numeric value to variable */
771 static var *setvar_i(var *v, double value)
774 v->type |= VF_NUMBER;
780 static const char *getvar_s(var *v)
782 /* if v is numeric and has no cached string, convert it to string */
783 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
784 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
785 v->string = xstrdup(g_buf);
786 v->type |= VF_CACHED;
788 return (v->string == NULL) ? "" : v->string;
791 static double getvar_i(var *v)
795 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
799 v->number = strtod(s, &s);
800 if (v->type & VF_USER) {
808 v->type |= VF_CACHED;
813 static var *copyvar(var *dest, const var *src)
817 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
818 dest->number = src->number;
820 dest->string = xstrdup(src->string);
822 handle_special(dest);
826 static var *incvar(var *v)
828 return setvar_i(v, getvar_i(v)+1.);
831 /* return true if v is number or numeric string */
832 static int is_numeric(var *v)
835 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
838 /* return 1 when value of v corresponds to true, 0 otherwise */
839 static int istrue(var *v)
842 return (v->number == 0) ? 0 : 1;
843 return (v->string && *(v->string)) ? 1 : 0;
846 /* temporary variables allocator. Last allocated should be first freed */
847 static var *nvalloc(int n)
855 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
860 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
861 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
863 g_cb->pos = g_cb->nv;
866 if (pb) pb->next = g_cb;
872 while (v < g_cb->pos) {
881 static void nvfree(var *v)
885 if (v < g_cb->nv || v >= g_cb->pos)
886 syntax_error(EMSG_INTERNAL_ERROR);
888 for (p = v; p < g_cb->pos; p++) {
889 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
890 clear_array(iamarray(p));
891 free(p->x.array->items);
894 if (p->type & VF_WALK)
901 while (g_cb->prev && g_cb->pos == g_cb->nv) {
906 /* ------- awk program text parsing ------- */
908 /* Parse next token pointed by global pos, place results into global ttt.
 * If token isn't expected, give up. Return token class
911 static uint32_t next_token(uint32_t expected)
913 #define concat_inserted (G.next_token__concat_inserted)
914 #define save_tclass (G.next_token__save_tclass)
915 #define save_info (G.next_token__save_info)
916 /* Initialized to TC_OPTERM: */
917 #define ltclass (G.next_token__ltclass)
928 } else if (concat_inserted) {
929 concat_inserted = FALSE;
930 t_tclass = save_tclass;
939 while (*p != '\n' && *p != '\0')
948 } else if (*p == '\"') {
952 if (*p == '\0' || *p == '\n')
953 syntax_error(EMSG_UNEXP_EOS);
954 *(s++) = nextchar(&p);
960 } else if ((expected & TC_REGEXP) && *p == '/') {
964 if (*p == '\0' || *p == '\n')
965 syntax_error(EMSG_UNEXP_EOS);
969 *(s-1) = bb_process_escape_sequence((const char **)&p);
980 } else if (*p == '.' || isdigit(*p)) {
982 t_double = strtod(p, &p);
984 syntax_error(EMSG_UNEXP_TOKEN);
988 /* search for something known */
998 /* if token class is expected, token
999 * matches and it's not a longer word,
1000 * then this is what we are looking for
1002 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1003 && *tl == *p && strncmp(p, tl, l) == 0
1004 && !((tc & TC_WORD) && isalnum_(p[l]))
1015 /* it's a name (var/array/function),
1016 * otherwise it's something wrong
1019 syntax_error(EMSG_UNEXP_TOKEN);
1022 while (isalnum_(*(++p))) {
1027 /* also consume whitespace between functionname and bracket */
1028 if (!(expected & TC_VARIABLE))
1042 /* skipping newlines in some cases */
1043 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1046 /* insert concatenation operator when needed */
1047 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1048 concat_inserted = TRUE;
1052 t_info = OC_CONCAT | SS | P(35);
1059 /* Are we ready for this? */
1060 if (!(ltclass & expected))
1061 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1062 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1065 #undef concat_inserted
1071 static void rollback_token(void)
1076 static node *new_node(uint32_t info)
1080 n = xzalloc(sizeof(node));
1082 n->lineno = g_lineno;
1086 static node *mk_re_node(const char *s, node *n, regex_t *re)
1088 n->info = OC_REGEXP;
1091 xregcomp(re, s, REG_EXTENDED);
1092 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1097 static node *condition(void)
1099 next_token(TC_SEQSTART);
1100 return parse_expr(TC_SEQTERM);
1103 /* parse expression terminated by given argument, return ptr
1104 * to built subtree. Terminator is eaten by parse_expr */
1105 static node *parse_expr(uint32_t iexp)
1114 sn.r.n = glptr = NULL;
1115 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1117 while (!((tc = next_token(xtc)) & iexp)) {
1118 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1119 /* input redirection (<) attached to glptr node */
1120 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1122 xtc = TC_OPERAND | TC_UOPPRE;
1125 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1126 /* for binary and postfix-unary operators, jump back over
1127 * previous operators with higher priority */
1129 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1130 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1132 if ((t_info & OPCLSMASK) == OC_TERNARY)
1134 cn = vn->a.n->r.n = new_node(t_info);
1136 if (tc & TC_BINOP) {
1138 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1139 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1141 next_token(TC_GETLINE);
1142 /* give maximum priority to this pipe */
1143 cn->info &= ~PRIMASK;
1144 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1148 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1153 /* for operands and prefix-unary operators, attach them
1156 cn = vn->r.n = new_node(t_info);
1158 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1159 if (tc & (TC_OPERAND | TC_REGEXP)) {
1160 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1161 /* one should be very careful with switch on tclass -
1162 * only simple tclasses should be used! */
1167 v = hash_search(ahash, t_string);
1169 cn->info = OC_FNARG;
1170 cn->l.i = v->x.aidx;
1172 cn->l.v = newvar(t_string);
1174 if (tc & TC_ARRAY) {
1176 cn->r.n = parse_expr(TC_ARRTERM);
1183 v = cn->l.v = xzalloc(sizeof(var));
1185 setvar_i(v, t_double);
1187 setvar_s(v, t_string);
1191 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1196 cn->r.f = newfunc(t_string);
1197 cn->l.n = condition();
1201 cn = vn->r.n = parse_expr(TC_SEQTERM);
1207 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1211 cn->l.n = condition();
1220 /* add node to chain. Return ptr to alloc'd node */
1221 static node *chain_node(uint32_t info)
1226 seq->first = seq->last = new_node(0);
1228 if (seq->programname != g_progname) {
1229 seq->programname = g_progname;
1230 n = chain_node(OC_NEWSOURCE);
1231 n->l.s = xstrdup(g_progname);
1236 seq->last = n->a.n = new_node(OC_DONE);
1241 static void chain_expr(uint32_t info)
1245 n = chain_node(info);
1246 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1247 if (t_tclass & TC_GRPTERM)
1251 static node *chain_loop(node *nn)
1253 node *n, *n2, *save_brk, *save_cont;
1255 save_brk = break_ptr;
1256 save_cont = continue_ptr;
1258 n = chain_node(OC_BR | Vx);
1259 continue_ptr = new_node(OC_EXEC);
1260 break_ptr = new_node(OC_EXEC);
1262 n2 = chain_node(OC_EXEC | Vx);
1265 continue_ptr->a.n = n2;
1266 break_ptr->a.n = n->r.n = seq->last;
1268 continue_ptr = save_cont;
1269 break_ptr = save_brk;
1274 /* parse group and attach it to chain */
1275 static void chain_group(void)
1281 c = next_token(TC_GRPSEQ);
1282 } while (c & TC_NEWLINE);
1284 if (c & TC_GRPSTART) {
1285 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1286 if (t_tclass & TC_NEWLINE) continue;
1290 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1292 chain_expr(OC_EXEC | Vx);
1293 } else { /* TC_STATEMNT */
1294 switch (t_info & OPCLSMASK) {
1296 n = chain_node(OC_BR | Vx);
1297 n->l.n = condition();
1299 n2 = chain_node(OC_EXEC);
1301 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1303 n2->a.n = seq->last;
1311 n = chain_loop(NULL);
1316 n2 = chain_node(OC_EXEC);
1317 n = chain_loop(NULL);
1319 next_token(TC_WHILE);
1320 n->l.n = condition();
1324 next_token(TC_SEQSTART);
1325 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1326 if (t_tclass & TC_SEQTERM) { /* for-in */
1327 if ((n2->info & OPCLSMASK) != OC_IN)
1328 syntax_error(EMSG_UNEXP_TOKEN);
1329 n = chain_node(OC_WALKINIT | VV);
1332 n = chain_loop(NULL);
1333 n->info = OC_WALKNEXT | Vx;
1335 } else { /* for (;;) */
1336 n = chain_node(OC_EXEC | Vx);
1338 n2 = parse_expr(TC_SEMICOL);
1339 n3 = parse_expr(TC_SEQTERM);
1349 n = chain_node(t_info);
1350 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1351 if (t_tclass & TC_OUTRDR) {
1353 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1355 if (t_tclass & TC_GRPTERM)
1360 n = chain_node(OC_EXEC);
1365 n = chain_node(OC_EXEC);
1366 n->a.n = continue_ptr;
1369 /* delete, next, nextfile, return, exit */
1376 static void parse_program(char *p)
1385 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1386 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1388 if (tclass & TC_OPTERM)
1392 if (tclass & TC_BEGIN) {
1396 } else if (tclass & TC_END) {
1400 } else if (tclass & TC_FUNCDECL) {
1401 next_token(TC_FUNCTION);
1403 f = newfunc(t_string);
1404 f->body.first = NULL;
1406 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1407 v = findvar(ahash, t_string);
1408 v->x.aidx = (f->nargs)++;
1410 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1417 } else if (tclass & TC_OPSEQ) {
1419 cn = chain_node(OC_TEST);
1420 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1421 if (t_tclass & TC_GRPSTART) {
1425 chain_node(OC_PRINT);
1427 cn->r.n = mainseq.last;
1429 } else /* if (tclass & TC_GRPSTART) */ {
1437 /* -------- program execution part -------- */
1439 static node *mk_splitter(const char *s, tsplitter *spl)
1447 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1451 if (strlen(s) > 1) {
1452 mk_re_node(s, n, re);
1454 n->info = (uint32_t) *s;
1460 /* use node as a regular expression. Supplied with node ptr and regex_t
1461 * storage space. Return ptr to regex (if result points to preg, it should
 * be later regfree'd manually)
1464 static regex_t *as_regex(node *op, regex_t *preg)
1469 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1470 return icase ? op->r.ire : op->l.re;
1473 s = getvar_s(evaluate(op, v));
1474 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1479 /* gradually increasing buffer */
1480 static void qrealloc(char **b, int n, int *size)
1482 if (!*b || n >= *size)
1483 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1486 /* resize field storage space */
1487 static void fsrealloc(int size)
1491 if (size >= maxfields) {
1493 maxfields = size + 16;
1494 Fields = xrealloc(Fields, maxfields * sizeof(var));
1495 for (; i < maxfields; i++) {
1496 Fields[i].type = VF_SPECIAL;
1497 Fields[i].string = NULL;
1501 if (size < nfields) {
1502 for (i = size; i < nfields; i++) {
1509 static int awk_split(const char *s, node *spl, char **slist)
1514 regmatch_t pmatch[2];
1516 /* in worst case, each char would be a separate field */
1517 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1520 c[0] = c[1] = (char)spl->info;
1522 if (*getvar_s(intvar[RS]) == '\0')
1525 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1527 l = strcspn(s, c+2);
1528 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1529 && pmatch[0].rm_so <= l
1531 l = pmatch[0].rm_so;
1532 if (pmatch[0].rm_eo == 0) {
1537 pmatch[0].rm_eo = l;
1538 if (s[l]) pmatch[0].rm_eo++;
1544 s += pmatch[0].rm_eo;
1547 } else if (c[0] == '\0') { /* null split */
1553 } else if (c[0] != ' ') { /* single-character split */
1555 c[0] = toupper(c[0]);
1556 c[1] = tolower(c[1]);
1559 while ((s1 = strpbrk(s1, c))) {
1563 } else { /* space split */
1565 s = skip_whitespace(s);
1568 while (*s && !isspace(*s))
1576 static void split_f0(void)
1578 #define fstrings (G.split_f0__fstrings)
1589 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1592 for (i = 0; i < n; i++) {
1593 Fields[i].string = nextword(&s);
1594 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1597 /* set NF manually to avoid side effects */
1599 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1600 intvar[NF]->number = nfields;
1604 /* perform additional actions when some internal variables changed */
1605 static void handle_special(var *v)
1609 const char *sep, *s;
1610 int sl, l, len, i, bsize;
1612 if (!(v->type & VF_SPECIAL))
1615 if (v == intvar[NF]) {
1616 n = (int)getvar_i(v);
1619 /* recalculate $0 */
1620 sep = getvar_s(intvar[OFS]);
1624 for (i = 0; i < n; i++) {
1625 s = getvar_s(&Fields[i]);
1628 memcpy(b+len, sep, sl);
1631 qrealloc(&b, len+l+sl, &bsize);
1632 memcpy(b+len, s, l);
1637 setvar_p(intvar[F0], b);
1640 } else if (v == intvar[F0]) {
1641 is_f0_split = FALSE;
1643 } else if (v == intvar[FS]) {
1644 mk_splitter(getvar_s(v), &fsplitter);
1646 } else if (v == intvar[RS]) {
1647 mk_splitter(getvar_s(v), &rsplitter);
1649 } else if (v == intvar[IGNORECASE]) {
1653 n = getvar_i(intvar[NF]);
1654 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1655 /* right here v is invalid. Just to note... */
1659 /* step through func/builtin/etc arguments */
1660 static node *nextarg(node **pn)
1665 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1674 static void hashwalk_init(var *v, xhash *array)
1680 if (v->type & VF_WALK)
1684 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1685 w[0] = w[1] = (char *)(w + 2);
1686 for (i = 0; i < array->csize; i++) {
1687 hi = array->items[i];
1689 strcpy(*w, hi->name);
1696 static int hashwalk_next(var *v)
1704 setvar_s(v, nextword(w+1));
1708 /* evaluate node, return 1 when result is true, 0 otherwise */
1709 static int ptest(node *pattern)
1711 /* ptest__v is "static": to save stack space? */
1712 return istrue(evaluate(pattern, &G.ptest__v));
1715 /* read next record from stream rsm into a variable v */
1716 static int awk_getline(rstream *rsm, var *v)
1719 regmatch_t pmatch[2];
1720 int a, p, pp=0, size;
1721 int fd, so, eo, r, rp;
1724 /* we're using our own buffer since we need access to accumulating
1727 fd = fileno(rsm->F);
// Data is read from the raw descriptor with safe_read() below, not through
// stdio calls on rsm->F.
// c is rsplitter.n.info narrowed to char; the branches below treat it as
// the single-character record separator.
1732 c = (char) rsplitter.n.info;
// First use: start with a 256-byte buffer.
1735 if (! m) qrealloc(&m, 256, &size);
// Case 1: regexp splitter. The compiled regex used is the ire variant when
// icase is set; so/eo receive the start/end of the match.
1741 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1742 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1743 b, 1, pmatch, 0) == 0) {
1744 so = pmatch[0].rm_so;
1745 eo = pmatch[0].rm_eo;
// Case 2: non-NUL separator character. Search resumes at offset pp; when
// the character is absent, fall back to looking for a NUL byte in the
// p - pp bytes after pp.
1749 } else if (c != '\0') {
1750 s = strchr(b+pp, c);
1751 if (! s) s = memchr(b+pp, '\0', p - pp);
// Remaining case (presumably c is NUL): skip newlines at rp, look for two
// consecutive newlines, and extend eo over any newlines that follow.
1758 while (b[rp] == '\n')
1760 s = strstr(b+rp, "\n\n");
1763 while (b[eo] == '\n') eo++;
// Buffer maintenance: move the data at offset a down to the start of m,
// grow the buffer, then append more input while keeping one byte spare.
1771 memmove(m, (const void *)(m+a), p+1);
1776 qrealloc(&m, a+p+128, &size);
1779 p += safe_read(fd, b+p, size-p-1);
// errno is published through the awk variable ERRNO.
1783 setvar_i(intvar[ERRNO], errno);
// b[so] and b[eo] are overwritten with NUL in turn (old byte saved in c);
// RT receives the text starting at b+so, i.e. the separator that matched.
1792 c = b[so]; b[so] = '\0';
1796 c = b[eo]; b[eo] = '\0';
1797 setvar_s(intvar[RT], b+so);
// Formats the number n into b (capacity size) using a printf-style format.
// When int_as_int is set and n has no fractional part, the format is
// ignored and n is printed with %d. r holds the snprintf result in each
// visible branch.
1809 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1813 const char *s = format;
// NOTE(review): (int)n is evaluated for any n; converting a double outside
// the int range is undefined in C - confirm callers never pass such values.
1815 if (int_as_int && n == (int)n) {
1816 r = snprintf(b, size, "%d", (int)n);
// Walk to the end of format: c ends up as its last character (the
// conversion letter), or NUL when format is empty.
1818 do { c = *s; } while (c && *++s);
// Integer conversions get n truncated to int; floating conversions get n
// unchanged; anything else is reported as an invalid format.
// NOTE(review): strchr also matches the terminating NUL of its first
// argument, so an empty format (c == NUL) would take the integer branch.
1819 if (strchr("diouxX", c)) {
1820 r = snprintf(b, size, format, (int)n);
1821 } else if (strchr("eEfgG", c)) {
1822 r = snprintf(b, size, format, n);
1824 syntax_error(EMSG_INV_FMT);
1831 /* formatted output into an allocated buffer, return ptr to buffer */
// n is the argument list: the first argument is the format string, and
// further arguments are pulled one per conversion with nextarg().
1832 static char *awk_printf(node *n)
1837 int i, j, incr, bsize;
// Work on a private copy of the format (fmt keeps the start, f scans).
1842 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Skip literal text; a doubled percent sign is stepped over as literal too.
1847 while (*f && (*f != '%' || *(++f) == '%'))
// Scan flags, width and precision up to the conversion letter.
// NOTE(review): isalpha() receives a plain char here; a negative value
// other than EOF is undefined for the ctype functions.
1849 while (*f && !isalpha(*f)) {
1851 syntax_error("%*x formats are not supported");
// Reserve room for this format segment plus MAXVARFMT bytes of output.
1855 incr = (f - s) + MAXVARFMT;
1856 qrealloc(&b, incr + i, &bsize);
1861 arg = evaluate(nextarg(&n), v);
// c conversion (or no conversion letter at all): a numeric argument is
// used as a character code, otherwise the first byte of its string value.
1864 if (c == 'c' || !c) {
1865 i += sprintf(b+i, s, is_numeric(arg) ?
1866 (char)getvar_i(arg) : *getvar_s(arg));
1867 } else if (c == 's') {
// The string length is unbounded, so grow by strlen(s1) before formatting.
1869 qrealloc(&b, incr+i+strlen(s1), &bsize);
1870 i += sprintf(b+i, s, s1);
// All other conversions go through fmt_num() with incr as the size limit.
1872 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1876 /* if there was an error while sprintf, return value is negative */
// Trim the result buffer to the i bytes written plus the terminator.
1880 b = xrealloc(b, i + 1);
1887 /* common substitution routine
1888 * replace (nm) substring of (src) that match (n) with (repl), store
1889 * result into (dest), return number of substitutions. If nm=0, replace
1890 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1891 * subexpression matching (\1-\9)
1893 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
// rn is the pattern node. The output is accumulated in ds (capacity
// dssize, write offset di); sp is the current scan position in the input.
1898 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// Ten slots: the whole match plus up to nine subexpressions.
1899 regmatch_t pmatch[10];
// as_regex() may return a regex built in the local sreg; that case is
// detected at the end (re == &sreg) and the regex is released there.
1902 re = as_regex(rn, &sreg);
1903 if (! src) src = intvar[F0];
1904 if (! dest) dest = intvar[F0];
// REG_NOTBOL is passed for every search that does not start at the
// beginning of the source string, so that ^ only matches there.
1909 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1910 so = pmatch[0].rm_so;
1911 eo = pmatch[0].rm_eo;
// Make room for the text through the end of the match plus the
// replacement length rl, then copy the first eo bytes from sp.
1913 qrealloc(&ds, di + eo + rl, &dssize);
1914 memcpy(ds + di, sp, eo);
// Walk the replacement text. An ampersand, or a digit when ex is set,
// selects matched text; nbs feeds the adjustment of di below (presumably
// a count of preceding backslashes - confirm).
1920 for (s = repl; *s; s++) {
1926 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1927 di -= ((nbs + 3) >> 1);
// Append the text of match slot j.
1936 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1937 qrealloc(&ds, di + rl + n, &dssize);
1938 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// Copy a single byte and stop when it was the terminator.
1949 if (! (ds[di++] = *sp++)) break;
// Append whatever input remains after the last match.
1953 qrealloc(&ds, di + strlen(sp), &dssize);
1954 strcpy(ds + di, sp);
1956 if (re == &sreg) regfree(re);
// Runs the builtin selected by op->info and stores the result in res.
// Arguments are gathered into parallel arrays: an[] (nodes), av[]
// (evaluated values) and as[] (string values).
// NOTE(review): this listing omits the case labels and some statements;
// the groups below are identified only by the calls that are visible.
1960 static var *exec_builtin(node *op, var *res)
1962 #define tspl (G.exec_builtin__tspl)
1969 regmatch_t pmatch[2];
1979 isr = info = op->info;
1982 av[2] = av[3] = NULL;
// Collect at most four arguments. Bits of isr decide per argument whether
// it is evaluated (mask 0x09000000) and whether its string value is
// fetched (mask 0x08000000).
1983 for (i = 0; i < 4 && op; i++) {
1984 an[i] = nextarg(&op);
1985 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1986 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// The top two bits of info hold the minimum argument count.
1991 if (nargs < (info >> 30))
1992 syntax_error(EMSG_TOO_FEW_ARGS);
1994 switch (info & OPNMASK) {
// Two-argument arctangent, available only with ENABLE_FEATURE_AWK_MATH.
// NOTE(review): the first operand is av[i], where i is the counter left
// over from the argument loop above; av[0] looks intended - confirm.
1997 #if ENABLE_FEATURE_AWK_MATH
1998 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2000 syntax_error(EMSG_NO_MATH);
// Split: the third argument is used directly when it is a regexp node,
// otherwise a splitter is built from its string value into tspl.
2006 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2007 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2012 n = awk_split(as[0], spl, &s);
// The target array is emptied, then filled with elements 1..n.
2014 clear_array(iamarray(av[1]));
2015 for (i=1; i<=n; i++)
2016 setari_u(av[1], i, nextword(&s1));
// Substring: the start position is converted from 1-based to 0-based, and
// the length defaults to the rest of the string when no third argument exists.
2023 i = getvar_i(av[1]) - 1;
2026 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2029 strncpy(s, as[0]+i, n);
// Bitwise group: operands are converted to long (unsigned long for the
// right shift) before AND, complement, left shift, OR, right shift and XOR.
2035 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2039 setvar_i(res, ~(long)getvar_i(av[0]));
2043 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2047 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2051 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2055 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
// Case conversion: works on a duplicate of the string and applies the
// function pointer to_xxx to each character in place.
2065 s1 = s = xstrdup(as[0]);
2067 *s1 = (*to_xxx)(*s1);
// Substring search: l is the haystack length minus ll. strstr() is used on
// one path; the other path compares case-insensitively at every offset.
2076 l = strlen(as[0]) - ll;
2077 if (ll > 0 && l >= 0) {
2079 s = strstr(as[0], as[1]);
// The result is a 1-based position.
2080 if (s) n = (s - as[0]) + 1;
2082 /* this piece of code is terribly slow and
2083 * really should be rewritten
2085 for (i=0; i<=l; i++) {
2086 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting: output goes through g_buf, limited to MAXVARFMT bytes;
// a default format is used when no arguments were supplied.
2098 tt = getvar_i(av[1]);
2101 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2102 i = strftime(g_buf, MAXVARFMT,
2103 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2106 setvar_s(res, g_buf);
// Regex match: a locally compiled regex (re == &sreg) is freed afterwards.
2110 re = as_regex(an[1], &sreg);
2111 n = regexec(re, as[0], 1, pmatch, 0);
// These values make RSTART 0 and RLENGTH -1 below (presumably the no-match
// path - the guarding condition is not in this listing).
2116 pmatch[0].rm_so = 0;
2117 pmatch[0].rm_eo = -1;
2119 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2120 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2121 setvar_i(res, pmatch[0].rm_so);
2122 if (re == &sreg) regfree(re);
// Substitution group, all via awk_sub(): the first call takes the match
// number from the third argument, writes to res and enables subexpression
// references; the next two replace all matches (nm 0) or only the first
// (nm 1) in place and return the substitution count.
2126 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2130 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2134 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2144 * Evaluate node - the heart of the program. Supplied with subtree
2145 * and place where to store result. returns ptr to result.
2147 #define XC(n) ((n) >> 8)
2149 static var *evaluate(node *op, var *res)
// NOTE(review): this listing omits many original lines (case labels,
// braces, break statements); the comments describe only what is shown.
2151 /* This procedure is recursive so we should count every byte */
2152 #define fnargs (G.evaluate__fnargs)
2153 /* seed is initialized to 1 */
2154 #define seed (G.evaluate__seed)
2155 #define sreg (G.evaluate__sreg)
// Early exit that stores a NULL string in res (the guarding condition is
// not part of this listing).
2177 return setvar_s(res, NULL);
2183 opn = (opinfo & OPNMASK);
2184 g_lineno = op->lineno;
2186 /* execute inevitable things */
// Operand preparation is driven by flag bits in opinfo: OF_RES1 and OF_RES2
// evaluate the left and right subtrees into the temporaries v1 and v1+1,
// OF_STR1 and OF_STR2 fetch their string values, OF_NUM1 the left number.
2188 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2189 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2190 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2191 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2192 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2194 switch (XC(opinfo & OPCLSMASK)) {
2196 /* -- iterative node type -- */
2200 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2201 /* it's range pattern */
// OF_CHECKED in op->info remembers that the range is open: it is set once
// the left pattern matches and cleared when the right pattern matches.
2202 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2203 op->info |= OF_CHECKED;
2204 if (ptest(op1->r.n))
2205 op->info &= ~OF_CHECKED;
// Plain pattern: continue with a.n when it is true, r.n otherwise.
2212 op = (ptest(op1)) ? op->a.n : op->r.n;
2216 /* just evaluate an expression, also used as unconditional jump */
2220 /* branch, used in if-else and various loops */
2222 op = istrue(L.v) ? op->a.n : op->r.n;
2225 /* initialize for-in loop */
2226 case XC( OC_WALKINIT ):
2227 hashwalk_init(L.v, iamarray(R.v));
2230 /* get next array item */
2231 case XC( OC_WALKNEXT ):
2232 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2235 case XC( OC_PRINT ):
2236 case XC( OC_PRINTF ):
// Output redirection: R.s names the target. Pipe targets are opened with
// popen, files with xfopen in write or append mode depending on opn.
2239 X.rsm = newfile(R.s);
2242 X.rsm->F = popen(R.s, "w");
2243 if (X.rsm->F == NULL)
2244 bb_perror_msg_and_die("popen");
2247 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2253 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// $0 is written as-is here (presumably the no-argument form of print).
2255 fputs(getvar_s(intvar[F0]), X.F);
// Each argument: numbers are formatted through OFMT with int_as_int set,
// so whole numbers print as integers; everything else is written as its
// string value.
2258 L.v = evaluate(nextarg(&op1), v1);
2259 if (L.v->type & VF_NUMBER) {
2260 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2261 getvar_i(L.v), TRUE);
2264 fputs(getvar_s(L.v), X.F);
// OFS goes between arguments, ORS after the last one.
2267 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2270 fputs(getvar_s(intvar[ORS]), X.F);
2272 } else { /* OC_PRINTF */
2273 L.s = awk_printf(op1);
2280 case XC( OC_DELETE ):
// The operand must be a variable or a function argument; anything else is
// rejected with EMSG_NOT_ARRAY.
2281 X.info = op1->info & OPCLSMASK;
2282 if (X.info == OC_VAR) {
2284 } else if (X.info == OC_FNARG) {
2285 R.v = &fnargs[op1->l.i];
2287 syntax_error(EMSG_NOT_ARRAY);
// One path removes the single element named by the evaluated subscript,
// the other clears the whole array.
2292 L.s = getvar_s(evaluate(op1->r.n, v1));
2293 hash_remove(iamarray(R.v), L.s);
2295 clear_array(iamarray(R.v));
2299 case XC( OC_NEWSOURCE ):
2300 g_progname = op->l.s;
2303 case XC( OC_RETURN ):
2307 case XC( OC_NEXTFILE ):
2318 /* -- recursive node type -- */
2322 if (L.v == intvar[NF])
2326 case XC( OC_FNARG ):
2327 L.v = &fnargs[op->l.i];
// With a subscript node present, the result is the array element named by
// R.s; without one, it is the variable itself.
2329 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when key L.s exists in the array R.v, else 0.
2333 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2336 case XC( OC_REGEXP ):
2338 L.s = getvar_s(intvar[F0]);
2341 case XC( OC_MATCH ):
// A regex compiled into the scratch sreg is freed right after use. The
// match result is inverted when the operator is the negated form.
2344 X.re = as_regex(op1, &sreg);
2345 R.i = regexec(X.re, L.s, 0, NULL, 0);
2346 if (X.re == &sreg) regfree(X.re);
2347 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2351 /* if source is a temporary string, just relink it to dest */
// R.v == v1+1 means the right operand lives in the temporary slot filled
// by the OF_RES2 step above, so its string can be handed over by pointer.
2352 if (R.v == v1+1 && R.v->string) {
2353 res = setvar_p(L.v, R.v->string);
2356 res = copyvar(L.v, R.v);
2360 case XC( OC_TERNARY ):
// The right child must be an OC_COLON node holding both alternatives.
2361 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2362 syntax_error(EMSG_POSSIBLE_ERROR);
2363 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
// Function call: a function without a body is reported as undefined.
2367 if (!op->r.f->body.first)
2368 syntax_error(EMSG_UNDEF_FUNC);
// A vector of nargs+1 variables is allocated for the call arguments.
2370 X.v = R.v = nvalloc(op->r.f->nargs+1);
2372 L.v = evaluate(nextarg(&op1), v1);
// The slot is flagged VF_CHILD and linked to the actual parameter; this
// stops once nargs slots have been processed.
2374 R.v->type |= VF_CHILD;
2375 R.v->x.parent = L.v;
2376 if (++R.v - X.v >= op->r.f->nargs)
2384 res = evaluate(op->r.f->body.first, res);
2391 case XC( OC_GETLINE ):
2392 case XC( OC_PGETLINE ):
// Input source L.s: the pipe form uses popen, the file form plain fopen
// so that a failure can be reported instead of aborting.
2394 X.rsm = newfile(L.s);
2396 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2397 X.rsm->F = popen(L.s, "r");
2398 X.rsm->is_pipe = TRUE;
2400 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
// The main input stream is opened on demand.
2404 if (!iF) iF = next_input_file();
2409 setvar_i(intvar[ERRNO], errno);
2417 L.i = awk_getline(X.rsm, R.v);
2420 incvar(intvar[FNR]);
2427 /* simple builtins */
2428 case XC( OC_FBLTIN ):
// Random number: rand() divided by RAND_MAX.
2436 R.d = (double)rand() / (double)RAND_MAX;
2438 #if ENABLE_FEATURE_AWK_MATH
2464 syntax_error(EMSG_NO_MATH);
// New seed: the argument when one was given, else the current time.
2469 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2479 L.s = getvar_s(intvar[F0]);
// system() runs only when ENABLE_FEATURE_ALLOW_EXEC is on and the command
// is non-empty; its status is shifted right by 8 bits, otherwise 0.
2485 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2486 ? (system(L.s) >> 8) : 0;
2494 X.rsm = newfile(L.s);
// Closing a stream: look it up in fdhash, close it with pclose or fclose
// depending on is_pipe, free its buffer and drop the hash entry.
2503 X.rsm = (rstream *)hash_search(fdhash, L.s);
2505 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2506 free(X.rsm->buffer);
2507 hash_remove(fdhash, L.s);
2510 setvar_i(intvar[ERRNO], errno);
2517 case XC( OC_BUILTIN ):
2518 res = exec_builtin(op, res);
2521 case XC( OC_SPRINTF ):
2522 setvar_p(res, awk_printf(op1));
2525 case XC( OC_UNARY ):
2527 L.d = R.d = getvar_i(R.v);
2542 L.d = istrue(X.v) ? 0 : 1;
2553 case XC( OC_FIELD ):
// Field reference: index R.i maps to Fields[R.i - 1].
2554 R.i = (int)getvar_i(R.v);
2561 res = &Fields[R.i - 1];
2565 /* concatenation (" ") and index joining (",") */
2566 case XC( OC_CONCAT ):
2567 case XC( OC_COMMA ):
// opn is reused as the required buffer length; for OC_COMMA the buffer is
// grown by the length of SUBSEP, which is placed between the two parts.
2568 opn = strlen(L.s) + strlen(R.s) + 2;
2571 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2572 L.s = getvar_s(intvar[SUBSEP]);
2573 X.s = xrealloc(X.s, opn + strlen(L.s));
// Logical AND and OR: the right side is tested only when the left side
// does not already decide the result.
2581 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2585 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2588 case XC( OC_BINARY ):
2589 case XC( OC_REPLACE ):
2590 R.d = getvar_i(R.v);
2602 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2606 #if ENABLE_FEATURE_AWK_MATH
2607 L.d = pow(L.d, R.d);
2609 syntax_error(EMSG_NO_MATH);
2613 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
// Remainder: the quotient is truncated toward zero via a cast to int.
// NOTE(review): the cast is undefined in C when the quotient does not fit
// in an int - confirm whether large operands are expected here.
2614 L.d -= (int)(L.d / R.d) * R.d;
// OC_BINARY stores into res; otherwise (OC_REPLACE) the result is written
// back into the left operand X.v.
2617 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2620 case XC( OC_COMPARE ):
// Numeric comparison only when both sides are numeric; otherwise compare
// strings, ignoring case when icase is set.
2621 if (is_numeric(L.v) && is_numeric(R.v)) {
2622 L.d = getvar_i(L.v) - getvar_i(R.v);
2624 L.s = getvar_s(L.v);
2625 R.s = getvar_s(R.v);
2626 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
// The low bit of opn selects whether R.i or its negation is the result.
2628 switch (opn & 0xfe) {
2639 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2643 syntax_error(EMSG_POSSIBLE_ERROR);
// Class-range checks on the node just executed (the guarded statements
// are not part of this listing).
2645 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2647 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2660 /* -------- main & co. -------- */
// Shutdown path taking the exit status r. The visible steps run the END
// sequence and then close every pipe stream recorded in fdhash.
2662 static int awk_exit(int r)
2673 evaluate(endseq.first, &tv);
2676 /* waiting for children */
// Walk all hash buckets; only streams with an open FILE that are marked
// is_pipe are closed with pclose here.
2677 for (i = 0; i < fdhash->csize; i++) {
2678 hi = fdhash->items[i];
2680 if (hi->data.rs.F && hi->data.rs.is_pipe)
2681 pclose(hi->data.rs.F);
2689 /* if expr looks like "var=value", perform assignment and return 1,
2690 * otherwise return 0 */
2691 static int is_assignment(const char *expr)
2693 char *exprc, *s, *s0, *s1;
// Work on a private copy, since the text is modified in place below.
2695 exprc = xstrdup(expr);
// Not an assignment unless the first character passes isalnum_() and an
// equals sign occurs somewhere in the text.
2696 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
// The value part is rewritten character by character through nextchar()
// (presumably escape processing - confirm against nextchar).
2704 *(s1++) = nextchar(&s);
// exprc now names the variable; s0 points at the processed value.
2707 setvar_u(newvar(exprc), s0);
2712 /* switch to next input file */
// Returns a stream record for the next input. State kept between calls
// (rsm, files_happen) lives in the global G structure via the macros below.
2713 static rstream *next_input_file(void)
2715 #define rsm (G.next_input_file__rsm)
2716 #define files_happen (G.next_input_file__files_happen)
2719 const char *fname, *ind;
// Close the previous file, if any, and reset the buffer positions.
2721 if (rsm.F) fclose(rsm.F);
2723 rsm.pos = rsm.adv = 0;
// Reached when no ARGV entries remain after ARGIND (the handling of that
// case is not part of this listing).
2726 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// Advance ARGIND and fetch the matching ARGV element. Entries of the form
// var=value are executed as assignments instead of being opened.
2732 ind = getvar_s(incvar(intvar[ARGIND]));
2733 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2734 if (fname && *fname && !is_assignment(fname))
2735 F = afopen(fname, "r");
// Remember that at least one file operand was seen; publish its name.
2739 files_happen = TRUE;
2740 setvar_s(intvar[FILENAME], fname);
// Applet entry point: sets up global state and built-in variables, parses
// options and the program text, runs BEGIN, then processes the input files.
// NOTE(review): this listing omits some original lines; the comments
// describe only the statements shown.
2747 int awk_main(int argc, char **argv);
2748 int awk_main(int argc, char **argv)
2751 char *opt_F, *opt_W;
2752 llist_t *opt_v = NULL;
// The packed name and value tables are walked with nextword(), which needs
// non-const pointers, hence the casts.
2757 char *vnames = (char *)vNames; /* cheat */
2758 char *vvalues = (char *)vValues;
2762 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2763 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2764 if (ENABLE_LOCALE_SUPPORT)
2765 setlocale(LC_NUMERIC, "C");
2769 /* allocate global buffer */
2770 g_buf = xmalloc(MAXVARFMT + 1);
// Four hash tables: vhash, ahash, fdhash (streams by name, see its use in
// awk_exit) and fnhash.
2772 vhash = hash_init();
2773 ahash = hash_init();
2774 fdhash = hash_init();
2775 fnhash = hash_init();
2777 /* initialize variables */
// One built-in variable per name in vnames. A value byte of \377 means no
// string value is assigned here; a name list entry starting with an
// asterisk marks the variable VF_SPECIAL.
2778 for (i = 0; *vnames; i++) {
2779 intvar[i] = v = newvar(nextword(&vnames));
2780 if (*vvalues != '\377')
2781 setvar_s(v, nextword(&vvalues));
2785 if (*vnames == '*') {
2786 v->type |= VF_SPECIAL;
// Build the field and record splitters from the initial FS and RS values.
2791 handle_special(intvar[FS]);
2792 handle_special(intvar[RS]);
// Register the three standard streams under their /dev names.
2794 newfile("/dev/stdin")->F = stdin;
2795 newfile("/dev/stdout")->F = stdout;
2796 newfile("/dev/stderr")->F = stderr;
2798 /* Huh, people report that sometimes environ is NULL. Oh well. */
// Each environment entry is copied and located at its equals sign; the
// name and value parts then populate the ENVIRON array.
2799 if (environ) for (envp = environ; *envp; envp++) {
2800 char *s = xstrdup(*envp);
2801 char *s1 = strchr(s, '=');
2804 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1);
// Option parsing; -v values are collected in the list opt_v and -f stores
// the program file name in g_progname.
2808 opt_complementary = "v::";
2809 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2813 setvar_s(intvar[FS], opt_F); // -F
2814 while (opt_v) { /* -v */
2815 if (!is_assignment(llist_pop(&opt_v)))
2818 if (opt & 0x4) { // -f
// Self-initialisation only silences an uninitialised-variable warning.
2819 char *s = s; /* die, gcc, die */
2820 FILE *from_file = afopen(g_progname, "r");
2821 /* one byte is reserved for some trick in next_token */
// Seekable file: learn the size, allocate flen + 4 bytes and read the
// whole program starting at offset 1.
// NOTE(review): the results of ftell and of the second fseek are not
// checked in the lines shown.
2822 if (fseek(from_file, 0, SEEK_END) == 0) {
2823 flen = ftell(from_file);
2824 s = xmalloc(flen + 4);
2825 fseek(from_file, 0, SEEK_SET);
2826 i = 1 + fread(s + 1, 1, flen, from_file);
// Non-seekable input: grow the buffer by 4096 per round and read up to
// 4094 bytes until fread returns 0.
2828 for (i = j = 1; j > 0; i += j) {
2829 s = xrealloc(s, i + 4096);
2830 j = fread(s + i, 1, 4094, from_file);
2835 parse_program(s + 1);
2837 } else { // no -f: take program from 1st parameter
2840 g_progname = "cmd. line";
2841 parse_program(*argv++);
2844 if (opt & 0x8) // -W
2845 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2847 /* fill in ARGV array */
// ARGC counts the remaining arguments plus element 0, which is set to the
// fixed string awk.
2848 setvar_i(intvar[ARGC], argc + 1);
2849 setari_u(intvar[ARGV], 0, "awk");
2852 setari_u(intvar[ARGV], ++i, *argv++);
// Run BEGIN; with neither main rules nor END rules there is no input to
// read, so finish immediately.
2854 evaluate(beginseq.first, &tv);
2855 if (!mainseq.first && !endseq.first)
2856 awk_exit(EXIT_SUCCESS);
2858 /* input file could already be opened in BEGIN block */
2859 if (!iF) iF = next_input_file();
2861 /* passing through input files */
// Per file: reset FNR, then read records into $0 and run the main rules
// for each; a positive awk_getline result means a record was read.
2864 setvar_i(intvar[FNR], 0);
2866 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2869 incvar(intvar[FNR]);
2870 evaluate(mainseq.first, &tv);
// Reports the errno text as a fatal error (presumably on a negative
// awk_getline result - the condition is not part of this listing).
2877 syntax_error(strerror(errno));
2879 iF = next_input_file();
2882 awk_exit(EXIT_SUCCESS);