1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
13 extern char **environ;
15 /* This is a NOEXEC applet. Be very careful! */
/* Bits of var::type. One bit per property; values are fixed. */
#define VF_NUMBER    (1 << 0)   /* primary type is number */
#define VF_ARRAY     (1 << 1)   /* variable is an array */
#define VF_CACHED    (1 << 8)   /* num/str value has cached str/num equivalent */
#define VF_USER      (1 << 9)   /* user input (may be a numeric string) */
#define VF_SPECIAL   (1 << 10)  /* needs extra handling when changed */
#define VF_WALK      (1 << 11)  /* variable has alloc'd x.walker list */
#define VF_FSTR      (1 << 12)  /* var::string points into fstring buffer */
#define VF_CHILD     (1 << 13)  /* function arg; x.parent points to source */
#define VF_DIRTY     (1 << 14)  /* variable was set explicitly */
/* Flags that describe the variable itself rather than its current value;
 * clrvar() keeps exactly these bits when it resets the type. */
#define VF_DONTTOUCH (VF_DIRTY | VF_CHILD | VF_WALK | VF_SPECIAL | VF_ARRAY)
37 typedef struct var_s {
38 unsigned type; /* flags */
42 int aidx; /* func arg idx (for compilation stage) */
43 struct xhash_s *array; /* array ptr */
44 struct var_s *parent; /* for func args, ptr to actual parameter */
45 char **walker; /* list of array elements (for..in) */
49 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50 typedef struct chain_s {
53 const char *programname;
57 typedef struct func_s {
63 typedef struct rstream_s {
72 typedef struct hash_item_s {
74 struct var_s v; /* variable/array hash */
75 struct rstream_s rs; /* redirect streams hash */
76 struct func_s f; /* functions hash */
78 struct hash_item_s *next; /* next in chain */
79 char name[1]; /* really it's longer */
82 typedef struct xhash_s {
83 unsigned nel; /* num of elements */
84 unsigned csize; /* current hash size */
85 unsigned nprime; /* next hash size in PRIMES[] */
86 unsigned glen; /* summary length of item names */
87 struct hash_item_s **items;
91 typedef struct node_s {
112 /* Block of temporary variables */
113 typedef struct nvblock_s {
116 struct nvblock_s *prev;
117 struct nvblock_s *next;
121 typedef struct tsplitter_s {
/* simple token classes: one bit each */
/* Bit order and values are very important!!! See next_token() */
#define TC_SEQSTART   0x00000001  /* ( */
#define TC_SEQTERM    0x00000002  /* ) */
#define TC_REGEXP     0x00000004  /* /.../ */
#define TC_OUTRDR     0x00000008  /* | > >> */
#define TC_UOPPOST    0x00000010  /* unary postfix operator */
#define TC_UOPPRE1    0x00000020  /* unary prefix operator */
#define TC_BINOPX     0x00000040  /* two-opnd operator */
#define TC_IN         0x00000080
#define TC_COMMA      0x00000100
#define TC_PIPE       0x00000200  /* input redirection pipe */
#define TC_UOPPRE2    0x00000400  /* unary prefix operator */
#define TC_ARRTERM    0x00000800  /* ] */
#define TC_GRPSTART   0x00001000  /* { */
#define TC_GRPTERM    0x00002000  /* } */
#define TC_SEMICOL    0x00004000
#define TC_NEWLINE    0x00008000
#define TC_STATX      0x00010000  /* ctl statement (for, next...) */
#define TC_WHILE      0x00020000
#define TC_ELSE       0x00040000
#define TC_BUILTIN    0x00080000
#define TC_GETLINE    0x00100000
#define TC_FUNCDECL   0x00200000  /* `function' `func' */
#define TC_BEGIN      0x00400000
#define TC_END        0x00800000
#define TC_EOF        0x01000000
#define TC_VARIABLE   0x02000000
#define TC_ARRAY      0x04000000
#define TC_FUNCTION   0x08000000
#define TC_STRING     0x10000000
#define TC_NUMBER     0x20000000
/* both groups of unary prefix operators */
#define TC_UOPPRE     (TC_UOPPRE1 | TC_UOPPRE2)
/* combined token classes */
#define TC_BINOP      (TC_BINOPX \
                       | TC_COMMA \
                       | TC_PIPE \
                       | TC_IN)
#define TC_UNARYOP    (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND    (TC_VARIABLE \
                       | TC_ARRAY \
                       | TC_FUNCTION \
                       | TC_BUILTIN \
                       | TC_GETLINE \
                       | TC_SEQSTART \
                       | TC_STRING \
                       | TC_NUMBER)
#define TC_STATEMNT   (TC_STATX | TC_WHILE)
#define TC_OPTERM     (TC_SEMICOL | TC_NEWLINE)
/* word tokens, cannot mean something else if not expected */
#define TC_WORD       (TC_IN \
                       | TC_STATEMNT \
                       | TC_ELSE \
                       | TC_BUILTIN \
                       | TC_GETLINE \
                       | TC_FUNCDECL \
                       | TC_BEGIN \
                       | TC_END)
/* newlines following any of these are discarded */
#define TC_NOTERM     (TC_COMMA \
                       | TC_GRPSTART \
                       | TC_GRPTERM \
                       | TC_BINOP \
                       | TC_OPTERM)
/* what an expression can begin with */
#define TC_OPSEQ      (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what a group can begin with */
#define TC_GRPSEQ     (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
/* when the previous token class is in CONCAT1 and the next one is in
 * CONCAT2, a concatenation operator is inserted between them */
#define TC_CONCAT1    (TC_VARIABLE \
                       | TC_ARRTERM \
                       | TC_SEQTERM \
                       | TC_STRING \
                       | TC_NUMBER \
                       | TC_UOPPOST)
#define TC_CONCAT2    (TC_OPERAND | TC_UOPPRE)
/* Operator flag bits; OR-ed into token info values together with the
 * operation class and the priority field. */
#define OF_RES1     (1 << 16)
#define OF_RES2     (1 << 17)
#define OF_STR1     (1 << 18)
#define OF_STR2     (1 << 19)
#define OF_NUM1     (1 << 20)
#define OF_CHECKED  (1 << 21)
/* combined operator flags */
#define xS  (OF_STR2 | OF_RES2)
#define VV  (OF_RES2 | OF_RES1)
#define Nx  (OF_NUM1 | OF_RES1)
#define NV  (OF_NUM1 | OF_RES1 | OF_RES2)
#define Sx  (OF_STR1 | OF_RES1)
#define SV  (OF_STR1 | OF_RES1 | OF_RES2)
#define SS  (OF_STR1 | OF_RES1 | OF_STR2 | OF_RES2)
/* masks applied to an info word: bits 8..15 and bits 0..6 respectively */
#define OPCLSMASK   0xFF00
#define OPNMASK     0x007F
211 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
212 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
213 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* Place x in the highest byte (bits 24..31) of an info word.
 * The argument is parenthesized so that compound expressions such as
 * P(a | b) shift the whole value, and it is widened to uint32_t before
 * shifting: entries like P(0x83) do not fit in a signed int once shifted,
 * and overflowing a signed left shift is undefined behaviour. */
#define P(x)      ((uint32_t)(x) << 24)
/* bits 24..30 of an info word */
#define PRIMASK   0x7F000000
/* PRIMASK without bit 24, i.e. ignoring the lowest (even/odd) bit */
#define PRIMASK2  0x7E000000
219 /* Operation classes */
221 #define SHIFT_TIL_THIS 0x0600
222 #define RECUR_FROM_THIS 0x1000
225 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
226 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
228 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
229 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
230 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
232 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
233 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
234 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
235 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
236 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
237 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
238 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
239 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
242 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
246 /* simple builtins */
248 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
249 F_ti, F_le, F_sy, F_ff, F_cl
254 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
256 B_an, B_co, B_ls, B_or, B_rs, B_xo,
259 /* tokens and their corresponding info values */
261 #define NTC "\377" /* switch to next token class (tc<<1) */
264 #define OC_B OC_BUILTIN
266 static const char tokenlist[] =
269 "\1/" NTC /* REGEXP */
270 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
271 "\2++" "\2--" NTC /* UOPPOST */
272 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
273 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
274 "\2*=" "\2/=" "\2%=" "\2^="
275 "\1+" "\1-" "\3**=" "\2**"
276 "\1/" "\1%" "\1^" "\1*"
277 "\2!=" "\2>=" "\2<=" "\1>"
278 "\1<" "\2!~" "\1~" "\2&&"
279 "\2||" "\1?" "\1:" NTC
283 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
289 "\2if" "\2do" "\3for" "\5break" /* STATX */
290 "\10continue" "\6delete" "\5print"
291 "\6printf" "\4next" "\10nextfile"
292 "\6return" "\4exit" NTC
296 "\3and" "\5compl" "\6lshift" "\2or"
298 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
299 "\3cos" "\3exp" "\3int" "\3log"
300 "\4rand" "\3sin" "\4sqrt" "\5srand"
301 "\6gensub" "\4gsub" "\5index" "\6length"
302 "\5match" "\5split" "\7sprintf" "\3sub"
303 "\6substr" "\7systime" "\10strftime"
304 "\7tolower" "\7toupper" NTC
306 "\4func" "\10function" NTC
311 static const uint32_t tokeninfo[] = {
315 xS|'a', xS|'w', xS|'|',
316 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
317 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
319 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
320 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
321 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
322 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
323 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
324 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
325 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
326 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
327 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
328 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
329 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
330 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
331 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
332 OC_COLON|xx|P(67)|':',
335 OC_PGETLINE|SV|P(37),
336 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
337 OC_UNARY|xV|P(19)|'!',
343 ST_IF, ST_DO, ST_FOR, OC_BREAK,
344 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
345 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
346 OC_RETURN|Vx, OC_EXIT|Nx,
350 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
357 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
358 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
365 /* internal variable names and their initial values */
366 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
368 CONVFMT, OFMT, FS, OFS,
369 ORS, RS, RT, FILENAME,
370 SUBSEP, ARGIND, ARGC, ARGV,
373 ENVIRON, F0, NUM_INTERNAL_VARS
376 static const char vNames[] =
377 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
378 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
379 "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
381 "NR\0" "NF\0*" "IGNORECASE\0*"
382 "ENVIRON\0" "$\0*" "\0";
384 static const char vValues[] =
385 "%.6g\0" "%.6g\0" " \0" " \0"
386 "\n\0" "\n\0" "\0" "\0"
/* Hash table sizing: hash_init() starts a table with FIRST_PRIME buckets,
 * and hash_rebuild() moves to the next entry of PRIMES[] each time the
 * table grows. NPRIMES is the number of growth steps available.
 * FIRST_PRIME must not carry a trailing semicolon, otherwise it cannot be
 * used inside a larger expression. */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
398 /* former 'struct t' */
399 uint32_t t_info; /* often used */
411 smallint is_f0_split;
412 chain beginseq, mainseq, endseq, *seq;
413 node *break_ptr, *continue_ptr;
415 xhash *vhash, *ahash, *fdhash, *fnhash;
416 const char *g_progname;
419 int maxfields; /* used in fsrealloc() only */
425 /* former statics from various functions */
426 char *split_f0__fstrings;
428 rstream next_input_file__rsm;
429 smallint next_input_file__files_happen;
431 smallint next_token__concat_inserted;
432 uint32_t next_token__save_tclass;
433 uint32_t next_token__save_info;
434 uint32_t next_token__ltclass;
436 var *evaluate__fnargs;
437 unsigned evaluate__seed;
438 regex_t evaluate__sreg;
442 tsplitter exec_builtin__tspl;
444 /* biggest members go last */
445 var *intvar[NUM_INTERNAL_VARS];
446 tsplitter fsplitter, rsplitter;
/* All applet state is reached through the ptr_to_globals pointer */
#define G (*ptr_to_globals)
/* char Gsize[sizeof(G)]; ~0x240 */
/* Trying to keep most of members accessible with short offsets: */
/* char Gofs_seed[offsetof(struct globals, evaluate__seed)]; ~0xc0 */

/* Short aliases so the code can use the bare names for members of G. */
/* current token */
#define t_info (G.t_info)
#define t_tclass (G.t_tclass)
#define t_string (G.t_string)
#define t_double (G.t_double)
#define t_lineno (G.t_lineno)
#define t_rollback (G.t_rollback)
/* flags */
#define icase (G.icase)
#define exiting (G.exiting)
#define nextrec (G.nextrec)
#define nextfile (G.nextfile)
#define is_f0_split (G.is_f0_split)
/* node chains and loop targets */
#define beginseq (G.beginseq)
#define mainseq (G.mainseq)
#define endseq (G.endseq)
#define break_ptr (G.break_ptr)
#define continue_ptr (G.continue_ptr)
/* hashes */
#define vhash (G.vhash)
#define ahash (G.ahash)
#define fdhash (G.fdhash)
#define fnhash (G.fnhash)
/* everything else */
#define g_progname (G.g_progname)
#define g_lineno (G.g_lineno)
#define nfields (G.nfields)
#define maxfields (G.maxfields)
#define Fields (G.Fields)
#define g_cb (G.g_cb)
#define g_pos (G.g_pos)
#define g_buf (G.g_buf)
#define intvar (G.intvar)
#define fsplitter (G.fsplitter)
#define rsplitter (G.rsplitter)
486 #define INIT_G() do { \
487 PTR_TO_GLOBALS = xzalloc(sizeof(G)); \
488 G.next_token__ltclass = TC_OPTERM; \
489 G.evaluate__seed = 1; \
493 /* function prototypes */
494 static void handle_special(var *);
495 static node *parse_expr(uint32_t);
496 static void chain_group(void);
497 static var *evaluate(node *, var *);
498 static rstream *next_input_file(void);
499 static int fmt_num(char *, int, const char *, double, int);
500 static int awk_exit(int) ATTRIBUTE_NORETURN;
502 /* ---- error handling ---- */
/* Fixed diagnostic texts used by the error paths (e.g. syntax_error()) */
static const char EMSG_INTERNAL_ERROR[] = "Internal error";
static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
static const char EMSG_UNEXP_EOS[]      = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[]    = "Unexpected token";
static const char EMSG_INV_FMT[]        = "Invalid format specifier";
static const char EMSG_DIV_BY_ZERO[]    = "Division by zero";
static const char EMSG_TOO_FEW_ARGS[]   = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[]      = "Not an array";
static const char EMSG_UNDEF_FUNC[]     = "Call to undefined function";
513 #if !ENABLE_FEATURE_AWK_MATH
514 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
517 static void zero_out_var(var * vp)
519 memset(vp, 0, sizeof(*vp));
522 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
523 static void syntax_error(const char * const message)
525 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 /* ---- hash stuff ---- */
530 static unsigned hashidx(const char *name)
534 while (*name) idx = *name++ + (idx << 6) - idx;
538 /* create new hash */
539 static xhash *hash_init(void)
543 newhash = xzalloc(sizeof(xhash));
544 newhash->csize = FIRST_PRIME;
545 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
550 /* find item in hash, return ptr to data, NULL if not found */
551 static void *hash_search(xhash *hash, const char *name)
555 hi = hash->items [ hashidx(name) % hash->csize ];
557 if (strcmp(hi->name, name) == 0)
564 /* grow hash if it becomes too big */
565 static void hash_rebuild(xhash *hash)
567 unsigned newsize, i, idx;
568 hash_item **newitems, *hi, *thi;
570 if (hash->nprime == NPRIMES)
573 newsize = PRIMES[hash->nprime++];
574 newitems = xzalloc(newsize * sizeof(hash_item *));
576 for (i = 0; i < hash->csize; i++) {
581 idx = hashidx(thi->name) % newsize;
582 thi->next = newitems[idx];
588 hash->csize = newsize;
589 hash->items = newitems;
592 /* find item in hash, add it if necessary. Return ptr to data */
593 static void *hash_find(xhash *hash, const char *name)
599 hi = hash_search(hash, name);
601 if (++hash->nel / hash->csize > 10)
604 l = strlen(name) + 1;
605 hi = xzalloc(sizeof(hash_item) + l);
606 memcpy(hi->name, name, l);
608 idx = hashidx(name) % hash->csize;
609 hi->next = hash->items[idx];
610 hash->items[idx] = hi;
/* Typed front-ends for hash_find(): look the name up in the given hash
 * (adding it if necessary) and cast the result to the stored type. */
#define findvar(hash, name)  ((var *) hash_find((hash), (name)))
/* same lookup, bound to one of the global hashes */
#define newvar(name)         findvar(vhash, (name))
#define newfile(name)        ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name)        ((func *) hash_find(fnhash, (name)))
621 static void hash_remove(xhash *hash, const char *name)
623 hash_item *hi, **phi;
625 phi = &(hash->items[hashidx(name) % hash->csize]);
628 if (strcmp(hi->name, name) == 0) {
629 hash->glen -= (strlen(name) + 1);
639 /* ------ some useful functions ------ */
641 static void skip_spaces(char **s)
646 if (*p == '\\' && p[1] == '\n') {
649 } else if (*p != ' ' && *p != '\t') {
657 static char *nextword(char **s)
661 while (*(*s)++) /* */;
666 static char nextchar(char **s)
672 if (c == '\\') c = bb_process_escape_sequence((const char**)s);
673 if (c == '\\' && *s == pps) c = *((*s)++);
677 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
679 return (isalnum(c) || c == '_');
682 static FILE *afopen(const char *path, const char *mode)
684 return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
687 /* -------- working with variables (set/get/copy/etc) -------- */
689 static xhash *iamarray(var *v)
693 while (a->type & VF_CHILD)
696 if (!(a->type & VF_ARRAY)) {
698 a->x.array = hash_init();
703 static void clear_array(xhash *array)
708 for (i = 0; i < array->csize; i++) {
709 hi = array->items[i];
713 free(thi->data.v.string);
716 array->items[i] = NULL;
718 array->glen = array->nel = 0;
721 /* clear a variable */
722 static var *clrvar(var *v)
724 if (!(v->type & VF_FSTR))
727 v->type &= VF_DONTTOUCH;
733 /* assign string value to variable */
734 static var *setvar_p(var *v, char *value)
742 /* same as setvar_p but make a copy of string */
743 static var *setvar_s(var *v, const char *value)
745 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
748 /* same as setvar_s but set USER flag */
749 static var *setvar_u(var *v, const char *value)
756 /* set array element to user string */
757 static void setari_u(var *a, int idx, const char *s)
759 char sidx[sizeof(int)*3 + 1];
762 sprintf(sidx, "%d", idx);
763 v = findvar(iamarray(a), sidx);
767 /* assign numeric value to variable */
768 static var *setvar_i(var *v, double value)
771 v->type |= VF_NUMBER;
777 static const char *getvar_s(var *v)
779 /* if v is numeric and has no cached string, convert it to string */
780 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
781 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
782 v->string = xstrdup(g_buf);
783 v->type |= VF_CACHED;
785 return (v->string == NULL) ? "" : v->string;
788 static double getvar_i(var *v)
792 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
796 v->number = strtod(s, &s);
797 if (v->type & VF_USER) {
805 v->type |= VF_CACHED;
810 static var *copyvar(var *dest, const var *src)
814 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
815 dest->number = src->number;
817 dest->string = xstrdup(src->string);
819 handle_special(dest);
823 static var *incvar(var *v)
825 return setvar_i(v, getvar_i(v)+1.);
828 /* return true if v is number or numeric string */
829 static int is_numeric(var *v)
832 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
835 /* return 1 when value of v corresponds to true, 0 otherwise */
836 static int istrue(var *v)
839 return (v->number == 0) ? 0 : 1;
840 return (v->string && *(v->string)) ? 1 : 0;
843 /* temporary variables allocator. Last allocated should be first freed */
844 static var *nvalloc(int n)
852 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
857 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
858 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
860 g_cb->pos = g_cb->nv;
863 if (pb) pb->next = g_cb;
869 while (v < g_cb->pos) {
878 static void nvfree(var *v)
882 if (v < g_cb->nv || v >= g_cb->pos)
883 syntax_error(EMSG_INTERNAL_ERROR);
885 for (p = v; p < g_cb->pos; p++) {
886 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
887 clear_array(iamarray(p));
888 free(p->x.array->items);
891 if (p->type & VF_WALK)
898 while (g_cb->prev && g_cb->pos == g_cb->nv) {
903 /* ------- awk program text parsing ------- */
905 /* Parse next token pointed by global pos, place results into global ttt.
906 * If token isn't expected, give away. Return token class
908 static uint32_t next_token(uint32_t expected)
910 #define concat_inserted (G.next_token__concat_inserted)
911 #define save_tclass (G.next_token__save_tclass)
912 #define save_info (G.next_token__save_info)
913 /* Initialized to TC_OPTERM: */
914 #define ltclass (G.next_token__ltclass)
925 } else if (concat_inserted) {
926 concat_inserted = FALSE;
927 t_tclass = save_tclass;
936 while (*p != '\n' && *p != '\0')
945 } else if (*p == '\"') {
949 if (*p == '\0' || *p == '\n')
950 syntax_error(EMSG_UNEXP_EOS);
951 *(s++) = nextchar(&p);
957 } else if ((expected & TC_REGEXP) && *p == '/') {
961 if (*p == '\0' || *p == '\n')
962 syntax_error(EMSG_UNEXP_EOS);
966 *(s-1) = bb_process_escape_sequence((const char **)&p);
977 } else if (*p == '.' || isdigit(*p)) {
979 t_double = strtod(p, &p);
981 syntax_error(EMSG_UNEXP_TOKEN);
985 /* search for something known */
995 /* if token class is expected, token
996 * matches and it's not a longer word,
997 * then this is what we are looking for
999 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1000 && *tl == *p && strncmp(p, tl, l) == 0
1001 && !((tc & TC_WORD) && isalnum_(p[l]))
1012 /* it's a name (var/array/function),
1013 * otherwise it's something wrong
1016 syntax_error(EMSG_UNEXP_TOKEN);
1019 while (isalnum_(*(++p))) {
1024 /* also consume whitespace between functionname and bracket */
1025 if (!(expected & TC_VARIABLE))
1039 /* skipping newlines in some cases */
1040 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1043 /* insert concatenation operator when needed */
1044 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1045 concat_inserted = TRUE;
1049 t_info = OC_CONCAT | SS | P(35);
1056 /* Are we ready for this? */
1057 if (!(ltclass & expected))
1058 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1059 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1062 #undef concat_inserted
1068 static void rollback_token(void)
1073 static node *new_node(uint32_t info)
1077 n = xzalloc(sizeof(node));
1079 n->lineno = g_lineno;
1083 static node *mk_re_node(const char *s, node *n, regex_t *re)
1085 n->info = OC_REGEXP;
1088 xregcomp(re, s, REG_EXTENDED);
1089 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1094 static node *condition(void)
1096 next_token(TC_SEQSTART);
1097 return parse_expr(TC_SEQTERM);
1100 /* parse expression terminated by given argument, return ptr
1101 * to built subtree. Terminator is eaten by parse_expr */
1102 static node *parse_expr(uint32_t iexp)
1111 sn.r.n = glptr = NULL;
1112 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1114 while (!((tc = next_token(xtc)) & iexp)) {
1115 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1116 /* input redirection (<) attached to glptr node */
1117 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1119 xtc = TC_OPERAND | TC_UOPPRE;
1122 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1123 /* for binary and postfix-unary operators, jump back over
1124 * previous operators with higher priority */
1126 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1127 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1129 if ((t_info & OPCLSMASK) == OC_TERNARY)
1131 cn = vn->a.n->r.n = new_node(t_info);
1133 if (tc & TC_BINOP) {
1135 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1136 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1138 next_token(TC_GETLINE);
1139 /* give maximum priority to this pipe */
1140 cn->info &= ~PRIMASK;
1141 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1145 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1150 /* for operands and prefix-unary operators, attach them
1153 cn = vn->r.n = new_node(t_info);
1155 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1156 if (tc & (TC_OPERAND | TC_REGEXP)) {
1157 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1158 /* one should be very careful with switch on tclass -
1159 * only simple tclasses should be used! */
1164 v = hash_search(ahash, t_string);
1166 cn->info = OC_FNARG;
1167 cn->l.i = v->x.aidx;
1169 cn->l.v = newvar(t_string);
1171 if (tc & TC_ARRAY) {
1173 cn->r.n = parse_expr(TC_ARRTERM);
1180 v = cn->l.v = xzalloc(sizeof(var));
1182 setvar_i(v, t_double);
1184 setvar_s(v, t_string);
1188 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1193 cn->r.f = newfunc(t_string);
1194 cn->l.n = condition();
1198 cn = vn->r.n = parse_expr(TC_SEQTERM);
1204 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1208 cn->l.n = condition();
1217 /* add node to chain. Return ptr to alloc'd node */
1218 static node *chain_node(uint32_t info)
1223 seq->first = seq->last = new_node(0);
1225 if (seq->programname != g_progname) {
1226 seq->programname = g_progname;
1227 n = chain_node(OC_NEWSOURCE);
1228 n->l.s = xstrdup(g_progname);
1233 seq->last = n->a.n = new_node(OC_DONE);
1238 static void chain_expr(uint32_t info)
1242 n = chain_node(info);
1243 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1244 if (t_tclass & TC_GRPTERM)
1248 static node *chain_loop(node *nn)
1250 node *n, *n2, *save_brk, *save_cont;
1252 save_brk = break_ptr;
1253 save_cont = continue_ptr;
1255 n = chain_node(OC_BR | Vx);
1256 continue_ptr = new_node(OC_EXEC);
1257 break_ptr = new_node(OC_EXEC);
1259 n2 = chain_node(OC_EXEC | Vx);
1262 continue_ptr->a.n = n2;
1263 break_ptr->a.n = n->r.n = seq->last;
1265 continue_ptr = save_cont;
1266 break_ptr = save_brk;
1271 /* parse group and attach it to chain */
1272 static void chain_group(void)
1278 c = next_token(TC_GRPSEQ);
1279 } while (c & TC_NEWLINE);
1281 if (c & TC_GRPSTART) {
1282 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1283 if (t_tclass & TC_NEWLINE) continue;
1287 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1289 chain_expr(OC_EXEC | Vx);
1290 } else { /* TC_STATEMNT */
1291 switch (t_info & OPCLSMASK) {
1293 n = chain_node(OC_BR | Vx);
1294 n->l.n = condition();
1296 n2 = chain_node(OC_EXEC);
1298 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1300 n2->a.n = seq->last;
1308 n = chain_loop(NULL);
1313 n2 = chain_node(OC_EXEC);
1314 n = chain_loop(NULL);
1316 next_token(TC_WHILE);
1317 n->l.n = condition();
1321 next_token(TC_SEQSTART);
1322 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1323 if (t_tclass & TC_SEQTERM) { /* for-in */
1324 if ((n2->info & OPCLSMASK) != OC_IN)
1325 syntax_error(EMSG_UNEXP_TOKEN);
1326 n = chain_node(OC_WALKINIT | VV);
1329 n = chain_loop(NULL);
1330 n->info = OC_WALKNEXT | Vx;
1332 } else { /* for (;;) */
1333 n = chain_node(OC_EXEC | Vx);
1335 n2 = parse_expr(TC_SEMICOL);
1336 n3 = parse_expr(TC_SEQTERM);
1346 n = chain_node(t_info);
1347 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1348 if (t_tclass & TC_OUTRDR) {
1350 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1352 if (t_tclass & TC_GRPTERM)
1357 n = chain_node(OC_EXEC);
1362 n = chain_node(OC_EXEC);
1363 n->a.n = continue_ptr;
1366 /* delete, next, nextfile, return, exit */
1373 static void parse_program(char *p)
1382 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1383 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1385 if (tclass & TC_OPTERM)
1389 if (tclass & TC_BEGIN) {
1393 } else if (tclass & TC_END) {
1397 } else if (tclass & TC_FUNCDECL) {
1398 next_token(TC_FUNCTION);
1400 f = newfunc(t_string);
1401 f->body.first = NULL;
1403 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1404 v = findvar(ahash, t_string);
1405 v->x.aidx = (f->nargs)++;
1407 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1414 } else if (tclass & TC_OPSEQ) {
1416 cn = chain_node(OC_TEST);
1417 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1418 if (t_tclass & TC_GRPSTART) {
1422 chain_node(OC_PRINT);
1424 cn->r.n = mainseq.last;
1426 } else /* if (tclass & TC_GRPSTART) */ {
1434 /* -------- program execution part -------- */
1436 static node *mk_splitter(const char *s, tsplitter *spl)
1444 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1448 if (strlen(s) > 1) {
1449 mk_re_node(s, n, re);
1451 n->info = (uint32_t) *s;
1457 /* use node as a regular expression. Supplied with node ptr and regex_t
1458 * storage space. Return ptr to regex (if result points to preg, it should
1459 * be later regfree'd manually
1461 static regex_t *as_regex(node *op, regex_t *preg)
1466 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1467 return icase ? op->r.ire : op->l.re;
1470 s = getvar_s(evaluate(op, v));
1471 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1476 /* gradually increasing buffer */
1477 static void qrealloc(char **b, int n, int *size)
1479 if (!*b || n >= *size)
1480 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1483 /* resize field storage space */
1484 static void fsrealloc(int size)
1488 if (size >= maxfields) {
1490 maxfields = size + 16;
1491 Fields = xrealloc(Fields, maxfields * sizeof(var));
1492 for (; i < maxfields; i++) {
1493 Fields[i].type = VF_SPECIAL;
1494 Fields[i].string = NULL;
1498 if (size < nfields) {
1499 for (i = size; i < nfields; i++) {
1506 static int awk_split(const char *s, node *spl, char **slist)
1511 regmatch_t pmatch[2];
1513 /* in worst case, each char would be a separate field */
1514 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1517 c[0] = c[1] = (char)spl->info;
1519 if (*getvar_s(intvar[RS]) == '\0')
1522 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1524 l = strcspn(s, c+2);
1525 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1526 && pmatch[0].rm_so <= l
1528 l = pmatch[0].rm_so;
1529 if (pmatch[0].rm_eo == 0) {
1534 pmatch[0].rm_eo = l;
1535 if (s[l]) pmatch[0].rm_eo++;
1541 s += pmatch[0].rm_eo;
1544 } else if (c[0] == '\0') { /* null split */
1550 } else if (c[0] != ' ') { /* single-character split */
1552 c[0] = toupper(c[0]);
1553 c[1] = tolower(c[1]);
1556 while ((s1 = strpbrk(s1, c))) {
1560 } else { /* space split */
1562 s = skip_whitespace(s);
1565 while (*s && !isspace(*s))
1573 static void split_f0(void)
1575 #define fstrings (G.split_f0__fstrings)
1586 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1589 for (i = 0; i < n; i++) {
1590 Fields[i].string = nextword(&s);
1591 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1594 /* set NF manually to avoid side effects */
1596 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1597 intvar[NF]->number = nfields;
1601 /* perform additional actions when some internal variables changed */
1602 static void handle_special(var *v)
1606 const char *sep, *s;
1607 int sl, l, len, i, bsize;
1609 if (!(v->type & VF_SPECIAL))
1612 if (v == intvar[NF]) {
1613 n = (int)getvar_i(v);
1616 /* recalculate $0 */
1617 sep = getvar_s(intvar[OFS]);
1621 for (i = 0; i < n; i++) {
1622 s = getvar_s(&Fields[i]);
1625 memcpy(b+len, sep, sl);
1628 qrealloc(&b, len+l+sl, &bsize);
1629 memcpy(b+len, s, l);
1634 setvar_p(intvar[F0], b);
1637 } else if (v == intvar[F0]) {
1638 is_f0_split = FALSE;
1640 } else if (v == intvar[FS]) {
1641 mk_splitter(getvar_s(v), &fsplitter);
1643 } else if (v == intvar[RS]) {
1644 mk_splitter(getvar_s(v), &rsplitter);
1646 } else if (v == intvar[IGNORECASE]) {
1650 n = getvar_i(intvar[NF]);
1651 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1652 /* right here v is invalid. Just to note... */
1656 /* step through func/builtin/etc arguments */
1657 static node *nextarg(node **pn)
1662 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1671 static void hashwalk_init(var *v, xhash *array)
1677 if (v->type & VF_WALK)
1681 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1682 w[0] = w[1] = (char *)(w + 2);
1683 for (i = 0; i < array->csize; i++) {
1684 hi = array->items[i];
1686 strcpy(*w, hi->name);
1693 static int hashwalk_next(var *v)
1701 setvar_s(v, nextword(w+1));
1705 /* evaluate node, return 1 when result is true, 0 otherwise */
1706 static int ptest(node *pattern)
1708 /* ptest__v is "static": to save stack space? */
1709 return istrue(evaluate(pattern, &G.ptest__v));
1712 /* read next record from stream rsm into a variable v */
// Records are delimited according to the global rsplitter; the separator
// text that was found is published through intvar[RT].
// NOTE(review): the main loop keeps reading while the result is > 0, so a
// positive return presumably means "record read"; confirm what zero and
// negative values stand for.
1713 static int awk_getline(rstream *rsm, var *v)
1716 regmatch_t pmatch[2];
1717 int a, p, pp=0, size;
1718 int fd, so, eo, r, rp;
1721 /* we're using our own buffer since we need access to accumulating
// Input is read from the raw descriptor behind the FILE (see safe_read).
1724 fd = fileno(rsm->F);
// c is later used as the literal separator character to search for.
1729 c = (char) rsplitter.n.info;
// Make sure a working buffer exists before first use.
1732 if (! m) qrealloc(&m, 256, &size);
// Separator is a regular expression; icase selects the compiled
// case-insensitive variant.
1738 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1739 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1740 b, 1, pmatch, 0) == 0) {
// so and eo are the start and end offsets of the separator match.
1741 so = pmatch[0].rm_so;
1742 eo = pmatch[0].rm_eo;
// Separator is a single non-NUL character.
1746 } else if (c != '\0') {
1747 s = strchr(b+pp, c);
// Not found: fall back to looking for a NUL byte between b+pp and b+p.
1748 if (! s) s = memchr(b+pp, '\0', p - pp);
// Blank-line separated records (presumably the branch taken when c is
// NUL): newlines at position rp are stepped over...
1755 while (b[rp] == '\n')
// ...and the record ends at the next empty line.
1757 s = strstr(b+rp, "\n\n");
// The separator swallows every consecutive newline.
1760 while (b[eo] == '\n') eo++;
// Slide p+1 bytes starting at offset a down to the start of the buffer.
1768 memmove(m, (const void *)(m+a), p+1);
// Grow the buffer so more input fits.
1773 qrealloc(&m, a+p+128, &size);
// Append freshly read bytes; one byte of the buffer is held back
// (size-p-1), presumably for a terminating NUL.
1776 p += safe_read(fd, b+p, size-p-1);
// Expose errno to the script (presumably after a failed read).
1780 setvar_i(intvar[ERRNO], errno);
// Put a NUL at the separator start, keeping the overwritten byte in c.
1789 c = b[so]; b[so] = '\0';
// Same at the separator end, so b+so is a C string holding exactly the
// separator text, which becomes RT.
1793 c = b[eo]; b[eo] = '\0';
1794 setvar_s(intvar[RT], b+so);
// Format the number n into b (capacity: size bytes) using a printf-style
// format string.
//   int_as_int - when non-zero and n survives a round trip through int
//                unchanged, print it with "%d" and ignore format.
// Otherwise the conversion is chosen by the last character of format.
// NOTE(review): r holds the snprintf result; presumably that is the
// return value - confirm.
1806 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1810 const char *s = format;
1812 if (int_as_int && n == (int)n) {
1813 r = snprintf(b, size, "%d", (int)n);
// Walk s to the end of format; c ends up as its final character
// (or NUL when format is empty).
1815 do { c = *s; } while (c && *++s);
// Integer conversions get n truncated to int. strchr() also matches the
// terminating NUL, so an empty format lands in this branch too.
1816 if (strchr("diouxX", c)) {
1817 r = snprintf(b, size, format, (int)n);
// Floating-point conversions receive the double unchanged.
1818 } else if (strchr("eEfgG", c)) {
1819 r = snprintf(b, size, format, n);
// Any other final character is reported as an invalid format.
1821 syntax_error(EMSG_INV_FMT);
1828 /* formatted output into an allocated buffer, return ptr to buffer */
// n is the argument list: its first argument supplies the format string,
// further arguments are fetched with nextarg() as conversions need them.
1829 static char *awk_printf(node *n)
1834 int i, j, incr, bsize;
// Work on a private copy of the format string.
1839 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Scan forward to the next conversion; "%%" does not count as one.
1844 while (*f && (*f != '%' || *(++f) == '%'))
// Advance to the first alphabetic character (the conversion letter).
1846 while (*f && !isalpha(*f)) {
1848 syntax_error("%*x formats are not supported");
// Reserve room for this piece: the span from s to f plus MAXVARFMT.
1852 incr = (f - s) + MAXVARFMT;
1853 qrealloc(&b, incr + i, &bsize);
// Fetch the argument that feeds this conversion.
1858 arg = evaluate(nextarg(&n), v);
// 'c' conversion (or c == NUL): numeric values are emitted as the
// character with that code, strings contribute their first character.
1861 if (c == 'c' || !c) {
1862 i += sprintf(b+i, s, is_numeric(arg) ?
1863 (char)getvar_i(arg) : *getvar_s(arg));
// 's' conversion: the string can be of any length, so the buffer is
// grown by strlen(s1) before printing.
1864 } else if (c == 's') {
1866 qrealloc(&b, incr+i+strlen(s1), &bsize);
1867 i += sprintf(b+i, s, s1);
// Everything else is treated as numeric and delegated to fmt_num().
1869 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1873 /* if there was an error while sprintf, return value is negative */
// Trim the allocation to the accumulated length i plus one byte.
1877 b = xrealloc(b, i + 1);
1884 /* common substitution routine
1885 * replace (nm) substring of (src) that match (n) with (repl), store
1886 * result into (dest), return number of substitutions. If nm=0, replace
1887 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1888 * subexpression matching (\1-\9)
1890 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1895 int c, i, j, di, rl, so, eo, nbs, n, dssize;
// Ten match slots: the whole match plus up to nine subexpressions.
1896 regmatch_t pmatch[10];
// Obtain a compiled regex for the pattern node; sreg is scratch storage
// that as_regex() may compile into (see the regfree at the end).
1899 re = as_regex(rn, &sreg);
// Missing source or destination both default to the $0 variable.
1900 if (! src) src = intvar[F0];
1901 if (! dest) dest = intvar[F0];
// One iteration per match. Once sp has moved past the start of the source
// string, REG_NOTBOL is passed so "^" no longer matches at sp.
1906 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1907 so = pmatch[0].rm_so;
1908 eo = pmatch[0].rm_eo;
// Copy the source up to the end of the match into the output; room for
// rl more bytes is reserved in the same step.
1910 qrealloc(&ds, di + eo + rl, &dssize);
1911 memcpy(ds + di, sp, eo);
// Expand the replacement text character by character.
1917 for (s = repl; *s; s++) {
// '&', or a digit when ex is set, refers to matched text.
1923 if (c == '&' || (ex && c >= '0' && c <= '9')) {
// Rewind the output index by an amount derived from nbs.
// NOTE(review): nbs presumably counts preceding backslashes - confirm.
1924 di -= ((nbs + 3) >> 1);
// Insert the text of match slot j.
1933 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1934 qrealloc(&ds, di + rl + n, &dssize);
1935 memcpy(ds + di, sp + pmatch[j].rm_so, n);
// Copy one source character; stop once the terminator has been copied.
1946 if (! (ds[di++] = *sp++)) break;
// Append whatever is left of the source.
1950 qrealloc(&ds, di + strlen(sp), &dssize);
1951 strcpy(ds + di, sp);
// Release the regex only when it lives in our scratch sreg.
1953 if (re == &sreg) regfree(re);
// Execute the multi-argument builtin described by node op, with res as
// the place for the result.
// NOTE(review): evaluate() assigns this function's return value to its own
// res, so presumably res (or another result var) is returned - confirm.
1957 static var *exec_builtin(node *op, var *res)
// Scratch splitter kept in the globals struct G.
1959 #define tspl (G.exec_builtin__tspl)
1966 regmatch_t pmatch[2];
// The node's info word carries the builtin id and the argument flags.
1976 isr = info = op->info;
1979 av[2] = av[3] = NULL;
// Collect at most four arguments.
1980 for (i = 0; i < 4 && op; i++) {
1981 an[i] = nextarg(&op);
// Flag bits in isr decide whether the argument is evaluated (0x09000000)
// and whether its string form is fetched as well (0x08000000).
1982 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1983 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
// info >> 30 is the minimum number of arguments this builtin needs.
1988 if (nargs < (info >> 30))
1989 syntax_error(EMSG_TOO_FEW_ARGS);
// Dispatch on the builtin id.
1991 switch (info & OPNMASK) {
1994 #if ENABLE_FEATURE_AWK_MATH
// NOTE(review): the first operand is av[i], indexed by the argument-loop
// counter instead of a constant. If i still holds its post-loop value
// here, this reads a slot that was never evaluated; av[0] looks intended.
1995 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1997 syntax_error(EMSG_NO_MATH);
// Splitter choice: a regexp node is used directly, anything else is
// evaluated to a string and compiled into tspl.
2003 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2004 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
// Split the first argument and store the n pieces as elements 1..n of
// the (emptied) array in av[1].
2009 n = awk_split(as[0], spl, &s);
2011 clear_array(iamarray(av[1]));
2012 for (i=1; i<=n; i++)
2013 setari_u(av[1], i, nextword(&s1));
// Convert the 1-based start position to a 0-based offset.
2020 i = getvar_i(av[1]) - 1;
// The length defaults to l-i when no third argument is given.
2023 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2026 strncpy(s, as[0]+i, n);
// Bitwise builtins: operands are converted to long first.
// AND
2032 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
// complement
2036 setvar_i(res, ~(long)getvar_i(av[0]));
// left shift
2040 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
// OR
2044 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
// right shift, done on unsigned long operands
2048 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
// XOR
2052 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
// Case conversion works on a private copy, mapping characters via to_xxx.
2062 s1 = s = xstrdup(as[0]);
2064 *s1 = (*to_xxx)(*s1);
// Substring search. l is the length of as[0] minus ll
// (NOTE(review): ll is presumably strlen(as[1]) - confirm).
2073 l = strlen(as[0]) - ll;
// Only search when ll is positive and l is not negative.
2074 if (ll > 0 && l >= 0) {
// strstr-based search; the result is the 1-based position.
2076 s = strstr(as[0], as[1]);
2077 if (s) n = (s - as[0]) + 1;
2079 /* this piece of code is terribly slow and
2080 * really should be rewritten
// Try each candidate offset with a case-insensitive compare.
2082 for (i=0; i<=l; i++) {
2083 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2095 tt = getvar_i(av[1]);
2098 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// Format a time into g_buf; without arguments a default format is used.
2099 i = strftime(g_buf, MAXVARFMT,
2100 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2103 setvar_s(res, g_buf);
// Regex match of the first argument against the pattern in an[1].
2107 re = as_regex(an[1], &sreg);
2108 n = regexec(re, as[0], 1, pmatch, 0);
// Fallback offsets (presumably for the no-match case): they make RSTART
// come out as 0 and RLENGTH as -1 below.
2113 pmatch[0].rm_so = 0;
2114 pmatch[0].rm_eo = -1;
2116 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2117 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2118 setvar_i(res, pmatch[0].rm_so);
2119 if (re == &sreg) regfree(re);
// Substitution builtins, all routed through awk_sub().
// nm from av[2], source av[3], output into res, subexpressions enabled:
2123 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
// nm = 0 (all matches), in place on av[2]; the result is the count:
2127 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
// nm = 1, in place on av[2]; the result is the count:
2131 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2141 * Evaluate node - the heart of the program. Supplied with subtree
2142 * and place where to store result. returns ptr to result.
// XC() drops the low 8 bits of a node-class constant; the dispatch switch
// below and its case labels all use class values reduced this way.
2144 #define XC(n) ((n) >> 8)
2146 static var *evaluate(node *op, var *res)
2148 /* This procedure is recursive so we should count every byte */
// Function-local state is kept in the globals struct G behind these macros.
2149 #define fnargs (G.evaluate__fnargs)
2150 /* seed is initialized to 1 */
2151 #define seed (G.evaluate__seed)
2152 #define sreg (G.evaluate__sreg)
// Early exit: res is reset to an empty value and returned
// (presumably when there is no node to evaluate).
2174 return setvar_s(res, NULL);
// opn = the operation-specific bits of the node's info word.
2180 opn = (opinfo & OPNMASK);
// Track the source line of the node being executed.
2181 g_lineno = op->lineno;
2183 /* execute inevitable things */
// The OF_* flags request pre-evaluation of the operands: left and right
// results, their string forms, and the left numeric value.
2185 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2186 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2187 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2188 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2189 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
// Dispatch on the node class.
2191 switch (XC(opinfo & OPCLSMASK)) {
2193 /* -- iterative node type -- */
2197 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2198 /* it's range pattern */
// OF_CHECKED on the node records that the range is currently open: it is
// set once the start pattern matches and cleared when the end pattern does.
2199 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2200 op->info |= OF_CHECKED;
2201 if (ptest(op1->r.n))
2202 op->info &= ~OF_CHECKED;
// Plain pattern: continue at a.n when it is true, at r.n otherwise.
2209 op = (ptest(op1)) ? op->a.n : op->r.n;
2213 /* just evaluate an expression, also used as unconditional jump */
2217 /* branch, used in if-else and various loops */
2219 op = istrue(L.v) ? op->a.n : op->r.n;
2222 /* initialize for-in loop */
2223 case XC( OC_WALKINIT ):
2224 hashwalk_init(L.v, iamarray(R.v));
2227 /* get next array item */
2228 case XC( OC_WALKNEXT ):
2229 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2232 case XC( OC_PRINT ):
2233 case XC( OC_PRINTF ):
// Redirected output: look up (or create) the stream entry named by R.s.
2236 X.rsm = newfile(R.s);
// Output to a command; failure to start it is fatal.
2239 X.rsm->F = popen(R.s, "w");
2240 if (X.rsm->F == NULL)
2241 bb_perror_msg_and_die("popen");
// Output to a file: truncate when opn is 'w', append otherwise.
2244 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2250 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// Printing $0 (presumably the case of print without arguments).
2252 fputs(getvar_s(intvar[F0]), X.F);
// Print the arguments one by one.
2255 L.v = evaluate(nextarg(&op1), v1);
// Numbers are formatted with OFMT; integral values are printed as integers.
2256 if (L.v->type & VF_NUMBER) {
2257 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2258 getvar_i(L.v), TRUE);
2261 fputs(getvar_s(L.v), X.F);
// OFS goes between arguments...
2264 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
// ...and ORS is written as well (presumably once, after the last one).
2267 fputs(getvar_s(intvar[ORS]), X.F);
2269 } else { /* OC_PRINTF */
2270 L.s = awk_printf(op1);
2277 case XC( OC_DELETE ):
// The operand must be a variable or a function argument.
2278 X.info = op1->info & OPCLSMASK;
2279 if (X.info == OC_VAR) {
2281 } else if (X.info == OC_FNARG) {
2282 R.v = &fnargs[op1->l.i];
2284 syntax_error(EMSG_NOT_ARRAY);
// Remove the single element whose key is the evaluated subscript...
2289 L.s = getvar_s(evaluate(op1->r.n, v1));
2290 hash_remove(iamarray(R.v), L.s);
// ...or empty the whole array.
2292 clear_array(iamarray(R.v));
2296 case XC( OC_NEWSOURCE ):
2297 g_progname = op->l.s;
2300 case XC( OC_RETURN ):
2304 case XC( OC_NEXTFILE ):
2315 /* -- recursive node type -- */
2319 if (L.v == intvar[NF])
2323 case XC( OC_FNARG ):
// Function arguments are addressed by index into fnargs.
2324 L.v = &fnargs[op->l.i];
// With a subscript node present the result is the array element keyed by
// R.s, otherwise the variable itself.
2326 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when key L.s exists in the array R.v, else 0.
2330 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2333 case XC( OC_REGEXP ):
// The subject string is taken from $0 here.
2335 L.s = getvar_s(intvar[F0]);
2338 case XC( OC_MATCH ):
// Match L.s against the pattern; opn == '!' inverts the result.
2341 X.re = as_regex(op1, &sreg);
2342 R.i = regexec(X.re, L.s, 0, NULL, 0);
2343 if (X.re == &sreg) regfree(X.re);
2344 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2348 /* if source is a temporary string, just relink it to dest */
2349 if (R.v == v1+1 && R.v->string) {
2350 res = setvar_p(L.v, R.v->string);
// Otherwise copy the value.
2353 res = copyvar(L.v, R.v);
2357 case XC( OC_TERNARY ):
// The right child must be an OC_COLON node holding both alternatives.
2358 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2359 syntax_error(EMSG_POSSIBLE_ERROR);
2360 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
// Function call: the function must have a body.
2364 if (!op->r.f->body.first)
2365 syntax_error(EMSG_UNDEF_FUNC);
// Allocate nargs+1 argument variables.
2367 X.v = R.v = nvalloc(op->r.f->nargs+1);
2369 L.v = evaluate(nextarg(&op1), v1);
// Mark the slot as a function argument and link it to the actual parameter.
2371 R.v->type |= VF_CHILD;
2372 R.v->x.parent = L.v;
// True once nargs slots have been filled.
2373 if (++R.v - X.v >= op->r.f->nargs)
// Run the function body.
2381 res = evaluate(op->r.f->body.first, res);
2388 case XC( OC_GETLINE ):
2389 case XC( OC_PGETLINE ):
// Look up (or create) the stream entry named by L.s.
2391 X.rsm = newfile(L.s);
// Reading from a command...
2393 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2394 X.rsm->F = popen(L.s, "r");
2395 X.rsm->is_pipe = TRUE;
// ...or from a file.
2397 X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
// Reading from the main input: open the next input file when none is
// current.
2401 if (!iF) iF = next_input_file();
2406 setvar_i(intvar[ERRNO], errno);
// Read one record into R.v.
2414 L.i = awk_getline(X.rsm, R.v);
2417 incvar(intvar[FNR]);
2424 /* simple builtins */
2425 case XC( OC_FBLTIN ):
// Random number: rand() scaled by RAND_MAX.
2433 R.d = (double)rand() / (double)RAND_MAX;
2435 #if ENABLE_FEATURE_AWK_MATH
2461 syntax_error(EMSG_NO_MATH);
// New seed: the argument when one is given, the current time otherwise.
2466 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2476 L.s = getvar_s(intvar[F0]);
// Run a command only when the feature is enabled and the command string
// is non-empty; the raw system() status is shifted right by 8 bits.
2482 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2483 ? (system(L.s) >> 8) : 0;
2491 X.rsm = newfile(L.s);
// Closing a stream: find it by name, close it with the matching call,
// release its buffer and drop the hash entry.
2500 X.rsm = (rstream *)hash_search(fdhash, L.s);
2502 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2503 free(X.rsm->buffer);
2504 hash_remove(fdhash, L.s);
2507 setvar_i(intvar[ERRNO], errno);
2514 case XC( OC_BUILTIN ):
2515 res = exec_builtin(op, res);
2518 case XC( OC_SPRINTF ):
// The formatted buffer is handed over to res.
2519 setvar_p(res, awk_printf(op1));
2522 case XC( OC_UNARY ):
2524 L.d = R.d = getvar_i(R.v);
// Logical negation.
2539 L.d = istrue(X.v) ? 0 : 1;
2550 case XC( OC_FIELD ):
2551 R.i = (int)getvar_i(R.v);
// Field numbers are 1-based, Fields[] is 0-based.
2558 res = &Fields[R.i - 1];
2562 /* concatenation (" ") and index joining (",") */
2563 case XC( OC_CONCAT ):
2564 case XC( OC_COMMA ):
// Size needed for both strings plus two extra bytes.
2565 opn = strlen(L.s) + strlen(R.s) + 2;
// Index joining: make room for SUBSEP as well.
2568 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2569 L.s = getvar_s(intvar[SUBSEP]);
2570 X.s = xrealloc(X.s, opn + strlen(L.s));
// Short-circuit AND: the right side is tested only when the left is true.
2578 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
// Short-circuit OR: the right side is tested only when the left is false.
2582 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2585 case XC( OC_BINARY ):
2586 case XC( OC_REPLACE ):
2587 R.d = getvar_i(R.v);
2599 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2603 #if ENABLE_FEATURE_AWK_MATH
2604 L.d = pow(L.d, R.d);
2606 syntax_error(EMSG_NO_MATH);
2610 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
// Remainder: subtract the int-truncated quotient times the divisor.
2611 L.d -= (int)(L.d / R.d) * R.d;
// OC_BINARY stores into res; OC_REPLACE stores into X.v, the left operand.
2614 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2617 case XC( OC_COMPARE ):
// Numeric comparison when both sides are numeric, string comparison
// (case-insensitive when icase is set) otherwise; L.d gets the difference.
2618 if (is_numeric(L.v) && is_numeric(R.v)) {
2619 L.d = getvar_i(L.v) - getvar_i(R.v);
2621 L.s = getvar_s(L.v);
2622 R.s = getvar_s(R.v);
2623 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
// The relation is selected by opn without its lowest bit...
2625 switch (opn & 0xfe) {
// ...and that lowest bit chooses between R.i and its negation.
2636 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2640 syntax_error(EMSG_POSSIBLE_ERROR);
// NOTE(review): the class value is compared against two thresholds here;
// judging by the section comments above they presumably separate the
// "iterative" node classes from the "recursive" ones - confirm.
2642 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2644 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2657 /* -------- main & co. -------- */
// Finish the run with status r: evaluate the END sequence, then pclose()
// every pipe stream still recorded in fdhash.
// NOTE(review): presumably ends by exiting the process with r - confirm.
2659 static int awk_exit(int r)
// Run the END actions.
2670 evaluate(endseq.first, &tv);
2673 /* waiting for children */
// Walk every bucket of the stream hash...
2674 for (i = 0; i < fdhash->csize; i++) {
2675 hi = fdhash->items[i];
// ...closing only entries that have an open FILE and are pipes.
2677 if (hi->data.rs.F && hi->data.rs.is_pipe)
2678 pclose(hi->data.rs.F);
2686 /* if expr looks like "var=value", perform assignment and return 1,
2687 * otherwise return 0 */
2688 static int is_assignment(const char *expr)
2690 char *exprc, *s, *s0, *s1;
// Work on a private, writable copy of expr.
2692 exprc = xstrdup(expr);
// Not an assignment when the first character fails isalnum_() or when
// there is no '=' at all.
2693 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
// Rewrite the value in place, one character from nextchar() at a time
// (NOTE(review): nextchar presumably decodes escape sequences - confirm).
2701 *(s1++) = nextchar(&s);
// Assign the value s0 to the variable named by exprc
// (presumably exprc has been cut at the '=' by this point).
2704 setvar_u(newvar(exprc), s0);
2709 /* switch to next input file */
// Steps through ARGV using ARGIND, skipping empty entries and "var=value"
// assignments (which is_assignment() carries out as a side effect).
// NOTE(review): presumably returns a pointer to the persistent rsm, or NULL
// when the arguments are used up - confirm.
2710 static rstream *next_input_file(void)
// State that outlives a call is kept in the globals struct G.
2712 #define rsm (G.next_input_file__rsm)
2713 #define files_happen (G.next_input_file__files_happen)
2716 const char *fname, *ind;
// Close the previously opened file, if any, and reset the read positions.
2718 if (rsm.F) fclose(rsm.F);
2720 rsm.pos = rsm.adv = 0;
// The next index would be at or beyond ARGC: no entries left.
2723 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// Advance ARGIND and fetch the ARGV element with that index.
2729 ind = getvar_s(incvar(intvar[ARGIND]));
2730 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
// Only non-empty entries that are not assignments are opened.
2731 if (fname && *fname && !is_assignment(fname))
2732 F = afopen(fname, "r");
// Remember that a file operand was seen and publish its name.
2736 files_happen = TRUE;
2737 setvar_s(intvar[FILENAME], fname);
// Applet entry point: set up the interpreter state, parse options and the
// program text, run BEGIN, feed every input record to the main rules, and
// finish through awk_exit().
2744 int awk_main(int argc, char **argv);
2745 int awk_main(int argc, char **argv)
2748 char *opt_F, *opt_W;
2749 llist_t *opt_v = NULL;
// Cursors over the packed tables of builtin variable names and values.
2754 char *vnames = (char *)vNames; /* cheat */
2755 char *vvalues = (char *)vValues;
2759 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2760 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2761 if (ENABLE_LOCALE_SUPPORT)
2762 setlocale(LC_NUMERIC, "C");
2766 /* allocate global buffer */
2767 g_buf = xmalloc(MAXVARFMT + 1);
// Four hash tables are created: vhash, ahash, fdhash (open streams) and
// fnhash.
2769 vhash = hash_init();
2770 ahash = hash_init();
2771 fdhash = hash_init();
2772 fnhash = hash_init();
2774 /* initialize variables */
// One iteration per name in vnames; intvar[i] gives indexed access later.
2775 for (i = 0; *vnames; i++) {
2776 intvar[i] = v = newvar(nextword(&vnames));
// A '\377' byte in the value table means: take no value from the table
// for this variable.
2777 if (*vvalues != '\377')
2778 setvar_s(v, nextword(&vvalues));
// A '*' in the name table flags the variable as VF_SPECIAL.
2782 if (*vnames == '*') {
2783 v->type |= VF_SPECIAL;
// Apply the special handling for FS and RS once up front.
2788 handle_special(intvar[FS]);
2789 handle_special(intvar[RS]);
// Pre-register the standard streams under their /dev names.
2791 newfile("/dev/stdin")->F = stdin;
2792 newfile("/dev/stdout")->F = stdout;
2793 newfile("/dev/stderr")->F = stderr;
2795 /* Huh, people report that sometimes environ is NULL. Oh well. */
// Fill the ENVIRON array from the process environment.
2796 if (environ) for (envp = environ; *envp; envp++) {
2797 char *s = xstrdup(*envp);
2798 char *s1 = strchr(s, '=');
2801 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1);
// Option parsing; per the option string, -v may be given repeatedly and is
// collected in the opt_v list.
2805 opt_complementary = "v::";
2806 opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2810 setvar_s(intvar[FS], opt_F); // -F
2811 while (opt_v) { /* -v */
// Each -v argument has to be a valid "var=value" assignment.
2812 if (!is_assignment(llist_pop(&opt_v)))
2815 if (opt & 0x4) { // -f
2816 char *s = s; /* die, gcc, die */
2817 FILE *from_file = afopen(g_progname, "r");
2818 /* one byte is reserved for some trick in next_token */
// Seekable program file: determine its size, then read it in one go
// to s + 1.
2819 if (fseek(from_file, 0, SEEK_END) == 0) {
2820 flen = ftell(from_file);
2821 s = xmalloc(flen + 4);
2822 fseek(from_file, 0, SEEK_SET);
2823 i = 1 + fread(s + 1, 1, flen, from_file);
// Otherwise read in pieces of up to 4094 bytes, growing the buffer each
// round, until fread() returns nothing more.
2825 for (i = j = 1; j > 0; i += j) {
2826 s = xrealloc(s, i + 4096);
2827 j = fread(s + i, 1, 4094, from_file);
// The program text starts at s + 1 because of the reserved byte.
2832 parse_program(s + 1);
2834 } else { // no -f: take program from 1st parameter
2837 g_progname = "cmd. line";
2838 parse_program(*argv++);
2841 if (opt & 0x8) // -W
2842 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2844 /* fill in ARGV array */
// ARGC is argc + 1 and ARGV[0] is fixed to "awk"; the remaining arguments
// follow.
2845 setvar_i(intvar[ARGC], argc + 1);
2846 setari_u(intvar[ARGV], 0, "awk");
2849 setari_u(intvar[ARGV], ++i, *argv++);
// Run the BEGIN actions.
2851 evaluate(beginseq.first, &tv);
// With neither main rules nor END actions there is no input to read.
2852 if (!mainseq.first && !endseq.first)
2853 awk_exit(EXIT_SUCCESS);
2855 /* input file could already be opened in BEGIN block */
2856 if (!iF) iF = next_input_file();
2858 /* passing through input files */
// FNR is reset to 0 here (presumably once per input file).
2861 setvar_i(intvar[FNR], 0);
// Read records into $0 for as long as awk_getline() reports a positive
// result.
2863 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2866 incvar(intvar[FNR]);
// Run the main rules on the record.
2867 evaluate(mainseq.first, &tv);
// Report the system error text (presumably after a failed read).
2874 syntax_error(strerror(errno));
2876 iF = next_input_file();
2879 awk_exit(EXIT_SUCCESS);