1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
14 //config: Awk is used as a pattern scanning and processing language. This is
15 //config: the BusyBox implementation of that programming language.
17 //config:config FEATURE_AWK_LIBM
18 //config: bool "Enable math functions (requires libm)"
20 //config: depends on AWK
22 //config: Enable math functions of the Awk programming language.
23 //config: NOTE: This will require libm to be present for linking.
25 //config:config FEATURE_AWK_GNU_EXTENSIONS
26 //config: bool "Enable a few GNU extensions"
28 //config: depends on AWK
30 //config: Enable a few features from gawk:
31 //config: * command line option -e AWK_PROGRAM
32 //config: * simultaneous use of -f and -e on the command line.
33 //config: This enables the use of awk library files.
34 //config: Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
36 //applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
38 //kbuild:lib-$(CONFIG_AWK) += awk.o
40 //usage:#define awk_trivial_usage
41 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42 //usage:#define awk_full_usage "\n\n"
43 //usage: " -v VAR=VAL Set variable"
44 //usage: "\n -F SEP Use SEP as field separator"
45 //usage: "\n -f FILE Read program from FILE"
46 //usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
47 //usage: "\n -e AWK_PROGRAM"
54 /* This is a NOEXEC applet. Be very careful! */
57 /* If you comment out one of these below, it will be #defined later
58 * to perform debug printfs to stderr: */
59 #define debug_printf_walker(...) do {} while (0)
60 #define debug_printf_eval(...) do {} while (0)
61 #define debug_printf_parse(...) do {} while (0)
63 #ifndef debug_printf_walker
64 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
66 #ifndef debug_printf_eval
67 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
69 #ifndef debug_printf_parse
70 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
76 IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
78 #define OPTCOMPLSTR_AWK \
80 IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
82 OPTBIT_F, /* define field separator */
83 OPTBIT_v, /* define variable */
84 OPTBIT_f, /* pull in awk program from file */
85 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
86 OPTBIT_W, /* -W ignored */
87 OPT_F = 1 << OPTBIT_F,
88 OPT_v = 1 << OPTBIT_v,
89 OPT_f = 1 << OPTBIT_f,
90 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
98 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
99 #define VF_ARRAY 0x0002 /* 1 = it's an array */
101 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
102 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
103 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
104 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
105 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
106 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
107 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
109 /* these flags are static, don't change them when value is changed */
110 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
112 typedef struct walker_list {
115 struct walker_list *prev;
120 typedef struct var_s {
121 unsigned type; /* flags */
125 int aidx; /* func arg idx (for compilation stage) */
126 struct xhash_s *array; /* array ptr */
127 struct var_s *parent; /* for func args, ptr to actual parameter */
128 walker_list *walker; /* list of array elements (for..in) */
132 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
133 typedef struct chain_s {
134 struct node_s *first;
136 const char *programname;
140 typedef struct func_s {
146 typedef struct rstream_s {
155 typedef struct hash_item_s {
157 struct var_s v; /* variable/array hash */
158 struct rstream_s rs; /* redirect streams hash */
159 struct func_s f; /* functions hash */
161 struct hash_item_s *next; /* next in chain */
162 char name[1]; /* really it's longer */
165 typedef struct xhash_s {
166 unsigned nel; /* num of elements */
167 unsigned csize; /* current hash size (number of buckets in items[]) */
168 unsigned nprime; /* index of the next hash size in PRIMES[] */
169 unsigned glen; /* total length of item names, each counted with its NUL */
170 struct hash_item_s **items; /* bucket array; items in one bucket are chained via ->next */
174 typedef struct node_s {
194 /* Block of temporary variables */
195 typedef struct nvblock_s {
198 struct nvblock_s *prev;
199 struct nvblock_s *next;
203 typedef struct tsplitter_s {
208 /* simple token classes */
209 /* Order and hex values are very important!!! Each class is a single bit, and the groups in tokenlist[] follow this bit order (NTC switches to the next class, tc<<1). See next_token() */
210 #define TC_SEQSTART 1 /* ( */
211 #define TC_SEQTERM (1 << 1) /* ) */
212 #define TC_REGEXP (1 << 2) /* /.../ */
213 #define TC_OUTRDR (1 << 3) /* | > >> */
214 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
215 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
216 #define TC_BINOPX (1 << 6) /* two-opnd operator */
217 #define TC_IN (1 << 7) /* "in" operator; also a word token (see TC_WORD) */
218 #define TC_COMMA (1 << 8)
219 #define TC_PIPE (1 << 9) /* input redirection pipe */
220 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
221 #define TC_ARRTERM (1 << 11) /* ] */
222 #define TC_GRPSTART (1 << 12) /* { */
223 #define TC_GRPTERM (1 << 13) /* } */
224 #define TC_SEMICOL (1 << 14)
225 #define TC_NEWLINE (1 << 15)
226 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
227 #define TC_WHILE (1 << 17)
228 #define TC_ELSE (1 << 18)
229 #define TC_BUILTIN (1 << 19) /* builtin function name (BUILTIN group of tokenlist[]) */
230 #define TC_GETLINE (1 << 20)
231 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
232 #define TC_BEGIN (1 << 22)
233 #define TC_END (1 << 23)
234 #define TC_EOF (1 << 24)
235 #define TC_VARIABLE (1 << 25) /* name: variable */
236 #define TC_ARRAY (1 << 26) /* name: array */
237 #define TC_FUNCTION (1 << 27) /* name: function */
238 #define TC_STRING (1 << 28) /* "..." string literal */
239 #define TC_NUMBER (1 << 29) /* numeric literal */
241 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
243 /* combined token classes */
244 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
245 //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
246 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
247 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
249 #define TC_STATEMNT (TC_STATX | TC_WHILE)
250 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
252 /* word tokens, cannot mean something else if not expected */
253 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
254 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
256 /* discard newlines after these */
257 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
258 | TC_BINOP | TC_OPTERM)
260 /* what can expression begin with */
261 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
262 /* what can group begin with */
263 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
265 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
266 /* operator is inserted between them */
267 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
268 | TC_STRING | TC_NUMBER | TC_UOPPOST)
269 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
271 #define OF_RES1 0x010000
272 #define OF_RES2 0x020000
273 #define OF_STR1 0x040000
274 #define OF_STR2 0x080000
275 #define OF_NUM1 0x100000
276 #define OF_CHECKED 0x200000
278 /* combined operator flags */
281 #define xS (OF_RES2 | OF_STR2)
283 #define VV (OF_RES1 | OF_RES2)
284 #define Nx (OF_RES1 | OF_NUM1)
285 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
286 #define Sx (OF_RES1 | OF_STR1)
287 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
288 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
290 #define OPCLSMASK 0xFF00
291 #define OPNMASK 0x007F
293 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
294 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
295 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
300 #define P(x) (x << 24)
301 #define PRIMASK 0x7F000000
302 #define PRIMASK2 0x7E000000
304 /* Operation classes */
306 #define SHIFT_TIL_THIS 0x0600
307 #define RECUR_FROM_THIS 0x1000
310 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
311 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
313 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
314 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
315 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
317 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
318 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
319 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
320 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
321 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
322 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
323 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
324 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
327 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
331 /* simple builtins */
333 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
334 F_ti, F_le, F_sy, F_ff, F_cl
339 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
341 B_an, B_co, B_ls, B_or, B_rs, B_xo,
344 /* tokens and their corresponding info values */
346 #define NTC "\377" /* switch to next token class (tc<<1) */
349 #define OC_B OC_BUILTIN
351 static const char tokenlist[] ALIGN1 =
354 "\1/" NTC /* REGEXP */
355 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
356 "\2++" "\2--" NTC /* UOPPOST */
357 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
358 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
359 "\2*=" "\2/=" "\2%=" "\2^="
360 "\1+" "\1-" "\3**=" "\2**"
361 "\1/" "\1%" "\1^" "\1*"
362 "\2!=" "\2>=" "\2<=" "\1>"
363 "\1<" "\2!~" "\1~" "\2&&"
364 "\2||" "\1?" "\1:" NTC
368 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
374 "\2if" "\2do" "\3for" "\5break" /* STATX */
375 "\10continue" "\6delete" "\5print"
376 "\6printf" "\4next" "\10nextfile"
377 "\6return" "\4exit" NTC
381 "\3and" "\5compl" "\6lshift" "\2or"
383 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
384 "\3cos" "\3exp" "\3int" "\3log"
385 "\4rand" "\3sin" "\4sqrt" "\5srand"
386 "\6gensub" "\4gsub" "\5index" "\6length"
387 "\5match" "\5split" "\7sprintf" "\3sub"
388 "\6substr" "\7systime" "\10strftime" "\6mktime"
389 "\7tolower" "\7toupper" NTC
391 "\4func" "\10function" NTC
394 /* compiler adds trailing "\0" */
397 static const uint32_t tokeninfo[] = {
401 xS|'a', xS|'w', xS|'|',
402 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
403 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
404 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
405 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
406 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
407 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
408 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
409 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
410 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
411 OC_IN|SV|P(49), /* in */
413 OC_PGETLINE|SV|P(37),
414 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
420 ST_IF, ST_DO, ST_FOR, OC_BREAK,
421 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
422 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
423 OC_RETURN|Vx, OC_EXIT|Nx,
427 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
428 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
429 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
430 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
431 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
432 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
433 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
434 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
435 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
442 /* internal variable names and their initial values */
443 /* an asterisk marks SPECIAL vars; "$" is simply the unnamed field 0 (F0) */
445 CONVFMT, OFMT, FS, OFS,
446 ORS, RS, RT, FILENAME,
447 SUBSEP, F0, ARGIND, ARGC,
448 ARGV, ERRNO, FNR, NR,
449 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
452 static const char vNames[] ALIGN1 =
453 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
454 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
455 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
456 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
457 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
459 static const char vValues[] ALIGN1 =
460 "%.6g\0" "%.6g\0" " \0" " \0"
461 "\n\0" "\n\0" "\0" "\0"
462 "\034\0" "\0" "\377";
464 /* hash size may grow to these values */
465 #define FIRST_PRIME 61
466 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
469 /* Globals. Split in two parts so that first one is addressed
470 * with (mostly short) negative offsets.
471 * NB: it's unsafe to put members of type "double"
472 * into globals2 (gcc may fail to align them).
476 chain beginseq, mainseq, endseq;
478 node *break_ptr, *continue_ptr;
480 xhash *vhash, *ahash, *fdhash, *fnhash;
481 const char *g_progname;
484 int maxfields; /* used in fsrealloc() only */
493 smallint is_f0_split;
497 uint32_t t_info; /* often used */
502 var *intvar[NUM_INTERNAL_VARS]; /* often used */
504 /* former statics from various functions */
505 char *split_f0__fstrings;
507 uint32_t next_token__save_tclass;
508 uint32_t next_token__save_info;
509 uint32_t next_token__ltclass;
510 smallint next_token__concat_inserted;
512 smallint next_input_file__files_happen;
513 rstream next_input_file__rsm;
515 var *evaluate__fnargs;
516 unsigned evaluate__seed;
517 regex_t evaluate__sreg;
521 tsplitter exec_builtin__tspl;
523 /* biggest and least used members go last */
524 tsplitter fsplitter, rsplitter;
526 #define G1 (ptr_to_globals[-1])
527 #define G (*(struct globals2 *)ptr_to_globals)
528 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
529 /*char G1size[sizeof(G1)]; - 0x74 */
530 /*char Gsize[sizeof(G)]; - 0x1c4 */
531 /* Trying to keep most of members accessible with short offsets: */
532 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
533 #define t_double (G1.t_double )
534 #define beginseq (G1.beginseq )
535 #define mainseq (G1.mainseq )
536 #define endseq (G1.endseq )
537 #define seq (G1.seq )
538 #define break_ptr (G1.break_ptr )
539 #define continue_ptr (G1.continue_ptr)
541 #define vhash (G1.vhash )
542 #define ahash (G1.ahash )
543 #define fdhash (G1.fdhash )
544 #define fnhash (G1.fnhash )
545 #define g_progname (G1.g_progname )
546 #define g_lineno (G1.g_lineno )
547 #define nfields (G1.nfields )
548 #define maxfields (G1.maxfields )
549 #define Fields (G1.Fields )
550 #define g_cb (G1.g_cb )
551 #define g_pos (G1.g_pos )
552 #define g_buf (G1.g_buf )
553 #define icase (G1.icase )
554 #define exiting (G1.exiting )
555 #define nextrec (G1.nextrec )
556 #define nextfile (G1.nextfile )
557 #define is_f0_split (G1.is_f0_split )
558 #define t_rollback (G1.t_rollback )
559 #define t_info (G.t_info )
560 #define t_tclass (G.t_tclass )
561 #define t_string (G.t_string )
562 #define t_lineno (G.t_lineno )
563 #define intvar (G.intvar )
564 #define fsplitter (G.fsplitter )
565 #define rsplitter (G.rsplitter )
566 #define INIT_G() do { \
567 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
568 G.next_token__ltclass = TC_OPTERM; \
569 G.evaluate__seed = 1; \
573 /* function prototypes */
574 static void handle_special(var *);
575 static node *parse_expr(uint32_t);
576 static void chain_group(void);
577 static var *evaluate(node *, var *);
578 static rstream *next_input_file(void);
579 static int fmt_num(char *, int, const char *, double, int);
580 static int awk_exit(int) NORETURN;
582 /* ---- error handling ---- */
584 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
585 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
586 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
587 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
588 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
589 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
590 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
591 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
592 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
593 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
595 static void zero_out_var(var *vp) /* clear every byte of *vp */
597 memset(vp, 0, sizeof(*vp));
600 static void syntax_error(const char *message) NORETURN;
601 static void syntax_error(const char *message)
603 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
606 /* ---- hash stuff ---- */
608 static unsigned hashidx(const char *name) /* string hash of name; callers reduce it modulo the table size */
613 idx = *name++ + (idx << 6) - idx; /* idx = idx * 63 + next char */
617 /* create new hash: an empty table with FIRST_PRIME buckets */
618 static xhash *hash_init(void)
622 newhash = xzalloc(sizeof(*newhash)); /* zero-filled, so nel, nprime and glen start at 0 */
623 newhash->csize = FIRST_PRIME;
624 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0])); /* all buckets start out NULL */
629 /* find item in hash, return ptr to data, NULL if not found */
630 static void *hash_search(xhash *hash, const char *name)
634 hi = hash->items[hashidx(name) % hash->csize]; /* head of the bucket chain that name hashes to */
636 if (strcmp(hi->name, name) == 0) /* exact, case-sensitive name match */
643 /* grow hash to the next size in PRIMES[] and re-bucket its items */
644 static void hash_rebuild(xhash *hash)
646 unsigned newsize, i, idx;
647 hash_item **newitems, *hi, *thi;
649 if (hash->nprime == ARRAY_SIZE(PRIMES)) /* every size in PRIMES[] has already been used */
652 newsize = PRIMES[hash->nprime++]; /* take the next table size and advance the cursor */
653 newitems = xzalloc(newsize * sizeof(newitems[0]));
655 for (i = 0; i < hash->csize; i++) { /* visit every old bucket */
660 idx = hashidx(thi->name) % newsize; /* bucket index under the new size */
661 thi->next = newitems[idx]; /* link in front of the new bucket's current head */
667 hash->csize = newsize;
668 hash->items = newitems;
671 /* find item in hash, add it if necessary. Return ptr to data */
672 static void *hash_find(xhash *hash, const char *name)
678 hi = hash_search(hash, name);
680 if (++hash->nel / hash->csize > 10) /* increments nel; true once nel reaches 11 * csize */
683 l = strlen(name) + 1; /* name length including the NUL */
684 hi = xzalloc(sizeof(*hi) + l); /* item plus room for its name (name[] is "really longer") */
685 strcpy(hi->name, name);
687 idx = hashidx(name) % hash->csize;
688 hi->next = hash->items[idx]; /* insert at the head of the bucket chain */
689 hash->items[idx] = hi;
695 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
696 #define newvar(name) ((var*) hash_find(vhash, (name)))
697 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
698 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
700 static void hash_remove(xhash *hash, const char *name) /* NOTE(review): presumably unlinks and frees the item called name - the removal code is not visible here, confirm */
702 hash_item *hi, **phi;
704 phi = &hash->items[hashidx(name) % hash->csize]; /* address of the bucket's head pointer */
707 if (strcmp(hi->name, name) == 0) {
708 hash->glen -= (strlen(name) + 1); /* glen counts each name together with its NUL */
718 /* ------ some useful functions ------ */
720 static char *skip_spaces(char *p)
723 if (*p == '\\' && p[1] == '\n') {
726 } else if (*p != ' ' && *p != '\t') {
734 /* returns old *s, advances *s past word and terminating NUL */
735 static char *nextword(char **s)
738 while (*(*s)++ != '\0') /* the post-increment also steps over the NUL itself */
743 static char nextchar(char **s)
750 c = bb_process_escape_sequence((const char**)s);
751 /* Example awk statement:
753 * we must treat \" as "
755 if (c == '\\' && *s == pps) { /* unrecognized \z? */
756 c = *(*s); /* yes, fetch z */
758 (*s)++; /* advance unless z = NUL */
763 /* TODO: merge with strcpy_and_process_escape_sequences()?
765 static void unescape_string_in_place(char *s1)
768 while ((*s1 = nextchar(&s)) != '\0')
772 static ALWAYS_INLINE int isalnum_(int c) /* nonzero if c is alphanumeric or an underscore */
774 return (isalnum(c) || c == '_');
777 static double my_strtod(char **pp)
780 if (ENABLE_DESKTOP && cp[0] == '0') {
781 /* Might be hex or octal integer: 0x123abc or 07777 */
782 char c = (cp[1] | 0x20);
783 if (c == 'x' || isdigit(cp[1])) {
784 unsigned long long ull = strtoull(cp, pp, 0);
788 if (!isdigit(c) && c != '.')
790 /* else: it may be a floating number. Examples:
791 * 009.123 (*pp points to '9')
792 * 000.123 (*pp points to '.')
793 * fall through to strtod.
797 return strtod(cp, pp);
800 /* -------- working with variables (set/get/copy/etc) -------- */
802 static xhash *iamarray(var *v)
806 while (a->type & VF_CHILD)
809 if (!(a->type & VF_ARRAY)) {
811 a->x.array = hash_init();
816 static void clear_array(xhash *array)
821 for (i = 0; i < array->csize; i++) {
822 hi = array->items[i];
826 free(thi->data.v.string);
829 array->items[i] = NULL;
831 array->glen = array->nel = 0;
834 /* clear a variable */
835 static var *clrvar(var *v)
837 if (!(v->type & VF_FSTR))
840 v->type &= VF_DONTTOUCH;
846 /* assign string value to variable */
847 static var *setvar_p(var *v, char *value)
855 /* same as setvar_p but make a copy of string; NULL and "" are both passed on as NULL */
856 static var *setvar_s(var *v, const char *value)
858 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
861 /* same as setvar_s but sets USER flag */
862 static var *setvar_u(var *v, const char *value)
864 v = setvar_s(v, value);
869 /* set array element to user string */
870 static void setari_u(var *a, int idx, const char *s)
874 v = findvar(iamarray(a), itoa(idx));
878 /* assign numeric value to variable */
879 static var *setvar_i(var *v, double value)
882 v->type |= VF_NUMBER;
888 static const char *getvar_s(var *v) /* string value of v; never NULL ("" stands in for a NULL string) */
890 /* if v is numeric and has no cached string, convert it to string */
891 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
892 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE); /* format using the current CONVFMT value */
893 v->string = xstrdup(g_buf);
894 v->type |= VF_CACHED; /* the string now mirrors the number */
896 return (v->string == NULL) ? "" : v->string;
899 static double getvar_i(var *v)
903 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
907 debug_printf_eval("getvar_i: '%s'->", s);
908 v->number = my_strtod(&s);
909 debug_printf_eval("%f (s:'%s')\n", v->number, s);
910 if (v->type & VF_USER) {
916 debug_printf_eval("getvar_i: '%s'->zero\n", s);
919 v->type |= VF_CACHED;
921 debug_printf_eval("getvar_i: %f\n", v->number);
925 /* Used for operands of bitwise ops */
926 static unsigned long getvar_i_int(var *v)
928 double d = getvar_i(v);
930 /* Casting doubles to longs is undefined for values outside
931 * of target type range. Try to widen it as much as possible */
933 return (unsigned long)d;
934 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
935 return - (long) (unsigned long) (-d); /* convert the magnitude -d first, then negate the result */
938 static var *copyvar(var *dest, const var *src)
942 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
943 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
944 dest->number = src->number;
946 dest->string = xstrdup(src->string);
948 handle_special(dest);
952 static var *incvar(var *v) /* add 1 to v's numeric value and store it back via setvar_i() */
954 return setvar_i(v, getvar_i(v) + 1.0);
957 /* return true if v is number or numeric string */
958 static int is_numeric(var *v)
961 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY)); /* nonzero if VF_NUMBER or VF_USER is set, or VF_DIRTY is clear */
964 /* return 1 when value of v corresponds to true, 0 otherwise */
965 static int istrue(var *v)
968 return (v->number != 0); /* numeric test: any nonzero number is true */
969 return (v->string && v->string[0]); /* string test: any non-empty string is true */
972 /* temporary variables allocator. Last allocated should be first freed */
973 static var *nvalloc(int n)
981 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
987 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
988 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
990 g_cb->pos = g_cb->nv;
992 /*g_cb->next = NULL; - xzalloc did it */
1000 while (v < g_cb->pos) {
1009 static void nvfree(var *v)
1013 if (v < g_cb->nv || v >= g_cb->pos)
1014 syntax_error(EMSG_INTERNAL_ERROR);
1016 for (p = v; p < g_cb->pos; p++) {
1017 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1018 clear_array(iamarray(p));
1019 free(p->x.array->items);
1022 if (p->type & VF_WALK) {
1024 walker_list *w = p->x.walker;
1025 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1029 debug_printf_walker(" free(%p)\n", w);
1038 while (g_cb->prev && g_cb->pos == g_cb->nv) {
1043 /* ------- awk program text parsing ------- */
1045 /* Parse the next token pointed to by global pos, place the results into the t_* globals.
1046 * If the token isn't expected, give up with a syntax error. Return token class
1048 static uint32_t next_token(uint32_t expected)
1050 #define concat_inserted (G.next_token__concat_inserted)
1051 #define save_tclass (G.next_token__save_tclass)
1052 #define save_info (G.next_token__save_info)
1053 /* Initialized to TC_OPTERM: */
1054 #define ltclass (G.next_token__ltclass)
1064 } else if (concat_inserted) {
1065 concat_inserted = FALSE;
1066 t_tclass = save_tclass;
1073 g_lineno = t_lineno;
1075 while (*p != '\n' && *p != '\0')
1083 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1085 } else if (*p == '\"') {
1088 while (*p != '\"') {
1090 if (*p == '\0' || *p == '\n')
1091 syntax_error(EMSG_UNEXP_EOS);
1093 *s++ = nextchar(&pp);
1099 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1101 } else if ((expected & TC_REGEXP) && *p == '/') {
1105 if (*p == '\0' || *p == '\n')
1106 syntax_error(EMSG_UNEXP_EOS);
1110 s[-1] = bb_process_escape_sequence((const char **)&pp);
1122 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1124 } else if (*p == '.' || isdigit(*p)) {
1127 t_double = my_strtod(&pp);
1130 syntax_error(EMSG_UNEXP_TOKEN);
1132 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1135 /* search for something known */
1140 int l = (unsigned char) *tl++;
1141 if (l == (unsigned char) NTCC) {
1145 /* if token class is expected,
1147 * and it's not a longer word,
1149 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1150 && strncmp(p, tl, l) == 0
1151 && !((tc & TC_WORD) && isalnum_(p[l]))
1153 /* then this is what we are looking for */
1155 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1162 /* not a known token */
1164 /* is it a name? (var/array/function) */
1166 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1169 while (isalnum_(*++p)) {
1174 /* also consume whitespace between functionname and bracket */
1175 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1179 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1184 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1186 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1192 /* skipping newlines in some cases */
1193 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1196 /* insert concatenation operator when needed */
1197 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1198 concat_inserted = TRUE;
1202 t_info = OC_CONCAT | SS | P(35);
1209 /* Are we ready for this? */
1210 if (!(ltclass & expected))
1211 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1212 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1215 #undef concat_inserted
1221 static void rollback_token(void)
1226 static node *new_node(uint32_t info)
1230 n = xzalloc(sizeof(node));
1232 n->lineno = g_lineno;
1236 static void mk_re_node(const char *s, node *n, regex_t *re) /* make n an OC_REGEXP node for pattern s; re must have room for two regex_t */
1238 n->info = OC_REGEXP;
1241 xregcomp(re, s, REG_EXTENDED); /* re[0]: case-sensitive variant */
1242 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); /* re[1]: case-insensitive variant */
1245 static node *condition(void) /* parse a parenthesized expression and return its subtree */
1247 next_token(TC_SEQSTART); /* the next token must be "(" */
1248 return parse_expr(TC_SEQTERM); /* parse up to ")"; parse_expr eats the terminator */
1251 /* parse expression terminated by given argument, return ptr
1252 * to built subtree. Terminator is eaten by parse_expr */
1253 static node *parse_expr(uint32_t iexp)
1261 debug_printf_parse("%s(%x)\n", __func__, iexp);
1264 sn.r.n = glptr = NULL;
1265 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1267 while (!((tc = next_token(xtc)) & iexp)) {
1269 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1270 /* input redirection (<) attached to glptr node */
1271 debug_printf_parse("%s: input redir\n", __func__);
1272 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1274 xtc = TC_OPERAND | TC_UOPPRE;
1277 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1278 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1279 /* for binary and postfix-unary operators, jump back over
1280 * previous operators with higher priority */
1282 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1283 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1287 if ((t_info & OPCLSMASK) == OC_TERNARY)
1289 cn = vn->a.n->r.n = new_node(t_info);
1291 if (tc & TC_BINOP) {
1293 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1294 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1296 next_token(TC_GETLINE);
1297 /* give maximum priority to this pipe */
1298 cn->info &= ~PRIMASK;
1299 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1303 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1308 debug_printf_parse("%s: other\n", __func__);
1309 /* for operands and prefix-unary operators, attach them
1312 cn = vn->r.n = new_node(t_info);
1314 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1315 if (tc & (TC_OPERAND | TC_REGEXP)) {
1316 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1317 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1318 /* one should be very careful with switch on tclass -
1319 * only simple tclasses should be used! */
1323 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1325 v = hash_search(ahash, t_string);
1327 cn->info = OC_FNARG;
1328 cn->l.aidx = v->x.aidx;
1330 cn->l.v = newvar(t_string);
1332 if (tc & TC_ARRAY) {
1334 cn->r.n = parse_expr(TC_ARRTERM);
1340 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1342 v = cn->l.v = xzalloc(sizeof(var));
1344 setvar_i(v, t_double);
1346 setvar_s(v, t_string);
1350 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1351 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1355 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1357 cn->r.f = newfunc(t_string);
1358 cn->l.n = condition();
1362 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1363 cn = vn->r.n = parse_expr(TC_SEQTERM);
1365 syntax_error("Empty sequence");
1370 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1372 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1376 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1377 cn->l.n = condition();
1384 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1388 /* add node to chain. Return ptr to alloc'd node */
1389 static node *chain_node(uint32_t info)
1394 seq->first = seq->last = new_node(0);
1396 if (seq->programname != g_progname) {
1397 seq->programname = g_progname;
1398 n = chain_node(OC_NEWSOURCE);
1399 n->l.new_progname = xstrdup(g_progname);
1404 seq->last = n->a.n = new_node(OC_DONE);
1409 static void chain_expr(uint32_t info)
1413 n = chain_node(info);
1414 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1415 if (t_tclass & TC_GRPTERM)
1419 static node *chain_loop(node *nn) /* chain the nodes of one loop, giving it its own break/continue targets */
1421 node *n, *n2, *save_brk, *save_cont;
1423 save_brk = break_ptr; /* remember the current targets so they can be restored below */
1424 save_cont = continue_ptr;
1426 n = chain_node(OC_BR | Vx);
1427 continue_ptr = new_node(OC_EXEC); /* fresh targets for statements inside this loop */
1428 break_ptr = new_node(OC_EXEC);
1430 n2 = chain_node(OC_EXEC | Vx);
1433 continue_ptr->a.n = n2; /* "continue" proceeds to n2 */
1434 break_ptr->a.n = n->r.n = seq->last; /* "break" and n's r.n link both lead to the current end of the chain */
1436 continue_ptr = save_cont; /* restore the saved targets */
1437 break_ptr = save_brk;
// Parses one statement, or a group of statements, and appends the resulting
// nodes to the current sequence. Three cases are visible: a group start token,
// a plain expression, and a statement keyword dispatched on t_info.
1442 /* parse group and attach it to chain */
1443 static void chain_group(void)
// Keep reading tokens while they are newlines.
1449 c = next_token(TC_GRPSEQ);
1450 } while (c & TC_NEWLINE);
1452 if (c & TC_GRPSTART) {
1453 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
// Loop until the group terminator token is read.
1454 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1455 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1456 if (t_tclass & TC_NEWLINE)
1461 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1462 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1463 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
// A bare expression statement is chained as an OC_EXEC node.
1465 chain_expr(OC_EXEC | Vx);
1468 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
// Dispatch on the statement class stored in t_info.
1469 switch (t_info & OPCLSMASK) {
1471 debug_printf_parse("%s: ST_IF\n", __func__);
// if: an OC_BR node holds the condition, followed by an OC_EXEC node (n2).
1472 n = chain_node(OC_BR | Vx);
1473 n->l.n = condition();
1475 n2 = chain_node(OC_EXEC);
1477 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
// In the else case n2 is pointed at the then-current end of the sequence.
1479 n2->a.n = seq->last;
1486 debug_printf_parse("%s: ST_WHILE\n", __func__);
1488 n = chain_loop(NULL);
1493 debug_printf_parse("%s: ST_DO\n", __func__);
// do-while: an OC_EXEC node is chained before the loop skeleton, and the
// condition is parsed after the while keyword.
1494 n2 = chain_node(OC_EXEC);
1495 n = chain_loop(NULL);
1497 next_token(TC_WHILE);
1498 n->l.n = condition();
1502 debug_printf_parse("%s: ST_FOR\n", __func__);
1503 next_token(TC_SEQSTART);
1504 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1505 if (t_tclass & TC_SEQTERM) { /* for-in */
// The for-in form requires the parsed expression to be an OC_IN node.
1506 if ((n2->info & OPCLSMASK) != OC_IN)
1507 syntax_error(EMSG_UNEXP_TOKEN);
1508 n = chain_node(OC_WALKINIT | VV);
// The node returned by chain_loop is retyped to OC_WALKNEXT.
1511 n = chain_loop(NULL);
1512 n->info = OC_WALKNEXT | Vx;
1514 } else { /* for (;;) */
1515 n = chain_node(OC_EXEC | Vx);
// n2 and n3 are the second and third clauses of the for header.
1517 n2 = parse_expr(TC_SEMICOL);
1518 n3 = parse_expr(TC_SEQTERM);
1528 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
// print / printf: l.n holds the parsed expression; when an output redirection
// token follows, r.n holds the parsed redirection target.
1529 n = chain_node(t_info);
1530 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1531 if (t_tclass & TC_OUTRDR) {
1533 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1535 if (t_tclass & TC_GRPTERM)
1540 debug_printf_parse("%s: OC_BREAK\n", __func__);
1541 n = chain_node(OC_EXEC);
1546 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
// continue: an OC_EXEC node whose a.n is the current continue_ptr.
1547 n = chain_node(OC_EXEC);
1548 n->a.n = continue_ptr;
1551 /* delete, next, nextfile, return, exit */
1553 debug_printf_parse("%s: default\n", __func__);
// Parses the awk program text p. The main loop reads top-level tokens until
// TC_EOF and branches on the token class: terminator, BEGIN, END, function
// declaration, pattern expression, or (the final else) a bare action group.
1559 static void parse_program(char *p)
1568 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1569 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1571 if (tclass & TC_OPTERM) {
1572 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1577 if (tclass & TC_BEGIN) {
1578 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1582 } else if (tclass & TC_END) {
1583 debug_printf_parse("%s: TC_END\n", __func__);
1587 } else if (tclass & TC_FUNCDECL) {
1588 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1589 next_token(TC_FUNCTION);
// Register the function under the name just read and reset its body.
1591 f = newfunc(t_string);
1592 f->body.first = NULL;
// Each parameter name is looked up in ahash and given its positional index.
1594 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1595 v = findvar(ahash, t_string);
1596 v->x.aidx = f->nargs++;
1598 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1605 } else if (tclass & TC_OPSEQ) {
1606 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
// Pattern rule: an OC_TEST node whose l.n is the pattern expression.
1608 cn = chain_node(OC_TEST);
1609 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1610 if (t_tclass & TC_GRPSTART) {
1611 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1615 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
// A pattern with no action group gets an OC_PRINT node chained instead.
1616 chain_node(OC_PRINT);
// r.n of the test node is set to the current tail of mainseq.
1618 cn->r.n = mainseq.last;
1620 } else /* if (tclass & TC_GRPSTART) */ {
1621 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1626 debug_printf_parse("%s: TC_EOF\n", __func__);
1630 /* -------- program execution part -------- */
// Prepares the splitter spl from the separator string s and returns a node
// pointer. A separator longer than one character is compiled as a regular
// expression via mk_re_node; otherwise its single character (or NUL for an
// empty string) is stored directly in n->info.
1632 static node *mk_splitter(const char *s, tsplitter *spl)
// A splitter that currently holds a regexp has its compiled regex freed first.
1640 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1642 regfree(ire); // TODO: nuke ire, use re+1?
1644 if (s[0] && s[1]) { /* strlen(s) > 1 */
1645 mk_re_node(s, n, re);
1647 n->info = (uint32_t) s[0];
// Returns a compiled regex for op. An OC_REGEXP node yields its own
// precompiled regex (the ire variant when icase is set, otherwise re).
// Any other node is evaluated to a string and compiled into preg, first with
// REG_EXTENDED and, if that fails, again without it via xregcomp.
1653 /* use node as a regular expression. Supplied with node ptr and regex_t
1654 * storage space. Return ptr to regex (if result points to preg, it should
1655 * be later regfree'd manually
1657 static regex_t *as_regex(node *op, regex_t *preg)
1663 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1664 return icase ? op->r.ire : op->l.re;
1667 s = getvar_s(evaluate(op, v));
1669 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1670 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1671 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1672 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1673 * (maybe gsub is not supposed to use REG_EXTENDED?).
1675 if (regcomp(preg, s, cflags)) {
1676 cflags &= ~REG_EXTENDED;
1677 xregcomp(preg, s, cflags);
// Ensures buffer b can hold more than n bytes. When b is NULL or n >= *size,
// the new capacity n + n/2 + 80 is written through size and b is reallocated
// with xrealloc. The grown capacity is always strictly greater than n.
1683 /* gradually increasing buffer.
1684 * note that we reallocate even if n == old_size,
1685 * and thus there is at least one extra allocated byte.
1687 static char* qrealloc(char *b, int n, int *size)
1689 if (!b || n >= *size) {
1690 *size = n + (n>>1) + 80;
1691 b = xrealloc(b, *size);
// Adjusts the Fields array for size fields. When size reaches maxfields the
// array is grown to size + 16 entries and the new entries are initialised as
// VF_SPECIAL with a NULL string. Fields from size up to nfields are then
// cleared (the loop body is not visible here).
1696 /* resize field storage space */
1697 static void fsrealloc(int size)
1701 if (size >= maxfields) {
1703 maxfields = size + 16;
1704 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1705 for (; i < maxfields; i++) {
1706 Fields[i].type = VF_SPECIAL;
1707 Fields[i].string = NULL;
1710 /* if size < nfields, clear extra field variables */
1711 for (i = size; i < nfields; i++) {
// Splits string s according to splitter node spl. A freshly allocated buffer
// is stored through slist, and the regex branch returns the field count n.
// Four splitting modes are visible: regular expression, null separator,
// single character, and whitespace.
1717 static int awk_split(const char *s, node *spl, char **slist)
1722 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1724 /* in worst case, each char would be a separate field */
1725 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
// c[0] and c[1] both start as the separator character taken from spl->info.
1728 c[0] = c[1] = (char)spl->info;
// An empty RS triggers special handling (the statement is not visible here).
1730 if (*getvar_s(intvar[RS]) == '\0')
1734 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1736 return n; /* "": zero fields */
1737 n++; /* at least one field will be there */
1739 l = strcspn(s, c+2); /* len till next NUL or \n */
// The regex variant is chosen by icase; a match counts only if it starts
// at or before offset l.
1740 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1741 && pmatch[0].rm_so <= l
1743 l = pmatch[0].rm_so;
1744 if (pmatch[0].rm_eo == 0) {
1748 n++; /* we saw yet another delimiter */
1750 pmatch[0].rm_eo = l;
1755 /* make sure we remove *all* of the separator chars */
1758 } while (++l < pmatch[0].rm_eo);
1760 s += pmatch[0].rm_eo;
1764 if (c[0] == '\0') { /* null split */
1772 if (c[0] != ' ') { /* single-character split */
// Upper- and lower-case forms of the separator (presumably applied only when
// icase is set -- the guard is not visible here).
1774 c[0] = toupper(c[0]);
1775 c[1] = tolower(c[1]);
1779 while ((s1 = strpbrk(s1, c)) != NULL) {
// Whitespace split: leading whitespace is skipped, then a run of
// non-whitespace characters is consumed.
1787 s = skip_whitespace(s);
1791 while (*s && !isspace(*s))
// Splits the current record ($0, intvar[F0]) into the Fields array using
// fsplitter, then sets NF by writing the variable directly.
1798 static void split_f0(void)
1800 /* static char *fstrings; */
1801 #define fstrings (G.split_f0__fstrings)
// fstrings receives the buffer produced by awk_split; n is the field count.
1812 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
// Each field takes the next word obtained through nextword and is flagged
// VF_FSTR | VF_USER | VF_DIRTY.
1815 for (i = 0; i < n; i++) {
1816 Fields[i].string = nextword(&s);
1817 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1820 /* set NF manually to avoid side effects */
1822 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1823 intvar[NF]->number = nfields;
// Reacts to a change of the special variable v. Branches exist for NF, $0,
// FS, RS and IGNORECASE, plus a final branch that compares v against the
// Fields array. The early check on VF_SPECIAL guards all of them.
1827 /* perform additional actions when some internal variables changed */
1828 static void handle_special(var *v)
1832 const char *sep, *s;
1833 int sl, l, len, i, bsize;
1835 if (!(v->type & VF_SPECIAL))
1838 if (v == intvar[NF]) {
1839 n = (int)getvar_i(v);
1842 /* recalculate $0 */
// The new $0 is built in buffer b from the first n fields, with OFS copied
// in as the separator.
1843 sep = getvar_s(intvar[OFS]);
1847 for (i = 0; i < n; i++) {
1848 s = getvar_s(&Fields[i]);
1851 memcpy(b+len, sep, sl);
1854 b = qrealloc(b, len+l+sl, &bsize);
1855 memcpy(b+len, s, l);
// The buffer b is passed to setvar_p for $0.
1860 setvar_p(intvar[F0], b);
1863 } else if (v == intvar[F0]) {
// Changing $0 marks the record as not yet split.
1864 is_f0_split = FALSE;
1866 } else if (v == intvar[FS]) {
1868 * The POSIX-2008 standard says that changing FS should have no effect on the
1869 * current input line, but only on the next one. The language is:
1871 * > Before the first reference to a field in the record is evaluated, the record
1872 * > shall be split into fields, according to the rules in Regular Expressions,
1873 * > using the value of FS that was current at the time the record was read.
1875 * So, split up current line before assignment to FS:
1879 mk_splitter(getvar_s(v), &fsplitter);
1881 } else if (v == intvar[RS]) {
1882 mk_splitter(getvar_s(v), &rsplitter);
1884 } else if (v == intvar[IGNORECASE]) {
// v-Fields is the zero-based index of v within Fields, so NF becomes the
// larger of its old value and index + 1.
1888 n = getvar_i(intvar[NF]);
1889 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1890 /* right here v is invalid. Just to note... */
// Works on the argument list referenced by pn and returns a node pointer.
// The visible test checks whether the current node n is a non-NULL OC_COMMA
// node; the two branches are not visible here.
1894 /* step through func/builtin/etc arguments */
1895 static node *nextarg(node **pn)
1900 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// Starts a for-in walk over array using loop variable v. A walker_list is
// allocated with room for array->glen + 1 bytes of key text, linked to any
// walker already attached to v, and the array item names are copied into it.
1909 static void hashwalk_init(var *v, xhash *array)
1914 walker_list *prev_walker;
// A variable already flagged VF_WALK has its existing walker remembered, so
// the new one can be linked to it through w->prev.
1916 if (v->type & VF_WALK) {
1917 prev_walker = v->x.walker;
1922 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1924 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1925 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
// cur and end both start at the beginning of the key buffer.
1926 w->cur = w->end = w->wbuf;
1927 w->prev = prev_walker;
// Visit every bucket; item names are copied to w->end with strcpy.
1928 for (i = 0; i < array->csize; i++) {
1929 hi = array->items[i];
1931 strcpy(w->end, hi->name);
// Advances the walk attached to v. Once cur has reached end, the walk is over
// and the previous walker is put back on v. Otherwise v is set to the next
// stored key obtained through nextword.
// The return values are not visible here.
1938 static int hashwalk_next(var *v)
1940 walker_list *w = v->x.walker;
1942 if (w->cur >= w->end) {
1943 walker_list *prev_walker = w->prev;
1945 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1947 v->x.walker = prev_walker;
1951 setvar_s(v, nextword(&w->cur));
// Evaluates pattern into the shared variable G.ptest__v and returns the
// result of istrue on it.
1955 /* evaluate node, return 1 when result is true, 0 otherwise */
1956 static int ptest(node *pattern)
1958 /* ptest__v is "static": to save stack space? */
1959 return istrue(evaluate(pattern, &G.ptest__v));
// Reads the next record from rsm into v, using the record separator held in
// rsplitter. Data is read with safe_read on the descriptor of rsm->F into a
// buffer grown by qrealloc. The matched separator text is stored in RT and a
// read error is recorded in ERRNO. The status r is logged at the end.
1962 /* read next record from stream rsm into a variable v */
1963 static int awk_getline(rstream *rsm, var *v)
1966 regmatch_t pmatch[2];
1967 int size, a, p, pp = 0;
1968 int fd, so, eo, r, rp;
1971 debug_printf_eval("entered %s()\n", __func__);
1973 /* we're using our own buffer since we need access to accumulating
1976 fd = fileno(rsm->F);
1981 c = (char) rsplitter.n.info;
1985 m = qrealloc(m, 256, &size);
// Three separator modes follow: a regular expression, a single non-NUL
// character, and the c == NUL case handled in the else part.
1992 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1993 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1994 b, 1, pmatch, 0) == 0) {
1995 so = pmatch[0].rm_so;
1996 eo = pmatch[0].rm_eo;
2000 } else if (c != '\0') {
2001 s = strchr(b+pp, c);
2003 s = memchr(b+pp, '\0', p - pp);
// In this part newlines at b[rp] are stepped over, the search is for an
// empty line (two newlines in a row), and newlines at b[eo] are stepped over.
2010 while (b[rp] == '\n')
2012 s = strstr(b+rp, "\n\n");
2015 while (b[eo] == '\n')
// Move p+1 bytes from offset a to the start of the buffer.
2024 memmove(m, m+a, p+1);
2029 m = qrealloc(m, a+p+128, &size);
// The read length size-p-1 leaves one byte of the buffer unused.
2032 p += safe_read(fd, b+p, size-p-1);
2036 setvar_i(intvar[ERRNO], errno);
// b[so] and b[eo] are overwritten with NUL after their old values are saved
// in c (presumably restored afterwards -- not visible here).
2045 c = b[so]; b[so] = '\0';
2049 c = b[eo]; b[eo] = '\0';
2050 setvar_s(intvar[RT], b+so);
2059 debug_printf_eval("returning from %s(): %d\n", __func__, r);
// Formats number n into buffer b of the given size using format.
// When int_as_int is set and n has no fractional part it is printed as %lld.
// Otherwise the last character of format selects the conversion:
// integer conversions receive (int)n, floating conversions receive n, and any
// other conversion character is reported as an invalid format.
2064 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2068 const char *s = format;
2070 if (int_as_int && n == (long long)n) {
2071 r = snprintf(b, size, "%lld", (long long)n);
// Leaves c holding the last character of the format string.
2073 do { c = *s; } while (c && *++s);
2074 if (strchr("diouxX", c)) {
// NOTE(review): the value is narrowed to int here, so numbers outside the
// int range cannot be printed correctly by integer conversions.
2075 r = snprintf(b, size, format, (int)n);
2076 } else if (strchr("eEfgG", c)) {
2077 r = snprintf(b, size, format, n);
2079 syntax_error(EMSG_INV_FMT);
// Implements printf-style formatting for the argument list n. The first
// argument is the format string, and each later conversion consumes one more
// argument. Output accumulates in buffer b, which is grown with qrealloc and
// finally trimmed to i + 1 bytes.
2085 /* formatted output into an allocated buffer, return ptr to buffer */
2086 static char *awk_printf(node *n)
2091 int i, j, incr, bsize;
// Work on a private copy of the format string.
2096 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Scan to the next conversion; a doubled percent sign is skipped over.
2101 while (*f && (*f != '%' || *++f == '%'))
// Then scan to the conversion letter.
2103 while (*f && !isalpha(*f)) {
2105 syntax_error("%*x formats are not supported");
// Reserve room for the literal text of this piece plus MAXVARFMT bytes.
2109 incr = (f - s) + MAXVARFMT;
2110 b = qrealloc(b, incr + i, &bsize);
2116 arg = evaluate(nextarg(&n), v);
// Character conversion: a numeric argument is used as a character code,
// otherwise the first character of its string value is used.
2119 if (c == 'c' || !c) {
2120 i += sprintf(b+i, s, is_numeric(arg) ?
2121 (char)getvar_i(arg) : *getvar_s(arg));
2122 } else if (c == 's') {
// String conversion: the buffer is grown by the length of the string first.
2124 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2125 i += sprintf(b+i, s, s1);
2127 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2131 /* if there was an error while sprintf, return value is negative */
2138 b = xrealloc(b, i + 1);
// Shared implementation behind the substitution builtins. The regex for rn
// comes from as_regex, the subject string is src (or $0 when src is NULL),
// and the rebuilt string goes to dest (or $0 when dest is NULL).
// Up to 10 match slots are requested from regexec.
2143 /* Common substitution routine.
2144 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2145 * store result into (dest), return number of substitutions.
2146 * If nm = 0, replace all matches.
2147 * If src or dst is NULL, use $0.
2148 * If subexp != 0, enable subexpression matching (\1-\9).
2150 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2154 int match_no, residx, replen, resbufsize;
2156 regmatch_t pmatch[10];
2157 regex_t sreg, *regex;
2163 regex = as_regex(rn, &sreg);
2164 sp = getvar_s(src ? src : intvar[F0]);
2165 replen = strlen(repl);
2166 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2167 int so = pmatch[0].rm_so;
2168 int eo = pmatch[0].rm_eo;
2170 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
// Copy everything up to the end of the match, matched text included.
2171 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2172 memcpy(resbuf + residx, sp, eo);
2174 if (++match_no >= nm) {
// This match is to be replaced: step back over the matched text.
2179 residx -= (eo - so);
// Copy the replacement text one character at a time.
2181 for (s = repl; *s; s++) {
2182 char c = resbuf[residx++] = *s;
// An ampersand, or a digit when subexp is set, may refer to a match slot.
2187 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2189 residx -= ((nbs + 3) >> 1);
2196 resbuf[residx++] = c;
// Insert the text of match slot j taken from the subject string.
2198 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2199 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2200 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
// Later searches run with REG_NOTBOL.
2208 regexec_flags = REG_NOTBOL;
2213 /* Empty match (e.g. "b*" will match anywhere).
2214 * Advance by one char. */
2216 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2217 //... and will erroneously match "b" even though it is NOT at the word start.
2218 //we need REG_NOTBOW but it does not exist...
2219 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2220 //it should be able to do it correctly.
2221 /* Subtle: this is safe only because
2222 * qrealloc allocated at least one extra byte */
2223 resbuf[residx] = *sp;
// Append the unmatched remainder of the subject string.
2231 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2232 strcpy(resbuf + residx, sp);
2234 //bb_error_msg("end sp:'%s'%p", sp,sp);
2235 setvar_p(dest ? dest : intvar[F0], resbuf);
// Parses a date specification string ds of whitespace-separated numbers
// (year, month, day, hour, minute, second and one more signed field) into a
// struct tm and returns the result of mktime on it. The visible range checks
// are on the month (at least 1) and the year (at least 1900).
2241 static NOINLINE int do_mktime(const char *ds)
2246 /*memset(&then, 0, sizeof(then)); - not needed */
2247 then.tm_isdst = -1; /* default is unknown */
2249 /* manpage of mktime says these fields are ints,
2250 * so we can sscanf stuff directly into them */
2251 count = sscanf(ds, "%u %u %u %u %u %u %d",
2252 &then.tm_year, &then.tm_mon, &then.tm_mday,
2253 &then.tm_hour, &then.tm_min, &then.tm_sec,
2257 || (unsigned)then.tm_mon < 1
2258 || (unsigned)then.tm_year < 1900
// struct tm counts years from 1900.
2264 then.tm_year -= 1900;
2266 return mktime(&then);
// Executes the multi-argument builtin described by node op and stores the
// outcome in res. Up to four arguments are collected into an[] (nodes),
// av[] (evaluated values) and as[] (string values), depending on flag bits
// in op->info. The visible cases cover atan2, split, substr, the bitwise
// operations, case conversion, index, strftime, mktime, match and the
// substitution builtins.
2269 static NOINLINE var *exec_builtin(node *op, var *res)
2271 #define tspl (G.exec_builtin__tspl)
2277 regmatch_t pmatch[2];
2286 isr = info = op->info;
2289 av[2] = av[3] = NULL;
// Collect arguments. Values are evaluated when either bit of 0x09000000 is
// set, and string values are fetched when bit 0x08000000 is set.
2290 for (i = 0; i < 4 && op; i++) {
2291 an[i] = nextarg(&op);
2292 if (isr & 0x09000000)
2293 av[i] = evaluate(an[i], &tv[i]);
2294 if (isr & 0x08000000)
2295 as[i] = getvar_s(av[i]);
// The top two bits of info hold the minimum number of arguments.
2300 if ((uint32_t)nargs < (info >> 30))
2301 syntax_error(EMSG_TOO_FEW_ARGS);
2307 if (ENABLE_FEATURE_AWK_LIBM)
2308 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2310 syntax_error(EMSG_NO_MATH);
// split: a regexp node as the third argument is used directly as the
// splitter; any other value is turned into one with mk_splitter.
2317 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2318 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2323 n = awk_split(as[0], spl, &s);
// The target array is cleared and refilled with indices starting at 1.
2325 clear_array(iamarray(av[1]));
2326 for (i = 1; i <= n; i++)
2327 setari_u(av[1], i, nextword(&s));
// substr: the start position from awk is 1-based, hence the - 1.
2337 i = getvar_i(av[1]) - 1;
2342 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2345 s = xstrndup(as[0]+i, n);
2350 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2351 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2353 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2357 setvar_i(res, ~getvar_i_int(av[0]));
2361 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2365 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2369 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2373 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// Case conversion works on a private copy and touches only bytes that are
// ASCII letters: clearing bit 0x20 gives upper case, setting it gives lower.
2379 s1 = s = xstrdup(as[0]);
2381 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2382 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2383 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// index: l is the last offset at which a string of length ll can still fit.
2393 l = strlen(as[0]) - ll;
2394 if (ll > 0 && l >= 0) {
2396 char *s = strstr(as[0], as[1]);
// The reported position is 1-based.
2398 n = (s - as[0]) + 1;
2400 /* this piece of code is terribly slow and
2401 * really should be rewritten
2403 for (i = 0; i <= l; i++) {
2404 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2416 tt = getvar_i(av[1]);
2419 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
// strftime writes into the global buffer g_buf, limited to MAXVARFMT bytes.
2420 i = strftime(g_buf, MAXVARFMT,
2421 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2424 setvar_s(res, g_buf);
2428 setvar_i(res, do_mktime(as[0]));
2432 re = as_regex(an[1], &sreg);
2433 n = regexec(re, as[0], 1, pmatch, 0);
// These offsets give RSTART 0 and RLENGTH -1 in the assignments below
// (presumably the no-match case -- the guard is not visible here).
2438 pmatch[0].rm_so = 0;
2439 pmatch[0].rm_eo = -1;
2441 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2442 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2443 setvar_i(res, pmatch[0].rm_so);
// Three awk_sub calls: the first passes an explicit match number from av[2],
// a separate source av[3] and subexp TRUE, and writes into res. The other two
// rewrite av[2] in place, with nm 0 (all matches) and nm 1 (first match).
2449 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2453 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2457 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
// evaluate() is the interpreter core. For each node it first evaluates the
// operands requested by the OF_* flags in opinfo, then dispatches on the
// operation class. The two checks near the end of the listing compare the
// class against SHIFT_TIL_THIS and RECUR_FROM_THIS; the actions they guard
// are not visible here.
2467 * Evaluate node - the heart of the program. Supplied with subtree
2468 * and place where to store result. returns ptr to result.
2470 #define XC(n) ((n) >> 8)
2472 static var *evaluate(node *op, var *res)
2474 /* This procedure is recursive so we should count every byte */
2475 #define fnargs (G.evaluate__fnargs)
2476 /* seed is initialized to 1 */
2477 #define seed (G.evaluate__seed)
2478 #define sreg (G.evaluate__sreg)
2483 return setvar_s(res, NULL);
2485 debug_printf_eval("entered %s()\n", __func__);
2493 } L = L; /* for compiler */
2504 opn = (opinfo & OPNMASK);
// Remember the source line of the node being evaluated.
2505 g_lineno = op->lineno;
2507 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2509 /* execute inevitable things */
// The left operand is evaluated into v1 and the right operand into v1+1.
2510 if (opinfo & OF_RES1)
2511 L.v = evaluate(op1, v1);
2512 if (opinfo & OF_RES2)
2513 R.v = evaluate(op->r.n, v1+1);
2514 if (opinfo & OF_STR1) {
2515 L.s = getvar_s(L.v);
2516 debug_printf_eval("L.s:'%s'\n", L.s);
2518 if (opinfo & OF_STR2) {
2519 R.s = getvar_s(R.v);
2520 debug_printf_eval("R.s:'%s'\n", R.s);
2522 if (opinfo & OF_NUM1) {
2523 L_d = getvar_i(L.v);
2524 debug_printf_eval("L_d:%f\n", L_d);
2527 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2528 switch (XC(opinfo & OPCLSMASK)) {
2530 /* -- iterative node type -- */
2534 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2535 /* it's range pattern */
// OF_CHECKED records that the range is open. It is set once the start
// pattern matches and cleared again when the end pattern matches.
2536 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2537 op->info |= OF_CHECKED;
2538 if (ptest(op1->r.n))
2539 op->info &= ~OF_CHECKED;
2545 op = ptest(op1) ? op->a.n : op->r.n;
2549 /* just evaluate an expression, also used as unconditional jump */
2553 /* branch, used in if-else and various loops */
2555 op = istrue(L.v) ? op->a.n : op->r.n;
2558 /* initialize for-in loop */
2559 case XC( OC_WALKINIT ):
2560 hashwalk_init(L.v, iamarray(R.v));
2563 /* get next array item */
2564 case XC( OC_WALKNEXT ):
2565 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2568 case XC( OC_PRINT ):
2569 case XC( OC_PRINTF ): {
// With a redirection, R.s names the output target. It is opened through
// popen for a pipe, or through xfopen in write or append mode.
2573 rstream *rsm = newfile(R.s);
2576 rsm->F = popen(R.s, "w");
2578 bb_perror_msg_and_die("popen");
2581 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2587 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// print with no expression list writes $0.
2589 fputs(getvar_s(intvar[F0]), F);
2592 var *v = evaluate(nextarg(&op1), v1);
// Numeric values are formatted through fmt_num using OFMT.
2593 if (v->type & VF_NUMBER) {
2594 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2598 fputs(getvar_s(v), F);
2602 fputs(getvar_s(intvar[OFS]), F);
2605 fputs(getvar_s(intvar[ORS]), F);
2607 } else { /* OC_PRINTF */
2608 char *s = awk_printf(op1);
2616 case XC( OC_DELETE ): {
2617 uint32_t info = op1->info & OPCLSMASK;
// The operand must be a plain variable or a function argument.
2620 if (info == OC_VAR) {
2622 } else if (info == OC_FNARG) {
2623 v = &fnargs[op1->l.aidx];
2625 syntax_error(EMSG_NOT_ARRAY);
// Either one element is removed by key, or the whole array is cleared.
2631 s = getvar_s(evaluate(op1->r.n, v1));
2632 hash_remove(iamarray(v), s);
2634 clear_array(iamarray(v));
2639 case XC( OC_NEWSOURCE ):
2640 g_progname = op->l.new_progname;
2643 case XC( OC_RETURN ):
2647 case XC( OC_NEXTFILE ):
2658 /* -- recursive node type -- */
2662 if (L.v == intvar[NF])
2666 case XC( OC_FNARG ):
2667 L.v = &fnargs[op->l.aidx];
// With a subscript node present the result is the array element keyed by R.s.
2669 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2673 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2676 case XC( OC_REGEXP ):
2678 L.s = getvar_s(intvar[F0]);
2681 case XC( OC_MATCH ):
2685 regex_t *re = as_regex(op1, &sreg);
2686 int i = regexec(re, L.s, 0, NULL, 0);
// regexec returns 0 on a match; the XOR inverts the result when opn is
// the exclamation mark character.
2689 setvar_i(res, (i == 0) ^ (opn == '!'));
2694 debug_printf_eval("MOVE\n");
2695 /* if source is a temporary string, just relink it to dest */
2696 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2697 //then L.v ends up being a string, which is wrong
2698 // if (R.v == v1+1 && R.v->string) {
2699 // res = setvar_p(L.v, R.v->string);
2700 // R.v->string = NULL;
2702 res = copyvar(L.v, R.v);
2706 case XC( OC_TERNARY ):
// The right child must be an OC_COLON node holding the two alternatives.
2707 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2708 syntax_error(EMSG_POSSIBLE_ERROR);
2709 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2712 case XC( OC_FUNC ): {
2714 const char *sv_progname;
2716 /* The body might be empty, still has to eval the args */
2717 if (!op->r.n->info && !op->r.f->body.first)
2718 syntax_error(EMSG_UNDEF_FUNC);
// One slot per declared parameter, plus one spare.
2720 vbeg = v = nvalloc(op->r.f->nargs + 1);
2722 var *arg = evaluate(nextarg(&op1), v1);
2724 v->type |= VF_CHILD;
// Stop once as many slots as declared parameters have been filled.
2726 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved before the body runs and restored afterwards.
2732 sv_progname = g_progname;
2734 res = evaluate(op->r.f->body.first, res);
2736 g_progname = sv_progname;
2743 case XC( OC_GETLINE ):
2744 case XC( OC_PGETLINE ): {
// The source named by L.s is a command (popen) for OC_PGETLINE and a file
// otherwise.
2751 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2752 rsm->F = popen(L.s, "r");
2753 rsm->is_pipe = TRUE;
2755 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2760 iF = next_input_file();
// A stream that could not be opened is reported through ERRNO.
2764 if (!rsm || !rsm->F) {
2765 setvar_i(intvar[ERRNO], errno);
2773 i = awk_getline(rsm, R.v);
// FNR is incremented only for a successful read with no explicit source.
2774 if (i > 0 && !op1) {
2775 incvar(intvar[FNR]);
2782 /* simple builtins */
2783 case XC( OC_FBLTIN ): {
2784 double R_d = R_d; /* for compiler */
// Truncation toward zero through a long long conversion.
2788 R_d = (long long)L_d;
// NOTE(review): this quotient equals exactly 1 when rand() returns
// RAND_MAX, while POSIX awk specifies a result strictly below 1 -- confirm
// whether that matters here.
2792 R_d = (double)rand() / (double)RAND_MAX;
2796 if (ENABLE_FEATURE_AWK_LIBM) {
2802 if (ENABLE_FEATURE_AWK_LIBM) {
2808 if (ENABLE_FEATURE_AWK_LIBM) {
2814 if (ENABLE_FEATURE_AWK_LIBM) {
2820 if (ENABLE_FEATURE_AWK_LIBM) {
2825 syntax_error(EMSG_NO_MATH);
// With an argument the seed is that value, otherwise the current time.
2830 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2839 debug_printf_eval("length: L.s:'%s'\n", L.s);
2841 L.s = getvar_s(intvar[F0]);
2842 debug_printf_eval("length: L.s='%s'\n", L.s);
// For an array the result is its element count.
2844 else if (L.v->type & VF_ARRAY) {
2845 R_d = L.v->x.array->nel;
2846 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
// system(): the result is the return value of system shifted right by
// 8 bits, or 0 when the feature is disabled or the command is empty.
2854 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2855 ? (system(L.s) >> 8) : 0;
2861 } else if (L.s && *L.s) {
2862 rstream *rsm = newfile(L.s);
2872 rsm = (rstream *)hash_search(fdhash, L.s);
2873 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2875 debug_printf_eval("OC_FBLTIN F_cl "
2876 "rsm->is_pipe:%d, ->F:%p\n",
2877 rsm->is_pipe, rsm->F);
2878 /* Can be NULL if open failed. Example:
2879 * getline line <"doesnt_exist";
2880 * close("doesnt_exist"); <--- here rsm->F is NULL
// Pipes are closed with pclose and regular files with fclose; the entry is
// then removed from fdhash.
2883 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2885 hash_remove(fdhash, L.s);
2888 setvar_i(intvar[ERRNO], errno);
2897 case XC( OC_BUILTIN ):
2898 res = exec_builtin(op, res);
2901 case XC( OC_SPRINTF ):
2902 setvar_p(res, awk_printf(op1));
2905 case XC( OC_UNARY ): {
2908 Ld = R_d = getvar_i(R.v);
2935 case XC( OC_FIELD ): {
2936 int i = (int)getvar_i(R.v);
// Field numbers are 1-based while the Fields array is 0-based.
2943 res = &Fields[i - 1];
2948 /* concatenation (" ") and index joining (",") */
2949 case XC( OC_CONCAT ):
2950 case XC( OC_COMMA ): {
2951 const char *sep = "";
2952 if ((opinfo & OPCLSMASK) == OC_COMMA)
2953 sep = getvar_s(intvar[SUBSEP]);
2954 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// Short-circuit logic: the right operand is tested only when the left one
// does not already decide the result.
2959 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2963 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2966 case XC( OC_BINARY ):
2967 case XC( OC_REPLACE ): {
2968 double R_d = getvar_i(R.v);
2969 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2982 syntax_error(EMSG_DIV_BY_ZERO);
2986 if (ENABLE_FEATURE_AWK_LIBM)
2987 L_d = pow(L_d, R_d);
2989 syntax_error(EMSG_NO_MATH);
2993 syntax_error(EMSG_DIV_BY_ZERO);
// Remainder computed from the quotient truncated through long long.
2994 L_d -= (long long)(L_d / R_d) * R_d;
2997 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res; OC_REPLACE stores back into the left operand.
2998 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3002 case XC( OC_COMPARE ): {
3003 int i = i; /* for compiler */
// Numeric comparison when both sides are numeric, otherwise a string
// comparison that ignores case when icase is set.
3006 if (is_numeric(L.v) && is_numeric(R.v)) {
3007 Ld = getvar_i(L.v) - getvar_i(R.v);
3009 const char *l = getvar_s(L.v);
3010 const char *r = getvar_s(R.v);
3011 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
// The low bit of opn inverts the result selected by the remaining bits.
3013 switch (opn & 0xfe) {
3024 setvar_i(res, (i == 0) ^ (opn & 1));
3029 syntax_error(EMSG_POSSIBLE_ERROR);
3031 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3033 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3040 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3048 /* -------- main & co. -------- */
// Shutdown path: evaluates the END sequence, then walks every bucket of
// fdhash and calls pclose on streams that are open pipes.
3050 static int awk_exit(int r)
3061 evaluate(endseq.first, &tv);
3064 /* waiting for children */
3065 for (i = 0; i < fdhash->csize; i++) {
3066 hi = fdhash->items[i];
3068 if (hi->data.rs.F && hi->data.rs.is_pipe)
3069 pclose(hi->data.rs.F);
// Recognises an argument as an assignment candidate only if its first
// character passes isalnum_ and it contains an equals sign.
// The assignment itself is done on a private copy of the string.
3077 /* if expr looks like "var=value", perform assignment and return 1,
3078 * otherwise return 0 */
3079 static int is_assignment(const char *expr)
3083 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
// Work on a copy and re-point val at the same offset inside it.
3087 exprc = xstrdup(expr);
3088 val = exprc + (val - expr);
// Escape sequences in the value are decoded in place before it is assigned
// to the variable named by exprc.
3091 unescape_string_in_place(val);
3092 setvar_u(newvar(exprc), val);
// Steps through ARGV to find the next input. ARGIND is incremented for each
// entry examined. An entry that is empty or is handled by is_assignment is not
// opened as a file. Otherwise it is opened with xfopen_stdin and FILENAME is
// updated.
3097 /* switch to next input file */
3098 static rstream *next_input_file(void)
3100 #define rsm (G.next_input_file__rsm)
3101 #define files_happen (G.next_input_file__files_happen)
3104 const char *fname, *ind;
3109 rsm.pos = rsm.adv = 0;
// Tests whether ARGIND + 1 has reached ARGC.
3112 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3119 ind = getvar_s(incvar(intvar[ARGIND]));
3120 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3121 if (fname && *fname && !is_assignment(fname)) {
3122 F = xfopen_stdin(fname);
// Record that at least one real file argument was seen.
3127 files_happen = TRUE;
3128 setvar_s(intvar[FILENAME], fname);
// Applet entry point. The visible steps are: set up the hashes and builtin
// variables, import the environment into ENVIRON, process the command-line
// options, parse the program text, fill ARGV, run the BEGIN sequence, run the
// main sequence once per input record, and finish through awk_exit.
3135 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3136 int awk_main(int argc, char **argv)
3140 llist_t *list_v = NULL;
3141 llist_t *list_f = NULL;
3142 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3143 llist_t *list_e = NULL;
3149 char *vnames = (char *)vNames; /* cheat */
3150 char *vvalues = (char *)vValues;
3154 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3155 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3156 if (ENABLE_LOCALE_SUPPORT)
3157 setlocale(LC_NUMERIC, "C");
3161 /* allocate global buffer */
3162 g_buf = xmalloc(MAXVARFMT + 1);
3164 vhash = hash_init();
3165 ahash = hash_init();
3166 fdhash = hash_init();
3167 fnhash = hash_init();
3169 /* initialize variables */
// vnames and vvalues are walked in step. A value entry starting with the
// byte \377 means the variable gets no string value here, and a name starting
// with an asterisk marks the variable as VF_SPECIAL.
3170 for (i = 0; *vnames; i++) {
3171 intvar[i] = v = newvar(nextword(&vnames));
3172 if (*vvalues != '\377')
3173 setvar_s(v, nextword(&vvalues));
3177 if (*vnames == '*') {
3178 v->type |= VF_SPECIAL;
// Build the initial field and record splitters from FS and RS.
3183 handle_special(intvar[FS]);
3184 handle_special(intvar[RS]);
// Register the three standard streams under their /dev names.
3186 newfile("/dev/stdin")->F = stdin;
3187 newfile("/dev/stdout")->F = stdout;
3188 newfile("/dev/stderr")->F = stderr;
3190 /* Huh, people report that sometimes environ is NULL. Oh well. */
3191 if (environ) for (envp = environ; *envp; envp++) {
3192 /* environ is writable, thus we don't strdup it needlessly */
3194 char *s1 = strchr(s, '=');
3197 /* Both findvar and setvar_u take const char*
3198 * as 2nd arg -> environment is not trashed */
3199 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3203 opt_complementary = OPTCOMPLSTR_AWK;
3204 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3207 if (opt & OPT_F) { /* -F */
3208 unescape_string_in_place(opt_F);
3209 setvar_s(intvar[FS], opt_F);
3211 while (list_v) { /* -v */
3212 if (!is_assignment(llist_pop(&list_v)))
3215 while (list_f) { /* -f */
3219 g_progname = llist_pop(&list_f);
3220 from_file = xfopen_stdin(g_progname);
3221 /* one byte is reserved for some trick in next_token */
// Read the program file in chunks of up to 4094 bytes, growing the buffer
// by 4096 bytes beyond the current fill before each read.
3222 for (i = j = 1; j > 0; i += j) {
3223 s = xrealloc(s, i + 4096);
3224 j = fread(s + i, 1, 4094, from_file);
// Parsing starts one byte in, after the reserved byte.
3228 parse_program(s + 1);
3231 g_progname = "cmd. line";
3232 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3233 while (list_e) { /* -e */
3234 parse_program(llist_pop(&list_e));
// With neither -f nor -e, the next command-line argument is the program text.
3237 if (!(opt & (OPT_f | OPT_e))) {
3240 parse_program(*argv++);
3243 if (opt & OPT_W) // -W
3244 bb_error_msg("warning: option -W is ignored");
3246 /* fill in ARGV array */
3247 setvar_i(intvar[ARGC], argc);
3248 setari_u(intvar[ARGV], 0, "awk");
3251 setari_u(intvar[ARGV], ++i, *argv++);
3253 evaluate(beginseq.first, &tv);
// With no main rules and no END rules the program is done after BEGIN.
3254 if (!mainseq.first && !endseq.first)
3255 awk_exit(EXIT_SUCCESS);
3257 /* input file could already be opened in BEGIN block */
3259 iF = next_input_file();
3261 /* passing through input files */
3264 setvar_i(intvar[FNR], 0);
// One pass of the main sequence per record read into $0.
3266 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3269 incvar(intvar[FNR]);
3270 evaluate(mainseq.first, &tv);
3277 syntax_error(strerror(errno));
3279 iF = next_input_file();
3282 awk_exit(EXIT_SUCCESS);