1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
14 //config: Awk is used as a pattern scanning and processing language. This is
15 //config: the BusyBox implementation of that programming language.
17 //config:config FEATURE_AWK_LIBM
18 //config: bool "Enable math functions (requires libm)"
20 //config: depends on AWK
22 //config: Enable math functions of the Awk programming language.
23 //config: NOTE: This will require libm to be present for linking.
25 //config:config FEATURE_AWK_GNU_EXTENSIONS
26 //config: bool "Enable a few GNU extensions"
28 //config: depends on AWK
30 //config: Enable a few features from gawk:
31 //config: * command line option -e AWK_PROGRAM
32 //config: * simultaneous use of -f and -e on the command line.
33 //config: This enables the use of awk library files.
34 //config: Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
36 //applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
38 //kbuild:lib-$(CONFIG_AWK) += awk.o
40 //usage:#define awk_trivial_usage
41 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42 //usage:#define awk_full_usage "\n\n"
43 //usage: " -v VAR=VAL Set variable"
44 //usage: "\n -F SEP Use SEP as field separator"
45 //usage: "\n -f FILE Read program from FILE"
46 //usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
47 //usage: "\n -e AWK_PROGRAM"
54 /* This is a NOEXEC applet. Be very careful! */
57 /* If you comment out one of these below, it will be #defined later
58 * to perform debug printfs to stderr: */
59 #define debug_printf_walker(...) do {} while (0)
60 #define debug_printf_eval(...) do {} while (0)
61 #define debug_printf_parse(...) do {} while (0)
63 #ifndef debug_printf_walker
64 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
66 #ifndef debug_printf_eval
67 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
69 #ifndef debug_printf_parse
70 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
76 IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
78 #define OPTCOMPLSTR_AWK \
80 IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
82 OPTBIT_F, /* define field separator */
83 OPTBIT_v, /* define variable */
84 OPTBIT_f, /* pull in awk program from file */
85 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
86 OPTBIT_W, /* -W ignored */
87 OPT_F = 1 << OPTBIT_F,
88 OPT_v = 1 << OPTBIT_v,
89 OPT_f = 1 << OPTBIT_f,
90 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
98 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
99 #define VF_ARRAY 0x0002 /* 1 = it's an array */
101 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
102 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
103 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
104 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
105 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
106 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
107 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
109 /* these flags are static, don't change them when value is changed */
110 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
112 typedef struct walker_list {
115 struct walker_list *prev;
120 typedef struct var_s {
121 unsigned type; /* flags */
125 int aidx; /* func arg idx (for compilation stage) */
126 struct xhash_s *array; /* array ptr */
127 struct var_s *parent; /* for func args, ptr to actual parameter */
128 walker_list *walker; /* list of array elements (for..in) */
132 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
133 typedef struct chain_s {
134 struct node_s *first;
136 const char *programname;
140 typedef struct func_s {
146 typedef struct rstream_s {
155 typedef struct hash_item_s {
157 struct var_s v; /* variable/array hash */
158 struct rstream_s rs; /* redirect streams hash */
159 struct func_s f; /* functions hash */
161 struct hash_item_s *next; /* next in chain */
162 char name[1]; /* really it's longer */
165 typedef struct xhash_s {
166 unsigned nel; /* num of elements */
167 unsigned csize; /* current hash size (number of bucket slots in items[]) */
168 unsigned nprime; /* index into PRIMES[] of the next hash size */
169 unsigned glen; /* total length of item names, each counted with its NUL */
170 struct hash_item_s **items;
174 typedef struct node_s {
194 /* Block of temporary variables */
195 typedef struct nvblock_s {
198 struct nvblock_s *prev;
199 struct nvblock_s *next;
203 typedef struct tsplitter_s {
208 /* simple token classes */
209 /* Order and hex values are very important!!! See next_token() */
210 #define TC_SEQSTART 1 /* ( */
211 #define TC_SEQTERM (1 << 1) /* ) */
212 #define TC_REGEXP (1 << 2) /* /.../ */
213 #define TC_OUTRDR (1 << 3) /* | > >> */
214 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
215 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
216 #define TC_BINOPX (1 << 6) /* two-opnd operator */
217 #define TC_IN (1 << 7)
218 #define TC_COMMA (1 << 8)
219 #define TC_PIPE (1 << 9) /* input redirection pipe */
220 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
221 #define TC_ARRTERM (1 << 11) /* ] */
222 #define TC_GRPSTART (1 << 12) /* { */
223 #define TC_GRPTERM (1 << 13) /* } */
224 #define TC_SEMICOL (1 << 14)
225 #define TC_NEWLINE (1 << 15)
226 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
227 #define TC_WHILE (1 << 17)
228 #define TC_ELSE (1 << 18)
229 #define TC_BUILTIN (1 << 19)
230 #define TC_GETLINE (1 << 20)
231 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
232 #define TC_BEGIN (1 << 22)
233 #define TC_END (1 << 23)
234 #define TC_EOF (1 << 24)
235 #define TC_VARIABLE (1 << 25)
236 #define TC_ARRAY (1 << 26)
237 #define TC_FUNCTION (1 << 27)
238 #define TC_STRING (1 << 28)
239 #define TC_NUMBER (1 << 29)
241 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
243 /* combined token classes */
244 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
245 //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
246 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
247 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
249 #define TC_STATEMNT (TC_STATX | TC_WHILE)
250 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
252 /* word tokens, cannot mean something else if not expected */
253 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
254 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
256 /* discard newlines after these */
257 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
258 | TC_BINOP | TC_OPTERM)
260 /* what can expression begin with */
261 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
262 /* what can group begin with */
263 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
265 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
266 /* operator is inserted between them */
267 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
268 | TC_STRING | TC_NUMBER | TC_UOPPOST)
269 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
271 #define OF_RES1 0x010000
272 #define OF_RES2 0x020000
273 #define OF_STR1 0x040000
274 #define OF_STR2 0x080000
275 #define OF_NUM1 0x100000
276 #define OF_CHECKED 0x200000
278 /* combined operator flags */
281 #define xS (OF_RES2 | OF_STR2)
283 #define VV (OF_RES1 | OF_RES2)
284 #define Nx (OF_RES1 | OF_NUM1)
285 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
286 #define Sx (OF_RES1 | OF_STR1)
287 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
288 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
290 #define OPCLSMASK 0xFF00
291 #define OPNMASK 0x007F
293 /* operator priority is the highest byte (even: r->l, odd: l->r grouping)
294 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
295 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
300 #define P(x) (x << 24)
301 #define PRIMASK 0x7F000000
302 #define PRIMASK2 0x7E000000
304 /* Operation classes */
306 #define SHIFT_TIL_THIS 0x0600
307 #define RECUR_FROM_THIS 0x1000
310 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
311 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
313 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
314 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
315 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
317 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
318 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
319 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
320 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
321 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
322 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
323 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
324 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
327 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
331 /* simple builtins */
333 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
334 F_ti, F_le, F_sy, F_ff, F_cl
339 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
341 B_an, B_co, B_ls, B_or, B_rs, B_xo,
344 /* tokens and their corresponding info values */
346 #define NTC "\377" /* switch to next token class (tc<<1) */
349 #define OC_B OC_BUILTIN
351 static const char tokenlist[] ALIGN1 =
354 "\1/" NTC /* REGEXP */
355 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
356 "\2++" "\2--" NTC /* UOPPOST */
357 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
358 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
359 "\2*=" "\2/=" "\2%=" "\2^="
360 "\1+" "\1-" "\3**=" "\2**"
361 "\1/" "\1%" "\1^" "\1*"
362 "\2!=" "\2>=" "\2<=" "\1>"
363 "\1<" "\2!~" "\1~" "\2&&"
364 "\2||" "\1?" "\1:" NTC
368 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
374 "\2if" "\2do" "\3for" "\5break" /* STATX */
375 "\10continue" "\6delete" "\5print"
376 "\6printf" "\4next" "\10nextfile"
377 "\6return" "\4exit" NTC
381 "\3and" "\5compl" "\6lshift" "\2or"
383 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
384 "\3cos" "\3exp" "\3int" "\3log"
385 "\4rand" "\3sin" "\4sqrt" "\5srand"
386 "\6gensub" "\4gsub" "\5index" "\6length"
387 "\5match" "\5split" "\7sprintf" "\3sub"
388 "\6substr" "\7systime" "\10strftime" "\6mktime"
389 "\7tolower" "\7toupper" NTC
391 "\4func" "\10function" NTC
394 /* compiler adds trailing "\0" */
397 static const uint32_t tokeninfo[] = {
401 xS|'a', xS|'w', xS|'|',
402 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
403 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
404 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
405 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
406 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
407 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
408 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
409 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
410 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
411 OC_IN|SV|P(49), /* in */
413 OC_PGETLINE|SV|P(37),
414 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
420 ST_IF, ST_DO, ST_FOR, OC_BREAK,
421 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
422 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
423 OC_RETURN|Vx, OC_EXIT|Nx,
427 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
428 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
429 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
430 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
431 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
432 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
433 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
434 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
435 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
442 /* internal variable names and their initial values */
443 /* asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
445 CONVFMT, OFMT, FS, OFS,
446 ORS, RS, RT, FILENAME,
447 SUBSEP, F0, ARGIND, ARGC,
448 ARGV, ERRNO, FNR, NR,
449 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
452 static const char vNames[] ALIGN1 =
453 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
454 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
455 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
456 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
457 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
459 static const char vValues[] ALIGN1 =
460 "%.6g\0" "%.6g\0" " \0" " \0"
461 "\n\0" "\n\0" "\0" "\0"
462 "\034\0" "\0" "\377";
464 /* hash size may grow to these values */
465 #define FIRST_PRIME 61
466 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
469 /* Globals. Split in two parts so that first one is addressed
470 * with (mostly short) negative offsets.
471 * NB: it's unsafe to put members of type "double"
472 * into globals2 (gcc may fail to align them).
476 chain beginseq, mainseq, endseq;
478 node *break_ptr, *continue_ptr;
480 xhash *vhash, *ahash, *fdhash, *fnhash;
481 const char *g_progname;
484 int maxfields; /* used in fsrealloc() only */
493 smallint is_f0_split;
497 uint32_t t_info; /* often used */
502 var *intvar[NUM_INTERNAL_VARS]; /* often used */
504 /* former statics from various functions */
505 char *split_f0__fstrings;
507 uint32_t next_token__save_tclass;
508 uint32_t next_token__save_info;
509 uint32_t next_token__ltclass;
510 smallint next_token__concat_inserted;
512 smallint next_input_file__files_happen;
513 rstream next_input_file__rsm;
515 var *evaluate__fnargs;
516 unsigned evaluate__seed;
517 regex_t evaluate__sreg;
521 tsplitter exec_builtin__tspl;
523 /* biggest and least used members go last */
524 tsplitter fsplitter, rsplitter;
526 #define G1 (ptr_to_globals[-1])
527 #define G (*(struct globals2 *)ptr_to_globals)
528 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
529 /*char G1size[sizeof(G1)]; - 0x74 */
530 /*char Gsize[sizeof(G)]; - 0x1c4 */
531 /* Trying to keep most of members accessible with short offsets: */
532 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
533 #define t_double (G1.t_double )
534 #define beginseq (G1.beginseq )
535 #define mainseq (G1.mainseq )
536 #define endseq (G1.endseq )
537 #define seq (G1.seq )
538 #define break_ptr (G1.break_ptr )
539 #define continue_ptr (G1.continue_ptr)
541 #define vhash (G1.vhash )
542 #define ahash (G1.ahash )
543 #define fdhash (G1.fdhash )
544 #define fnhash (G1.fnhash )
545 #define g_progname (G1.g_progname )
546 #define g_lineno (G1.g_lineno )
547 #define nfields (G1.nfields )
548 #define maxfields (G1.maxfields )
549 #define Fields (G1.Fields )
550 #define g_cb (G1.g_cb )
551 #define g_pos (G1.g_pos )
552 #define g_buf (G1.g_buf )
553 #define icase (G1.icase )
554 #define exiting (G1.exiting )
555 #define nextrec (G1.nextrec )
556 #define nextfile (G1.nextfile )
557 #define is_f0_split (G1.is_f0_split )
558 #define t_rollback (G1.t_rollback )
559 #define t_info (G.t_info )
560 #define t_tclass (G.t_tclass )
561 #define t_string (G.t_string )
562 #define t_lineno (G.t_lineno )
563 #define intvar (G.intvar )
564 #define fsplitter (G.fsplitter )
565 #define rsplitter (G.rsplitter )
566 #define INIT_G() do { \
567 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
568 G.next_token__ltclass = TC_OPTERM; \
569 G.evaluate__seed = 1; \
573 /* function prototypes */
574 static void handle_special(var *);
575 static node *parse_expr(uint32_t);
576 static void chain_group(void);
577 static var *evaluate(node *, var *);
578 static rstream *next_input_file(void);
579 static int fmt_num(char *, int, const char *, double, int);
580 static int awk_exit(int) NORETURN;
582 /* ---- error handling ---- */
584 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
585 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
586 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
587 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
588 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
589 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
590 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
591 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
592 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
593 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
/* Zero-fill the whole var structure pointed to by vp. */
595 static void zero_out_var(var *vp)
597 memset(vp, 0, sizeof(*vp));
/* Report `message` as "<g_progname>:<g_lineno>: <message>" through
 * bb_error_msg_and_die(). Declared NORETURN: callers never get control back. */
600 static void syntax_error(const char *message) NORETURN;
601 static void syntax_error(const char *message)
603 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
606 /* ---- hash stuff ---- */
/* Fold the characters of `name` into an unsigned hash value.
 * The update step is next_char + idx * 63, written as (idx << 6) - idx.
 * Callers reduce the result modulo the table size themselves. */
608 static unsigned hashidx(const char *name)
613 idx = *name++ + (idx << 6) - idx;
617 /* create new hash */
/* The header is zero-allocated, csize starts at FIRST_PRIME, and items[]
 * is a zero-allocated array of FIRST_PRIME bucket pointers. */
618 static xhash *hash_init(void)
622 newhash = xzalloc(sizeof(*newhash));
623 newhash->csize = FIRST_PRIME;
624 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
629 /* find item in hash, return ptr to data, NULL if not found */
/* The bucket examined is items[hashidx(name) % csize];
 * item names are compared with strcmp(). */
630 static void *hash_search(xhash *hash, const char *name)
634 hi = hash->items[hashidx(name) % hash->csize];
636 if (strcmp(hi->name, name) == 0)
643 /* grow hash if it becomes too big */
/* The nprime == ARRAY_SIZE(PRIMES) check guards against stepping past the
 * last entry of PRIMES[]. Otherwise the next size is taken from PRIMES[]
 * (advancing nprime), a zeroed bucket array of that size is allocated,
 * every old bucket is walked and each item is re-linked at the head of
 * newitems[hashidx(name) % newsize], and csize/items are switched over. */
644 static void hash_rebuild(xhash *hash)
646 unsigned newsize, i, idx;
647 hash_item **newitems, *hi, *thi;
649 if (hash->nprime == ARRAY_SIZE(PRIMES))
652 newsize = PRIMES[hash->nprime++];
653 newitems = xzalloc(newsize * sizeof(newitems[0]));
655 for (i = 0; i < hash->csize; i++) {
660 idx = hashidx(thi->name) % newsize;
661 thi->next = newitems[idx];
667 hash->csize = newsize;
668 hash->items = newitems;
671 /* find item in hash, add it if necessary. Return ptr to data */
/* Lookup goes through hash_search(). The insertion path bumps nel and
 * tests whether the average bucket load (nel / csize) exceeds 10, then
 * zero-allocates an item with strlen(name) + 1 extra bytes for the name,
 * copies the name in and links the item at the head of its bucket. */
672 static void *hash_find(xhash *hash, const char *name)
678 hi = hash_search(hash, name);
680 if (++hash->nel / hash->csize > 10)
683 l = strlen(name) + 1;
684 hi = xzalloc(sizeof(*hi) + l);
685 strcpy(hi->name, name);
687 idx = hashidx(name) % hash->csize;
688 hi->next = hash->items[idx];
689 hash->items[idx] = hi;
/* Typed wrappers around hash_find() (find-or-create): findvar() works on
 * any hash and yields a var*; newvar/newfile/newfunc are bound to the
 * vhash, fdhash and fnhash tables and yield var*, rstream* and func*. */
695 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
696 #define newvar(name) ((var*) hash_find(vhash, (name)))
697 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
698 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
/* Look for `name` in its bucket (items[hashidx(name) % csize]) through a
 * pointer-to-link (phi); on a strcmp() match glen is reduced by
 * strlen(name) + 1.
 * NOTE(review): unlinking and freeing of the matched item presumably happen
 * in statements adjacent to the glen update - confirm. */
700 static void hash_remove(xhash *hash, const char *name)
702 hash_item *hi, **phi;
704 phi = &hash->items[hashidx(name) % hash->csize];
707 if (strcmp(hi->name, name) == 0) {
708 hash->glen -= (strlen(name) + 1);
718 /* ------ some useful functions ------ */
/* Scan p over blanks. A backslash immediately followed by a newline is
 * handled by the first branch (line continuation); the second branch
 * fires on the first character that is neither ' ' nor '\t'. */
720 static char *skip_spaces(char *p)
723 if (*p == '\\' && p[1] == '\n') {
726 } else if (*p != ' ' && *p != '\t') {
734 /* returns old *s, advances *s past word and terminating NUL */
/* The post-increment in the loop condition leaves *s one byte past the
 * first NUL encountered. */
735 static char *nextword(char **s)
738 while (*(*s)++ != '\0')
/* Fetch the next character from *s, decoding escape sequences with
 * bb_process_escape_sequence(). If the decoded result is still a backslash
 * and *s did not move past the saved position (pps), the escape was not
 * recognized: the character following the backslash is returned as-is and
 * *s is advanced over it unless it is NUL. */
743 static char nextchar(char **s)
750 c = bb_process_escape_sequence((const char**)s);
751 /* Example awk statement:
753 * we must treat \" as "
755 if (c == '\\' && *s == pps) { /* unrecognized \z? */
756 c = *(*s); /* yes, fetch z */
758 (*s)++; /* advance unless z = NUL */
/* Rewrite the string at s1 in place: each character produced by nextchar()
 * is stored back through s1 until nextchar() yields NUL (which is stored too). */
763 /* TODO: merge with strcpy_and_process_escape_sequences()?
765 static void unescape_string_in_place(char *s1)
768 while ((*s1 = nextchar(&s)) != '\0')
/* isalnum() extended to also accept '_'. */
772 static ALWAYS_INLINE int isalnum_(int c)
774 return (isalnum(c) || c == '_');
/* Convert the number at the start of the text to double; pp is handed to
 * strtoull()/strtod() as their end pointer, so *pp is updated to the first
 * unparsed character.
 * With ENABLE_DESKTOP, text starting with '0' followed by 'x'/'X' (case is
 * folded with | 0x20) or by a digit is first parsed as an integer with
 * strtoull(..., base 0), i.e. as hex or octal. If that parse stopped at a
 * digit or at '.', the text may still be a decimal floating number and is
 * re-parsed with strtod(). */
777 static double my_strtod(char **pp)
780 if (ENABLE_DESKTOP && cp[0] == '0') {
781 /* Might be hex or octal integer: 0x123abc or 07777 */
782 char c = (cp[1] | 0x20);
783 if (c == 'x' || isdigit(cp[1])) {
784 unsigned long long ull = strtoull(cp, pp, 0);
788 if (!isdigit(c) && c != '.')
790 /* else: it may be a floating number. Examples:
791 * 009.123 (*pp points to '9')
792 * 000.123 (*pp points to '.')
793 * fall through to strtod.
797 return strtod(cp, pp);
800 /* -------- working with variables (set/get/copy/etc) -------- */
/* Resolve v to the variable that really holds the data (VF_CHILD marks a
 * function argument whose x.parent points to the source variable) and, if
 * that variable is not yet flagged VF_ARRAY, give it a fresh hash from
 * hash_init().
 * NOTE(review): the returned xhash* is presumably that x.array - confirm. */
802 static xhash *iamarray(var *v)
806 while (a->type & VF_CHILD)
809 if (!(a->type & VF_ARRAY)) {
811 a->x.array = hash_init();
/* Empty an array hash: every bucket is walked, the string of each stored
 * var (data.v.string) is freed, the bucket head is reset to NULL, and the
 * glen / nel counters are zeroed. The bucket array itself is kept. */
816 static void clear_array(xhash *array)
821 for (i = 0; i < array->csize; i++) {
822 hi = array->items[i];
826 free(thi->data.v.string);
829 array->items[i] = NULL;
831 array->glen = array->nel = 0;
834 /* clear a variable */
/* The VF_FSTR test singles out strings that point into the fstring buffer
 * (see VF_FSTR); the type flags are then reduced to the static
 * VF_DONTTOUCH subset. */
835 static var *clrvar(var *v)
837 if (!(v->type & VF_FSTR))
840 v->type &= VF_DONTTOUCH;
846 /* assign string value to variable */
/* `value` is taken as-is, not duplicated; setvar_s() is the copying variant. */
847 static var *setvar_p(var *v, char *value)
855 /* same as setvar_p but make a copy of string */
/* A NULL or empty `value` is passed on as NULL instead of a duplicate. */
856 static var *setvar_s(var *v, const char *value)
858 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
861 /* same as setvar_s but sets USER flag */
/* The assignment itself is delegated to setvar_s(). */
862 static var *setvar_u(var *v, const char *value)
864 v = setvar_s(v, value);
869 /* set array element to user string */
/* The element is found (or created) under the key itoa(idx) in the hash
 * returned by iamarray(a). */
870 static void setari_u(var *a, int idx, const char *s)
874 v = findvar(iamarray(a), itoa(idx));
878 /* assign numeric value to variable */
/* Marks the variable's primary type as number (VF_NUMBER). */
879 static var *setvar_i(var *v, double value)
882 v->type |= VF_NUMBER;
/* Return the string value of v; never NULL (a missing string reads as "").
 * A number with no cached string is formatted into g_buf with the CONVFMT
 * format via fmt_num(), duplicated into v->string, and flagged VF_CACHED
 * so the conversion is not repeated. */
888 static const char *getvar_s(var *v)
890 /* if v is numeric and has no cached string, convert it to string */
891 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
892 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
893 v->string = xstrdup(g_buf);
894 v->type |= VF_CACHED;
896 return (v->string == NULL) ? "" : v->string;
/* Numeric view of v. When v is neither VF_NUMBER nor VF_CACHED, the number
 * is parsed from the string with my_strtod() and stored in v->number;
 * VF_USER values get an extra check after parsing, and VF_CACHED is set so
 * the conversion is not repeated. */
899 static double getvar_i(var *v)
903 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
907 debug_printf_eval("getvar_i: '%s'->", s);
908 v->number = my_strtod(&s);
909 debug_printf_eval("%f (s:'%s')\n", v->number, s);
910 if (v->type & VF_USER) {
916 debug_printf_eval("getvar_i: '%s'->zero\n", s);
919 v->type |= VF_CACHED;
921 debug_printf_eval("getvar_i: %f\n", v->number);
925 /* Used for operands of bitwise ops */
/* Converts getvar_i(v) to unsigned long. Two conversions are present: a
 * direct cast, and a negate / cast / negate sequence that keeps the cast
 * operand non-negative. */
926 static unsigned long getvar_i_int(var *v)
928 double d = getvar_i(v);
930 /* Casting doubles to longs is undefined for values outside
931 * of target type range. Try to widen it as much as possible */
933 return (unsigned long)d;
934 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
935 return - (long) (unsigned long) (-d);
/* Copy the value of src into dest: the type flags of src are merged in
 * except the static VF_DONTTOUCH set and VF_FSTR, the number is copied,
 * the string is duplicated with xstrdup() (dest gets its own copy), and
 * handle_special(dest) is called afterwards. */
938 static var *copyvar(var *dest, const var *src)
942 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
943 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
944 dest->number = src->number;
946 dest->string = xstrdup(src->string);
948 handle_special(dest);
/* Set v to its numeric value plus one; returns whatever setvar_i() returns. */
952 static var *incvar(var *v)
954 return setvar_i(v, getvar_i(v) + 1.0);
957 /* return true if v is number or numeric string */
/* The XOR flips VF_DIRTY before masking, so the result is nonzero when
 * VF_NUMBER or VF_USER is set, or when VF_DIRTY is clear. */
958 static int is_numeric(var *v)
961 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
964 /* return 1 when value of v corresponds to true, 0 otherwise */
/* Two outcomes: a numeric value is true when it is not zero; otherwise the
 * value is true when the string is non-NULL and non-empty. */
965 static int istrue(var *v)
968 return (v->number != 0);
969 return (v->string && v->string[0]);
972 /* temporary variables allocator. Last allocated should be first freed */
/* A block can serve the request when the n new vars still fit:
 * (pos - nv) + n <= size. Otherwise a new zero-allocated block is created
 * with room for max(n, MINNVBLOCK) vars and its pos is set to the start
 * of its nv[] storage. */
973 static var *nvalloc(int n)
981 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
987 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
988 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
990 g_cb->pos = g_cb->nv;
992 /*g_cb->next = NULL; - xzalloc did it */
1000 while (v < g_cb->pos) {
/* Release temporaries starting at v (counterpart of nvalloc()).
 * v must lie inside the used part of the current block, [nv, pos);
 * anything else is reported as an internal error.
 * For each var from v up to pos: an array owned by the var (VF_ARRAY set,
 * VF_CHILD clear) is cleared and its bucket array freed; a var flagged
 * VF_WALK has its x.walker list released. The final loop steps back over
 * blocks that have become empty (pos == nv) while a previous block exists. */
1009 static void nvfree(var *v)
1013 if (v < g_cb->nv || v >= g_cb->pos)
1014 syntax_error(EMSG_INTERNAL_ERROR);
1016 for (p = v; p < g_cb->pos; p++) {
1017 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1018 clear_array(iamarray(p));
1019 free(p->x.array->items);
1022 if (p->type & VF_WALK) {
1024 walker_list *w = p->x.walker;
1025 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1029 debug_printf_walker(" free(%p)\n", w);
1038 while (g_cb->prev && g_cb->pos == g_cb->nv) {
1043 /* ------- awk program text parsing ------- */
1045 /* Parse next token pointed by global pos, place results into the t_* globals
1046 * (t_tclass, t_info, t_string, t_double). If token isn't expected, give up
 * with a syntax error (see the final check against 'expected'). Return token class
1048 static uint32_t next_token(uint32_t expected)
1050 #define concat_inserted (G.next_token__concat_inserted)
1051 #define save_tclass (G.next_token__save_tclass)
1052 #define save_info (G.next_token__save_info)
1053 /* Initialized to TC_OPTERM: */
1054 #define ltclass (G.next_token__ltclass)
1064 } else if (concat_inserted) {
1065 concat_inserted = FALSE;
1066 t_tclass = save_tclass;
1073 g_lineno = t_lineno;
1075 while (*p != '\n' && *p != '\0')
1083 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1085 } else if (*p == '\"') {
1088 while (*p != '\"') {
1090 if (*p == '\0' || *p == '\n')
1091 syntax_error(EMSG_UNEXP_EOS);
1093 *s++ = nextchar(&pp);
1099 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1101 } else if ((expected & TC_REGEXP) && *p == '/') {
1105 if (*p == '\0' || *p == '\n')
1106 syntax_error(EMSG_UNEXP_EOS);
1110 s[-1] = bb_process_escape_sequence((const char **)&pp);
1122 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1124 } else if (*p == '.' || isdigit(*p)) {
1127 t_double = my_strtod(&pp);
1130 syntax_error(EMSG_UNEXP_TOKEN);
1132 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1135 /* search for something known */
1140 int l = (unsigned char) *tl++;
1141 if (l == (unsigned char) NTCC) {
1145 /* if token class is expected,
1147 * and it's not a longer word,
1149 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1150 && strncmp(p, tl, l) == 0
1151 && !((tc & TC_WORD) && isalnum_(p[l]))
1153 /* then this is what we are looking for */
1155 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1162 /* not a known token */
1164 /* is it a name? (var/array/function) */
1166 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1169 while (isalnum_(*++p)) {
1174 /* also consume whitespace between functionname and bracket */
1175 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1179 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1184 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1186 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1192 /* skipping newlines in some cases */
1193 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1196 /* insert concatenation operator when needed */
1197 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1198 concat_inserted = TRUE;
1202 t_info = OC_CONCAT | SS | P(35);
1209 /* Are we ready for this? */
1210 if (!(ltclass & expected))
1211 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1212 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1215 #undef concat_inserted
/* NOTE(review): presumably arms the t_rollback flag so that the next
 * next_token() call re-delivers the current token - confirm against the body. */
1221 static void rollback_token(void)
/* Allocate a zero-filled node and stamp it with the current source line
 * (g_lineno).
 * NOTE(review): `info` is presumably stored in n->info - confirm. */
1226 static node *new_node(uint32_t info)
1230 n = xzalloc(sizeof(node));
1232 n->lineno = g_lineno;
/* Turn n into an OC_REGEXP node for pattern s.
 * `re` must point at storage for two regex_t objects: re[0] receives the
 * case-sensitive compilation (REG_EXTENDED), re[1] the case-insensitive
 * one (REG_EXTENDED | REG_ICASE). */
1236 static void mk_re_node(const char *s, node *n, regex_t *re)
1238 n->info = OC_REGEXP;
1241 xregcomp(re, s, REG_EXTENDED);
1242 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
/* Parse a parenthesized expression: require '(' (TC_SEQSTART), then parse
 * an expression up to and including ')' (TC_SEQTERM) and return its subtree. */
1245 static node *condition(void)
1247 next_token(TC_SEQSTART);
1248 return parse_expr(TC_SEQTERM);
1251 /* parse expression terminated by given argument, return ptr
1252 * to built subtree. Terminator is eaten by parse_expr */
/* iexp is the set of token classes that end the expression. xtc tracks
 * which token classes are acceptable next and is passed to next_token()
 * on every iteration; sn is a local dummy root whose r.n ends up holding
 * the subtree; glptr, when set, marks a getline node to which a following
 * '<' attaches as input redirection. */
1253 static node *parse_expr(uint32_t iexp)
1261 debug_printf_parse("%s(%x)\n", __func__, iexp);
1264 sn.r.n = glptr = NULL;
1265 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1267 while (!((tc = next_token(xtc)) & iexp)) {
1269 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1270 /* input redirection (<) attached to glptr node */
1271 debug_printf_parse("%s: input redir\n", __func__);
1272 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1274 xtc = TC_OPERAND | TC_UOPPRE;
1277 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1278 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1279 /* for binary and postfix-unary operators, jump back over
1280 * previous operators with higher priority */
1282 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1283 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1287 if ((t_info & OPCLSMASK) == OC_TERNARY)
1289 cn = vn->a.n->r.n = new_node(t_info);
1291 if (tc & TC_BINOP) {
1293 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1294 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1296 next_token(TC_GETLINE);
1297 /* give maximum priority to this pipe */
1298 cn->info &= ~PRIMASK;
1299 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1303 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1308 debug_printf_parse("%s: other\n", __func__);
1309 /* for operands and prefix-unary operators, attach them
1312 cn = vn->r.n = new_node(t_info);
1314 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1315 if (tc & (TC_OPERAND | TC_REGEXP)) {
1316 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1317 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1318 /* one should be very careful with switch on tclass -
1319 * only simple tclasses should be used! */
1323 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1325 v = hash_search(ahash, t_string);
1327 cn->info = OC_FNARG;
1328 cn->l.aidx = v->x.aidx;
1330 cn->l.v = newvar(t_string);
1332 if (tc & TC_ARRAY) {
1334 cn->r.n = parse_expr(TC_ARRTERM);
1340 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1342 v = cn->l.v = xzalloc(sizeof(var));
1344 setvar_i(v, t_double);
1346 setvar_s(v, t_string);
1350 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1351 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1355 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1357 cn->r.f = newfunc(t_string);
1358 cn->l.n = condition();
1362 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1363 cn = vn->r.n = parse_expr(TC_SEQTERM);
1365 syntax_error("Empty sequence");
1370 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1372 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1376 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1377 cn->l.n = condition();
1384 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1388 /* add node to chain. Return ptr to alloc'd node */
/* Works on the current chain `seq`. The chain can be seeded with a
 * new_node(0) placeholder as both first and last. When the chain's recorded
 * programname differs from g_progname, it is updated and an OC_NEWSOURCE
 * node carrying a private copy of the new name is chained first (recursive
 * call). The chain is always left ending in a fresh OC_DONE node. */
1389 static node *chain_node(uint32_t info)
1394 seq->first = seq->last = new_node(0);
1396 if (seq->programname != g_progname) {
1397 seq->programname = g_progname;
1398 n = chain_node(OC_NEWSOURCE);
1399 n->l.new_progname = xstrdup(g_progname);
1404 seq->last = n->a.n = new_node(OC_DONE);
/* Chain a node of class `info` whose left operand is an expression parsed
 * up to a statement terminator (TC_OPTERM) or '}' (TC_GRPTERM); the '}'
 * case gets extra handling after the parse. */
1409 static void chain_expr(uint32_t info)
1413 n = chain_node(info);
1414 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1415 if (t_tclass & TC_GRPTERM)
/* Emit the skeleton of a loop into the current chain.
 * The enclosing loop's break_ptr / continue_ptr are saved on entry and
 * restored on exit, so nested loops each get their own jump targets.
 * Layout: an OC_BR node n (returned-to-caller candidate), fresh OC_EXEC
 * nodes for continue and break, then an OC_EXEC node n2; continue jumps to
 * n2, while break and n's r.n branch both lead to the end of the chain. */
1419 static node *chain_loop(node *nn)
1421 node *n, *n2, *save_brk, *save_cont;
1423 save_brk = break_ptr;
1424 save_cont = continue_ptr;
1426 n = chain_node(OC_BR | Vx);
1427 continue_ptr = new_node(OC_EXEC);
1428 break_ptr = new_node(OC_EXEC);
1430 n2 = chain_node(OC_EXEC | Vx);
1433 continue_ptr->a.n = n2;
1434 break_ptr->a.n = n->r.n = seq->last;
1436 continue_ptr = save_cont;
1437 break_ptr = save_brk;
1442 /* parse group and attach it to chain */
/* Leading newlines are skipped, then the first token selects the form:
 * '{' opens a brace-delimited group read until '}'; a token that can start
 * an expression (or a terminator) is chained as an OC_EXEC expression
 * statement; anything else is a control statement dispatched on
 * t_info & OPCLSMASK (if, while, do, for / for-in, print[f], break,
 * continue, and a default case for delete, next, nextfile, return, exit). */
1443 static void chain_group(void)
1449 c = next_token(TC_GRPSEQ);
1450 } while (c & TC_NEWLINE);
1452 if (c & TC_GRPSTART) {
1453 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1454 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1455 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1456 if (t_tclass & TC_NEWLINE)
1461 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1462 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1463 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1465 chain_expr(OC_EXEC | Vx);
1468 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1469 switch (t_info & OPCLSMASK) {
1471 debug_printf_parse("%s: ST_IF\n", __func__);
1472 n = chain_node(OC_BR | Vx);
1473 n->l.n = condition();
1475 n2 = chain_node(OC_EXEC);
1477 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1479 n2->a.n = seq->last;
1486 debug_printf_parse("%s: ST_WHILE\n", __func__);
1488 n = chain_loop(NULL);
1493 debug_printf_parse("%s: ST_DO\n", __func__);
1494 n2 = chain_node(OC_EXEC);
1495 n = chain_loop(NULL);
1497 next_token(TC_WHILE);
1498 n->l.n = condition();
1502 debug_printf_parse("%s: ST_FOR\n", __func__);
1503 next_token(TC_SEQSTART);
1504 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1505 if (t_tclass & TC_SEQTERM) { /* for-in */
1506 if ((n2->info & OPCLSMASK) != OC_IN)
1507 syntax_error(EMSG_UNEXP_TOKEN);
1508 n = chain_node(OC_WALKINIT | VV);
1511 n = chain_loop(NULL);
1512 n->info = OC_WALKNEXT | Vx;
1514 } else { /* for (;;) */
1515 n = chain_node(OC_EXEC | Vx);
1517 n2 = parse_expr(TC_SEMICOL);
1518 n3 = parse_expr(TC_SEQTERM);
1528 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1529 n = chain_node(t_info);
1530 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1531 if (t_tclass & TC_OUTRDR) {
1533 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1535 if (t_tclass & TC_GRPTERM)
1540 debug_printf_parse("%s: OC_BREAK\n", __func__);
1541 n = chain_node(OC_EXEC);
1547 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1548 n = chain_node(OC_EXEC);
1549 n->a.n = continue_ptr;
1553 /* delete, next, nextfile, return, exit */
1555 debug_printf_parse("%s: default\n", __func__);
// Parse the program text p, one top-level item per loop iteration, until
// the tokenizer reports TC_EOF.
// Items recognised by the visible code: a bare TC_OPTERM, TC_BEGIN and
// TC_END blocks, function declarations, pattern rules (TC_OPSEQ) and
// pattern-less groups.
// NOTE(review): BEGIN and END presumably select beginseq and endseq as the
// target sequence; those statements are not visible here - confirm.
1561 static void parse_program(char *p)
1570 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1571 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1573 if (tclass & TC_OPTERM) {
1574 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1579 if (tclass & TC_BEGIN) {
1580 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1584 } else if (tclass & TC_END) {
1585 debug_printf_parse("%s: TC_END\n", __func__);
1589 } else if (tclass & TC_FUNCDECL) {
1590 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
// Function declaration: register the name, then record each parameter in
// ahash with its positional index (aidx).
1591 next_token(TC_FUNCTION);
1593 f = newfunc(t_string);
1594 f->body.first = NULL;
1596 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1597 v = findvar(ahash, t_string);
1598 v->x.aidx = f->nargs++;
1600 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1607 } else if (tclass & TC_OPSEQ) {
1608 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
// Pattern rule: an OC_TEST node holds the pattern expression in l.n.
1610 cn = chain_node(OC_TEST);
1611 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1612 if (t_tclass & TC_GRPSTART) {
1613 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
// A pattern without an action group gets an OC_PRINT node as its action.
1617 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1618 chain_node(OC_PRINT);
// r.n of the test node points past the action, at the main sequence tail.
1620 cn->r.n = mainseq.last;
1622 } else /* if (tclass & TC_GRPSTART) */ {
1623 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1628 debug_printf_parse("%s: TC_EOF\n", __func__);
1632 /* -------- program execution part -------- */
// Prepare the splitter spl for the separator string s.
// s longer than one character: compiled into the splitter node through
// mk_re_node.
// s of length zero or one: its first byte (0 for an empty string) is
// stored directly in n->info.
// When the node currently holds OC_REGEXP, regfree is called on the old
// compiled regex before it is replaced.
// NOTE(review): n, re and ire are presumably taken from spl, and the return
// value is presumably n; neither is visible here - confirm.
1634 static node *mk_splitter(const char *s, tsplitter *spl)
1642 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1644 regfree(ire); // TODO: nuke ire, use re+1?
1646 if (s[0] && s[1]) { /* strlen(s) > 1 */
1647 mk_re_node(s, n, re);
1649 n->info = (uint32_t) s[0];
1655 /* use node as a regular expression. Supplied with node ptr and regex_t
1656 * storage space. Return ptr to regex (if result points to preg, it should
1657 * be later regfree'd manually)
// op:   node to interpret as a regular expression
// preg: storage supplied by the caller; used only when op is not already
//       an OC_REGEXP node
// For an OC_REGEXP node the precompiled regex stored in the node is
// returned (r.ire when icase is set, otherwise l.re).
// Otherwise op is evaluated to a string and that string is compiled into
// preg, with REG_ICASE added when icase is set.
1659 static regex_t *as_regex(node *op, regex_t *preg)
1665 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1666 return icase ? op->r.ire : op->l.re;
// Dynamic regex: take the string value of the evaluated node.
1669 s = getvar_s(evaluate(op, v));
1671 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1672 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1673 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1674 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1675 * (maybe gsub is not supposed to use REG_EXTENDED?).
// First attempt uses extended syntax; on failure the pattern is compiled
// again without REG_EXTENDED through xregcomp.
1677 if (regcomp(preg, s, cflags)) {
1678 cflags &= ~REG_EXTENDED;
1679 xregcomp(preg, s, cflags);
1685 /* gradually increasing buffer.
1686 * note that we reallocate even if n == old_size,
1687 * and thus there is at least one extra allocated byte.
// b:    current buffer, may be NULL
// n:    number of bytes the caller needs
// size: in and out; allocated size of b, updated when the buffer grows
// The buffer is reallocated only when b is NULL or n >= *size. The new
// size is n plus half of n plus 80 bytes of slack.
1689 static char* qrealloc(char *b, int n, int *size)
1691 if (!b || n >= *size) {
1692 *size = n + (n>>1) + 80;
1693 b = xrealloc(b, *size);
1698 /* resize field storage space */
// size: number of fields that must fit in the Fields array
// When size reaches maxfields the array grows to size + 16 entries, and
// every added entry is initialised as VF_SPECIAL with a NULL string.
1699 static void fsrealloc(int size)
1703 if (size >= maxfields) {
1705 maxfields = size + 16;
1706 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
// NOTE(review): i presumably starts at the old maxfields; that assignment
// is not visible here - confirm.
1707 for (; i < maxfields; i++) {
1708 Fields[i].type = VF_SPECIAL;
1709 Fields[i].string = NULL;
1712 /* if size < nfields, clear extra field variables */
1713 for (i = size; i < nfields; i++) {
// Split the string s into fields as directed by the splitter node spl.
// s:     string to split
// spl:   splitter node; either an OC_REGEXP node or a node whose info
//        holds a single separator character
// slist: receives a newly allocated, zero-filled buffer with the fields;
//        callers in this file read them back one by one with nextword
// Returns the number of fields (n).
// Modes visible below: regex split, null split (separator byte is 0),
// single-character split, and whitespace split when the separator is a
// space.
1719 static int awk_split(const char *s, node *spl, char **slist)
1724 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1726 /* in worst case, each char would be a separate field */
1727 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
// c is used below as a small set of separator characters.
1730 c[0] = c[1] = (char)spl->info;
// Special case for an empty RS.
// NOTE(review): presumably newline is added to the separator set here
// (see the c+2 use below); the statement is not visible - confirm.
1732 if (*getvar_s(intvar[RS]) == '\0')
1736 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1738 return n; /* "": zero fields */
1739 n++; /* at least one field will be there */
1741 l = strcspn(s, c+2); /* len till next NUL or \n */
// A regex match counts only if it starts no later than that limit.
1742 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1743 && pmatch[0].rm_so <= l
1745 l = pmatch[0].rm_so;
1746 if (pmatch[0].rm_eo == 0) {
1750 n++; /* we saw yet another delimiter */
1752 pmatch[0].rm_eo = l;
1757 /* make sure we remove *all* of the separator chars */
1760 } while (++l < pmatch[0].rm_eo);
1762 s += pmatch[0].rm_eo;
1766 if (c[0] == '\0') { /* null split */
1774 if (c[0] != ' ') { /* single-character split */
// Both letter cases of the separator go into the set.
// NOTE(review): presumably only when icase is set; the guard is not
// visible here - confirm.
1776 c[0] = toupper(c[0]);
1777 c[1] = tolower(c[1]);
1781 while ((s1 = strpbrk(s1, c)) != NULL) {
// Whitespace split: skip leading blanks, then consume one word.
1789 s = skip_whitespace(s);
1793 while (*s && !isspace(*s))
// Split the current record ($0) into the Fields array using fsplitter.
// The field strings live in the buffer returned by awk_split through
// fstrings, which is kept in the globals struct G.
1800 static void split_f0(void)
1802 /* static char *fstrings; */
1803 #define fstrings (G.split_f0__fstrings)
1814 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
// Point each field variable at its word inside the split buffer.
1817 for (i = 0; i < n; i++) {
1818 Fields[i].string = nextword(&s);
1819 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1822 /* set NF manually to avoid side effects */
1824 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1825 intvar[NF]->number = nfields;
1829 /* perform additional actions when some internal variables changed */
// React to a change of the special variable v.
// Variables without VF_SPECIAL are tested for first.
// NOTE(review): presumably they cause an early return; not visible here.
// Visible cases:
//   NF         - $0 is rebuilt from the first n fields joined with OFS
//   $0         - the record is marked as not yet split
//   FS, RS     - the matching splitter is rebuilt from the new value
//   IGNORECASE - NOTE(review): action not visible here - confirm
//   otherwise  - v is treated as an element of Fields and NF is raised to
//                cover it if needed
1830 static void handle_special(var *v)
1834 const char *sep, *s;
1835 int sl, l, len, i, bsize;
1837 if (!(v->type & VF_SPECIAL))
1840 if (v == intvar[NF]) {
1841 n = (int)getvar_i(v);
1844 /* recalculate $0 */
1845 sep = getvar_s(intvar[OFS]);
1849 for (i = 0; i < n; i++) {
1850 s = getvar_s(&Fields[i]);
// Append the separator, then grow the buffer and append the field text.
1853 memcpy(b+len, sep, sl);
1856 b = qrealloc(b, len+l+sl, &bsize);
1857 memcpy(b+len, s, l);
// The assembled buffer is handed over to $0.
1862 setvar_p(intvar[F0], b);
1865 } else if (v == intvar[F0]) {
1866 is_f0_split = FALSE;
1868 } else if (v == intvar[FS]) {
1870 * The POSIX-2008 standard says that changing FS should have no effect on the
1871 * current input line, but only on the next one. The language is:
1873 * > Before the first reference to a field in the record is evaluated, the record
1874 * > shall be split into fields, according to the rules in Regular Expressions,
1875 * > using the value of FS that was current at the time the record was read.
1877 * So, split up current line before assignment to FS:
1881 mk_splitter(getvar_s(v), &fsplitter);
1883 } else if (v == intvar[RS]) {
1884 mk_splitter(getvar_s(v), &rsplitter);
1886 } else if (v == intvar[IGNORECASE]) {
// v-Fields is the zero-based index of the modified field.
1890 n = getvar_i(intvar[NF]);
1891 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1892 /* right here v is invalid. Just to note... */
1896 /* step through func/builtin/etc arguments */
// pn: in and out; position in an argument list
// An OC_COMMA node at *pn gets special treatment.
// NOTE(review): presumably the left child is returned and *pn advances to
// the right child; those statements are not visible here - confirm.
1897 static node *nextarg(node **pn)
1902 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// Start a for-in walk over array, using v as the loop variable.
// A walker_list is allocated with room for array->glen + 1 bytes of key
// names after the header, and the names of the hash items are copied
// into that buffer.
// A walker already attached to v (VF_WALK set) is kept as w->prev, so
// walks on the same variable nest.
1911 static void hashwalk_init(var *v, xhash *array)
1916 walker_list *prev_walker;
1918 if (v->type & VF_WALK) {
1919 prev_walker = v->x.walker;
1924 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1926 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1927 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
// cur and end both start at the beginning of the name buffer.
1928 w->cur = w->end = w->wbuf;
1929 w->prev = prev_walker;
// Visit every bucket of the hash table.
1930 for (i = 0; i < array->csize; i++) {
1931 hi = array->items[i];
1933 strcpy(w->end, hi->name);
// Advance the for-in walk attached to v.
// While names remain, v is set to the next key read with nextword.
// When the buffer is exhausted (cur >= end) the previous walker is
// restored into v->x.walker.
// The caller in evaluate treats a nonzero result as continue the loop.
1940 static int hashwalk_next(var *v)
1942 walker_list *w = v->x.walker;
1944 if (w->cur >= w->end) {
1945 walker_list *prev_walker = w->prev;
1947 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1949 v->x.walker = prev_walker;
1953 setvar_s(v, nextword(&w->cur));
1957 /* evaluate node, return 1 when result is true, 0 otherwise */
// pattern: expression tree to evaluate
// The result is stored in the shared variable G.ptest__v and reduced to a
// truth value with istrue.
1958 static int ptest(node *pattern)
1960 /* ptest__v is "static": to save stack space? */
1961 return istrue(evaluate(pattern, &G.ptest__v));
1964 /* read next record from stream rsm into a variable v */
// rsm: input stream; reads go through its file descriptor with safe_read
// v:   variable that receives the record text
// Record boundaries come from rsplitter: a regex match, a single separator
// character, or, when that character is 0, runs of newlines.
// Side effects visible below: RT is set to the matched separator text and
// ERRNO is set from errno on the error path.
// Callers treat a result greater than 0 as a record having been read.
1965 static int awk_getline(rstream *rsm, var *v)
1968 regmatch_t pmatch[2];
1969 int size, a, p, pp = 0;
1970 int fd, so, eo, r, rp;
1973 debug_printf_eval("entered %s()\n", __func__);
1975 /* we're using our own buffer since we need access to accumulating
1978 fd = fileno(rsm->F);
1983 c = (char) rsplitter.n.info;
// Make sure a buffer of at least 256 bytes exists.
1987 m = qrealloc(m, 256, &size);
// so and eo are the start and end offsets of the separator inside b.
1994 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1995 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1996 b, 1, pmatch, 0) == 0) {
1997 so = pmatch[0].rm_so;
1998 eo = pmatch[0].rm_eo;
2002 } else if (c != '\0') {
2003 s = strchr(b+pp, c);
2005 s = memchr(b+pp, '\0', p - pp);
// Separator byte 0: skip leading newlines, then look for two consecutive
// newlines and swallow every newline that follows them.
2012 while (b[rp] == '\n')
2014 s = strstr(b+rp, "\n\n");
2017 while (b[eo] == '\n')
// No separator found yet: compact the buffer, grow it and read more input.
2026 memmove(m, m+a, p+1);
2031 m = qrealloc(m, a+p+128, &size);
2034 p += safe_read(fd, b+p, size-p-1);
2038 setvar_i(intvar[ERRNO], errno);
// Terminate the text in place around the separator; the byte that gets
// overwritten is kept in c.
2047 c = b[so]; b[so] = '\0';
2051 c = b[eo]; b[eo] = '\0';
2052 setvar_s(intvar[RT], b+so);
2061 debug_printf_eval("returning from %s(): %d\n", __func__, r);
// Format the number n into b (at most size bytes) and produce the snprintf
// result r.
// format:     printf-style format containing one conversion
// int_as_int: when nonzero and n has no fractional part, n is printed with
//             the fixed format %lld and format is ignored
// Otherwise the last character of format selects the conversion family:
// one of diouxX prints n cast to int, one of eEfgG prints n as a double,
// anything else is reported with EMSG_INV_FMT.
// NOTE(review): the cast to int truncates values outside the int range.
// NOTE(review): for an empty format c is 0 and strchr then matches the
// terminator of the first set, so the integer branch is taken - confirm
// whether that is intended.
2066 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2070 const char *s = format;
2072 if (int_as_int && n == (long long)n) {
2073 r = snprintf(b, size, "%lld", (long long)n);
// Walk to the end of format; c ends up as its last character.
2075 do { c = *s; } while (c && *++s);
2076 if (strchr("diouxX", c)) {
2077 r = snprintf(b, size, format, (int)n);
2078 } else if (strchr("eEfgG", c)) {
2079 r = snprintf(b, size, format, n);
2081 syntax_error(EMSG_INV_FMT);
2087 /* formatted output into an allocated buffer, return ptr to buffer */
// n: argument list; the first argument is the format string and the rest
//    are consumed one per conversion through nextarg
// The format is copied and processed one conversion at a time; the output
// buffer b grows through qrealloc and is finally trimmed to i + 1 bytes.
// Conversion handling visible below:
//   c (or end of format) - one character, from the number or the string
//   s                    - string; the buffer is enlarged by its length
//   others               - numeric, delegated to fmt_num
2088 static char *awk_printf(node *n)
2093 int i, j, incr, bsize;
2098 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Skip literal text up to the next conversion; a doubled percent sign is
// treated as literal text.
2103 while (*f && (*f != '%' || *++f == '%'))
// Skip flags, width and precision up to the conversion letter.
2105 while (*f && !isalpha(*f)) {
2107 syntax_error("%*x formats are not supported");
// Reserve room for the literal part plus one formatted value.
2111 incr = (f - s) + MAXVARFMT;
2112 b = qrealloc(b, incr + i, &bsize);
2118 arg = evaluate(nextarg(&n), v);
2121 if (c == 'c' || !c) {
2122 i += sprintf(b+i, s, is_numeric(arg) ?
2123 (char)getvar_i(arg) : *getvar_s(arg));
2124 } else if (c == 's') {
2126 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2127 i += sprintf(b+i, s, s1);
2129 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2133 /* if there was an error while sprintf, return value is negative */
2140 b = xrealloc(b, i + 1);
2145 /* Common substitution routine.
2146 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2147 * store result into (dest), return number of substitutions.
2148 * If nm = 0, replace all matches.
2149 * If src or dst is NULL, use $0.
2150 * If subexp != 0, enable subexpression matching (\1-\9).
// rn:     node giving the regex, resolved through as_regex
// repl:   replacement text; an ampersand or, with subexp set, a digit
//         inserts the corresponding match
// nm:     which match to replace
// src:    source variable, $0 when NULL
// dest:   destination variable, $0 when NULL
// subexp: nonzero enables digit references to subexpressions
// The result is assembled in resbuf, which grows through qrealloc and is
// handed over to the destination with setvar_p.
2152 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2156 int match_no, residx, replen, resbufsize;
2158 regmatch_t pmatch[10];
2159 regex_t sreg, *regex;
2165 regex = as_regex(rn, &sreg);
2166 sp = getvar_s(src ? src : intvar[F0]);
2167 replen = strlen(repl);
2168 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2169 int so = pmatch[0].rm_so;
2170 int eo = pmatch[0].rm_eo;
2172 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
// Copy everything up to the end of the match; the matched part is dropped
// again below when this match is to be replaced.
2173 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2174 memcpy(resbuf + residx, sp, eo);
2176 if (++match_no >= nm) {
// Rewind over the matched text so the replacement overwrites it.
2181 residx -= (eo - so);
2183 for (s = repl; *s; s++) {
2184 char c = resbuf[residx++] = *s;
2189 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
// nbs presumably counts the backslashes preceding c; half of them are
// dropped from the output - confirm.
2191 residx -= ((nbs + 3) >> 1);
2198 resbuf[residx++] = c;
// Insert the text of match j from the source string.
2200 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2201 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2202 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
// Later matches no longer start at the beginning of the line.
2210 regexec_flags = REG_NOTBOL;
2215 /* Empty match (e.g. "b*" will match anywhere).
2216 * Advance by one char. */
2218 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2219 //... and will erroneously match "b" even though it is NOT at the word start.
2220 //we need REG_NOTBOW but it does not exist...
2221 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2222 //it should be able to do it correctly.
2223 /* Subtle: this is safe only because
2224 * qrealloc allocated at least one extra byte */
2225 resbuf[residx] = *sp;
// Append the unmatched remainder of the source.
2233 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2234 strcpy(resbuf + residx, sp);
2236 //bb_error_msg("end sp:'%s'%p", sp,sp);
2237 setvar_p(dest ? dest : intvar[F0], resbuf);
// ds: date string holding year, month, day, hour, minute and second,
//     optionally followed by a daylight saving flag, separated by spaces
// The fields are scanned straight into a struct tm; a month below 1 or a
// year below 1900 is one of the rejection conditions, and the year is
// rebased to 1900 before mktime is called.
// NOTE(review): the return type is int while mktime yields time_t, so
// large timestamps may be truncated - confirm.
2243 static NOINLINE int do_mktime(const char *ds)
2248 /*memset(&then, 0, sizeof(then)); - not needed */
2249 then.tm_isdst = -1; /* default is unknown */
2251 /* manpage of mktime says these fields are ints,
2252 * so we can sscanf stuff directly into them */
2253 count = sscanf(ds, "%u %u %u %u %u %u %d",
2254 &then.tm_year, &then.tm_mon, &then.tm_mday,
2255 &then.tm_hour, &then.tm_min, &then.tm_sec,
2259 || (unsigned)then.tm_mon < 1
2260 || (unsigned)then.tm_year < 1900
2266 then.tm_year -= 1900;
2268 return mktime(&then);
// Execute the builtin function described by node op and store its result
// in res.
// Up to four arguments are collected first. Bits in op->info decide
// whether each argument is evaluated (mask 0x09000000) and whether its
// string form is fetched (mask 0x08000000). The top two bits of info
// (info >> 30) give the minimum argument count.
// Builtins recognisable below: atan2, split, substr, the bitwise family,
// case conversion, index, strftime, mktime, match, gensub, gsub and sub.
// NOTE(review): the case labels are not visible in this listing; the names
// above are inferred from the calls made in each branch.
2271 static NOINLINE var *exec_builtin(node *op, var *res)
2273 #define tspl (G.exec_builtin__tspl)
2279 regmatch_t pmatch[2];
2288 isr = info = op->info;
2291 av[2] = av[3] = NULL;
// an: argument nodes, av: evaluated values, as: string forms.
2292 for (i = 0; i < 4 && op; i++) {
2293 an[i] = nextarg(&op);
2294 if (isr & 0x09000000)
2295 av[i] = evaluate(an[i], &tv[i]);
2296 if (isr & 0x08000000)
2297 as[i] = getvar_s(av[i]);
2302 if ((uint32_t)nargs < (info >> 30))
2303 syntax_error(EMSG_TOO_FEW_ARGS);
// atan2 needs libm support.
2309 if (ENABLE_FEATURE_AWK_LIBM)
2310 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2312 syntax_error(EMSG_NO_MATH);
// split: a regex third argument is used directly; any other value is
// turned into a temporary splitter.
2319 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2320 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2325 n = awk_split(as[0], spl, &s);
// The target array is emptied and refilled with indices 1 to n.
2327 clear_array(iamarray(av[1]));
2328 for (i = 1; i <= n; i++)
2329 setari_u(av[1], i, nextword(&s));
// substr: convert the 1-based start to a 0-based offset; the length
// defaults to the rest of the string.
2339 i = getvar_i(av[1]) - 1;
2344 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2347 s = xstrndup(as[0]+i, n);
2352 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2353 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2355 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2359 setvar_i(res, ~getvar_i_int(av[0]));
2363 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2367 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2371 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2375 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// Case conversion works on a private copy and touches only the ASCII
// letters a to z in either case.
2381 s1 = s = xstrdup(as[0]);
2383 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2384 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2385 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// index: l is the last offset at which the needle could still fit.
2395 l = strlen(as[0]) - ll;
2396 if (ll > 0 && l >= 0) {
2398 char *s = strstr(as[0], as[1]);
2400 n = (s - as[0]) + 1;
2402 /* this piece of code is terribly slow and
2403 * really should be rewritten
2405 for (i = 0; i <= l; i++) {
2406 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// strftime: formats into g_buf, using a default format when no format
// argument was given.
2418 tt = getvar_i(av[1]);
2421 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2422 i = strftime(g_buf, MAXVARFMT,
2423 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2426 setvar_s(res, g_buf);
2430 setvar_i(res, do_mktime(as[0]));
// match: sets RSTART and RLENGTH from the match offsets; the values 0 and
// -1 are put into pmatch on one path so that RLENGTH becomes -1.
2434 re = as_regex(an[1], &sreg);
2435 n = regexec(re, as[0], 1, pmatch, 0);
2440 pmatch[0].rm_so = 0;
2441 pmatch[0].rm_eo = -1;
2443 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2444 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2445 setvar_i(res, pmatch[0].rm_so);
// Substitution family: the first call writes the new string into res and
// enables subexpression references; the other two modify the target in
// place and return the awk_sub result, with match number 0 or 1.
2451 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2455 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2459 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2469 * Evaluate node - the heart of the program. Supplied with subtree
2470 * and place where to store result. returns ptr to result.
2472 #define XC(n) ((n) >> 8)
// Execute the node chain or expression tree that starts at op.
// op:  first node to process
// res: variable that receives the result when the operation produces one
// The result pointer may be res itself or another variable chosen by the
// operation, for example a field or an array element.
// Operand preparation is driven by flag bits in opinfo: OF_RES1 and
// OF_RES2 evaluate the left and right subtrees, OF_STR1 and OF_STR2 fetch
// their string forms, and OF_NUM1 fetches the numeric left value.
// The operation itself is selected by the class bits of opinfo, reduced
// with the XC macro.
2474 static var *evaluate(node *op, var *res)
2476 /* This procedure is recursive so we should count every byte */
2477 #define fnargs (G.evaluate__fnargs)
2478 /* seed is initialized to 1 */
2479 #define seed (G.evaluate__seed)
2480 #define sreg (G.evaluate__sreg)
// NOTE(review): presumably the early exit for a NULL op; the guard is not
// visible here - confirm.
2485 return setvar_s(res, NULL);
2487 debug_printf_eval("entered %s()\n", __func__);
2495 } L = L; /* for compiler */
2506 opn = (opinfo & OPNMASK);
2507 g_lineno = op->lineno;
2509 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2511 /* execute inevitable things */
2512 if (opinfo & OF_RES1)
2513 L.v = evaluate(op1, v1);
2514 if (opinfo & OF_RES2)
2515 R.v = evaluate(op->r.n, v1+1);
2516 if (opinfo & OF_STR1) {
2517 L.s = getvar_s(L.v);
2518 debug_printf_eval("L.s:'%s'\n", L.s);
2520 if (opinfo & OF_STR2) {
2521 R.s = getvar_s(R.v);
2522 debug_printf_eval("R.s:'%s'\n", R.s);
2524 if (opinfo & OF_NUM1) {
2525 L_d = getvar_i(L.v);
2526 debug_printf_eval("L_d:%f\n", L_d);
2529 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2530 switch (XC(opinfo & OPCLSMASK)) {
2532 /* -- iterative node type -- */
// Pattern test: a comma node means a range pattern, tracked with the
// OF_CHECKED flag stored in the node itself.
2536 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2537 /* it's range pattern */
2538 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2539 op->info |= OF_CHECKED;
2540 if (ptest(op1->r.n))
2541 op->info &= ~OF_CHECKED;
// Plain pattern: follow a.n when it matches, r.n otherwise.
2547 op = ptest(op1) ? op->a.n : op->r.n;
2551 /* just evaluate an expression, also used as unconditional jump */
2555 /* branch, used in if-else and various loops */
2557 op = istrue(L.v) ? op->a.n : op->r.n;
2560 /* initialize for-in loop */
2561 case XC( OC_WALKINIT ):
2562 hashwalk_init(L.v, iamarray(R.v));
2565 /* get next array item */
2566 case XC( OC_WALKNEXT ):
2567 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2570 case XC( OC_PRINT ):
2571 case XC( OC_PRINTF ): {
// Output redirection: the target stream is looked up by name and opened
// on first use, as a pipe or as a file in write or append mode.
2575 rstream *rsm = newfile(R.s);
2578 rsm->F = popen(R.s, "w");
2580 bb_perror_msg_and_die("popen");
2583 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2589 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// print: the fputs of $0 below is one path; the other path prints each
// argument, numbers through OFMT, with OFS in between and ORS at the end.
2591 fputs(getvar_s(intvar[F0]), F);
2594 var *v = evaluate(nextarg(&op1), v1);
2595 if (v->type & VF_NUMBER) {
2596 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2600 fputs(getvar_s(v), F);
2604 fputs(getvar_s(intvar[OFS]), F);
2607 fputs(getvar_s(intvar[ORS]), F);
2609 } else { /* OC_PRINTF */
2610 char *s = awk_printf(op1);
2618 case XC( OC_DELETE ): {
2619 uint32_t info = op1->info & OPCLSMASK;
// The operand must be a plain variable or a function argument.
2622 if (info == OC_VAR) {
2624 } else if (info == OC_FNARG) {
2625 v = &fnargs[op1->l.aidx];
2627 syntax_error(EMSG_NOT_ARRAY);
// Either one element is removed or the whole array is cleared.
2633 s = getvar_s(evaluate(op1->r.n, v1));
2634 hash_remove(iamarray(v), s);
2636 clear_array(iamarray(v));
2641 case XC( OC_NEWSOURCE ):
2642 g_progname = op->l.new_progname;
2645 case XC( OC_RETURN ):
2649 case XC( OC_NEXTFILE ):
2660 /* -- recursive node type -- */
2664 if (L.v == intvar[NF])
2668 case XC( OC_FNARG ):
2669 L.v = &fnargs[op->l.aidx];
// With a subscript the result is the array element, else the variable.
2671 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 when the key exists in the array, else 0.
2675 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2678 case XC( OC_REGEXP ):
2680 L.s = getvar_s(intvar[F0]);
2683 case XC( OC_MATCH ):
2687 regex_t *re = as_regex(op1, &sreg);
2688 int i = regexec(re, L.s, 0, NULL, 0);
// The result is inverted when the operator is the negated match.
2691 setvar_i(res, (i == 0) ^ (opn == '!'));
2696 debug_printf_eval("MOVE\n");
2697 /* if source is a temporary string, just relink it to dest */
2698 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2699 //then L.v ends up being a string, which is wrong
2700 // if (R.v == v1+1 && R.v->string) {
2701 // res = setvar_p(L.v, R.v->string);
2702 // R.v->string = NULL;
2704 res = copyvar(L.v, R.v);
2708 case XC( OC_TERNARY ):
2709 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2710 syntax_error(EMSG_POSSIBLE_ERROR);
2711 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2714 case XC( OC_FUNC ): {
2716 const char *sv_progname;
2718 /* The body might be empty, still has to eval the args */
2719 if (!op->r.n->info && !op->r.f->body.first)
2720 syntax_error(EMSG_UNDEF_FUNC);
// Fresh argument variables are allocated and filled from the call
// arguments, stopping after the declared number of parameters.
2722 vbeg = v = nvalloc(op->r.f->nargs + 1);
2724 var *arg = evaluate(nextarg(&op1), v1);
2726 v->type |= VF_CHILD;
2728 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved around the call of the function body.
2734 sv_progname = g_progname;
2736 res = evaluate(op->r.f->body.first, res);
2738 g_progname = sv_progname;
2745 case XC( OC_GETLINE ):
2746 case XC( OC_PGETLINE ): {
// Input source: a command pipe, a named file, or the next input file.
2753 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2754 rsm->F = popen(L.s, "r");
2755 rsm->is_pipe = TRUE;
2757 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2762 iF = next_input_file();
2766 if (!rsm || !rsm->F) {
2767 setvar_i(intvar[ERRNO], errno);
2775 i = awk_getline(rsm, R.v);
// FNR advances only for reads that have no explicit source operand.
2776 if (i > 0 && !op1) {
2777 incvar(intvar[FNR]);
2784 /* simple builtins */
2785 case XC( OC_FBLTIN ): {
2786 double R_d = R_d; /* for compiler */
2790 R_d = (long long)L_d;
// NOTE(review): this yields exactly 1.0 when rand() returns RAND_MAX; awk
// rand is normally specified as strictly below 1 - confirm.
2794 R_d = (double)rand() / (double)RAND_MAX;
2798 if (ENABLE_FEATURE_AWK_LIBM) {
2804 if (ENABLE_FEATURE_AWK_LIBM) {
2810 if (ENABLE_FEATURE_AWK_LIBM) {
2816 if (ENABLE_FEATURE_AWK_LIBM) {
2822 if (ENABLE_FEATURE_AWK_LIBM) {
2827 syntax_error(EMSG_NO_MATH);
// Seeding: an explicit argument is used when present, else the time.
2832 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2841 debug_printf_eval("length: L.s:'%s'\n", L.s);
2843 L.s = getvar_s(intvar[F0]);
2844 debug_printf_eval("length: L.s='%s'\n", L.s);
// For an array operand the result is its element count.
2846 else if (L.v->type & VF_ARRAY) {
2847 R_d = L.v->x.array->nel;
2848 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
// system: runs the command only when the exec feature is enabled and the
// command is non-empty; the result is the status shifted right by 8.
2856 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2857 ? (system(L.s) >> 8) : 0;
2863 } else if (L.s && *L.s) {
2864 rstream *rsm = newfile(L.s);
// close: the stream is looked up by name, closed according to its kind,
// and its entry removed from fdhash.
2874 rsm = (rstream *)hash_search(fdhash, L.s);
2875 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2877 debug_printf_eval("OC_FBLTIN F_cl "
2878 "rsm->is_pipe:%d, ->F:%p\n",
2879 rsm->is_pipe, rsm->F);
2880 /* Can be NULL if open failed. Example:
2881 * getline line <"doesnt_exist";
2882 * close("doesnt_exist"); <--- here rsm->F is NULL
2885 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2887 hash_remove(fdhash, L.s);
2890 setvar_i(intvar[ERRNO], errno);
2899 case XC( OC_BUILTIN ):
2900 res = exec_builtin(op, res);
2903 case XC( OC_SPRINTF ):
2904 setvar_p(res, awk_printf(op1));
2907 case XC( OC_UNARY ): {
2910 Ld = R_d = getvar_i(R.v);
2937 case XC( OC_FIELD ): {
2938 int i = (int)getvar_i(R.v);
// Field numbers are 1-based; Fields is indexed from 0.
2945 res = &Fields[i - 1];
2950 /* concatenation (" ") and index joining (",") */
2951 case XC( OC_CONCAT ):
2952 case XC( OC_COMMA ): {
2953 const char *sep = "";
2954 if ((opinfo & OPCLSMASK) == OC_COMMA)
2955 sep = getvar_s(intvar[SUBSEP]);
2956 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// Short-circuit logic: the right operand is tested only when needed.
2961 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2965 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2968 case XC( OC_BINARY ):
2969 case XC( OC_REPLACE ): {
2970 double R_d = getvar_i(R.v);
2971 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2984 syntax_error(EMSG_DIV_BY_ZERO);
2988 if (ENABLE_FEATURE_AWK_LIBM)
2989 L_d = pow(L_d, R_d);
2991 syntax_error(EMSG_NO_MATH);
2995 syntax_error(EMSG_DIV_BY_ZERO);
// Remainder, computed through a truncating integer quotient.
2996 L_d -= (long long)(L_d / R_d) * R_d;
2999 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res; OC_REPLACE stores back into the left operand.
3000 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3004 case XC( OC_COMPARE ): {
3005 int i = i; /* for compiler */
// Numeric comparison when both sides are numeric, else string comparison,
// which ignores case when icase is set.
3008 if (is_numeric(L.v) && is_numeric(R.v)) {
3009 Ld = getvar_i(L.v) - getvar_i(R.v);
3011 const char *l = getvar_s(L.v);
3012 const char *r = getvar_s(R.v);
3013 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
// The low bit of opn inverts the result of the selected comparison.
3015 switch (opn & 0xfe) {
3026 setvar_i(res, (i == 0) ^ (opn & 1));
3031 syntax_error(EMSG_POSSIBLE_ERROR);
3033 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3035 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3042 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3050 /* -------- main & co. -------- */
// r: exit status requested by the caller
// Runs the END sequence, then goes through every bucket of fdhash and
// closes each stream that is an open pipe with pclose, which waits for
// the child process.
// NOTE(review): the guard around the END evaluation and the final exit
// call are not visible here.
3052 static int awk_exit(int r)
3063 evaluate(endseq.first, &tv);
3066 /* waiting for children */
3067 for (i = 0; i < fdhash->csize; i++) {
3068 hi = fdhash->items[i];
3070 if (hi->data.rs.F && hi->data.rs.is_pipe)
3071 pclose(hi->data.rs.F);
3079 /* if expr looks like "var=value", perform assignment and return 1,
3080 * otherwise return 0 */
// expr: command line word to examine
// The word is a candidate only when its first character passes isalnum_
// and it contains an equals sign.
// The assignment is done on a private copy of expr: the value part gets
// its escape sequences expanded in place and is stored in the variable
// named by the part before the equals sign.
// NOTE(review): the copy is presumably cut at the equals sign before
// newvar is called; that statement is not visible here - confirm.
3081 static int is_assignment(const char *expr)
3085 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3089 exprc = xstrdup(expr);
// Re-point val at the same offset inside the copy.
3090 val = exprc + (val - expr);
3093 unescape_string_in_place(val);
3094 setvar_u(newvar(exprc), val);
3099 /* switch to next input file */
// Advance to the next input file named in ARGV.
// ARGIND is incremented for each candidate. Empty entries and entries that
// is_assignment accepts as var=value are not opened as files.
// State kept in the globals struct G: the stream record rsm and the flag
// files_happen, which notes that at least one file operand was seen.
// NOTE(review): the behaviour once ARGIND reaches ARGC is not visible
// here; presumably standard input is used when no file was seen - confirm.
3100 static rstream *next_input_file(void)
3102 #define rsm (G.next_input_file__rsm)
3103 #define files_happen (G.next_input_file__files_happen)
3106 const char *fname, *ind;
// Reset the buffer position fields of the stream record.
3111 rsm.pos = rsm.adv = 0;
3114 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3121 ind = getvar_s(incvar(intvar[ARGIND]));
3122 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3123 if (fname && *fname && !is_assignment(fname)) {
3124 F = xfopen_stdin(fname);
3129 files_happen = TRUE;
3130 setvar_s(intvar[FILENAME], fname);
// Entry point of the awk applet.
// Steps visible below:
//   1. force the C numeric locale and allocate the global buffer g_buf
//   2. create the hash tables and the builtin variables, build the FS and
//      RS splitters, register the three standard streams by name
//   3. import the environment into ENVIRON
//   4. parse options (-F, -v, -f and, with GNU extensions, -e)
//   5. parse the program from files, from -e text, or from the first
//      non-option argument
//   6. fill ARGC and ARGV, run the BEGIN sequence
//   7. run the main sequence for each record of each input file, then
//      finish through awk_exit
3137 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3138 int awk_main(int argc, char **argv)
3142 llist_t *list_v = NULL;
3143 llist_t *list_f = NULL;
3144 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3145 llist_t *list_e = NULL;
3151 char *vnames = (char *)vNames; /* cheat */
3152 char *vvalues = (char *)vValues;
3156 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3157 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3158 if (ENABLE_LOCALE_SUPPORT)
3159 setlocale(LC_NUMERIC, "C");
3163 /* allocate global buffer */
3164 g_buf = xmalloc(MAXVARFMT + 1);
3166 vhash = hash_init();
3167 ahash = hash_init();
3168 fdhash = hash_init();
3169 fnhash = hash_init();
3171 /* initialize variables */
// Names and initial values come from two parallel word lists; a value
// entry starting with byte 0377 means no initial string is assigned, and a
// name entry starting with an asterisk marks the variable as VF_SPECIAL.
3172 for (i = 0; *vnames; i++) {
3173 intvar[i] = v = newvar(nextword(&vnames));
3174 if (*vvalues != '\377')
3175 setvar_s(v, nextword(&vvalues));
3179 if (*vnames == '*') {
3180 v->type |= VF_SPECIAL;
// Build the initial field and record splitters.
3185 handle_special(intvar[FS]);
3186 handle_special(intvar[RS]);
3188 newfile("/dev/stdin")->F = stdin;
3189 newfile("/dev/stdout")->F = stdout;
3190 newfile("/dev/stderr")->F = stderr;
3192 /* Huh, people report that sometimes environ is NULL. Oh well. */
3193 if (environ) for (envp = environ; *envp; envp++) {
3194 /* environ is writable, thus we don't strdup it needlessly */
3196 char *s1 = strchr(s, '=');
3199 /* Both findvar and setvar_u take const char*
3200 * as 2nd arg -> environment is not trashed */
3201 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3205 opt_complementary = OPTCOMPLSTR_AWK;
3206 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3210 bb_error_msg("warning: option -W is ignored");
// -F: escape sequences in the separator are expanded before FS is set.
3212 unescape_string_in_place(opt_F);
3213 setvar_s(intvar[FS], opt_F);
// -v: each list entry must have the var=value form.
3216 if (!is_assignment(llist_pop(&list_v)))
// -f: the program file is read in chunks of up to 4094 bytes into a buffer
// that grows by 4096 bytes per chunk; storage starts at offset 1.
3223 g_progname = llist_pop(&list_f);
3224 from_file = xfopen_stdin(g_progname);
3225 /* one byte is reserved for some trick in next_token */
3226 for (i = j = 1; j > 0; i += j) {
3227 s = xrealloc(s, i + 4096);
3228 j = fread(s + i, 1, 4094, from_file);
3232 parse_program(s + 1);
3235 g_progname = "cmd. line";
3236 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3238 parse_program(llist_pop(&list_e));
// Without -f and -e the first remaining argument is the program text.
3241 if (!(opt & (OPT_f | OPT_e))) {
3244 parse_program(*argv++);
3248 /* fill in ARGV array */
3249 setvar_i(intvar[ARGC], argc + 1);
3250 setari_u(intvar[ARGV], 0, "awk");
3253 setari_u(intvar[ARGV], ++i, *argv++);
// Run BEGIN; with neither main rules nor END rules there is nothing left
// to read, so the program finishes at once.
3255 evaluate(beginseq.first, &tv);
3256 if (!mainseq.first && !endseq.first)
3257 awk_exit(EXIT_SUCCESS);
3259 /* input file could already be opened in BEGIN block */
3261 iF = next_input_file();
3263 /* passing through input files */
3266 setvar_i(intvar[FNR], 0);
// One pass of the main sequence per record read into $0.
3268 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3271 incvar(intvar[FNR]);
3272 evaluate(mainseq.first, &tv);
3279 syntax_error(strerror(errno));
3281 iF = next_input_file();
3284 awk_exit(EXIT_SUCCESS);