1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 //usage:#define awk_trivial_usage
11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage: " -v VAR=VAL Set variable"
14 //usage: "\n -F SEP Use SEP as field separator"
15 //usage: "\n -f FILE Read program from FILE"
21 /* This is a NOEXEC applet. Be very careful! */
24 /* If you comment out any of the three macros below, it will be #defined
25 * later (further down) to print debug output to stderr: */
26 #define debug_printf_walker(...) do {} while (0)
27 #define debug_printf_eval(...) do {} while (0)
28 #define debug_printf_parse(...) do {} while (0)
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
46 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
47 #define VF_ARRAY 0x0002 /* 1 = it's an array */
49 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
50 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
51 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
52 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
53 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
54 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
55 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
57 /* these flags are static, don't change them when value is changed */
58 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
60 typedef struct walker_list {
63 struct walker_list *prev;
68 typedef struct var_s {
69 unsigned type; /* flags */
73 int aidx; /* func arg idx (for compilation stage) */
74 struct xhash_s *array; /* array ptr */
75 struct var_s *parent; /* for func args, ptr to actual parameter */
76 walker_list *walker; /* list of array elements (for..in) */
80 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
81 typedef struct chain_s {
84 const char *programname;
88 typedef struct func_s {
94 typedef struct rstream_s {
103 typedef struct hash_item_s {
105 struct var_s v; /* variable/array hash */
106 struct rstream_s rs; /* redirect streams hash */
107 struct func_s f; /* functions hash */
109 struct hash_item_s *next; /* next in chain */
110 char name[1]; /* really it's longer */
113 typedef struct xhash_s {
114 unsigned nel; /* number of stored elements */
115 unsigned csize; /* current hash size (number of buckets in items[]) */
116 unsigned nprime; /* index in PRIMES[] of the next hash size to grow to */
117 unsigned glen; /* total length of item names, each counted with its NUL */
118 struct hash_item_s **items; /* bucket array; items in a bucket are chained via ->next */
122 typedef struct node_s {
142 /* Block of temporary variables */
143 typedef struct nvblock_s {
146 struct nvblock_s *prev;
147 struct nvblock_s *next;
151 typedef struct tsplitter_s {
156 /* simple token classes */
157 /* Order and hex values are very important!!! See next_token() */
158 #define TC_SEQSTART 1 /* ( */
159 #define TC_SEQTERM (1 << 1) /* ) */
160 #define TC_REGEXP (1 << 2) /* /.../ */
161 #define TC_OUTRDR (1 << 3) /* | > >> */
162 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
163 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
164 #define TC_BINOPX (1 << 6) /* two-opnd operator */
165 #define TC_IN (1 << 7)
166 #define TC_COMMA (1 << 8)
167 #define TC_PIPE (1 << 9) /* input redirection pipe */
168 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
169 #define TC_ARRTERM (1 << 11) /* ] */
170 #define TC_GRPSTART (1 << 12) /* { */
171 #define TC_GRPTERM (1 << 13) /* } */
172 #define TC_SEMICOL (1 << 14)
173 #define TC_NEWLINE (1 << 15)
174 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
175 #define TC_WHILE (1 << 17)
176 #define TC_ELSE (1 << 18)
177 #define TC_BUILTIN (1 << 19)
178 #define TC_GETLINE (1 << 20)
179 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
180 #define TC_BEGIN (1 << 22)
181 #define TC_END (1 << 23)
182 #define TC_EOF (1 << 24)
183 #define TC_VARIABLE (1 << 25)
184 #define TC_ARRAY (1 << 26)
185 #define TC_FUNCTION (1 << 27)
186 #define TC_STRING (1 << 28)
187 #define TC_NUMBER (1 << 29)
189 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
191 /* combined token classes */
192 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
193 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
194 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
195 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
197 #define TC_STATEMNT (TC_STATX | TC_WHILE)
198 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
200 /* word tokens: always matched as keywords, they cannot mean anything else (e.g. a name) even when not expected */
201 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
202 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
204 /* discard newlines after these */
205 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
206 | TC_BINOP | TC_OPTERM)
208 /* what can expression begin with */
209 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
210 /* what can group begin with */
211 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
213 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
214 /* operator is inserted between them */
215 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
216 | TC_STRING | TC_NUMBER | TC_UOPPOST)
217 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
219 #define OF_RES1 0x010000
220 #define OF_RES2 0x020000
221 #define OF_STR1 0x040000
222 #define OF_STR2 0x080000
223 #define OF_NUM1 0x100000
224 #define OF_CHECKED 0x200000
226 /* combined operator flags */
229 #define xS (OF_RES2 | OF_STR2)
231 #define VV (OF_RES1 | OF_RES2)
232 #define Nx (OF_RES1 | OF_NUM1)
233 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
234 #define Sx (OF_RES1 | OF_STR1)
235 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
236 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
238 #define OPCLSMASK 0xFF00
239 #define OPNMASK 0x007F
241 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
242 * For builtins that byte has a different meaning: bits n n s3 s2 s1 v3 v2 v1,
243 * n n - min. number of args, vN - resolve Nth arg to var, sN - resolve Nth arg to string
248 #define P(x) (x << 24) /* place x into the highest byte; NOTE(review): x is not parenthesized, pass simple constants only */
249 #define PRIMASK 0x7F000000 /* low 7 bits of the highest byte */
250 #define PRIMASK2 0x7E000000 /* as PRIMASK but without bit 24; presumably ignores the grouping (even/odd) bit - confirm in parse_expr() */
252 /* Operation classes */
254 #define SHIFT_TIL_THIS 0x0600
255 #define RECUR_FROM_THIS 0x1000
258 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
259 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
261 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
262 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
263 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
265 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
266 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
267 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
268 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
269 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
270 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
271 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
272 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
275 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
279 /* simple builtins */
281 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
282 F_ti, F_le, F_sy, F_ff, F_cl
287 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
289 B_an, B_co, B_ls, B_or, B_rs, B_xo,
292 /* tokens and their corresponding info values */
294 #define NTC "\377" /* switch to next token class (tc<<1) */
297 #define OC_B OC_BUILTIN
299 static const char tokenlist[] ALIGN1 =
302 "\1/" NTC /* REGEXP */
303 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
304 "\2++" "\2--" NTC /* UOPPOST */
305 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
306 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
307 "\2*=" "\2/=" "\2%=" "\2^="
308 "\1+" "\1-" "\3**=" "\2**"
309 "\1/" "\1%" "\1^" "\1*"
310 "\2!=" "\2>=" "\2<=" "\1>"
311 "\1<" "\2!~" "\1~" "\2&&"
312 "\2||" "\1?" "\1:" NTC
316 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
322 "\2if" "\2do" "\3for" "\5break" /* STATX */
323 "\10continue" "\6delete" "\5print"
324 "\6printf" "\4next" "\10nextfile"
325 "\6return" "\4exit" NTC
329 "\3and" "\5compl" "\6lshift" "\2or"
331 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
332 "\3cos" "\3exp" "\3int" "\3log"
333 "\4rand" "\3sin" "\4sqrt" "\5srand"
334 "\6gensub" "\4gsub" "\5index" "\6length"
335 "\5match" "\5split" "\7sprintf" "\3sub"
336 "\6substr" "\7systime" "\10strftime" "\6mktime"
337 "\7tolower" "\7toupper" NTC
339 "\4func" "\10function" NTC
342 /* compiler adds trailing "\0" */
345 static const uint32_t tokeninfo[] = {
349 xS|'a', xS|'w', xS|'|',
350 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
351 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
352 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
353 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
354 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
355 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
356 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
357 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
358 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
359 OC_IN|SV|P(49), /* in */
361 OC_PGETLINE|SV|P(37),
362 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
368 ST_IF, ST_DO, ST_FOR, OC_BREAK,
369 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
370 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
371 OC_RETURN|Vx, OC_EXIT|Nx,
375 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
376 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
377 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
378 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
379 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
380 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
381 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
382 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
383 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
390 /* internal variable names and their initial values */
391 /* an asterisk marks SPECIAL vars; "$" is simply the unnamed field 0 (F0) */
393 CONVFMT, OFMT, FS, OFS,
394 ORS, RS, RT, FILENAME,
395 SUBSEP, F0, ARGIND, ARGC,
396 ARGV, ERRNO, FNR, NR,
397 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
400 static const char vNames[] ALIGN1 =
401 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
402 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
403 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
404 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
405 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
407 static const char vValues[] ALIGN1 =
408 "%.6g\0" "%.6g\0" " \0" " \0"
409 "\n\0" "\n\0" "\0" "\0"
410 "\034\0" "\0" "\377";
412 /* hash size may grow to these values */
413 #define FIRST_PRIME 61
414 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
417 /* Globals. Split in two parts so that first one is addressed
418 * with (mostly short) negative offsets.
419 * NB: it's unsafe to put members of type "double"
420 * into globals2 (gcc may fail to align them).
424 chain beginseq, mainseq, endseq;
426 node *break_ptr, *continue_ptr;
428 xhash *vhash, *ahash, *fdhash, *fnhash;
429 const char *g_progname;
432 int maxfields; /* used in fsrealloc() only */
441 smallint is_f0_split;
445 uint32_t t_info; /* often used */
450 var *intvar[NUM_INTERNAL_VARS]; /* often used */
452 /* former statics from various functions */
453 char *split_f0__fstrings;
455 uint32_t next_token__save_tclass;
456 uint32_t next_token__save_info;
457 uint32_t next_token__ltclass;
458 smallint next_token__concat_inserted;
460 smallint next_input_file__files_happen;
461 rstream next_input_file__rsm;
463 var *evaluate__fnargs;
464 unsigned evaluate__seed;
465 regex_t evaluate__sreg;
469 tsplitter exec_builtin__tspl;
471 /* biggest and least used members go last */
472 tsplitter fsplitter, rsplitter;
474 #define G1 (ptr_to_globals[-1])
475 #define G (*(struct globals2 *)ptr_to_globals)
476 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477 /*char G1size[sizeof(G1)]; - 0x74 */
478 /*char Gsize[sizeof(G)]; - 0x1c4 */
479 /* Trying to keep most of members accessible with short offsets: */
480 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
481 #define t_double (G1.t_double )
482 #define beginseq (G1.beginseq )
483 #define mainseq (G1.mainseq )
484 #define endseq (G1.endseq )
485 #define seq (G1.seq )
486 #define break_ptr (G1.break_ptr )
487 #define continue_ptr (G1.continue_ptr)
489 #define vhash (G1.vhash )
490 #define ahash (G1.ahash )
491 #define fdhash (G1.fdhash )
492 #define fnhash (G1.fnhash )
493 #define g_progname (G1.g_progname )
494 #define g_lineno (G1.g_lineno )
495 #define nfields (G1.nfields )
496 #define maxfields (G1.maxfields )
497 #define Fields (G1.Fields )
498 #define g_cb (G1.g_cb )
499 #define g_pos (G1.g_pos )
500 #define g_buf (G1.g_buf )
501 #define icase (G1.icase )
502 #define exiting (G1.exiting )
503 #define nextrec (G1.nextrec )
504 #define nextfile (G1.nextfile )
505 #define is_f0_split (G1.is_f0_split )
506 #define t_rollback (G1.t_rollback )
507 #define t_info (G.t_info )
508 #define t_tclass (G.t_tclass )
509 #define t_string (G.t_string )
510 #define t_lineno (G.t_lineno )
511 #define intvar (G.intvar )
512 #define fsplitter (G.fsplitter )
513 #define rsplitter (G.rsplitter )
514 #define INIT_G() do { \
515 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516 G.next_token__ltclass = TC_OPTERM; \
517 G.evaluate__seed = 1; \
521 /* function prototypes */
522 static void handle_special(var *);
523 static node *parse_expr(uint32_t);
524 static void chain_group(void);
525 static var *evaluate(node *, var *);
526 static rstream *next_input_file(void);
527 static int fmt_num(char *, int, const char *, double, int);
528 static int awk_exit(int) NORETURN;
530 /* ---- error handling ---- */
532 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
533 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
534 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
535 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
536 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
537 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
538 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
539 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
540 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
541 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
543 static void zero_out_var(var *vp)
545 memset(vp, 0, sizeof(*vp));
548 static void syntax_error(const char *message) NORETURN;
549 static void syntax_error(const char *message)
551 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
554 /* ---- hash stuff ---- */
556 static unsigned hashidx(const char *name)
561 idx = *name++ + (idx << 6) - idx;
565 /* create new hash */
566 static xhash *hash_init(void)
570 newhash = xzalloc(sizeof(*newhash));
571 newhash->csize = FIRST_PRIME;
572 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
577 /* find item in hash, return ptr to data, NULL if not found */
578 static void *hash_search(xhash *hash, const char *name)
582 hi = hash->items[hashidx(name) % hash->csize];
584 if (strcmp(hi->name, name) == 0)
591 /* grow hash if it becomes too big */
592 static void hash_rebuild(xhash *hash)
594 unsigned newsize, i, idx;
595 hash_item **newitems, *hi, *thi;
597 if (hash->nprime == ARRAY_SIZE(PRIMES))
600 newsize = PRIMES[hash->nprime++];
601 newitems = xzalloc(newsize * sizeof(newitems[0]));
603 for (i = 0; i < hash->csize; i++) {
608 idx = hashidx(thi->name) % newsize;
609 thi->next = newitems[idx];
615 hash->csize = newsize;
616 hash->items = newitems;
619 /* find item in hash, add it if necessary. Return ptr to data */
620 static void *hash_find(xhash *hash, const char *name)
626 hi = hash_search(hash, name);
628 if (++hash->nel / hash->csize > 10)
631 l = strlen(name) + 1;
632 hi = xzalloc(sizeof(*hi) + l);
633 strcpy(hi->name, name);
635 idx = hashidx(name) % hash->csize;
636 hi->next = hash->items[idx];
637 hash->items[idx] = hi;
643 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
644 #define newvar(name) ((var*) hash_find(vhash, (name)))
645 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
646 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
648 static void hash_remove(xhash *hash, const char *name)
650 hash_item *hi, **phi;
652 phi = &hash->items[hashidx(name) % hash->csize];
655 if (strcmp(hi->name, name) == 0) {
656 hash->glen -= (strlen(name) + 1);
666 /* ------ some useful functions ------ */
668 static char *skip_spaces(char *p)
671 if (*p == '\\' && p[1] == '\n') {
674 } else if (*p != ' ' && *p != '\t') {
682 /* returns old *s, advances *s past word and terminating NUL */
683 static char *nextword(char **s)
686 while (*(*s)++ != '\0')
691 static char nextchar(char **s)
698 c = bb_process_escape_sequence((const char**)s);
699 if (c == '\\' && *s == pps) { /* unrecognized \z? */
700 c = *(*s); /* yes, fetch z */
702 (*s)++; /* advance unless z = NUL */
707 static ALWAYS_INLINE int isalnum_(int c)
709 return (isalnum(c) || c == '_');
712 static double my_strtod(char **pp)
715 if (ENABLE_DESKTOP && cp[0] == '0') {
716 /* Might be hex or octal integer: 0x123abc or 07777 */
717 char c = (cp[1] | 0x20);
718 if (c == 'x' || isdigit(cp[1])) {
719 unsigned long long ull = strtoull(cp, pp, 0);
723 if (!isdigit(c) && c != '.')
725 /* else: it may be a floating number. Examples:
726 * 009.123 (*pp points to '9')
727 * 000.123 (*pp points to '.')
728 * fall through to strtod.
732 return strtod(cp, pp);
735 /* -------- working with variables (set/get/copy/etc) -------- */
737 static xhash *iamarray(var *v)
741 while (a->type & VF_CHILD)
744 if (!(a->type & VF_ARRAY)) {
746 a->x.array = hash_init();
751 static void clear_array(xhash *array)
756 for (i = 0; i < array->csize; i++) {
757 hi = array->items[i];
761 free(thi->data.v.string);
764 array->items[i] = NULL;
766 array->glen = array->nel = 0;
769 /* clear a variable */
770 static var *clrvar(var *v)
772 if (!(v->type & VF_FSTR))
775 v->type &= VF_DONTTOUCH;
781 /* assign string value to variable */
782 static var *setvar_p(var *v, char *value)
790 /* same as setvar_p but make a copy of string */
791 static var *setvar_s(var *v, const char *value)
793 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
796 /* same as setvar_s but sets USER flag */
797 static var *setvar_u(var *v, const char *value)
799 v = setvar_s(v, value);
804 /* set array element to user string */
805 static void setari_u(var *a, int idx, const char *s)
809 v = findvar(iamarray(a), itoa(idx));
813 /* assign numeric value to variable */
814 static var *setvar_i(var *v, double value)
817 v->type |= VF_NUMBER;
823 static const char *getvar_s(var *v)
825 /* if v is numeric and has no cached string, convert it to string */
826 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
827 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
828 v->string = xstrdup(g_buf);
829 v->type |= VF_CACHED;
831 return (v->string == NULL) ? "" : v->string;
834 static double getvar_i(var *v)
838 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
842 debug_printf_eval("getvar_i: '%s'->", s);
843 v->number = my_strtod(&s);
844 debug_printf_eval("%f (s:'%s')\n", v->number, s);
845 if (v->type & VF_USER) {
851 debug_printf_eval("getvar_i: '%s'->zero\n", s);
854 v->type |= VF_CACHED;
856 debug_printf_eval("getvar_i: %f\n", v->number);
860 /* Used for operands of bitwise ops */
861 static unsigned long getvar_i_int(var *v)
863 double d = getvar_i(v);
865 /* Casting doubles to longs is undefined for values outside
866 * of target type range. Try to widen it as much as possible */
868 return (unsigned long)d;
869 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
870 return - (long) (unsigned long) (-d);
873 static var *copyvar(var *dest, const var *src)
877 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
878 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
879 dest->number = src->number;
881 dest->string = xstrdup(src->string);
883 handle_special(dest);
887 static var *incvar(var *v)
889 return setvar_i(v, getvar_i(v) + 1.0);
892 /* return true if v is number or numeric string */
893 static int is_numeric(var *v)
896 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
899 /* return 1 when value of v corresponds to true, 0 otherwise */
900 static int istrue(var *v)
903 return (v->number != 0);
904 return (v->string && v->string[0]);
907 /* temporary variables allocator. Last allocated should be first freed */
908 static var *nvalloc(int n)
916 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
922 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
923 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
925 g_cb->pos = g_cb->nv;
927 /*g_cb->next = NULL; - xzalloc did it */
935 while (v < g_cb->pos) {
944 static void nvfree(var *v)
948 if (v < g_cb->nv || v >= g_cb->pos)
949 syntax_error(EMSG_INTERNAL_ERROR);
951 for (p = v; p < g_cb->pos; p++) {
952 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
953 clear_array(iamarray(p));
954 free(p->x.array->items);
957 if (p->type & VF_WALK) {
959 walker_list *w = p->x.walker;
960 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
964 debug_printf_walker(" free(%p)\n", w);
973 while (g_cb->prev && g_cb->pos == g_cb->nv) {
978 /* ------- awk program text parsing ------- */
980 /* Parse the next token pointed to by global pos, place results into the global t_* variables.
981 * If the token isn't expected, give up with a syntax error. Return the token class.
983 static uint32_t next_token(uint32_t expected)
985 #define concat_inserted (G.next_token__concat_inserted)
986 #define save_tclass (G.next_token__save_tclass)
987 #define save_info (G.next_token__save_info)
988 /* Initialized to TC_OPTERM: */
989 #define ltclass (G.next_token__ltclass)
999 } else if (concat_inserted) {
1000 concat_inserted = FALSE;
1001 t_tclass = save_tclass;
1008 g_lineno = t_lineno;
1010 while (*p != '\n' && *p != '\0')
1018 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1020 } else if (*p == '\"') {
1023 while (*p != '\"') {
1025 if (*p == '\0' || *p == '\n')
1026 syntax_error(EMSG_UNEXP_EOS);
1028 *s++ = nextchar(&pp);
1034 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1036 } else if ((expected & TC_REGEXP) && *p == '/') {
1040 if (*p == '\0' || *p == '\n')
1041 syntax_error(EMSG_UNEXP_EOS);
1045 s[-1] = bb_process_escape_sequence((const char **)&pp);
1057 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1059 } else if (*p == '.' || isdigit(*p)) {
1062 t_double = my_strtod(&pp);
1065 syntax_error(EMSG_UNEXP_TOKEN);
1067 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1070 /* search for something known */
1075 int l = (unsigned char) *tl++;
1076 if (l == (unsigned char) NTCC) {
1080 /* if token class is expected,
1082 * and it's not a longer word,
1084 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1085 && strncmp(p, tl, l) == 0
1086 && !((tc & TC_WORD) && isalnum_(p[l]))
1088 /* then this is what we are looking for */
1090 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1097 /* not a known token */
1099 /* is it a name? (var/array/function) */
1101 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1104 while (isalnum_(*++p)) {
1109 /* also consume whitespace between functionname and bracket */
1110 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1114 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1119 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1121 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1127 /* skipping newlines in some cases */
1128 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1131 /* insert concatenation operator when needed */
1132 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1133 concat_inserted = TRUE;
1137 t_info = OC_CONCAT | SS | P(35);
1144 /* Are we ready for this? */
1145 if (!(ltclass & expected))
1146 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1147 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1150 #undef concat_inserted
1156 static void rollback_token(void)
1161 static node *new_node(uint32_t info)
1165 n = xzalloc(sizeof(node));
1167 n->lineno = g_lineno;
1171 static void mk_re_node(const char *s, node *n, regex_t *re)
1173 n->info = OC_REGEXP;
1176 xregcomp(re, s, REG_EXTENDED);
1177 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1180 static node *condition(void)
1182 next_token(TC_SEQSTART);
1183 return parse_expr(TC_SEQTERM);
1186 /* parse expression terminated by given argument, return ptr
1187 * to built subtree. Terminator is eaten by parse_expr */
1188 static node *parse_expr(uint32_t iexp)
1196 debug_printf_parse("%s(%x)\n", __func__, iexp);
1199 sn.r.n = glptr = NULL;
1200 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1202 while (!((tc = next_token(xtc)) & iexp)) {
1204 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1205 /* input redirection (<) attached to glptr node */
1206 debug_printf_parse("%s: input redir\n", __func__);
1207 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1209 xtc = TC_OPERAND | TC_UOPPRE;
1212 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1213 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1214 /* for binary and postfix-unary operators, jump back over
1215 * previous operators with higher priority */
1217 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1218 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1222 if ((t_info & OPCLSMASK) == OC_TERNARY)
1224 cn = vn->a.n->r.n = new_node(t_info);
1226 if (tc & TC_BINOP) {
1228 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1229 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1231 next_token(TC_GETLINE);
1232 /* give maximum priority to this pipe */
1233 cn->info &= ~PRIMASK;
1234 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1238 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1243 debug_printf_parse("%s: other\n", __func__);
1244 /* for operands and prefix-unary operators, attach them
1247 cn = vn->r.n = new_node(t_info);
1249 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1250 if (tc & (TC_OPERAND | TC_REGEXP)) {
1251 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1252 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1253 /* one should be very careful with switch on tclass -
1254 * only simple tclasses should be used! */
1258 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1260 v = hash_search(ahash, t_string);
1262 cn->info = OC_FNARG;
1263 cn->l.aidx = v->x.aidx;
1265 cn->l.v = newvar(t_string);
1267 if (tc & TC_ARRAY) {
1269 cn->r.n = parse_expr(TC_ARRTERM);
1275 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1277 v = cn->l.v = xzalloc(sizeof(var));
1279 setvar_i(v, t_double);
1281 setvar_s(v, t_string);
1285 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1286 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1290 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1292 cn->r.f = newfunc(t_string);
1293 cn->l.n = condition();
1297 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1298 cn = vn->r.n = parse_expr(TC_SEQTERM);
1300 syntax_error("Empty sequence");
1305 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1307 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1311 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1312 cn->l.n = condition();
1319 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1323 /* add node to chain. Return ptr to alloc'd node */
1324 static node *chain_node(uint32_t info)
1329 seq->first = seq->last = new_node(0);
1331 if (seq->programname != g_progname) {
1332 seq->programname = g_progname;
1333 n = chain_node(OC_NEWSOURCE);
1334 n->l.new_progname = xstrdup(g_progname);
1339 seq->last = n->a.n = new_node(OC_DONE);
1344 static void chain_expr(uint32_t info)
1348 n = chain_node(info);
1349 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1350 if (t_tclass & TC_GRPTERM)
1354 static node *chain_loop(node *nn)
1356 node *n, *n2, *save_brk, *save_cont;
1358 save_brk = break_ptr;
1359 save_cont = continue_ptr;
1361 n = chain_node(OC_BR | Vx);
1362 continue_ptr = new_node(OC_EXEC);
1363 break_ptr = new_node(OC_EXEC);
1365 n2 = chain_node(OC_EXEC | Vx);
1368 continue_ptr->a.n = n2;
1369 break_ptr->a.n = n->r.n = seq->last;
1371 continue_ptr = save_cont;
1372 break_ptr = save_brk;
1377 /* parse group and attach it to chain */
1378 static void chain_group(void)
1384 c = next_token(TC_GRPSEQ);
1385 } while (c & TC_NEWLINE);
1387 if (c & TC_GRPSTART) {
1388 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1389 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1390 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1391 if (t_tclass & TC_NEWLINE)
1396 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1397 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1398 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1400 chain_expr(OC_EXEC | Vx);
1403 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1404 switch (t_info & OPCLSMASK) {
1406 debug_printf_parse("%s: ST_IF\n", __func__);
1407 n = chain_node(OC_BR | Vx);
1408 n->l.n = condition();
1410 n2 = chain_node(OC_EXEC);
1412 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1414 n2->a.n = seq->last;
1421 debug_printf_parse("%s: ST_WHILE\n", __func__);
1423 n = chain_loop(NULL);
1428 debug_printf_parse("%s: ST_DO\n", __func__);
1429 n2 = chain_node(OC_EXEC);
1430 n = chain_loop(NULL);
1432 next_token(TC_WHILE);
1433 n->l.n = condition();
1437 debug_printf_parse("%s: ST_FOR\n", __func__);
1438 next_token(TC_SEQSTART);
1439 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1440 if (t_tclass & TC_SEQTERM) { /* for-in */
1441 if ((n2->info & OPCLSMASK) != OC_IN)
1442 syntax_error(EMSG_UNEXP_TOKEN);
1443 n = chain_node(OC_WALKINIT | VV);
1446 n = chain_loop(NULL);
1447 n->info = OC_WALKNEXT | Vx;
1449 } else { /* for (;;) */
1450 n = chain_node(OC_EXEC | Vx);
1452 n2 = parse_expr(TC_SEMICOL);
1453 n3 = parse_expr(TC_SEQTERM);
1463 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1464 n = chain_node(t_info);
1465 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1466 if (t_tclass & TC_OUTRDR) {
1468 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1470 if (t_tclass & TC_GRPTERM)
1475 debug_printf_parse("%s: OC_BREAK\n", __func__);
1476 n = chain_node(OC_EXEC);
1481 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1482 n = chain_node(OC_EXEC);
1483 n->a.n = continue_ptr;
1486 /* delete, next, nextfile, return, exit */
1488 debug_printf_parse("%s: default\n", __func__);
// Parse a whole program: read top-level items until next_token() returns
// TC_EOF and dispatch on the class of the token that starts each item.
// NOTE(review): only part of the body is visible in this listing; the local
// declarations and the use of parameter p are on lines not shown here.
1494 static void parse_program(char *p)
1503 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1504 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1506 if (tclass & TC_OPTERM) {
1507 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1512 if (tclass & TC_BEGIN) {
1513 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1517 } else if (tclass & TC_END) {
1518 debug_printf_parse("%s: TC_END\n", __func__);
1522 } else if (tclass & TC_FUNCDECL) {
1523 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
// The TC_FUNCTION token supplies the name (t_string) handed to newfunc();
// the body pointer is reset to NULL before the parameter list is read.
1524 next_token(TC_FUNCTION);
1526 f = newfunc(t_string);
1527 f->body.first = NULL;
// Each TC_VARIABLE token is looked up in ahash and given the next argument
// index; f->nargs is post-incremented, so it counts the parameters seen.
1529 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1530 v = findvar(ahash, t_string);
1531 v->x.aidx = f->nargs++;
// After a parameter, either a comma or the closing TC_SEQTERM is accepted.
1533 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1540 } else if (tclass & TC_OPSEQ) {
1541 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
// Pattern rule: an OC_TEST node whose left operand is the parsed pattern.
1543 cn = chain_node(OC_TEST);
1544 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1545 if (t_tclass & TC_GRPSTART) {
1546 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
// Path tagged !TC_GRPSTART by the debug message: an OC_PRINT node is chained.
1550 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1551 chain_node(OC_PRINT);
// The right link of the test node is pointed at the current tail of mainseq.
1553 cn->r.n = mainseq.last;
1555 } else /* if (tclass & TC_GRPSTART) */ {
1556 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1561 debug_printf_parse("%s: TC_EOF\n", __func__);
1565 /* -------- program execution part -------- */
// Configure the splitter spl from separator string s.
// NOTE(review): n, re and ire are set up on lines not shown in this listing;
// presumably they refer to the node and regex storage inside spl - confirm.
1567 static node *mk_splitter(const char *s, tsplitter *spl)
// A node that currently holds a regexp has compiled state released first.
1575 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1577 regfree(ire); // TODO: nuke ire, use re+1?
// Separators of two or more characters are compiled as a regular expression.
1579 if (s[0] && s[1]) { /* strlen(s) > 1 */
1580 mk_re_node(s, n, re);
// Presumably the else branch: n->info is set to the code of s[0], which is
// zero when s is the empty string.
1582 n->info = (uint32_t) s[0];
1588 /* use node as a regular expression. Supplied with node ptr and regex_t
1589 * storage space. Return ptr to regex (if result points to preg, it should
1590 * be later regfree'd manually)
1592 static regex_t *as_regex(node *op, regex_t *preg)
// Literal regexp node: return the precompiled pattern, choosing the
// case-insensitive variant (r.ire) when icase is set, else l.re.
1598 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1599 return icase ? op->r.ire : op->l.re;
// Any other node is evaluated and its string value s is compiled into preg.
1602 s = getvar_s(evaluate(op, v));
1604 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1605 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1606 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1607 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1608 * (maybe gsub is not supposed to use REG_EXTENDED?).
// First attempt uses the extended syntax; when regcomp() reports failure the
// REG_EXTENDED bit is cleared and xregcomp() compiles the pattern again.
1610 if (regcomp(preg, s, cflags)) {
1611 cflags &= ~REG_EXTENDED;
1612 xregcomp(preg, s, cflags);
1618 /* gradually increasing buffer.
1619 * note that we reallocate even if n == old_size,
1620 * and thus there is at least one extra allocated byte.
// b: current buffer (may be NULL); n: number of bytes needed;
// *size: current capacity, overwritten whenever the buffer is regrown.
1622 static char* qrealloc(char *b, int n, int *size)
1624 if (!b || n >= *size) {
// New capacity is n plus half of n plus 80 bytes of slack.
// NOTE(review): this is int arithmetic; a very large n could overflow -
// confirm that callers keep n bounded.
1625 *size = n + (n>>1) + 80;
1626 b = xrealloc(b, *size);
1631 /* resize field storage space */
// size: number of fields wanted. The Fields array is grown when size reaches
// maxfields, and entries from size up to nfields are walked afterwards.
1632 static void fsrealloc(int size)
1636 if (size >= maxfields) {
// Grow with 16 slots of slack; every new slot starts as VF_SPECIAL with a
// NULL string. The starting value of i is set on a line not shown here.
1638 maxfields = size + 16;
1639 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1640 for (; i < maxfields; i++) {
1641 Fields[i].type = VF_SPECIAL;
1642 Fields[i].string = NULL;
1645 /* if size < nfields, clear extra field variables */
1646 for (i = size; i < nfields; i++) {
// Split string s according to splitter node spl. The pieces are written into
// a freshly allocated buffer that is handed back through *slist; the visible
// return statement yields the field count n.
// NOTE(review): many statements of the body are not visible in this listing.
1652 static int awk_split(const char *s, node *spl, char **slist)
1657 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1659 /* in worst case, each char would be a separate field */
// Zero-filled output buffer: twice the input length plus 3 bytes.
1660 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
// c[0] and c[1] both start as the separator character taken from spl->info.
1663 c[0] = c[1] = (char)spl->info;
// An empty RS value is tested here; the statement it guards is not shown.
1665 if (*getvar_s(intvar[RS]) == '\0')
1669 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1671 return n; /* "": zero fields */
1672 n++; /* at least one field will be there */
1674 l = strcspn(s, c+2); /* len till next NUL or \n */
// A regex match counts only when it starts at or before offset l.
1675 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1676 && pmatch[0].rm_so <= l
1678 l = pmatch[0].rm_so;
1679 if (pmatch[0].rm_eo == 0) {
1683 n++; /* we saw yet another delimiter */
1685 pmatch[0].rm_eo = l;
1690 /* make sure we remove *all* of the separator chars */
1693 } while (++l < pmatch[0].rm_eo);
// Continue scanning right after the end of the separator match.
1695 s += pmatch[0].rm_eo;
1699 if (c[0] == '\0') { /* null split */
1707 if (c[0] != ' ') { /* single-character split */
// Both cases of the separator are stored so strpbrk() below matches either;
// presumably guarded by icase on a line not shown - confirm.
1709 c[0] = toupper(c[0]);
1710 c[1] = tolower(c[1]);
1714 while ((s1 = strpbrk(s1, c)) != NULL) {
// Remaining path (separator is a single space): skip leading whitespace,
// then advance over a run of non-space characters.
1722 s = skip_whitespace(s);
1726 while (*s && !isspace(*s))
// Split the current value of $0 (intvar[F0]) into the Fields array using the
// field splitter, then store the field count in NF directly.
1733 static void split_f0(void)
1735 /* static char *fstrings; */
1736 #define fstrings (G.split_f0__fstrings)
// awk_split() returns the number of fields and places the split text in the
// buffer referenced by fstrings.
1747 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
// Each field points at the next word obtained via nextword(&s) and is flagged
// VF_FSTR, VF_USER and VF_DIRTY. The setup of s is on a line not shown.
1750 for (i = 0; i < n; i++) {
1751 Fields[i].string = nextword(&s);
1752 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1755 /* set NF manually to avoid side effects */
1757 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1758 intvar[NF]->number = nfields;
1762 /* perform additional actions when some internal variables changed */
// v: the variable that was just modified. Variables without VF_SPECIAL are
// tested for first; the action taken for them is on a line not shown.
1763 static void handle_special(var *v)
1767 const char *sep, *s;
1768 int sl, l, len, i, bsize;
1770 if (!(v->type & VF_SPECIAL))
// NF changed: $0 is rebuilt by joining the first n fields with OFS.
1773 if (v == intvar[NF]) {
1774 n = (int)getvar_i(v);
1777 /* recalculate $0 */
1778 sep = getvar_s(intvar[OFS]);
1782 for (i = 0; i < n; i++) {
1783 s = getvar_s(&Fields[i]);
1786 memcpy(b+len, sep, sl);
// The buffer is regrown before the field text is appended at offset len.
1789 b = qrealloc(b, len+l+sl, &bsize);
1790 memcpy(b+len, s, l);
// The joined buffer b is passed to setvar_p() as the new value of $0.
1795 setvar_p(intvar[F0], b);
// $0 changed: mark the record as not yet split into fields.
1798 } else if (v == intvar[F0]) {
1799 is_f0_split = FALSE;
// FS or RS changed: rebuild the corresponding splitter from the new value.
1801 } else if (v == intvar[FS]) {
1802 mk_splitter(getvar_s(v), &fsplitter);
1804 } else if (v == intvar[RS]) {
1805 mk_splitter(getvar_s(v), &rsplitter);
1807 } else if (v == intvar[IGNORECASE]) {
// Remaining case uses v-Fields as a field index, so v is presumably an entry
// of Fields: NF is raised to at least that index plus one.
1811 n = getvar_i(intvar[NF]);
1812 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1813 /* right here v is invalid. Just to note... */
1817 /* step through func/builtin/etc arguments */
// pn: address of the pointer to the remaining argument subtree.
// NOTE(review): only the OC_COMMA test is visible here; how *pn is advanced
// and what is returned are on lines not shown in this listing.
1818 static node *nextarg(node **pn)
1823 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
// Start a for-in iteration of variable v over array: allocate a walker that
// holds copies of the item names and link it in front of any walker that v
// already has.
1832 static void hashwalk_init(var *v, xhash *array)
1837 walker_list *prev_walker;
// If v already carries a walker list, remember it so it can be chained.
1839 if (v->type & VF_WALK) {
1840 prev_walker = v->x.walker;
1845 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
// One zeroed allocation holds the walker header plus array->glen + 1 bytes.
1847 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1848 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1849 w->cur = w->end = w->wbuf;
1850 w->prev = prev_walker;
// Walk every bucket and copy each item name to the current end of the buffer;
// the chain traversal and the advance of w->end are on lines not shown.
1851 for (i = 0; i < array->csize; i++) {
1852 hi = array->items[i];
1854 strcpy(w->end, hi->name);
// Advance the for-in iteration attached to v. When the walker is exhausted it
// is replaced by the previous walker; otherwise v receives the next name.
// NOTE(review): the return statements are not visible in this listing.
1861 static int hashwalk_next(var *v)
1863 walker_list *w = v->x.walker;
// cur reaching end means every stored name has been handed out.
1865 if (w->cur >= w->end) {
1866 walker_list *prev_walker = w->prev;
1868 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1870 v->x.walker = prev_walker;
// nextword() returns the current name and moves w->cur forward.
1874 setvar_s(v, nextword(&w->cur));
1878 /* evaluate node, return 1 when result is true, 0 otherwise */
// The pattern is evaluated into the shared temporary G.ptest__v and the
// outcome of istrue() on that result is returned.
1879 static int ptest(node *pattern)
1881 /* ptest__v is "static": to save stack space? */
1882 return istrue(evaluate(pattern, &G.ptest__v));
1885 /* read next record from stream rsm into a variable v */
// The record terminator is located with the RS splitter (rsplitter), more
// data is read from the descriptor as needed, and RT is set to the text at
// the terminator position. The value r is reported by the final debug line.
// NOTE(review): large parts of the body are not visible in this listing.
1886 static int awk_getline(rstream *rsm, var *v)
1889 regmatch_t pmatch[2];
1890 int size, a, p, pp = 0;
1891 int fd, so, eo, r, rp;
1894 debug_printf_eval("entered %s()\n", __func__);
1896 /* we're using our own buffer since we need access to accumulating
// Reads go straight to the file descriptor behind the stdio stream.
1899 fd = fileno(rsm->F);
// c is the separator as a single character, used by the non-regex branches.
1904 c = (char) rsplitter.n.info;
1908 m = qrealloc(m, 256, &size);
// Case 1: RS is a regexp; so and eo bracket the matched terminator.
1915 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1916 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1917 b, 1, pmatch, 0) == 0) {
1918 so = pmatch[0].rm_so;
1919 eo = pmatch[0].rm_eo;
// Case 2: single-character RS; search from pp, where earlier scans stopped.
1923 } else if (c != '\0') {
1924 s = strchr(b+pp, c);
1926 s = memchr(b+pp, '\0', p - pp);
// Case 3 (c is NUL): newline runs are skipped and a blank line, i.e. two
// consecutive newlines, is searched for as the terminator.
1933 while (b[rp] == '\n')
1935 s = strstr(b+rp, "\n\n");
1938 while (b[eo] == '\n')
// Buffer upkeep: move pending data to the front, then grow before reading.
1947 memmove(m, m+a, p+1);
1952 m = qrealloc(m, a+p+128, &size);
// NOTE(review): the return value of safe_read() is added to p directly, so a
// negative result lowers p; the check that handles this is not shown here.
1955 p += safe_read(fd, b+p, size-p-1);
1959 setvar_i(intvar[ERRNO], errno);
// The buffer is cut at so and at eo by writing NUL bytes, with the replaced
// character kept in c; RT receives the text that starts at so.
1968 c = b[so]; b[so] = '\0';
1972 c = b[eo]; b[eo] = '\0';
1973 setvar_s(intvar[RT], b+so);
1982 debug_printf_eval("returning from %s(): %d\n", __func__, r);
// Format number n into buffer b (capacity size) using printf-style format.
// r receives the snprintf() result; the return statement is not shown here.
1987 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1991 const char *s = format;
// With int_as_int set, a value equal to its int truncation is printed as %d.
// NOTE(review): the (int)n conversions are undefined when n lies outside the
// range of int.
1993 if (int_as_int && n == (int)n) {
1994 r = snprintf(b, size, "%d", (int)n);
// The loop leaves c holding the last character of format, which is taken as
// the conversion letter.
1996 do { c = *s; } while (c && *++s);
// NOTE(review): strchr() also matches the terminating NUL, so an empty format
// (c == 0) takes this integer branch - confirm that is intended.
1997 if (strchr("diouxX", c)) {
1998 r = snprintf(b, size, format, (int)n);
1999 } else if (strchr("eEfgG", c)) {
2000 r = snprintf(b, size, format, n);
// Any other conversion letter is rejected.
2002 syntax_error(EMSG_INV_FMT);
2008 /* formatted output into an allocated buffer, return ptr to buffer */
// n: argument list node; the first argument is the format string and the
// following ones are consumed one per conversion via nextarg().
2009 static char *awk_printf(node *n)
2014 int i, j, incr, bsize;
// A private copy of the format is taken (xstrdup).
2019 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// Skip literal text and doubled percent signs up to the next conversion.
2024 while (*f && (*f != '%' || *++f == '%'))
// Scan flags, width and precision up to the conversion letter.
2026 while (*f && !isalpha(*f)) {
2028 syntax_error("%*x formats are not supported");
// Reserve room for this piece: its literal length plus MAXVARFMT.
2032 incr = (f - s) + MAXVARFMT;
2033 b = qrealloc(b, incr + i, &bsize);
2039 arg = evaluate(nextarg(&n), v);
// Character conversion: numeric arguments give a char code, strings give
// their first character.
// NOTE(review): sprintf() here is not bounded by the space reserved above; a
// large field width in the format could exceed incr - confirm.
2042 if (c == 'c' || !c) {
2043 i += sprintf(b+i, s, is_numeric(arg) ?
2044 (char)getvar_i(arg) : *getvar_s(arg));
2045 } else if (c == 's') {
// String conversion: the buffer is regrown by the length of the argument.
2047 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2048 i += sprintf(b+i, s, s1);
// Everything else is handed to fmt_num() with incr as the size limit.
2050 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2054 /* if there was an error while sprintf, return value is negative */
// Final resize of the buffer to i + 1 bytes.
2061 b = xrealloc(b, i + 1);
2066 /* Common substitution routine.
2067 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2068 * store result into (dest), return number of substitutions.
2069 * If nm = 0, replace all matches.
2070 * If src or dst is NULL, use $0.
2071 * If subexp != 0, enable subexpression matching (\1-\9).
2073 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2077 int match_no, residx, replen, resbufsize;
// Ten slots: the whole match plus nine subexpressions.
2079 regmatch_t pmatch[10];
2080 regex_t sreg, *regex;
// as_regex() either returns a precompiled pattern or compiles into sreg.
2086 regex = as_regex(rn, &sreg);
2087 sp = getvar_s(src ? src : intvar[F0]);
2088 replen = strlen(repl);
// Each iteration handles one match in the remaining input sp.
2089 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2090 int so = pmatch[0].rm_so;
2091 int eo = pmatch[0].rm_eo;
2093 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
// Copy the input up to the end of the match into the result buffer.
2094 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2095 memcpy(resbuf + residx, sp, eo);
// Replacement is applied once the match counter has reached nm.
2097 if (++match_no >= nm) {
// Step back over the matched text that was just copied.
2102 residx -= (eo - so);
// Emit the replacement one character at a time.
2104 for (s = repl; *s; s++) {
2105 char c = resbuf[residx++] = *s;
// An ampersand or, with subexp set, a digit is a back-reference candidate.
2110 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
// nbs is maintained on lines not shown; presumably it counts the preceding
// backslashes, and the index is moved back by (nbs + 3) / 2 here - confirm.
2112 residx -= ((nbs + 3) >> 1);
2119 resbuf[residx++] = c;
// Insert the text of group j, growing the buffer by its length n.
2121 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2122 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2123 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
// Later matches must not treat the resume position as start of line.
2131 regexec_flags = REG_NOTBOL;
2136 /* Empty match (e.g. "b*" will match anywhere).
2137 * Advance by one char. */
2139 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2140 //... and will erroneously match "b" even though it is NOT at the word start.
2141 //we need REG_NOTBOW but it does not exist...
2142 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2143 //it should be able to do it correctly.
2144 /* Subtle: this is safe only because
2145 * qrealloc allocated at least one extra byte */
2146 resbuf[residx] = *sp;
// After the last match, the unmatched tail of the input is appended.
2154 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2155 strcpy(resbuf + residx, sp);
2157 //bb_error_msg("end sp:'%s'%p", sp,sp);
// The result buffer is passed to setvar_p() as the new value of dest or $0.
2158 setvar_p(dest ? dest : intvar[F0], resbuf);
// Parse a date specification ds of up to seven numbers (year month day hour
// minute second, then a DST flag) and convert it with mktime().
// NOTE(review): the function returns int, so the time_t result of mktime() is
// narrowed on return.
2164 static NOINLINE int do_mktime(const char *ds)
2169 /*memset(&then, 0, sizeof(then)); - not needed */
2170 then.tm_isdst = -1; /* default is unknown */
2172 /* manpage of mktime says these fields are ints,
2173 * so we can sscanf stuff directly into them */
// NOTE(review): %u formally expects pointers to unsigned int while the tm
// fields are int; the unsigned casts in the checks below rely on this.
2174 count = sscanf(ds, "%u %u %u %u %u %u %d",
2175 &then.tm_year, &then.tm_mon, &then.tm_mday,
2176 &then.tm_hour, &then.tm_min, &then.tm_sec,
2180 || (unsigned)then.tm_mon < 1
2181 || (unsigned)then.tm_year < 1900
// struct tm counts years from 1900.
2187 then.tm_year -= 1900;
2189 return mktime(&then);
// Execute the builtin function described by node op and store its value in
// res. The case labels that select each builtin are on lines not shown in
// this listing, so the comments below describe the visible statements only.
2192 static NOINLINE var *exec_builtin(node *op, var *res)
2194 #define tspl (G.exec_builtin__tspl)
2200 regmatch_t pmatch[2];
2209 isr = info = op->info;
2212 av[2] = av[3] = NULL;
// Collect up to four arguments. Bits of isr decide whether an argument is
// evaluated (mask 0x09000000) and whether its string form is fetched
// (mask 0x08000000); how isr changes between arguments is not shown.
2213 for (i = 0; i < 4 && op; i++) {
2214 an[i] = nextarg(&op);
2215 if (isr & 0x09000000)
2216 av[i] = evaluate(an[i], &tv[i]);
2217 if (isr & 0x08000000)
2218 as[i] = getvar_s(av[i]);
// The top two bits of info give the minimum number of arguments.
2223 if ((uint32_t)nargs < (info >> 30))
2224 syntax_error(EMSG_TOO_FEW_ARGS);
// atan2 is available only when libm support is compiled in.
2230 if (ENABLE_FEATURE_AWK_LIBM)
2231 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2233 syntax_error(EMSG_NO_MATH);
// Split: a literal regexp third argument is used as is; anything else is
// evaluated and turned into a temporary splitter (tspl).
2240 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2241 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2246 n = awk_split(as[0], spl, &s);
// The target array is emptied and refilled with elements indexed 1..n.
2248 clear_array(iamarray(av[1]));
2249 for (i = 1; i <= n; i++)
2250 setari_u(av[1], i, nextword(&s));
// Substring: the second argument is a 1-based start, converted to an offset.
2260 i = getvar_i(av[1]) - 1;
// Length defaults to the rest of the string when no third argument is given.
2265 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2268 s = xstrndup(as[0]+i, n);
2273 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2274 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2276 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2280 setvar_i(res, ~getvar_i_int(av[0]));
// NOTE(review): the shift counts below come straight from the script; counts
// at or above the operand width are undefined in C - confirm handling.
2284 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2288 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2292 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2296 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
// Case conversion works on a private copy and touches only ASCII letters:
// clearing bit 0x20 gives upper case, setting it gives lower case.
2302 s1 = s = xstrdup(as[0]);
2304 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2305 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2306 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
// Index search: l is the haystack length minus the needle length ll, i.e.
// the last offset at which the needle can still fit.
2316 l = strlen(as[0]) - ll;
2317 if (ll > 0 && l >= 0) {
2319 char *s = strstr(as[0], as[1]);
// The reported position is 1-based.
2321 n = (s - as[0]) + 1;
2323 /* this piece of code is terribly slow and
2324 * really should be rewritten
2326 for (i = 0; i <= l; i++) {
2327 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
// Time formatting: output goes to g_buf, limited to MAXVARFMT bytes, with a
// default format when no format argument was supplied.
2339 tt = getvar_i(av[1]);
2342 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2343 i = strftime(g_buf, MAXVARFMT,
2344 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2347 setvar_s(res, g_buf);
2351 setvar_i(res, do_mktime(as[0]));
// Regexp match: the match offsets are stored in RSTART and RLENGTH.
2355 re = as_regex(an[1], &sreg);
2356 n = regexec(re, as[0], 1, pmatch, 0);
// These offsets give RSTART 0 and RLENGTH -1; presumably the no-match case.
2361 pmatch[0].rm_so = 0;
2362 pmatch[0].rm_eo = -1;
2364 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2365 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2366 setvar_i(res, pmatch[0].rm_so);
// Substitution variants: an explicit match number with the result stored in
// res and back-references enabled; replace all (nm = 0); replace first.
2372 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2376 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2380 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2390 * Evaluate node - the heart of the program. Supplied with subtree
2391 * and place where to store result. returns ptr to result.
// XC() drops the low 8 bits of an opcode class so it can be a switch label.
2393 #define XC(n) ((n) >> 8)
2395 static var *evaluate(node *op, var *res)
2397 /* This procedure is recursive so we should count every byte */
2398 #define fnargs (G.evaluate__fnargs)
2399 /* seed is initialized to 1 */
2400 #define seed (G.evaluate__seed)
2401 #define sreg (G.evaluate__sreg)
2406 return setvar_s(res, NULL);
2408 debug_printf_eval("entered %s()\n", __func__);
2416 } L = L; /* for compiler */
// opn is the operation-specific part of opinfo; g_lineno follows the node
// being executed.
2427 opn = (opinfo & OPNMASK);
2428 g_lineno = op->lineno;
2430 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2432 /* execute inevitable things */
// Flag bits in opinfo request operand preparation before the dispatch:
// evaluate left/right operands, fetch strings, fetch the left number.
2433 if (opinfo & OF_RES1)
2434 L.v = evaluate(op1, v1);
2435 if (opinfo & OF_RES2)
2436 R.v = evaluate(op->r.n, v1+1);
2437 if (opinfo & OF_STR1) {
2438 L.s = getvar_s(L.v);
2439 debug_printf_eval("L.s:'%s'\n", L.s);
2441 if (opinfo & OF_STR2) {
2442 R.s = getvar_s(R.v);
2443 debug_printf_eval("R.s:'%s'\n", R.s);
2445 if (opinfo & OF_NUM1) {
2446 L_d = getvar_i(L.v);
2447 debug_printf_eval("L_d:%f\n", L_d);
2450 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2451 switch (XC(opinfo & OPCLSMASK)) {
2453 /* -- iterative node type -- */
2457 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2458 /* it's range pattern */
// OF_CHECKED on the node records that the range is currently open; it is set
// when the start pattern matches and cleared when the end pattern matches.
2459 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2460 op->info |= OF_CHECKED;
2461 if (ptest(op1->r.n))
2462 op->info &= ~OF_CHECKED;
// Plain pattern: continue at a.n when it holds, at r.n otherwise.
2468 op = ptest(op1) ? op->a.n : op->r.n;
2472 /* just evaluate an expression, also used as unconditional jump */
2476 /* branch, used in if-else and various loops */
2478 op = istrue(L.v) ? op->a.n : op->r.n;
2481 /* initialize for-in loop */
2482 case XC( OC_WALKINIT ):
2483 hashwalk_init(L.v, iamarray(R.v));
2486 /* get next array item */
2487 case XC( OC_WALKNEXT ):
2488 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2491 case XC( OC_PRINT ):
2492 case XC( OC_PRINTF ): {
// Redirected output: the target is looked up or created by name (R.s) and
// opened as a pipe or as a file in write or append mode.
2496 rstream *rsm = newfile(R.s);
2499 rsm->F = popen(R.s, "w");
2501 bb_perror_msg_and_die("popen");
2504 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2510 if ((opinfo & OPCLSMASK) == OC_PRINT) {
// The visible print paths write $0, or each argument in turn: numbers are
// formatted with OFMT, strings written as is, OFS between and ORS at the end.
2512 fputs(getvar_s(intvar[F0]), F);
2515 var *v = evaluate(nextarg(&op1), v1);
2516 if (v->type & VF_NUMBER) {
2517 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2521 fputs(getvar_s(v), F);
2525 fputs(getvar_s(intvar[OFS]), F);
2528 fputs(getvar_s(intvar[ORS]), F);
2530 } else { /* OC_PRINTF */
2531 char *s = awk_printf(op1);
2539 case XC( OC_DELETE ): {
2540 uint32_t info = op1->info & OPCLSMASK;
// The operand must be a variable or a function argument.
2543 if (info == OC_VAR) {
2545 } else if (info == OC_FNARG) {
2546 v = &fnargs[op1->l.aidx];
2548 syntax_error(EMSG_NOT_ARRAY);
// Either one element is removed by key, or the whole array is cleared.
2554 s = getvar_s(evaluate(op1->r.n, v1));
2555 hash_remove(iamarray(v), s);
2557 clear_array(iamarray(v));
2562 case XC( OC_NEWSOURCE ):
2563 g_progname = op->l.new_progname;
2566 case XC( OC_RETURN ):
2570 case XC( OC_NEXTFILE ):
2581 /* -- recursive node type -- */
2585 if (L.v == intvar[NF])
2589 case XC( OC_FNARG ):
2590 L.v = &fnargs[op->l.aidx];
// With a subscript present (r.n), the result is the array element for R.s.
2592 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
// Membership test: 1 if key L.s exists in array R.v, else 0.
2596 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2599 case XC( OC_REGEXP ):
2601 L.s = getvar_s(intvar[F0]);
2604 case XC( OC_MATCH ):
2608 regex_t *re = as_regex(op1, &sreg);
2609 int i = regexec(re, L.s, 0, NULL, 0);
// The match outcome is inverted when opn is the negation operator.
2612 setvar_i(res, (i == 0) ^ (opn == '!'));
2617 debug_printf_eval("MOVE\n");
2618 /* if source is a temporary string, just relink it to dest */
2619 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2620 //then L.v ends up being a string, which is wrong
2621 // if (R.v == v1+1 && R.v->string) {
2622 // res = setvar_p(L.v, R.v->string);
2623 // R.v->string = NULL;
2625 res = copyvar(L.v, R.v);
2629 case XC( OC_TERNARY ):
// The right child must be an OC_COLON node holding both alternatives.
2630 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2631 syntax_error(EMSG_POSSIBLE_ERROR);
2632 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2635 case XC( OC_FUNC ): {
2637 const char *sv_progname;
// A function without a body was called but never defined.
2639 if (!op->r.f->body.first)
2640 syntax_error(EMSG_UNDEF_FUNC);
// A fresh argument vector with nargs + 1 slots is allocated for the call.
2642 vbeg = v = nvalloc(op->r.f->nargs + 1);
2644 var *arg = evaluate(nextarg(&op1), v1);
2646 v->type |= VF_CHILD;
2648 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved around the call and restored afterwards.
2654 sv_progname = g_progname;
2656 res = evaluate(op->r.f->body.first, res);
2658 g_progname = sv_progname;
2665 case XC( OC_GETLINE ):
2666 case XC( OC_PGETLINE ): {
// Source selection: a command pipe, a named file, or the main input.
2673 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2674 rsm->F = popen(L.s, "r");
2675 rsm->is_pipe = TRUE;
2677 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2682 iF = next_input_file();
// An unavailable stream is reported through ERRNO.
2686 if (!rsm || !rsm->F) {
2687 setvar_i(intvar[ERRNO], errno);
2695 i = awk_getline(rsm, R.v);
// FNR is incremented only for a successful read with no explicit source.
2696 if (i > 0 && !op1) {
2697 incvar(intvar[FNR]);
2704 /* simple builtins */
2705 case XC( OC_FBLTIN ): {
2706 double R_d = R_d; /* for compiler */
2714 R_d = (double)rand() / (double)RAND_MAX;
2718 if (ENABLE_FEATURE_AWK_LIBM) {
2724 if (ENABLE_FEATURE_AWK_LIBM) {
2730 if (ENABLE_FEATURE_AWK_LIBM) {
2736 if (ENABLE_FEATURE_AWK_LIBM) {
2742 if (ENABLE_FEATURE_AWK_LIBM) {
2747 syntax_error(EMSG_NO_MATH);
// Seeding: the given argument when present, otherwise the current time.
2752 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2762 L.s = getvar_s(intvar[F0]);
// The command runs only when exec is enabled and the string is non-empty;
// the result is the status from system() shifted right by 8 bits.
2768 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2769 ? (system(L.s) >> 8) : 0;
2775 } else if (L.s && *L.s) {
2776 rstream *rsm = newfile(L.s);
// Close: the stream is looked up by name in fdhash.
2786 rsm = (rstream *)hash_search(fdhash, L.s);
2787 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2789 debug_printf_eval("OC_FBLTIN F_cl "
2790 "rsm->is_pipe:%d, ->F:%p\n",
2791 rsm->is_pipe, rsm->F);
2792 /* Can be NULL if open failed. Example:
2793 * getline line <"doesnt_exist";
2794 * close("doesnt_exist"); <--- here rsm->F is NULL
// Pipes are closed with pclose(), files with fclose(); the entry is removed.
2797 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2799 hash_remove(fdhash, L.s);
2802 setvar_i(intvar[ERRNO], errno);
2811 case XC( OC_BUILTIN ):
2812 res = exec_builtin(op, res);
2815 case XC( OC_SPRINTF ):
2816 setvar_p(res, awk_printf(op1));
2819 case XC( OC_UNARY ): {
2822 Ld = R_d = getvar_i(R.v);
2849 case XC( OC_FIELD ): {
2850 int i = (int)getvar_i(R.v);
// Fields[] is 0-based, so field i is stored at index i - 1.
2857 res = &Fields[i - 1];
2862 /* concatenation (" ") and index joining (",") */
2863 case XC( OC_CONCAT ):
2864 case XC( OC_COMMA ): {
2865 const char *sep = "";
2866 if ((opinfo & OPCLSMASK) == OC_COMMA)
2867 sep = getvar_s(intvar[SUBSEP]);
2868 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// Short-circuit logic: the right side is tested only when the left side does
// not already decide the result (AND form first, OR form second).
2873 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2877 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2880 case XC( OC_BINARY ):
2881 case XC( OC_REPLACE ): {
2882 double R_d = getvar_i(R.v);
2883 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2896 syntax_error(EMSG_DIV_BY_ZERO);
2900 if (ENABLE_FEATURE_AWK_LIBM)
2901 L_d = pow(L_d, R_d);
2903 syntax_error(EMSG_NO_MATH);
2907 syntax_error(EMSG_DIV_BY_ZERO);
// Remainder via a truncated quotient.
// NOTE(review): the (int) cast limits the quotient to the range of int.
2908 L_d -= (int)(L_d / R_d) * R_d;
2911 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res; OC_REPLACE writes back into the left operand.
2912 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2916 case XC( OC_COMPARE ): {
2917 int i = i; /* for compiler */
// Numeric comparison when both sides are numeric, else string comparison,
// case-insensitive when icase is set.
2920 if (is_numeric(L.v) && is_numeric(R.v)) {
2921 Ld = getvar_i(L.v) - getvar_i(R.v);
2923 const char *l = getvar_s(L.v);
2924 const char *r = getvar_s(R.v);
2925 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
// The low bit of opn inverts the outcome of the selected comparison.
2927 switch (opn & 0xfe) {
2938 setvar_i(res, (i == 0) ^ (opn & 1));
2943 syntax_error(EMSG_POSSIBLE_ERROR);
// Opcode classes are ordered: the two range checks below compare the class
// against SHIFT_TIL_THIS and RECUR_FROM_THIS; their actions are not shown.
2945 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2947 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2954 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2962 /* -------- main & co. -------- */
// Shutdown path: the END sequence is evaluated, then every stream recorded in
// fdhash that is an open pipe is closed with pclose().
// NOTE(review): the use of r and the final exit call are on lines not shown.
2964 static int awk_exit(int r)
2975 evaluate(endseq.first, &tv);
2978 /* waiting for children */
2979 for (i = 0; i < fdhash->csize; i++) {
2980 hi = fdhash->items[i];
2982 if (hi->data.rs.F && hi->data.rs.is_pipe)
2983 pclose(hi->data.rs.F);
2991 /* if expr looks like "var=value", perform assignment and return 1,
2992 * otherwise return 0 */
2993 static int is_assignment(const char *expr)
2995 char *exprc, *val, *s, *s1;
// Reject input that does not start with an isalnum_() character or that has
// no equals sign.
2997 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
// Work on a private copy; val is re-based to the same offset in that copy.
3001 exprc = xstrdup(expr);
3002 val = exprc + (val - expr);
// The value is rewritten in place through nextchar(), which presumably
// decodes escape sequences - confirm. The setup of s and s1 is not shown.
3006 while ((*s1 = nextchar(&s)) != '\0')
// The variable named by exprc is created if needed and assigned val.
3009 setvar_u(newvar(exprc), val);
3014 /* switch to next input file */
// Walks ARGV starting after ARGIND. Entries of the form var=value are applied
// as assignments by is_assignment(); other non-empty entries are opened.
3015 static rstream *next_input_file(void)
3017 #define rsm (G.next_input_file__rsm)
3018 #define files_happen (G.next_input_file__files_happen)
3021 const char *fname, *ind;
// Read positions of the shared stream state are reset.
3026 rsm.pos = rsm.adv = 0;
// No further ARGV entries remain once ARGIND + 1 reaches ARGC.
3029 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
// ARGIND is incremented and its string form used as the ARGV subscript.
3036 ind = getvar_s(incvar(intvar[ARGIND]));
3037 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3038 if (fname && *fname && !is_assignment(fname)) {
3039 F = xfopen_stdin(fname);
// Record that a file operand was seen and publish its name in FILENAME.
3044 files_happen = TRUE;
3045 setvar_s(intvar[FILENAME], fname);
// Applet entry point: set up the global state, parse options and the program
// text, run BEGIN, then feed every input record through the main sequence.
3052 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3053 int awk_main(int argc, char **argv)
3057 llist_t *list_v = NULL;
3058 llist_t *list_f = NULL;
3063 char *vnames = (char *)vNames; /* cheat */
3064 char *vvalues = (char *)vValues;
3068 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3069 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3070 if (ENABLE_LOCALE_SUPPORT)
3071 setlocale(LC_NUMERIC, "C");
3075 /* allocate global buffer */
3076 g_buf = xmalloc(MAXVARFMT + 1);
// Four hash tables are created: vhash, ahash, fdhash and fnhash.
3078 vhash = hash_init();
3079 ahash = hash_init();
3080 fdhash = hash_init();
3081 fnhash = hash_init();
3083 /* initialize variables */
// Built-in variables come from two parallel word lists (names and values).
// A value starting with byte \377 skips the string initialisation, and a name
// marked with an asterisk gets the VF_SPECIAL flag.
3084 for (i = 0; *vnames; i++) {
3085 intvar[i] = v = newvar(nextword(&vnames));
3086 if (*vvalues != '\377')
3087 setvar_s(v, nextword(&vvalues));
3091 if (*vnames == '*') {
3092 v->type |= VF_SPECIAL;
// Build the initial field and record splitters from the default FS and RS.
3097 handle_special(intvar[FS]);
3098 handle_special(intvar[RS]);
// Pre-register the three standard streams under their /dev names.
3100 newfile("/dev/stdin")->F = stdin;
3101 newfile("/dev/stdout")->F = stdout;
3102 newfile("/dev/stderr")->F = stderr;
3104 /* Huh, people report that sometimes environ is NULL. Oh well. */
3105 if (environ) for (envp = environ; *envp; envp++) {
3106 /* environ is writable, thus we don't strdup it needlessly */
3108 char *s1 = strchr(s, '=');
3111 /* Both findvar and setvar_u take const char*
3112 * as 2nd arg -> environment is not trashed */
3113 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3117 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3118 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
3122 setvar_s(intvar[FS], opt_F); // -F
3123 while (list_v) { /* -v */
3124 if (!is_assignment(llist_pop(&list_v)))
3127 if (list_f) { /* -f */
3132 g_progname = llist_pop(&list_f);
3133 from_file = xfopen_stdin(g_progname);
3134 /* one byte is reserved for some trick in next_token */
// The program text is read in chunks of up to 4094 bytes starting at offset
// 1; the buffer is regrown to i + 4096 bytes before every read.
3135 for (i = j = 1; j > 0; i += j) {
3136 s = xrealloc(s, i + 4096);
3137 j = fread(s + i, 1, 4094, from_file);
3141 parse_program(s + 1);
3145 } else { // no -f: take program from 1st parameter
3148 g_progname = "cmd. line";
3149 parse_program(*argv++);
3151 if (opt & 0x8) // -W
3152 bb_error_msg("warning: option -W is ignored");
3154 /* fill in ARGV array */
3155 setvar_i(intvar[ARGC], argc);
3156 setari_u(intvar[ARGV], 0, "awk");
3159 setari_u(intvar[ARGV], ++i, *argv++);
// Run BEGIN; with neither main rules nor END there is nothing left to do.
3161 evaluate(beginseq.first, &tv);
3162 if (!mainseq.first && !endseq.first)
3163 awk_exit(EXIT_SUCCESS);
3165 /* input file could already be opened in BEGIN block */
3167 iF = next_input_file();
3169 /* passing through input files */
// FNR restarts at 0 here and is incremented for every record read from iF.
3172 setvar_i(intvar[FNR], 0);
3174 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3177 incvar(intvar[FNR]);
3178 evaluate(mainseq.first, &tv);
// The errno text is reported through syntax_error(); the condition guarding
// this call is on a line not shown.
3185 syntax_error(strerror(errno));
3187 iF = next_input_file();
3190 awk_exit(EXIT_SUCCESS);