1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 //usage:#define awk_trivial_usage
11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage: " -v VAR=VAL Set variable"
14 //usage: "\n -F SEP Use SEP as field separator"
15 //usage: "\n -f FILE Read program from FILE"
21 /* This is a NOEXEC applet. Be very careful! */
24 /* If you comment out one of these below, it will be #defined later
25 * to perform debug printfs to stderr: */
26 #define debug_printf_walker(...) do {} while (0)
27 #define debug_printf_eval(...) do {} while (0)
28 #define debug_printf_parse(...) do {} while (0)
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
46 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
47 #define VF_ARRAY 0x0002 /* 1 = it's an array */
49 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
50 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
51 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
52 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
53 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
54 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
55 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
57 /* these flags are static, don't change them when value is changed */
58 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
60 typedef struct walker_list {
63 struct walker_list *prev;
68 typedef struct var_s {
69 unsigned type; /* flags */
73 int aidx; /* func arg idx (for compilation stage) */
74 struct xhash_s *array; /* array ptr */
75 struct var_s *parent; /* for func args, ptr to actual parameter */
76 walker_list *walker; /* list of array elements (for..in) */
80 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
81 typedef struct chain_s {
84 const char *programname;
88 typedef struct func_s {
94 typedef struct rstream_s {
103 typedef struct hash_item_s {
105 struct var_s v; /* variable/array hash */
106 struct rstream_s rs; /* redirect streams hash */
107 struct func_s f; /* functions hash */
109 struct hash_item_s *next; /* next in chain */
110 char name[1]; /* really it's longer */
113 typedef struct xhash_s {
114 unsigned nel; /* num of elements */
115 unsigned csize; /* current hash size */
116 unsigned nprime; /* next hash size in PRIMES[] */
117 unsigned glen; /* summary length of item names */
118 struct hash_item_s **items;
122 typedef struct node_s {
142 /* Block of temporary variables */
143 typedef struct nvblock_s {
146 struct nvblock_s *prev;
147 struct nvblock_s *next;
151 typedef struct tsplitter_s {
156 /* simple token classes */
157 /* Order and hex values are very important!!! See next_token() */
158 #define TC_SEQSTART 1 /* ( */
159 #define TC_SEQTERM (1 << 1) /* ) */
160 #define TC_REGEXP (1 << 2) /* /.../ */
161 #define TC_OUTRDR (1 << 3) /* | > >> */
162 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
163 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
164 #define TC_BINOPX (1 << 6) /* two-opnd operator */
165 #define TC_IN (1 << 7)
166 #define TC_COMMA (1 << 8)
167 #define TC_PIPE (1 << 9) /* input redirection pipe */
168 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
169 #define TC_ARRTERM (1 << 11) /* ] */
170 #define TC_GRPSTART (1 << 12) /* { */
171 #define TC_GRPTERM (1 << 13) /* } */
172 #define TC_SEMICOL (1 << 14)
173 #define TC_NEWLINE (1 << 15)
174 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
175 #define TC_WHILE (1 << 17)
176 #define TC_ELSE (1 << 18)
177 #define TC_BUILTIN (1 << 19)
178 #define TC_GETLINE (1 << 20)
179 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
180 #define TC_BEGIN (1 << 22)
181 #define TC_END (1 << 23)
182 #define TC_EOF (1 << 24)
183 #define TC_VARIABLE (1 << 25)
184 #define TC_ARRAY (1 << 26)
185 #define TC_FUNCTION (1 << 27)
186 #define TC_STRING (1 << 28)
187 #define TC_NUMBER (1 << 29)
189 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
191 /* combined token classes */
192 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
193 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
194 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
195 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
197 #define TC_STATEMNT (TC_STATX | TC_WHILE)
198 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
200 /* word tokens, cannot mean something else if not expected */
201 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
202 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
204 /* discard newlines after these */
205 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
206 | TC_BINOP | TC_OPTERM)
208 /* what can expression begin with */
209 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
210 /* what can group begin with */
211 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
213 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
214 /* operator is inserted between them */
215 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
216 | TC_STRING | TC_NUMBER | TC_UOPPOST)
217 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
219 #define OF_RES1 0x010000
220 #define OF_RES2 0x020000
221 #define OF_STR1 0x040000
222 #define OF_STR2 0x080000
223 #define OF_NUM1 0x100000
224 #define OF_CHECKED 0x200000
226 /* combined operator flags */
229 #define xS (OF_RES2 | OF_STR2)
231 #define VV (OF_RES1 | OF_RES2)
232 #define Nx (OF_RES1 | OF_NUM1)
233 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
234 #define Sx (OF_RES1 | OF_STR1)
235 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
236 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
238 #define OPCLSMASK 0xFF00
239 #define OPNMASK 0x007F
241 /* Operator priority is the highest byte (even: r->l, odd: l->r grouping).
242 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
243 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
248 #define P(x) (x << 24)
249 #define PRIMASK 0x7F000000
250 #define PRIMASK2 0x7E000000
252 /* Operation classes */
254 #define SHIFT_TIL_THIS 0x0600
255 #define RECUR_FROM_THIS 0x1000
258 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
259 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
261 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
262 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
263 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
265 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
266 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
267 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
268 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
269 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
270 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
271 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
272 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
275 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
279 /* simple builtins */
281 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
282 F_ti, F_le, F_sy, F_ff, F_cl
287 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
289 B_an, B_co, B_ls, B_or, B_rs, B_xo,
292 /* tokens and their corresponding info values */
294 #define NTC "\377" /* switch to next token class (tc<<1) */
297 #define OC_B OC_BUILTIN
299 static const char tokenlist[] ALIGN1 =
302 "\1/" NTC /* REGEXP */
303 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
304 "\2++" "\2--" NTC /* UOPPOST */
305 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
306 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
307 "\2*=" "\2/=" "\2%=" "\2^="
308 "\1+" "\1-" "\3**=" "\2**"
309 "\1/" "\1%" "\1^" "\1*"
310 "\2!=" "\2>=" "\2<=" "\1>"
311 "\1<" "\2!~" "\1~" "\2&&"
312 "\2||" "\1?" "\1:" NTC
316 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
322 "\2if" "\2do" "\3for" "\5break" /* STATX */
323 "\10continue" "\6delete" "\5print"
324 "\6printf" "\4next" "\10nextfile"
325 "\6return" "\4exit" NTC
329 "\3and" "\5compl" "\6lshift" "\2or"
331 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
332 "\3cos" "\3exp" "\3int" "\3log"
333 "\4rand" "\3sin" "\4sqrt" "\5srand"
334 "\6gensub" "\4gsub" "\5index" "\6length"
335 "\5match" "\5split" "\7sprintf" "\3sub"
336 "\6substr" "\7systime" "\10strftime" "\6mktime"
337 "\7tolower" "\7toupper" NTC
339 "\4func" "\10function" NTC
342 /* compiler adds trailing "\0" */
345 static const uint32_t tokeninfo[] = {
349 xS|'a', xS|'w', xS|'|',
350 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
351 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
352 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
353 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
354 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
355 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
356 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
357 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
358 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
359 OC_IN|SV|P(49), /* in */
361 OC_PGETLINE|SV|P(37),
362 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
368 ST_IF, ST_DO, ST_FOR, OC_BREAK,
369 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
370 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
371 OC_RETURN|Vx, OC_EXIT|Nx,
375 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
376 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
377 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
378 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
379 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
380 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
381 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
382 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
383 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
390 /* internal variable names and their initial values */
391 /* an asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
393 CONVFMT, OFMT, FS, OFS,
394 ORS, RS, RT, FILENAME,
395 SUBSEP, F0, ARGIND, ARGC,
396 ARGV, ERRNO, FNR, NR,
397 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
400 static const char vNames[] ALIGN1 =
401 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
402 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
403 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
404 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
405 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
407 static const char vValues[] ALIGN1 =
408 "%.6g\0" "%.6g\0" " \0" " \0"
409 "\n\0" "\n\0" "\0" "\0"
410 "\034\0" "\0" "\377";
412 /* hash size may grow to these values */
413 #define FIRST_PRIME 61
414 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
417 /* Globals. Split in two parts so that first one is addressed
418 * with (mostly short) negative offsets.
419 * NB: it's unsafe to put members of type "double"
420 * into globals2 (gcc may fail to align them).
424 chain beginseq, mainseq, endseq;
426 node *break_ptr, *continue_ptr;
428 xhash *vhash, *ahash, *fdhash, *fnhash;
429 const char *g_progname;
432 int maxfields; /* used in fsrealloc() only */
441 smallint is_f0_split;
445 uint32_t t_info; /* often used */
450 var *intvar[NUM_INTERNAL_VARS]; /* often used */
452 /* former statics from various functions */
453 char *split_f0__fstrings;
455 uint32_t next_token__save_tclass;
456 uint32_t next_token__save_info;
457 uint32_t next_token__ltclass;
458 smallint next_token__concat_inserted;
460 smallint next_input_file__files_happen;
461 rstream next_input_file__rsm;
463 var *evaluate__fnargs;
464 unsigned evaluate__seed;
465 regex_t evaluate__sreg;
469 tsplitter exec_builtin__tspl;
471 /* biggest and least used members go last */
472 tsplitter fsplitter, rsplitter;
474 #define G1 (ptr_to_globals[-1])
475 #define G (*(struct globals2 *)ptr_to_globals)
476 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477 /*char G1size[sizeof(G1)]; - 0x74 */
478 /*char Gsize[sizeof(G)]; - 0x1c4 */
479 /* Trying to keep most of members accessible with short offsets: */
480 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
481 #define t_double (G1.t_double )
482 #define beginseq (G1.beginseq )
483 #define mainseq (G1.mainseq )
484 #define endseq (G1.endseq )
485 #define seq (G1.seq )
486 #define break_ptr (G1.break_ptr )
487 #define continue_ptr (G1.continue_ptr)
489 #define vhash (G1.vhash )
490 #define ahash (G1.ahash )
491 #define fdhash (G1.fdhash )
492 #define fnhash (G1.fnhash )
493 #define g_progname (G1.g_progname )
494 #define g_lineno (G1.g_lineno )
495 #define nfields (G1.nfields )
496 #define maxfields (G1.maxfields )
497 #define Fields (G1.Fields )
498 #define g_cb (G1.g_cb )
499 #define g_pos (G1.g_pos )
500 #define g_buf (G1.g_buf )
501 #define icase (G1.icase )
502 #define exiting (G1.exiting )
503 #define nextrec (G1.nextrec )
504 #define nextfile (G1.nextfile )
505 #define is_f0_split (G1.is_f0_split )
506 #define t_rollback (G1.t_rollback )
507 #define t_info (G.t_info )
508 #define t_tclass (G.t_tclass )
509 #define t_string (G.t_string )
510 #define t_lineno (G.t_lineno )
511 #define intvar (G.intvar )
512 #define fsplitter (G.fsplitter )
513 #define rsplitter (G.rsplitter )
514 #define INIT_G() do { \
515 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516 G.next_token__ltclass = TC_OPTERM; \
517 G.evaluate__seed = 1; \
521 /* function prototypes */
522 static void handle_special(var *);
523 static node *parse_expr(uint32_t);
524 static void chain_group(void);
525 static var *evaluate(node *, var *);
526 static rstream *next_input_file(void);
527 static int fmt_num(char *, int, const char *, double, int);
528 static int awk_exit(int) NORETURN;
530 /* ---- error handling ---- */
532 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
533 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
534 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
535 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
536 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
537 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
538 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
539 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
540 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
541 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
543 static void zero_out_var(var *vp)
545 memset(vp, 0, sizeof(*vp));
548 static void syntax_error(const char *message) NORETURN;
549 static void syntax_error(const char *message)
551 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
554 /* ---- hash stuff ---- */
556 static unsigned hashidx(const char *name)
561 idx = *name++ + (idx << 6) - idx;
565 /* create new hash */
566 static xhash *hash_init(void)
570 newhash = xzalloc(sizeof(*newhash));
571 newhash->csize = FIRST_PRIME;
572 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
577 /* find item in hash, return ptr to data, NULL if not found */
578 static void *hash_search(xhash *hash, const char *name)
582 hi = hash->items[hashidx(name) % hash->csize];
584 if (strcmp(hi->name, name) == 0)
591 /* grow hash if it becomes too big */
592 static void hash_rebuild(xhash *hash)
594 unsigned newsize, i, idx;
595 hash_item **newitems, *hi, *thi;
597 if (hash->nprime == ARRAY_SIZE(PRIMES))
600 newsize = PRIMES[hash->nprime++];
601 newitems = xzalloc(newsize * sizeof(newitems[0]));
603 for (i = 0; i < hash->csize; i++) {
608 idx = hashidx(thi->name) % newsize;
609 thi->next = newitems[idx];
615 hash->csize = newsize;
616 hash->items = newitems;
619 /* find item in hash, add it if necessary. Return ptr to data */
620 static void *hash_find(xhash *hash, const char *name)
626 hi = hash_search(hash, name);
628 if (++hash->nel / hash->csize > 10)
631 l = strlen(name) + 1;
632 hi = xzalloc(sizeof(*hi) + l);
633 strcpy(hi->name, name);
635 idx = hashidx(name) % hash->csize;
636 hi->next = hash->items[idx];
637 hash->items[idx] = hi;
643 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
644 #define newvar(name) ((var*) hash_find(vhash, (name)))
645 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
646 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
648 static void hash_remove(xhash *hash, const char *name)
650 hash_item *hi, **phi;
652 phi = &hash->items[hashidx(name) % hash->csize];
655 if (strcmp(hi->name, name) == 0) {
656 hash->glen -= (strlen(name) + 1);
666 /* ------ some useful functions ------ */
668 static char *skip_spaces(char *p)
671 if (*p == '\\' && p[1] == '\n') {
674 } else if (*p != ' ' && *p != '\t') {
682 /* returns old *s, advances *s past word and terminating NUL */
683 static char *nextword(char **s)
686 while (*(*s)++ != '\0')
691 static char nextchar(char **s)
698 c = bb_process_escape_sequence((const char**)s);
699 /* Example awk statement:
701 * we must treat \" as "
703 if (c == '\\' && *s == pps) { /* unrecognized \z? */
704 c = *(*s); /* yes, fetch z */
706 (*s)++; /* advance unless z = NUL */
711 /* TODO: merge with strcpy_and_process_escape_sequences()?
713 static void unescape_string_in_place(char *s1)
716 while ((*s1 = nextchar(&s)) != '\0')
720 static ALWAYS_INLINE int isalnum_(int c)
722 return (isalnum(c) || c == '_');
725 static double my_strtod(char **pp)
728 if (ENABLE_DESKTOP && cp[0] == '0') {
729 /* Might be hex or octal integer: 0x123abc or 07777 */
730 char c = (cp[1] | 0x20);
731 if (c == 'x' || isdigit(cp[1])) {
732 unsigned long long ull = strtoull(cp, pp, 0);
736 if (!isdigit(c) && c != '.')
738 /* else: it may be a floating number. Examples:
739 * 009.123 (*pp points to '9')
740 * 000.123 (*pp points to '.')
741 * fall through to strtod.
745 return strtod(cp, pp);
748 /* -------- working with variables (set/get/copy/etc) -------- */
750 static xhash *iamarray(var *v)
754 while (a->type & VF_CHILD)
757 if (!(a->type & VF_ARRAY)) {
759 a->x.array = hash_init();
764 static void clear_array(xhash *array)
769 for (i = 0; i < array->csize; i++) {
770 hi = array->items[i];
774 free(thi->data.v.string);
777 array->items[i] = NULL;
779 array->glen = array->nel = 0;
782 /* clear a variable */
783 static var *clrvar(var *v)
785 if (!(v->type & VF_FSTR))
788 v->type &= VF_DONTTOUCH;
794 /* assign string value to variable */
795 static var *setvar_p(var *v, char *value)
803 /* same as setvar_p but make a copy of string */
804 static var *setvar_s(var *v, const char *value)
806 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
809 /* same as setvar_s but sets USER flag */
810 static var *setvar_u(var *v, const char *value)
812 v = setvar_s(v, value);
817 /* set array element to user string */
818 static void setari_u(var *a, int idx, const char *s)
822 v = findvar(iamarray(a), itoa(idx));
826 /* assign numeric value to variable */
827 static var *setvar_i(var *v, double value)
830 v->type |= VF_NUMBER;
836 static const char *getvar_s(var *v)
838 /* if v is numeric and has no cached string, convert it to string */
839 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
840 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
841 v->string = xstrdup(g_buf);
842 v->type |= VF_CACHED;
844 return (v->string == NULL) ? "" : v->string;
847 static double getvar_i(var *v)
851 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
855 debug_printf_eval("getvar_i: '%s'->", s);
856 v->number = my_strtod(&s);
857 debug_printf_eval("%f (s:'%s')\n", v->number, s);
858 if (v->type & VF_USER) {
864 debug_printf_eval("getvar_i: '%s'->zero\n", s);
867 v->type |= VF_CACHED;
869 debug_printf_eval("getvar_i: %f\n", v->number);
873 /* Used for operands of bitwise ops */
874 static unsigned long getvar_i_int(var *v)
876 double d = getvar_i(v);
878 /* Casting doubles to longs is undefined for values outside
879 * of target type range. Try to widen it as much as possible */
881 return (unsigned long)d;
882 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
883 return - (long) (unsigned long) (-d);
886 static var *copyvar(var *dest, const var *src)
890 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
891 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
892 dest->number = src->number;
894 dest->string = xstrdup(src->string);
896 handle_special(dest);
900 static var *incvar(var *v)
902 return setvar_i(v, getvar_i(v) + 1.0);
905 /* return true if v is number or numeric string */
906 static int is_numeric(var *v)
909 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
912 /* return 1 when value of v corresponds to true, 0 otherwise */
913 static int istrue(var *v)
916 return (v->number != 0);
917 return (v->string && v->string[0]);
920 /* temporary variables allocator. Last allocated should be first freed */
921 static var *nvalloc(int n)
929 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
935 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
936 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
938 g_cb->pos = g_cb->nv;
940 /*g_cb->next = NULL; - xzalloc did it */
948 while (v < g_cb->pos) {
957 static void nvfree(var *v)
961 if (v < g_cb->nv || v >= g_cb->pos)
962 syntax_error(EMSG_INTERNAL_ERROR);
964 for (p = v; p < g_cb->pos; p++) {
965 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
966 clear_array(iamarray(p));
967 free(p->x.array->items);
970 if (p->type & VF_WALK) {
972 walker_list *w = p->x.walker;
973 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
977 debug_printf_walker(" free(%p)\n", w);
986 while (g_cb->prev && g_cb->pos == g_cb->nv) {
991 /* ------- awk program text parsing ------- */
993 /* Parse the next token pointed to by global pos, place results into the global t_* variables.
994 * If the token isn't expected, give up (syntax error). Return token class
996 static uint32_t next_token(uint32_t expected)
998 #define concat_inserted (G.next_token__concat_inserted)
999 #define save_tclass (G.next_token__save_tclass)
1000 #define save_info (G.next_token__save_info)
1001 /* Initialized to TC_OPTERM: */
1002 #define ltclass (G.next_token__ltclass)
1012 } else if (concat_inserted) {
1013 concat_inserted = FALSE;
1014 t_tclass = save_tclass;
1021 g_lineno = t_lineno;
1023 while (*p != '\n' && *p != '\0')
1031 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1033 } else if (*p == '\"') {
1036 while (*p != '\"') {
1038 if (*p == '\0' || *p == '\n')
1039 syntax_error(EMSG_UNEXP_EOS);
1041 *s++ = nextchar(&pp);
1047 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1049 } else if ((expected & TC_REGEXP) && *p == '/') {
1053 if (*p == '\0' || *p == '\n')
1054 syntax_error(EMSG_UNEXP_EOS);
1058 s[-1] = bb_process_escape_sequence((const char **)&pp);
1070 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1072 } else if (*p == '.' || isdigit(*p)) {
1075 t_double = my_strtod(&pp);
1078 syntax_error(EMSG_UNEXP_TOKEN);
1080 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1083 /* search for something known */
1088 int l = (unsigned char) *tl++;
1089 if (l == (unsigned char) NTCC) {
1093 /* if token class is expected,
1095 * and it's not a longer word,
1097 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1098 && strncmp(p, tl, l) == 0
1099 && !((tc & TC_WORD) && isalnum_(p[l]))
1101 /* then this is what we are looking for */
1103 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1110 /* not a known token */
1112 /* is it a name? (var/array/function) */
1114 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1117 while (isalnum_(*++p)) {
1122 /* also consume whitespace between functionname and bracket */
1123 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1127 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1132 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1134 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1140 /* skipping newlines in some cases */
1141 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1144 /* insert concatenation operator when needed */
1145 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1146 concat_inserted = TRUE;
1150 t_info = OC_CONCAT | SS | P(35);
1157 /* Are we ready for this? */
1158 if (!(ltclass & expected))
1159 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1160 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1163 #undef concat_inserted
1169 static void rollback_token(void)
1174 static node *new_node(uint32_t info)
1178 n = xzalloc(sizeof(node));
1180 n->lineno = g_lineno;
1184 static void mk_re_node(const char *s, node *n, regex_t *re)
1186 n->info = OC_REGEXP;
1189 xregcomp(re, s, REG_EXTENDED);
1190 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1193 static node *condition(void)
1195 next_token(TC_SEQSTART);
1196 return parse_expr(TC_SEQTERM);
1199 /* parse expression terminated by given argument, return ptr
1200 * to built subtree. Terminator is eaten by parse_expr */
1201 static node *parse_expr(uint32_t iexp)
1209 debug_printf_parse("%s(%x)\n", __func__, iexp);
1212 sn.r.n = glptr = NULL;
1213 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1215 while (!((tc = next_token(xtc)) & iexp)) {
1217 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1218 /* input redirection (<) attached to glptr node */
1219 debug_printf_parse("%s: input redir\n", __func__);
1220 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1222 xtc = TC_OPERAND | TC_UOPPRE;
1225 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1226 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1227 /* for binary and postfix-unary operators, jump back over
1228 * previous operators with higher priority */
1230 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1231 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1235 if ((t_info & OPCLSMASK) == OC_TERNARY)
1237 cn = vn->a.n->r.n = new_node(t_info);
1239 if (tc & TC_BINOP) {
1241 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1242 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1244 next_token(TC_GETLINE);
1245 /* give maximum priority to this pipe */
1246 cn->info &= ~PRIMASK;
1247 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1251 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1256 debug_printf_parse("%s: other\n", __func__);
1257 /* for operands and prefix-unary operators, attach them
1260 cn = vn->r.n = new_node(t_info);
1262 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1263 if (tc & (TC_OPERAND | TC_REGEXP)) {
1264 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1265 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1266 /* one should be very careful with switch on tclass -
1267 * only simple tclasses should be used! */
1271 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1273 v = hash_search(ahash, t_string);
1275 cn->info = OC_FNARG;
1276 cn->l.aidx = v->x.aidx;
1278 cn->l.v = newvar(t_string);
1280 if (tc & TC_ARRAY) {
1282 cn->r.n = parse_expr(TC_ARRTERM);
1288 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1290 v = cn->l.v = xzalloc(sizeof(var));
1292 setvar_i(v, t_double);
1294 setvar_s(v, t_string);
1298 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1299 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1303 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1305 cn->r.f = newfunc(t_string);
1306 cn->l.n = condition();
1310 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1311 cn = vn->r.n = parse_expr(TC_SEQTERM);
1313 syntax_error("Empty sequence");
1318 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1320 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1324 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1325 cn->l.n = condition();
1332 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1336 /* add node to chain. Return ptr to alloc'd node */
1337 static node *chain_node(uint32_t info)
1342 seq->first = seq->last = new_node(0);
1344 if (seq->programname != g_progname) {
1345 seq->programname = g_progname;
1346 n = chain_node(OC_NEWSOURCE);
1347 n->l.new_progname = xstrdup(g_progname);
1352 seq->last = n->a.n = new_node(OC_DONE);
1357 static void chain_expr(uint32_t info)
1361 n = chain_node(info);
1362 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1363 if (t_tclass & TC_GRPTERM)
1367 static node *chain_loop(node *nn)
1369 node *n, *n2, *save_brk, *save_cont;
1371 save_brk = break_ptr;
1372 save_cont = continue_ptr;
1374 n = chain_node(OC_BR | Vx);
1375 continue_ptr = new_node(OC_EXEC);
1376 break_ptr = new_node(OC_EXEC);
1378 n2 = chain_node(OC_EXEC | Vx);
1381 continue_ptr->a.n = n2;
1382 break_ptr->a.n = n->r.n = seq->last;
1384 continue_ptr = save_cont;
1385 break_ptr = save_brk;
1390 /* parse group and attach it to chain */
1391 static void chain_group(void)
1397 c = next_token(TC_GRPSEQ);
1398 } while (c & TC_NEWLINE);
1400 if (c & TC_GRPSTART) {
1401 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1402 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1403 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1404 if (t_tclass & TC_NEWLINE)
1409 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1410 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1411 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1413 chain_expr(OC_EXEC | Vx);
1416 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1417 switch (t_info & OPCLSMASK) {
1419 debug_printf_parse("%s: ST_IF\n", __func__);
1420 n = chain_node(OC_BR | Vx);
1421 n->l.n = condition();
1423 n2 = chain_node(OC_EXEC);
1425 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1427 n2->a.n = seq->last;
1434 debug_printf_parse("%s: ST_WHILE\n", __func__);
1436 n = chain_loop(NULL);
1441 debug_printf_parse("%s: ST_DO\n", __func__);
1442 n2 = chain_node(OC_EXEC);
1443 n = chain_loop(NULL);
1445 next_token(TC_WHILE);
1446 n->l.n = condition();
1450 debug_printf_parse("%s: ST_FOR\n", __func__);
1451 next_token(TC_SEQSTART);
1452 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1453 if (t_tclass & TC_SEQTERM) { /* for-in */
1454 if ((n2->info & OPCLSMASK) != OC_IN)
1455 syntax_error(EMSG_UNEXP_TOKEN);
1456 n = chain_node(OC_WALKINIT | VV);
1459 n = chain_loop(NULL);
1460 n->info = OC_WALKNEXT | Vx;
1462 } else { /* for (;;) */
1463 n = chain_node(OC_EXEC | Vx);
1465 n2 = parse_expr(TC_SEMICOL);
1466 n3 = parse_expr(TC_SEQTERM);
1476 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1477 n = chain_node(t_info);
1478 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1479 if (t_tclass & TC_OUTRDR) {
1481 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1483 if (t_tclass & TC_GRPTERM)
1488 debug_printf_parse("%s: OC_BREAK\n", __func__);
1489 n = chain_node(OC_EXEC);
1494 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1495 n = chain_node(OC_EXEC);
1496 n->a.n = continue_ptr;
1499 /* delete, next, nextfile, return, exit */
1501 debug_printf_parse("%s: default\n", __func__);
// parse_program: top-level parse loop over the program text p.
// Tokens are fetched with next_token() until TC_EOF, and each one is dispatched
// on its class: statement terminator, BEGIN, END, function declaration,
// pattern expression (TC_OPSEQ), or, in the final else branch, anything else
// (the inline comment there suggests a bare group start).
1507 static void parse_program(char *p)
1516 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1517 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1519 if (tclass & TC_OPTERM) {
1520 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1525 if (tclass & TC_BEGIN) {
1526 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1530 } else if (tclass & TC_END) {
1531 debug_printf_parse("%s: TC_END\n", __func__);
1535 } else if (tclass & TC_FUNCDECL) {
1536 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1537 next_token(TC_FUNCTION);
1539 f = newfunc(t_string);
1540 f->body.first = NULL;
// each parameter name is looked up in ahash and given the next argument index
1542 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1543 v = findvar(ahash, t_string);
1544 v->x.aidx = f->nargs++;
1546 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1553 } else if (tclass & TC_OPSEQ) {
1554 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
// a pattern becomes an OC_TEST node whose left link is the parsed expression
1556 cn = chain_node(OC_TEST);
1557 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1558 if (t_tclass & TC_GRPSTART) {
1559 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
// pattern with no action group: an OC_PRINT node is chained instead
1563 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1564 chain_node(OC_PRINT);
// the right link of the OC_TEST node is set to the current tail of mainseq
1566 cn->r.n = mainseq.last;
1568 } else /* if (tclass & TC_GRPSTART) */ {
1569 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1574 debug_printf_parse("%s: TC_EOF\n", __func__);
1578 /* -------- program execution part -------- */
1580 static node *mk_splitter(const char *s, tsplitter *spl)
1588 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1590 regfree(ire); // TODO: nuke ire, use re+1?
1592 if (s[0] && s[1]) { /* strlen(s) > 1 */
1593 mk_re_node(s, n, re);
1595 n->info = (uint32_t) s[0];
1601 /* use node as a regular expression. Supplied with node ptr and regex_t
1602 * storage space. Return ptr to regex (if result points to preg, it should
1603 * be later regfree'd manually
1605 static regex_t *as_regex(node *op, regex_t *preg)
1611 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1612 return icase ? op->r.ire : op->l.re;
1615 s = getvar_s(evaluate(op, v));
1617 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1618 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1619 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1620 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1621 * (maybe gsub is not supposed to use REG_EXTENDED?).
1623 if (regcomp(preg, s, cflags)) {
1624 cflags &= ~REG_EXTENDED;
1625 xregcomp(preg, s, cflags);
/*
 * Gradually growing buffer.
 *
 * b    - current buffer, or NULL to allocate a fresh one
 * n    - number of bytes the caller needs
 * size - in/out: current capacity of b
 *
 * Returns b unchanged while n < *size.  Otherwise the capacity becomes
 * n + n/2 + 80 and the buffer is reallocated.  Because reallocation also
 * happens when n == *size, there is always at least one spare byte
 * beyond the n requested.
 */
static char* qrealloc(char *b, int n, int *size)
{
	int grown;

	if (b != NULL && n < *size)
		return b;

	grown = n + (n >> 1) + 80;
	*size = grown;
	return xrealloc(b, grown);
}
1644 /* resize field storage space */
1645 static void fsrealloc(int size)
1649 if (size >= maxfields) {
1651 maxfields = size + 16;
1652 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1653 for (; i < maxfields; i++) {
1654 Fields[i].type = VF_SPECIAL;
1655 Fields[i].string = NULL;
1658 /* if size < nfields, clear extra field variables */
1659 for (i = size; i < nfields; i++) {
// awk_split: split string s according to splitter node spl.
// *slist receives a zeroed allocation of strlen(s) * 2 + 3 bytes that holds the
// pieces; split_f0 and exec_builtin in this file walk it with nextword().
// The visible branches cover a regex splitter, an empty separator (null split),
// a single-character separator, and a fallback that skips whitespace.
// The regex branch returns n early for an empty input and otherwise counts
// one field per separator seen plus the initial one.
1665 static int awk_split(const char *s, node *spl, char **slist)
1670 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1672 /* in worst case, each char would be a separate field */
1673 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
// for a non-regex splitter the low byte of spl->info is the separator character
1676 c[0] = c[1] = (char)spl->info;
1678 if (*getvar_s(intvar[RS]) == '\0')
1682 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1684 return n; /* "": zero fields */
1685 n++; /* at least one field will be there */
1687 l = strcspn(s, c+2); /* len till next NUL or \n */
1688 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1689 && pmatch[0].rm_so <= l
1691 l = pmatch[0].rm_so;
1692 if (pmatch[0].rm_eo == 0) {
1696 n++; /* we saw yet another delimiter */
1698 pmatch[0].rm_eo = l;
1703 /* make sure we remove *all* of the separator chars */
1706 } while (++l < pmatch[0].rm_eo);
1708 s += pmatch[0].rm_eo;
1712 if (c[0] == '\0') { /* null split */
1720 if (c[0] != ' ') { /* single-character split */
// c[] is the strpbrk() accept set used below; these two lines make it hold both letter cases
1722 c[0] = toupper(c[0]);
1723 c[1] = tolower(c[1]);
1727 while ((s1 = strpbrk(s1, c)) != NULL) {
1735 s = skip_whitespace(s);
1739 while (*s && !isspace(*s))
1746 static void split_f0(void)
1748 /* static char *fstrings; */
1749 #define fstrings (G.split_f0__fstrings)
1760 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1763 for (i = 0; i < n; i++) {
1764 Fields[i].string = nextword(&s);
1765 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1768 /* set NF manually to avoid side effects */
1770 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1771 intvar[NF]->number = nfields;
1775 /* perform additional actions when some internal variables changed */
1776 static void handle_special(var *v)
1780 const char *sep, *s;
1781 int sl, l, len, i, bsize;
1783 if (!(v->type & VF_SPECIAL))
1786 if (v == intvar[NF]) {
1787 n = (int)getvar_i(v);
1790 /* recalculate $0 */
1791 sep = getvar_s(intvar[OFS]);
1795 for (i = 0; i < n; i++) {
1796 s = getvar_s(&Fields[i]);
1799 memcpy(b+len, sep, sl);
1802 b = qrealloc(b, len+l+sl, &bsize);
1803 memcpy(b+len, s, l);
1808 setvar_p(intvar[F0], b);
1811 } else if (v == intvar[F0]) {
1812 is_f0_split = FALSE;
1814 } else if (v == intvar[FS]) {
1815 mk_splitter(getvar_s(v), &fsplitter);
1817 } else if (v == intvar[RS]) {
1818 mk_splitter(getvar_s(v), &rsplitter);
1820 } else if (v == intvar[IGNORECASE]) {
1824 n = getvar_i(intvar[NF]);
1825 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1826 /* right here v is invalid. Just to note... */
1830 /* step through func/builtin/etc arguments */
1831 static node *nextarg(node **pn)
1836 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1845 static void hashwalk_init(var *v, xhash *array)
1850 walker_list *prev_walker;
1852 if (v->type & VF_WALK) {
1853 prev_walker = v->x.walker;
1858 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1860 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1861 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1862 w->cur = w->end = w->wbuf;
1863 w->prev = prev_walker;
1864 for (i = 0; i < array->csize; i++) {
1865 hi = array->items[i];
1867 strcpy(w->end, hi->name);
1874 static int hashwalk_next(var *v)
1876 walker_list *w = v->x.walker;
1878 if (w->cur >= w->end) {
1879 walker_list *prev_walker = w->prev;
1881 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1883 v->x.walker = prev_walker;
1887 setvar_s(v, nextword(&w->cur));
1891 /* evaluate node, return 1 when result is true, 0 otherwise */
1892 static int ptest(node *pattern)
1894 /* ptest__v is "static": to save stack space? */
1895 return istrue(evaluate(pattern, &G.ptest__v));
// awk_getline: read the next record from stream rsm into variable v.
// Data is read with safe_read() on fileno(rsm->F) into a buffer grown by qrealloc().
// The record terminator is located according to rsplitter: by regexec() for a
// regexp splitter, by strchr() for character c with a memchr() search for a NUL
// byte, or by strstr() for two consecutive newlines with runs of newlines skipped.
// The terminator text found is stored in RT; errno is copied into ERRNO next to
// the read (NOTE(review): presumably on read failure, the guard is not visible here).
// Callers in this file treat a result greater than zero as a record having been read.
1898 /* read next record from stream rsm into a variable v */
1899 static int awk_getline(rstream *rsm, var *v)
1902 regmatch_t pmatch[2];
1903 int size, a, p, pp = 0;
1904 int fd, so, eo, r, rp;
1907 debug_printf_eval("entered %s()\n", __func__);
1909 /* we're using our own buffer since we need access to accumulating
1912 fd = fileno(rsm->F);
1917 c = (char) rsplitter.n.info;
1921 m = qrealloc(m, 256, &size);
1928 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1929 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1930 b, 1, pmatch, 0) == 0) {
1931 so = pmatch[0].rm_so;
1932 eo = pmatch[0].rm_eo;
1936 } else if (c != '\0') {
1937 s = strchr(b+pp, c);
1939 s = memchr(b+pp, '\0', p - pp);
1946 while (b[rp] == '\n')
1948 s = strstr(b+rp, "\n\n");
1951 while (b[eo] == '\n')
1960 memmove(m, m+a, p+1);
1965 m = qrealloc(m, a+p+128, &size);
// one byte of the buffer is left unread-into (size-p-1)
1968 p += safe_read(fd, b+p, size-p-1);
1972 setvar_i(intvar[ERRNO], errno);
// the byte at the cut position is saved in c while a NUL temporarily replaces it
1981 c = b[so]; b[so] = '\0';
1985 c = b[eo]; b[eo] = '\0';
1986 setvar_s(intvar[RT], b+so);
1995 debug_printf_eval("returning from %s(): %d\n", __func__, r);
2000 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2004 const char *s = format;
2006 if (int_as_int && n == (int)n) {
2007 r = snprintf(b, size, "%d", (int)n);
2009 do { c = *s; } while (c && *++s);
2010 if (strchr("diouxX", c)) {
2011 r = snprintf(b, size, format, (int)n);
2012 } else if (strchr("eEfgG", c)) {
2013 r = snprintf(b, size, format, n);
2015 syntax_error(EMSG_INV_FMT);
// awk_printf: build a formatted string from argument list n.
// The first argument (taken with nextarg) is evaluated and duplicated as the format.
// For each piece of the format, buffer b is grown with qrealloc(), the next argument
// is evaluated, and the piece is emitted with sprintf() for the c and s conversions
// or with fmt_num() for every other conversion letter.
// A star inside a conversion spec is rejected through syntax_error().
// The buffer is finally resized with xrealloc() to i + 1 bytes.
2021 /* formatted output into an allocated buffer, return ptr to buffer */
2022 static char *awk_printf(node *n)
2027 int i, j, incr, bsize;
2032 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// skip literal text; a doubled percent sign is treated as literal as well
2037 while (*f && (*f != '%' || *++f == '%'))
// skip flags, width and precision up to the conversion letter
2039 while (*f && !isalpha(*f)) {
2041 syntax_error("%*x formats are not supported");
// reserve room for this piece of the format plus MAXVARFMT bytes of converted output
2045 incr = (f - s) + MAXVARFMT;
2046 b = qrealloc(b, incr + i, &bsize);
2052 arg = evaluate(nextarg(&n), v);
2055 if (c == 'c' || !c) {
2056 i += sprintf(b+i, s, is_numeric(arg) ?
2057 (char)getvar_i(arg) : *getvar_s(arg));
2058 } else if (c == 's') {
// a string argument can be arbitrarily long, so the buffer is grown by its length first
2060 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2061 i += sprintf(b+i, s, s1);
2063 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2067 /* if there was an error while sprintf, return value is negative */
2074 b = xrealloc(b, i + 1);
// awk_sub: shared regex substitution helper.
// exec_builtin in this file calls it three ways: with nm taken from an argument and
// subexp TRUE, with nm 0 (replace all), and with nm 1 (replace the first match).
// The regex comes from as_regex(rn, &sreg); the result is accumulated in resbuf,
// grown with qrealloc(), and handed to setvar_p() on dest (or $0 when dest is NULL).
2079 /* Common substitution routine.
2080 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2081 * store result into (dest), return number of substitutions.
2082 * If nm = 0, replace all matches.
2083 * If src or dst is NULL, use $0.
2084 * If subexp != 0, enable subexpression matching (\1-\9).
2086 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2090 int match_no, residx, replen, resbufsize;
2092 regmatch_t pmatch[10];
2093 regex_t sreg, *regex;
2099 regex = as_regex(rn, &sreg);
2100 sp = getvar_s(src ? src : intvar[F0]);
2101 replen = strlen(repl);
2102 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2103 int so = pmatch[0].rm_so;
2104 int eo = pmatch[0].rm_eo;
2106 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2107 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2108 memcpy(resbuf + residx, sp, eo);
2110 if (++match_no >= nm) {
2115 residx -= (eo - so);
2117 for (s = repl; *s; s++) {
2118 char c = resbuf[residx++] = *s;
2123 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2125 residx -= ((nbs + 3) >> 1);
2132 resbuf[residx++] = c;
2134 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2135 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2136 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
// later regexec() calls work on the remainder, whose start is not a line start
2144 regexec_flags = REG_NOTBOL;
2149 /* Empty match (e.g. "b*" will match anywhere).
2150 * Advance by one char. */
2152 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2153 //... and will erroneously match "b" even though it is NOT at the word start.
2154 //we need REG_NOTBOW but it does not exist...
2155 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2156 //it should be able to do it correctly.
2157 /* Subtle: this is safe only because
2158 * qrealloc allocated at least one extra byte */
2159 resbuf[residx] = *sp;
// append whatever input is left after the last match
2167 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2168 strcpy(resbuf + residx, sp);
2170 //bb_error_msg("end sp:'%s'%p", sp,sp);
2171 setvar_p(dest ? dest : intvar[F0], resbuf);
2177 static NOINLINE int do_mktime(const char *ds)
2182 /*memset(&then, 0, sizeof(then)); - not needed */
2183 then.tm_isdst = -1; /* default is unknown */
2185 /* manpage of mktime says these fields are ints,
2186 * so we can sscanf stuff directly into them */
2187 count = sscanf(ds, "%u %u %u %u %u %u %d",
2188 &then.tm_year, &then.tm_mon, &then.tm_mday,
2189 &then.tm_hour, &then.tm_min, &then.tm_sec,
2193 || (unsigned)then.tm_mon < 1
2194 || (unsigned)then.tm_year < 1900
2200 then.tm_year -= 1900;
2202 return mktime(&then);
// exec_builtin: run the builtin function described by node op, storing the result in res.
// Up to four arguments are pulled with nextarg(); bits of op->info decide whether each
// one is evaluated (mask 0x09000000) and also converted to a string (mask 0x08000000).
// The top two bits of info (info >> 30) are compared against nargs as the minimum
// argument count.
// The visible cases use atan2, awk_split, xstrndup, bitwise operators on getvar_i_int
// values, an ASCII case conversion, strstr and strncasecmp searches, strftime,
// do_mktime, regexec (setting RSTART and RLENGTH) and awk_sub.
2205 static NOINLINE var *exec_builtin(node *op, var *res)
2207 #define tspl (G.exec_builtin__tspl)
2213 regmatch_t pmatch[2];
2222 isr = info = op->info;
2225 av[2] = av[3] = NULL;
2226 for (i = 0; i < 4 && op; i++) {
2227 an[i] = nextarg(&op);
2228 if (isr & 0x09000000)
2229 av[i] = evaluate(an[i], &tv[i]);
2230 if (isr & 0x08000000)
2231 as[i] = getvar_s(av[i]);
2236 if ((uint32_t)nargs < (info >> 30))
2237 syntax_error(EMSG_TOO_FEW_ARGS);
2243 if (ENABLE_FEATURE_AWK_LIBM)
2244 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2246 syntax_error(EMSG_NO_MATH);
// a regexp node is used directly as the splitter; anything else is evaluated to a string
// and turned into a temporary splitter in tspl
2253 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2254 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2259 n = awk_split(as[0], spl, &s);
2261 clear_array(iamarray(av[1]));
// array elements are numbered from 1
2262 for (i = 1; i <= n; i++)
2263 setari_u(av[1], i, nextword(&s));
// the start position argument is 1-based, hence the - 1
2273 i = getvar_i(av[1]) - 1;
2278 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2281 s = xstrndup(as[0]+i, n);
2286 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2287 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2289 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2293 setvar_i(res, ~getvar_i_int(av[0]));
2297 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2301 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2305 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2309 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2315 s1 = s = xstrdup(as[0]);
2317 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
// ASCII-only case flip: the test accepts bytes whose lowercased form lies in a..z
2318 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2319 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2329 l = strlen(as[0]) - ll;
2330 if (ll > 0 && l >= 0) {
2332 char *s = strstr(as[0], as[1]);
// the reported position is 1-based
2334 n = (s - as[0]) + 1;
2336 /* this piece of code is terribly slow and
2337 * really should be rewritten
2339 for (i = 0; i <= l; i++) {
2340 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2352 tt = getvar_i(av[1]);
2355 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2356 i = strftime(g_buf, MAXVARFMT,
2357 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2360 setvar_s(res, g_buf);
2364 setvar_i(res, do_mktime(as[0]));
2368 re = as_regex(an[1], &sreg);
2369 n = regexec(re, as[0], 1, pmatch, 0);
2374 pmatch[0].rm_so = 0;
2375 pmatch[0].rm_eo = -1;
2377 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2378 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2379 setvar_i(res, pmatch[0].rm_so);
2385 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2389 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2393 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2403 * Evaluate node - the heart of the program. Supplied with subtree
2404 * and place where to store result. returns ptr to result.
// XC(n) drops the low 8 bits of an operation class so it can serve as a compact case label.
2406 #define XC(n) ((n) >> 8)
// evaluate: interpret the node tree starting at op, using res as the result slot.
// There is an early exit that returns setvar_s(res, NULL).
// For each node, opinfo flag bits (OF_RES1, OF_RES2, OF_STR1, OF_STR2, OF_NUM1) select
// which operands are evaluated in advance into L, R and L_d before the switch on the
// operation class.  The switch covers control-flow style nodes (test, branch, for-in
// walking, print and printf, delete, ...) and expression nodes (variables, regexp match,
// assignment, function calls, getline, builtins, arithmetic, comparison).
2408 static var *evaluate(node *op, var *res)
2410 /* This procedure is recursive so we should count every byte */
2411 #define fnargs (G.evaluate__fnargs)
2412 /* seed is initialized to 1 */
2413 #define seed (G.evaluate__seed)
2414 #define sreg (G.evaluate__sreg)
2419 return setvar_s(res, NULL);
2421 debug_printf_eval("entered %s()\n", __func__);
2429 } L = L; /* for compiler */
2440 opn = (opinfo & OPNMASK);
2441 g_lineno = op->lineno;
2443 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2445 /* execute inevitable things */
2446 if (opinfo & OF_RES1)
2447 L.v = evaluate(op1, v1);
2448 if (opinfo & OF_RES2)
2449 R.v = evaluate(op->r.n, v1+1);
2450 if (opinfo & OF_STR1) {
2451 L.s = getvar_s(L.v);
2452 debug_printf_eval("L.s:'%s'\n", L.s);
2454 if (opinfo & OF_STR2) {
2455 R.s = getvar_s(R.v);
2456 debug_printf_eval("R.s:'%s'\n", R.s);
2458 if (opinfo & OF_NUM1) {
2459 L_d = getvar_i(L.v);
2460 debug_printf_eval("L_d:%f\n", L_d);
2463 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2464 switch (XC(opinfo & OPCLSMASK)) {
2466 /* -- iterative node type -- */
2470 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2471 /* it's range pattern */
// OF_CHECKED is set on the node while a range is active and cleared once the end pattern matches
2472 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2473 op->info |= OF_CHECKED;
2474 if (ptest(op1->r.n))
2475 op->info &= ~OF_CHECKED;
2481 op = ptest(op1) ? op->a.n : op->r.n;
2485 /* just evaluate an expression, also used as unconditional jump */
2489 /* branch, used in if-else and various loops */
2491 op = istrue(L.v) ? op->a.n : op->r.n;
2494 /* initialize for-in loop */
2495 case XC( OC_WALKINIT ):
2496 hashwalk_init(L.v, iamarray(R.v));
2499 /* get next array item */
2500 case XC( OC_WALKNEXT ):
2501 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2504 case XC( OC_PRINT ):
2505 case XC( OC_PRINTF ): {
2509 rstream *rsm = newfile(R.s);
2512 rsm->F = popen(R.s, "w");
2514 bb_perror_msg_and_die("popen");
2517 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2523 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2525 fputs(getvar_s(intvar[F0]), F);
2528 var *v = evaluate(nextarg(&op1), v1);
2529 if (v->type & VF_NUMBER) {
2530 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2534 fputs(getvar_s(v), F);
2538 fputs(getvar_s(intvar[OFS]), F);
2541 fputs(getvar_s(intvar[ORS]), F);
2543 } else { /* OC_PRINTF */
2544 char *s = awk_printf(op1);
2552 case XC( OC_DELETE ): {
2553 uint32_t info = op1->info & OPCLSMASK;
2556 if (info == OC_VAR) {
2558 } else if (info == OC_FNARG) {
2559 v = &fnargs[op1->l.aidx];
2561 syntax_error(EMSG_NOT_ARRAY);
2567 s = getvar_s(evaluate(op1->r.n, v1));
2568 hash_remove(iamarray(v), s);
2570 clear_array(iamarray(v));
2575 case XC( OC_NEWSOURCE ):
2576 g_progname = op->l.new_progname;
2579 case XC( OC_RETURN ):
2583 case XC( OC_NEXTFILE ):
2594 /* -- recursive node type -- */
2598 if (L.v == intvar[NF])
2602 case XC( OC_FNARG ):
2603 L.v = &fnargs[op->l.aidx];
2605 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2609 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2612 case XC( OC_REGEXP ):
2614 L.s = getvar_s(intvar[F0]);
2617 case XC( OC_MATCH ):
2621 regex_t *re = as_regex(op1, &sreg);
2622 int i = regexec(re, L.s, 0, NULL, 0);
// opn equal to the exclamation mark inverts the match result
2625 setvar_i(res, (i == 0) ^ (opn == '!'));
2630 debug_printf_eval("MOVE\n");
2631 /* if source is a temporary string, just relink it to dest */
2632 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2633 //then L.v ends up being a string, which is wrong
2634 // if (R.v == v1+1 && R.v->string) {
2635 // res = setvar_p(L.v, R.v->string);
2636 // R.v->string = NULL;
2638 res = copyvar(L.v, R.v);
2642 case XC( OC_TERNARY ):
2643 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2644 syntax_error(EMSG_POSSIBLE_ERROR);
2645 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2648 case XC( OC_FUNC ): {
2650 const char *sv_progname;
2652 if (!op->r.f->body.first)
2653 syntax_error(EMSG_UNDEF_FUNC);
// one slot per declared argument plus one extra
2655 vbeg = v = nvalloc(op->r.f->nargs + 1);
2657 var *arg = evaluate(nextarg(&op1), v1);
2659 v->type |= VF_CHILD;
2661 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved before the function body runs and restored afterwards
2667 sv_progname = g_progname;
2669 res = evaluate(op->r.f->body.first, res);
2671 g_progname = sv_progname;
2678 case XC( OC_GETLINE ):
2679 case XC( OC_PGETLINE ): {
2686 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2687 rsm->F = popen(L.s, "r");
2688 rsm->is_pipe = TRUE;
2690 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2695 iF = next_input_file();
2699 if (!rsm || !rsm->F) {
2700 setvar_i(intvar[ERRNO], errno);
2708 i = awk_getline(rsm, R.v);
2709 if (i > 0 && !op1) {
2710 incvar(intvar[FNR]);
2717 /* simple builtins */
2718 case XC( OC_FBLTIN ): {
2719 double R_d = R_d; /* for compiler */
2727 R_d = (double)rand() / (double)RAND_MAX;
2731 if (ENABLE_FEATURE_AWK_LIBM) {
2737 if (ENABLE_FEATURE_AWK_LIBM) {
2743 if (ENABLE_FEATURE_AWK_LIBM) {
2749 if (ENABLE_FEATURE_AWK_LIBM) {
2755 if (ENABLE_FEATURE_AWK_LIBM) {
2760 syntax_error(EMSG_NO_MATH);
2765 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2775 L.s = getvar_s(intvar[F0]);
// NOTE(review): the shift by 8 presumably extracts the exit code from the system() status
2781 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2782 ? (system(L.s) >> 8) : 0;
2788 } else if (L.s && *L.s) {
2789 rstream *rsm = newfile(L.s);
2799 rsm = (rstream *)hash_search(fdhash, L.s);
2800 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2802 debug_printf_eval("OC_FBLTIN F_cl "
2803 "rsm->is_pipe:%d, ->F:%p\n",
2804 rsm->is_pipe, rsm->F);
2805 /* Can be NULL if open failed. Example:
2806 * getline line <"doesnt_exist";
2807 * close("doesnt_exist"); <--- here rsm->F is NULL
2810 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2812 hash_remove(fdhash, L.s);
2815 setvar_i(intvar[ERRNO], errno);
2824 case XC( OC_BUILTIN ):
2825 res = exec_builtin(op, res);
2828 case XC( OC_SPRINTF ):
2829 setvar_p(res, awk_printf(op1));
2832 case XC( OC_UNARY ): {
2835 Ld = R_d = getvar_i(R.v);
2862 case XC( OC_FIELD ): {
2863 int i = (int)getvar_i(R.v);
2870 res = &Fields[i - 1];
2875 /* concatenation (" ") and index joining (",") */
2876 case XC( OC_CONCAT ):
2877 case XC( OC_COMMA ): {
2878 const char *sep = "";
2879 if ((opinfo & OPCLSMASK) == OC_COMMA)
2880 sep = getvar_s(intvar[SUBSEP]);
2881 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2886 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2890 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2893 case XC( OC_BINARY ):
2894 case XC( OC_REPLACE ): {
2895 double R_d = getvar_i(R.v);
2896 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2909 syntax_error(EMSG_DIV_BY_ZERO);
2913 if (ENABLE_FEATURE_AWK_LIBM)
2914 L_d = pow(L_d, R_d);
2916 syntax_error(EMSG_NO_MATH);
2920 syntax_error(EMSG_DIV_BY_ZERO);
// remainder computed from the quotient truncated to int
2921 L_d -= (int)(L_d / R_d) * R_d;
2924 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res, OC_REPLACE stores back into the left operand
2925 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2929 case XC( OC_COMPARE ): {
2930 int i = i; /* for compiler */
// numeric comparison only when both operands are numeric, string comparison otherwise
2933 if (is_numeric(L.v) && is_numeric(R.v)) {
2934 Ld = getvar_i(L.v) - getvar_i(R.v);
2936 const char *l = getvar_s(L.v);
2937 const char *r = getvar_s(R.v);
2938 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2940 switch (opn & 0xfe) {
// the low bit of opn inverts the comparison result
2951 setvar_i(res, (i == 0) ^ (opn & 1));
2956 syntax_error(EMSG_POSSIBLE_ERROR);
2958 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2960 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2967 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2975 /* -------- main & co. -------- */
2977 static int awk_exit(int r)
2988 evaluate(endseq.first, &tv);
2991 /* waiting for children */
2992 for (i = 0; i < fdhash->csize; i++) {
2993 hi = fdhash->items[i];
2995 if (hi->data.rs.F && hi->data.rs.is_pipe)
2996 pclose(hi->data.rs.F);
3004 /* if expr looks like "var=value", perform assignment and return 1,
3005 * otherwise return 0 */
3006 static int is_assignment(const char *expr)
3010 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3014 exprc = xstrdup(expr);
3015 val = exprc + (val - expr);
3018 unescape_string_in_place(val);
3019 setvar_u(newvar(exprc), val);
3024 /* switch to next input file */
3025 static rstream *next_input_file(void)
3027 #define rsm (G.next_input_file__rsm)
3028 #define files_happen (G.next_input_file__files_happen)
3031 const char *fname, *ind;
3036 rsm.pos = rsm.adv = 0;
3039 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3046 ind = getvar_s(incvar(intvar[ARGIND]));
3047 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3048 if (fname && *fname && !is_assignment(fname)) {
3049 F = xfopen_stdin(fname);
3054 files_happen = TRUE;
3055 setvar_s(intvar[FILENAME], fname);
// awk_main: applet entry point.
// Visible steps, in order: force the C numeric locale when locale support is enabled,
// allocate g_buf, create the four hashes, create the builtin variables from
// vNames and vValues, run handle_special for FS and RS, register the /dev/stdin,
// /dev/stdout and /dev/stderr streams, copy the environment into ENVIRON, parse
// options F, v, f and W, parse the program (from a -f file or from the first
// argument), fill ARGC and ARGV, evaluate the BEGIN sequence, then feed every
// input record to the main sequence and finish through awk_exit().
3062 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3063 int awk_main(int argc, char **argv)
3067 llist_t *list_v = NULL;
3068 llist_t *list_f = NULL;
3073 char *vnames = (char *)vNames; /* cheat */
3074 char *vvalues = (char *)vValues;
3078 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3079 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3080 if (ENABLE_LOCALE_SUPPORT)
3081 setlocale(LC_NUMERIC, "C");
3085 /* allocate global buffer */
3086 g_buf = xmalloc(MAXVARFMT + 1);
3088 vhash = hash_init();
3089 ahash = hash_init();
3090 fdhash = hash_init();
3091 fnhash = hash_init();
3093 /* initialize variables */
3094 for (i = 0; *vnames; i++) {
3095 intvar[i] = v = newvar(nextword(&vnames));
// a leading byte of octal 377 in the value table means no string value is assigned here
3096 if (*vvalues != '\377')
3097 setvar_s(v, nextword(&vvalues));
// a name prefixed with a star marks the variable as VF_SPECIAL
3101 if (*vnames == '*') {
3102 v->type |= VF_SPECIAL;
// build the initial field and record splitters from the default FS and RS
3107 handle_special(intvar[FS]);
3108 handle_special(intvar[RS]);
3110 newfile("/dev/stdin")->F = stdin;
3111 newfile("/dev/stdout")->F = stdout;
3112 newfile("/dev/stderr")->F = stderr;
3114 /* Huh, people report that sometimes environ is NULL. Oh well. */
3115 if (environ) for (envp = environ; *envp; envp++) {
3116 /* environ is writable, thus we don't strdup it needlessly */
3118 char *s1 = strchr(s, '=');
3121 /* Both findvar and setvar_u take const char*
3122 * as 2nd arg -> environment is not trashed */
3123 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3127 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3128 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
3131 if (opt & 0x1) { /* -F */
3132 unescape_string_in_place(opt_F);
3133 setvar_s(intvar[FS], opt_F);
3135 while (list_v) { /* -v */
3136 if (!is_assignment(llist_pop(&list_v)))
3139 if (list_f) { /* -f */
3144 g_progname = llist_pop(&list_f);
3145 from_file = xfopen_stdin(g_progname);
3146 /* one byte is reserved for some trick in next_token */
// data is read starting at offset 1 of s, and parsing starts at s + 1
3147 for (i = j = 1; j > 0; i += j) {
3148 s = xrealloc(s, i + 4096);
3149 j = fread(s + i, 1, 4094, from_file);
3153 parse_program(s + 1);
3157 } else { // no -f: take program from 1st parameter
3160 g_progname = "cmd. line";
3161 parse_program(*argv++);
3163 if (opt & 0x8) // -W
3164 bb_error_msg("warning: option -W is ignored");
3166 /* fill in ARGV array */
3167 setvar_i(intvar[ARGC], argc);
3168 setari_u(intvar[ARGV], 0, "awk");
3171 setari_u(intvar[ARGV], ++i, *argv++);
3173 evaluate(beginseq.first, &tv);
// with neither main nor END rules there is no input to read, so exit right after BEGIN
3174 if (!mainseq.first && !endseq.first)
3175 awk_exit(EXIT_SUCCESS);
3177 /* input file could already be opened in BEGIN block */
3179 iF = next_input_file();
3181 /* passing through input files */
// FNR restarts from zero for each input file
3184 setvar_i(intvar[FNR], 0);
3186 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3189 incvar(intvar[FNR]);
3190 evaluate(mainseq.first, &tv);
3197 syntax_error(strerror(errno));
3199 iF = next_input_file();
3202 awk_exit(EXIT_SUCCESS);