1 /* vi: set sw=4 ts=4: */
3 * awk implementation for busybox
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 //usage:#define awk_trivial_usage
11 //usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage: " -v VAR=VAL Set variable"
14 //usage: "\n -F SEP Use SEP as field separator"
15 //usage: "\n -f FILE Read program from FILE"
21 /* This is a NOEXEC applet. Be very careful! */
24 /* If you comment out one of these below, it will be #defined later
25 * to perform debug printfs to stderr: */
26 #define debug_printf_walker(...) do {} while (0)
27 #define debug_printf_eval(...) do {} while (0)
28 #define debug_printf_parse(...) do {} while (0)
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
46 #define VF_NUMBER 0x0001 /* 1 = primary type is number */
47 #define VF_ARRAY 0x0002 /* 1 = it's an array */
49 #define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
50 #define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
51 #define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
52 #define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
53 #define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
54 #define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
55 #define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
57 /* these flags are static, don't change them when value is changed */
58 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
60 typedef struct walker_list {
63 struct walker_list *prev;
68 typedef struct var_s {
69 unsigned type; /* flags */
73 int aidx; /* func arg idx (for compilation stage) */
74 struct xhash_s *array; /* array ptr */
75 struct var_s *parent; /* for func args, ptr to actual parameter */
76 walker_list *walker; /* list of array elements (for..in) */
80 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
81 typedef struct chain_s {
84 const char *programname;
88 typedef struct func_s {
94 typedef struct rstream_s {
103 typedef struct hash_item_s {
105 struct var_s v; /* variable/array hash */
106 struct rstream_s rs; /* redirect streams hash */
107 struct func_s f; /* functions hash */
109 struct hash_item_s *next; /* next in chain */
110 char name[1]; /* really it's longer */
113 typedef struct xhash_s {
114 unsigned nel; /* num of elements */
115 unsigned csize; /* current hash size */
116 unsigned nprime; /* next hash size in PRIMES[] */
117 unsigned glen; /* summary length of item names */
118 struct hash_item_s **items;
122 typedef struct node_s {
142 /* Block of temporary variables */
143 typedef struct nvblock_s {
146 struct nvblock_s *prev;
147 struct nvblock_s *next;
151 typedef struct tsplitter_s {
156 /* simple token classes */
157 /* Order and hex values are very important!!! See next_token() */
158 #define TC_SEQSTART 1 /* ( */
159 #define TC_SEQTERM (1 << 1) /* ) */
160 #define TC_REGEXP (1 << 2) /* /.../ */
161 #define TC_OUTRDR (1 << 3) /* | > >> */
162 #define TC_UOPPOST (1 << 4) /* unary postfix operator */
163 #define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
164 #define TC_BINOPX (1 << 6) /* two-opnd operator */
165 #define TC_IN (1 << 7)
166 #define TC_COMMA (1 << 8)
167 #define TC_PIPE (1 << 9) /* input redirection pipe */
168 #define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
169 #define TC_ARRTERM (1 << 11) /* ] */
170 #define TC_GRPSTART (1 << 12) /* { */
171 #define TC_GRPTERM (1 << 13) /* } */
172 #define TC_SEMICOL (1 << 14)
173 #define TC_NEWLINE (1 << 15)
174 #define TC_STATX (1 << 16) /* ctl statement (for, next...) */
175 #define TC_WHILE (1 << 17)
176 #define TC_ELSE (1 << 18)
177 #define TC_BUILTIN (1 << 19)
178 #define TC_GETLINE (1 << 20)
179 #define TC_FUNCDECL (1 << 21) /* `function' `func' */
180 #define TC_BEGIN (1 << 22)
181 #define TC_END (1 << 23)
182 #define TC_EOF (1 << 24)
183 #define TC_VARIABLE (1 << 25)
184 #define TC_ARRAY (1 << 26)
185 #define TC_FUNCTION (1 << 27)
186 #define TC_STRING (1 << 28)
187 #define TC_NUMBER (1 << 29)
189 #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
191 /* combined token classes */
192 #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
193 //#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
194 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
195 | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
197 #define TC_STATEMNT (TC_STATX | TC_WHILE)
198 #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
200 /* word tokens, cannot mean something else if not expected */
201 #define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
202 | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
204 /* discard newlines after these */
205 #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
206 | TC_BINOP | TC_OPTERM)
208 /* what can expression begin with */
209 #define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
210 /* what can group begin with */
211 #define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
213 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
214 /* operator is inserted between them */
215 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
216 | TC_STRING | TC_NUMBER | TC_UOPPOST)
217 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
219 #define OF_RES1 0x010000
220 #define OF_RES2 0x020000
221 #define OF_STR1 0x040000
222 #define OF_STR2 0x080000
223 #define OF_NUM1 0x100000
224 #define OF_CHECKED 0x200000
226 /* combined operator flags */
229 #define xS (OF_RES2 | OF_STR2)
231 #define VV (OF_RES1 | OF_RES2)
232 #define Nx (OF_RES1 | OF_NUM1)
233 #define NV (OF_RES1 | OF_NUM1 | OF_RES2)
234 #define Sx (OF_RES1 | OF_STR1)
235 #define SV (OF_RES1 | OF_STR1 | OF_RES2)
236 #define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
/* OPCLSMASK selects the operation-class bits of an info word; the result is
 * compared against the OC_xxx values, e.g. (t_info & OPCLSMASK) == OC_TERNARY.
 * NOTE(review): OPNMASK presumably selects the low operation-number bits;
 * no use of it is visible in this part of the file - confirm. */
238 #define OPCLSMASK 0xFF00
239 #define OPNMASK 0x007F
241 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
242 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
243 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
/* P(x): place the priority/descriptor byte x into bits 24..31 of an info word.
 * The argument is parenthesized so that expressions such as P(a + b) expand
 * correctly, and the shift is performed in uint32_t because values >= 0x80
 * (e.g. the builtin descriptor P(0x83)) do not fit in a signed int once
 * shifted, which would be undefined behaviour.
 */
#define P(x) ((uint32_t)(x) << 24)
249 #define PRIMASK 0x7F000000
250 #define PRIMASK2 0x7E000000
252 /* Operation classes */
254 #define SHIFT_TIL_THIS 0x0600
255 #define RECUR_FROM_THIS 0x1000
258 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
259 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
261 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
262 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
263 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
265 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
266 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
267 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
268 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
269 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
270 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
271 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
272 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
275 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
279 /* simple builtins */
281 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
282 F_ti, F_le, F_sy, F_ff, F_cl
287 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
289 B_an, B_co, B_ls, B_or, B_rs, B_xo,
292 /* tokens and their corresponding info values */
294 #define NTC "\377" /* switch to next token class (tc<<1) */
297 #define OC_B OC_BUILTIN
299 static const char tokenlist[] ALIGN1 =
302 "\1/" NTC /* REGEXP */
303 "\2>>" "\1>" "\1|" NTC /* OUTRDR */
304 "\2++" "\2--" NTC /* UOPPOST */
305 "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
306 "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
307 "\2*=" "\2/=" "\2%=" "\2^="
308 "\1+" "\1-" "\3**=" "\2**"
309 "\1/" "\1%" "\1^" "\1*"
310 "\2!=" "\2>=" "\2<=" "\1>"
311 "\1<" "\2!~" "\1~" "\2&&"
312 "\2||" "\1?" "\1:" NTC
316 "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
322 "\2if" "\2do" "\3for" "\5break" /* STATX */
323 "\10continue" "\6delete" "\5print"
324 "\6printf" "\4next" "\10nextfile"
325 "\6return" "\4exit" NTC
329 "\3and" "\5compl" "\6lshift" "\2or"
331 "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
332 "\3cos" "\3exp" "\3int" "\3log"
333 "\4rand" "\3sin" "\4sqrt" "\5srand"
334 "\6gensub" "\4gsub" "\5index" "\6length"
335 "\5match" "\5split" "\7sprintf" "\3sub"
336 "\6substr" "\7systime" "\10strftime" "\6mktime"
337 "\7tolower" "\7toupper" NTC
339 "\4func" "\10function" NTC
342 /* compiler adds trailing "\0" */
345 static const uint32_t tokeninfo[] = {
349 xS|'a', xS|'w', xS|'|',
350 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
351 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
352 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
353 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
354 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
355 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
356 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
357 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
358 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
359 OC_IN|SV|P(49), /* in */
361 OC_PGETLINE|SV|P(37),
362 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
368 ST_IF, ST_DO, ST_FOR, OC_BREAK,
369 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
370 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
371 OC_RETURN|Vx, OC_EXIT|Nx,
375 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
376 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
377 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
378 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
379 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
380 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
381 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
382 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
383 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
390 /* internal variable names and their initial values */
391 /* asterisk marks SPECIAL vars; $ is just the unnamed Field0 */
393 CONVFMT, OFMT, FS, OFS,
394 ORS, RS, RT, FILENAME,
395 SUBSEP, F0, ARGIND, ARGC,
396 ARGV, ERRNO, FNR, NR,
397 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
400 static const char vNames[] ALIGN1 =
401 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
402 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
403 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
404 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
405 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
407 static const char vValues[] ALIGN1 =
408 "%.6g\0" "%.6g\0" " \0" " \0"
409 "\n\0" "\n\0" "\0" "\0"
410 "\034\0" "\0" "\377";
412 /* hash size may grow to these values */
/* FIRST_PRIME is the initial bucket count installed by hash_init();
 * hash_rebuild() takes each following size from PRIMES[], indexed by
 * xhash::nprime, and stops growing once nprime reaches ARRAY_SIZE(PRIMES). */
413 #define FIRST_PRIME 61
414 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
417 /* Globals. Split in two parts so that first one is addressed
418 * with (mostly short) negative offsets.
419 * NB: it's unsafe to put members of type "double"
420 * into globals2 (gcc may fail to align them).
424 chain beginseq, mainseq, endseq;
426 node *break_ptr, *continue_ptr;
428 xhash *vhash, *ahash, *fdhash, *fnhash;
429 const char *g_progname;
432 int maxfields; /* used in fsrealloc() only */
441 smallint is_f0_split;
445 uint32_t t_info; /* often used */
450 var *intvar[NUM_INTERNAL_VARS]; /* often used */
452 /* former statics from various functions */
453 char *split_f0__fstrings;
455 uint32_t next_token__save_tclass;
456 uint32_t next_token__save_info;
457 uint32_t next_token__ltclass;
458 smallint next_token__concat_inserted;
460 smallint next_input_file__files_happen;
461 rstream next_input_file__rsm;
463 var *evaluate__fnargs;
464 unsigned evaluate__seed;
465 regex_t evaluate__sreg;
469 tsplitter exec_builtin__tspl;
471 /* biggest and least used members go last */
472 tsplitter fsplitter, rsplitter;
474 #define G1 (ptr_to_globals[-1])
475 #define G (*(struct globals2 *)ptr_to_globals)
476 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477 /*char G1size[sizeof(G1)]; - 0x74 */
478 /*char Gsize[sizeof(G)]; - 0x1c4 */
479 /* Trying to keep most of members accessible with short offsets: */
480 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
481 #define t_double (G1.t_double )
482 #define beginseq (G1.beginseq )
483 #define mainseq (G1.mainseq )
484 #define endseq (G1.endseq )
485 #define seq (G1.seq )
486 #define break_ptr (G1.break_ptr )
487 #define continue_ptr (G1.continue_ptr)
489 #define vhash (G1.vhash )
490 #define ahash (G1.ahash )
491 #define fdhash (G1.fdhash )
492 #define fnhash (G1.fnhash )
493 #define g_progname (G1.g_progname )
494 #define g_lineno (G1.g_lineno )
495 #define nfields (G1.nfields )
496 #define maxfields (G1.maxfields )
497 #define Fields (G1.Fields )
498 #define g_cb (G1.g_cb )
499 #define g_pos (G1.g_pos )
500 #define g_buf (G1.g_buf )
501 #define icase (G1.icase )
502 #define exiting (G1.exiting )
503 #define nextrec (G1.nextrec )
504 #define nextfile (G1.nextfile )
505 #define is_f0_split (G1.is_f0_split )
506 #define t_rollback (G1.t_rollback )
507 #define t_info (G.t_info )
508 #define t_tclass (G.t_tclass )
509 #define t_string (G.t_string )
510 #define t_lineno (G.t_lineno )
511 #define intvar (G.intvar )
512 #define fsplitter (G.fsplitter )
513 #define rsplitter (G.rsplitter )
514 #define INIT_G() do { \
515 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516 G.next_token__ltclass = TC_OPTERM; \
517 G.evaluate__seed = 1; \
521 /* function prototypes */
522 static void handle_special(var *);
523 static node *parse_expr(uint32_t);
524 static void chain_group(void);
525 static var *evaluate(node *, var *);
526 static rstream *next_input_file(void);
527 static int fmt_num(char *, int, const char *, double, int);
528 static int awk_exit(int) NORETURN;
530 /* ---- error handling ---- */
532 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
533 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
534 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
535 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
536 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
537 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
538 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
539 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
540 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
541 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
543 static void zero_out_var(var *vp)
545 memset(vp, 0, sizeof(*vp));
548 static void syntax_error(const char *message) NORETURN;
549 static void syntax_error(const char *message)
551 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
554 /* ---- hash stuff ---- */
556 static unsigned hashidx(const char *name)
561 idx = *name++ + (idx << 6) - idx;
565 /* create new hash */
566 static xhash *hash_init(void)
570 newhash = xzalloc(sizeof(*newhash));
571 newhash->csize = FIRST_PRIME;
572 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
577 /* find item in hash, return ptr to data, NULL if not found */
578 static void *hash_search(xhash *hash, const char *name)
582 hi = hash->items[hashidx(name) % hash->csize];
584 if (strcmp(hi->name, name) == 0)
591 /* grow hash if it becomes too big */
592 static void hash_rebuild(xhash *hash)
594 unsigned newsize, i, idx;
595 hash_item **newitems, *hi, *thi;
597 if (hash->nprime == ARRAY_SIZE(PRIMES))
600 newsize = PRIMES[hash->nprime++];
601 newitems = xzalloc(newsize * sizeof(newitems[0]));
603 for (i = 0; i < hash->csize; i++) {
608 idx = hashidx(thi->name) % newsize;
609 thi->next = newitems[idx];
615 hash->csize = newsize;
616 hash->items = newitems;
619 /* find item in hash, add it if necessary. Return ptr to data */
620 static void *hash_find(xhash *hash, const char *name)
626 hi = hash_search(hash, name);
628 if (++hash->nel / hash->csize > 10)
631 l = strlen(name) + 1;
632 hi = xzalloc(sizeof(*hi) + l);
633 strcpy(hi->name, name);
635 idx = hashidx(name) % hash->csize;
636 hi->next = hash->items[idx];
637 hash->items[idx] = hi;
/* Typed convenience wrappers around hash_find(): each casts the returned
 * data pointer to the item type kept in the corresponding hash.
 * findvar() takes the hash explicitly; newvar(), newfile() and newfunc()
 * operate on vhash, fdhash and fnhash respectively. */
643 #define findvar(hash, name) ((var*) hash_find((hash), (name)))
644 #define newvar(name) ((var*) hash_find(vhash, (name)))
645 #define newfile(name) ((rstream*)hash_find(fdhash, (name)))
646 #define newfunc(name) ((func*) hash_find(fnhash, (name)))
648 static void hash_remove(xhash *hash, const char *name)
650 hash_item *hi, **phi;
652 phi = &hash->items[hashidx(name) % hash->csize];
655 if (strcmp(hi->name, name) == 0) {
656 hash->glen -= (strlen(name) + 1);
666 /* ------ some useful functions ------ */
668 static char *skip_spaces(char *p)
671 if (*p == '\\' && p[1] == '\n') {
674 } else if (*p != ' ' && *p != '\t') {
682 /* returns old *s, advances *s past word and terminating NUL */
683 static char *nextword(char **s)
686 while (*(*s)++ != '\0')
691 static char nextchar(char **s)
698 c = bb_process_escape_sequence((const char**)s);
699 /* Example awk statement:
701 * we must treat \" as "
703 if (c == '\\' && *s == pps) { /* unrecognized \z? */
704 c = *(*s); /* yes, fetch z */
706 (*s)++; /* advance unless z = NUL */
711 /* TODO: merge with strcpy_and_process_escape_sequences()?
713 static void unescape_string_in_place(char *s1)
716 while ((*s1 = nextchar(&s)) != '\0')
720 static ALWAYS_INLINE int isalnum_(int c)
722 return (isalnum(c) || c == '_');
725 static double my_strtod(char **pp)
728 if (ENABLE_DESKTOP && cp[0] == '0') {
729 /* Might be hex or octal integer: 0x123abc or 07777 */
730 char c = (cp[1] | 0x20);
731 if (c == 'x' || isdigit(cp[1])) {
732 unsigned long long ull = strtoull(cp, pp, 0);
736 if (!isdigit(c) && c != '.')
738 /* else: it may be a floating number. Examples:
739 * 009.123 (*pp points to '9')
740 * 000.123 (*pp points to '.')
741 * fall through to strtod.
745 return strtod(cp, pp);
748 /* -------- working with variables (set/get/copy/etc) -------- */
750 static xhash *iamarray(var *v)
754 while (a->type & VF_CHILD)
757 if (!(a->type & VF_ARRAY)) {
759 a->x.array = hash_init();
764 static void clear_array(xhash *array)
769 for (i = 0; i < array->csize; i++) {
770 hi = array->items[i];
774 free(thi->data.v.string);
777 array->items[i] = NULL;
779 array->glen = array->nel = 0;
782 /* clear a variable */
783 static var *clrvar(var *v)
785 if (!(v->type & VF_FSTR))
788 v->type &= VF_DONTTOUCH;
794 /* assign string value to variable */
795 static var *setvar_p(var *v, char *value)
803 /* same as setvar_p but make a copy of string */
804 static var *setvar_s(var *v, const char *value)
806 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
809 /* same as setvar_s but sets USER flag */
810 static var *setvar_u(var *v, const char *value)
812 v = setvar_s(v, value);
817 /* set array element to user string */
818 static void setari_u(var *a, int idx, const char *s)
822 v = findvar(iamarray(a), itoa(idx));
826 /* assign numeric value to variable */
827 static var *setvar_i(var *v, double value)
830 v->type |= VF_NUMBER;
836 static const char *getvar_s(var *v)
838 /* if v is numeric and has no cached string, convert it to string */
839 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
840 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
841 v->string = xstrdup(g_buf);
842 v->type |= VF_CACHED;
844 return (v->string == NULL) ? "" : v->string;
847 static double getvar_i(var *v)
851 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
855 debug_printf_eval("getvar_i: '%s'->", s);
856 v->number = my_strtod(&s);
857 debug_printf_eval("%f (s:'%s')\n", v->number, s);
858 if (v->type & VF_USER) {
864 debug_printf_eval("getvar_i: '%s'->zero\n", s);
867 v->type |= VF_CACHED;
869 debug_printf_eval("getvar_i: %f\n", v->number);
873 /* Used for operands of bitwise ops */
874 static unsigned long getvar_i_int(var *v)
876 double d = getvar_i(v);
878 /* Casting doubles to longs is undefined for values outside
879 * of target type range. Try to widen it as much as possible */
881 return (unsigned long)d;
882 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
883 return - (long) (unsigned long) (-d);
886 static var *copyvar(var *dest, const var *src)
890 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
891 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
892 dest->number = src->number;
894 dest->string = xstrdup(src->string);
896 handle_special(dest);
900 static var *incvar(var *v)
902 return setvar_i(v, getvar_i(v) + 1.0);
905 /* return true if v is number or numeric string */
906 static int is_numeric(var *v)
909 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
912 /* return 1 when value of v corresponds to true, 0 otherwise */
913 static int istrue(var *v)
916 return (v->number != 0);
917 return (v->string && v->string[0]);
920 /* temporary variables allocator. Last allocated should be first freed */
921 static var *nvalloc(int n)
929 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
935 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
936 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
938 g_cb->pos = g_cb->nv;
940 /*g_cb->next = NULL; - xzalloc did it */
948 while (v < g_cb->pos) {
957 static void nvfree(var *v)
961 if (v < g_cb->nv || v >= g_cb->pos)
962 syntax_error(EMSG_INTERNAL_ERROR);
964 for (p = v; p < g_cb->pos; p++) {
965 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
966 clear_array(iamarray(p));
967 free(p->x.array->items);
970 if (p->type & VF_WALK) {
972 walker_list *w = p->x.walker;
973 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
977 debug_printf_walker(" free(%p)\n", w);
986 while (g_cb->prev && g_cb->pos == g_cb->nv) {
991 /* ------- awk program text parsing ------- */
993 /* Parse the next token pointed to by global pos, place results into the global t_* variables.
994 * If the token isn't expected, give up. Return the token class
996 static uint32_t next_token(uint32_t expected)
998 #define concat_inserted (G.next_token__concat_inserted)
999 #define save_tclass (G.next_token__save_tclass)
1000 #define save_info (G.next_token__save_info)
1001 /* Initialized to TC_OPTERM: */
1002 #define ltclass (G.next_token__ltclass)
1012 } else if (concat_inserted) {
1013 concat_inserted = FALSE;
1014 t_tclass = save_tclass;
1021 g_lineno = t_lineno;
1023 while (*p != '\n' && *p != '\0')
1031 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1033 } else if (*p == '\"') {
1036 while (*p != '\"') {
1038 if (*p == '\0' || *p == '\n')
1039 syntax_error(EMSG_UNEXP_EOS);
1041 *s++ = nextchar(&pp);
1047 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1049 } else if ((expected & TC_REGEXP) && *p == '/') {
1053 if (*p == '\0' || *p == '\n')
1054 syntax_error(EMSG_UNEXP_EOS);
1058 s[-1] = bb_process_escape_sequence((const char **)&pp);
1070 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1072 } else if (*p == '.' || isdigit(*p)) {
1075 t_double = my_strtod(&pp);
1078 syntax_error(EMSG_UNEXP_TOKEN);
1080 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1083 /* search for something known */
1088 int l = (unsigned char) *tl++;
1089 if (l == (unsigned char) NTCC) {
1093 /* if token class is expected,
1095 * and it's not a longer word,
1097 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1098 && strncmp(p, tl, l) == 0
1099 && !((tc & TC_WORD) && isalnum_(p[l]))
1101 /* then this is what we are looking for */
1103 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1110 /* not a known token */
1112 /* is it a name? (var/array/function) */
1114 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1117 while (isalnum_(*++p)) {
1122 /* also consume whitespace between functionname and bracket */
1123 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1127 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1132 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1134 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1140 /* skipping newlines in some cases */
1141 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1144 /* insert concatenation operator when needed */
1145 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1146 concat_inserted = TRUE;
1150 t_info = OC_CONCAT | SS | P(35);
1157 /* Are we ready for this? */
1158 if (!(ltclass & expected))
1159 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1160 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1163 #undef concat_inserted
1169 static void rollback_token(void)
1174 static node *new_node(uint32_t info)
1178 n = xzalloc(sizeof(node));
1180 n->lineno = g_lineno;
1184 static void mk_re_node(const char *s, node *n, regex_t *re)
1186 n->info = OC_REGEXP;
1189 xregcomp(re, s, REG_EXTENDED);
1190 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1193 static node *condition(void)
1195 next_token(TC_SEQSTART);
1196 return parse_expr(TC_SEQTERM);
1199 /* parse expression terminated by given argument, return ptr
1200 * to built subtree. Terminator is eaten by parse_expr */
1201 static node *parse_expr(uint32_t iexp)
1209 debug_printf_parse("%s(%x)\n", __func__, iexp);
1212 sn.r.n = glptr = NULL;
1213 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1215 while (!((tc = next_token(xtc)) & iexp)) {
1217 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1218 /* input redirection (<) attached to glptr node */
1219 debug_printf_parse("%s: input redir\n", __func__);
1220 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1222 xtc = TC_OPERAND | TC_UOPPRE;
1225 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1226 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1227 /* for binary and postfix-unary operators, jump back over
1228 * previous operators with higher priority */
1230 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1231 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1235 if ((t_info & OPCLSMASK) == OC_TERNARY)
1237 cn = vn->a.n->r.n = new_node(t_info);
1239 if (tc & TC_BINOP) {
1241 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1242 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1244 next_token(TC_GETLINE);
1245 /* give maximum priority to this pipe */
1246 cn->info &= ~PRIMASK;
1247 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1251 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1256 debug_printf_parse("%s: other\n", __func__);
1257 /* for operands and prefix-unary operators, attach them
1260 cn = vn->r.n = new_node(t_info);
1262 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1263 if (tc & (TC_OPERAND | TC_REGEXP)) {
1264 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1265 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1266 /* one should be very careful with switch on tclass -
1267 * only simple tclasses should be used! */
1271 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1273 v = hash_search(ahash, t_string);
1275 cn->info = OC_FNARG;
1276 cn->l.aidx = v->x.aidx;
1278 cn->l.v = newvar(t_string);
1280 if (tc & TC_ARRAY) {
1282 cn->r.n = parse_expr(TC_ARRTERM);
1288 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1290 v = cn->l.v = xzalloc(sizeof(var));
1292 setvar_i(v, t_double);
1294 setvar_s(v, t_string);
1298 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1299 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1303 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1305 cn->r.f = newfunc(t_string);
1306 cn->l.n = condition();
1310 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1311 cn = vn->r.n = parse_expr(TC_SEQTERM);
1313 syntax_error("Empty sequence");
1318 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1320 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1324 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1325 cn->l.n = condition();
1332 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1336 /* add node to chain. Return ptr to alloc'd node */
1337 static node *chain_node(uint32_t info)
1342 seq->first = seq->last = new_node(0);
1344 if (seq->programname != g_progname) {
1345 seq->programname = g_progname;
1346 n = chain_node(OC_NEWSOURCE);
1347 n->l.new_progname = xstrdup(g_progname);
1352 seq->last = n->a.n = new_node(OC_DONE);
1357 static void chain_expr(uint32_t info)
1361 n = chain_node(info);
1362 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1363 if (t_tclass & TC_GRPTERM)
1367 static node *chain_loop(node *nn)
1369 node *n, *n2, *save_brk, *save_cont;
1371 save_brk = break_ptr;
1372 save_cont = continue_ptr;
1374 n = chain_node(OC_BR | Vx);
1375 continue_ptr = new_node(OC_EXEC);
1376 break_ptr = new_node(OC_EXEC);
1378 n2 = chain_node(OC_EXEC | Vx);
1381 continue_ptr->a.n = n2;
1382 break_ptr->a.n = n->r.n = seq->last;
1384 continue_ptr = save_cont;
1385 break_ptr = save_brk;
1390 /* parse group and attach it to chain */
1391 static void chain_group(void)
1397 c = next_token(TC_GRPSEQ);
1398 } while (c & TC_NEWLINE);
1400 if (c & TC_GRPSTART) {
1401 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1402 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1403 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1404 if (t_tclass & TC_NEWLINE)
1409 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1410 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1411 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1413 chain_expr(OC_EXEC | Vx);
1416 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1417 switch (t_info & OPCLSMASK) {
1419 debug_printf_parse("%s: ST_IF\n", __func__);
1420 n = chain_node(OC_BR | Vx);
1421 n->l.n = condition();
1423 n2 = chain_node(OC_EXEC);
1425 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1427 n2->a.n = seq->last;
1434 debug_printf_parse("%s: ST_WHILE\n", __func__);
1436 n = chain_loop(NULL);
1441 debug_printf_parse("%s: ST_DO\n", __func__);
1442 n2 = chain_node(OC_EXEC);
1443 n = chain_loop(NULL);
1445 next_token(TC_WHILE);
1446 n->l.n = condition();
1450 debug_printf_parse("%s: ST_FOR\n", __func__);
1451 next_token(TC_SEQSTART);
1452 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1453 if (t_tclass & TC_SEQTERM) { /* for-in */
1454 if ((n2->info & OPCLSMASK) != OC_IN)
1455 syntax_error(EMSG_UNEXP_TOKEN);
1456 n = chain_node(OC_WALKINIT | VV);
1459 n = chain_loop(NULL);
1460 n->info = OC_WALKNEXT | Vx;
1462 } else { /* for (;;) */
1463 n = chain_node(OC_EXEC | Vx);
1465 n2 = parse_expr(TC_SEMICOL);
1466 n3 = parse_expr(TC_SEQTERM);
1476 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1477 n = chain_node(t_info);
1478 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1479 if (t_tclass & TC_OUTRDR) {
1481 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1483 if (t_tclass & TC_GRPTERM)
1488 debug_printf_parse("%s: OC_BREAK\n", __func__);
1489 n = chain_node(OC_EXEC);
1494 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1495 n = chain_node(OC_EXEC);
1496 n->a.n = continue_ptr;
1499 /* delete, next, nextfile, return, exit */
1501 debug_printf_parse("%s: default\n", __func__);
/* Parse the top level of an awk program.
 * p: presumably the program text to tokenize (its use is not visible here).
 * Tokens are read until TC_EOF and dispatched on their class: statement
 * terminator, BEGIN, END, function declaration, pattern expression
 * (TC_OPSEQ) or a bare action group.
 */
1507 static void parse_program(char *p)
1516 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1517 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1519 if (tclass & TC_OPTERM) {
1520 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1525 if (tclass & TC_BEGIN) {
1526 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1530 } else if (tclass & TC_END) {
1531 debug_printf_parse("%s: TC_END\n", __func__);
1535 } else if (tclass & TC_FUNCDECL) {
1536 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1537 next_token(TC_FUNCTION);
// register the function named by the current token; its body starts empty
1539 f = newfunc(t_string);
1540 f->body.first = NULL;
// collect parameter names: each one is looked up in ahash and given the next argument index
1542 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1543 v = findvar(ahash, t_string);
1544 v->x.aidx = f->nargs++;
1546 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1553 } else if (tclass & TC_OPSEQ) {
1554 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
// pattern rule: an OC_TEST node whose left side is the pattern expression
1556 cn = chain_node(OC_TEST);
1557 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1558 if (t_tclass & TC_GRPSTART) {
1559 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
// pattern without an action group: an OC_PRINT node is chained instead
1563 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1564 chain_node(OC_PRINT);
1566 cn->r.n = mainseq.last;
1568 } else /* if (tclass & TC_GRPSTART) */ {
1569 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1574 debug_printf_parse("%s: TC_EOF\n", __func__);
1578 /* -------- program execution part -------- */
/* Set up a splitter node for separator string s.
 * A separator of two or more characters is compiled as a regular
 * expression through mk_re_node(); the n->info assignment stores the first
 * character of s (NUL for an empty string) and is presumably the
 * single-character branch.
 * NOTE(review): n, re and ire are presumably derived from spl and the
 * return value is presumably n - neither is visible here; confirm.
 */
1580 static node *mk_splitter(const char *s, tsplitter *spl)
// a splitter that currently holds a regexp has its compiled form released first
1588 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1590 regfree(ire); // TODO: nuke ire, use re+1?
1592 if (s[0] && s[1]) { /* strlen(s) > 1 */
1593 mk_re_node(s, n, re);
1595 n->info = (uint32_t) s[0];
/* as_regex(op, preg):
 * - op is an OC_REGEXP node: its precompiled regex is returned, the
 *   case-insensitive variant (r.ire) when icase is set, else l.re.
 * - otherwise op is evaluated to a string which is compiled into preg with
 *   REG_EXTENDED (plus REG_ICASE when icase is set); when that compilation
 *   fails it is retried without REG_EXTENDED through xregcomp().
 */
1601 /* use node as a regular expression. Supplied with node ptr and regex_t
1602 * storage space. Return ptr to regex (if result points to preg, it should
1603 * be later regfree'd manually
1605 static regex_t *as_regex(node *op, regex_t *preg)
1611 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1612 return icase ? op->r.ire : op->l.re;
1615 s = getvar_s(evaluate(op, v));
1617 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1618 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1619 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1620 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1621 * (maybe gsub is not supposed to use REG_EXTENDED?).
1623 if (regcomp(preg, s, cflags)) {
1624 cflags &= ~REG_EXTENDED;
1625 xregcomp(preg, s, cflags);
/* qrealloc(b, n, size):
 * b: current buffer, may be NULL; n: number of bytes needed;
 * size: in/out, the allocated size of b.
 * Reallocation happens when b is NULL or n >= *size, and the new size is
 * n + n/2 + 80 bytes.
 * NOTE(review): presumably returns the (possibly moved) buffer; the return
 * statement is not visible here.
 */
1631 /* gradually increasing buffer.
1632 * note that we reallocate even if n == old_size,
1633 * and thus there is at least one extra allocated byte.
1635 static char* qrealloc(char *b, int n, int *size)
1637 if (!b || n >= *size) {
1638 *size = n + (n>>1) + 80;
1639 b = xrealloc(b, *size);
/* fsrealloc(size): make room for size field variables in Fields.
 * When size >= maxfields the array grows to size + 16 entries and each
 * slot from index i up to the new maxfields is initialised as VF_SPECIAL
 * with a NULL string (the starting value of i is not visible here).
 */
1644 /* resize field storage space */
1645 static void fsrealloc(int size)
1649 if (size >= maxfields) {
1651 maxfields = size + 16;
1652 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1653 for (; i < maxfields; i++) {
1654 Fields[i].type = VF_SPECIAL;
1655 Fields[i].string = NULL;
1658 /* if size < nfields, clear extra field variables */
1659 for (i = size; i < nfields; i++) {
/* Split string s into fields as directed by splitter node spl.
 * slist: out, receives a freshly allocated zeroed buffer sized for the
 *        worst case; callers read the fields back from it with nextword().
 * Returns the field count n.
 * Visible modes: regexp split (OC_REGEXP), null split (separator NUL),
 * single-character split, and a whitespace split that uses
 * skip_whitespace() and isspace().
 */
1665 static int awk_split(const char *s, node *spl, char **slist)
1670 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1672 /* in worst case, each char would be a separate field */
1673 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1676 c[0] = c[1] = (char)spl->info;
1678 if (*getvar_s(intvar[RS]) == '\0')
1682 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1684 return n; /* "": zero fields */
1685 n++; /* at least one field will be there */
1687 l = strcspn(s, c+2); /* len till next NUL or \n */
// a regexp match is only used when it starts no later than offset l
1688 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1689 && pmatch[0].rm_so <= l
1691 l = pmatch[0].rm_so;
1692 if (pmatch[0].rm_eo == 0) {
1696 n++; /* we saw yet another delimiter */
1698 pmatch[0].rm_eo = l;
1703 /* make sure we remove *all* of the separator chars */
1706 } while (++l < pmatch[0].rm_eo);
1708 s += pmatch[0].rm_eo;
1712 if (c[0] == '\0') { /* null split */
1720 if (c[0] != ' ') { /* single-character split */
// NOTE(review): both case variants of the separator are put into c, presumably only when icase is set - the guard is not visible
1722 c[0] = toupper(c[0]);
1723 c[1] = tolower(c[1]);
1727 while ((s1 = strpbrk(s1, c)) != NULL) {
1735 s = skip_whitespace(s);
1739 while (*s && !isspace(*s))
/* Split the current record (intvar[F0]) into Fields using fsplitter.
 * Every resulting field points into the fstrings buffer and is flagged
 * VF_FSTR | VF_USER | VF_DIRTY. NF is then written directly (type and
 * number) rather than through the setvar helpers.
 */
1746 static void split_f0(void)
1748 /* static char *fstrings; */
1749 #define fstrings (G.split_f0__fstrings)
1760 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1763 for (i = 0; i < n; i++) {
1764 Fields[i].string = nextword(&s);
1765 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1768 /* set NF manually to avoid side effects */
1770 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1771 intvar[NF]->number = nfields;
/* handle_special(v): react to an assignment to variable v.
 * Only variables flagged VF_SPECIAL are examined. Visible cases:
 *  NF          - $0 is rebuilt from the first n fields joined with OFS
 *  $0 (F0)     - the record is marked as not yet split
 *  FS          - the field splitter is rebuilt from the new value
 *  RS          - the record splitter is rebuilt from the new value
 *  IGNORECASE  - handling is not visible here
 *  otherwise   - presumably v is a field variable: NF is raised so that it
 *                covers the index of that field
 */
1775 /* perform additional actions when some internal variables changed */
1776 static void handle_special(var *v)
1780 const char *sep, *s;
1781 int sl, l, len, i, bsize;
1783 if (!(v->type & VF_SPECIAL))
1786 if (v == intvar[NF]) {
1787 n = (int)getvar_i(v);
1790 /* recalculate $0 */
1791 sep = getvar_s(intvar[OFS]);
1795 for (i = 0; i < n; i++) {
1796 s = getvar_s(&Fields[i]);
1799 memcpy(b+len, sep, sl);
// room is requested for the field plus one separator before the field is copied
1802 b = qrealloc(b, len+l+sl, &bsize);
1803 memcpy(b+len, s, l);
// the rebuilt buffer b is handed to F0 through setvar_p
1808 setvar_p(intvar[F0], b);
1811 } else if (v == intvar[F0]) {
1812 is_f0_split = FALSE;
1814 } else if (v == intvar[FS]) {
1816 * The POSIX-2008 standard says that changing FS should have no effect on the
1817 * current input line, but only on the next one. The language is:
1819 * > Before the first reference to a field in the record is evaluated, the record
1820 * > shall be split into fields, according to the rules in Regular Expressions,
1821 * > using the value of FS that was current at the time the record was read.
1823 * So, split up current line before assignment to FS:
1827 mk_splitter(getvar_s(v), &fsplitter);
1829 } else if (v == intvar[RS]) {
1830 mk_splitter(getvar_s(v), &rsplitter);
1832 } else if (v == intvar[IGNORECASE]) {
// NF becomes the larger of its current value and the one-based index of v within Fields
1836 n = getvar_i(intvar[NF]);
1837 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1838 /* right here v is invalid. Just to note... */
/* nextarg(pn): pn points at the current position in an argument list.
 * A non-NULL node of class OC_COMMA is treated specially.
 * NOTE(review): presumably the comma node yields its left operand and
 * leaves the remainder in *pn - the body is not visible here; confirm.
 */
1842 /* step through func/builtin/etc arguments */
1843 static node *nextarg(node **pn)
1848 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
/* Begin a for-in walk over array with loop variable v.
 * A new walker is allocated with room for array->glen + 1 bytes of key
 * text and attached to v->x.walker. A walker already attached to v (flag
 * VF_WALK) is kept as w->prev. Each key name is copied into the walker
 * buffer at w->end; the cursor w->cur starts at the beginning of wbuf.
 */
1857 static void hashwalk_init(var *v, xhash *array)
1862 walker_list *prev_walker;
1864 if (v->type & VF_WALK) {
1865 prev_walker = v->x.walker;
1870 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1872 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1873 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1874 w->cur = w->end = w->wbuf;
1875 w->prev = prev_walker;
// visit every bucket of the hash and append the key names to the walker buffer
1876 for (i = 0; i < array->csize; i++) {
1877 hi = array->items[i];
1879 strcpy(w->end, hi->name);
/* Advance the for-in walk attached to v.
 * When the cursor has reached the end of the stored keys, the previous
 * walker (w->prev) is put back into v->x.walker. Otherwise v is set to the
 * next stored key.
 * NOTE(review): the return values are not visible here; the OC_WALKNEXT
 * caller takes the a.n branch on nonzero and the r.n branch on zero.
 */
1886 static int hashwalk_next(var *v)
1888 walker_list *w = v->x.walker;
1890 if (w->cur >= w->end) {
1891 walker_list *prev_walker = w->prev;
1893 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1895 v->x.walker = prev_walker;
1899 setvar_s(v, nextword(&w->cur));
/* ptest(pattern): pattern is the expression tree to test.
 * The result of evaluate() is stored in the shared G.ptest__v scratch
 * variable and reduced to a truth value with istrue().
 */
1903 /* evaluate node, return 1 when result is true, 0 otherwise */
1904 static int ptest(node *pattern)
1906 /* ptest__v is "static": to save stack space? */
1907 return istrue(evaluate(pattern, &G.ptest__v));
/* awk_getline(rsm, v): rsm is the input stream state, v receives the record.
 * Data is read with safe_read() on the descriptor behind rsm->F into a
 * buffer grown through qrealloc(). The end of a record is located in one
 * of four ways, chosen from rsplitter:
 *  - a regexp match when the splitter is OC_REGEXP
 *  - a single separator character found with strchr()
 *  - a NUL byte found with memchr() when that character is NUL
 *  - two consecutive newlines, with runs of newlines skipped
 *    (NOTE(review): the condition selecting this mode is not visible)
 * RT is set to the text between offsets so and eo; ERRNO is set from errno
 * (presumably after a failed read - the condition is not visible).
 * Returns r; the main loop treats a value above zero as a record read.
 */
1910 /* read next record from stream rsm into a variable v */
1911 static int awk_getline(rstream *rsm, var *v)
1914 regmatch_t pmatch[2];
1915 int size, a, p, pp = 0;
1916 int fd, so, eo, r, rp;
1919 debug_printf_eval("entered %s()\n", __func__);
1921 /* we're using our own buffer since we need access to accumulating
1924 fd = fileno(rsm->F);
1929 c = (char) rsplitter.n.info;
1933 m = qrealloc(m, 256, &size);
1940 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1941 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1942 b, 1, pmatch, 0) == 0) {
1943 so = pmatch[0].rm_so;
1944 eo = pmatch[0].rm_eo;
1948 } else if (c != '\0') {
1949 s = strchr(b+pp, c);
1951 s = memchr(b+pp, '\0', p - pp);
1958 while (b[rp] == '\n')
1960 s = strstr(b+rp, "\n\n");
1963 while (b[eo] == '\n')
1972 memmove(m, m+a, p+1);
1977 m = qrealloc(m, a+p+128, &size);
1980 p += safe_read(fd, b+p, size-p-1);
1984 setvar_i(intvar[ERRNO], errno);
1993 c = b[so]; b[so] = '\0';
1997 c = b[eo]; b[eo] = '\0';
1998 setvar_s(intvar[RT], b+so);
2007 debug_printf_eval("returning from %s(): %d\n", __func__, r);
/* Format number n into buffer b of capacity size.
 * format:     printf-style format holding one conversion
 * int_as_int: when set and n has no fractional part, n is printed with
 *             %lld and format is ignored
 * Otherwise the last character of format selects the argument type:
 * one of diouxX passes (int)n, one of eEfgG passes n as double, anything
 * else raises EMSG_INV_FMT.
 * NOTE(review): the integer branch narrows n to int, so values outside
 * the int range are not preserved - confirm whether that is intended.
 * NOTE(review): presumably returns the snprintf result r; the return
 * statement is not visible here.
 */
2012 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2016 const char *s = format;
2018 if (int_as_int && n == (long long)n) {
2019 r = snprintf(b, size, "%lld", (long long)n);
// walk to the final character of format; c ends up holding the conversion letter
2021 do { c = *s; } while (c && *++s);
2022 if (strchr("diouxX", c)) {
2023 r = snprintf(b, size, format, (int)n);
2024 } else if (strchr("eEfgG", c)) {
2025 r = snprintf(b, size, format, n);
2027 syntax_error(EMSG_INV_FMT);
/* awk_printf(n): n is the argument list; its first argument is evaluated
 * as the format string and each conversion consumes one further argument.
 * The format is processed one conversion at a time and the output buffer
 * is grown with qrealloc() before each piece is written. At the end the
 * buffer is shrunk to i + 1 bytes.
 */
2033 /* formatted output into an allocated buffer, return ptr to buffer */
2034 static char *awk_printf(node *n)
2039 int i, j, incr, bsize;
2044 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
// scan forward to the next conversion, stepping over doubled percent signs
2049 while (*f && (*f != '%' || *++f == '%'))
2051 while (*f && !isalpha(*f)) {
2053 syntax_error("%*x formats are not supported");
2057 incr = (f - s) + MAXVARFMT;
2058 b = qrealloc(b, incr + i, &bsize);
2064 arg = evaluate(nextarg(&n), v);
// character conversion: a numeric argument supplies the character code, otherwise the first character of the string is used
2067 if (c == 'c' || !c) {
2068 i += sprintf(b+i, s, is_numeric(arg) ?
2069 (char)getvar_i(arg) : *getvar_s(arg));
2070 } else if (c == 's') {
// string conversion: the buffer is enlarged by the length of the argument first
2072 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2073 i += sprintf(b+i, s, s1);
// every other conversion letter is delegated to fmt_num
2075 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2079 /* if there was an error while sprintf, return value is negative */
2086 b = xrealloc(b, i + 1);
/* awk_sub: shared implementation behind the substitution builtins.
 * The regex comes from as_regex(rn, &sreg). The source text is matched
 * repeatedly with up to ten subexpression slots and the result is built
 * in resbuf, which is grown through qrealloc() and finally handed to the
 * destination variable with setvar_p().
 * After the first match REG_NOTBOL is passed to regexec().
 * An empty match advances by one source character.
 */
2091 /* Common substitution routine.
2092 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2093 * store result into (dest), return number of substitutions.
2094 * If nm = 0, replace all matches.
2095 * If src or dst is NULL, use $0.
2096 * If subexp != 0, enable subexpression matching (\1-\9).
2098 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2102 int match_no, residx, replen, resbufsize;
2104 regmatch_t pmatch[10];
2105 regex_t sreg, *regex;
2111 regex = as_regex(rn, &sreg);
2112 sp = getvar_s(src ? src : intvar[F0]);
2113 replen = strlen(repl);
2114 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2115 int so = pmatch[0].rm_so;
2116 int eo = pmatch[0].rm_eo;
2118 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2119 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2120 memcpy(resbuf + residx, sp, eo);
2122 if (++match_no >= nm) {
2127 residx -= (eo - so);
2129 for (s = repl; *s; s++) {
2130 char c = resbuf[residx++] = *s;
2135 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2137 residx -= ((nbs + 3) >> 1);
2144 resbuf[residx++] = c;
2146 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2147 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2148 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2156 regexec_flags = REG_NOTBOL;
2161 /* Empty match (e.g. "b*" will match anywhere).
2162 * Advance by one char. */
2164 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2165 //... and will erroneously match "b" even though it is NOT at the word start.
2166 //we need REG_NOTBOW but it does not exist...
2167 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2168 //it should be able to do it correctly.
2169 /* Subtle: this is safe only because
2170 * qrealloc allocated at least one extra byte */
2171 resbuf[residx] = *sp;
2179 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2180 strcpy(resbuf + residx, sp);
2182 //bb_error_msg("end sp:'%s'%p", sp,sp);
2183 setvar_p(dest ? dest : intvar[F0], resbuf);
/* do_mktime(ds): convert a date specification into a time value.
 * ds is scanned as up to seven whitespace separated integers: year, month,
 * day, hour, minute, second and a final signed field (its target is not
 * visible here). tm_isdst defaults to -1. A month below 1 or a year below
 * 1900 is among the rejected inputs; the year is rebased by subtracting
 * 1900 before mktime() is called and its result returned.
 * NOTE(review): the value returned for rejected input is not visible here.
 */
2189 static NOINLINE int do_mktime(const char *ds)
2194 /*memset(&then, 0, sizeof(then)); - not needed */
2195 then.tm_isdst = -1; /* default is unknown */
2197 /* manpage of mktime says these fields are ints,
2198 * so we can sscanf stuff directly into them */
2199 count = sscanf(ds, "%u %u %u %u %u %u %d",
2200 &then.tm_year, &then.tm_mon, &then.tm_mday,
2201 &then.tm_hour, &then.tm_min, &then.tm_sec,
2205 || (unsigned)then.tm_mon < 1
2206 || (unsigned)then.tm_year < 1900
2212 then.tm_year -= 1900;
2214 return mktime(&then);
/* exec_builtin(op, res): run the builtin function described by node op.
 * Up to four arguments are collected from the argument list; res is the
 * variable that receives the result in most of the visible cases. The case
 * labels are not visible here, so the groups below are identified by the
 * code they contain: atan2, split, substr, bitwise operations, case
 * conversion, substring search, strftime, mktime, match and the three
 * awk_sub() based substitutions.
 */
2217 static NOINLINE var *exec_builtin(node *op, var *res)
2219 #define tspl (G.exec_builtin__tspl)
2225 regmatch_t pmatch[2];
2234 isr = info = op->info;
2237 av[2] = av[3] = NULL;
// flag bits in isr decide whether an argument is evaluated and whether its string form is fetched
2238 for (i = 0; i < 4 && op; i++) {
2239 an[i] = nextarg(&op);
2240 if (isr & 0x09000000)
2241 av[i] = evaluate(an[i], &tv[i]);
2242 if (isr & 0x08000000)
2243 as[i] = getvar_s(av[i]);
// the top two bits of info hold the minimum argument count
2248 if ((uint32_t)nargs < (info >> 30))
2249 syntax_error(EMSG_TOO_FEW_ARGS);
2255 if (ENABLE_FEATURE_AWK_LIBM)
2256 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2258 syntax_error(EMSG_NO_MATH);
// a literal regexp third argument is used as the splitter directly, otherwise one is built from its string value
2265 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2266 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2271 n = awk_split(as[0], spl, &s);
// the target array is emptied, then filled with the pieces under indices 1..n
2273 clear_array(iamarray(av[1]));
2274 for (i = 1; i <= n; i++)
2275 setari_u(av[1], i, nextword(&s));
// the start position argument is one-based
2285 i = getvar_i(av[1]) - 1;
2290 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2293 s = xstrndup(as[0]+i, n);
2298 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2299 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2301 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2305 setvar_i(res, ~getvar_i_int(av[0]));
// NOTE(review): the shift counts below are used as given; counts at or above the operand width are undefined in C - confirm whether callers can pass them
2309 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2313 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2317 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2321 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2327 s1 = s = xstrdup(as[0]);
2329 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
// ASCII-only case mapping: a letter is detected after folding with 0x20, then that bit is cleared or set
2330 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2331 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2341 l = strlen(as[0]) - ll;
2342 if (ll > 0 && l >= 0) {
2344 char *s = strstr(as[0], as[1]);
// the reported position is one-based
2346 n = (s - as[0]) + 1;
2348 /* this piece of code is terribly slow and
2349 * really should be rewritten
2351 for (i = 0; i <= l; i++) {
2352 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2364 tt = getvar_i(av[1]);
2367 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2368 i = strftime(g_buf, MAXVARFMT,
2369 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2372 setvar_s(res, g_buf);
2376 setvar_i(res, do_mktime(as[0]));
2380 re = as_regex(an[1], &sreg);
2381 n = regexec(re, as[0], 1, pmatch, 0);
// NOTE(review): presumably the no-match branch - offsets 0 and -1 make RSTART 0 and RLENGTH -1 below
2386 pmatch[0].rm_so = 0;
2387 pmatch[0].rm_eo = -1;
2389 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2390 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2391 setvar_i(res, pmatch[0].rm_so);
// three awk_sub variants: chosen match number with subexpressions, replace all, replace first
2397 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2401 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2405 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
/* evaluate(op, res): interpreter core.
 * op:  node to execute, res: variable that receives the result.
 * For each node the OF_RES1, OF_RES2, OF_STR1, OF_STR2 and OF_NUM1 bits of
 * opinfo first trigger evaluation of the operands and fetching of their
 * string or numeric forms (L.v, R.v, L.s, R.s, L_d). The opcode class then
 * selects the action. Node classes are handled in two groups, marked in
 * the code as iterative and recursive node types.
 */
2415 * Evaluate node - the heart of the program. Supplied with subtree
2416 * and place where to store result. returns ptr to result.
2418 #define XC(n) ((n) >> 8)
2420 static var *evaluate(node *op, var *res)
2422 /* This procedure is recursive so we should count every byte */
2423 #define fnargs (G.evaluate__fnargs)
2424 /* seed is initialized to 1 */
2425 #define seed (G.evaluate__seed)
2426 #define sreg (G.evaluate__sreg)
2431 return setvar_s(res, NULL);
2433 debug_printf_eval("entered %s()\n", __func__);
2441 } L = L; /* for compiler */
2452 opn = (opinfo & OPNMASK);
2453 g_lineno = op->lineno;
2455 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2457 /* execute inevitable things */
2458 if (opinfo & OF_RES1)
2459 L.v = evaluate(op1, v1);
2460 if (opinfo & OF_RES2)
2461 R.v = evaluate(op->r.n, v1+1);
2462 if (opinfo & OF_STR1) {
2463 L.s = getvar_s(L.v);
2464 debug_printf_eval("L.s:'%s'\n", L.s);
2466 if (opinfo & OF_STR2) {
2467 R.s = getvar_s(R.v);
2468 debug_printf_eval("R.s:'%s'\n", R.s);
2470 if (opinfo & OF_NUM1) {
2471 L_d = getvar_i(L.v);
2472 debug_printf_eval("L_d:%f\n", L_d);
2475 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2476 switch (XC(opinfo & OPCLSMASK)) {
2478 /* -- iterative node type -- */
2482 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2483 /* it's range pattern */
// OF_CHECKED records that the range is open: set when the start pattern matches, cleared when the end pattern matches
2484 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2485 op->info |= OF_CHECKED;
2486 if (ptest(op1->r.n))
2487 op->info &= ~OF_CHECKED;
2493 op = ptest(op1) ? op->a.n : op->r.n;
2497 /* just evaluate an expression, also used as unconditional jump */
2501 /* branch, used in if-else and various loops */
2503 op = istrue(L.v) ? op->a.n : op->r.n;
2506 /* initialize for-in loop */
2507 case XC( OC_WALKINIT ):
2508 hashwalk_init(L.v, iamarray(R.v));
2511 /* get next array item */
2512 case XC( OC_WALKNEXT ):
2513 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2516 case XC( OC_PRINT ):
2517 case XC( OC_PRINTF ): {
// redirected output: the stream entry for target R.s is obtained through newfile()
2521 rstream *rsm = newfile(R.s);
2524 rsm->F = popen(R.s, "w");
2526 bb_perror_msg_and_die("popen");
// opn w opens the file for writing, any other value opens it for appending
2529 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2535 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2537 fputs(getvar_s(intvar[F0]), F);
2540 var *v = evaluate(nextarg(&op1), v1);
// numeric values go through fmt_num with OFMT, everything else is written as a string
2541 if (v->type & VF_NUMBER) {
2542 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2546 fputs(getvar_s(v), F);
2550 fputs(getvar_s(intvar[OFS]), F);
2553 fputs(getvar_s(intvar[ORS]), F);
2555 } else { /* OC_PRINTF */
2556 char *s = awk_printf(op1);
2564 case XC( OC_DELETE ): {
2565 uint32_t info = op1->info & OPCLSMASK;
2568 if (info == OC_VAR) {
2570 } else if (info == OC_FNARG) {
2571 v = &fnargs[op1->l.aidx];
2573 syntax_error(EMSG_NOT_ARRAY);
// one element is removed by key, or the whole array is cleared
2579 s = getvar_s(evaluate(op1->r.n, v1));
2580 hash_remove(iamarray(v), s);
2582 clear_array(iamarray(v));
2587 case XC( OC_NEWSOURCE ):
2588 g_progname = op->l.new_progname;
2591 case XC( OC_RETURN ):
2595 case XC( OC_NEXTFILE ):
2606 /* -- recursive node type -- */
2610 if (L.v == intvar[NF])
2614 case XC( OC_FNARG ):
2615 L.v = &fnargs[op->l.aidx];
// with a subscript present the element is looked up in the array, otherwise the variable itself is the result
2617 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2621 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2624 case XC( OC_REGEXP ):
2626 L.s = getvar_s(intvar[F0]);
2629 case XC( OC_MATCH ):
2633 regex_t *re = as_regex(op1, &sreg);
2634 int i = regexec(re, L.s, 0, NULL, 0);
// the result is inverted when opn is the negation sign
2637 setvar_i(res, (i == 0) ^ (opn == '!'));
2642 debug_printf_eval("MOVE\n");
2643 /* if source is a temporary string, just relink it to dest */
2644 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2645 //then L.v ends up being a string, which is wrong
2646 // if (R.v == v1+1 && R.v->string) {
2647 // res = setvar_p(L.v, R.v->string);
2648 // R.v->string = NULL;
2650 res = copyvar(L.v, R.v);
2654 case XC( OC_TERNARY ):
// the right operand must be an OC_COLON node holding both alternatives
2655 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2656 syntax_error(EMSG_POSSIBLE_ERROR);
2657 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2660 case XC( OC_FUNC ): {
2662 const char *sv_progname;
2664 /* The body might be empty, still has to eval the args */
2665 if (!op->r.n->info && !op->r.f->body.first)
2666 syntax_error(EMSG_UNDEF_FUNC);
// one slot more than the declared parameter count is allocated for the call frame
2668 vbeg = v = nvalloc(op->r.f->nargs + 1);
2670 var *arg = evaluate(nextarg(&op1), v1);
2672 v->type |= VF_CHILD;
2674 if (++v - vbeg >= op->r.f->nargs)
// g_progname is saved before the body runs and restored afterwards
2680 sv_progname = g_progname;
2682 res = evaluate(op->r.f->body.first, res);
2684 g_progname = sv_progname;
2691 case XC( OC_GETLINE ):
2692 case XC( OC_PGETLINE ): {
2699 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2700 rsm->F = popen(L.s, "r");
2701 rsm->is_pipe = TRUE;
2703 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2708 iF = next_input_file();
// no usable stream: errno is recorded in ERRNO
2712 if (!rsm || !rsm->F) {
2713 setvar_i(intvar[ERRNO], errno);
2721 i = awk_getline(rsm, R.v);
// a successful read with op1 NULL also increments FNR
2722 if (i > 0 && !op1) {
2723 incvar(intvar[FNR]);
2730 /* simple builtins */
2731 case XC( OC_FBLTIN ): {
2732 double R_d = R_d; /* for compiler */
2736 R_d = (long long)L_d;
2740 R_d = (double)rand() / (double)RAND_MAX;
2744 if (ENABLE_FEATURE_AWK_LIBM) {
2750 if (ENABLE_FEATURE_AWK_LIBM) {
2756 if (ENABLE_FEATURE_AWK_LIBM) {
2762 if (ENABLE_FEATURE_AWK_LIBM) {
2768 if (ENABLE_FEATURE_AWK_LIBM) {
2773 syntax_error(EMSG_NO_MATH);
// without an argument the seed is taken from the current time
2778 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2787 debug_printf_eval("length: L.s:'%s'\n", L.s);
2789 L.s = getvar_s(intvar[F0]);
2790 debug_printf_eval("length: L.s='%s'\n", L.s);
2792 else if (L.v->type & VF_ARRAY) {
2793 R_d = L.v->x.array->nel;
2794 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
// the result is the system() value shifted right by 8 bits; 0 when exec is disabled or the command is empty
2802 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2803 ? (system(L.s) >> 8) : 0;
2809 } else if (L.s && *L.s) {
2810 rstream *rsm = newfile(L.s);
2820 rsm = (rstream *)hash_search(fdhash, L.s);
2821 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2823 debug_printf_eval("OC_FBLTIN F_cl "
2824 "rsm->is_pipe:%d, ->F:%p\n",
2825 rsm->is_pipe, rsm->F);
2826 /* Can be NULL if open failed. Example:
2827 * getline line <"doesnt_exist";
2828 * close("doesnt_exist"); <--- here rsm->F is NULL
2831 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2833 hash_remove(fdhash, L.s);
2836 setvar_i(intvar[ERRNO], errno);
2845 case XC( OC_BUILTIN ):
2846 res = exec_builtin(op, res);
2849 case XC( OC_SPRINTF ):
2850 setvar_p(res, awk_printf(op1));
2853 case XC( OC_UNARY ): {
2856 Ld = R_d = getvar_i(R.v);
2883 case XC( OC_FIELD ): {
2884 int i = (int)getvar_i(R.v);
// field numbers are one-based while Fields is indexed from zero
2891 res = &Fields[i - 1];
2896 /* concatenation (" ") and index joining (",") */
2897 case XC( OC_CONCAT ):
2898 case XC( OC_COMMA ): {
2899 const char *sep = "";
2900 if ((opinfo & OPCLSMASK) == OC_COMMA)
2901 sep = getvar_s(intvar[SUBSEP]);
2902 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
// short-circuit logic: the right side is only tested when the left side does not already decide the result
2907 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2911 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2914 case XC( OC_BINARY ):
2915 case XC( OC_REPLACE ): {
2916 double R_d = getvar_i(R.v);
2917 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2930 syntax_error(EMSG_DIV_BY_ZERO);
2934 if (ENABLE_FEATURE_AWK_LIBM)
2935 L_d = pow(L_d, R_d);
2937 syntax_error(EMSG_NO_MATH);
2941 syntax_error(EMSG_DIV_BY_ZERO);
// remainder: L_d minus the truncated quotient times R_d
2942 L_d -= (long long)(L_d / R_d) * R_d;
2945 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
// OC_BINARY stores into res, OC_REPLACE writes back into the left operand
2946 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2950 case XC( OC_COMPARE ): {
2951 int i = i; /* for compiler */
// numeric comparison when both sides are numeric, string comparison otherwise (case-insensitive when icase is set)
2954 if (is_numeric(L.v) && is_numeric(R.v)) {
2955 Ld = getvar_i(L.v) - getvar_i(R.v);
2957 const char *l = getvar_s(L.v);
2958 const char *r = getvar_s(R.v);
2959 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2961 switch (opn & 0xfe) {
// the low bit of opn inverts the comparison result
2972 setvar_i(res, (i == 0) ^ (opn & 1));
2977 syntax_error(EMSG_POSSIBLE_ERROR);
2979 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2981 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2988 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2996 /* -------- main & co. -------- */
/* awk_exit(r): final stage of a run, r presumably being the exit status.
 * The END sequence is evaluated, then every stream recorded in fdhash
 * that is an open pipe is closed with pclose(), which waits for the child.
 * NOTE(review): the guard around the END evaluation and the actual exit
 * call are not visible here.
 */
2998 static int awk_exit(int r)
3009 evaluate(endseq.first, &tv);
3012 /* waiting for children */
3013 for (i = 0; i < fdhash->csize; i++) {
3014 hi = fdhash->items[i];
3016 if (hi->data.rs.F && hi->data.rs.is_pipe)
3017 pclose(hi->data.rs.F);
/* is_assignment(expr): expr is a command-line word.
 * It qualifies only when its first character passes isalnum_() and it
 * contains an equals sign. The word is duplicated, the value part has its
 * escape sequences decoded in place, and the variable named by the copy is
 * created or looked up with newvar() and set as user input (setvar_u).
 * NOTE(review): presumably the equals sign in the copy is overwritten
 * with NUL to terminate the name - that line is not visible here.
 */
3025 /* if expr looks like "var=value", perform assignment and return 1,
3026 * otherwise return 0 */
3027 static int is_assignment(const char *expr)
3031 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3035 exprc = xstrdup(expr);
// val is moved to the same offset inside the writable copy
3036 val = exprc + (val - expr);
3039 unescape_string_in_place(val);
3040 setvar_u(newvar(exprc), val);
/* next_input_file(): advance to the next ARGV entry that names a file.
 * ARGIND is incremented and the matching ARGV element fetched. Entries
 * that are empty or that is_assignment() accepts as var=value are not
 * opened. A real file name is opened with xfopen_stdin() and stored in
 * FILENAME. The walk ends once ARGIND + 1 reaches ARGC.
 * NOTE(review): the return value and the behaviour when no file operand
 * was ever seen (files_happen) are not visible here.
 */
3045 /* switch to next input file */
3046 static rstream *next_input_file(void)
3048 #define rsm (G.next_input_file__rsm)
3049 #define files_happen (G.next_input_file__files_happen)
3052 const char *fname, *ind;
3057 rsm.pos = rsm.adv = 0;
3060 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3067 ind = getvar_s(incvar(intvar[ARGIND]));
3068 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3069 if (fname && *fname && !is_assignment(fname)) {
3070 F = xfopen_stdin(fname);
3075 files_happen = TRUE;
3076 setvar_s(intvar[FILENAME], fname);
/* awk_main: applet entry point.
 * Order of work visible here:
 *  1. force the C numeric locale, allocate g_buf and the four hash tables
 *  2. create the built-in variables from the vNames and vValues tables
 *  3. build the FS and RS splitters, register the three standard streams
 *  4. copy the environment into the ENVIRON array
 *  5. parse options -F, -v, -f and -W
 *  6. parse the program, from the -f file or the first argument
 *  7. fill ARGC and ARGV, run the BEGIN sequence
 *  8. read records from each input file and run the main sequence
 *  9. finish through awk_exit()
 */
3083 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3084 int awk_main(int argc, char **argv)
3088 llist_t *list_v = NULL;
3089 llist_t *list_f = NULL;
3094 char *vnames = (char *)vNames; /* cheat */
3095 char *vvalues = (char *)vValues;
3099 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3100 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3101 if (ENABLE_LOCALE_SUPPORT)
3102 setlocale(LC_NUMERIC, "C");
3106 /* allocate global buffer */
3107 g_buf = xmalloc(MAXVARFMT + 1);
3109 vhash = hash_init();
3110 ahash = hash_init();
3111 fdhash = hash_init();
3112 fnhash = hash_init();
3114 /* initialize variables */
// the name and value tables are walked in parallel; a value starting with byte \377 means no initial string is assigned
3115 for (i = 0; *vnames; i++) {
3116 intvar[i] = v = newvar(nextword(&vnames));
3117 if (*vvalues != '\377')
3118 setvar_s(v, nextword(&vvalues));
// an asterisk found right after the name flags the variable as VF_SPECIAL
3122 if (*vnames == '*') {
3123 v->type |= VF_SPECIAL;
3128 handle_special(intvar[FS]);
3129 handle_special(intvar[RS]);
3131 newfile("/dev/stdin")->F = stdin;
3132 newfile("/dev/stdout")->F = stdout;
3133 newfile("/dev/stderr")->F = stderr;
3135 /* Huh, people report that sometimes environ is NULL. Oh well. */
3136 if (environ) for (envp = environ; *envp; envp++) {
3137 /* environ is writable, thus we don't strdup it needlessly */
3139 char *s1 = strchr(s, '=');
3142 /* Both findvar and setvar_u take const char*
3143 * as 2nd arg -> environment is not trashed */
3144 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3148 opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3149 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
3152 if (opt & 0x1) { /* -F */
3153 unescape_string_in_place(opt_F);
3154 setvar_s(intvar[FS], opt_F);
3156 while (list_v) { /* -v */
3157 if (!is_assignment(llist_pop(&list_v)))
3160 if (list_f) { /* -f */
3165 g_progname = llist_pop(&list_f);
3166 from_file = xfopen_stdin(g_progname);
3167 /* one byte is reserved for some trick in next_token */
// the file is read in rounds of up to 4094 bytes while the buffer grows by 4096 per round; parsing starts at s + 1
3168 for (i = j = 1; j > 0; i += j) {
3169 s = xrealloc(s, i + 4096);
3170 j = fread(s + i, 1, 4094, from_file);
3174 parse_program(s + 1);
3178 } else { // no -f: take program from 1st parameter
3181 g_progname = "cmd. line";
3182 parse_program(*argv++);
3184 if (opt & 0x8) // -W
3185 bb_error_msg("warning: option -W is ignored");
3187 /* fill in ARGV array */
3188 setvar_i(intvar[ARGC], argc);
3189 setari_u(intvar[ARGV], 0, "awk");
3192 setari_u(intvar[ARGV], ++i, *argv++);
3194 evaluate(beginseq.first, &tv);
// with neither main rules nor an END sequence there is no input to read
3195 if (!mainseq.first && !endseq.first)
3196 awk_exit(EXIT_SUCCESS);
3198 /* input file could already be opened in BEGIN block */
3200 iF = next_input_file();
3202 /* passing through input files */
3205 setvar_i(intvar[FNR], 0);
3207 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3210 incvar(intvar[FNR]);
3211 evaluate(mainseq.first, &tv);
3218 syntax_error(strerror(errno));
3220 iF = next_input_file();
3223 awk_exit(EXIT_SUCCESS);