awk: code shrink
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
17 /* If you comment out one of these below, it will be #defined later
18  * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...)  do {} while (0)
20
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
23 #endif
24
25
26
27 #define MAXVARFMT       240
28 #define MINNVBLOCK      64
29
30 /* variable flags */
31 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
32 #define VF_ARRAY        0x0002  /* 1 = it's an array */
33
34 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
35 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
37 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
41
42 /* these flags are static, don't change them when value is changed */
43 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
44
/* Node of a walker list. Nodes are chained through prev; nvfree() releases
 * a list by following prev until it reaches NULL.
 * NOTE(review): wbuf[1] presumably is a variable-length tail delimited by
 * cur/end - confirm at the allocation site. */
45 typedef struct walker_list {
46         char *end;
47         char *cur;
48         struct walker_list *prev;
49         char wbuf[1];
50 } walker_list;
51
52 /* Variable. Which member of x is meaningful depends on the VF_* bits in type:
 * VF_ARRAY -> x.array, VF_CHILD -> x.parent, VF_WALK -> x.walker. */
53 typedef struct var_s {
54         unsigned type;            /* flags */
55         double number;
56         char *string;
57         union {
58                 int aidx;               /* func arg idx (for compilation stage) */
59                 struct xhash_s *array;  /* array ptr */
60                 struct var_s *parent;   /* for func args, ptr to actual parameter */
61                 walker_list *walker;    /* list of array elements (for..in) */
62         } x;
63 } var;
64
65 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
66 typedef struct chain_s {
67         struct node_s *first;   /* first node of the chain */
68         struct node_s *last;    /* last node of the chain */
69         const char *programname;
70 } chain;
71
72 /* Function; instances are stored in fnhash (see the newfunc() macro) */
73 typedef struct func_s {
74         unsigned nargs;
75         struct chain_s body;    /* node chain of the function body */
76 } func;
77
78 /* I/O stream; instances are stored in fdhash (see the newfile() macro).
 * NOTE(review): adv/size/pos presumably describe the state of buffer -
 * confirm against the record-reading code. */
79 typedef struct rstream_s {
80         FILE *F;
81         char *buffer;
82         int adv;
83         int size;
84         int pos;
85         smallint is_pipe;
86 } rstream;
87
/* One entry of an xhash bucket chain.
 * data must remain the first member: hash_find() stores the data pointer
 * returned by hash_search() in a hash_item pointer and takes &hi->data
 * from it again, which only works while data sits at offset 0. */
88 typedef struct hash_item_s {
89         union {
90                 struct var_s v;         /* variable/array hash */
91                 struct rstream_s rs;    /* redirect streams hash */
92                 struct func_s f;        /* functions hash */
93         } data;
94         struct hash_item_s *next;       /* next in chain */
95         char name[1];                   /* really it's longer: hash_find() allocates sizeof(item) + strlen(name) + 1 */
96 } hash_item;
97
/* Chained hash table keyed by NUL-terminated names */
98 typedef struct xhash_s {
99         unsigned nel;           /* num of elements */
100         unsigned csize;         /* current hash size (number of buckets in items[]) */
101         unsigned nprime;        /* next hash size in PRIMES[] (index of it) */
102         unsigned glen;          /* total length of item names, each counted with its NUL */
103         struct hash_item_s **items;     /* bucket array, csize chain heads */
104 } xhash;
105
106 /* Tree node.
 * NOTE(review): info presumably carries the OC_xxx opclass, OF_xxx operand
 * flags and the priority byte in the layout used by tokeninfo[] - confirm
 * in the parser. */
107 typedef struct node_s {
108         uint32_t info;
109         unsigned lineno;
110         union {
111                 struct node_s *n;
112                 var *v;
113                 int aidx;
114                 char *new_progname;
115                 regex_t *re;
116         } l;
117         union {
118                 struct node_s *n;
119                 regex_t *ire;
120                 func *f;
121         } r;
122         union {
123                 struct node_s *n;
124         } a;
125 } node;
126
127 /* Block of temporary variables; blocks form a doubly linked list that
 * nvalloc() extends and nvfree() walks back through prev */
128 typedef struct nvblock_s {
129         int size;               /* capacity of nv[], in vars */
130         var *pos;               /* first unused slot in nv[] */
131         struct nvblock_s *prev;
132         struct nvblock_s *next;
133         var nv[];               /* storage, allocated together with the header */
134 } nvblock;
135
/* Splitter: a node plus storage for two compiled regexes.
 * NOTE(review): re[2] presumably holds the case-sensitive and the
 * case-insensitive variant (cf. l.re / r.ire in node) - confirm. */
136 typedef struct tsplitter_s {
137         node n;
138         regex_t re[2];
139 } tsplitter;
140
141 /* simple token classes */
142 /* Order and hex values are very important!!!  See next_token() */
143 #define TC_SEQSTART      1                              /* ( */
144 #define TC_SEQTERM      (1 << 1)                /* ) */
145 #define TC_REGEXP       (1 << 2)                /* /.../ */
146 #define TC_OUTRDR       (1 << 3)                /* | > >> */
147 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
148 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
149 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
150 #define TC_IN           (1 << 7)
151 #define TC_COMMA        (1 << 8)
152 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
153 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
154 #define TC_ARRTERM      (1 << 11)               /* ] */
155 #define TC_GRPSTART     (1 << 12)               /* { */
156 #define TC_GRPTERM      (1 << 13)               /* } */
157 #define TC_SEMICOL      (1 << 14)
158 #define TC_NEWLINE      (1 << 15)
159 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
160 #define TC_WHILE        (1 << 17)
161 #define TC_ELSE         (1 << 18)
162 #define TC_BUILTIN      (1 << 19)
163 #define TC_GETLINE      (1 << 20)
164 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
165 #define TC_BEGIN        (1 << 22)
166 #define TC_END          (1 << 23)
167 #define TC_EOF          (1 << 24)
168 #define TC_VARIABLE     (1 << 25)
169 #define TC_ARRAY        (1 << 26)
170 #define TC_FUNCTION     (1 << 27)
171 #define TC_STRING       (1 << 28)
172 #define TC_NUMBER       (1 << 29)
173
174 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
175
176 /* combined token classes */
177 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
178 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
179 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
180                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
181
182 #define TC_STATEMNT (TC_STATX | TC_WHILE)
183 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
184
185 /* word tokens, cannot mean something else if not expected */
186 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
187                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
188
189 /* discard newlines after these */
190 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
191                    | TC_BINOP | TC_OPTERM)
192
193 /* what can expression begin with */
194 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
195 /* what can group begin with */
196 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
197
198 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
199 /* operator is inserted between them */
200 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
201                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
202 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
203
204 #define OF_RES1    0x010000
205 #define OF_RES2    0x020000
206 #define OF_STR1    0x040000
207 #define OF_STR2    0x080000
208 #define OF_NUM1    0x100000
209 #define OF_CHECKED 0x200000
210
211 /* combined operator flags */
212 #define xx      0
213 #define xV      OF_RES2
214 #define xS      (OF_RES2 | OF_STR2)
215 #define Vx      OF_RES1
216 #define VV      (OF_RES1 | OF_RES2)
217 #define Nx      (OF_RES1 | OF_NUM1)
218 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
219 #define Sx      (OF_RES1 | OF_STR1)
220 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
221 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
222
223 #define OPCLSMASK 0xFF00
224 #define OPNMASK   0x007F
225
226 /* Operator priority is the highest byte (even: r->l, odd: l->r grouping).
227  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
228  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
229  */
230 #define P(x)      (x << 24)
231 #define PRIMASK   0x7F000000
232 #define PRIMASK2  0x7E000000
233
234 /* Operation classes */
235
236 #define SHIFT_TIL_THIS  0x0600
237 #define RECUR_FROM_THIS 0x1000
238
239 enum {
240         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
241         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
242
243         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
244         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
245         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
246
247         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
248         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
249         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
250         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
251         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
252         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
253         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
254         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
255         OC_DONE = 0x2800,
256
257         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
258         ST_WHILE = 0x3300
259 };
260
261 /* simple builtins */
262 enum {
263         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
264         F_ti,   F_le,   F_sy,   F_ff,   F_cl
265 };
266
267 /* builtins */
268 enum {
269         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
270         B_ge,   B_gs,   B_su,
271         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
272 };
273
274 /* tokens and their corresponding info values */
275
276 #define NTC     "\377"  /* switch to next token class (tc<<1) */
277 #define NTCC    '\377'
278
279 #define OC_B    OC_BUILTIN
280
281 static const char tokenlist[] ALIGN1 =
282         "\1("       NTC
283         "\1)"       NTC
284         "\1/"       NTC                                 /* REGEXP */
285         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
286         "\2++"      "\2--"      NTC                     /* UOPPOST */
287         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
288         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
289         "\2*="      "\2/="      "\2%="      "\2^="
290         "\1+"       "\1-"       "\3**="     "\2**"
291         "\1/"       "\1%"       "\1^"       "\1*"
292         "\2!="      "\2>="      "\2<="      "\1>"
293         "\1<"       "\2!~"      "\1~"       "\2&&"
294         "\2||"      "\1?"       "\1:"       NTC
295         "\2in"      NTC
296         "\1,"       NTC
297         "\1|"       NTC
298         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
299         "\1]"       NTC
300         "\1{"       NTC
301         "\1}"       NTC
302         "\1;"       NTC
303         "\1\n"      NTC
304         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
305         "\10continue"           "\6delete"  "\5print"
306         "\6printf"  "\4next"    "\10nextfile"
307         "\6return"  "\4exit"    NTC
308         "\5while"   NTC
309         "\4else"    NTC
310
311         "\3and"     "\5compl"   "\6lshift"  "\2or"
312         "\6rshift"  "\3xor"
313         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
314         "\3cos"     "\3exp"     "\3int"     "\3log"
315         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
316         "\6gensub"  "\4gsub"    "\5index"   "\6length"
317         "\5match"   "\5split"   "\7sprintf" "\3sub"
318         "\6substr"  "\7systime" "\10strftime" "\6mktime"
319         "\7tolower" "\7toupper" NTC
320         "\7getline" NTC
321         "\4func"    "\10function"   NTC
322         "\5BEGIN"   NTC
323         "\3END"     "\0"
324         ;
325
326 static const uint32_t tokeninfo[] = {
327         0,
328         0,
329         OC_REGEXP,
330         xS|'a',     xS|'w',     xS|'|',
331         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
332         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
333             OC_FIELD|xV|P(5),
334         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
335             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
336         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
337             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
338         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
339             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
340         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
341             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
342         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
343             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
344         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
345             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
346         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
347             OC_COLON|xx|P(67)|':',
348         OC_IN|SV|P(49),
349         OC_COMMA|SS|P(80),
350         OC_PGETLINE|SV|P(37),
351         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
352             OC_UNARY|xV|P(19)|'!',
353         0,
354         0,
355         0,
356         0,
357         0,
358         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
359         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
360         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
361         OC_RETURN|Vx,   OC_EXIT|Nx,
362         ST_WHILE,
363         0,
364
365         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
366         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
367         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
372         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
373         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374         OC_GETLINE|SV|P(0),
375         0,      0,
376         0,
377         0
378 };
379
380 /* internal variable names and their initial values       */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
382 enum {
383         CONVFMT,    OFMT,       FS,         OFS,
384         ORS,        RS,         RT,         FILENAME,
385         SUBSEP,     F0,         ARGIND,     ARGC,
386         ARGV,       ERRNO,      FNR,        NR,
387         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
388 };
389
390 static const char vNames[] ALIGN1 =
391         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
392         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
393         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
394         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
395         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
396
397 static const char vValues[] ALIGN1 =
398         "%.6g\0"    "%.6g\0"    " \0"       " \0"
399         "\n\0"      "\n\0"      "\0"        "\0"
400         "\034\0"    "\0"        "\377";
401
402 /* hash size may grow to these values */
403 #define FIRST_PRIME 61
404 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
405
406
407 /* Globals. Split in two parts so that first one is addressed
408  * with (mostly short) negative offsets.
409  * NB: it's unsafe to put members of type "double"
410  * into globals2 (gcc may fail to align them).
411  */
412 struct globals {
413         double t_double;
414         chain beginseq, mainseq, endseq;
415         chain *seq;
416         node *break_ptr, *continue_ptr;
417         rstream *iF;
418         xhash *vhash, *ahash, *fdhash, *fnhash;
419         const char *g_progname;
420         int g_lineno;
421         int nfields;
422         int maxfields; /* used in fsrealloc() only */
423         var *Fields;
424         nvblock *g_cb;
425         char *g_pos;
426         char *g_buf;
427         smallint icase;
428         smallint exiting;
429         smallint nextrec;
430         smallint nextfile;
431         smallint is_f0_split;
432 };
433 struct globals2 {
434         uint32_t t_info; /* often used */
435         uint32_t t_tclass;
436         char *t_string;
437         int t_lineno;
438         int t_rollback;
439
440         var *intvar[NUM_INTERNAL_VARS]; /* often used */
441
442         /* former statics from various functions */
443         char *split_f0__fstrings;
444
445         uint32_t next_token__save_tclass;
446         uint32_t next_token__save_info;
447         uint32_t next_token__ltclass;
448         smallint next_token__concat_inserted;
449
450         smallint next_input_file__files_happen;
451         rstream next_input_file__rsm;
452
453         var *evaluate__fnargs;
454         unsigned evaluate__seed;
455         regex_t evaluate__sreg;
456
457         var ptest__v;
458
459         tsplitter exec_builtin__tspl;
460
461         /* biggest and least used members go last */
462         tsplitter fsplitter, rsplitter;
463 };
464 #define G1 (ptr_to_globals[-1])
465 #define G (*(struct globals2 *)ptr_to_globals)
466 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
467 /*char G1size[sizeof(G1)]; - 0x74 */
468 /*char Gsize[sizeof(G)]; - 0x1c4 */
469 /* Trying to keep most of members accessible with short offsets: */
470 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
471 #define t_double     (G1.t_double    )
472 #define beginseq     (G1.beginseq    )
473 #define mainseq      (G1.mainseq     )
474 #define endseq       (G1.endseq      )
475 #define seq          (G1.seq         )
476 #define break_ptr    (G1.break_ptr   )
477 #define continue_ptr (G1.continue_ptr)
478 #define iF           (G1.iF          )
479 #define vhash        (G1.vhash       )
480 #define ahash        (G1.ahash       )
481 #define fdhash       (G1.fdhash      )
482 #define fnhash       (G1.fnhash      )
483 #define g_progname   (G1.g_progname  )
484 #define g_lineno     (G1.g_lineno    )
485 #define nfields      (G1.nfields     )
486 #define maxfields    (G1.maxfields   )
487 #define Fields       (G1.Fields      )
488 #define g_cb         (G1.g_cb        )
489 #define g_pos        (G1.g_pos       )
490 #define g_buf        (G1.g_buf       )
491 #define icase        (G1.icase       )
492 #define exiting      (G1.exiting     )
493 #define nextrec      (G1.nextrec     )
494 #define nextfile     (G1.nextfile    )
495 #define is_f0_split  (G1.is_f0_split )
496 #define t_info       (G.t_info      )
497 #define t_tclass     (G.t_tclass    )
498 #define t_string     (G.t_string    )
499 #define t_lineno     (G.t_lineno    )
500 #define t_rollback   (G.t_rollback  )
501 #define intvar       (G.intvar      )
502 #define fsplitter    (G.fsplitter   )
503 #define rsplitter    (G.rsplitter   )
504 #define INIT_G() do { \
505         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
506         G.next_token__ltclass = TC_OPTERM; \
507         G.evaluate__seed = 1; \
508 } while (0)
509
510
511 /* function prototypes */
512 static void handle_special(var *);
513 static node *parse_expr(uint32_t);
514 static void chain_group(void);
515 static var *evaluate(node *, var *);
516 static rstream *next_input_file(void);
517 static int fmt_num(char *, int, const char *, double, int);
518 static int awk_exit(int) NORETURN;
519
520 /* ---- error handling ---- */
521
522 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
523 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
524 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
525 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
526 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
527 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
528 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
529 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
530 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
531 #if !ENABLE_FEATURE_AWK_LIBM
532 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
533 #endif
534
535 static void zero_out_var(var *vp)
536 {
537         memset(vp, 0, sizeof(*vp));
538 }
539
540 static void syntax_error(const char *message) NORETURN;
541 static void syntax_error(const char *message)
542 {
543         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
544 }
545
546 /* ---- hash stuff ---- */
547
/* Hash a NUL-terminated name. For every byte the running value is
 * multiplied by 63 (written as (h << 6) - h) and the byte is added;
 * arithmetic wraps modulo the width of unsigned. */
static unsigned hashidx(const char *name)
{
	unsigned hash = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		hash = (hash << 6) - hash + *p;
	return hash;
}
556
557 /* create new hash */
558 static xhash *hash_init(void)
559 {
560         xhash *newhash;
561
562         newhash = xzalloc(sizeof(*newhash));
563         newhash->csize = FIRST_PRIME;
564         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
565
566         return newhash;
567 }
568
569 /* find item in hash, return ptr to data, NULL if not found */
570 static void *hash_search(xhash *hash, const char *name)
571 {
572         hash_item *hi;
573
574         hi = hash->items[hashidx(name) % hash->csize];
575         while (hi) {
576                 if (strcmp(hi->name, name) == 0)
577                         return &hi->data;
578                 hi = hi->next;
579         }
580         return NULL;
581 }
582
583 /* grow hash if it becomes too big */
584 static void hash_rebuild(xhash *hash)
585 {
586         unsigned newsize, i, idx;
587         hash_item **newitems, *hi, *thi;
588
589         if (hash->nprime == ARRAY_SIZE(PRIMES))
590                 return;
591
592         newsize = PRIMES[hash->nprime++];
593         newitems = xzalloc(newsize * sizeof(newitems[0]));
594
595         for (i = 0; i < hash->csize; i++) {
596                 hi = hash->items[i];
597                 while (hi) {
598                         thi = hi;
599                         hi = thi->next;
600                         idx = hashidx(thi->name) % newsize;
601                         thi->next = newitems[idx];
602                         newitems[idx] = thi;
603                 }
604         }
605
606         free(hash->items);
607         hash->csize = newsize;
608         hash->items = newitems;
609 }
610
611 /* find item in hash, add it if necessary. Return ptr to data */
612 static void *hash_find(xhash *hash, const char *name)
613 {
614         hash_item *hi;
615         unsigned idx;
616         int l;
617
618         hi = hash_search(hash, name);
619         if (!hi) {
620                 if (++hash->nel / hash->csize > 10)
621                         hash_rebuild(hash);
622
623                 l = strlen(name) + 1;
624                 hi = xzalloc(sizeof(*hi) + l);
625                 strcpy(hi->name, name);
626
627                 idx = hashidx(name) % hash->csize;
628                 hi->next = hash->items[idx];
629                 hash->items[idx] = hi;
630                 hash->glen += l;
631         }
632         return &hi->data;
633 }
634
635 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
636 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
637 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
638 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
639
640 static void hash_remove(xhash *hash, const char *name)
641 {
642         hash_item *hi, **phi;
643
644         phi = &hash->items[hashidx(name) % hash->csize];
645         while (*phi) {
646                 hi = *phi;
647                 if (strcmp(hi->name, name) == 0) {
648                         hash->glen -= (strlen(name) + 1);
649                         hash->nel--;
650                         *phi = hi->next;
651                         free(hi);
652                         break;
653                 }
654                 phi = &hi->next;
655         }
656 }
657
658 /* ------ some useful functions ------ */
659
660 static char *skip_spaces(char *p)
661 {
662         while (1) {
663                 if (*p == '\\' && p[1] == '\n') {
664                         p++;
665                         t_lineno++;
666                 } else if (*p != ' ' && *p != '\t') {
667                         break;
668                 }
669                 p++;
670         }
671         return p;
672 }
673
/* Return the word *s currently points to and move *s to the byte that
 * follows the word's terminating NUL */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
682
/* Fetch the next character of *s, decoding a backslash escape when there
 * is one, and advance *s past everything that was consumed.
 *
 * When the text after a backslash is not consumed by
 * bb_process_escape_sequence(), the character following the backslash is
 * returned literally (so \" yields ").
 * NOTE(review): this relies on bb_process_escape_sequence() returning '\\'
 * without advancing for sequences it does not recognize, which is what the
 * "*s == pps" test checks for.
 *
 * A backslash that is the very last character of the string yields NUL and
 * leaves *s pointing AT the terminator rather than one byte past it, so a
 * caller looping on **s cannot run off the end of the buffer.
 */
static char nextchar(char **s)
{
	char c, *pps;

	c = *(*s)++;
	pps = *s;
	if (c == '\\')
		c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == pps) {
		/* escape not recognized: take the following char as is */
		c = **s;
		if (c != '\0')
			(*s)++; /* but never step over the terminating NUL */
	}
	return c;
}
695
696 static ALWAYS_INLINE int isalnum_(int c)
697 {
698         return (isalnum(c) || c == '_');
699 }
700
/* Convert the number at the start of *pp to double and advance *pp past
 * the characters that were consumed (*pp is left unchanged when nothing
 * could be converted).
 *
 * With ENABLE_DESKTOP, input starting with "0x"/"0X" or "0<digit>" is first
 * tried as a C-style hex/octal integer via strtoull(..., 0). For the
 * "0<digit>" form the integer result is used only if the character that
 * stopped the conversion is neither a digit nor '.'; otherwise the text is
 * a decimal number with leading zeros (009.5, 00.25, 08) and is handed to
 * strtod() from its beginning instead of being truncated.
 */
static double my_strtod(char **pp)
{
	char *cp = *pp;

#if ENABLE_DESKTOP
	if (cp[0] == '0') {
		char c = (cp[1] | 0x20);

		if (c == 'x' || isdigit((unsigned char)cp[1])) {
			unsigned long long ull = strtoull(cp, pp, 0);

			if (c == 'x')
				return ull;
			c = **pp;
			if (!isdigit((unsigned char)c) && c != '.')
				return ull;
			/* stopped at '8', '9' or '.': not an octal integer,
			 * reparse the whole thing as floating point */
		}
	}
#endif
	return strtod(cp, pp);
}
712
713 /* -------- working with variables (set/get/copy/etc) -------- */
714
715 static xhash *iamarray(var *v)
716 {
717         var *a = v;
718
719         while (a->type & VF_CHILD)
720                 a = a->x.parent;
721
722         if (!(a->type & VF_ARRAY)) {
723                 a->type |= VF_ARRAY;
724                 a->x.array = hash_init();
725         }
726         return a->x.array;
727 }
728
729 static void clear_array(xhash *array)
730 {
731         unsigned i;
732         hash_item *hi, *thi;
733
734         for (i = 0; i < array->csize; i++) {
735                 hi = array->items[i];
736                 while (hi) {
737                         thi = hi;
738                         hi = hi->next;
739                         free(thi->data.v.string);
740                         free(thi);
741                 }
742                 array->items[i] = NULL;
743         }
744         array->glen = array->nel = 0;
745 }
746
747 /* clear a variable */
748 static var *clrvar(var *v)
749 {
750         if (!(v->type & VF_FSTR))
751                 free(v->string);
752
753         v->type &= VF_DONTTOUCH;
754         v->type |= VF_DIRTY;
755         v->string = NULL;
756         return v;
757 }
758
759 /* assign string value to variable */
760 static var *setvar_p(var *v, char *value)
761 {
762         clrvar(v);
763         v->string = value;
764         handle_special(v);
765         return v;
766 }
767
768 /* same as setvar_p but make a copy of string */
769 static var *setvar_s(var *v, const char *value)
770 {
771         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
772 }
773
774 /* same as setvar_s but sets USER flag */
775 static var *setvar_u(var *v, const char *value)
776 {
777         v = setvar_s(v, value);
778         v->type |= VF_USER;
779         return v;
780 }
781
782 /* set array element to user string */
783 static void setari_u(var *a, int idx, const char *s)
784 {
785         var *v;
786
787         v = findvar(iamarray(a), itoa(idx));
788         setvar_u(v, s);
789 }
790
791 /* assign numeric value to variable */
792 static var *setvar_i(var *v, double value)
793 {
794         clrvar(v);
795         v->type |= VF_NUMBER;
796         v->number = value;
797         handle_special(v);
798         return v;
799 }
800
801 static const char *getvar_s(var *v)
802 {
803         /* if v is numeric and has no cached string, convert it to string */
804         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
805                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
806                 v->string = xstrdup(g_buf);
807                 v->type |= VF_CACHED;
808         }
809         return (v->string == NULL) ? "" : v->string;
810 }
811
812 static double getvar_i(var *v)
813 {
814         char *s;
815
816         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
817                 v->number = 0;
818                 s = v->string;
819                 if (s && *s) {
820                         v->number = my_strtod(&s);
821                         if (v->type & VF_USER) {
822                                 s = skip_spaces(s);
823                                 if (*s != '\0')
824                                         v->type &= ~VF_USER;
825                         }
826                 } else {
827                         v->type &= ~VF_USER;
828                 }
829                 v->type |= VF_CACHED;
830         }
831         return v->number;
832 }
833
834 /* Used for operands of bitwise ops */
835 static unsigned long getvar_i_int(var *v)
836 {
837         double d = getvar_i(v);
838
839         /* Casting doubles to longs is undefined for values outside
840          * of target type range. Try to widen it as much as possible */
841         if (d >= 0)
842                 return (unsigned long)d;
843         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
844         return - (long) (unsigned long) (-d);
845 }
846
847 static var *copyvar(var *dest, const var *src)
848 {
849         if (dest != src) {
850                 clrvar(dest);
851                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
852                 dest->number = src->number;
853                 if (src->string)
854                         dest->string = xstrdup(src->string);
855         }
856         handle_special(dest);
857         return dest;
858 }
859
860 static var *incvar(var *v)
861 {
862         return setvar_i(v, getvar_i(v) + 1.0);
863 }
864
865 /* return true if v is number or numeric string */
866 static int is_numeric(var *v)
867 {
868         getvar_i(v);
869         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
870 }
871
872 /* return 1 when value of v corresponds to true, 0 otherwise */
873 static int istrue(var *v)
874 {
875         if (is_numeric(v))
876                 return (v->number != 0);
877         return (v->string && v->string[0]);
878 }
879
880 /* temporary variables allocator. Last allocated should be first freed */
881 static var *nvalloc(int n)
882 {
883         nvblock *pb = NULL;
884         var *v, *r;
885         int size;
886
887         while (g_cb) {
888                 pb = g_cb;
889                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
890                         break;
891                 g_cb = g_cb->next;
892         }
893
894         if (!g_cb) {
895                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
896                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
897                 g_cb->size = size;
898                 g_cb->pos = g_cb->nv;
899                 g_cb->prev = pb;
900                 /*g_cb->next = NULL; - xzalloc did it */
901                 if (pb)
902                         pb->next = g_cb;
903         }
904
905         v = r = g_cb->pos;
906         g_cb->pos += n;
907
908         while (v < g_cb->pos) {
909                 v->type = 0;
910                 v->string = NULL;
911                 v++;
912         }
913
914         return r;
915 }
916
917 static void nvfree(var *v)
918 {
919         var *p;
920
921         if (v < g_cb->nv || v >= g_cb->pos)
922                 syntax_error(EMSG_INTERNAL_ERROR);
923
924         for (p = v; p < g_cb->pos; p++) {
925                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
926                         clear_array(iamarray(p));
927                         free(p->x.array->items);
928                         free(p->x.array);
929                 }
930                 if (p->type & VF_WALK) {
931                         walker_list *n;
932                         walker_list *w = p->x.walker;
933                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
934                         p->x.walker = NULL;
935                         while (w) {
936                                 n = w->prev;
937                                 debug_printf_walker(" free(%p)\n", w);
938                                 free(w);
939                                 w = n;
940                         }
941                 }
942                 clrvar(p);
943         }
944
945         g_cb->pos = v;
946         while (g_cb->prev && g_cb->pos == g_cb->nv) {
947                 g_cb = g_cb->prev;
948         }
949 }
950
951 /* ------- awk program text parsing ------- */
952
953 /* Parse the next token at global g_pos; results go to t_tclass, t_info,
954  * t_string and t_double. A token whose class is not in 'expected' raises
     * a syntax error. String, regexp and name tokens are unescaped and
     * NUL-terminated in place, so the program text buffer is modified.
     * Returns the token class.
955  */
956 static uint32_t next_token(uint32_t expected)
957 {
958 #define concat_inserted (G.next_token__concat_inserted)
959 #define save_tclass     (G.next_token__save_tclass)
960 #define save_info       (G.next_token__save_info)
961 /* Initialized to TC_OPTERM: */
962 #define ltclass         (G.next_token__ltclass)
963
964         char *p, *s;
965         const char *tl;
966         uint32_t tc;
967         const uint32_t *ti;
968         int l;
969
970         if (t_rollback) {
                /* previous token was pushed back: hand out t_tclass again */
971                 t_rollback = FALSE;
972
973         } else if (concat_inserted) {
                /* last call returned a synthetic concatenation operator;
                 * now deliver the real token that was saved then */
974                 concat_inserted = FALSE;
975                 t_tclass = save_tclass;
976                 t_info = save_info;
977
978         } else {
979                 p = g_pos;
980  readnext:
981                 p = skip_spaces(p);
982                 g_lineno = t_lineno;
                /* '#' comment: skip up to (not including) end of line */
983                 if (*p == '#')
984                         while (*p != '\n' && *p != '\0')
985                                 p++;
986
987                 if (*p == '\n')
988                         t_lineno++;
989
990                 if (*p == '\0') {
991                         tc = TC_EOF;
992
993                 } else if (*p == '\"') {
994                         /* it's a string */
                        /* s is the write cursor, p the read cursor; escapes are
                         * decoded by nextchar() and the result overwrites the source */
995                         t_string = s = ++p;
996                         while (*p != '\"') {
997                                 char *pp = p;
998                                 if (*p == '\0' || *p == '\n')
999                                         syntax_error(EMSG_UNEXP_EOS);
1000                                 *s++ = nextchar(&pp);
1001                                 p = pp;
1002                         }
1003                         p++;
1004                         *s = '\0';
1005                         tc = TC_STRING;
1006
1007                 } else if ((expected & TC_REGEXP) && *p == '/') {
1008                         /* it's regexp */
                         /* '/' starts a regexp only when the caller expects one */
1009                         t_string = s = ++p;
1010                         while (*p != '/') {
1011                                 if (*p == '\0' || *p == '\n')
1012                                         syntax_error(EMSG_UNEXP_EOS);
1013                                 *s = *p++;
1014                                 if (*s++ == '\\') {
                                         /* try to decode the escape; the decoded char
                                          * replaces the backslash just stored */
1015                                         char *pp = p;
1016                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1017                                         if (*p == '\\')
1018                                                 *s++ = '\\';
                                         /* pp unchanged: nothing was consumed by the decoder,
                                          * so copy the following char verbatim */
1019                                         if (pp == p)
1020                                                 *s++ = *p++;
1021                                         else
1022                                                 p = pp;
1023                                 }
1024                         }
1025                         p++;
1026                         *s = '\0';
1027                         tc = TC_REGEXP;
1028
1029                 } else if (*p == '.' || isdigit(*p)) {
1030                         /* it's a number */
1031                         char *pp = p;
1032                         t_double = my_strtod(&pp);
1033                         p = pp;
                         /* a '.' right after the parsed number is rejected */
1034                         if (*pp == '.')
1035                                 syntax_error(EMSG_UNEXP_TOKEN);
1036                         tc = TC_NUMBER;
1037
1038                 } else {
1039                         /* search for something known */
                         /* tokenlist is a sequence of length-prefixed words; an NTCC
                          * length byte switches to the next token class bit.
                          * tokeninfo is walked in parallel, one entry per word. */
1040                         tl = tokenlist;
1041                         tc = 0x00000001;
1042                         ti = tokeninfo;
1043                         while (*tl) {
1044                                 l = *tl++;
1045                                 if (l == NTCC) {
1046                                         tc <<= 1;
1047                                         continue;
1048                                 }
1049                                 /* if token class is expected, token
1050                                  * matches and it's not a longer word,
1051                                  * then this is what we are looking for
1052                                  */
1053                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1054                                  && *tl == *p && strncmp(p, tl, l) == 0
1055                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1056                                 ) {
1057                                         t_info = *ti;
1058                                         p += l;
1059                                         break;
1060                                 }
1061                                 ti++;
1062                                 tl += l;
1063                         }
1064
1065                         if (!*tl) {
1066                                 /* it's a name (var/array/function),
1067                                  * otherwise it's something wrong
1068                                  */
1069                                 if (!isalnum_(*p))
1070                                         syntax_error(EMSG_UNEXP_TOKEN);
1071
                                 /* shift the name one byte to the left (overwriting the
                                  * byte before it) so that a terminating NUL fits without
                                  * clobbering the character that follows the name */
1072                                 t_string = --p;
1073                                 while (isalnum_(*++p)) {
1074                                         p[-1] = *p;
1075                                 }
1076                                 p[-1] = '\0';
1077                                 tc = TC_VARIABLE;
1078                                 /* also consume whitespace between functionname and bracket */
1079                                 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1080                                         p = skip_spaces(p);
1081                                 if (*p == '(') {
                                         /* note: '(' itself is not consumed here */
1082                                         tc = TC_FUNCTION;
1083                                 } else {
1084                                         if (*p == '[') {
1085                                                 p++;
1086                                                 tc = TC_ARRAY;
1087                                         }
1088                                 }
1089                         }
1090                 }
1091                 g_pos = p;
1092
1093                 /* skipping newlines in some cases */
1094                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1095                         goto readnext;
1096
1097                 /* insert concatenation operator when needed */
                 /* the real token is parked in save_tclass/save_info and is
                  * delivered by the next call (see concat_inserted above) */
1098                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1099                         concat_inserted = TRUE;
1100                         save_tclass = tc;
1101                         save_info = t_info;
1102                         tc = TC_BINOP;
1103                         t_info = OC_CONCAT | SS | P(35);
1104                 }
1105
1106                 t_tclass = tc;
1107         }
         /* remember the class of this token for the next call */
1108         ltclass = t_tclass;
1109
1110         /* Are we ready for this? */
1111         if (!(ltclass & expected))
1112                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1113                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1114
1115         return ltclass;
1116 #undef concat_inserted
1117 #undef save_tclass
1118 #undef save_info
1119 #undef ltclass
1120 }
1121
1122 static void rollback_token(void)
1123 {
1124         t_rollback = TRUE;
1125 }
1126
1127 static node *new_node(uint32_t info)
1128 {
1129         node *n;
1130
1131         n = xzalloc(sizeof(node));
1132         n->info = info;
1133         n->lineno = g_lineno;
1134         return n;
1135 }
1136
1137 static node *mk_re_node(const char *s, node *n, regex_t *re)
1138 {
1139         n->info = OC_REGEXP;
1140         n->l.re = re;
1141         n->r.ire = re + 1;
1142         xregcomp(re, s, REG_EXTENDED);
1143         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1144
1145         return n;
1146 }
1147
1148 static node *condition(void)
1149 {
1150         next_token(TC_SEQSTART);
1151         return parse_expr(TC_SEQTERM);
1152 }
1153
1154 /* parse expression terminated by given argument, return ptr
1155  * to built subtree. Terminator is eaten by parse_expr */
1156 static node *parse_expr(uint32_t iexp)
1157 {
1158         node sn;
1159         node *cn = &sn;
1160         node *vn, *glptr;
1161         uint32_t tc, xtc;
1162         var *v;
1163
1164         sn.info = PRIMASK;
1165         sn.r.n = glptr = NULL;
1166         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1167
1168         while (!((tc = next_token(xtc)) & iexp)) {
1169                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1170                         /* input redirection (<) attached to glptr node */
1171                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1172                         cn->a.n = glptr;
1173                         xtc = TC_OPERAND | TC_UOPPRE;
1174                         glptr = NULL;
1175
1176                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1177                         /* for binary and postfix-unary operators, jump back over
1178                          * previous operators with higher priority */
1179                         vn = cn;
1180                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1181                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1182                         ) {
1183                                 vn = vn->a.n;
1184                         }
1185                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1186                                 t_info += P(6);
1187                         cn = vn->a.n->r.n = new_node(t_info);
1188                         cn->a.n = vn->a.n;
1189                         if (tc & TC_BINOP) {
1190                                 cn->l.n = vn;
1191                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1192                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1193                                         /* it's a pipe */
1194                                         next_token(TC_GETLINE);
1195                                         /* give maximum priority to this pipe */
1196                                         cn->info &= ~PRIMASK;
1197                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1198                                 }
1199                         } else {
1200                                 cn->r.n = vn;
1201                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1202                         }
1203                         vn->a.n = cn;
1204
1205                 } else {
1206                         /* for operands and prefix-unary operators, attach them
1207                          * to last node */
1208                         vn = cn;
1209                         cn = vn->r.n = new_node(t_info);
1210                         cn->a.n = vn;
1211                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1212                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1213                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1214                                 /* one should be very careful with switch on tclass -
1215                                  * only simple tclasses should be used! */
1216                                 switch (tc) {
1217                                 case TC_VARIABLE:
1218                                 case TC_ARRAY:
1219                                         cn->info = OC_VAR;
1220                                         v = hash_search(ahash, t_string);
1221                                         if (v != NULL) {
1222                                                 cn->info = OC_FNARG;
1223                                                 cn->l.aidx = v->x.aidx;
1224                                         } else {
1225                                                 cn->l.v = newvar(t_string);
1226                                         }
1227                                         if (tc & TC_ARRAY) {
1228                                                 cn->info |= xS;
1229                                                 cn->r.n = parse_expr(TC_ARRTERM);
1230                                         }
1231                                         break;
1232
1233                                 case TC_NUMBER:
1234                                 case TC_STRING:
1235                                         cn->info = OC_VAR;
1236                                         v = cn->l.v = xzalloc(sizeof(var));
1237                                         if (tc & TC_NUMBER)
1238                                                 setvar_i(v, t_double);
1239                                         else
1240                                                 setvar_s(v, t_string);
1241                                         break;
1242
1243                                 case TC_REGEXP:
1244                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1245                                         break;
1246
1247                                 case TC_FUNCTION:
1248                                         cn->info = OC_FUNC;
1249                                         cn->r.f = newfunc(t_string);
1250                                         cn->l.n = condition();
1251                                         break;
1252
1253                                 case TC_SEQSTART:
1254                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1255                                         cn->a.n = vn;
1256                                         break;
1257
1258                                 case TC_GETLINE:
1259                                         glptr = cn;
1260                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1261                                         break;
1262
1263                                 case TC_BUILTIN:
1264                                         cn->l.n = condition();
1265                                         break;
1266                                 }
1267                         }
1268                 }
1269         }
1270         return sn.r.n;
1271 }
1272
1273 /* add node to chain. Return ptr to alloc'd node */
1274 static node *chain_node(uint32_t info)
1275 {
1276         node *n;
1277
1278         if (!seq->first)
1279                 seq->first = seq->last = new_node(0);
1280
1281         if (seq->programname != g_progname) {
1282                 seq->programname = g_progname;
1283                 n = chain_node(OC_NEWSOURCE);
1284                 n->l.new_progname = xstrdup(g_progname);
1285         }
1286
1287         n = seq->last;
1288         n->info = info;
1289         seq->last = n->a.n = new_node(OC_DONE);
1290
1291         return n;
1292 }
1293
1294 static void chain_expr(uint32_t info)
1295 {
1296         node *n;
1297
1298         n = chain_node(info);
1299         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1300         if (t_tclass & TC_GRPTERM)
1301                 rollback_token();
1302 }
1303
1304 static node *chain_loop(node *nn)
1305 {
1306         node *n, *n2, *save_brk, *save_cont;
1307
1308         save_brk = break_ptr;
1309         save_cont = continue_ptr;
1310
1311         n = chain_node(OC_BR | Vx);
1312         continue_ptr = new_node(OC_EXEC);
1313         break_ptr = new_node(OC_EXEC);
1314         chain_group();
1315         n2 = chain_node(OC_EXEC | Vx);
1316         n2->l.n = nn;
1317         n2->a.n = n;
1318         continue_ptr->a.n = n2;
1319         break_ptr->a.n = n->r.n = seq->last;
1320
1321         continue_ptr = save_cont;
1322         break_ptr = save_brk;
1323
1324         return n;
1325 }
1326
1327 /* parse group and attach it to chain */
1328 static void chain_group(void)
1329 {
1330         uint32_t c;
1331         node *n, *n2, *n3;
1332
1333         do {
1334                 c = next_token(TC_GRPSEQ);
1335         } while (c & TC_NEWLINE);
1336
1337         if (c & TC_GRPSTART) {
1338                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1339                         if (t_tclass & TC_NEWLINE)
1340                                 continue;
1341                         rollback_token();
1342                         chain_group();
1343                 }
1344         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1345                 rollback_token();
1346                 chain_expr(OC_EXEC | Vx);
1347         } else {                                                /* TC_STATEMNT */
1348                 switch (t_info & OPCLSMASK) {
1349                 case ST_IF:
1350                         n = chain_node(OC_BR | Vx);
1351                         n->l.n = condition();
1352                         chain_group();
1353                         n2 = chain_node(OC_EXEC);
1354                         n->r.n = seq->last;
1355                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1356                                 chain_group();
1357                                 n2->a.n = seq->last;
1358                         } else {
1359                                 rollback_token();
1360                         }
1361                         break;
1362
1363                 case ST_WHILE:
1364                         n2 = condition();
1365                         n = chain_loop(NULL);
1366                         n->l.n = n2;
1367                         break;
1368
1369                 case ST_DO:
1370                         n2 = chain_node(OC_EXEC);
1371                         n = chain_loop(NULL);
1372                         n2->a.n = n->a.n;
1373                         next_token(TC_WHILE);
1374                         n->l.n = condition();
1375                         break;
1376
1377                 case ST_FOR:
1378                         next_token(TC_SEQSTART);
1379                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1380                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1381                                 if ((n2->info & OPCLSMASK) != OC_IN)
1382                                         syntax_error(EMSG_UNEXP_TOKEN);
1383                                 n = chain_node(OC_WALKINIT | VV);
1384                                 n->l.n = n2->l.n;
1385                                 n->r.n = n2->r.n;
1386                                 n = chain_loop(NULL);
1387                                 n->info = OC_WALKNEXT | Vx;
1388                                 n->l.n = n2->l.n;
1389                         } else {                        /* for (;;) */
1390                                 n = chain_node(OC_EXEC | Vx);
1391                                 n->l.n = n2;
1392                                 n2 = parse_expr(TC_SEMICOL);
1393                                 n3 = parse_expr(TC_SEQTERM);
1394                                 n = chain_loop(n3);
1395                                 n->l.n = n2;
1396                                 if (!n2)
1397                                         n->info = OC_EXEC;
1398                         }
1399                         break;
1400
1401                 case OC_PRINT:
1402                 case OC_PRINTF:
1403                         n = chain_node(t_info);
1404                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1405                         if (t_tclass & TC_OUTRDR) {
1406                                 n->info |= t_info;
1407                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1408                         }
1409                         if (t_tclass & TC_GRPTERM)
1410                                 rollback_token();
1411                         break;
1412
1413                 case OC_BREAK:
1414                         n = chain_node(OC_EXEC);
1415                         n->a.n = break_ptr;
1416                         break;
1417
1418                 case OC_CONTINUE:
1419                         n = chain_node(OC_EXEC);
1420                         n->a.n = continue_ptr;
1421                         break;
1422
1423                 /* delete, next, nextfile, return, exit */
1424                 default:
1425                         chain_expr(t_info);
1426                 }
1427         }
1428 }
1429
1430 static void parse_program(char *p)
1431 {
1432         uint32_t tclass;
1433         node *cn;
1434         func *f;
1435         var *v;
1436
1437         g_pos = p;
1438         t_lineno = 1;
1439         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1440                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1441
1442                 if (tclass & TC_OPTERM)
1443                         continue;
1444
1445                 seq = &mainseq;
1446                 if (tclass & TC_BEGIN) {
1447                         seq = &beginseq;
1448                         chain_group();
1449
1450                 } else if (tclass & TC_END) {
1451                         seq = &endseq;
1452                         chain_group();
1453
1454                 } else if (tclass & TC_FUNCDECL) {
1455                         next_token(TC_FUNCTION);
1456                         g_pos++;
1457                         f = newfunc(t_string);
1458                         f->body.first = NULL;
1459                         f->nargs = 0;
1460                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1461                                 v = findvar(ahash, t_string);
1462                                 v->x.aidx = f->nargs++;
1463
1464                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1465                                         break;
1466                         }
1467                         seq = &f->body;
1468                         chain_group();
1469                         clear_array(ahash);
1470
1471                 } else if (tclass & TC_OPSEQ) {
1472                         rollback_token();
1473                         cn = chain_node(OC_TEST);
1474                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1475                         if (t_tclass & TC_GRPSTART) {
1476                                 rollback_token();
1477                                 chain_group();
1478                         } else {
1479                                 chain_node(OC_PRINT);
1480                         }
1481                         cn->r.n = mainseq.last;
1482
1483                 } else /* if (tclass & TC_GRPSTART) */ {
1484                         rollback_token();
1485                         chain_group();
1486                 }
1487         }
1488 }
1489
1490
1491 /* -------- program execution part -------- */
1492
1493 static node *mk_splitter(const char *s, tsplitter *spl)
1494 {
1495         regex_t *re, *ire;
1496         node *n;
1497
1498         re = &spl->re[0];
1499         ire = &spl->re[1];
1500         n = &spl->n;
1501         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1502                 regfree(re);
1503                 regfree(ire); // TODO: nuke ire, use re+1?
1504         }
1505         if (strlen(s) > 1) {
1506                 mk_re_node(s, n, re);
1507         } else {
1508                 n->info = (uint32_t) *s;
1509         }
1510
1511         return n;
1512 }
1513
1514 /* use node as a regular expression. Supplied with node ptr and regex_t
1515  * storage space. Return ptr to regex (if result points to preg, it should
1516  * be later regfree'd manually
1517  */
1518 static regex_t *as_regex(node *op, regex_t *preg)
1519 {
1520         int cflags;
1521         var *v;
1522         const char *s;
1523
1524         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1525                 return icase ? op->r.ire : op->l.re;
1526         }
1527         v = nvalloc(1);
1528         s = getvar_s(evaluate(op, v));
1529
1530         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1531         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1532          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1533          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1534          * (maybe gsub is not supposed to use REG_EXTENDED?).
1535          */
1536         if (regcomp(preg, s, cflags)) {
1537                 cflags &= ~REG_EXTENDED;
1538                 xregcomp(preg, s, cflags);
1539         }
1540         nvfree(v);
1541         return preg;
1542 }
1543
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * When b is NULL or n has reached *size, the buffer is reallocated to
 * n + n/2 + 80 bytes and *size is updated. Returns the (possibly moved)
 * buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b != NULL && n < *size)
                return b;

        *size = n + (n>>1) + 80;
        return xrealloc(b, *size);
}
1553
1554 /* resize field storage space */
1555 static void fsrealloc(int size)
1556 {
1557         int i;
1558
1559         if (size >= maxfields) {
1560                 i = maxfields;
1561                 maxfields = size + 16;
1562                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1563                 for (; i < maxfields; i++) {
1564                         Fields[i].type = VF_SPECIAL;
1565                         Fields[i].string = NULL;
1566                 }
1567         }
1568
1569         if (size < nfields) {
1570                 for (i = size; i < nfields; i++) {
1571                         clrvar(Fields + i);
1572                 }
1573         }
1574         nfields = size;
1575 }
1576
     /* Split string s according to splitter node spl.
      * A freshly allocated buffer holding the pieces is stored in *slist
      * (allocated with xzalloc here; not freed in this function).
      * Returns the number of fields found.
      * Four modes, selected by spl->info: regexp split, null split (every
      * character is a field), single-character split, and whitespace split
      * (separator is ' ').
      */
1577 static int awk_split(const char *s, node *spl, char **slist)
1578 {
1579         int l, n = 0;
1580         char c[4];
1581         char *s1;
1582         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1583
1584         /* in worst case, each char would be a separate field */
1585         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1586         strcpy(s1, s);
1587
         /* c[0..1]: separator char (later: its upper/lower case variants),
          * used as a strpbrk() set; c+2 is a second, separate set that is
          * either empty or "\n" (the latter when RS is the empty string) */
1588         c[0] = c[1] = (char)spl->info;
1589         c[2] = c[3] = '\0';
1590         if (*getvar_s(intvar[RS]) == '\0')
1591                 c[2] = '\n';
1592
1593         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1594                 if (!*s)
1595                         return n; /* "": zero fields */
1596                 n++; /* at least one field will be there */
1597                 do {
1598                         l = strcspn(s, c+2); /* len till next NUL or \n */
1599                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1600                          && pmatch[0].rm_so <= l
1601                         ) {
1602                                 l = pmatch[0].rm_so;
                                 /* match ending at offset 0 (empty match at start):
                                  * take one character so the loop makes progress */
1603                                 if (pmatch[0].rm_eo == 0) {
1604                                         l++;
1605                                         pmatch[0].rm_eo++;
1606                                 }
1607                                 n++; /* we saw yet another delimiter */
1608                         } else {
                                 /* no usable match: field runs to the end of the
                                  * segment; also step over the char that ended it */
1609                                 pmatch[0].rm_eo = l;
1610                                 if (s[l])
1611                                         pmatch[0].rm_eo++;
1612                         }
1613                         memcpy(s1, s, l);
1614                         /* make sure we remove *all* of the separator chars */
1615                         do {
1616                                 s1[l] = '\0';
1617                         } while (++l < pmatch[0].rm_eo);
1618                         nextword(&s1);
1619                         s += pmatch[0].rm_eo;
1620                 } while (*s);
1621                 return n;
1622         }
1623         if (c[0] == '\0') {  /* null split */
1624                 while (*s) {
1625                         *s1++ = *s++;
1626                         *s1++ = '\0';
1627                         n++;
1628                 }
1629                 return n;
1630         }
1631         if (c[0] != ' ') {  /* single-character split */
1632                 if (icase) {
1633                         c[0] = toupper(c[0]);
1634                         c[1] = tolower(c[1]);
1635                 }
                 /* non-empty input has one field more than it has separators */
1636                 if (*s1)
1637                         n++;
1638                 while ((s1 = strpbrk(s1, c))) {
1639                         *s1++ = '\0';
1640                         n++;
1641                 }
1642                 return n;
1643         }
1644         /* space split */
1645         while (*s) {
1646                 s = skip_whitespace(s);
1647                 if (!*s)
1648                         break;
1649                 n++;
1650                 while (*s && !isspace(*s))
1651                         *s1++ = *s++;
1652                 *s1++ = '\0';
1653         }
1654         return n;
1655 }
1656
1657 static void split_f0(void)
1658 {
1659 /* static char *fstrings; */
1660 #define fstrings (G.split_f0__fstrings)
1661
1662         int i, n;
1663         char *s;
1664
1665         if (is_f0_split)
1666                 return;
1667
1668         is_f0_split = TRUE;
1669         free(fstrings);
1670         fsrealloc(0);
1671         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1672         fsrealloc(n);
1673         s = fstrings;
1674         for (i = 0; i < n; i++) {
1675                 Fields[i].string = nextword(&s);
1676                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1677         }
1678
1679         /* set NF manually to avoid side effects */
1680         clrvar(intvar[NF]);
1681         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1682         intvar[NF]->number = nfields;
1683 #undef fstrings
1684 }
1685
1686 /* perform additional actions when some internal variables changed */
1687 static void handle_special(var *v)
1688 {
1689         int n;
1690         char *b;
1691         const char *sep, *s;
1692         int sl, l, len, i, bsize;
1693
1694         if (!(v->type & VF_SPECIAL))
1695                 return;
1696
1697         if (v == intvar[NF]) {
1698                 n = (int)getvar_i(v);
1699                 fsrealloc(n);
1700
1701                 /* recalculate $0 */
1702                 sep = getvar_s(intvar[OFS]);
1703                 sl = strlen(sep);
1704                 b = NULL;
1705                 len = 0;
1706                 for (i = 0; i < n; i++) {
1707                         s = getvar_s(&Fields[i]);
1708                         l = strlen(s);
1709                         if (b) {
1710                                 memcpy(b+len, sep, sl);
1711                                 len += sl;
1712                         }
1713                         b = qrealloc(b, len+l+sl, &bsize);
1714                         memcpy(b+len, s, l);
1715                         len += l;
1716                 }
1717                 if (b)
1718                         b[len] = '\0';
1719                 setvar_p(intvar[F0], b);
1720                 is_f0_split = TRUE;
1721
1722         } else if (v == intvar[F0]) {
1723                 is_f0_split = FALSE;
1724
1725         } else if (v == intvar[FS]) {
1726                 mk_splitter(getvar_s(v), &fsplitter);
1727
1728         } else if (v == intvar[RS]) {
1729                 mk_splitter(getvar_s(v), &rsplitter);
1730
1731         } else if (v == intvar[IGNORECASE]) {
1732                 icase = istrue(v);
1733
1734         } else {                                /* $n */
1735                 n = getvar_i(intvar[NF]);
1736                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1737                 /* right here v is invalid. Just to note... */
1738         }
1739 }
1740
1741 /* step through func/builtin/etc arguments */
1742 static node *nextarg(node **pn)
1743 {
1744         node *n;
1745
1746         n = *pn;
1747         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1748                 *pn = n->r.n;
1749                 n = n->l.n;
1750         } else {
1751                 *pn = NULL;
1752         }
1753         return n;
1754 }
1755
1756 static void hashwalk_init(var *v, xhash *array)
1757 {
1758         hash_item *hi;
1759         unsigned i;
1760         walker_list *w;
1761         walker_list *prev_walker;
1762
1763         if (v->type & VF_WALK) {
1764                 prev_walker = v->x.walker;
1765         } else {
1766                 v->type |= VF_WALK;
1767                 prev_walker = NULL;
1768         }
1769         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1770
1771         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1772         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1773         w->cur = w->end = w->wbuf;
1774         w->prev = prev_walker;
1775         for (i = 0; i < array->csize; i++) {
1776                 hi = array->items[i];
1777                 while (hi) {
1778                         strcpy(w->end, hi->name);
1779                         nextword(&w->end);
1780                         hi = hi->next;
1781                 }
1782         }
1783 }
1784
1785 static int hashwalk_next(var *v)
1786 {
1787         walker_list *w = v->x.walker;
1788
1789         if (w->cur >= w->end) {
1790                 walker_list *prev_walker = w->prev;
1791
1792                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1793                 free(w);
1794                 v->x.walker = prev_walker;
1795                 return FALSE;
1796         }
1797
1798         setvar_s(v, nextword(&w->cur));
1799         return TRUE;
1800 }
1801
1802 /* evaluate node, return 1 when result is true, 0 otherwise */
1803 static int ptest(node *pattern)
1804 {
1805         /* ptest__v is "static": to save stack space? */
1806         return istrue(evaluate(pattern, &G.ptest__v));
1807 }
1808
1809 /* read next record from stream rsm into a variable v */
1810 static int awk_getline(rstream *rsm, var *v)
1811 {
1812         char *b;
1813         regmatch_t pmatch[2];
1814         int size, a, p, pp = 0;
1815         int fd, so, eo, r, rp;
1816         char c, *m, *s;
1817
1818         /* we're using our own buffer since we need access to accumulating
1819          * characters
1820          */
1821         fd = fileno(rsm->F);
1822         m = rsm->buffer;
1823         a = rsm->adv;
1824         p = rsm->pos;
1825         size = rsm->size;
1826         c = (char) rsplitter.n.info;
1827         rp = 0;
1828
1829         if (!m)
1830                 m = qrealloc(m, 256, &size);
1831
1832         do {
1833                 b = m + a;
1834                 so = eo = p;
1835                 r = 1;
1836                 if (p > 0) {
1837                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1838                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1839                                                         b, 1, pmatch, 0) == 0) {
1840                                         so = pmatch[0].rm_so;
1841                                         eo = pmatch[0].rm_eo;
1842                                         if (b[eo] != '\0')
1843                                                 break;
1844                                 }
1845                         } else if (c != '\0') {
1846                                 s = strchr(b+pp, c);
1847                                 if (!s)
1848                                         s = memchr(b+pp, '\0', p - pp);
1849                                 if (s) {
1850                                         so = eo = s-b;
1851                                         eo++;
1852                                         break;
1853                                 }
1854                         } else {
1855                                 while (b[rp] == '\n')
1856                                         rp++;
1857                                 s = strstr(b+rp, "\n\n");
1858                                 if (s) {
1859                                         so = eo = s-b;
1860                                         while (b[eo] == '\n')
1861                                                 eo++;
1862                                         if (b[eo] != '\0')
1863                                                 break;
1864                                 }
1865                         }
1866                 }
1867
1868                 if (a > 0) {
1869                         memmove(m, m+a, p+1);
1870                         b = m;
1871                         a = 0;
1872                 }
1873
1874                 m = qrealloc(m, a+p+128, &size);
1875                 b = m + a;
1876                 pp = p;
1877                 p += safe_read(fd, b+p, size-p-1);
1878                 if (p < pp) {
1879                         p = 0;
1880                         r = 0;
1881                         setvar_i(intvar[ERRNO], errno);
1882                 }
1883                 b[p] = '\0';
1884
1885         } while (p > pp);
1886
1887         if (p == 0) {
1888                 r--;
1889         } else {
1890                 c = b[so]; b[so] = '\0';
1891                 setvar_s(v, b+rp);
1892                 v->type |= VF_USER;
1893                 b[so] = c;
1894                 c = b[eo]; b[eo] = '\0';
1895                 setvar_s(intvar[RT], b+so);
1896                 b[eo] = c;
1897         }
1898
1899         rsm->buffer = m;
1900         rsm->adv = a + eo;
1901         rsm->pos = p - eo;
1902         rsm->size = size;
1903
1904         return r;
1905 }
1906
1907 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1908 {
1909         int r = 0;
1910         char c;
1911         const char *s = format;
1912
1913         if (int_as_int && n == (int)n) {
1914                 r = snprintf(b, size, "%d", (int)n);
1915         } else {
1916                 do { c = *s; } while (c && *++s);
1917                 if (strchr("diouxX", c)) {
1918                         r = snprintf(b, size, format, (int)n);
1919                 } else if (strchr("eEfgG", c)) {
1920                         r = snprintf(b, size, format, n);
1921                 } else {
1922                         syntax_error(EMSG_INV_FMT);
1923                 }
1924         }
1925         return r;
1926 }
1927
1928 /* formatted output into an allocated buffer, return ptr to buffer */
1929 static char *awk_printf(node *n)
1930 {
1931         char *b = NULL;
1932         char *fmt, *s, *f;
1933         const char *s1;
1934         int i, j, incr, bsize;
1935         char c, c1;
1936         var *v, *arg;
1937
1938         v = nvalloc(1);
1939         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1940
1941         i = 0;
1942         while (*f) {
1943                 s = f;
1944                 while (*f && (*f != '%' || *++f == '%'))
1945                         f++;
1946                 while (*f && !isalpha(*f)) {
1947                         if (*f == '*')
1948                                 syntax_error("%*x formats are not supported");
1949                         f++;
1950                 }
1951
1952                 incr = (f - s) + MAXVARFMT;
1953                 b = qrealloc(b, incr + i, &bsize);
1954                 c = *f;
1955                 if (c != '\0')
1956                         f++;
1957                 c1 = *f;
1958                 *f = '\0';
1959                 arg = evaluate(nextarg(&n), v);
1960
1961                 j = i;
1962                 if (c == 'c' || !c) {
1963                         i += sprintf(b+i, s, is_numeric(arg) ?
1964                                         (char)getvar_i(arg) : *getvar_s(arg));
1965                 } else if (c == 's') {
1966                         s1 = getvar_s(arg);
1967                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1968                         i += sprintf(b+i, s, s1);
1969                 } else {
1970                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1971                 }
1972                 *f = c1;
1973
1974                 /* if there was an error while sprintf, return value is negative */
1975                 if (i < j)
1976                         i = j;
1977         }
1978
1979         free(fmt);
1980         nvfree(v);
1981         b = xrealloc(b, i + 1);
1982         b[i] = '\0';
1983         return b;
1984 }
1985
1986 /* common substitution routine
1987  * replace (nm) substring of (src) that match (n) with (repl), store
1988  * result into (dest), return number of substitutions. If nm=0, replace
1989  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1990  * subexpression matching (\1-\9)
1991  */
1992 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1993 {
1994         char *ds = NULL;
1995         const char *s;
1996         const char *sp;
1997         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1998         regmatch_t pmatch[10];
1999         regex_t sreg, *re;
2000
2001         re = as_regex(rn, &sreg);
2002         if (!src)
2003                 src = intvar[F0];
2004         if (!dest)
2005                 dest = intvar[F0];
2006
2007         i = di = 0;
2008         sp = getvar_s(src);
2009         rl = strlen(repl);
2010         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
2011                 so = pmatch[0].rm_so;
2012                 eo = pmatch[0].rm_eo;
2013
2014                 ds = qrealloc(ds, di + eo + rl, &dssize);
2015                 memcpy(ds + di, sp, eo);
2016                 di += eo;
2017                 if (++i >= nm) {
2018                         /* replace */
2019                         di -= (eo - so);
2020                         nbs = 0;
2021                         for (s = repl; *s; s++) {
2022                                 ds[di++] = c = *s;
2023                                 if (c == '\\') {
2024                                         nbs++;
2025                                         continue;
2026                                 }
2027                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
2028                                         di -= ((nbs + 3) >> 1);
2029                                         j = 0;
2030                                         if (c != '&') {
2031                                                 j = c - '0';
2032                                                 nbs++;
2033                                         }
2034                                         if (nbs % 2) {
2035                                                 ds[di++] = c;
2036                                         } else {
2037                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
2038                                                 ds = qrealloc(ds, di + rl + n, &dssize);
2039                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
2040                                                 di += n;
2041                                         }
2042                                 }
2043                                 nbs = 0;
2044                         }
2045                 }
2046
2047                 sp += eo;
2048                 if (i == nm)
2049                         break;
2050                 if (eo == so) {
2051                         ds[di] = *sp++;
2052                         if (!ds[di++])
2053                                 break;
2054                 }
2055         }
2056
2057         ds = qrealloc(ds, di + strlen(sp), &dssize);
2058         strcpy(ds + di, sp);
2059         setvar_p(dest, ds);
2060         if (re == &sreg)
2061                 regfree(re);
2062         return i;
2063 }
2064
2065 static NOINLINE int do_mktime(const char *ds)
2066 {
2067         struct tm then;
2068         int count;
2069
2070         /*memset(&then, 0, sizeof(then)); - not needed */
2071         then.tm_isdst = -1; /* default is unknown */
2072
2073         /* manpage of mktime says these fields are ints,
2074          * so we can sscanf stuff directly into them */
2075         count = sscanf(ds, "%u %u %u %u %u %u %d",
2076                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2077                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2078                 &then.tm_isdst);
2079
2080         if (count < 6
2081          || (unsigned)then.tm_mon < 1
2082          || (unsigned)then.tm_year < 1900
2083         ) {
2084                 return -1;
2085         }
2086
2087         then.tm_mon -= 1;
2088         then.tm_year -= 1900;
2089
2090         return mktime(&then);
2091 }
2092
2093 static NOINLINE var *exec_builtin(node *op, var *res)
2094 {
2095 #define tspl (G.exec_builtin__tspl)
2096
2097         var *tv;
2098         node *an[4];
2099         var *av[4];
2100         const char *as[4];
2101         regmatch_t pmatch[2];
2102         regex_t sreg, *re;
2103         node *spl;
2104         uint32_t isr, info;
2105         int nargs;
2106         time_t tt;
2107         int i, l, ll, n;
2108
2109         tv = nvalloc(4);
2110         isr = info = op->info;
2111         op = op->l.n;
2112
2113         av[2] = av[3] = NULL;
2114         for (i = 0; i < 4 && op; i++) {
2115                 an[i] = nextarg(&op);
2116                 if (isr & 0x09000000)
2117                         av[i] = evaluate(an[i], &tv[i]);
2118                 if (isr & 0x08000000)
2119                         as[i] = getvar_s(av[i]);
2120                 isr >>= 1;
2121         }
2122
2123         nargs = i;
2124         if ((uint32_t)nargs < (info >> 30))
2125                 syntax_error(EMSG_TOO_FEW_ARGS);
2126
2127         info &= OPNMASK;
2128         switch (info) {
2129
2130         case B_a2:
2131 #if ENABLE_FEATURE_AWK_LIBM
2132                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2133 #else
2134                 syntax_error(EMSG_NO_MATH);
2135 #endif
2136                 break;
2137
2138         case B_sp: {
2139                 char *s, *s1;
2140
2141                 if (nargs > 2) {
2142                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2143                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2144                 } else {
2145                         spl = &fsplitter.n;
2146                 }
2147
2148                 n = awk_split(as[0], spl, &s);
2149                 s1 = s;
2150                 clear_array(iamarray(av[1]));
2151                 for (i = 1; i <= n; i++)
2152                         setari_u(av[1], i, nextword(&s));
2153                 free(s1);
2154                 setvar_i(res, n);
2155                 break;
2156         }
2157
2158         case B_ss: {
2159                 char *s;
2160
2161                 l = strlen(as[0]);
2162                 i = getvar_i(av[1]) - 1;
2163                 if (i > l)
2164                         i = l;
2165                 if (i < 0)
2166                         i = 0;
2167                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2168                 if (n < 0)
2169                         n = 0;
2170                 s = xstrndup(as[0]+i, n);
2171                 setvar_p(res, s);
2172                 break;
2173         }
2174
2175         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2176          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2177         case B_an:
2178                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2179                 break;
2180
2181         case B_co:
2182                 setvar_i(res, ~getvar_i_int(av[0]));
2183                 break;
2184
2185         case B_ls:
2186                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2187                 break;
2188
2189         case B_or:
2190                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2191                 break;
2192
2193         case B_rs:
2194                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2195                 break;
2196
2197         case B_xo:
2198                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2199                 break;
2200
2201         case B_lo:
2202         case B_up: {
2203                 char *s, *s1;
2204                 s1 = s = xstrdup(as[0]);
2205                 while (*s1) {
2206                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2207                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2208                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2209                         s1++;
2210                 }
2211                 setvar_p(res, s);
2212                 break;
2213         }
2214
2215         case B_ix:
2216                 n = 0;
2217                 ll = strlen(as[1]);
2218                 l = strlen(as[0]) - ll;
2219                 if (ll > 0 && l >= 0) {
2220                         if (!icase) {
2221                                 char *s = strstr(as[0], as[1]);
2222                                 if (s)
2223                                         n = (s - as[0]) + 1;
2224                         } else {
2225                                 /* this piece of code is terribly slow and
2226                                  * really should be rewritten
2227                                  */
2228                                 for (i = 0; i <= l; i++) {
2229                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2230                                                 n = i+1;
2231                                                 break;
2232                                         }
2233                                 }
2234                         }
2235                 }
2236                 setvar_i(res, n);
2237                 break;
2238
2239         case B_ti:
2240                 if (nargs > 1)
2241                         tt = getvar_i(av[1]);
2242                 else
2243                         time(&tt);
2244                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2245                 i = strftime(g_buf, MAXVARFMT,
2246                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2247                         localtime(&tt));
2248                 g_buf[i] = '\0';
2249                 setvar_s(res, g_buf);
2250                 break;
2251
2252         case B_mt:
2253                 setvar_i(res, do_mktime(as[0]));
2254                 break;
2255
2256         case B_ma:
2257                 re = as_regex(an[1], &sreg);
2258                 n = regexec(re, as[0], 1, pmatch, 0);
2259                 if (n == 0) {
2260                         pmatch[0].rm_so++;
2261                         pmatch[0].rm_eo++;
2262                 } else {
2263                         pmatch[0].rm_so = 0;
2264                         pmatch[0].rm_eo = -1;
2265                 }
2266                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2267                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2268                 setvar_i(res, pmatch[0].rm_so);
2269                 if (re == &sreg)
2270                         regfree(re);
2271                 break;
2272
2273         case B_ge:
2274                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2275                 break;
2276
2277         case B_gs:
2278                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2279                 break;
2280
2281         case B_su:
2282                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2283                 break;
2284         }
2285
2286         nvfree(tv);
2287         return res;
2288 #undef tspl
2289 }
2290
2291 /*
2292  * Evaluate node - the heart of the program. Supplied with subtree
2293  * and place where to store result. returns ptr to result.
2294  */
2295 #define XC(n) ((n) >> 8)
2296
2297 static var *evaluate(node *op, var *res)
2298 {
2299 /* This procedure is recursive so we should count every byte */
2300 #define fnargs (G.evaluate__fnargs)
2301 /* seed is initialized to 1 */
2302 #define seed   (G.evaluate__seed)
2303 #define sreg   (G.evaluate__sreg)
2304
2305         node *op1;
2306         var *v1;
2307         struct {
2308                 var *v;
2309                 const char *s;
2310         } L = L; /* for compiler */
2311         struct {
2312                 var *v;
2313                 const char *s;
2314         } R = R;
2315         double L_d = L_d;
2316         uint32_t opinfo;
2317         int opn;
2318
2319         if (!op)
2320                 return setvar_s(res, NULL);
2321
2322         v1 = nvalloc(2);
2323
2324         while (op) {
2325                 opinfo = op->info;
2326                 opn = (opinfo & OPNMASK);
2327                 g_lineno = op->lineno;
2328
2329                 /* execute inevitable things */
2330                 op1 = op->l.n;
2331                 if (opinfo & OF_RES1)
2332                         L.v = evaluate(op1, v1);
2333                 if (opinfo & OF_RES2)
2334                         R.v = evaluate(op->r.n, v1+1);
2335                 if (opinfo & OF_STR1)
2336                         L.s = getvar_s(L.v);
2337                 if (opinfo & OF_STR2)
2338                         R.s = getvar_s(R.v);
2339                 if (opinfo & OF_NUM1)
2340                         L_d = getvar_i(L.v);
2341
2342                 switch (XC(opinfo & OPCLSMASK)) {
2343
2344                 /* -- iterative node type -- */
2345
2346                 /* test pattern */
2347                 case XC( OC_TEST ):
2348                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2349                                 /* it's range pattern */
2350                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2351                                         op->info |= OF_CHECKED;
2352                                         if (ptest(op1->r.n))
2353                                                 op->info &= ~OF_CHECKED;
2354                                         op = op->a.n;
2355                                 } else {
2356                                         op = op->r.n;
2357                                 }
2358                         } else {
2359                                 op = ptest(op1) ? op->a.n : op->r.n;
2360                         }
2361                         break;
2362
2363                 /* just evaluate an expression, also used as unconditional jump */
2364                 case XC( OC_EXEC ):
2365                         break;
2366
2367                 /* branch, used in if-else and various loops */
2368                 case XC( OC_BR ):
2369                         op = istrue(L.v) ? op->a.n : op->r.n;
2370                         break;
2371
2372                 /* initialize for-in loop */
2373                 case XC( OC_WALKINIT ):
2374                         hashwalk_init(L.v, iamarray(R.v));
2375                         break;
2376
2377                 /* get next array item */
2378                 case XC( OC_WALKNEXT ):
2379                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2380                         break;
2381
2382                 case XC( OC_PRINT ):
2383                 case XC( OC_PRINTF ): {
2384                         FILE *F = stdout;
2385
2386                         if (op->r.n) {
2387                                 rstream *rsm = newfile(R.s);
2388                                 if (!rsm->F) {
2389                                         if (opn == '|') {
2390                                                 rsm->F = popen(R.s, "w");
2391                                                 if (rsm->F == NULL)
2392                                                         bb_perror_msg_and_die("popen");
2393                                                 rsm->is_pipe = 1;
2394                                         } else {
2395                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2396                                         }
2397                                 }
2398                                 F = rsm->F;
2399                         }
2400
2401                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2402                                 if (!op1) {
2403                                         fputs(getvar_s(intvar[F0]), F);
2404                                 } else {
2405                                         while (op1) {
2406                                                 var *v = evaluate(nextarg(&op1), v1);
2407                                                 if (v->type & VF_NUMBER) {
2408                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2409                                                                         getvar_i(v), TRUE);
2410                                                         fputs(g_buf, F);
2411                                                 } else {
2412                                                         fputs(getvar_s(v), F);
2413                                                 }
2414
2415                                                 if (op1)
2416                                                         fputs(getvar_s(intvar[OFS]), F);
2417                                         }
2418                                 }
2419                                 fputs(getvar_s(intvar[ORS]), F);
2420
2421                         } else {        /* OC_PRINTF */
2422                                 char *s = awk_printf(op1);
2423                                 fputs(s, F);
2424                                 free(s);
2425                         }
2426                         fflush(F);
2427                         break;
2428                 }
2429
2430                 case XC( OC_DELETE ): {
2431                         uint32_t info = op1->info & OPCLSMASK;
2432                         if (info == OC_VAR) {
2433                                 R.v = op1->l.v;
2434                         } else if (info == OC_FNARG) {
2435                                 R.v = &fnargs[op1->l.aidx];
2436                         } else {
2437                                 syntax_error(EMSG_NOT_ARRAY);
2438                         }
2439
2440                         if (op1->r.n) {
2441                                 clrvar(L.v);
2442                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2443                                 hash_remove(iamarray(R.v), L.s);
2444                         } else {
2445                                 clear_array(iamarray(R.v));
2446                         }
2447                         break;
2448                 }
2449
2450                 case XC( OC_NEWSOURCE ):
2451                         g_progname = op->l.new_progname;
2452                         break;
2453
2454                 case XC( OC_RETURN ):
2455                         copyvar(res, L.v);
2456                         break;
2457
2458                 case XC( OC_NEXTFILE ):
2459                         nextfile = TRUE;
2460                 case XC( OC_NEXT ):
2461                         nextrec = TRUE;
2462                 case XC( OC_DONE ):
2463                         clrvar(res);
2464                         break;
2465
2466                 case XC( OC_EXIT ):
2467                         awk_exit(L_d);
2468
2469                 /* -- recursive node type -- */
2470
2471                 case XC( OC_VAR ):
2472                         L.v = op->l.v;
2473                         if (L.v == intvar[NF])
2474                                 split_f0();
2475                         goto v_cont;
2476
2477                 case XC( OC_FNARG ):
2478                         L.v = &fnargs[op->l.aidx];
2479  v_cont:
2480                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2481                         break;
2482
2483                 case XC( OC_IN ):
2484                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2485                         break;
2486
2487                 case XC( OC_REGEXP ):
2488                         op1 = op;
2489                         L.s = getvar_s(intvar[F0]);
2490                         goto re_cont;
2491
2492                 case XC( OC_MATCH ):
2493                         op1 = op->r.n;
2494  re_cont:
2495                         {
2496                                 regex_t *re = as_regex(op1, &sreg);
2497                                 int i = regexec(re, L.s, 0, NULL, 0);
2498                                 if (re == &sreg)
2499                                         regfree(re);
2500                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2501                         }
2502                         break;
2503
2504                 case XC( OC_MOVE ):
2505                         /* if source is a temporary string, jusk relink it to dest */
2506 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2507 //then L.v ends up being a string, which is wrong
2508 //                      if (R.v == v1+1 && R.v->string) {
2509 //                              res = setvar_p(L.v, R.v->string);
2510 //                              R.v->string = NULL;
2511 //                      } else {
2512                                 res = copyvar(L.v, R.v);
2513 //                      }
2514                         break;
2515
2516                 case XC( OC_TERNARY ):
2517                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2518                                 syntax_error(EMSG_POSSIBLE_ERROR);
2519                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2520                         break;
2521
2522                 case XC( OC_FUNC ): {
2523                         var *v;
2524
2525                         if (!op->r.f->body.first)
2526                                 syntax_error(EMSG_UNDEF_FUNC);
2527
2528                         v = R.v = nvalloc(op->r.f->nargs + 1);
2529                         while (op1) {
2530                                 L.v = evaluate(nextarg(&op1), v1);
2531                                 copyvar(R.v, L.v);
2532                                 R.v->type |= VF_CHILD;
2533                                 R.v->x.parent = L.v;
2534                                 if (++R.v - v >= op->r.f->nargs)
2535                                         break;
2536                         }
2537
2538                         R.v = fnargs;
2539                         fnargs = v;
2540
2541                         L.s = g_progname;
2542                         res = evaluate(op->r.f->body.first, res);
2543                         g_progname = L.s;
2544
2545                         nvfree(fnargs);
2546                         fnargs = R.v;
2547                         break;
2548                 }
2549
2550                 case XC( OC_GETLINE ):
2551                 case XC( OC_PGETLINE ): {
2552                         rstream *rsm;
2553                         int i;
2554
2555                         if (op1) {
2556                                 rsm = newfile(L.s);
2557                                 if (!rsm->F) {
2558                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2559                                                 rsm->F = popen(L.s, "r");
2560                                                 rsm->is_pipe = TRUE;
2561                                         } else {
2562                                                 rsm->F = fopen_for_read(L.s);           /* not xfopen! */
2563                                         }
2564                                 }
2565                         } else {
2566                                 if (!iF)
2567                                         iF = next_input_file();
2568                                 rsm = iF;
2569                         }
2570
2571                         if (!rsm->F) {
2572                                 setvar_i(intvar[ERRNO], errno);
2573                                 setvar_i(res, -1);
2574                                 break;
2575                         }
2576
2577                         if (!op->r.n)
2578                                 R.v = intvar[F0];
2579
2580                         i = awk_getline(rsm, R.v);
2581                         if (i > 0 && !op1) {
2582                                 incvar(intvar[FNR]);
2583                                 incvar(intvar[NR]);
2584                         }
2585                         setvar_i(res, i);
2586                         break;
2587                 }
2588
2589                 /* simple builtins */
2590                 case XC( OC_FBLTIN ): {
2591                         int i;
2592                         rstream *rsm;
2593                         double R_d = R_d; /* for compiler */
2594
2595                         switch (opn) {
2596                         case F_in:
2597                                 R_d = (int)L_d;
2598                                 break;
2599
2600                         case F_rn:
2601                                 R_d = (double)rand() / (double)RAND_MAX;
2602                                 break;
2603 #if ENABLE_FEATURE_AWK_LIBM
2604                         case F_co:
2605                                 R_d = cos(L_d);
2606                                 break;
2607
2608                         case F_ex:
2609                                 R_d = exp(L_d);
2610                                 break;
2611
2612                         case F_lg:
2613                                 R_d = log(L_d);
2614                                 break;
2615
2616                         case F_si:
2617                                 R_d = sin(L_d);
2618                                 break;
2619
2620                         case F_sq:
2621                                 R_d = sqrt(L_d);
2622                                 break;
2623 #else
2624                         case F_co:
2625                         case F_ex:
2626                         case F_lg:
2627                         case F_si:
2628                         case F_sq:
2629                                 syntax_error(EMSG_NO_MATH);
2630                                 break;
2631 #endif
2632                         case F_sr:
2633                                 R_d = (double)seed;
2634                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2635                                 srand(seed);
2636                                 break;
2637
2638                         case F_ti:
2639                                 R_d = time(NULL);
2640                                 break;
2641
2642                         case F_le:
2643                                 if (!op1)
2644                                         L.s = getvar_s(intvar[F0]);
2645                                 R_d = strlen(L.s);
2646                                 break;
2647
2648                         case F_sy:
2649                                 fflush_all();
2650                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2651                                                 ? (system(L.s) >> 8) : 0;
2652                                 break;
2653
2654                         case F_ff:
2655                                 if (!op1) {
2656                                         fflush(stdout);
2657                                 } else if (L.s && *L.s) {
2658                                         rsm = newfile(L.s);
2659                                         fflush(rsm->F);
2660                                 } else {
2661                                         fflush_all();
2662                                 }
2663                                 break;
2664
2665                         case F_cl:
2666                                 i = 0;
2667                                 rsm = (rstream *)hash_search(fdhash, L.s);
2668                                 if (rsm) {
2669                                         i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2670                                         free(rsm->buffer);
2671                                         hash_remove(fdhash, L.s);
2672                                 }
2673                                 if (i != 0)
2674                                         setvar_i(intvar[ERRNO], errno);
2675                                 R_d = (double)i;
2676                                 break;
2677                         }
2678                         setvar_i(res, R_d);
2679                         break;
2680                 }
2681
2682                 case XC( OC_BUILTIN ):
2683                         res = exec_builtin(op, res);
2684                         break;
2685
2686                 case XC( OC_SPRINTF ):
2687                         setvar_p(res, awk_printf(op1));
2688                         break;
2689
2690                 case XC( OC_UNARY ): {
2691                         double Ld, R_d;
2692
2693                         Ld = R_d = getvar_i(R.v);
2694                         switch (opn) {
2695                         case 'P':
2696                                 Ld = ++R_d;
2697                                 goto r_op_change;
2698                         case 'p':
2699                                 R_d++;
2700                                 goto r_op_change;
2701                         case 'M':
2702                                 Ld = --R_d;
2703                                 goto r_op_change;
2704                         case 'm':
2705                                 R_d--;
2706  r_op_change:
2707                                 setvar_i(R.v, R_d);
2708                                 break;
2709                         case '!':
2710                                 Ld = !istrue(R.v);
2711                                 break;
2712                         case '-':
2713                                 Ld = -R_d;
2714                                 break;
2715                         }
2716                         setvar_i(res, Ld);
2717                         break;
2718                 }
2719
2720                 case XC( OC_FIELD ): {
2721                         int i = (int)getvar_i(R.v);
2722                         if (i == 0) {
2723                                 res = intvar[F0];
2724                         } else {
2725                                 split_f0();
2726                                 if (i > nfields)
2727                                         fsrealloc(i);
2728                                 res = &Fields[i - 1];
2729                         }
2730                         break;
2731                 }
2732
2733                 /* concatenation (" ") and index joining (",") */
2734                 case XC( OC_CONCAT ):
2735                 case XC( OC_COMMA ): {
2736                         const char *sep = "";
2737                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2738                                 sep = getvar_s(intvar[SUBSEP]);
2739                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2740                         break;
2741                 }
2742
2743                 case XC( OC_LAND ):
2744                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2745                         break;
2746
2747                 case XC( OC_LOR ):
2748                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2749                         break;
2750
2751                 case XC( OC_BINARY ):
2752                 case XC( OC_REPLACE ): {
2753                         double R_d = getvar_i(R.v);
2754                         switch (opn) {
2755                         case '+':
2756                                 L_d += R_d;
2757                                 break;
2758                         case '-':
2759                                 L_d -= R_d;
2760                                 break;
2761                         case '*':
2762                                 L_d *= R_d;
2763                                 break;
2764                         case '/':
2765                                 if (R_d == 0)
2766                                         syntax_error(EMSG_DIV_BY_ZERO);
2767                                 L_d /= R_d;
2768                                 break;
2769                         case '&':
2770 #if ENABLE_FEATURE_AWK_LIBM
2771                                 L_d = pow(L_d, R_d);
2772 #else
2773                                 syntax_error(EMSG_NO_MATH);
2774 #endif
2775                                 break;
2776                         case '%':
2777                                 if (R_d == 0)
2778                                         syntax_error(EMSG_DIV_BY_ZERO);
2779                                 L_d -= (int)(L_d / R_d) * R_d;
2780                                 break;
2781                         }
2782                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2783                         break;
2784                 }
2785
2786                 case XC( OC_COMPARE ): {
2787                         int i = i; /* for compiler */
2788                         double Ld;
2789
2790                         if (is_numeric(L.v) && is_numeric(R.v)) {
2791                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2792                         } else {
2793                                 L.s = getvar_s(L.v);
2794                                 R.s = getvar_s(R.v);
2795                                 Ld = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2796                         }
2797                         switch (opn & 0xfe) {
2798                         case 0:
2799                                 i = (Ld > 0);
2800                                 break;
2801                         case 2:
2802                                 i = (Ld >= 0);
2803                                 break;
2804                         case 4:
2805                                 i = (Ld == 0);
2806                                 break;
2807                         }
2808                         setvar_i(res, (i == 0) ^ (opn & 1));
2809                         break;
2810                 }
2811
2812                 default:
2813                         syntax_error(EMSG_POSSIBLE_ERROR);
2814                 }
2815                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2816                         op = op->a.n;
2817                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2818                         break;
2819                 if (nextrec)
2820                         break;
2821         } /* while (op) */
2822
2823         nvfree(v1);
2824         return res;
2825 #undef fnargs
2826 #undef seed
2827 #undef sreg
2828 }
2829
2830
2831 /* -------- main & co. -------- */
2832
2833 static int awk_exit(int r)
2834 {
2835         var tv;
2836         unsigned i;
2837         hash_item *hi;
2838
2839         zero_out_var(&tv);
2840
2841         if (!exiting) {
2842                 exiting = TRUE;
2843                 nextrec = FALSE;
2844                 evaluate(endseq.first, &tv);
2845         }
2846
2847         /* waiting for children */
2848         for (i = 0; i < fdhash->csize; i++) {
2849                 hi = fdhash->items[i];
2850                 while (hi) {
2851                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2852                                 pclose(hi->data.rs.F);
2853                         hi = hi->next;
2854                 }
2855         }
2856
2857         exit(r);
2858 }
2859
2860 /* if expr looks like "var=value", perform assignment and return 1,
2861  * otherwise return 0 */
2862 static int is_assignment(const char *expr)
2863 {
2864         char *exprc, *s, *s0, *s1;
2865
2866         exprc = xstrdup(expr);
2867         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2868                 free(exprc);
2869                 return FALSE;
2870         }
2871
2872         *s++ = '\0';
2873         s0 = s1 = s;
2874         while (*s)
2875                 *s1++ = nextchar(&s);
2876
2877         *s1 = '\0';
2878         setvar_u(newvar(exprc), s0);
2879         free(exprc);
2880         return TRUE;
2881 }
2882
2883 /* switch to next input file */
2884 static rstream *next_input_file(void)
2885 {
2886 #define rsm          (G.next_input_file__rsm)
2887 #define files_happen (G.next_input_file__files_happen)
2888
2889         FILE *F = NULL;
2890         const char *fname, *ind;
2891
2892         if (rsm.F)
2893                 fclose(rsm.F);
2894         rsm.F = NULL;
2895         rsm.pos = rsm.adv = 0;
2896
2897         do {
2898                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2899                         if (files_happen)
2900                                 return NULL;
2901                         fname = "-";
2902                         F = stdin;
2903                 } else {
2904                         ind = getvar_s(incvar(intvar[ARGIND]));
2905                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2906                         if (fname && *fname && !is_assignment(fname))
2907                                 F = xfopen_stdin(fname);
2908                 }
2909         } while (!F);
2910
2911         files_happen = TRUE;
2912         setvar_s(intvar[FILENAME], fname);
2913         rsm.F = F;
2914         return &rsm;
2915 #undef rsm
2916 #undef files_happen
2917 }
2918
2919 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2920 int awk_main(int argc, char **argv)
2921 {
2922         unsigned opt;
2923         char *opt_F, *opt_W;
2924         llist_t *list_v = NULL;
2925         llist_t *list_f = NULL;
2926         int i, j;
2927         var *v;
2928         var tv;
2929         char **envp;
2930         char *vnames = (char *)vNames; /* cheat */
2931         char *vvalues = (char *)vValues;
2932
2933         INIT_G();
2934
2935         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2936          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2937         if (ENABLE_LOCALE_SUPPORT)
2938                 setlocale(LC_NUMERIC, "C");
2939
2940         zero_out_var(&tv);
2941
2942         /* allocate global buffer */
2943         g_buf = xmalloc(MAXVARFMT + 1);
2944
2945         vhash = hash_init();
2946         ahash = hash_init();
2947         fdhash = hash_init();
2948         fnhash = hash_init();
2949
2950         /* initialize variables */
2951         for (i = 0; *vnames; i++) {
2952                 intvar[i] = v = newvar(nextword(&vnames));
2953                 if (*vvalues != '\377')
2954                         setvar_s(v, nextword(&vvalues));
2955                 else
2956                         setvar_i(v, 0);
2957
2958                 if (*vnames == '*') {
2959                         v->type |= VF_SPECIAL;
2960                         vnames++;
2961                 }
2962         }
2963
2964         handle_special(intvar[FS]);
2965         handle_special(intvar[RS]);
2966
2967         newfile("/dev/stdin")->F = stdin;
2968         newfile("/dev/stdout")->F = stdout;
2969         newfile("/dev/stderr")->F = stderr;
2970
2971         /* Huh, people report that sometimes environ is NULL. Oh well. */
2972         if (environ) for (envp = environ; *envp; envp++) {
2973                 /* environ is writable, thus we don't strdup it needlessly */
2974                 char *s = *envp;
2975                 char *s1 = strchr(s, '=');
2976                 if (s1) {
2977                         *s1 = '\0';
2978                         /* Both findvar and setvar_u take const char*
2979                          * as 2nd arg -> environment is not trashed */
2980                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2981                         *s1 = '=';
2982                 }
2983         }
2984         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2985         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2986         argv += optind;
2987         argc -= optind;
2988         if (opt & 0x1)
2989                 setvar_s(intvar[FS], opt_F); // -F
2990         while (list_v) { /* -v */
2991                 if (!is_assignment(llist_pop(&list_v)))
2992                         bb_show_usage();
2993         }
2994         if (list_f) { /* -f */
2995                 do {
2996                         char *s = NULL;
2997                         FILE *from_file;
2998
2999                         g_progname = llist_pop(&list_f);
3000                         from_file = xfopen_stdin(g_progname);
3001                         /* one byte is reserved for some trick in next_token */
3002                         for (i = j = 1; j > 0; i += j) {
3003                                 s = xrealloc(s, i + 4096);
3004                                 j = fread(s + i, 1, 4094, from_file);
3005                         }
3006                         s[i] = '\0';
3007                         fclose(from_file);
3008                         parse_program(s + 1);
3009                         free(s);
3010                 } while (list_f);
3011                 argc++;
3012         } else { // no -f: take program from 1st parameter
3013                 if (!argc)
3014                         bb_show_usage();
3015                 g_progname = "cmd. line";
3016                 parse_program(*argv++);
3017         }
3018         if (opt & 0x8) // -W
3019                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3020
3021         /* fill in ARGV array */
3022         setvar_i(intvar[ARGC], argc);
3023         setari_u(intvar[ARGV], 0, "awk");
3024         i = 0;
3025         while (*argv)
3026                 setari_u(intvar[ARGV], ++i, *argv++);
3027
3028         evaluate(beginseq.first, &tv);
3029         if (!mainseq.first && !endseq.first)
3030                 awk_exit(EXIT_SUCCESS);
3031
3032         /* input file could already be opened in BEGIN block */
3033         if (!iF)
3034                 iF = next_input_file();
3035
3036         /* passing through input files */
3037         while (iF) {
3038                 nextfile = FALSE;
3039                 setvar_i(intvar[FNR], 0);
3040
3041                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3042                         nextrec = FALSE;
3043                         incvar(intvar[NR]);
3044                         incvar(intvar[FNR]);
3045                         evaluate(mainseq.first, &tv);
3046
3047                         if (nextfile)
3048                                 break;
3049                 }
3050
3051                 if (i < 0)
3052                         syntax_error(strerror(errno));
3053
3054                 iF = next_input_file();
3055         }
3056
3057         awk_exit(EXIT_SUCCESS);
3058         /*return 0;*/
3059 }