awk: code shrink; style fixes
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* debug_printf_walker(): trace hook used by the walker-list code
 * (e.g. nvfree() calls it while releasing a variable's walker list).
 * As written it expands to an empty statement; removing the first
 * #define below lets the #ifndef fallback turn it into fprintf(stderr, ...). */
17 /* If you comment out one of these below, it will be #defined later
18  * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...)  do {} while (0)
20
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
23 #endif
24
25
26
/* MAXVARFMT: buffer size handed to fmt_num() when getvar_s() formats a number.
 * MINNVBLOCK: smallest number of var slots nvalloc() puts in a new nvblock. */
27 #define MAXVARFMT       240
28 #define MINNVBLOCK      64
29
/* Bits stored in var.type. The low bits describe the kind of value,
 * the 0x..00 bits describe caching/ownership state. */
30 /* variable flags */
31 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
32 #define VF_ARRAY        0x0002  /* 1 = it's an array */
33
34 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
35 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
37 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
41
/* clrvar() masks var.type with VF_DONTTOUCH, so only these bits survive
 * a value reassignment. */
42 /* these flags are static, don't change them when value is changed */
43 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
44
/* One node of a variable's walker list (var.x.walker, flagged by VF_WALK).
 * Nodes are chained through 'prev'; nvfree() follows that chain to free them.
 * NOTE(review): end/cur presumably delimit and iterate over data stored in
 * wbuf, which looks like a [1]-sized trailing buffer that is over-allocated
 * by the creator - confirm at the for..in implementation. */
45 typedef struct walker_list {
46         char *end;
47         char *cur;
48         struct walker_list *prev;
49         char wbuf[1];
50 } walker_list;
51
52 /* Variable */
/* An awk value. 'type' holds VF_* flags; 'number' and 'string' hold the
 * numeric and string forms (one may be a cached conversion of the other,
 * see VF_CACHED). The union 'x' is interpreted according to the flags:
 * VF_ARRAY -> x.array, VF_CHILD -> x.parent, VF_WALK -> x.walker. */
53 typedef struct var_s {
54         unsigned type;            /* flags */
55         double number;
56         char *string;
57         union {
58                 int aidx;               /* func arg idx (for compilation stage) */
59                 struct xhash_s *array;  /* array ptr */
60                 struct var_s *parent;   /* for func args, ptr to actual parameter */
61                 walker_list *walker;    /* list of array elements (for..in) */
62         } x;
63 } var;
64
65 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* Keeps pointers to the first and last node of the chain plus a
 * program name string associated with it. */
66 typedef struct chain_s {
67         struct node_s *first;
68         struct node_s *last;
69         const char *programname;
70 } chain;
71
72 /* Function */
/* A user-defined function: its argument count and the node chain
 * forming its body. Stored in hash items (hash_item.data.f). */
73 typedef struct func_s {
74         unsigned nargs;
75         struct chain_s body;
76 } func;
77
78 /* I/O stream */
/* A stdio stream plus buffering state; stored in hash items
 * (hash_item.data.rs) and used for the current input (iF).
 * NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping
 * for record reading - confirm against the reader code. */
79 typedef struct rstream_s {
80         FILE *F;
81         char *buffer;
82         int adv;
83         int size;
84         int pos;
85         smallint is_pipe;
86 } rstream;
87
/* One entry of an xhash bucket chain. 'data' is deliberately the first
 * member: hash_search()/hash_find() hand out &item->data to callers.
 * 'name' is the key; hash_find() allocates the item with extra room so the
 * whole NUL-terminated key fits after the struct. */
88 typedef struct hash_item_s {
89         union {
90                 struct var_s v;         /* variable/array hash */
91                 struct rstream_s rs;    /* redirect streams hash */
92                 struct func_s f;        /* functions hash */
93         } data;
94         struct hash_item_s *next;       /* next in chain */
95         char name[1];                   /* really it's longer */
96 } hash_item;
97
/* Chained hash table keyed by string. 'items' is an array of 'csize'
 * bucket heads; hash_rebuild() grows it to PRIMES[nprime] and advances
 * nprime. 'glen' is maintained by hash_find()/hash_remove() as the sum of
 * (strlen(name) + 1) over all stored keys. */
98 typedef struct xhash_s {
99         unsigned nel;           /* num of elements */
100         unsigned csize;         /* current hash size */
101         unsigned nprime;        /* next hash size in PRIMES[] */
102         unsigned glen;          /* summary length of item names */
103         struct hash_item_s **items;
104 } xhash;
105
106 /* Tree node */
/* 'info' carries the operation class (OC_xxx or ST_xxx), operand flags
 * (OF_xxx), priority (P()) and a low-bits opcode, as composed in tokeninfo[].
 * 'lineno' records a source line number.
 * l, r and a are three operand slots; which union member is valid
 * depends on the operation.
 * NOTE(review): the per-operation meaning of l/r/a is not visible in
 * this part of the file - see the parser and evaluate(). */
107 typedef struct node_s {
108         uint32_t info;
109         unsigned lineno;
110         union {
111                 struct node_s *n;
112                 var *v;
113                 int i;
114                 char *s;
115                 regex_t *re;
116         } l;
117         union {
118                 struct node_s *n;
119                 regex_t *ire;
120                 func *f;
121                 int argno;
122         } r;
123         union {
124                 struct node_s *n;
125         } a;
126 } node;
127
128 /* Block of temporary variables */
/* Blocks form a doubly linked list (prev/next). 'nv' is a flexible array
 * of 'size' vars; 'pos' points at the first unused slot. nvalloc() hands
 * out slots starting at pos, nvfree() moves pos back. */
129 typedef struct nvblock_s {
130         int size;
131         var *pos;
132         struct nvblock_s *prev;
133         struct nvblock_s *next;
134         var nv[];
135 } nvblock;
136
/* A node bundled with storage for two compiled regexes. Used for the
 * global field/record splitters (fsplitter, rsplitter) and a builtin's
 * scratch splitter.
 * NOTE(review): presumably re[0]/re[1] are the two variants referenced by
 * node.l.re and node.r.ire (case-sensitive / ignore-case) - confirm. */
137 typedef struct tsplitter_s {
138         node n;
139         regex_t re[2];
140 } tsplitter;
141
/* Token classes. Every simple class is a distinct single bit, so sets of
 * classes can be expressed as OR-ed masks (see the combined classes below).
 * The bit order matches the order of the groups in tokenlist[], where each
 * NTC marker advances to the next class (tc<<1). */
142 /* simple token classes */
143 /* Order and hex values are very important!!!  See next_token() */
144 #define TC_SEQSTART      1                              /* ( */
145 #define TC_SEQTERM      (1 << 1)                /* ) */
146 #define TC_REGEXP       (1 << 2)                /* /.../ */
147 #define TC_OUTRDR       (1 << 3)                /* | > >> */
148 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
149 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
150 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
151 #define TC_IN           (1 << 7)
152 #define TC_COMMA        (1 << 8)
153 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
154 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
155 #define TC_ARRTERM      (1 << 11)               /* ] */
156 #define TC_GRPSTART     (1 << 12)               /* { */
157 #define TC_GRPTERM      (1 << 13)               /* } */
158 #define TC_SEMICOL      (1 << 14)
159 #define TC_NEWLINE      (1 << 15)
160 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
161 #define TC_WHILE        (1 << 17)
162 #define TC_ELSE         (1 << 18)
163 #define TC_BUILTIN      (1 << 19)
164 #define TC_GETLINE      (1 << 20)
165 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
166 #define TC_BEGIN        (1 << 22)
167 #define TC_END          (1 << 23)
168 #define TC_EOF          (1 << 24)
169 #define TC_VARIABLE     (1 << 25)
170 #define TC_ARRAY        (1 << 26)
171 #define TC_FUNCTION     (1 << 27)
172 #define TC_STRING       (1 << 28)
173 #define TC_NUMBER       (1 << 29)
174
/* both groups of unary prefix operators (++ -- $ and + - !) */
175 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
176
177 /* combined token classes */
178 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
179 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
180 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
181                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
182
183 #define TC_STATEMNT (TC_STATX | TC_WHILE)
184 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
185
186 /* word tokens, cannot mean something else if not expected */
187 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
188                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
189
190 /* discard newlines after these */
191 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
192                    | TC_BINOP | TC_OPTERM)
193
194 /* what can expression begin with */
195 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
196 /* what can group begin with */
197 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
198
199 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
200 /* operator is inserted between them */
201 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
202                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
203 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
204
/* Operand flags, OR-ed into the info word above the operation class byte
 * (bits 16..21).
 * NOTE(review): judging by the names and the builtin comment further down,
 * RESn = resolve operand n, STRn = as string, NUM1 = operand 1 as number;
 * OF_CHECKED's meaning is not visible here - confirm in evaluate(). */
205 #define OF_RES1    0x010000
206 #define OF_RES2    0x020000
207 #define OF_STR1    0x040000
208 #define OF_STR2    0x080000
209 #define OF_NUM1    0x100000
210 #define OF_CHECKED 0x200000
211
/* Two-letter shorthands used in tokeninfo[]: first letter describes
 * operand 1, second letter operand 2 (x = no flag, V = RES, S = RES|STR,
 * N = RES|NUM), as spelled out by the definitions themselves. */
212 /* combined operator flags */
213 #define xx      0
214 #define xV      OF_RES2
215 #define xS      (OF_RES2 | OF_STR2)
216 #define Vx      OF_RES1
217 #define VV      (OF_RES1 | OF_RES2)
218 #define Nx      (OF_RES1 | OF_NUM1)
219 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
220 #define Sx      (OF_RES1 | OF_STR1)
221 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
222 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
223
/* OPCLSMASK selects the operation class byte (the OC_xxx and ST_xxx values are
 * multiples of 0x100); OPNMASK selects the low 7 bits (opcode character
 * or F_xxx/B_xxx index as stored in tokeninfo[]). */
224 #define OPCLSMASK 0xFF00
225 #define OPNMASK   0x007F
226
/* Operator priority is stored in the highest byte of the info word
 * (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() converts its argument to uint32_t before shifting: builtin descriptors
 * such as P(0x83) have bit 7 set, and shifting that into the sign bit of a
 * plain int is undefined behaviour. The argument is parenthesized so that
 * expressions like P(a | b) shift the whole value.
 * PRIMASK selects the priority bits; PRIMASK2 is the same without the
 * lowest (grouping direction) bit.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
234
235 /* Operation classes */
/* Every class value is a multiple of 0x100, i.e. it occupies the byte
 * selected by OPCLSMASK in a node's info word. OC_xxx values are combined
 * with flags/priorities in tokeninfo[]; ST_xxx values tag the statement
 * keywords if/do/for/while there. */
236
/* NOTE(review): these two thresholds equal OC_WALKINIT (0x0600) and
 * OC_BINARY (0x1000); presumably they split the class range for the
 * evaluator - confirm in evaluate(). */
237 #define SHIFT_TIL_THIS  0x0600
238 #define RECUR_FROM_THIS 0x1000
239
240 enum {
241         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
242         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
243
244         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
245         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
246         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
247
248         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
249         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
250         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
251         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
252         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
253         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
254         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
255         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
256         OC_DONE = 0x2800,
257
258         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
259         ST_WHILE = 0x3300
260 };
261
/* Sub-opcodes for OC_FBLTIN entries of tokeninfo[]. Matching tokeninfo[]
 * positions against tokenlist[] gives: F_in int, F_rn rand, F_co cos,
 * F_ex exp, F_lg log, F_si sin, F_sq sqrt, F_sr srand, F_ti systime,
 * F_le length, F_sy system, F_ff fflush, F_cl close. */
262 /* simple builtins */
263 enum {
264         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
265         F_ti,   F_le,   F_sy,   F_ff,   F_cl
266 };
267
/* Sub-opcodes for OC_BUILTIN (OC_B) entries of tokeninfo[]. By position:
 * B_a2 atan2, B_ix index, B_ma match, B_sp split, B_ss substr,
 * B_ti strftime, B_mt mktime, B_lo tolower, B_up toupper, B_ge gensub,
 * B_gs gsub, B_su sub, B_an and, B_co compl, B_ls lshift, B_or or,
 * B_rs rshift, B_xo xor. */
268 /* builtins */
269 enum {
270         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
271         B_ge,   B_gs,   B_su,
272         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
273 };
274
275 /* tokens and their corresponding info values */
/* tokenlist[] encoding: every token is stored as one length byte (written
 * as an octal escape, e.g. "\10continue" = length 8) followed by the token
 * text. The NTC byte ('\377') closes the group belonging to one token class
 * and switches to the next class bit. Groups appear in the same order as
 * the TC_xxx bits. The final "\0" is a zero length byte following the last
 * token (presumably the end-of-table marker for the scanner - confirm in
 * next_token()). tokeninfo[] below is parallel to this list. */
276
277 #define NTC     "\377"  /* switch to next token class (tc<<1) */
278 #define NTCC    '\377'
279
280 #define OC_B    OC_BUILTIN
281
282 static const char tokenlist[] ALIGN1 =
283         "\1("       NTC
284         "\1)"       NTC
285         "\1/"       NTC                                 /* REGEXP */
286         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
287         "\2++"      "\2--"      NTC                     /* UOPPOST */
288         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
289         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
290         "\2*="      "\2/="      "\2%="      "\2^="
291         "\1+"       "\1-"       "\3**="     "\2**"
292         "\1/"       "\1%"       "\1^"       "\1*"
293         "\2!="      "\2>="      "\2<="      "\1>"
294         "\1<"       "\2!~"      "\1~"       "\2&&"
295         "\2||"      "\1?"       "\1:"       NTC
296         "\2in"      NTC
297         "\1,"       NTC
298         "\1|"       NTC
299         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
300         "\1]"       NTC
301         "\1{"       NTC
302         "\1}"       NTC
303         "\1;"       NTC
304         "\1\n"      NTC
305         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
306         "\10continue"           "\6delete"  "\5print"
307         "\6printf"  "\4next"    "\10nextfile"
308         "\6return"  "\4exit"    NTC
309         "\5while"   NTC
310         "\4else"    NTC
311
312         "\3and"     "\5compl"   "\6lshift"  "\2or"
313         "\6rshift"  "\3xor"
314         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
315         "\3cos"     "\3exp"     "\3int"     "\3log"
316         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
317         "\6gensub"  "\4gsub"    "\5index"   "\6length"
318         "\5match"   "\5split"   "\7sprintf" "\3sub"
319         "\6substr"  "\7systime" "\10strftime" "\6mktime"
320         "\7tolower" "\7toupper" NTC
321         "\7getline" NTC
322         "\4func"    "\10function"   NTC
323         "\5BEGIN"   NTC
324         "\3END"     "\0"
325         ;
326
/* Info words for the tokens of tokenlist[], one entry per token in the
 * same order (the rows below line up with the token groups above).
 * Each word is an OR of: operation class (OC_xxx or ST_xxx), operand flags
 * (xV, NV, Sx, ...), P(priority) and a low-bits opcode - either a
 * character such as '+' or an F_xxx/B_xxx builtin index. For OC_B entries the
 * P() byte holds the builtin's argument descriptor instead of a priority
 * (see the comment at P()). Tokens without extra info use 0. */
327 static const uint32_t tokeninfo[] = {
328         0,
329         0,
330         OC_REGEXP,
331         xS|'a',     xS|'w',     xS|'|',
332         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
333         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
334             OC_FIELD|xV|P(5),
335         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
336             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
337         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
338             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
339         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
340             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
341         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
342             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
343         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
344             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
345         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
346             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
347         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
348             OC_COLON|xx|P(67)|':',
349         OC_IN|SV|P(49),
350         OC_COMMA|SS|P(80),
351         OC_PGETLINE|SV|P(37),
352         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
353             OC_UNARY|xV|P(19)|'!',
354         0,
355         0,
356         0,
357         0,
358         0,
359         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
360         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
361         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
362         OC_RETURN|Vx,   OC_EXIT|Nx,
363         ST_WHILE,
364         0,
365
366         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
367         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
368         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
369         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
370         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
371         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
372         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
373         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
374         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
375         OC_GETLINE|SV|P(0),
376         0,      0,
377         0,
378         0
379 };
380
/* The enum gives the index of each built-in variable; intvar[] in
 * struct globals2 has NUM_INTERNAL_VARS slots indexed by it. vNames[]
 * lists the names in the same order, each NUL-terminated; a '*' placed
 * right after a terminator marks the NEXT name as special, per the comment
 * below (the exact parsing is in the init code, not visible here).
 * vValues[] holds initial string values for the leading variables in the
 * same order. NOTE(review): the trailing "\377" presumably tells the
 * initializer to stop assigning string values - confirm in awk_main(). */
381 /* internal variable names and their initial values       */
382 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
383 enum {
384         CONVFMT,    OFMT,       FS,         OFS,
385         ORS,        RS,         RT,         FILENAME,
386         SUBSEP,     F0,         ARGIND,     ARGC,
387         ARGV,       ERRNO,      FNR,        NR,
388         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
389 };
390
391 static const char vNames[] ALIGN1 =
392         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
393         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
394         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
395         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
396         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
397
398 static const char vValues[] ALIGN1 =
399         "%.6g\0"    "%.6g\0"    " \0"       " \0"
400         "\n\0"      "\n\0"      "\0"        "\0"
401         "\034\0"    "\0"        "\377";
402
/* hash_init() starts every table with FIRST_PRIME buckets; hash_rebuild()
 * steps through PRIMES[] (indexed by xhash.nprime) and stops growing once
 * the last entry has been used. */
403 /* hash size may grow to these values */
404 #define FIRST_PRIME 61
405 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
406
407
/* Layout: INIT_G() allocates one zeroed chunk of sizeof(G1)+sizeof(G)
 * bytes and points ptr_to_globals just past the G1 part. Hence struct
 * globals is reached as ptr_to_globals[-1] (G1) and struct globals2 as
 * *ptr_to_globals (G). The short lowercase macros below make the members
 * look like ordinary global variables in the rest of the file.
 * NOTE(review): ptr_to_globals and SET_PTR_TO_GLOBALS come from libbb;
 * the [-1] indexing assumes ptr_to_globals has type struct globals *. */
408 /* Globals. Split in two parts so that first one is addressed
409  * with (mostly short) negative offsets.
410  * NB: it's unsafe to put members of type "double"
411  * into globals2 (gcc may fail to align them).
412  */
413 struct globals {
414         double t_double;
415         chain beginseq, mainseq, endseq;
416         chain *seq;
417         node *break_ptr, *continue_ptr;
418         rstream *iF;
419         xhash *vhash, *ahash, *fdhash, *fnhash;
420         const char *g_progname;
421         int g_lineno;
422         int nfields;
423         int maxfields; /* used in fsrealloc() only */
424         var *Fields;
425         nvblock *g_cb;
426         char *g_pos;
427         char *g_buf;
428         smallint icase;
429         smallint exiting;
430         smallint nextrec;
431         smallint nextfile;
432         smallint is_f0_split;
433 };
434 struct globals2 {
435         uint32_t t_info; /* often used */
436         uint32_t t_tclass;
437         char *t_string;
438         int t_lineno;
439         int t_rollback;
440
441         var *intvar[NUM_INTERNAL_VARS]; /* often used */
442
443         /* former statics from various functions */
444         char *split_f0__fstrings;
445
446         uint32_t next_token__save_tclass;
447         uint32_t next_token__save_info;
448         uint32_t next_token__ltclass;
449         smallint next_token__concat_inserted;
450
451         smallint next_input_file__files_happen;
452         rstream next_input_file__rsm;
453
454         var *evaluate__fnargs;
455         unsigned evaluate__seed;
456         regex_t evaluate__sreg;
457
458         var ptest__v;
459
460         tsplitter exec_builtin__tspl;
461
462         /* biggest and least used members go last */
463         tsplitter fsplitter, rsplitter;
464 };
465 #define G1 (ptr_to_globals[-1])
466 #define G (*(struct globals2 *)ptr_to_globals)
467 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
468 /*char G1size[sizeof(G1)]; - 0x74 */
469 /*char Gsize[sizeof(G)]; - 0x1c4 */
470 /* Trying to keep most of members accessible with short offsets: */
471 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
472 #define t_double     (G1.t_double    )
473 #define beginseq     (G1.beginseq    )
474 #define mainseq      (G1.mainseq     )
475 #define endseq       (G1.endseq      )
476 #define seq          (G1.seq         )
477 #define break_ptr    (G1.break_ptr   )
478 #define continue_ptr (G1.continue_ptr)
479 #define iF           (G1.iF          )
480 #define vhash        (G1.vhash       )
481 #define ahash        (G1.ahash       )
482 #define fdhash       (G1.fdhash      )
483 #define fnhash       (G1.fnhash      )
484 #define g_progname   (G1.g_progname  )
485 #define g_lineno     (G1.g_lineno    )
486 #define nfields      (G1.nfields     )
487 #define maxfields    (G1.maxfields   )
488 #define Fields       (G1.Fields      )
489 #define g_cb         (G1.g_cb        )
490 #define g_pos        (G1.g_pos       )
491 #define g_buf        (G1.g_buf       )
492 #define icase        (G1.icase       )
493 #define exiting      (G1.exiting     )
494 #define nextrec      (G1.nextrec     )
495 #define nextfile     (G1.nextfile    )
496 #define is_f0_split  (G1.is_f0_split )
497 #define t_info       (G.t_info      )
498 #define t_tclass     (G.t_tclass    )
499 #define t_string     (G.t_string    )
500 #define t_lineno     (G.t_lineno    )
501 #define t_rollback   (G.t_rollback  )
502 #define intvar       (G.intvar      )
503 #define fsplitter    (G.fsplitter   )
504 #define rsplitter    (G.rsplitter   )
/* Besides allocating the globals, INIT_G() sets the two members whose
 * initial value is not zero: the "last token class" starts as TC_OPTERM
 * and the random seed starts as 1. */
505 #define INIT_G() do { \
506         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
507         G.next_token__ltclass = TC_OPTERM; \
508         G.evaluate__seed = 1; \
509 } while (0)
510
511
/* Forward declarations for functions that are used before their
 * definition (e.g. handle_special() and fmt_num() are called by the
 * variable helpers below). */
512 /* function prototypes */
513 static void handle_special(var *);
514 static node *parse_expr(uint32_t);
515 static void chain_group(void);
516 static var *evaluate(node *, var *);
517 static rstream *next_input_file(void);
518 static int fmt_num(char *, int, const char *, double, int);
519 static int awk_exit(int) NORETURN;
520
521 /* ---- error handling ---- */
/* Message texts passed to syntax_error(). EMSG_NO_MATH only exists when
 * the build has ENABLE_FEATURE_AWK_LIBM turned off. */
522
523 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
524 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
525 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
526 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
527 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
528 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
529 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
530 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
531 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
532 #if !ENABLE_FEATURE_AWK_LIBM
533 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
534 #endif
535
536 static void zero_out_var(var *vp)
537 {
538         memset(vp, 0, sizeof(*vp));
539 }
540
541 static void syntax_error(const char *message) NORETURN;
542 static void syntax_error(const char *message)
543 {
544         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
545 }
546
547 /* ---- hash stuff ---- */
548
/* Compute the (unreduced) hash of a NUL-terminated key.
 * Each step is h = 63*h + character in unsigned arithmetic, which is
 * the same as the shift form (h << 6) - h + character.
 * Callers reduce the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63 + *p;
	return h;
}
557
558 /* create new hash */
559 static xhash *hash_init(void)
560 {
561         xhash *newhash;
562
563         newhash = xzalloc(sizeof(*newhash));
564         newhash->csize = FIRST_PRIME;
565         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
566
567         return newhash;
568 }
569
570 /* find item in hash, return ptr to data, NULL if not found */
571 static void *hash_search(xhash *hash, const char *name)
572 {
573         hash_item *hi;
574
575         hi = hash->items[hashidx(name) % hash->csize];
576         while (hi) {
577                 if (strcmp(hi->name, name) == 0)
578                         return &(hi->data);
579                 hi = hi->next;
580         }
581         return NULL;
582 }
583
584 /* grow hash if it becomes too big */
585 static void hash_rebuild(xhash *hash)
586 {
587         unsigned newsize, i, idx;
588         hash_item **newitems, *hi, *thi;
589
590         if (hash->nprime == ARRAY_SIZE(PRIMES))
591                 return;
592
593         newsize = PRIMES[hash->nprime++];
594         newitems = xzalloc(newsize * sizeof(newitems[0]));
595
596         for (i = 0; i < hash->csize; i++) {
597                 hi = hash->items[i];
598                 while (hi) {
599                         thi = hi;
600                         hi = thi->next;
601                         idx = hashidx(thi->name) % newsize;
602                         thi->next = newitems[idx];
603                         newitems[idx] = thi;
604                 }
605         }
606
607         free(hash->items);
608         hash->csize = newsize;
609         hash->items = newitems;
610 }
611
612 /* find item in hash, add it if necessary. Return ptr to data */
613 static void *hash_find(xhash *hash, const char *name)
614 {
615         hash_item *hi;
616         unsigned idx;
617         int l;
618
619         hi = hash_search(hash, name);
620         if (!hi) {
621                 if (++hash->nel / hash->csize > 10)
622                         hash_rebuild(hash);
623
624                 l = strlen(name) + 1;
625                 hi = xzalloc(sizeof(*hi) + l);
626                 strcpy(hi->name, name);
627
628                 idx = hashidx(name) % hash->csize;
629                 hi->next = hash->items[idx];
630                 hash->items[idx] = hi;
631                 hash->glen += l;
632         }
633         return &(hi->data);
634 }
635
/* Typed wrappers around hash_find(): look up NAME (creating a zeroed entry
 * if missing) and cast the returned data pointer to the type stored in that
 * table. findvar() takes the table explicitly; newvar(), newfile() and
 * newfunc() use the global vhash, fdhash and fnhash tables. */
636 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
637 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
638 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
639 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
640
641 static void hash_remove(xhash *hash, const char *name)
642 {
643         hash_item *hi, **phi;
644
645         phi = &(hash->items[hashidx(name) % hash->csize]);
646         while (*phi) {
647                 hi = *phi;
648                 if (strcmp(hi->name, name) == 0) {
649                         hash->glen -= (strlen(name) + 1);
650                         hash->nel--;
651                         *phi = hi->next;
652                         free(hi);
653                         break;
654                 }
655                 phi = &(hi->next);
656         }
657 }
658
659 /* ------ some useful functions ------ */
660
661 static void skip_spaces(char **s)
662 {
663         char *p = *s;
664
665         while (1) {
666                 if (*p == '\\' && p[1] == '\n') {
667                         p++;
668                         t_lineno++;
669                 } else if (*p != ' ' && *p != '\t') {
670                         break;
671                 }
672                 p++;
673         }
674         *s = p;
675 }
676
/* Return the word *s currently points at and move *s to the byte
 * following that word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
685
/* Fetch one character from *s, advancing *s. A backslash is handed to
 * bb_process_escape_sequence(); if that returns a backslash without
 * having advanced *s, the character following the backslash is taken
 * as is. */
static char nextchar(char **s)
{
	char ch = *(*s)++;
	char *after = *s;

	if (ch == '\\') {
		ch = bb_process_escape_sequence((const char**)s);
		if (ch == '\\' && *s == after)
			ch = *(*s)++;
	}
	return ch;
}
698
699 static ALWAYS_INLINE int isalnum_(int c)
700 {
701         return (isalnum(c) || c == '_');
702 }
703
/* Convert the number at *pp and advance *pp past the consumed text.
 *
 * With ENABLE_DESKTOP, text starting with "0x"/"0X" or with '0' followed
 * by a digit is first tried as a hex/octal integer via strtoull(..., 0).
 * An octal parse is accepted only if it did not stop at a digit or at
 * '.': inputs such as "009.5", "00.25" or "08" are really decimal
 * numbers with leading zeros, so for those we fall back to strtod() on
 * the original text instead of returning the truncated octal prefix.
 * Everything else goes straight to strtod().
 */
static double my_strtod(char **pp)
{
	char *cp = *pp;

#if ENABLE_DESKTOP
	if (cp[0] == '0') {
		/* Might be hex or octal integer: 0x123abc or 07777 */
		char c = (cp[1] | 0x20);

		if (c == 'x' || isdigit((unsigned char)cp[1])) {
			unsigned long long ull = strtoull(cp, pp, 0);

			if (c == 'x')
				return ull;
			c = **pp;
			if (!isdigit((unsigned char)c) && c != '.')
				return ull;
			/* else: decimal/floating number with leading zeros,
			 * e.g. 009.123 (*pp at '9') or 000.123 (*pp at '.');
			 * strtod below re-parses from cp and resets *pp */
		}
	}
#endif
	return strtod(cp, pp);
}
715
716 /* -------- working with variables (set/get/copy/etc) -------- */
717
718 static xhash *iamarray(var *v)
719 {
720         var *a = v;
721
722         while (a->type & VF_CHILD)
723                 a = a->x.parent;
724
725         if (!(a->type & VF_ARRAY)) {
726                 a->type |= VF_ARRAY;
727                 a->x.array = hash_init();
728         }
729         return a->x.array;
730 }
731
732 static void clear_array(xhash *array)
733 {
734         unsigned i;
735         hash_item *hi, *thi;
736
737         for (i = 0; i < array->csize; i++) {
738                 hi = array->items[i];
739                 while (hi) {
740                         thi = hi;
741                         hi = hi->next;
742                         free(thi->data.v.string);
743                         free(thi);
744                 }
745                 array->items[i] = NULL;
746         }
747         array->glen = array->nel = 0;
748 }
749
750 /* clear a variable */
751 static var *clrvar(var *v)
752 {
753         if (!(v->type & VF_FSTR))
754                 free(v->string);
755
756         v->type &= VF_DONTTOUCH;
757         v->type |= VF_DIRTY;
758         v->string = NULL;
759         return v;
760 }
761
762 /* assign string value to variable */
763 static var *setvar_p(var *v, char *value)
764 {
765         clrvar(v);
766         v->string = value;
767         handle_special(v);
768         return v;
769 }
770
771 /* same as setvar_p but make a copy of string */
772 static var *setvar_s(var *v, const char *value)
773 {
774         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
775 }
776
777 /* same as setvar_s but sets USER flag */
778 static var *setvar_u(var *v, const char *value)
779 {
780         v = setvar_s(v, value);
781         v->type |= VF_USER;
782         return v;
783 }
784
785 /* set array element to user string */
786 static void setari_u(var *a, int idx, const char *s)
787 {
788         var *v;
789
790         v = findvar(iamarray(a), itoa(idx));
791         setvar_u(v, s);
792 }
793
794 /* assign numeric value to variable */
795 static var *setvar_i(var *v, double value)
796 {
797         clrvar(v);
798         v->type |= VF_NUMBER;
799         v->number = value;
800         handle_special(v);
801         return v;
802 }
803
804 static const char *getvar_s(var *v)
805 {
806         /* if v is numeric and has no cached string, convert it to string */
807         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
808                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
809                 v->string = xstrdup(g_buf);
810                 v->type |= VF_CACHED;
811         }
812         return (v->string == NULL) ? "" : v->string;
813 }
814
815 static double getvar_i(var *v)
816 {
817         char *s;
818
819         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
820                 v->number = 0;
821                 s = v->string;
822                 if (s && *s) {
823                         v->number = my_strtod(&s);
824                         if (v->type & VF_USER) {
825                                 skip_spaces(&s);
826                                 if (*s != '\0')
827                                         v->type &= ~VF_USER;
828                         }
829                 } else {
830                         v->type &= ~VF_USER;
831                 }
832                 v->type |= VF_CACHED;
833         }
834         return v->number;
835 }
836
837 /* Used for operands of bitwise ops */
838 static unsigned long getvar_i_int(var *v)
839 {
840         double d = getvar_i(v);
841
842         /* Casting doubles to longs is undefined for values outside
843          * of target type range. Try to widen it as much as possible */
844         if (d >= 0)
845                 return (unsigned long)d;
846         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
847         return - (long) (unsigned long) (-d);
848 }
849
850 static var *copyvar(var *dest, const var *src)
851 {
852         if (dest != src) {
853                 clrvar(dest);
854                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
855                 dest->number = src->number;
856                 if (src->string)
857                         dest->string = xstrdup(src->string);
858         }
859         handle_special(dest);
860         return dest;
861 }
862
863 static var *incvar(var *v)
864 {
865         return setvar_i(v, getvar_i(v) + 1.0);
866 }
867
868 /* return true if v is number or numeric string */
869 static int is_numeric(var *v)
870 {
871         getvar_i(v);
872         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
873 }
874
875 /* return 1 when value of v corresponds to true, 0 otherwise */
876 static int istrue(var *v)
877 {
878         if (is_numeric(v))
879                 return (v->number != 0);
880         return (v->string && v->string[0]);
881 }
882
883 /* temporary variables allocator. Last allocated should be first freed */
884 static var *nvalloc(int n)
885 {
886         nvblock *pb = NULL;
887         var *v, *r;
888         int size;
889
890         while (g_cb) {
891                 pb = g_cb;
892                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
893                         break;
894                 g_cb = g_cb->next;
895         }
896
897         if (!g_cb) {
898                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
899                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
900                 g_cb->size = size;
901                 g_cb->pos = g_cb->nv;
902                 g_cb->prev = pb;
903                 /*g_cb->next = NULL; - xzalloc did it */
904                 if (pb)
905                         pb->next = g_cb;
906         }
907
908         v = r = g_cb->pos;
909         g_cb->pos += n;
910
911         while (v < g_cb->pos) {
912                 v->type = 0;
913                 v->string = NULL;
914                 v++;
915         }
916
917         return r;
918 }
919
920 static void nvfree(var *v)
921 {
922         var *p;
923
924         if (v < g_cb->nv || v >= g_cb->pos)
925                 syntax_error(EMSG_INTERNAL_ERROR);
926
927         for (p = v; p < g_cb->pos; p++) {
928                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
929                         clear_array(iamarray(p));
930                         free(p->x.array->items);
931                         free(p->x.array);
932                 }
933                 if (p->type & VF_WALK) {
934                         walker_list *n;
935                         walker_list *w = p->x.walker;
936                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
937                         p->x.walker = NULL;
938                         while (w) {
939                                 n = w->prev;
940                                 debug_printf_walker(" free(%p)\n", w);
941                                 free(w);
942                                 w = n;
943                         }
944                 }
945                 clrvar(p);
946         }
947
948         g_cb->pos = v;
949         while (g_cb->prev && g_cb->pos == g_cb->nv) {
950                 g_cb = g_cb->prev;
951         }
952 }
953
954 /* ------- awk program text parsing ------- */
955
956 /* Parse next token pointed by global pos, place results into global ttt.
957  * If token isn't expected, give away. Return token class
958  */
959 static uint32_t next_token(uint32_t expected)
960 {
961 #define concat_inserted (G.next_token__concat_inserted)
962 #define save_tclass     (G.next_token__save_tclass)
963 #define save_info       (G.next_token__save_info)
964 /* Initialized to TC_OPTERM: */
965 #define ltclass         (G.next_token__ltclass)
966
967         char *p, *s;
968         const char *tl;
969         uint32_t tc;
970         const uint32_t *ti;
971         int l;
972
973         if (t_rollback) {
974                 t_rollback = FALSE;
975
976         } else if (concat_inserted) {
977                 concat_inserted = FALSE;
978                 t_tclass = save_tclass;
979                 t_info = save_info;
980
981         } else {
982                 p = g_pos;
983  readnext:
984                 skip_spaces(&p);
985                 g_lineno = t_lineno;
986                 if (*p == '#')
987                         while (*p != '\n' && *p != '\0')
988                                 p++;
989
990                 if (*p == '\n')
991                         t_lineno++;
992
993                 if (*p == '\0') {
994                         tc = TC_EOF;
995
996                 } else if (*p == '\"') {
997                         /* it's a string */
998                         t_string = s = ++p;
999                         while (*p != '\"') {
1000                                 char *pp = p;
1001                                 if (*p == '\0' || *p == '\n')
1002                                         syntax_error(EMSG_UNEXP_EOS);
1003                                 *s++ = nextchar(&pp);
1004                                 p = pp;
1005                         }
1006                         p++;
1007                         *s = '\0';
1008                         tc = TC_STRING;
1009
1010                 } else if ((expected & TC_REGEXP) && *p == '/') {
1011                         /* it's regexp */
1012                         t_string = s = ++p;
1013                         while (*p != '/') {
1014                                 if (*p == '\0' || *p == '\n')
1015                                         syntax_error(EMSG_UNEXP_EOS);
1016                                 *s = *p++;
1017                                 if (*s++ == '\\') {
1018                                         char *pp = p;
1019                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1020                                         if (*p == '\\')
1021                                                 *s++ = '\\';
1022                                         if (pp == p)
1023                                                 *s++ = *p++;
1024                                         else
1025                                                 p = pp;
1026                                 }
1027                         }
1028                         p++;
1029                         *s = '\0';
1030                         tc = TC_REGEXP;
1031
1032                 } else if (*p == '.' || isdigit(*p)) {
1033                         /* it's a number */
1034                         char *pp = p;
1035                         t_double = my_strtod(&pp);
1036                         p = pp;
1037                         if (*pp == '.')
1038                                 syntax_error(EMSG_UNEXP_TOKEN);
1039                         tc = TC_NUMBER;
1040
1041                 } else {
1042                         /* search for something known */
1043                         tl = tokenlist;
1044                         tc = 0x00000001;
1045                         ti = tokeninfo;
1046                         while (*tl) {
1047                                 l = *tl++;
1048                                 if (l == NTCC) {
1049                                         tc <<= 1;
1050                                         continue;
1051                                 }
1052                                 /* if token class is expected, token
1053                                  * matches and it's not a longer word,
1054                                  * then this is what we are looking for
1055                                  */
1056                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1057                                  && *tl == *p && strncmp(p, tl, l) == 0
1058                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1059                                 ) {
1060                                         t_info = *ti;
1061                                         p += l;
1062                                         break;
1063                                 }
1064                                 ti++;
1065                                 tl += l;
1066                         }
1067
1068                         if (!*tl) {
1069                                 /* it's a name (var/array/function),
1070                                  * otherwise it's something wrong
1071                                  */
1072                                 if (!isalnum_(*p))
1073                                         syntax_error(EMSG_UNEXP_TOKEN);
1074
1075                                 t_string = --p;
1076                                 while (isalnum_(*++p)) {
1077                                         p[-1] = *p;
1078                                 }
1079                                 p[-1] = '\0';
1080                                 tc = TC_VARIABLE;
1081                                 /* also consume whitespace between functionname and bracket */
1082                                 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1083                                         skip_spaces(&p);
1084                                 if (*p == '(') {
1085                                         tc = TC_FUNCTION;
1086                                 } else {
1087                                         if (*p == '[') {
1088                                                 p++;
1089                                                 tc = TC_ARRAY;
1090                                         }
1091                                 }
1092                         }
1093                 }
1094                 g_pos = p;
1095
1096                 /* skipping newlines in some cases */
1097                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1098                         goto readnext;
1099
1100                 /* insert concatenation operator when needed */
1101                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1102                         concat_inserted = TRUE;
1103                         save_tclass = tc;
1104                         save_info = t_info;
1105                         tc = TC_BINOP;
1106                         t_info = OC_CONCAT | SS | P(35);
1107                 }
1108
1109                 t_tclass = tc;
1110         }
1111         ltclass = t_tclass;
1112
1113         /* Are we ready for this? */
1114         if (!(ltclass & expected))
1115                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1116                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1117
1118         return ltclass;
1119 #undef concat_inserted
1120 #undef save_tclass
1121 #undef save_info
1122 #undef ltclass
1123 }
1124
1125 static void rollback_token(void)
1126 {
1127         t_rollback = TRUE;
1128 }
1129
1130 static node *new_node(uint32_t info)
1131 {
1132         node *n;
1133
1134         n = xzalloc(sizeof(node));
1135         n->info = info;
1136         n->lineno = g_lineno;
1137         return n;
1138 }
1139
1140 static node *mk_re_node(const char *s, node *n, regex_t *re)
1141 {
1142         n->info = OC_REGEXP;
1143         n->l.re = re;
1144         n->r.ire = re + 1;
1145         xregcomp(re, s, REG_EXTENDED);
1146         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1147
1148         return n;
1149 }
1150
1151 static node *condition(void)
1152 {
1153         next_token(TC_SEQSTART);
1154         return parse_expr(TC_SEQTERM);
1155 }
1156
1157 /* parse expression terminated by given argument, return ptr
1158  * to built subtree. Terminator is eaten by parse_expr */
1159 static node *parse_expr(uint32_t iexp)
1160 {
1161         node sn;
1162         node *cn = &sn;
1163         node *vn, *glptr;
1164         uint32_t tc, xtc;
1165         var *v;
1166
1167         sn.info = PRIMASK;
1168         sn.r.n = glptr = NULL;
1169         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1170
1171         while (!((tc = next_token(xtc)) & iexp)) {
1172                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1173                         /* input redirection (<) attached to glptr node */
1174                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1175                         cn->a.n = glptr;
1176                         xtc = TC_OPERAND | TC_UOPPRE;
1177                         glptr = NULL;
1178
1179                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1180                         /* for binary and postfix-unary operators, jump back over
1181                          * previous operators with higher priority */
1182                         vn = cn;
1183                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1184                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1185                         ) {
1186                                 vn = vn->a.n;
1187                         }
1188                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1189                                 t_info += P(6);
1190                         cn = vn->a.n->r.n = new_node(t_info);
1191                         cn->a.n = vn->a.n;
1192                         if (tc & TC_BINOP) {
1193                                 cn->l.n = vn;
1194                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1195                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1196                                         /* it's a pipe */
1197                                         next_token(TC_GETLINE);
1198                                         /* give maximum priority to this pipe */
1199                                         cn->info &= ~PRIMASK;
1200                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1201                                 }
1202                         } else {
1203                                 cn->r.n = vn;
1204                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1205                         }
1206                         vn->a.n = cn;
1207
1208                 } else {
1209                         /* for operands and prefix-unary operators, attach them
1210                          * to last node */
1211                         vn = cn;
1212                         cn = vn->r.n = new_node(t_info);
1213                         cn->a.n = vn;
1214                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1215                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1216                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1217                                 /* one should be very careful with switch on tclass -
1218                                  * only simple tclasses should be used! */
1219                                 switch (tc) {
1220                                 case TC_VARIABLE:
1221                                 case TC_ARRAY:
1222                                         cn->info = OC_VAR;
1223                                         v = hash_search(ahash, t_string);
1224                                         if (v != NULL) {
1225                                                 cn->info = OC_FNARG;
1226                                                 cn->l.i = v->x.aidx;
1227                                         } else {
1228                                                 cn->l.v = newvar(t_string);
1229                                         }
1230                                         if (tc & TC_ARRAY) {
1231                                                 cn->info |= xS;
1232                                                 cn->r.n = parse_expr(TC_ARRTERM);
1233                                         }
1234                                         break;
1235
1236                                 case TC_NUMBER:
1237                                 case TC_STRING:
1238                                         cn->info = OC_VAR;
1239                                         v = cn->l.v = xzalloc(sizeof(var));
1240                                         if (tc & TC_NUMBER)
1241                                                 setvar_i(v, t_double);
1242                                         else
1243                                                 setvar_s(v, t_string);
1244                                         break;
1245
1246                                 case TC_REGEXP:
1247                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1248                                         break;
1249
1250                                 case TC_FUNCTION:
1251                                         cn->info = OC_FUNC;
1252                                         cn->r.f = newfunc(t_string);
1253                                         cn->l.n = condition();
1254                                         break;
1255
1256                                 case TC_SEQSTART:
1257                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1258                                         cn->a.n = vn;
1259                                         break;
1260
1261                                 case TC_GETLINE:
1262                                         glptr = cn;
1263                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1264                                         break;
1265
1266                                 case TC_BUILTIN:
1267                                         cn->l.n = condition();
1268                                         break;
1269                                 }
1270                         }
1271                 }
1272         }
1273         return sn.r.n;
1274 }
1275
1276 /* add node to chain. Return ptr to alloc'd node */
1277 static node *chain_node(uint32_t info)
1278 {
1279         node *n;
1280
1281         if (!seq->first)
1282                 seq->first = seq->last = new_node(0);
1283
1284         if (seq->programname != g_progname) {
1285                 seq->programname = g_progname;
1286                 n = chain_node(OC_NEWSOURCE);
1287                 n->l.s = xstrdup(g_progname);
1288         }
1289
1290         n = seq->last;
1291         n->info = info;
1292         seq->last = n->a.n = new_node(OC_DONE);
1293
1294         return n;
1295 }
1296
1297 static void chain_expr(uint32_t info)
1298 {
1299         node *n;
1300
1301         n = chain_node(info);
1302         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1303         if (t_tclass & TC_GRPTERM)
1304                 rollback_token();
1305 }
1306
1307 static node *chain_loop(node *nn)
1308 {
1309         node *n, *n2, *save_brk, *save_cont;
1310
1311         save_brk = break_ptr;
1312         save_cont = continue_ptr;
1313
1314         n = chain_node(OC_BR | Vx);
1315         continue_ptr = new_node(OC_EXEC);
1316         break_ptr = new_node(OC_EXEC);
1317         chain_group();
1318         n2 = chain_node(OC_EXEC | Vx);
1319         n2->l.n = nn;
1320         n2->a.n = n;
1321         continue_ptr->a.n = n2;
1322         break_ptr->a.n = n->r.n = seq->last;
1323
1324         continue_ptr = save_cont;
1325         break_ptr = save_brk;
1326
1327         return n;
1328 }
1329
1330 /* parse group and attach it to chain */
1331 static void chain_group(void)
1332 {
1333         uint32_t c;
1334         node *n, *n2, *n3;
1335
1336         do {
1337                 c = next_token(TC_GRPSEQ);
1338         } while (c & TC_NEWLINE);
1339
1340         if (c & TC_GRPSTART) {
1341                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1342                         if (t_tclass & TC_NEWLINE)
1343                                 continue;
1344                         rollback_token();
1345                         chain_group();
1346                 }
1347         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1348                 rollback_token();
1349                 chain_expr(OC_EXEC | Vx);
1350         } else {                                                /* TC_STATEMNT */
1351                 switch (t_info & OPCLSMASK) {
1352                 case ST_IF:
1353                         n = chain_node(OC_BR | Vx);
1354                         n->l.n = condition();
1355                         chain_group();
1356                         n2 = chain_node(OC_EXEC);
1357                         n->r.n = seq->last;
1358                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1359                                 chain_group();
1360                                 n2->a.n = seq->last;
1361                         } else {
1362                                 rollback_token();
1363                         }
1364                         break;
1365
1366                 case ST_WHILE:
1367                         n2 = condition();
1368                         n = chain_loop(NULL);
1369                         n->l.n = n2;
1370                         break;
1371
1372                 case ST_DO:
1373                         n2 = chain_node(OC_EXEC);
1374                         n = chain_loop(NULL);
1375                         n2->a.n = n->a.n;
1376                         next_token(TC_WHILE);
1377                         n->l.n = condition();
1378                         break;
1379
1380                 case ST_FOR:
1381                         next_token(TC_SEQSTART);
1382                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1383                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1384                                 if ((n2->info & OPCLSMASK) != OC_IN)
1385                                         syntax_error(EMSG_UNEXP_TOKEN);
1386                                 n = chain_node(OC_WALKINIT | VV);
1387                                 n->l.n = n2->l.n;
1388                                 n->r.n = n2->r.n;
1389                                 n = chain_loop(NULL);
1390                                 n->info = OC_WALKNEXT | Vx;
1391                                 n->l.n = n2->l.n;
1392                         } else {                        /* for (;;) */
1393                                 n = chain_node(OC_EXEC | Vx);
1394                                 n->l.n = n2;
1395                                 n2 = parse_expr(TC_SEMICOL);
1396                                 n3 = parse_expr(TC_SEQTERM);
1397                                 n = chain_loop(n3);
1398                                 n->l.n = n2;
1399                                 if (!n2)
1400                                         n->info = OC_EXEC;
1401                         }
1402                         break;
1403
1404                 case OC_PRINT:
1405                 case OC_PRINTF:
1406                         n = chain_node(t_info);
1407                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1408                         if (t_tclass & TC_OUTRDR) {
1409                                 n->info |= t_info;
1410                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1411                         }
1412                         if (t_tclass & TC_GRPTERM)
1413                                 rollback_token();
1414                         break;
1415
1416                 case OC_BREAK:
1417                         n = chain_node(OC_EXEC);
1418                         n->a.n = break_ptr;
1419                         break;
1420
1421                 case OC_CONTINUE:
1422                         n = chain_node(OC_EXEC);
1423                         n->a.n = continue_ptr;
1424                         break;
1425
1426                 /* delete, next, nextfile, return, exit */
1427                 default:
1428                         chain_expr(t_info);
1429                 }
1430         }
1431 }
1432
1433 static void parse_program(char *p)
1434 {
1435         uint32_t tclass;
1436         node *cn;
1437         func *f;
1438         var *v;
1439
1440         g_pos = p;
1441         t_lineno = 1;
1442         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1443                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1444
1445                 if (tclass & TC_OPTERM)
1446                         continue;
1447
1448                 seq = &mainseq;
1449                 if (tclass & TC_BEGIN) {
1450                         seq = &beginseq;
1451                         chain_group();
1452
1453                 } else if (tclass & TC_END) {
1454                         seq = &endseq;
1455                         chain_group();
1456
1457                 } else if (tclass & TC_FUNCDECL) {
1458                         next_token(TC_FUNCTION);
1459                         g_pos++;
1460                         f = newfunc(t_string);
1461                         f->body.first = NULL;
1462                         f->nargs = 0;
1463                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1464                                 v = findvar(ahash, t_string);
1465                                 v->x.aidx = (f->nargs)++;
1466
1467                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1468                                         break;
1469                         }
1470                         seq = &(f->body);
1471                         chain_group();
1472                         clear_array(ahash);
1473
1474                 } else if (tclass & TC_OPSEQ) {
1475                         rollback_token();
1476                         cn = chain_node(OC_TEST);
1477                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1478                         if (t_tclass & TC_GRPSTART) {
1479                                 rollback_token();
1480                                 chain_group();
1481                         } else {
1482                                 chain_node(OC_PRINT);
1483                         }
1484                         cn->r.n = mainseq.last;
1485
1486                 } else /* if (tclass & TC_GRPSTART) */ {
1487                         rollback_token();
1488                         chain_group();
1489                 }
1490         }
1491 }
1492
1493
1494 /* -------- program execution part -------- */
1495
1496 static node *mk_splitter(const char *s, tsplitter *spl)
1497 {
1498         regex_t *re, *ire;
1499         node *n;
1500
1501         re = &spl->re[0];
1502         ire = &spl->re[1];
1503         n = &spl->n;
1504         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1505                 regfree(re);
1506                 regfree(ire); // TODO: nuke ire, use re+1?
1507         }
1508         if (strlen(s) > 1) {
1509                 mk_re_node(s, n, re);
1510         } else {
1511                 n->info = (uint32_t) *s;
1512         }
1513
1514         return n;
1515 }
1516
1517 /* use node as a regular expression. Supplied with node ptr and regex_t
1518  * storage space. Return ptr to regex (if result points to preg, it should
1519  * be later regfree'd manually
1520  */
1521 static regex_t *as_regex(node *op, regex_t *preg)
1522 {
1523         int cflags;
1524         var *v;
1525         const char *s;
1526
1527         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1528                 return icase ? op->r.ire : op->l.re;
1529         }
1530         v = nvalloc(1);
1531         s = getvar_s(evaluate(op, v));
1532
1533         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1534         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1535          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1536          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1537          * (maybe gsub is not supposed to use REG_EXTENDED?).
1538          */
1539         if (regcomp(preg, s, cflags)) {
1540                 cflags &= ~REG_EXTENDED;
1541                 xregcomp(preg, s, cflags);
1542         }
1543         nvfree(v);
1544         return preg;
1545 }
1546
/* Gradually growing buffer: returns b unchanged while n is below *size;
 * otherwise (or when b is NULL) reallocates it to n + n/2 + 80 bytes and
 * stores the new size in *size. *size is not read when b is NULL. */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1556
1557 /* resize field storage space */
1558 static void fsrealloc(int size)
1559 {
1560         int i;
1561
1562         if (size >= maxfields) {
1563                 i = maxfields;
1564                 maxfields = size + 16;
1565                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1566                 for (; i < maxfields; i++) {
1567                         Fields[i].type = VF_SPECIAL;
1568                         Fields[i].string = NULL;
1569                 }
1570         }
1571
1572         if (size < nfields) {
1573                 for (i = size; i < nfields; i++) {
1574                         clrvar(Fields + i);
1575                 }
1576         }
1577         nfields = size;
1578 }
1579
1580 static int awk_split(const char *s, node *spl, char **slist)
1581 {
1582         int l, n = 0;
1583         char c[4];
1584         char *s1;
1585         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1586
1587         /* in worst case, each char would be a separate field */
1588         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1589         strcpy(s1, s);
1590
1591         c[0] = c[1] = (char)spl->info;
1592         c[2] = c[3] = '\0';
1593         if (*getvar_s(intvar[RS]) == '\0')
1594                 c[2] = '\n';
1595
1596         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1597                 if (!*s)
1598                         return n; /* "": zero fields */
1599                 n++; /* at least one field will be there */
1600                 do {
1601                         l = strcspn(s, c+2); /* len till next NUL or \n */
1602                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1603                          && pmatch[0].rm_so <= l
1604                         ) {
1605                                 l = pmatch[0].rm_so;
1606                                 if (pmatch[0].rm_eo == 0) {
1607                                         l++;
1608                                         pmatch[0].rm_eo++;
1609                                 }
1610                                 n++; /* we saw yet another delimiter */
1611                         } else {
1612                                 pmatch[0].rm_eo = l;
1613                                 if (s[l])
1614                                         pmatch[0].rm_eo++;
1615                         }
1616                         memcpy(s1, s, l);
1617                         /* make sure we remove *all* of the separator chars */
1618                         do {
1619                                 s1[l] = '\0';
1620                         } while (++l < pmatch[0].rm_eo);
1621                         nextword(&s1);
1622                         s += pmatch[0].rm_eo;
1623                 } while (*s);
1624                 return n;
1625         }
1626         if (c[0] == '\0') {  /* null split */
1627                 while (*s) {
1628                         *s1++ = *s++;
1629                         *s1++ = '\0';
1630                         n++;
1631                 }
1632                 return n;
1633         }
1634         if (c[0] != ' ') {  /* single-character split */
1635                 if (icase) {
1636                         c[0] = toupper(c[0]);
1637                         c[1] = tolower(c[1]);
1638                 }
1639                 if (*s1)
1640                         n++;
1641                 while ((s1 = strpbrk(s1, c))) {
1642                         *s1++ = '\0';
1643                         n++;
1644                 }
1645                 return n;
1646         }
1647         /* space split */
1648         while (*s) {
1649                 s = skip_whitespace(s);
1650                 if (!*s)
1651                         break;
1652                 n++;
1653                 while (*s && !isspace(*s))
1654                         *s1++ = *s++;
1655                 *s1++ = '\0';
1656         }
1657         return n;
1658 }
1659
1660 static void split_f0(void)
1661 {
1662 /* static char *fstrings; */
1663 #define fstrings (G.split_f0__fstrings)
1664
1665         int i, n;
1666         char *s;
1667
1668         if (is_f0_split)
1669                 return;
1670
1671         is_f0_split = TRUE;
1672         free(fstrings);
1673         fsrealloc(0);
1674         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1675         fsrealloc(n);
1676         s = fstrings;
1677         for (i = 0; i < n; i++) {
1678                 Fields[i].string = nextword(&s);
1679                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1680         }
1681
1682         /* set NF manually to avoid side effects */
1683         clrvar(intvar[NF]);
1684         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1685         intvar[NF]->number = nfields;
1686 #undef fstrings
1687 }
1688
1689 /* perform additional actions when some internal variables changed */
1690 static void handle_special(var *v)
1691 {
1692         int n;
1693         char *b;
1694         const char *sep, *s;
1695         int sl, l, len, i, bsize;
1696
1697         if (!(v->type & VF_SPECIAL))
1698                 return;
1699
1700         if (v == intvar[NF]) {
1701                 n = (int)getvar_i(v);
1702                 fsrealloc(n);
1703
1704                 /* recalculate $0 */
1705                 sep = getvar_s(intvar[OFS]);
1706                 sl = strlen(sep);
1707                 b = NULL;
1708                 len = 0;
1709                 for (i = 0; i < n; i++) {
1710                         s = getvar_s(&Fields[i]);
1711                         l = strlen(s);
1712                         if (b) {
1713                                 memcpy(b+len, sep, sl);
1714                                 len += sl;
1715                         }
1716                         b = qrealloc(b, len+l+sl, &bsize);
1717                         memcpy(b+len, s, l);
1718                         len += l;
1719                 }
1720                 if (b)
1721                         b[len] = '\0';
1722                 setvar_p(intvar[F0], b);
1723                 is_f0_split = TRUE;
1724
1725         } else if (v == intvar[F0]) {
1726                 is_f0_split = FALSE;
1727
1728         } else if (v == intvar[FS]) {
1729                 mk_splitter(getvar_s(v), &fsplitter);
1730
1731         } else if (v == intvar[RS]) {
1732                 mk_splitter(getvar_s(v), &rsplitter);
1733
1734         } else if (v == intvar[IGNORECASE]) {
1735                 icase = istrue(v);
1736
1737         } else {                                /* $n */
1738                 n = getvar_i(intvar[NF]);
1739                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1740                 /* right here v is invalid. Just to note... */
1741         }
1742 }
1743
1744 /* step through func/builtin/etc arguments */
1745 static node *nextarg(node **pn)
1746 {
1747         node *n;
1748
1749         n = *pn;
1750         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1751                 *pn = n->r.n;
1752                 n = n->l.n;
1753         } else {
1754                 *pn = NULL;
1755         }
1756         return n;
1757 }
1758
1759 static void hashwalk_init(var *v, xhash *array)
1760 {
1761         hash_item *hi;
1762         unsigned i;
1763         walker_list *w;
1764         walker_list *prev_walker;
1765
1766         if (v->type & VF_WALK) {
1767                 prev_walker = v->x.walker;
1768         } else {
1769                 v->type |= VF_WALK;
1770                 prev_walker = NULL;
1771         }
1772         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1773
1774         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1775         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1776         w->cur = w->end = w->wbuf;
1777         w->prev = prev_walker;
1778         for (i = 0; i < array->csize; i++) {
1779                 hi = array->items[i];
1780                 while (hi) {
1781                         strcpy(w->end, hi->name);
1782                         nextword(&w->end);
1783                         hi = hi->next;
1784                 }
1785         }
1786 }
1787
1788 static int hashwalk_next(var *v)
1789 {
1790         walker_list *w = v->x.walker;
1791
1792         if (w->cur >= w->end) {
1793                 walker_list *prev_walker = w->prev;
1794
1795                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1796                 free(w);
1797                 v->x.walker = prev_walker;
1798                 return FALSE;
1799         }
1800
1801         setvar_s(v, nextword(&w->cur));
1802         return TRUE;
1803 }
1804
1805 /* evaluate node, return 1 when result is true, 0 otherwise */
1806 static int ptest(node *pattern)
1807 {
1808         /* ptest__v is "static": to save stack space? */
1809         return istrue(evaluate(pattern, &G.ptest__v));
1810 }
1811
1812 /* read next record from stream rsm into a variable v.
      * The record text is stored into v (marked VF_USER) and the text of the
      * separator that ended it into RT. Input that was read but not yet
      * consumed stays in rsm->buffer between calls: rsm->adv is the offset
      * of the unread data and rsm->pos its length.
      * Returns 1 when a record was stored, 0 when no data was left, and -1
      * when safe_read() reported an error (ERRNO is set in that case).
      */
1813 static int awk_getline(rstream *rsm, var *v)
1814 {
1815         char *b;
1816         regmatch_t pmatch[2];
1817         int a, p, pp=0, size;
1818         int fd, so, eo, r, rp;
1819         char c, *m, *s;
1820
1821         /* we're using our own buffer since we need access to accumulating
1822          * characters
1823          */
1824         fd = fileno(rsm->F);
1825         m = rsm->buffer;
1826         a = rsm->adv;
1827         p = rsm->pos;
1828         size = rsm->size;
             /* c: separator character used when RS is not a regexp;
              * '\0' selects the blank-line ("\n\n") mode further below */
1829         c = (char) rsplitter.n.info;
             /* rp: offset of the record start within b; only the blank-line
              * mode moves it (to skip leading newlines) */
1830         rp = 0;
1831
1832         if (!m)
1833                 m = qrealloc(m, 256, &size);
1834         do {
                     /* b: start of the unread data; so/eo: start/end offset of
                      * the separator, defaulting to "all p buffered bytes form
                      * the record and there is no separator" */
1835                 b = m + a;
1836                 so = eo = p;
1837                 r = 1;
1838                 if (p > 0) {
1839                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1840                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1841                                                         b, 1, pmatch, 0) == 0) {
1842                                         so = pmatch[0].rm_so;
1843                                         eo = pmatch[0].rm_eo;
                                             /* a match that reaches the end of the
                                              * buffered data is not accepted yet:
                                              * more input is read first */
1844                                         if (b[eo] != '\0')
1845                                                 break;
1846                                 }
1847                         } else if (c != '\0') {
                                     /* only bytes from offset pp on are scanned (pp is
                                      * the data length before the last read); an
                                      * embedded NUL byte ends the record as well */
1848                                 s = strchr(b+pp, c);
1849                                 if (!s)
1850                                         s = memchr(b+pp, '\0', p - pp);
1851                                 if (s) {
1852                                         so = eo = s-b;
1853                                         eo++;
1854                                         break;
1855                                 }
1856                         } else {
                                     /* blank-line mode: skip leading newlines, then the
                                      * record ends at the next "\n\n"; the whole run of
                                      * newlines is the separator, and it is accepted
                                      * only if data follows it in the buffer */
1857                                 while (b[rp] == '\n')
1858                                         rp++;
1859                                 s = strstr(b+rp, "\n\n");
1860                                 if (s) {
1861                                         so = eo = s-b;
1862                                         while (b[eo] == '\n') eo++;
1863                                         if (b[eo] != '\0')
1864                                                 break;
1865                                 }
1866                         }
1867                 }
1868
                     /* no usable separator yet: move the unread data (together
                      * with its terminating NUL) to the front of the buffer */
1869                 if (a > 0) {
1870                         memmove(m, (const void *)(m+a), p+1);
1871                         b = m;
1872                         a = 0;
1873                 }
1874
                     /* ask for room for 128 more bytes and append more input */
1875                 m = qrealloc(m, a+p+128, &size);
1876                 b = m + a;
1877                 pp = p;
1878                 p += safe_read(fd, b+p, size-p-1);
                     /* p < pp means safe_read() returned a negative value */
1879                 if (p < pp) {
1880                         p = 0;
1881                         r = 0;
1882                         setvar_i(intvar[ERRNO], errno);
1883                 }
1884                 b[p] = '\0';
1885
             /* keep going only while the last read actually added data */
1886         } while (p > pp);
1887
1888         if (p == 0) {
                     /* nothing buffered: r goes 1 -> 0 at end of input,
                      * or 0 -> -1 after a read error */
1889                 r--;
1890         } else {
                     /* temporarily NUL-terminate the record, then the separator,
                      * to copy them out; the overwritten bytes are restored */
1891                 c = b[so]; b[so] = '\0';
1892                 setvar_s(v, b+rp);
1893                 v->type |= VF_USER;
1894                 b[so] = c;
1895                 c = b[eo]; b[eo] = '\0';
1896                 setvar_s(intvar[RT], b+so);
1897                 b[eo] = c;
1898         }
1899
             /* save the buffer state: everything up to eo has been consumed */
1900         rsm->buffer = m;
1901         rsm->adv = a + eo;
1902         rsm->pos = p - eo;
1903         rsm->size = size;
1904
1905         return r;
1906 }
1907
1908 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1909 {
1910         int r = 0;
1911         char c;
1912         const char *s = format;
1913
1914         if (int_as_int && n == (int)n) {
1915                 r = snprintf(b, size, "%d", (int)n);
1916         } else {
1917                 do { c = *s; } while (c && *++s);
1918                 if (strchr("diouxX", c)) {
1919                         r = snprintf(b, size, format, (int)n);
1920                 } else if (strchr("eEfgG", c)) {
1921                         r = snprintf(b, size, format, n);
1922                 } else {
1923                         syntax_error(EMSG_INV_FMT);
1924                 }
1925         }
1926         return r;
1927 }
1928
1929 /* formatted output into an allocated buffer, return ptr to buffer */
1930 static char *awk_printf(node *n)
1931 {
1932         char *b = NULL;
1933         char *fmt, *s, *f;
1934         const char *s1;
1935         int i, j, incr, bsize;
1936         char c, c1;
1937         var *v, *arg;
1938
1939         v = nvalloc(1);
1940         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1941
1942         i = 0;
1943         while (*f) {
1944                 s = f;
1945                 while (*f && (*f != '%' || *++f == '%'))
1946                         f++;
1947                 while (*f && !isalpha(*f)) {
1948                         if (*f == '*')
1949                                 syntax_error("%*x formats are not supported");
1950                         f++;
1951                 }
1952
1953                 incr = (f - s) + MAXVARFMT;
1954                 b = qrealloc(b, incr + i, &bsize);
1955                 c = *f;
1956                 if (c != '\0')
1957                         f++;
1958                 c1 = *f;
1959                 *f = '\0';
1960                 arg = evaluate(nextarg(&n), v);
1961
1962                 j = i;
1963                 if (c == 'c' || !c) {
1964                         i += sprintf(b+i, s, is_numeric(arg) ?
1965                                         (char)getvar_i(arg) : *getvar_s(arg));
1966                 } else if (c == 's') {
1967                         s1 = getvar_s(arg);
1968                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1969                         i += sprintf(b+i, s, s1);
1970                 } else {
1971                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1972                 }
1973                 *f = c1;
1974
1975                 /* if there was an error while sprintf, return value is negative */
1976                 if (i < j)
1977                         i = j;
1978         }
1979
1980         b = xrealloc(b, i + 1);
1981         free(fmt);
1982         nvfree(v);
1983         b[i] = '\0';
1984         return b;
1985 }
1986
1987 /* common substitution routine
1988  * replace the (nm)-th substring of (src) that matches (rn) with (repl),
1989  * store result into (dest), return the number of matches visited.
1990  * If nm=0, replace all matches. If src or dest is NULL, use $0.
1991  * If ex=TRUE, enable subexpression references (\0-\9) in (repl).
1992  */
1993 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1994 {
1995         char *ds = NULL;
1996         const char *s;
1997         const char *sp;
1998         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1999         regmatch_t pmatch[10];
2000         regex_t sreg, *re;
2001
             /* as_regex() may hand back &sreg; in that case it is released
              * with regfree() at the end */
2002         re = as_regex(rn, &sreg);
2003         if (!src)
2004                 src = intvar[F0];
2005         if (!dest)
2006                 dest = intvar[F0];
2007
             /* i: matches seen so far; di: write offset in ds; sp: read cursor */
2008         i = di = 0;
2009         sp = getvar_s(src);
2010         rl = strlen(repl);
             /* REG_NOTBOL is passed for every search that does not start at the
              * beginning of the source string */
2011         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
2012                 so = pmatch[0].rm_so;
2013                 eo = pmatch[0].rm_eo;
2014
                     /* copy everything up to the end of the match verbatim first */
2015                 ds = qrealloc(ds, di + eo + rl, &dssize);
2016                 memcpy(ds + di, sp, eo);
2017                 di += eo;
2018                 if (++i >= nm) {
2019                         /* replace */
                             /* drop the matched text again and emit repl instead */
2020                         di -= (eo - so);
                             /* nbs: number of backslashes directly before the current char */
2021                         nbs = 0;
2022                         for (s = repl; *s; s++) {
2023                                 ds[di++] = c = *s;
2024                                 if (c == '\\') {
2025                                         nbs++;
2026                                         continue;
2027                                 }
2028                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
                                             /* take back the special char and half of the
                                              * preceding backslashes (rounded up) */
2029                                         di -= ((nbs + 3) >> 1);
2030                                         j = 0;
2031                                         if (c != '&') {
                                                     /* for a digit the parity test below is
                                                      * inverted: an odd number of backslashes
                                                      * makes it a reference to group j */
2032                                                 j = c - '0';
2033                                                 nbs++;
2034                                         }
2035                                         if (nbs % 2) {
                                                     /* literal character */
2036                                                 ds[di++] = c;
2037                                         } else {
                                                     /* insert the text matched by group j
                                                      * (j = 0: the whole match) */
2038                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
2039                                                 ds = qrealloc(ds, di + rl + n, &dssize);
2040                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
2041                                                 di += n;
2042                                         }
2043                                 }
2044                                 nbs = 0;
2045                         }
2046                 }
2047
2048                 sp += eo;
                     /* a specific match was requested and has just been replaced */
2049                 if (i == nm)
2050                         break;
                     /* empty match: copy one source character so that the search
                      * moves forward; stop when that character is the final NUL */
2051                 if (eo == so) {
2052                         ds[di] = *sp++;
2053                         if (!ds[di++])
2054                                 break;
2055                 }
2056         }
2057
             /* append whatever follows the last processed match */
2058         ds = qrealloc(ds, di + strlen(sp), &dssize);
2059         strcpy(ds + di, sp);
2060         setvar_p(dest, ds);
2061         if (re == &sreg)
2062                 regfree(re);
2063         return i;
2064 }
2065
/*
 * Convert a date specification "YYYY MM DD HH MM SS [DST]" into a time
 * value using mktime().
 *
 * ds: six whitespace-separated numbers, optionally followed by a seventh
 *     one which becomes tm_isdst (it stays -1, "unknown", when absent).
 * Returns -1 if fewer than six numbers could be read, if the month is
 * less than 1 or if the year is less than 1900; otherwise the result of
 * mktime() converted to int.
 */
static NOINLINE int do_mktime(const char *ds)
{
	struct tm then;
	/* %u needs unsigned targets: scan into locals of the right type
	 * rather than straight into the (int) struct tm fields */
	unsigned year, mon, mday, hour, min, sec;
	int isdst = -1; /* default is unknown */
	int count;

	/*memset(&then, 0, sizeof(then)); - not needed */

	count = sscanf(ds, "%u %u %u %u %u %u %d",
		&year, &mon, &mday,
		&hour, &min, &sec,
		&isdst);

	if (count < 6
	 || mon < 1
	 || year < 1900
	) {
		return -1;
	}

	/* struct tm counts months from 0 and years from 1900 */
	then.tm_year = year - 1900;
	then.tm_mon = mon - 1;
	then.tm_mday = mday;
	then.tm_hour = hour;
	then.tm_min = min;
	then.tm_sec = sec;
	then.tm_isdst = isdst;

	return mktime(&then);
}
2093
     /*
      * Execute the builtin function call described by node op and store its
      * value into res, which is also the return value.
      * Up to four arguments are taken from the list at op->l.n; flag bits in
      * op->info decide which of them are evaluated and/or converted to
      * strings up front, and (op->info & OPNMASK) selects the builtin.
      */
2094 static NOINLINE var *exec_builtin(node *op, var *res)
2095 {
2096 #define tspl (G.exec_builtin__tspl)
2097
2098         var *tv;
2099         node *an[4];
2100         var *av[4];
2101         const char *as[4];
2102         regmatch_t pmatch[2];
2103         regex_t sreg, *re;
2104         node *spl;
2105         uint32_t isr, info;
2106         int nargs;
2107         time_t tt;
2108         char *s, *s1;
2109         int i, l, ll, n;
2110
             /* tv: four temporaries receiving the evaluated arguments */
2111         tv = nvalloc(4);
2112         isr = info = op->info;
2113         op = op->l.n;
2114
             /* an[i]: argument node, av[i]: its value, as[i]: its string form.
              * isr is shifted right after every argument, so the same two masks
              * test the flags of argument i: either bit of 0x09000000 requests
              * evaluation, bit 0x08000000 additionally requests the string */
2115         av[2] = av[3] = NULL;
2116         for (i = 0; i < 4 && op; i++) {
2117                 an[i] = nextarg(&op);
2118                 if (isr & 0x09000000)
2119                         av[i] = evaluate(an[i], &tv[i]);
2120                 if (isr & 0x08000000)
2121                         as[i] = getvar_s(av[i]);
2122                 isr >>= 1;
2123         }
2124
             /* the top two bits of info hold the minimum argument count */
2125         nargs = i;
2126         if ((uint32_t)nargs < (info >> 30))
2127                 syntax_error(EMSG_TOO_FEW_ARGS);
2128
2129         info &= OPNMASK;
2130         switch (info) {
2131
             /* atan2(av[0], av[1]); needs ENABLE_FEATURE_AWK_LIBM */
2132         case B_a2:
2133 #if ENABLE_FEATURE_AWK_LIBM
2134                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2135 #else
2136                 syntax_error(EMSG_NO_MATH);
2137 #endif
2138                 break;
2139
             /* split as[0] into the array av[1]; result is the piece count */
2140         case B_sp:
                     /* a third argument that is a regexp node is used as the
                      * splitter directly, anything else is evaluated and turned
                      * into a splitter; without one the field splitter is used */
2141                 if (nargs > 2) {
2142                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2143                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2144                 } else {
2145                         spl = &fsplitter.n;
2146                 }
2147
2148                 n = awk_split(as[0], spl, &s);
2149                 s1 = s;
                     /* the array is emptied, then filled at indices 1..n */
2150                 clear_array(iamarray(av[1]));
2151                 for (i = 1; i <= n; i++)
2152                         setari_u(av[1], i, nextword(&s1));
2153                 free(s);
2154                 setvar_i(res, n);
2155                 break;
2156
             /* substring of as[0] */
2157         case B_ss:
2158                 l = strlen(as[0]);
                     /* i: zero-based start (argument is one-based), clamped to [0, l] */
2159                 i = getvar_i(av[1]) - 1;
2160                 if (i > l)
2161                         i = l;
2162                 if (i < 0)
2163                         i = 0;
                     /* n: length; defaults to the rest of the string, negative -> 0 */
2164                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2165                 if (n < 0)
2166                         n = 0;
2167                 s = xstrndup(as[0]+i, n);
2168                 setvar_p(res, s);
2169                 break;
2170
2171         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2172          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
             /* bitwise AND */
2173         case B_an:
2174                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2175                 break;
2176
             /* bitwise complement */
2177         case B_co:
2178                 setvar_i(res, ~getvar_i_int(av[0]));
2179                 break;
2180
             /* left shift */
2181         case B_ls:
2182                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2183                 break;
2184
             /* bitwise OR */
2185         case B_or:
2186                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2187                 break;
2188
             /* right shift */
2189         case B_rs:
2190                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2191                 break;
2192
             /* bitwise XOR */
2193         case B_xo:
2194                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2195                 break;
2196
             /* case conversion of a copy of as[0]; B_up selects upper case.
              * ASCII letters only: (*s1 | 0x20) - 'a' falls into 0..25 exactly
              * for a letter; & 0xdf clears bit 0x20, | 0x20 sets it */
2197         case B_lo:
2198         case B_up:
2199                 s1 = s = xstrdup(as[0]);
2200                 while (*s1) {
2201                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2202                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2203                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2204                         s1++;
2205                 }
2206                 setvar_p(res, s);
2207                 break;
2208
             /* one-based position of the first occurrence of as[1] in as[0];
              * 0 when there is none or when as[1] is empty */
2209         case B_ix:
2210                 n = 0;
2211                 ll = strlen(as[1]);
                     /* l: last start offset at which as[1] could still fit */
2212                 l = strlen(as[0]) - ll;
2213                 if (ll > 0 && l >= 0) {
2214                         if (!icase) {
2215                                 s = strstr(as[0], as[1]);
2216                                 if (s)
2217                                         n = (s - as[0]) + 1;
2218                         } else {
2219                                 /* this piece of code is terribly slow and
2220                                  * really should be rewritten
2221                                  */
2222                                 for (i=0; i<=l; i++) {
2223                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2224                                                 n = i+1;
2225                                                 break;
2226                                         }
2227                                 }
2228                         }
2229                 }
2230                 setvar_i(res, n);
2231                 break;
2232
             /* strftime(): format as[0] (or the default format below) applied
              * to time av[1] (or the current time); output goes through g_buf
              * and is limited to MAXVARFMT bytes */
2233         case B_ti:
2234                 if (nargs > 1)
2235                         tt = getvar_i(av[1]);
2236                 else
2237                         time(&tt);
2238                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2239                 i = strftime(g_buf, MAXVARFMT,
2240                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2241                         localtime(&tt));
2242                 g_buf[i] = '\0';
2243                 setvar_s(res, g_buf);
2244                 break;
2245
             /* date specification in as[0] -> time value, see do_mktime() */
2246         case B_mt:
2247                 setvar_i(res, do_mktime(as[0]));
2248                 break;
2249
             /* match as[0] against the regexp an[1]; sets RSTART and RLENGTH */
2250         case B_ma:
2251                 re = as_regex(an[1], &sreg);
2252                 n = regexec(re, as[0], 1, pmatch, 0);
2253                 if (n == 0) {
                             /* found: turn the offsets into one-based positions */
2254                         pmatch[0].rm_so++;
2255                         pmatch[0].rm_eo++;
2256                 } else {
                             /* not found: RSTART becomes 0 and RLENGTH -1 */
2257                         pmatch[0].rm_so = 0;
2258                         pmatch[0].rm_eo = -1;
2259                 }
2260                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2261                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2262                 setvar_i(res, pmatch[0].rm_so);
2263                 if (re == &sreg)
2264                         regfree(re);
2265                 break;
2266
             /* substitution with subexpression references enabled: which match
              * to replace comes from av[2], the source is av[3] ($0 when NULL)
              * and the new string is stored into res */
2267         case B_ge:
2268                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2269                 break;
2270
             /* replace all matches in place in av[2] ($0 when NULL);
              * the result is the count returned by awk_sub() */
2271         case B_gs:
2272                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2273                 break;
2274
             /* like B_gs, but only the first match is replaced */
2275         case B_su:
2276                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2277                 break;
2278         }
2279
2280         nvfree(tv);
2281         return res;
2282 #undef tspl
2283 }
2284
2285 /*
2286  * Evaluate node - the heart of the program. Supplied with subtree
2287  * and place where to store result. returns ptr to result.
2288  */
     /* XC() discards the low 8 bits of an opcode class value; evaluate()
      * applies it both to the switch expression and to every case label */
2289 #define XC(n) ((n) >> 8)
2290
2291 static var *evaluate(node *op, var *res)
2292 {
2293 /* This procedure is recursive so we should count every byte */
2294 #define fnargs (G.evaluate__fnargs)
2295 /* seed is initialized to 1 */
2296 #define seed   (G.evaluate__seed)
2297 #define sreg   (G.evaluate__sreg)
2298
2299         node *op1;
2300         var *v1;
2301         union {
2302                 var *v;
2303                 const char *s;
2304                 double d;
2305                 int i;
2306         } L, R;
2307         uint32_t opinfo;
2308         int opn;
2309         union {
2310                 char *s;
2311                 rstream *rsm;
2312                 FILE *F;
2313                 var *v;
2314                 regex_t *re;
2315                 uint32_t info;
2316         } X;
2317
2318         if (!op)
2319                 return setvar_s(res, NULL);
2320
2321         v1 = nvalloc(2);
2322
2323         while (op) {
2324                 opinfo = op->info;
2325                 opn = (opinfo & OPNMASK);
2326                 g_lineno = op->lineno;
2327
2328                 /* execute inevitable things */
2329                 op1 = op->l.n;
2330                 if (opinfo & OF_RES1)
2331                         X.v = L.v = evaluate(op1, v1);
2332                 if (opinfo & OF_RES2)
2333                         R.v = evaluate(op->r.n, v1+1);
2334                 if (opinfo & OF_STR1)
2335                         L.s = getvar_s(L.v);
2336                 if (opinfo & OF_STR2)
2337                         R.s = getvar_s(R.v);
2338                 if (opinfo & OF_NUM1)
2339                         L.d = getvar_i(L.v);
2340
2341                 switch (XC(opinfo & OPCLSMASK)) {
2342
2343                 /* -- iterative node type -- */
2344
2345                 /* test pattern */
2346                 case XC( OC_TEST ):
2347                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2348                                 /* it's range pattern */
2349                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2350                                         op->info |= OF_CHECKED;
2351                                         if (ptest(op1->r.n))
2352                                                 op->info &= ~OF_CHECKED;
2353
2354                                         op = op->a.n;
2355                                 } else {
2356                                         op = op->r.n;
2357                                 }
2358                         } else {
2359                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2360                         }
2361                         break;
2362
2363                 /* just evaluate an expression, also used as unconditional jump */
2364                 case XC( OC_EXEC ):
2365                         break;
2366
2367                 /* branch, used in if-else and various loops */
2368                 case XC( OC_BR ):
2369                         op = istrue(L.v) ? op->a.n : op->r.n;
2370                         break;
2371
2372                 /* initialize for-in loop */
2373                 case XC( OC_WALKINIT ):
2374                         hashwalk_init(L.v, iamarray(R.v));
2375                         break;
2376
2377                 /* get next array item */
2378                 case XC( OC_WALKNEXT ):
2379                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2380                         break;
2381
2382                 case XC( OC_PRINT ):
2383                 case XC( OC_PRINTF ):
2384                         X.F = stdout;
2385                         if (op->r.n) {
2386                                 X.rsm = newfile(R.s);
2387                                 if (!X.rsm->F) {
2388                                         if (opn == '|') {
2389                                                 X.rsm->F = popen(R.s, "w");
2390                                                 if (X.rsm->F == NULL)
2391                                                         bb_perror_msg_and_die("popen");
2392                                                 X.rsm->is_pipe = 1;
2393                                         } else {
2394                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2395                                         }
2396                                 }
2397                                 X.F = X.rsm->F;
2398                         }
2399
2400                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2401                                 if (!op1) {
2402                                         fputs(getvar_s(intvar[F0]), X.F);
2403                                 } else {
2404                                         while (op1) {
2405                                                 L.v = evaluate(nextarg(&op1), v1);
2406                                                 if (L.v->type & VF_NUMBER) {
2407                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2408                                                                         getvar_i(L.v), TRUE);
2409                                                         fputs(g_buf, X.F);
2410                                                 } else {
2411                                                         fputs(getvar_s(L.v), X.F);
2412                                                 }
2413
2414                                                 if (op1)
2415                                                         fputs(getvar_s(intvar[OFS]), X.F);
2416                                         }
2417                                 }
2418                                 fputs(getvar_s(intvar[ORS]), X.F);
2419
2420                         } else {        /* OC_PRINTF */
2421                                 L.s = awk_printf(op1);
2422                                 fputs(L.s, X.F);
2423                                 free((char*)L.s);
2424                         }
2425                         fflush(X.F);
2426                         break;
2427
2428                 case XC( OC_DELETE ):
2429                         X.info = op1->info & OPCLSMASK;
2430                         if (X.info == OC_VAR) {
2431                                 R.v = op1->l.v;
2432                         } else if (X.info == OC_FNARG) {
2433                                 R.v = &fnargs[op1->l.i];
2434                         } else {
2435                                 syntax_error(EMSG_NOT_ARRAY);
2436                         }
2437
2438                         if (op1->r.n) {
2439                                 clrvar(L.v);
2440                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2441                                 hash_remove(iamarray(R.v), L.s);
2442                         } else {
2443                                 clear_array(iamarray(R.v));
2444                         }
2445                         break;
2446
2447                 case XC( OC_NEWSOURCE ):
2448                         g_progname = op->l.s;
2449                         break;
2450
2451                 case XC( OC_RETURN ):
2452                         copyvar(res, L.v);
2453                         break;
2454
2455                 case XC( OC_NEXTFILE ):
2456                         nextfile = TRUE;
2457                 case XC( OC_NEXT ):
2458                         nextrec = TRUE;
2459                 case XC( OC_DONE ):
2460                         clrvar(res);
2461                         break;
2462
2463                 case XC( OC_EXIT ):
2464                         awk_exit(L.d);
2465
2466                 /* -- recursive node type -- */
2467
2468                 case XC( OC_VAR ):
2469                         L.v = op->l.v;
2470                         if (L.v == intvar[NF])
2471                                 split_f0();
2472                         goto v_cont;
2473
2474                 case XC( OC_FNARG ):
2475                         L.v = &fnargs[op->l.i];
2476  v_cont:
2477                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2478                         break;
2479
2480                 case XC( OC_IN ):
2481                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2482                         break;
2483
2484                 case XC( OC_REGEXP ):
2485                         op1 = op;
2486                         L.s = getvar_s(intvar[F0]);
2487                         goto re_cont;
2488
2489                 case XC( OC_MATCH ):
2490                         op1 = op->r.n;
2491  re_cont:
2492                         X.re = as_regex(op1, &sreg);
2493                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2494                         if (X.re == &sreg)
2495                                 regfree(X.re);
2496                         setvar_i(res, (R.i == 0) ^ (opn == '!'));
2497                         break;
2498
2499                 case XC( OC_MOVE ):
2500                         /* if source is a temporary string, jusk relink it to dest */
2501 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2502 //then L.v ends up being a string, which is wrong
2503 //                      if (R.v == v1+1 && R.v->string) {
2504 //                              res = setvar_p(L.v, R.v->string);
2505 //                              R.v->string = NULL;
2506 //                      } else {
2507                                 res = copyvar(L.v, R.v);
2508 //                      }
2509                         break;
2510
2511                 case XC( OC_TERNARY ):
2512                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2513                                 syntax_error(EMSG_POSSIBLE_ERROR);
2514                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2515                         break;
2516
2517                 case XC( OC_FUNC ):
2518                         if (!op->r.f->body.first)
2519                                 syntax_error(EMSG_UNDEF_FUNC);
2520
2521                         X.v = R.v = nvalloc(op->r.f->nargs + 1);
2522                         while (op1) {
2523                                 L.v = evaluate(nextarg(&op1), v1);
2524                                 copyvar(R.v, L.v);
2525                                 R.v->type |= VF_CHILD;
2526                                 R.v->x.parent = L.v;
2527                                 if (++R.v - X.v >= op->r.f->nargs)
2528                                         break;
2529                         }
2530
2531                         R.v = fnargs;
2532                         fnargs = X.v;
2533
2534                         L.s = g_progname;
2535                         res = evaluate(op->r.f->body.first, res);
2536                         g_progname = L.s;
2537
2538                         nvfree(fnargs);
2539                         fnargs = R.v;
2540                         break;
2541
2542                 case XC( OC_GETLINE ):
2543                 case XC( OC_PGETLINE ):
2544                         if (op1) {
2545                                 X.rsm = newfile(L.s);
2546                                 if (!X.rsm->F) {
2547                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2548                                                 X.rsm->F = popen(L.s, "r");
2549                                                 X.rsm->is_pipe = TRUE;
2550                                         } else {
2551                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2552                                         }
2553                                 }
2554                         } else {
2555                                 if (!iF)
2556                                         iF = next_input_file();
2557                                 X.rsm = iF;
2558                         }
2559
2560                         if (!X.rsm->F) {
2561                                 setvar_i(intvar[ERRNO], errno);
2562                                 setvar_i(res, -1);
2563                                 break;
2564                         }
2565
2566                         if (!op->r.n)
2567                                 R.v = intvar[F0];
2568
2569                         L.i = awk_getline(X.rsm, R.v);
2570                         if (L.i > 0) {
2571                                 if (!op1) {
2572                                         incvar(intvar[FNR]);
2573                                         incvar(intvar[NR]);
2574                                 }
2575                         }
2576                         setvar_i(res, L.i);
2577                         break;
2578
2579                 /* simple builtins */
2580                 case XC( OC_FBLTIN ):
2581                         switch (opn) {
2582
2583                         case F_in:
2584                                 R.d = (int)L.d;
2585                                 break;
2586
2587                         case F_rn:
2588                                 R.d = (double)rand() / (double)RAND_MAX;
2589                                 break;
2590 #if ENABLE_FEATURE_AWK_LIBM
2591                         case F_co:
2592                                 R.d = cos(L.d);
2593                                 break;
2594
2595                         case F_ex:
2596                                 R.d = exp(L.d);
2597                                 break;
2598
2599                         case F_lg:
2600                                 R.d = log(L.d);
2601                                 break;
2602
2603                         case F_si:
2604                                 R.d = sin(L.d);
2605                                 break;
2606
2607                         case F_sq:
2608                                 R.d = sqrt(L.d);
2609                                 break;
2610 #else
2611                         case F_co:
2612                         case F_ex:
2613                         case F_lg:
2614                         case F_si:
2615                         case F_sq:
2616                                 syntax_error(EMSG_NO_MATH);
2617                                 break;
2618 #endif
2619                         case F_sr:
2620                                 R.d = (double)seed;
2621                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2622                                 srand(seed);
2623                                 break;
2624
2625                         case F_ti:
2626                                 R.d = time(NULL);
2627                                 break;
2628
2629                         case F_le:
2630                                 if (!op1)
2631                                         L.s = getvar_s(intvar[F0]);
2632                                 R.d = strlen(L.s);
2633                                 break;
2634
2635                         case F_sy:
2636                                 fflush_all();
2637                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2638                                                 ? (system(L.s) >> 8) : 0;
2639                                 break;
2640
2641                         case F_ff:
2642                                 if (!op1)
2643                                         fflush(stdout);
2644                                 else {
2645                                         if (L.s && *L.s) {
2646                                                 X.rsm = newfile(L.s);
2647                                                 fflush(X.rsm->F);
2648                                         } else {
2649                                                 fflush_all();
2650                                         }
2651                                 }
2652                                 break;
2653
2654                         case F_cl:
2655                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2656                                 if (X.rsm) {
2657                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2658                                         free(X.rsm->buffer);
2659                                         hash_remove(fdhash, L.s);
2660                                 }
2661                                 if (R.i != 0)
2662                                         setvar_i(intvar[ERRNO], errno);
2663                                 R.d = (double)R.i;
2664                                 break;
2665                         }
2666                         setvar_i(res, R.d);
2667                         break;
2668
2669                 case XC( OC_BUILTIN ):
2670                         res = exec_builtin(op, res);
2671                         break;
2672
2673                 case XC( OC_SPRINTF ):
2674                         setvar_p(res, awk_printf(op1));
2675                         break;
2676
2677                 case XC( OC_UNARY ):
2678                         X.v = R.v;
2679                         L.d = R.d = getvar_i(R.v);
2680                         switch (opn) {
2681                         case 'P':
2682                                 L.d = ++R.d;
2683                                 goto r_op_change;
2684                         case 'p':
2685                                 R.d++;
2686                                 goto r_op_change;
2687                         case 'M':
2688                                 L.d = --R.d;
2689                                 goto r_op_change;
2690                         case 'm':
2691                                 R.d--;
2692                                 goto r_op_change;
2693                         case '!':
2694                                 L.d = !istrue(X.v);
2695                                 break;
2696                         case '-':
2697                                 L.d = -R.d;
2698                                 break;
2699  r_op_change:
2700                                 setvar_i(X.v, R.d);
2701                         }
2702                         setvar_i(res, L.d);
2703                         break;
2704
2705                 case XC( OC_FIELD ):
2706                         R.i = (int)getvar_i(R.v);
2707                         if (R.i == 0) {
2708                                 res = intvar[F0];
2709                         } else {
2710                                 split_f0();
2711                                 if (R.i > nfields)
2712                                         fsrealloc(R.i);
2713                                 res = &Fields[R.i - 1];
2714                         }
2715                         break;
2716
2717                 /* concatenation (" ") and index joining (",") */
2718                 case XC( OC_CONCAT ):
2719                 case XC( OC_COMMA ):
2720                         opn = strlen(L.s) + strlen(R.s) + 2;
2721                         X.s = xmalloc(opn);
2722                         strcpy(X.s, L.s);
2723                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2724                                 L.s = getvar_s(intvar[SUBSEP]);
2725                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2726                                 strcat(X.s, L.s);
2727                         }
2728                         strcat(X.s, R.s);
2729                         setvar_p(res, X.s);
2730                         break;
2731
2732                 case XC( OC_LAND ):
2733                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2734                         break;
2735
2736                 case XC( OC_LOR ):
2737                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2738                         break;
2739
2740                 case XC( OC_BINARY ):
2741                 case XC( OC_REPLACE ):
2742                         R.d = getvar_i(R.v);
2743                         switch (opn) {
2744                         case '+':
2745                                 L.d += R.d;
2746                                 break;
2747                         case '-':
2748                                 L.d -= R.d;
2749                                 break;
2750                         case '*':
2751                                 L.d *= R.d;
2752                                 break;
2753                         case '/':
2754                                 if (R.d == 0)
2755                                         syntax_error(EMSG_DIV_BY_ZERO);
2756                                 L.d /= R.d;
2757                                 break;
2758                         case '&':
2759 #if ENABLE_FEATURE_AWK_LIBM
2760                                 L.d = pow(L.d, R.d);
2761 #else
2762                                 syntax_error(EMSG_NO_MATH);
2763 #endif
2764                                 break;
2765                         case '%':
2766                                 if (R.d == 0)
2767                                         syntax_error(EMSG_DIV_BY_ZERO);
2768                                 L.d -= (int)(L.d / R.d) * R.d;
2769                                 break;
2770                         }
2771                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2772                         break;
2773
2774                 case XC( OC_COMPARE ):
2775                         if (is_numeric(L.v) && is_numeric(R.v)) {
2776                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2777                         } else {
2778                                 L.s = getvar_s(L.v);
2779                                 R.s = getvar_s(R.v);
2780                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2781                         }
2782                         switch (opn & 0xfe) {
2783                         case 0:
2784                                 R.i = (L.d > 0);
2785                                 break;
2786                         case 2:
2787                                 R.i = (L.d >= 0);
2788                                 break;
2789                         case 4:
2790                                 R.i = (L.d == 0);
2791                                 break;
2792                         }
2793                         setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2794                         break;
2795
2796                 default:
2797                         syntax_error(EMSG_POSSIBLE_ERROR);
2798                 }
2799                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2800                         op = op->a.n;
2801                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2802                         break;
2803                 if (nextrec)
2804                         break;
2805         }
2806         nvfree(v1);
2807         return res;
2808 #undef fnargs
2809 #undef seed
2810 #undef sreg
2811 }
2812
2813
2814 /* -------- main & co. -------- */
2815
2816 static int awk_exit(int r)
2817 {
2818         var tv;
2819         unsigned i;
2820         hash_item *hi;
2821
2822         zero_out_var(&tv);
2823
2824         if (!exiting) {
2825                 exiting = TRUE;
2826                 nextrec = FALSE;
2827                 evaluate(endseq.first, &tv);
2828         }
2829
2830         /* waiting for children */
2831         for (i = 0; i < fdhash->csize; i++) {
2832                 hi = fdhash->items[i];
2833                 while (hi) {
2834                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2835                                 pclose(hi->data.rs.F);
2836                         hi = hi->next;
2837                 }
2838         }
2839
2840         exit(r);
2841 }
2842
2843 /* if expr looks like "var=value", perform assignment and return 1,
2844  * otherwise return 0 */
2845 static int is_assignment(const char *expr)
2846 {
2847         char *exprc, *s, *s0, *s1;
2848
2849         exprc = xstrdup(expr);
2850         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2851                 free(exprc);
2852                 return FALSE;
2853         }
2854
2855         *s++ = '\0';
2856         s0 = s1 = s;
2857         while (*s)
2858                 *s1++ = nextchar(&s);
2859
2860         *s1 = '\0';
2861         setvar_u(newvar(exprc), s0);
2862         free(exprc);
2863         return TRUE;
2864 }
2865
2866 /* switch to next input file */
2867 static rstream *next_input_file(void)
2868 {
2869 #define rsm          (G.next_input_file__rsm)
2870 #define files_happen (G.next_input_file__files_happen)
2871
2872         FILE *F = NULL;
2873         const char *fname, *ind;
2874
2875         if (rsm.F)
2876                 fclose(rsm.F);
2877         rsm.F = NULL;
2878         rsm.pos = rsm.adv = 0;
2879
2880         do {
2881                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2882                         if (files_happen)
2883                                 return NULL;
2884                         fname = "-";
2885                         F = stdin;
2886                 } else {
2887                         ind = getvar_s(incvar(intvar[ARGIND]));
2888                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2889                         if (fname && *fname && !is_assignment(fname))
2890                                 F = xfopen_stdin(fname);
2891                 }
2892         } while (!F);
2893
2894         files_happen = TRUE;
2895         setvar_s(intvar[FILENAME], fname);
2896         rsm.F = F;
2897         return &rsm;
2898 #undef rsm
2899 #undef files_happen
2900 }
2901
2902 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2903 int awk_main(int argc, char **argv)
2904 {
2905         unsigned opt;
2906         char *opt_F, *opt_W;
2907         llist_t *list_v = NULL;
2908         llist_t *list_f = NULL;
2909         int i, j;
2910         var *v;
2911         var tv;
2912         char **envp;
2913         char *vnames = (char *)vNames; /* cheat */
2914         char *vvalues = (char *)vValues;
2915
2916         INIT_G();
2917
2918         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2919          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2920         if (ENABLE_LOCALE_SUPPORT)
2921                 setlocale(LC_NUMERIC, "C");
2922
2923         zero_out_var(&tv);
2924
2925         /* allocate global buffer */
2926         g_buf = xmalloc(MAXVARFMT + 1);
2927
2928         vhash = hash_init();
2929         ahash = hash_init();
2930         fdhash = hash_init();
2931         fnhash = hash_init();
2932
2933         /* initialize variables */
2934         for (i = 0; *vnames; i++) {
2935                 intvar[i] = v = newvar(nextword(&vnames));
2936                 if (*vvalues != '\377')
2937                         setvar_s(v, nextword(&vvalues));
2938                 else
2939                         setvar_i(v, 0);
2940
2941                 if (*vnames == '*') {
2942                         v->type |= VF_SPECIAL;
2943                         vnames++;
2944                 }
2945         }
2946
2947         handle_special(intvar[FS]);
2948         handle_special(intvar[RS]);
2949
2950         newfile("/dev/stdin")->F = stdin;
2951         newfile("/dev/stdout")->F = stdout;
2952         newfile("/dev/stderr")->F = stderr;
2953
2954         /* Huh, people report that sometimes environ is NULL. Oh well. */
2955         if (environ) for (envp = environ; *envp; envp++) {
2956                 /* environ is writable, thus we don't strdup it needlessly */
2957                 char *s = *envp;
2958                 char *s1 = strchr(s, '=');
2959                 if (s1) {
2960                         *s1 = '\0';
2961                         /* Both findvar and setvar_u take const char*
2962                          * as 2nd arg -> environment is not trashed */
2963                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2964                         *s1 = '=';
2965                 }
2966         }
2967         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2968         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2969         argv += optind;
2970         argc -= optind;
2971         if (opt & 0x1)
2972                 setvar_s(intvar[FS], opt_F); // -F
2973         while (list_v) { /* -v */
2974                 if (!is_assignment(llist_pop(&list_v)))
2975                         bb_show_usage();
2976         }
2977         if (list_f) { /* -f */
2978                 do {
2979                         char *s = NULL;
2980                         FILE *from_file;
2981
2982                         g_progname = llist_pop(&list_f);
2983                         from_file = xfopen_stdin(g_progname);
2984                         /* one byte is reserved for some trick in next_token */
2985                         for (i = j = 1; j > 0; i += j) {
2986                                 s = xrealloc(s, i + 4096);
2987                                 j = fread(s + i, 1, 4094, from_file);
2988                         }
2989                         s[i] = '\0';
2990                         fclose(from_file);
2991                         parse_program(s + 1);
2992                         free(s);
2993                 } while (list_f);
2994                 argc++;
2995         } else { // no -f: take program from 1st parameter
2996                 if (!argc)
2997                         bb_show_usage();
2998                 g_progname = "cmd. line";
2999                 parse_program(*argv++);
3000         }
3001         if (opt & 0x8) // -W
3002                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3003
3004         /* fill in ARGV array */
3005         setvar_i(intvar[ARGC], argc);
3006         setari_u(intvar[ARGV], 0, "awk");
3007         i = 0;
3008         while (*argv)
3009                 setari_u(intvar[ARGV], ++i, *argv++);
3010
3011         evaluate(beginseq.first, &tv);
3012         if (!mainseq.first && !endseq.first)
3013                 awk_exit(EXIT_SUCCESS);
3014
3015         /* input file could already be opened in BEGIN block */
3016         if (!iF)
3017                 iF = next_input_file();
3018
3019         /* passing through input files */
3020         while (iF) {
3021                 nextfile = FALSE;
3022                 setvar_i(intvar[FNR], 0);
3023
3024                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3025                         nextrec = FALSE;
3026                         incvar(intvar[NR]);
3027                         incvar(intvar[FNR]);
3028                         evaluate(mainseq.first, &tv);
3029
3030                         if (nextfile)
3031                                 break;
3032                 }
3033
3034                 if (i < 0)
3035                         syntax_error(strerror(errno));
3036
3037                 iF = next_input_file();
3038         }
3039
3040         awk_exit(EXIT_SUCCESS);
3041         /*return 0;*/
3042 }