Consolidate ARRAY_SIZE macro; remove one unneeded global var (walter harms <wharms...
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13 extern char **environ;
14
15 /* This is a NOEXEC applet. Be very careful! */
16
17
18 #define MAXVARFMT       240
19 #define MINNVBLOCK      64
20
21 /* variable flags */
22 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
23 #define VF_ARRAY        0x0002  /* 1 = it's an array */
24
25 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
26 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
27 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
28 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
29 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
30 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
31 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
32
33 /* these flags are static, don't change them when value is changed */
34 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
35
/* Variable: one awk value together with its type flags */
struct var_s {
	unsigned type;                  /* VF_* flags */
	double number;                  /* numeric value */
	char *string;                   /* string value */
	union {
		int aidx;               /* func arg idx (for compilation stage) */
		struct xhash_s *array;  /* array ptr */
		struct var_s *parent;   /* for func args, ptr to actual parameter */
		char **walker;          /* list of array elements (for..in) */
	} x;
};
typedef struct var_s var;
48
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
struct chain_s {
	struct node_s *first;           /* first node of the chain */
	struct node_s *last;            /* last node of the chain */
	const char *programname;
};
typedef struct chain_s chain;
55
56 /* Function */
57 typedef struct func_s {
58         unsigned nargs;
59         struct chain_s body;
60 } func;
61
62 /* I/O stream */
63 typedef struct rstream_s {
64         FILE *F;
65         char *buffer;
66         int adv;
67         int size;
68         int pos;
69         smallint is_pipe;
70 } rstream;
71
72 typedef struct hash_item_s {
73         union {
74                 struct var_s v;         /* variable/array hash */
75                 struct rstream_s rs;    /* redirect streams hash */
76                 struct func_s f;        /* functions hash */
77         } data;
78         struct hash_item_s *next;       /* next in chain */
79         char name[1];                   /* really it's longer */
80 } hash_item;
81
/* Chained hash table */
struct xhash_s {
	unsigned nel;                   /* num of elements */
	unsigned csize;                 /* current hash size (number of buckets) */
	unsigned nprime;                /* index of next hash size in PRIMES[] */
	unsigned glen;                  /* summary length of item names (incl. NULs) */
	struct hash_item_s **items;     /* bucket array, csize entries */
};
typedef struct xhash_s xhash;
89
90 /* Tree node */
91 typedef struct node_s {
92         uint32_t info;
93         unsigned lineno;
94         union {
95                 struct node_s *n;
96                 var *v;
97                 int i;
98                 char *s;
99                 regex_t *re;
100         } l;
101         union {
102                 struct node_s *n;
103                 regex_t *ire;
104                 func *f;
105                 int argno;
106         } r;
107         union {
108                 struct node_s *n;
109         } a;
110 } node;
111
112 /* Block of temporary variables */
113 typedef struct nvblock_s {
114         int size;
115         var *pos;
116         struct nvblock_s *prev;
117         struct nvblock_s *next;
118         var nv[0];
119 } nvblock;
120
121 typedef struct tsplitter_s {
122         node n;
123         regex_t re[2];
124 } tsplitter;
125
126 /* simple token classes */
127 /* Order and hex values are very important!!!  See next_token() */
128 #define TC_SEQSTART      1                              /* ( */
129 #define TC_SEQTERM      (1 << 1)                /* ) */
130 #define TC_REGEXP       (1 << 2)                /* /.../ */
131 #define TC_OUTRDR       (1 << 3)                /* | > >> */
132 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
133 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
134 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
135 #define TC_IN           (1 << 7)
136 #define TC_COMMA        (1 << 8)
137 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
138 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
139 #define TC_ARRTERM      (1 << 11)               /* ] */
140 #define TC_GRPSTART     (1 << 12)               /* { */
141 #define TC_GRPTERM      (1 << 13)               /* } */
142 #define TC_SEMICOL      (1 << 14)
143 #define TC_NEWLINE      (1 << 15)
144 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
145 #define TC_WHILE        (1 << 17)
146 #define TC_ELSE         (1 << 18)
147 #define TC_BUILTIN      (1 << 19)
148 #define TC_GETLINE      (1 << 20)
149 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
150 #define TC_BEGIN        (1 << 22)
151 #define TC_END          (1 << 23)
152 #define TC_EOF          (1 << 24)
153 #define TC_VARIABLE     (1 << 25)
154 #define TC_ARRAY        (1 << 26)
155 #define TC_FUNCTION     (1 << 27)
156 #define TC_STRING       (1 << 28)
157 #define TC_NUMBER       (1 << 29)
158
159 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
160
161 /* combined token classes */
162 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
165                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166
167 #define TC_STATEMNT (TC_STATX | TC_WHILE)
168 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
169
170 /* word tokens, cannot mean something else if not expected */
171 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
172                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173
174 /* discard newlines after these */
175 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
176                    | TC_BINOP | TC_OPTERM)
177
178 /* what can expression begin with */
179 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
180 /* what can group begin with */
181 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182
183 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184 /* operator is inserted between them */
185 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
186                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
187 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188
189 #define OF_RES1    0x010000
190 #define OF_RES2    0x020000
191 #define OF_STR1    0x040000
192 #define OF_STR2    0x080000
193 #define OF_NUM1    0x100000
194 #define OF_CHECKED 0x200000
195
196 /* combined operator flags */
197 #define xx      0
198 #define xV      OF_RES2
199 #define xS      (OF_RES2 | OF_STR2)
200 #define Vx      OF_RES1
201 #define VV      (OF_RES1 | OF_RES2)
202 #define Nx      (OF_RES1 | OF_NUM1)
203 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
204 #define Sx      (OF_RES1 | OF_STR1)
205 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
206 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207
208 #define OPCLSMASK 0xFF00
209 #define OPNMASK   0x007F
210
/* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
215 #define P(x)      (x << 24)
216 #define PRIMASK   0x7F000000
217 #define PRIMASK2  0x7E000000
218
219 /* Operation classes */
220
221 #define SHIFT_TIL_THIS  0x0600
222 #define RECUR_FROM_THIS 0x1000
223
224 enum {
225         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
226         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
227
228         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
229         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
230         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
231
232         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
233         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
234         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
235         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
236         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
237         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
238         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
239         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
240         OC_DONE = 0x2800,
241
242         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
243         ST_WHILE = 0x3300
244 };
245
246 /* simple builtins */
247 enum {
248         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
249         F_ti,   F_le,   F_sy,   F_ff,   F_cl
250 };
251
252 /* builtins */
253 enum {
254         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
255         B_ge,   B_gs,   B_su,
256         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
257 };
258
259 /* tokens and their corresponding info values */
260
261 #define NTC     "\377"  /* switch to next token class (tc<<1) */
262 #define NTCC    '\377'
263
264 #define OC_B    OC_BUILTIN
265
266 static const char tokenlist[] =
267         "\1("       NTC
268         "\1)"       NTC
269         "\1/"       NTC                                 /* REGEXP */
270         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
271         "\2++"      "\2--"      NTC                     /* UOPPOST */
272         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
273         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
274         "\2*="      "\2/="      "\2%="      "\2^="
275         "\1+"       "\1-"       "\3**="     "\2**"
276         "\1/"       "\1%"       "\1^"       "\1*"
277         "\2!="      "\2>="      "\2<="      "\1>"
278         "\1<"       "\2!~"      "\1~"       "\2&&"
279         "\2||"      "\1?"       "\1:"       NTC
280         "\2in"      NTC
281         "\1,"       NTC
282         "\1|"       NTC
283         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
284         "\1]"       NTC
285         "\1{"       NTC
286         "\1}"       NTC
287         "\1;"       NTC
288         "\1\n"      NTC
289         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
290         "\10continue"           "\6delete"  "\5print"
291         "\6printf"  "\4next"    "\10nextfile"
292         "\6return"  "\4exit"    NTC
293         "\5while"   NTC
294         "\4else"    NTC
295
296         "\3and"     "\5compl"   "\6lshift"  "\2or"
297         "\6rshift"  "\3xor"
298         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
299         "\3cos"     "\3exp"     "\3int"     "\3log"
300         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
301         "\6gensub"  "\4gsub"    "\5index"   "\6length"
302         "\5match"   "\5split"   "\7sprintf" "\3sub"
303         "\6substr"  "\7systime" "\10strftime"
304         "\7tolower" "\7toupper" NTC
305         "\7getline" NTC
306         "\4func"    "\10function"   NTC
307         "\5BEGIN"   NTC
308         "\3END"     "\0"
309         ;
310
311 static const uint32_t tokeninfo[] = {
312         0,
313         0,
314         OC_REGEXP,
315         xS|'a',     xS|'w',     xS|'|',
316         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
317         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
318             OC_FIELD|xV|P(5),
319         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
320             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
321         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
322             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
323         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
324             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
325         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
326             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
327         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
328             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
329         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
330             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
331         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
332             OC_COLON|xx|P(67)|':',
333         OC_IN|SV|P(49),
334         OC_COMMA|SS|P(80),
335         OC_PGETLINE|SV|P(37),
336         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
337             OC_UNARY|xV|P(19)|'!',
338         0,
339         0,
340         0,
341         0,
342         0,
343         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
344         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
345         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
346         OC_RETURN|Vx,   OC_EXIT|Nx,
347         ST_WHILE,
348         0,
349
350         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
357         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
358         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
359         OC_GETLINE|SV|P(0),
360         0,      0,
361         0,
362         0
363 };
364
365 /* internal variable names and their initial values       */
366 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
367 enum {
368         CONVFMT,    OFMT,       FS,         OFS,
369         ORS,        RS,         RT,         FILENAME,
370         SUBSEP,     ARGIND,     ARGC,       ARGV,
371         ERRNO,      FNR,
372         NR,         NF,         IGNORECASE,
373         ENVIRON,    F0,         NUM_INTERNAL_VARS
374 };
375
376 static const char vNames[] =
377         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
378         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
379         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
380         "ERRNO\0"   "FNR\0"
381         "NR\0"      "NF\0*"     "IGNORECASE\0*"
382         "ENVIRON\0" "$\0*"      "\0";
383
384 static const char vValues[] =
385         "%.6g\0"    "%.6g\0"    " \0"       " \0"
386         "\n\0"      "\n\0"      "\0"        "\0"
387         "\034\0"
388         "\377";
389
/* initial hash size, and the sizes the hash may later grow to.
 * NB: no trailing ';' in the macro, so it can be used inside expressions.
 */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
393
394
395
396 /* Globals. Split in two parts so that first one is addressed
397  * with (mostly short) negative offsets */
398 struct globals {
399         chain beginseq, mainseq, endseq, *seq;
400         node *break_ptr, *continue_ptr;
401         rstream *iF;
402         xhash *vhash, *ahash, *fdhash, *fnhash;
403         const char *g_progname;
404         int g_lineno;
405         int nfields;
406         int maxfields; /* used in fsrealloc() only */
407         var *Fields;
408         nvblock *g_cb;
409         char *g_pos;
410         char *g_buf;
411         smallint icase;
412         smallint exiting;
413         smallint nextrec;
414         smallint nextfile;
415         smallint is_f0_split;
416 };
417 struct globals2 {
418         uint32_t t_info; /* often used */
419         uint32_t t_tclass;
420         char *t_string;
421         int t_lineno;
422         int t_rollback;
423
424         var *intvar[NUM_INTERNAL_VARS]; /* often used */
425
426         /* former statics from various functions */
427         char *split_f0__fstrings;
428
429         uint32_t next_token__save_tclass;
430         uint32_t next_token__save_info;
431         uint32_t next_token__ltclass;
432         smallint next_token__concat_inserted;
433
434         smallint next_input_file__files_happen;
435         rstream next_input_file__rsm;
436
437         var *evaluate__fnargs;
438         unsigned evaluate__seed;
439         regex_t evaluate__sreg;
440
441         var ptest__v;
442
443         tsplitter exec_builtin__tspl;
444
445         /* biggest and least used members go last */
446         double t_double;
447         tsplitter fsplitter, rsplitter;
448 };
449 #define G1 (ptr_to_globals[-1])
450 #define G (*(struct globals2 *const)ptr_to_globals)
451 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
452 /* char G1size[sizeof(G1)]; - 0x6c */
453 /* char Gsize[sizeof(G)]; - 0x1cc */
454 /* Trying to keep most of members accessible with short offsets: */
455 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
456 #define beginseq     (G1.beginseq    )
457 #define mainseq      (G1.mainseq     )
458 #define endseq       (G1.endseq      )
459 #define seq          (G1.seq         )
460 #define break_ptr    (G1.break_ptr   )
461 #define continue_ptr (G1.continue_ptr)
462 #define iF           (G1.iF          )
463 #define vhash        (G1.vhash       )
464 #define ahash        (G1.ahash       )
465 #define fdhash       (G1.fdhash      )
466 #define fnhash       (G1.fnhash      )
467 #define g_progname   (G1.g_progname  )
468 #define g_lineno     (G1.g_lineno    )
469 #define nfields      (G1.nfields     )
470 #define maxfields    (G1.maxfields   )
471 #define Fields       (G1.Fields      )
472 #define g_cb         (G1.g_cb        )
473 #define g_pos        (G1.g_pos       )
474 #define g_buf        (G1.g_buf       )
475 #define icase        (G1.icase       )
476 #define exiting      (G1.exiting     )
477 #define nextrec      (G1.nextrec     )
478 #define nextfile     (G1.nextfile    )
479 #define is_f0_split  (G1.is_f0_split )
480 #define t_info       (G.t_info      )
481 #define t_tclass     (G.t_tclass    )
482 #define t_string     (G.t_string    )
483 #define t_double     (G.t_double    )
484 #define t_lineno     (G.t_lineno    )
485 #define t_rollback   (G.t_rollback  )
486 #define intvar       (G.intvar      )
487 #define fsplitter    (G.fsplitter   )
488 #define rsplitter    (G.rsplitter   )
489 #define INIT_G() do { \
490         PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \
491         G.next_token__ltclass = TC_OPTERM; \
492         G.evaluate__seed = 1; \
493 } while (0)
494
495
496 /* function prototypes */
497 static void handle_special(var *);
498 static node *parse_expr(uint32_t);
499 static void chain_group(void);
500 static var *evaluate(node *, var *);
501 static rstream *next_input_file(void);
502 static int fmt_num(char *, int, const char *, double, int);
503 static int awk_exit(int) ATTRIBUTE_NORETURN;
504
505 /* ---- error handling ---- */
506
507 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
508 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
509 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
510 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
511 static const char EMSG_INV_FMT[] = "Invalid format specifier";
512 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
513 static const char EMSG_NOT_ARRAY[] = "Not an array";
514 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
515 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
516 #if !ENABLE_FEATURE_AWK_MATH
517 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
518 #endif
519
520 static void zero_out_var(var * vp)
521 {
522         memset(vp, 0, sizeof(*vp));
523 }
524
525 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
526 static void syntax_error(const char * const message)
527 {
528         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
529 }
530
531 /* ---- hash stuff ---- */
532
/* Hash a NUL-terminated name: h = h * 63 + c for every character c
 * (unsigned arithmetic, wraps around on overflow). The caller reduces
 * the result modulo the table size.
 */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63u + (unsigned)*p;
	return h;
}
540
541 /* create new hash */
542 static xhash *hash_init(void)
543 {
544         xhash *newhash;
545
546         newhash = xzalloc(sizeof(xhash));
547         newhash->csize = FIRST_PRIME;
548         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
549
550         return newhash;
551 }
552
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
555 {
556         hash_item *hi;
557
558         hi = hash->items [ hashidx(name) % hash->csize ];
559         while (hi) {
560                 if (strcmp(hi->name, name) == 0)
561                         return &(hi->data);
562                 hi = hi->next;
563         }
564         return NULL;
565 }
566
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
569 {
570         unsigned newsize, i, idx;
571         hash_item **newitems, *hi, *thi;
572
573         if (hash->nprime == ARRAY_SIZE(PRIMES))
574                 return;
575
576         newsize = PRIMES[hash->nprime++];
577         newitems = xzalloc(newsize * sizeof(hash_item *));
578
579         for (i = 0; i < hash->csize; i++) {
580                 hi = hash->items[i];
581                 while (hi) {
582                         thi = hi;
583                         hi = thi->next;
584                         idx = hashidx(thi->name) % newsize;
585                         thi->next = newitems[idx];
586                         newitems[idx] = thi;
587                 }
588         }
589
590         free(hash->items);
591         hash->csize = newsize;
592         hash->items = newitems;
593 }
594
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
597 {
598         hash_item *hi;
599         unsigned idx;
600         int l;
601
602         hi = hash_search(hash, name);
603         if (! hi) {
604                 if (++hash->nel / hash->csize > 10)
605                         hash_rebuild(hash);
606
607                 l = strlen(name) + 1;
608                 hi = xzalloc(sizeof(hash_item) + l);
609                 memcpy(hi->name, name, l);
610
611                 idx = hashidx(name) % hash->csize;
612                 hi->next = hash->items[idx];
613                 hash->items[idx] = hi;
614                 hash->glen += l;
615         }
616         return &(hi->data);
617 }
618
619 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
620 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
621 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
623
624 static void hash_remove(xhash *hash, const char *name)
625 {
626         hash_item *hi, **phi;
627
628         phi = &(hash->items[hashidx(name) % hash->csize]);
629         while (*phi) {
630                 hi = *phi;
631                 if (strcmp(hi->name, name) == 0) {
632                         hash->glen -= (strlen(name) + 1);
633                         hash->nel--;
634                         *phi = hi->next;
635                         free(hi);
636                         break;
637                 }
638                 phi = &(hi->next);
639         }
640 }
641
642 /* ------ some useful functions ------ */
643
644 static void skip_spaces(char **s)
645 {
646         char *p = *s;
647
648         while (1) {
649                 if (*p == '\\' && p[1] == '\n') {
650                         p++;
651                         t_lineno++;
652                 } else if (*p != ' ' && *p != '\t') {
653                         break;
654                 }
655                 p++;
656         }
657         *s = p;
658 }
659
/* Return the NUL-terminated word at *s and move *s just past its
 * terminating NUL, i.e. to the start of whatever follows.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
668
/* Fetch one character from *s, advancing *s.
 * A backslash is handed to bb_process_escape_sequence(); if that returns
 * a backslash without having consumed anything, the character after the
 * backslash is returned as-is.
 */
static char nextchar(char **s)
{
	char ch = **s;
	char *after;

	(*s)++;
	after = *s;
	if (ch == '\\') {
		ch = bb_process_escape_sequence((const char**)s);
		if (ch == '\\' && *s == after) {
			ch = **s;
			(*s)++;
		}
	}
	return ch;
}
679
680 static int ALWAYS_INLINE isalnum_(int c)
681 {
682         return (isalnum(c) || c == '_');
683 }
684
/* Open 'path' with xfopen(); the single-character name "-" yields stdin
 * instead.
 */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;
	return xfopen(path, mode);
}
689
690 /* -------- working with variables (set/get/copy/etc) -------- */
691
692 static xhash *iamarray(var *v)
693 {
694         var *a = v;
695
696         while (a->type & VF_CHILD)
697                 a = a->x.parent;
698
699         if (!(a->type & VF_ARRAY)) {
700                 a->type |= VF_ARRAY;
701                 a->x.array = hash_init();
702         }
703         return a->x.array;
704 }
705
706 static void clear_array(xhash *array)
707 {
708         unsigned i;
709         hash_item *hi, *thi;
710
711         for (i = 0; i < array->csize; i++) {
712                 hi = array->items[i];
713                 while (hi) {
714                         thi = hi;
715                         hi = hi->next;
716                         free(thi->data.v.string);
717                         free(thi);
718                 }
719                 array->items[i] = NULL;
720         }
721         array->glen = array->nel = 0;
722 }
723
724 /* clear a variable */
725 static var *clrvar(var *v)
726 {
727         if (!(v->type & VF_FSTR))
728                 free(v->string);
729
730         v->type &= VF_DONTTOUCH;
731         v->type |= VF_DIRTY;
732         v->string = NULL;
733         return v;
734 }
735
736 /* assign string value to variable */
737 static var *setvar_p(var *v, char *value)
738 {
739         clrvar(v);
740         v->string = value;
741         handle_special(v);
742         return v;
743 }
744
745 /* same as setvar_p but make a copy of string */
746 static var *setvar_s(var *v, const char *value)
747 {
748         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
749 }
750
751 /* same as setvar_s but set USER flag */
752 static var *setvar_u(var *v, const char *value)
753 {
754         setvar_s(v, value);
755         v->type |= VF_USER;
756         return v;
757 }
758
759 /* set array element to user string */
760 static void setari_u(var *a, int idx, const char *s)
761 {
762         char sidx[sizeof(int)*3 + 1];
763         var *v;
764
765         sprintf(sidx, "%d", idx);
766         v = findvar(iamarray(a), sidx);
767         setvar_u(v, s);
768 }
769
770 /* assign numeric value to variable */
771 static var *setvar_i(var *v, double value)
772 {
773         clrvar(v);
774         v->type |= VF_NUMBER;
775         v->number = value;
776         handle_special(v);
777         return v;
778 }
779
780 static const char *getvar_s(var *v)
781 {
782         /* if v is numeric and has no cached string, convert it to string */
783         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
784                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
785                 v->string = xstrdup(g_buf);
786                 v->type |= VF_CACHED;
787         }
788         return (v->string == NULL) ? "" : v->string;
789 }
790
791 static double getvar_i(var *v)
792 {
793         char *s;
794
795         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
796                 v->number = 0;
797                 s = v->string;
798                 if (s && *s) {
799                         v->number = strtod(s, &s);
800                         if (v->type & VF_USER) {
801                                 skip_spaces(&s);
802                                 if (*s != '\0')
803                                         v->type &= ~VF_USER;
804                         }
805                 } else {
806                         v->type &= ~VF_USER;
807                 }
808                 v->type |= VF_CACHED;
809         }
810         return v->number;
811 }
812
813 static var *copyvar(var *dest, const var *src)
814 {
815         if (dest != src) {
816                 clrvar(dest);
817                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
818                 dest->number = src->number;
819                 if (src->string)
820                         dest->string = xstrdup(src->string);
821         }
822         handle_special(dest);
823         return dest;
824 }
825
826 static var *incvar(var *v)
827 {
828         return setvar_i(v, getvar_i(v)+1.);
829 }
830
831 /* return true if v is number or numeric string */
832 static int is_numeric(var *v)
833 {
834         getvar_i(v);
835         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
836 }
837
838 /* return 1 when value of v corresponds to true, 0 otherwise */
839 static int istrue(var *v)
840 {
841         if (is_numeric(v))
842                 return (v->number == 0) ? 0 : 1;
843         return (v->string && *(v->string)) ? 1 : 0;
844 }
845
846 /* temporary variables allocator. Last allocated should be first freed */
847 static var *nvalloc(int n)
848 {
849         nvblock *pb = NULL;
850         var *v, *r;
851         int size;
852
853         while (g_cb) {
854                 pb = g_cb;
855                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
856                 g_cb = g_cb->next;
857         }
858
859         if (!g_cb) {
860                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
861                 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
862                 g_cb->size = size;
863                 g_cb->pos = g_cb->nv;
864                 g_cb->prev = pb;
865                 g_cb->next = NULL;
866                 if (pb) pb->next = g_cb;
867         }
868
869         v = r = g_cb->pos;
870         g_cb->pos += n;
871
872         while (v < g_cb->pos) {
873                 v->type = 0;
874                 v->string = NULL;
875                 v++;
876         }
877
878         return r;
879 }
880
881 static void nvfree(var *v)
882 {
883         var *p;
884
885         if (v < g_cb->nv || v >= g_cb->pos)
886                 syntax_error(EMSG_INTERNAL_ERROR);
887
888         for (p = v; p < g_cb->pos; p++) {
889                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
890                         clear_array(iamarray(p));
891                         free(p->x.array->items);
892                         free(p->x.array);
893                 }
894                 if (p->type & VF_WALK)
895                         free(p->x.walker);
896
897                 clrvar(p);
898         }
899
900         g_cb->pos = v;
901         while (g_cb->prev && g_cb->pos == g_cb->nv) {
902                 g_cb = g_cb->prev;
903         }
904 }
905
906 /* ------- awk program text parsing ------- */
907
/* Parse the next token at the global position g_pos and place the results
 * into the global t_* variables. If the token is not one of the expected
 * classes, give up. Returns the token class.
 */
911 static uint32_t next_token(uint32_t expected)
912 {
913 #define concat_inserted (G.next_token__concat_inserted)
914 #define save_tclass     (G.next_token__save_tclass)
915 #define save_info       (G.next_token__save_info)
916 /* Initialized to TC_OPTERM: */
917 #define ltclass         (G.next_token__ltclass)
918
919         char *p, *pp, *s;
920         const char *tl;
921         uint32_t tc;
922         const uint32_t *ti;
923         int l;
924
925         if (t_rollback) {
926                 t_rollback = FALSE;
927
928         } else if (concat_inserted) {
929                 concat_inserted = FALSE;
930                 t_tclass = save_tclass;
931                 t_info = save_info;
932
933         } else {
934                 p = g_pos;
935  readnext:
936                 skip_spaces(&p);
937                 g_lineno = t_lineno;
938                 if (*p == '#')
939                         while (*p != '\n' && *p != '\0')
940                                 p++;
941
942                 if (*p == '\n')
943                         t_lineno++;
944
945                 if (*p == '\0') {
946                         tc = TC_EOF;
947
948                 } else if (*p == '\"') {
949                         /* it's a string */
950                         t_string = s = ++p;
951                         while (*p != '\"') {
952                                 if (*p == '\0' || *p == '\n')
953                                         syntax_error(EMSG_UNEXP_EOS);
954                                 *(s++) = nextchar(&p);
955                         }
956                         p++;
957                         *s = '\0';
958                         tc = TC_STRING;
959
960                 } else if ((expected & TC_REGEXP) && *p == '/') {
961                         /* it's regexp */
962                         t_string = s = ++p;
963                         while (*p != '/') {
964                                 if (*p == '\0' || *p == '\n')
965                                         syntax_error(EMSG_UNEXP_EOS);
966                                 *s = *p++;
967                                 if (*s++ == '\\') {
968                                         pp = p;
969                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
970                                         if (*pp == '\\')
971                                                 *s++ = '\\';
972                                         if (p == pp)
973                                                 *s++ = *p++;
974                                 }
975                         }
976                         p++;
977                         *s = '\0';
978                         tc = TC_REGEXP;
979
980                 } else if (*p == '.' || isdigit(*p)) {
981                         /* it's a number */
982                         t_double = strtod(p, &p);
983                         if (*p == '.')
984                                 syntax_error(EMSG_UNEXP_TOKEN);
985                         tc = TC_NUMBER;
986
987                 } else {
988                         /* search for something known */
989                         tl = tokenlist;
990                         tc = 0x00000001;
991                         ti = tokeninfo;
992                         while (*tl) {
993                                 l = *(tl++);
994                                 if (l == NTCC) {
995                                         tc <<= 1;
996                                         continue;
997                                 }
998                                 /* if token class is expected, token
999                                  * matches and it's not a longer word,
1000                                  * then this is what we are looking for
1001                                  */
1002                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1003                                  && *tl == *p && strncmp(p, tl, l) == 0
1004                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1005                                 ) {
1006                                         t_info = *ti;
1007                                         p += l;
1008                                         break;
1009                                 }
1010                                 ti++;
1011                                 tl += l;
1012                         }
1013
1014                         if (!*tl) {
1015                                 /* it's a name (var/array/function),
1016                                  * otherwise it's something wrong
1017                                  */
1018                                 if (!isalnum_(*p))
1019                                         syntax_error(EMSG_UNEXP_TOKEN);
1020
1021                                 t_string = --p;
1022                                 while (isalnum_(*(++p))) {
1023                                         *(p-1) = *p;
1024                                 }
1025                                 *(p-1) = '\0';
1026                                 tc = TC_VARIABLE;
1027                                 /* also consume whitespace between functionname and bracket */
1028                                 if (!(expected & TC_VARIABLE))
1029                                         skip_spaces(&p);
1030                                 if (*p == '(') {
1031                                         tc = TC_FUNCTION;
1032                                 } else {
1033                                         if (*p == '[') {
1034                                                 p++;
1035                                                 tc = TC_ARRAY;
1036                                         }
1037                                 }
1038                         }
1039                 }
1040                 g_pos = p;
1041
1042                 /* skipping newlines in some cases */
1043                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1044                         goto readnext;
1045
1046                 /* insert concatenation operator when needed */
1047                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1048                         concat_inserted = TRUE;
1049                         save_tclass = tc;
1050                         save_info = t_info;
1051                         tc = TC_BINOP;
1052                         t_info = OC_CONCAT | SS | P(35);
1053                 }
1054
1055                 t_tclass = tc;
1056         }
1057         ltclass = t_tclass;
1058
1059         /* Are we ready for this? */
1060         if (!(ltclass & expected))
1061                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1062                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1063
1064         return ltclass;
1065 #undef concat_inserted
1066 #undef save_tclass
1067 #undef save_info
1068 #undef ltclass
1069 }
1070
1071 static void rollback_token(void)
1072 {
1073         t_rollback = TRUE;
1074 }
1075
1076 static node *new_node(uint32_t info)
1077 {
1078         node *n;
1079
1080         n = xzalloc(sizeof(node));
1081         n->info = info;
1082         n->lineno = g_lineno;
1083         return n;
1084 }
1085
1086 static node *mk_re_node(const char *s, node *n, regex_t *re)
1087 {
1088         n->info = OC_REGEXP;
1089         n->l.re = re;
1090         n->r.ire = re + 1;
1091         xregcomp(re, s, REG_EXTENDED);
1092         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1093
1094         return n;
1095 }
1096
1097 static node *condition(void)
1098 {
1099         next_token(TC_SEQSTART);
1100         return parse_expr(TC_SEQTERM);
1101 }
1102
1103 /* parse expression terminated by given argument, return ptr
1104  * to built subtree. Terminator is eaten by parse_expr */
1105 static node *parse_expr(uint32_t iexp)
1106 {
1107         node sn;
1108         node *cn = &sn;
1109         node *vn, *glptr;
1110         uint32_t tc, xtc;
1111         var *v;
1112
1113         sn.info = PRIMASK;
1114         sn.r.n = glptr = NULL;
1115         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1116
1117         while (!((tc = next_token(xtc)) & iexp)) {
1118                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1119                         /* input redirection (<) attached to glptr node */
1120                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1121                         cn->a.n = glptr;
1122                         xtc = TC_OPERAND | TC_UOPPRE;
1123                         glptr = NULL;
1124
1125                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1126                         /* for binary and postfix-unary operators, jump back over
1127                          * previous operators with higher priority */
1128                         vn = cn;
1129                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1130                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1131                                 vn = vn->a.n;
1132                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1133                                 t_info += P(6);
1134                         cn = vn->a.n->r.n = new_node(t_info);
1135                         cn->a.n = vn->a.n;
1136                         if (tc & TC_BINOP) {
1137                                 cn->l.n = vn;
1138                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1139                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1140                                         /* it's a pipe */
1141                                         next_token(TC_GETLINE);
1142                                         /* give maximum priority to this pipe */
1143                                         cn->info &= ~PRIMASK;
1144                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1145                                 }
1146                         } else {
1147                                 cn->r.n = vn;
1148                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1149                         }
1150                         vn->a.n = cn;
1151
1152                 } else {
1153                         /* for operands and prefix-unary operators, attach them
1154                          * to last node */
1155                         vn = cn;
1156                         cn = vn->r.n = new_node(t_info);
1157                         cn->a.n = vn;
1158                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1159                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1160                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1161                                 /* one should be very careful with switch on tclass -
1162                                  * only simple tclasses should be used! */
1163                                 switch (tc) {
1164                                 case TC_VARIABLE:
1165                                 case TC_ARRAY:
1166                                         cn->info = OC_VAR;
1167                                         v = hash_search(ahash, t_string);
1168                                         if (v != NULL) {
1169                                                 cn->info = OC_FNARG;
1170                                                 cn->l.i = v->x.aidx;
1171                                         } else {
1172                                                 cn->l.v = newvar(t_string);
1173                                         }
1174                                         if (tc & TC_ARRAY) {
1175                                                 cn->info |= xS;
1176                                                 cn->r.n = parse_expr(TC_ARRTERM);
1177                                         }
1178                                         break;
1179
1180                                 case TC_NUMBER:
1181                                 case TC_STRING:
1182                                         cn->info = OC_VAR;
1183                                         v = cn->l.v = xzalloc(sizeof(var));
1184                                         if (tc & TC_NUMBER)
1185                                                 setvar_i(v, t_double);
1186                                         else
1187                                                 setvar_s(v, t_string);
1188                                         break;
1189
1190                                 case TC_REGEXP:
1191                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1192                                         break;
1193
1194                                 case TC_FUNCTION:
1195                                         cn->info = OC_FUNC;
1196                                         cn->r.f = newfunc(t_string);
1197                                         cn->l.n = condition();
1198                                         break;
1199
1200                                 case TC_SEQSTART:
1201                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1202                                         cn->a.n = vn;
1203                                         break;
1204
1205                                 case TC_GETLINE:
1206                                         glptr = cn;
1207                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1208                                         break;
1209
1210                                 case TC_BUILTIN:
1211                                         cn->l.n = condition();
1212                                         break;
1213                                 }
1214                         }
1215                 }
1216         }
1217         return sn.r.n;
1218 }
1219
1220 /* add node to chain. Return ptr to alloc'd node */
1221 static node *chain_node(uint32_t info)
1222 {
1223         node *n;
1224
1225         if (!seq->first)
1226                 seq->first = seq->last = new_node(0);
1227
1228         if (seq->programname != g_progname) {
1229                 seq->programname = g_progname;
1230                 n = chain_node(OC_NEWSOURCE);
1231                 n->l.s = xstrdup(g_progname);
1232         }
1233
1234         n = seq->last;
1235         n->info = info;
1236         seq->last = n->a.n = new_node(OC_DONE);
1237
1238         return n;
1239 }
1240
1241 static void chain_expr(uint32_t info)
1242 {
1243         node *n;
1244
1245         n = chain_node(info);
1246         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1247         if (t_tclass & TC_GRPTERM)
1248                 rollback_token();
1249 }
1250
1251 static node *chain_loop(node *nn)
1252 {
1253         node *n, *n2, *save_brk, *save_cont;
1254
1255         save_brk = break_ptr;
1256         save_cont = continue_ptr;
1257
1258         n = chain_node(OC_BR | Vx);
1259         continue_ptr = new_node(OC_EXEC);
1260         break_ptr = new_node(OC_EXEC);
1261         chain_group();
1262         n2 = chain_node(OC_EXEC | Vx);
1263         n2->l.n = nn;
1264         n2->a.n = n;
1265         continue_ptr->a.n = n2;
1266         break_ptr->a.n = n->r.n = seq->last;
1267
1268         continue_ptr = save_cont;
1269         break_ptr = save_brk;
1270
1271         return n;
1272 }
1273
1274 /* parse group and attach it to chain */
1275 static void chain_group(void)
1276 {
1277         uint32_t c;
1278         node *n, *n2, *n3;
1279
1280         do {
1281                 c = next_token(TC_GRPSEQ);
1282         } while (c & TC_NEWLINE);
1283
1284         if (c & TC_GRPSTART) {
1285                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1286                         if (t_tclass & TC_NEWLINE) continue;
1287                         rollback_token();
1288                         chain_group();
1289                 }
1290         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1291                 rollback_token();
1292                 chain_expr(OC_EXEC | Vx);
1293         } else {                                                /* TC_STATEMNT */
1294                 switch (t_info & OPCLSMASK) {
1295                 case ST_IF:
1296                         n = chain_node(OC_BR | Vx);
1297                         n->l.n = condition();
1298                         chain_group();
1299                         n2 = chain_node(OC_EXEC);
1300                         n->r.n = seq->last;
1301                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1302                                 chain_group();
1303                                 n2->a.n = seq->last;
1304                         } else {
1305                                 rollback_token();
1306                         }
1307                         break;
1308
1309                 case ST_WHILE:
1310                         n2 = condition();
1311                         n = chain_loop(NULL);
1312                         n->l.n = n2;
1313                         break;
1314
1315                 case ST_DO:
1316                         n2 = chain_node(OC_EXEC);
1317                         n = chain_loop(NULL);
1318                         n2->a.n = n->a.n;
1319                         next_token(TC_WHILE);
1320                         n->l.n = condition();
1321                         break;
1322
1323                 case ST_FOR:
1324                         next_token(TC_SEQSTART);
1325                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1326                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1327                                 if ((n2->info & OPCLSMASK) != OC_IN)
1328                                         syntax_error(EMSG_UNEXP_TOKEN);
1329                                 n = chain_node(OC_WALKINIT | VV);
1330                                 n->l.n = n2->l.n;
1331                                 n->r.n = n2->r.n;
1332                                 n = chain_loop(NULL);
1333                                 n->info = OC_WALKNEXT | Vx;
1334                                 n->l.n = n2->l.n;
1335                         } else {                        /* for (;;) */
1336                                 n = chain_node(OC_EXEC | Vx);
1337                                 n->l.n = n2;
1338                                 n2 = parse_expr(TC_SEMICOL);
1339                                 n3 = parse_expr(TC_SEQTERM);
1340                                 n = chain_loop(n3);
1341                                 n->l.n = n2;
1342                                 if (! n2)
1343                                         n->info = OC_EXEC;
1344                         }
1345                         break;
1346
1347                 case OC_PRINT:
1348                 case OC_PRINTF:
1349                         n = chain_node(t_info);
1350                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1351                         if (t_tclass & TC_OUTRDR) {
1352                                 n->info |= t_info;
1353                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1354                         }
1355                         if (t_tclass & TC_GRPTERM)
1356                                 rollback_token();
1357                         break;
1358
1359                 case OC_BREAK:
1360                         n = chain_node(OC_EXEC);
1361                         n->a.n = break_ptr;
1362                         break;
1363
1364                 case OC_CONTINUE:
1365                         n = chain_node(OC_EXEC);
1366                         n->a.n = continue_ptr;
1367                         break;
1368
1369                 /* delete, next, nextfile, return, exit */
1370                 default:
1371                         chain_expr(t_info);
1372                 }
1373         }
1374 }
1375
1376 static void parse_program(char *p)
1377 {
1378         uint32_t tclass;
1379         node *cn;
1380         func *f;
1381         var *v;
1382
1383         g_pos = p;
1384         t_lineno = 1;
1385         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1386                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1387
1388                 if (tclass & TC_OPTERM)
1389                         continue;
1390
1391                 seq = &mainseq;
1392                 if (tclass & TC_BEGIN) {
1393                         seq = &beginseq;
1394                         chain_group();
1395
1396                 } else if (tclass & TC_END) {
1397                         seq = &endseq;
1398                         chain_group();
1399
1400                 } else if (tclass & TC_FUNCDECL) {
1401                         next_token(TC_FUNCTION);
1402                         g_pos++;
1403                         f = newfunc(t_string);
1404                         f->body.first = NULL;
1405                         f->nargs = 0;
1406                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1407                                 v = findvar(ahash, t_string);
1408                                 v->x.aidx = (f->nargs)++;
1409
1410                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1411                                         break;
1412                         }
1413                         seq = &(f->body);
1414                         chain_group();
1415                         clear_array(ahash);
1416
1417                 } else if (tclass & TC_OPSEQ) {
1418                         rollback_token();
1419                         cn = chain_node(OC_TEST);
1420                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1421                         if (t_tclass & TC_GRPSTART) {
1422                                 rollback_token();
1423                                 chain_group();
1424                         } else {
1425                                 chain_node(OC_PRINT);
1426                         }
1427                         cn->r.n = mainseq.last;
1428
1429                 } else /* if (tclass & TC_GRPSTART) */ {
1430                         rollback_token();
1431                         chain_group();
1432                 }
1433         }
1434 }
1435
1436
1437 /* -------- program execution part -------- */
1438
1439 static node *mk_splitter(const char *s, tsplitter *spl)
1440 {
1441         regex_t *re, *ire;
1442         node *n;
1443
1444         re = &spl->re[0];
1445         ire = &spl->re[1];
1446         n = &spl->n;
1447         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1448                 regfree(re);
1449                 regfree(ire);
1450         }
1451         if (strlen(s) > 1) {
1452                 mk_re_node(s, n, re);
1453         } else {
1454                 n->info = (uint32_t) *s;
1455         }
1456
1457         return n;
1458 }
1459
1460 /* use node as a regular expression. Supplied with node ptr and regex_t
1461  * storage space. Return ptr to regex (if result points to preg, it should
1462  * be later regfree'd manually
1463  */
1464 static regex_t *as_regex(node *op, regex_t *preg)
1465 {
1466         var *v;
1467         const char *s;
1468
1469         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1470                 return icase ? op->r.ire : op->l.re;
1471         }
1472         v = nvalloc(1);
1473         s = getvar_s(evaluate(op, v));
1474         xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1475         nvfree(v);
1476         return preg;
1477 }
1478
/* Gradually growing buffer: when *b is not allocated yet or n has
 * reached *size, reallocate *b to n + n/2 + 80 bytes and store that
 * figure in *size. Otherwise do nothing.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1485
1486 /* resize field storage space */
1487 static void fsrealloc(int size)
1488 {
1489         int i;
1490
1491         if (size >= maxfields) {
1492                 i = maxfields;
1493                 maxfields = size + 16;
1494                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1495                 for (; i < maxfields; i++) {
1496                         Fields[i].type = VF_SPECIAL;
1497                         Fields[i].string = NULL;
1498                 }
1499         }
1500
1501         if (size < nfields) {
1502                 for (i = size; i < nfields; i++) {
1503                         clrvar(Fields + i);
1504                 }
1505         }
1506         nfields = size;
1507 }
1508
1509 static int awk_split(const char *s, node *spl, char **slist)
1510 {
1511         int l, n = 0;
1512         char c[4];
1513         char *s1;
1514         regmatch_t pmatch[2];
1515
1516         /* in worst case, each char would be a separate field */
1517         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1518         strcpy(s1, s);
1519
1520         c[0] = c[1] = (char)spl->info;
1521         c[2] = c[3] = '\0';
1522         if (*getvar_s(intvar[RS]) == '\0')
1523                 c[2] = '\n';
1524
1525         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1526                 while (*s) {
1527                         l = strcspn(s, c+2);
1528                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1529                          && pmatch[0].rm_so <= l
1530                         ) {
1531                                 l = pmatch[0].rm_so;
1532                                 if (pmatch[0].rm_eo == 0) {
1533                                         l++;
1534                                         pmatch[0].rm_eo++;
1535                                 }
1536                         } else {
1537                                 pmatch[0].rm_eo = l;
1538                                 if (s[l]) pmatch[0].rm_eo++;
1539                         }
1540
1541                         memcpy(s1, s, l);
1542                         s1[l] = '\0';
1543                         nextword(&s1);
1544                         s += pmatch[0].rm_eo;
1545                         n++;
1546                 }
1547         } else if (c[0] == '\0') {              /* null split */
1548                 while (*s) {
1549                         *s1++ = *s++;
1550                         *s1++ = '\0';
1551                         n++;
1552                 }
1553         } else if (c[0] != ' ') {               /* single-character split */
1554                 if (icase) {
1555                         c[0] = toupper(c[0]);
1556                         c[1] = tolower(c[1]);
1557                 }
1558                 if (*s1) n++;
1559                 while ((s1 = strpbrk(s1, c))) {
1560                         *s1++ = '\0';
1561                         n++;
1562                 }
1563         } else {                                /* space split */
1564                 while (*s) {
1565                         s = skip_whitespace(s);
1566                         if (!*s) break;
1567                         n++;
1568                         while (*s && !isspace(*s))
1569                                 *s1++ = *s++;
1570                         *s1++ = '\0';
1571                 }
1572         }
1573         return n;
1574 }
1575
1576 static void split_f0(void)
1577 {
1578 #define fstrings (G.split_f0__fstrings)
1579
1580         int i, n;
1581         char *s;
1582
1583         if (is_f0_split)
1584                 return;
1585
1586         is_f0_split = TRUE;
1587         free(fstrings);
1588         fsrealloc(0);
1589         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1590         fsrealloc(n);
1591         s = fstrings;
1592         for (i = 0; i < n; i++) {
1593                 Fields[i].string = nextword(&s);
1594                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1595         }
1596
1597         /* set NF manually to avoid side effects */
1598         clrvar(intvar[NF]);
1599         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1600         intvar[NF]->number = nfields;
1601 #undef fstrings
1602 }
1603
1604 /* perform additional actions when some internal variables changed */
1605 static void handle_special(var *v)
1606 {
1607         int n;
1608         char *b;
1609         const char *sep, *s;
1610         int sl, l, len, i, bsize;
1611
1612         if (!(v->type & VF_SPECIAL))
1613                 return;
1614
1615         if (v == intvar[NF]) {
1616                 n = (int)getvar_i(v);
1617                 fsrealloc(n);
1618
1619                 /* recalculate $0 */
1620                 sep = getvar_s(intvar[OFS]);
1621                 sl = strlen(sep);
1622                 b = NULL;
1623                 len = 0;
1624                 for (i = 0; i < n; i++) {
1625                         s = getvar_s(&Fields[i]);
1626                         l = strlen(s);
1627                         if (b) {
1628                                 memcpy(b+len, sep, sl);
1629                                 len += sl;
1630                         }
1631                         qrealloc(&b, len+l+sl, &bsize);
1632                         memcpy(b+len, s, l);
1633                         len += l;
1634                 }
1635                 if (b)
1636                         b[len] = '\0';
1637                 setvar_p(intvar[F0], b);
1638                 is_f0_split = TRUE;
1639
1640         } else if (v == intvar[F0]) {
1641                 is_f0_split = FALSE;
1642
1643         } else if (v == intvar[FS]) {
1644                 mk_splitter(getvar_s(v), &fsplitter);
1645
1646         } else if (v == intvar[RS]) {
1647                 mk_splitter(getvar_s(v), &rsplitter);
1648
1649         } else if (v == intvar[IGNORECASE]) {
1650                 icase = istrue(v);
1651
1652         } else {                                /* $n */
1653                 n = getvar_i(intvar[NF]);
1654                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1655                 /* right here v is invalid. Just to note... */
1656         }
1657 }
1658
1659 /* step through func/builtin/etc arguments */
1660 static node *nextarg(node **pn)
1661 {
1662         node *n;
1663
1664         n = *pn;
1665         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1666                 *pn = n->r.n;
1667                 n = n->l.n;
1668         } else {
1669                 *pn = NULL;
1670         }
1671         return n;
1672 }
1673
1674 static void hashwalk_init(var *v, xhash *array)
1675 {
1676         char **w;
1677         hash_item *hi;
1678         int i;
1679
1680         if (v->type & VF_WALK)
1681                 free(v->x.walker);
1682
1683         v->type |= VF_WALK;
1684         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1685         w[0] = w[1] = (char *)(w + 2);
1686         for (i = 0; i < array->csize; i++) {
1687                 hi = array->items[i];
1688                 while (hi) {
1689                         strcpy(*w, hi->name);
1690                         nextword(w);
1691                         hi = hi->next;
1692                 }
1693         }
1694 }
1695
1696 static int hashwalk_next(var *v)
1697 {
1698         char **w;
1699
1700         w = v->x.walker;
1701         if (w[1] == w[0])
1702                 return FALSE;
1703
1704         setvar_s(v, nextword(w+1));
1705         return TRUE;
1706 }
1707
1708 /* evaluate node, return 1 when result is true, 0 otherwise */
1709 static int ptest(node *pattern)
1710 {
1711         /* ptest__v is "static": to save stack space? */
1712         return istrue(evaluate(pattern, &G.ptest__v));
1713 }
1714
1715 /* read next record from stream rsm into a variable v
 *
 * The stream keeps its own buffer (rsm->buffer, capacity rsm->size).
 * rsm->adv is the offset of the not-yet-consumed data in that buffer and
 * rsm->pos is the number of such bytes; the data is kept NUL-terminated.
 *
 * The record separator comes from rsplitter:
 *  - an OC_REGEXP node: the separator is the first regex match;
 *  - otherwise a single char (low byte of rsplitter.n.info);
 *  - if that char is '\0': a run of two or more newlines separates
 *    records and leading newlines are skipped.
 *
 * On success v receives the record text (flagged VF_USER) and
 * intvar[RT] receives the separator text that terminated it.
 *
 * Returns 1 if a record was read, 0 if no data is left, -1 on read
 * error (intvar[ERRNO] is set from errno in that case).
 */
1716 static int awk_getline(rstream *rsm, var *v)
1717 {
1718         char *b;
1719         regmatch_t pmatch[2];
1720         int a, p, pp=0, size;
1721         int fd, so, eo, r, rp;
1722         char c, *m, *s;
1723
1724         /* we're using our own buffer since we need access to accumulating
1725          * characters
1726          */
1727         fd = fileno(rsm->F);
1728         m = rsm->buffer;
1729         a = rsm->adv;
1730         p = rsm->pos;
1731         size = rsm->size;
1732         c = (char) rsplitter.n.info;
1733         rp = 0;
1734
1735         if (! m) qrealloc(&m, 256, &size);
1736         do {
                /* b: start of pending data; so/eo: start/end offset of the
                 * separator relative to b (default: both at end of data);
                 * rp: offset where the record text starts */
1737                 b = m + a;
1738                 so = eo = p;
1739                 r = 1;
1740                 if (p > 0) {
1741                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1742                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1743                                                         b, 1, pmatch, 0) == 0) {
1744                                         so = pmatch[0].rm_so;
1745                                         eo = pmatch[0].rm_eo;
                                        /* accept the match only if data follows it;
                                         * otherwise try to read more first (presumably
                                         * because more input could extend the match) */
1746                                         if (b[eo] != '\0')
1747                                                 break;
1748                                 }
1749                         } else if (c != '\0') {
                                /* search only the bytes added by the last read
                                 * (pp is the data length before that read) */
1750                                 s = strchr(b+pp, c);
                                /* no separator found: a NUL byte inside the new
                                 * data ends the record as well */
1751                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1752                                 if (s) {
1753                                         so = eo = s-b;
1754                                         eo++;
1755                                         break;
1756                                 }
1757                         } else {
                                /* separator char is '\0': skip leading newlines,
                                 * then look for an empty line */
1758                                 while (b[rp] == '\n')
1759                                         rp++;
1760                                 s = strstr(b+rp, "\n\n");
1761                                 if (s) {
1762                                         so = eo = s-b;
                                        /* the whole run of newlines is the separator */
1763                                         while (b[eo] == '\n') eo++;
1764                                         if (b[eo] != '\0')
1765                                                 break;
1766                                 }
1767                         }
1768                 }
1769
                /* no complete record yet: move pending data (plus its
                 * terminating NUL) to the buffer start before reading more */
1770                 if (a > 0) {
1771                         memmove(m, (const void *)(m+a), p+1);
1772                         b = m;
1773                         a = 0;
1774                 }
1775
1776                 qrealloc(&m, a+p+128, &size);
1777                 b = m + a;
1778                 pp = p;
                /* one byte is kept free for the terminating NUL */
1779                 p += safe_read(fd, b+p, size-p-1);
                /* p shrinks only if safe_read returned a negative value */
1780                 if (p < pp) {
1781                         p = 0;
1782                         r = 0;
1783                         setvar_i(intvar[ERRNO], errno);
1784                 }
1785                 b[p] = '\0';
1786
        /* loop again as long as the read added data */
1787         } while (p > pp);
1788
1789         if (p == 0) {
                /* nothing to return: 1 -> 0 (no data), 0 -> -1 (read error) */
1790                 r--;
1791         } else {
                /* temporarily NUL-terminate the record, then the separator,
                 * to copy them out; the saved bytes are restored afterwards */
1792                 c = b[so]; b[so] = '\0';
1793                 setvar_s(v, b+rp);
1794                 v->type |= VF_USER;
1795                 b[so] = c;
1796                 c = b[eo]; b[eo] = '\0';
1797                 setvar_s(intvar[RT], b+so);
1798                 b[eo] = c;
1799         }
1800
        /* store the buffer state back; data up to eo is now consumed */
1801         rsm->buffer = m;
1802         rsm->adv = a + eo;
1803         rsm->pos = p - eo;
1804         rsm->size = size;
1805
1806         return r;
1807 }
1808
1809 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1810 {
1811         int r = 0;
1812         char c;
1813         const char *s = format;
1814
1815         if (int_as_int && n == (int)n) {
1816                 r = snprintf(b, size, "%d", (int)n);
1817         } else {
1818                 do { c = *s; } while (c && *++s);
1819                 if (strchr("diouxX", c)) {
1820                         r = snprintf(b, size, format, (int)n);
1821                 } else if (strchr("eEfgG", c)) {
1822                         r = snprintf(b, size, format, n);
1823                 } else {
1824                         syntax_error(EMSG_INV_FMT);
1825                 }
1826         }
1827         return r;
1828 }
1829
1830
1831 /* formatted output into an allocated buffer, return ptr to buffer */
1832 static char *awk_printf(node *n)
1833 {
1834         char *b = NULL;
1835         char *fmt, *s, *f;
1836         const char *s1;
1837         int i, j, incr, bsize;
1838         char c, c1;
1839         var *v, *arg;
1840
1841         v = nvalloc(1);
1842         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1843
1844         i = 0;
1845         while (*f) {
1846                 s = f;
1847                 while (*f && (*f != '%' || *(++f) == '%'))
1848                         f++;
1849                 while (*f && !isalpha(*f)) {
1850                         if (*f == '*')
1851                                 syntax_error("%*x formats are not supported");
1852                         f++;
1853                 }
1854
1855                 incr = (f - s) + MAXVARFMT;
1856                 qrealloc(&b, incr + i, &bsize);
1857                 c = *f;
1858                 if (c != '\0') f++;
1859                 c1 = *f;
1860                 *f = '\0';
1861                 arg = evaluate(nextarg(&n), v);
1862
1863                 j = i;
1864                 if (c == 'c' || !c) {
1865                         i += sprintf(b+i, s, is_numeric(arg) ?
1866                                         (char)getvar_i(arg) : *getvar_s(arg));
1867                 } else if (c == 's') {
1868                         s1 = getvar_s(arg);
1869                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1870                         i += sprintf(b+i, s, s1);
1871                 } else {
1872                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1873                 }
1874                 *f = c1;
1875
1876                 /* if there was an error while sprintf, return value is negative */
1877                 if (i < j) i = j;
1878         }
1879
1880         b = xrealloc(b, i + 1);
1881         free(fmt);
1882         nvfree(v);
1883         b[i] = '\0';
1884         return b;
1885 }
1886
1887 /* common substitution routine
1888  * replace (nm) substring of (src) that match (n) with (repl), store
1889  * result into (dest), return number of substitutions. If nm=0, replace
1890  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1891  * subexpression matching (\1-\9)
1892  */
1893 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1894 {
1895         char *ds = NULL;
1896         const char *s;
1897         const char *sp;
1898         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1899         regmatch_t pmatch[10];
1900         regex_t sreg, *re;
1901
1902         re = as_regex(rn, &sreg);
1903         if (! src) src = intvar[F0];
1904         if (! dest) dest = intvar[F0];
1905
1906         i = di = 0;
1907         sp = getvar_s(src);
1908         rl = strlen(repl);
1909         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1910                 so = pmatch[0].rm_so;
1911                 eo = pmatch[0].rm_eo;
1912
1913                 qrealloc(&ds, di + eo + rl, &dssize);
1914                 memcpy(ds + di, sp, eo);
1915                 di += eo;
1916                 if (++i >= nm) {
1917                         /* replace */
1918                         di -= (eo - so);
1919                         nbs = 0;
1920                         for (s = repl; *s; s++) {
1921                                 ds[di++] = c = *s;
1922                                 if (c == '\\') {
1923                                         nbs++;
1924                                         continue;
1925                                 }
1926                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1927                                         di -= ((nbs + 3) >> 1);
1928                                         j = 0;
1929                                         if (c != '&') {
1930                                                 j = c - '0';
1931                                                 nbs++;
1932                                         }
1933                                         if (nbs % 2) {
1934                                                 ds[di++] = c;
1935                                         } else {
1936                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1937                                                 qrealloc(&ds, di + rl + n, &dssize);
1938                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1939                                                 di += n;
1940                                         }
1941                                 }
1942                                 nbs = 0;
1943                         }
1944                 }
1945
1946                 sp += eo;
1947                 if (i == nm) break;
1948                 if (eo == so) {
1949                         if (! (ds[di++] = *sp++)) break;
1950                 }
1951         }
1952
1953         qrealloc(&ds, di + strlen(sp), &dssize);
1954         strcpy(ds + di, sp);
1955         setvar_p(dest, ds);
1956         if (re == &sreg) regfree(re);
1957         return i;
1958 }
1959
1960 static var *exec_builtin(node *op, var *res)
1961 {
1962 #define tspl (G.exec_builtin__tspl)
1963
1964         int (*to_xxx)(int);
1965         var *tv;
1966         node *an[4];
1967         var *av[4];
1968         const char *as[4];
1969         regmatch_t pmatch[2];
1970         regex_t sreg, *re;
1971         node *spl;
1972         uint32_t isr, info;
1973         int nargs;
1974         time_t tt;
1975         char *s, *s1;
1976         int i, l, ll, n;
1977
1978         tv = nvalloc(4);
1979         isr = info = op->info;
1980         op = op->l.n;
1981
1982         av[2] = av[3] = NULL;
1983         for (i = 0; i < 4 && op; i++) {
1984                 an[i] = nextarg(&op);
1985                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1986                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1987                 isr >>= 1;
1988         }
1989
1990         nargs = i;
1991         if (nargs < (info >> 30))
1992                 syntax_error(EMSG_TOO_FEW_ARGS);
1993
1994         switch (info & OPNMASK) {
1995
1996         case B_a2:
1997 #if ENABLE_FEATURE_AWK_MATH
1998                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1999 #else
2000                 syntax_error(EMSG_NO_MATH);
2001 #endif
2002                 break;
2003
2004         case B_sp:
2005                 if (nargs > 2) {
2006                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2007                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2008                 } else {
2009                         spl = &fsplitter.n;
2010                 }
2011
2012                 n = awk_split(as[0], spl, &s);
2013                 s1 = s;
2014                 clear_array(iamarray(av[1]));
2015                 for (i=1; i<=n; i++)
2016                         setari_u(av[1], i, nextword(&s1));
2017                 free(s);
2018                 setvar_i(res, n);
2019                 break;
2020
2021         case B_ss:
2022                 l = strlen(as[0]);
2023                 i = getvar_i(av[1]) - 1;
2024                 if (i > l) i = l;
2025                 if (i < 0) i = 0;
2026                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2027                 if (n < 0) n = 0;
2028                 s = xmalloc(n+1);
2029                 strncpy(s, as[0]+i, n);
2030                 s[n] = '\0';
2031                 setvar_p(res, s);
2032                 break;
2033
2034         case B_an:
2035                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2036                 break;
2037
2038         case B_co:
2039                 setvar_i(res, ~(long)getvar_i(av[0]));
2040                 break;
2041
2042         case B_ls:
2043                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2044                 break;
2045
2046         case B_or:
2047                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2048                 break;
2049
2050         case B_rs:
2051                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2052                 break;
2053
2054         case B_xo:
2055                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2056                 break;
2057
2058         case B_lo:
2059                 to_xxx = tolower;
2060                 goto lo_cont;
2061
2062         case B_up:
2063                 to_xxx = toupper;
2064  lo_cont:
2065                 s1 = s = xstrdup(as[0]);
2066                 while (*s1) {
2067                         *s1 = (*to_xxx)(*s1);
2068                         s1++;
2069                 }
2070                 setvar_p(res, s);
2071                 break;
2072
2073         case B_ix:
2074                 n = 0;
2075                 ll = strlen(as[1]);
2076                 l = strlen(as[0]) - ll;
2077                 if (ll > 0 && l >= 0) {
2078                         if (!icase) {
2079                                 s = strstr(as[0], as[1]);
2080                                 if (s) n = (s - as[0]) + 1;
2081                         } else {
2082                                 /* this piece of code is terribly slow and
2083                                  * really should be rewritten
2084                                  */
2085                                 for (i=0; i<=l; i++) {
2086                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2087                                                 n = i+1;
2088                                                 break;
2089                                         }
2090                                 }
2091                         }
2092                 }
2093                 setvar_i(res, n);
2094                 break;
2095
2096         case B_ti:
2097                 if (nargs > 1)
2098                         tt = getvar_i(av[1]);
2099                 else
2100                         time(&tt);
2101                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2102                 i = strftime(g_buf, MAXVARFMT,
2103                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2104                         localtime(&tt));
2105                 g_buf[i] = '\0';
2106                 setvar_s(res, g_buf);
2107                 break;
2108
2109         case B_ma:
2110                 re = as_regex(an[1], &sreg);
2111                 n = regexec(re, as[0], 1, pmatch, 0);
2112                 if (n == 0) {
2113                         pmatch[0].rm_so++;
2114                         pmatch[0].rm_eo++;
2115                 } else {
2116                         pmatch[0].rm_so = 0;
2117                         pmatch[0].rm_eo = -1;
2118                 }
2119                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2120                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2121                 setvar_i(res, pmatch[0].rm_so);
2122                 if (re == &sreg) regfree(re);
2123                 break;
2124
2125         case B_ge:
2126                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2127                 break;
2128
2129         case B_gs:
2130                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2131                 break;
2132
2133         case B_su:
2134                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2135                 break;
2136         }
2137
2138         nvfree(tv);
2139         return res;
2140 #undef tspl
2141 }
2142
2143 /*
2144  * Evaluate node - the heart of the program. Supplied with subtree
2145  * and place where to store result. returns ptr to result.
2146  */
2147 #define XC(n) ((n) >> 8)
2148
2149 static var *evaluate(node *op, var *res)
2150 {
2151 /* This procedure is recursive so we should count every byte */
2152 #define fnargs (G.evaluate__fnargs)
2153 /* seed is initialized to 1 */
2154 #define seed   (G.evaluate__seed)
2155 #define sreg   (G.evaluate__sreg)
2156
2157         node *op1;
2158         var *v1;
2159         union {
2160                 var *v;
2161                 const char *s;
2162                 double d;
2163                 int i;
2164         } L, R;
2165         uint32_t opinfo;
2166         int opn;
2167         union {
2168                 char *s;
2169                 rstream *rsm;
2170                 FILE *F;
2171                 var *v;
2172                 regex_t *re;
2173                 uint32_t info;
2174         } X;
2175
2176         if (!op)
2177                 return setvar_s(res, NULL);
2178
2179         v1 = nvalloc(2);
2180
2181         while (op) {
2182                 opinfo = op->info;
2183                 opn = (opinfo & OPNMASK);
2184                 g_lineno = op->lineno;
2185
2186                 /* execute inevitable things */
2187                 op1 = op->l.n;
2188                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2189                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2190                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2191                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2192                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2193
2194                 switch (XC(opinfo & OPCLSMASK)) {
2195
2196                 /* -- iterative node type -- */
2197
2198                 /* test pattern */
2199                 case XC( OC_TEST ):
2200                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2201                                 /* it's range pattern */
2202                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2203                                         op->info |= OF_CHECKED;
2204                                         if (ptest(op1->r.n))
2205                                                 op->info &= ~OF_CHECKED;
2206
2207                                         op = op->a.n;
2208                                 } else {
2209                                         op = op->r.n;
2210                                 }
2211                         } else {
2212                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2213                         }
2214                         break;
2215
2216                 /* just evaluate an expression, also used as unconditional jump */
2217                 case XC( OC_EXEC ):
2218                         break;
2219
2220                 /* branch, used in if-else and various loops */
2221                 case XC( OC_BR ):
2222                         op = istrue(L.v) ? op->a.n : op->r.n;
2223                         break;
2224
2225                 /* initialize for-in loop */
2226                 case XC( OC_WALKINIT ):
2227                         hashwalk_init(L.v, iamarray(R.v));
2228                         break;
2229
2230                 /* get next array item */
2231                 case XC( OC_WALKNEXT ):
2232                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2233                         break;
2234
2235                 case XC( OC_PRINT ):
2236                 case XC( OC_PRINTF ):
2237                         X.F = stdout;
2238                         if (op->r.n) {
2239                                 X.rsm = newfile(R.s);
2240                                 if (!X.rsm->F) {
2241                                         if (opn == '|') {
2242                                                 X.rsm->F = popen(R.s, "w");
2243                                                 if (X.rsm->F == NULL)
2244                                                         bb_perror_msg_and_die("popen");
2245                                                 X.rsm->is_pipe = 1;
2246                                         } else {
2247                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2248                                         }
2249                                 }
2250                                 X.F = X.rsm->F;
2251                         }
2252
2253                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2254                                 if (!op1) {
2255                                         fputs(getvar_s(intvar[F0]), X.F);
2256                                 } else {
2257                                         while (op1) {
2258                                                 L.v = evaluate(nextarg(&op1), v1);
2259                                                 if (L.v->type & VF_NUMBER) {
2260                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2261                                                                         getvar_i(L.v), TRUE);
2262                                                         fputs(g_buf, X.F);
2263                                                 } else {
2264                                                         fputs(getvar_s(L.v), X.F);
2265                                                 }
2266
2267                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2268                                         }
2269                                 }
2270                                 fputs(getvar_s(intvar[ORS]), X.F);
2271
2272                         } else {        /* OC_PRINTF */
2273                                 L.s = awk_printf(op1);
2274                                 fputs(L.s, X.F);
2275                                 free((char*)L.s);
2276                         }
2277                         fflush(X.F);
2278                         break;
2279
2280                 case XC( OC_DELETE ):
2281                         X.info = op1->info & OPCLSMASK;
2282                         if (X.info == OC_VAR) {
2283                                 R.v = op1->l.v;
2284                         } else if (X.info == OC_FNARG) {
2285                                 R.v = &fnargs[op1->l.i];
2286                         } else {
2287                                 syntax_error(EMSG_NOT_ARRAY);
2288                         }
2289
2290                         if (op1->r.n) {
2291                                 clrvar(L.v);
2292                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2293                                 hash_remove(iamarray(R.v), L.s);
2294                         } else {
2295                                 clear_array(iamarray(R.v));
2296                         }
2297                         break;
2298
2299                 case XC( OC_NEWSOURCE ):
2300                         g_progname = op->l.s;
2301                         break;
2302
2303                 case XC( OC_RETURN ):
2304                         copyvar(res, L.v);
2305                         break;
2306
2307                 case XC( OC_NEXTFILE ):
2308                         nextfile = TRUE;
2309                 case XC( OC_NEXT ):
2310                         nextrec = TRUE;
2311                 case XC( OC_DONE ):
2312                         clrvar(res);
2313                         break;
2314
2315                 case XC( OC_EXIT ):
2316                         awk_exit(L.d);
2317
2318                 /* -- recursive node type -- */
2319
2320                 case XC( OC_VAR ):
2321                         L.v = op->l.v;
2322                         if (L.v == intvar[NF])
2323                                 split_f0();
2324                         goto v_cont;
2325
2326                 case XC( OC_FNARG ):
2327                         L.v = &fnargs[op->l.i];
2328  v_cont:
2329                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2330                         break;
2331
2332                 case XC( OC_IN ):
2333                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2334                         break;
2335
2336                 case XC( OC_REGEXP ):
2337                         op1 = op;
2338                         L.s = getvar_s(intvar[F0]);
2339                         goto re_cont;
2340
2341                 case XC( OC_MATCH ):
2342                         op1 = op->r.n;
2343  re_cont:
2344                         X.re = as_regex(op1, &sreg);
2345                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2346                         if (X.re == &sreg) regfree(X.re);
2347                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2348                         break;
2349
2350                 case XC( OC_MOVE ):
2351                         /* if source is a temporary string, jusk relink it to dest */
2352                         if (R.v == v1+1 && R.v->string) {
2353                                 res = setvar_p(L.v, R.v->string);
2354                                 R.v->string = NULL;
2355                         } else {
2356                                 res = copyvar(L.v, R.v);
2357                         }
2358                         break;
2359
2360                 case XC( OC_TERNARY ):
2361                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2362                                 syntax_error(EMSG_POSSIBLE_ERROR);
2363                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2364                         break;
2365
2366                 case XC( OC_FUNC ):
2367                         if (!op->r.f->body.first)
2368                                 syntax_error(EMSG_UNDEF_FUNC);
2369
2370                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2371                         while (op1) {
2372                                 L.v = evaluate(nextarg(&op1), v1);
2373                                 copyvar(R.v, L.v);
2374                                 R.v->type |= VF_CHILD;
2375                                 R.v->x.parent = L.v;
2376                                 if (++R.v - X.v >= op->r.f->nargs)
2377                                         break;
2378                         }
2379
2380                         R.v = fnargs;
2381                         fnargs = X.v;
2382
2383                         L.s = g_progname;
2384                         res = evaluate(op->r.f->body.first, res);
2385                         g_progname = L.s;
2386
2387                         nvfree(fnargs);
2388                         fnargs = R.v;
2389                         break;
2390
2391                 case XC( OC_GETLINE ):
2392                 case XC( OC_PGETLINE ):
2393                         if (op1) {
2394                                 X.rsm = newfile(L.s);
2395                                 if (!X.rsm->F) {
2396                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2397                                                 X.rsm->F = popen(L.s, "r");
2398                                                 X.rsm->is_pipe = TRUE;
2399                                         } else {
2400                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2401                                         }
2402                                 }
2403                         } else {
2404                                 if (!iF) iF = next_input_file();
2405                                 X.rsm = iF;
2406                         }
2407
2408                         if (!X.rsm->F) {
2409                                 setvar_i(intvar[ERRNO], errno);
2410                                 setvar_i(res, -1);
2411                                 break;
2412                         }
2413
2414                         if (!op->r.n)
2415                                 R.v = intvar[F0];
2416
2417                         L.i = awk_getline(X.rsm, R.v);
2418                         if (L.i > 0) {
2419                                 if (!op1) {
2420                                         incvar(intvar[FNR]);
2421                                         incvar(intvar[NR]);
2422                                 }
2423                         }
2424                         setvar_i(res, L.i);
2425                         break;
2426
2427                 /* simple builtins */
2428                 case XC( OC_FBLTIN ):
2429                         switch (opn) {
2430
2431                         case F_in:
2432                                 R.d = (int)L.d;
2433                                 break;
2434
2435                         case F_rn:
2436                                 R.d = (double)rand() / (double)RAND_MAX;
2437                                 break;
2438 #if ENABLE_FEATURE_AWK_MATH
2439                         case F_co:
2440                                 R.d = cos(L.d);
2441                                 break;
2442
2443                         case F_ex:
2444                                 R.d = exp(L.d);
2445                                 break;
2446
2447                         case F_lg:
2448                                 R.d = log(L.d);
2449                                 break;
2450
2451                         case F_si:
2452                                 R.d = sin(L.d);
2453                                 break;
2454
2455                         case F_sq:
2456                                 R.d = sqrt(L.d);
2457                                 break;
2458 #else
2459                         case F_co:
2460                         case F_ex:
2461                         case F_lg:
2462                         case F_si:
2463                         case F_sq:
2464                                 syntax_error(EMSG_NO_MATH);
2465                                 break;
2466 #endif
2467                         case F_sr:
2468                                 R.d = (double)seed;
2469                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2470                                 srand(seed);
2471                                 break;
2472
2473                         case F_ti:
2474                                 R.d = time(NULL);
2475                                 break;
2476
2477                         case F_le:
2478                                 if (!op1)
2479                                         L.s = getvar_s(intvar[F0]);
2480                                 R.d = strlen(L.s);
2481                                 break;
2482
2483                         case F_sy:
2484                                 fflush(NULL);
2485                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2486                                                 ? (system(L.s) >> 8) : 0;
2487                                 break;
2488
2489                         case F_ff:
2490                                 if (!op1)
2491                                         fflush(stdout);
2492                                 else {
2493                                         if (L.s && *L.s) {
2494                                                 X.rsm = newfile(L.s);
2495                                                 fflush(X.rsm->F);
2496                                         } else {
2497                                                 fflush(NULL);
2498                                         }
2499                                 }
2500                                 break;
2501
2502                         case F_cl:
2503                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2504                                 if (X.rsm) {
2505                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2506                                         free(X.rsm->buffer);
2507                                         hash_remove(fdhash, L.s);
2508                                 }
2509                                 if (R.i != 0)
2510                                         setvar_i(intvar[ERRNO], errno);
2511                                 R.d = (double)R.i;
2512                                 break;
2513                         }
2514                         setvar_i(res, R.d);
2515                         break;
2516
2517                 case XC( OC_BUILTIN ):
2518                         res = exec_builtin(op, res);
2519                         break;
2520
2521                 case XC( OC_SPRINTF ):
2522                         setvar_p(res, awk_printf(op1));
2523                         break;
2524
2525                 case XC( OC_UNARY ):
2526                         X.v = R.v;
2527                         L.d = R.d = getvar_i(R.v);
2528                         switch (opn) {
2529                         case 'P':
2530                                 L.d = ++R.d;
2531                                 goto r_op_change;
2532                         case 'p':
2533                                 R.d++;
2534                                 goto r_op_change;
2535                         case 'M':
2536                                 L.d = --R.d;
2537                                 goto r_op_change;
2538                         case 'm':
2539                                 R.d--;
2540                                 goto r_op_change;
2541                         case '!':
2542                                 L.d = istrue(X.v) ? 0 : 1;
2543                                 break;
2544                         case '-':
2545                                 L.d = -R.d;
2546                                 break;
2547  r_op_change:
2548                                 setvar_i(X.v, R.d);
2549                         }
2550                         setvar_i(res, L.d);
2551                         break;
2552
2553                 case XC( OC_FIELD ):
2554                         R.i = (int)getvar_i(R.v);
2555                         if (R.i == 0) {
2556                                 res = intvar[F0];
2557                         } else {
2558                                 split_f0();
2559                                 if (R.i > nfields)
2560                                         fsrealloc(R.i);
2561                                 res = &Fields[R.i - 1];
2562                         }
2563                         break;
2564
2565                 /* concatenation (" ") and index joining (",") */
2566                 case XC( OC_CONCAT ):
2567                 case XC( OC_COMMA ):
2568                         opn = strlen(L.s) + strlen(R.s) + 2;
2569                         X.s = xmalloc(opn);
2570                         strcpy(X.s, L.s);
2571                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2572                                 L.s = getvar_s(intvar[SUBSEP]);
2573                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2574                                 strcat(X.s, L.s);
2575                         }
2576                         strcat(X.s, R.s);
2577                         setvar_p(res, X.s);
2578                         break;
2579
2580                 case XC( OC_LAND ):
2581                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2582                         break;
2583
2584                 case XC( OC_LOR ):
2585                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2586                         break;
2587
2588                 case XC( OC_BINARY ):
2589                 case XC( OC_REPLACE ):
2590                         R.d = getvar_i(R.v);
2591                         switch (opn) {
2592                         case '+':
2593                                 L.d += R.d;
2594                                 break;
2595                         case '-':
2596                                 L.d -= R.d;
2597                                 break;
2598                         case '*':
2599                                 L.d *= R.d;
2600                                 break;
2601                         case '/':
2602                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2603                                 L.d /= R.d;
2604                                 break;
2605                         case '&':
2606 #if ENABLE_FEATURE_AWK_MATH
2607                                 L.d = pow(L.d, R.d);
2608 #else
2609                                 syntax_error(EMSG_NO_MATH);
2610 #endif
2611                                 break;
2612                         case '%':
2613                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2614                                 L.d -= (int)(L.d / R.d) * R.d;
2615                                 break;
2616                         }
2617                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2618                         break;
2619
2620                 case XC( OC_COMPARE ):
2621                         if (is_numeric(L.v) && is_numeric(R.v)) {
2622                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2623                         } else {
2624                                 L.s = getvar_s(L.v);
2625                                 R.s = getvar_s(R.v);
2626                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2627                         }
2628                         switch (opn & 0xfe) {
2629                         case 0:
2630                                 R.i = (L.d > 0);
2631                                 break;
2632                         case 2:
2633                                 R.i = (L.d >= 0);
2634                                 break;
2635                         case 4:
2636                                 R.i = (L.d == 0);
2637                                 break;
2638                         }
2639                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2640                         break;
2641
2642                 default:
2643                         syntax_error(EMSG_POSSIBLE_ERROR);
2644                 }
2645                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2646                         op = op->a.n;
2647                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2648                         break;
2649                 if (nextrec)
2650                         break;
2651         }
2652         nvfree(v1);
2653         return res;
2654 #undef fnargs
2655 #undef seed
2656 #undef sreg
2657 }
2658
2659
2660 /* -------- main & co. -------- */
2661
2662 static int awk_exit(int r)
2663 {
2664         var tv;
2665         unsigned i;
2666         hash_item *hi;
2667
2668         zero_out_var(&tv);
2669
2670         if (!exiting) {
2671                 exiting = TRUE;
2672                 nextrec = FALSE;
2673                 evaluate(endseq.first, &tv);
2674         }
2675
2676         /* waiting for children */
2677         for (i = 0; i < fdhash->csize; i++) {
2678                 hi = fdhash->items[i];
2679                 while (hi) {
2680                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2681                                 pclose(hi->data.rs.F);
2682                         hi = hi->next;
2683                 }
2684         }
2685
2686         exit(r);
2687 }
2688
2689 /* if expr looks like "var=value", perform assignment and return 1,
2690  * otherwise return 0 */
2691 static int is_assignment(const char *expr)
2692 {
2693         char *exprc, *s, *s0, *s1;
2694
2695         exprc = xstrdup(expr);
2696         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2697                 free(exprc);
2698                 return FALSE;
2699         }
2700
2701         *(s++) = '\0';
2702         s0 = s1 = s;
2703         while (*s)
2704                 *(s1++) = nextchar(&s);
2705
2706         *s1 = '\0';
2707         setvar_u(newvar(exprc), s0);
2708         free(exprc);
2709         return TRUE;
2710 }
2711
2712 /* switch to next input file */
2713 static rstream *next_input_file(void)
2714 {
2715 #define rsm          (G.next_input_file__rsm)
2716 #define files_happen (G.next_input_file__files_happen)
2717
2718         FILE *F = NULL;
2719         const char *fname, *ind;
2720
2721         if (rsm.F) fclose(rsm.F);
2722         rsm.F = NULL;
2723         rsm.pos = rsm.adv = 0;
2724
2725         do {
2726                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2727                         if (files_happen)
2728                                 return NULL;
2729                         fname = "-";
2730                         F = stdin;
2731                 } else {
2732                         ind = getvar_s(incvar(intvar[ARGIND]));
2733                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2734                         if (fname && *fname && !is_assignment(fname))
2735                                 F = afopen(fname, "r");
2736                 }
2737         } while (!F);
2738
2739         files_happen = TRUE;
2740         setvar_s(intvar[FILENAME], fname);
2741         rsm.F = F;
2742         return &rsm;
2743 #undef rsm
2744 #undef files_happen
2745 }
2746
2747 int awk_main(int argc, char **argv);
2748 int awk_main(int argc, char **argv)
2749 {
2750         unsigned opt;
2751         char *opt_F, *opt_W;
2752         llist_t *opt_v = NULL;
2753         int i, j, flen;
2754         var *v;
2755         var tv;
2756         char **envp;
2757         char *vnames = (char *)vNames; /* cheat */
2758         char *vvalues = (char *)vValues;
2759
2760         INIT_G();
2761
2762         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2763          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2764         if (ENABLE_LOCALE_SUPPORT)
2765                 setlocale(LC_NUMERIC, "C");
2766
2767         zero_out_var(&tv);
2768
2769         /* allocate global buffer */
2770         g_buf = xmalloc(MAXVARFMT + 1);
2771
2772         vhash = hash_init();
2773         ahash = hash_init();
2774         fdhash = hash_init();
2775         fnhash = hash_init();
2776
2777         /* initialize variables */
2778         for (i = 0; *vnames; i++) {
2779                 intvar[i] = v = newvar(nextword(&vnames));
2780                 if (*vvalues != '\377')
2781                         setvar_s(v, nextword(&vvalues));
2782                 else
2783                         setvar_i(v, 0);
2784
2785                 if (*vnames == '*') {
2786                         v->type |= VF_SPECIAL;
2787                         vnames++;
2788                 }
2789         }
2790
2791         handle_special(intvar[FS]);
2792         handle_special(intvar[RS]);
2793
2794         newfile("/dev/stdin")->F = stdin;
2795         newfile("/dev/stdout")->F = stdout;
2796         newfile("/dev/stderr")->F = stderr;
2797
2798         /* Huh, people report that sometimes environ is NULL. Oh well. */
2799         if (environ) for (envp = environ; *envp; envp++) {
2800                 char *s = xstrdup(*envp);
2801                 char *s1 = strchr(s, '=');
2802                 if (s1) {
2803                         *s1++ = '\0';
2804                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1);
2805                 }
2806                 free(s);
2807         }
2808         opt_complementary = "v::";
2809         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2810         argv += optind;
2811         argc -= optind;
2812         if (opt & 0x1)
2813                 setvar_s(intvar[FS], opt_F); // -F
2814         while (opt_v) { /* -v */
2815                 if (!is_assignment(llist_pop(&opt_v)))
2816                         bb_show_usage();
2817         }
2818         if (opt & 0x4) { // -f
2819                 char *s = s; /* die, gcc, die */
2820                 FILE *from_file = afopen(g_progname, "r");
2821                 /* one byte is reserved for some trick in next_token */
2822                 if (fseek(from_file, 0, SEEK_END) == 0) {
2823                         flen = ftell(from_file);
2824                         s = xmalloc(flen + 4);
2825                         fseek(from_file, 0, SEEK_SET);
2826                         i = 1 + fread(s + 1, 1, flen, from_file);
2827                 } else {
2828                         for (i = j = 1; j > 0; i += j) {
2829                                 s = xrealloc(s, i + 4096);
2830                                 j = fread(s + i, 1, 4094, from_file);
2831                         }
2832                 }
2833                 s[i] = '\0';
2834                 fclose(from_file);
2835                 parse_program(s + 1);
2836                 free(s);
2837         } else { // no -f: take program from 1st parameter
2838                 if (!argc)
2839                         bb_show_usage();
2840                 g_progname = "cmd. line";
2841                 parse_program(*argv++);
2842                 argc--;
2843         }
2844         if (opt & 0x8) // -W
2845                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2846
2847         /* fill in ARGV array */
2848         setvar_i(intvar[ARGC], argc + 1);
2849         setari_u(intvar[ARGV], 0, "awk");
2850         i = 0;
2851         while (*argv)
2852                 setari_u(intvar[ARGV], ++i, *argv++);
2853
2854         evaluate(beginseq.first, &tv);
2855         if (!mainseq.first && !endseq.first)
2856                 awk_exit(EXIT_SUCCESS);
2857
2858         /* input file could already be opened in BEGIN block */
2859         if (!iF) iF = next_input_file();
2860
2861         /* passing through input files */
2862         while (iF) {
2863                 nextfile = FALSE;
2864                 setvar_i(intvar[FNR], 0);
2865
2866                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2867                         nextrec = FALSE;
2868                         incvar(intvar[NR]);
2869                         incvar(intvar[FNR]);
2870                         evaluate(mainseq.first, &tv);
2871
2872                         if (nextfile)
2873                                 break;
2874                 }
2875
2876                 if (i < 0)
2877                         syntax_error(strerror(errno));
2878
2879                 iF = next_input_file();
2880         }
2881
2882         awk_exit(EXIT_SUCCESS);
2883         /*return 0;*/
2884 }