awk: style fixes; remove one xstrdup/free pair; testsuite
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13 extern char **environ;
14
15 /* This is a NOEXEC applet. Be very careful! */
16
17
18 #define MAXVARFMT       240     /* size getvar_s() passes to fmt_num() when formatting into g_buf */
19 #define MINNVBLOCK      64      /* smallest number of var slots nvalloc() puts in a new block */
20
21 /* variable flags (bits of var::type) */
22 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
23 #define VF_ARRAY        0x0002  /* 1 = it's an array */
24
25 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
26 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
27 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
28 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
29 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
30 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
31 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
32
33 /* these flags are static, don't change them when value is changed
 * (clrvar() keeps exactly these bits and drops all the others) */
34 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
35
36 /* Variable: one awk value. It carries both a numeric and a string
 * representation; the VF_* bits in 'type' say which of them is valid. */
37 typedef struct var_s {
38         unsigned type;            /* flags */
39         double number;            /* numeric value; getvar_i() returns it as-is when VF_NUMBER or VF_CACHED is set */
40         char *string;             /* string value; NULL reads as "" in getvar_s(); clrvar() does not free it when VF_FSTR is set */
41         union {
42                 int aidx;               /* func arg idx (for compilation stage) */
43                 struct xhash_s *array;  /* array ptr */
44                 struct var_s *parent;   /* for func args, ptr to actual parameter */
45                 char **walker;          /* list of array elements (for..in) */
46         } x;
47 } var;
48
49 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50 typedef struct chain_s {
51         struct node_s *first;     /* head of the node list */
52         struct node_s *last;      /* tail of the node list */
53         const char *programname;  /* NOTE(review): presumably the name of the program source this chain came from - confirm */
54 } chain;
55
56 /* Function: entries of this type are stored in fnhash (see newfunc()) */
57 typedef struct func_s {
58         unsigned nargs;           /* NOTE(review): presumably the number of declared parameters - confirm */
59         struct chain_s body;      /* node chain of the function body */
60 } func;
61
62 /* I/O stream: entries of this type are stored in fdhash (see newfile()).
 * NOTE(review): the code that uses buffer/adv/size/pos is not in this part
 * of the file; the field notes below are read off the names only. */
63 typedef struct rstream_s {
64         FILE *F;                  /* underlying stdio stream */
65         char *buffer;             /* presumably the read buffer - confirm */
66         int adv;
67         int size;
68         int pos;
69         smallint is_pipe;         /* presumably set when F is a pipe rather than a file - confirm */
70 } rstream;
71
/* One entry of an xhash bucket chain.
 * 'data' must stay the first member: hash_search() and hash_find() hand out
 * &item->data, and hash_find() assigns that pointer straight back to a
 * hash_item pointer.
 * hash_find() allocates sizeof(hash_item) plus the key length (with NUL)
 * and copies the key into 'name'. */
72 typedef struct hash_item_s {
73         union {
74                 struct var_s v;         /* variable/array hash */
75                 struct rstream_s rs;    /* redirect streams hash */
76                 struct func_s f;        /* functions hash */
77         } data;
78         struct hash_item_s *next;       /* next in chain */
79         char name[1];                   /* really it's longer */
80 } hash_item;
81
/* Chained hash table keyed by NUL-terminated strings.
 * hash_init() creates it, hash_find() inserts, hash_remove() deletes and
 * hash_rebuild() moves it to the next size in PRIMES[]. */
82 typedef struct xhash_s {
83         unsigned nel;           /* num of elements */
84         unsigned csize;         /* current hash size */
85         unsigned nprime;        /* next hash size in PRIMES[] */
86         unsigned glen;          /* summary length of item names (each counted with its NUL) */
87         struct hash_item_s **items; /* csize bucket heads */
88 } xhash;
89
90 /* Tree node.
 * NOTE(review): the parser and evaluator are not in this part of the file.
 * Presumably 'info' holds an OC_ or ST_ code plus flags as in tokeninfo[],
 * and l / r / a are the left, right and additional operands - confirm. */
91 typedef struct node_s {
92         uint32_t info;
93         unsigned lineno;
94         union {
95                 struct node_s *n;
96                 var *v;
97                 int i;
98                 char *s;
99                 regex_t *re;
100         } l;
101         union {
102                 struct node_s *n;
103                 regex_t *ire;
104                 func *f;
105                 int argno;
106         } r;
107         union {
108                 struct node_s *n;
109         } a;
110 } node;
111
112 /* Block of temporary variables */
113 typedef struct nvblock_s {
114         int size;
115         var *pos;
116         struct nvblock_s *prev;
117         struct nvblock_s *next;
118         var nv[0];
119 } nvblock;
120
/* A node bundled with two regex_t objects; used for the global
 * fsplitter and rsplitter and for exec_builtin's private splitter.
 * NOTE(review): presumably re[0]/re[1] are two compiled variants of the
 * same separator regex - confirm against the code that fills them in. */
121 typedef struct tsplitter_s {
122         node n;
123         regex_t re[2];
124 } tsplitter;
125
126 /* simple token classes */
127 /* Order and hex values are very important!!!  See next_token() */
/* Each class is a single bit. The groups of tokenlist[] appear in the same
 * order as the bits from TC_SEQSTART up to TC_END; an NTC byte in
 * tokenlist[] steps to the next class. */
128 #define TC_SEQSTART      1                              /* ( */
129 #define TC_SEQTERM      (1 << 1)                /* ) */
130 #define TC_REGEXP       (1 << 2)                /* /.../ */
131 #define TC_OUTRDR       (1 << 3)                /* | > >> */
132 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
133 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
134 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
135 #define TC_IN           (1 << 7)
136 #define TC_COMMA        (1 << 8)
137 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
138 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
139 #define TC_ARRTERM      (1 << 11)               /* ] */
140 #define TC_GRPSTART     (1 << 12)               /* { */
141 #define TC_GRPTERM      (1 << 13)               /* } */
142 #define TC_SEMICOL      (1 << 14)
143 #define TC_NEWLINE      (1 << 15)
144 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
145 #define TC_WHILE        (1 << 17)
146 #define TC_ELSE         (1 << 18)
147 #define TC_BUILTIN      (1 << 19)
148 #define TC_GETLINE      (1 << 20)
149 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
150 #define TC_BEGIN        (1 << 22)
151 #define TC_END          (1 << 23)
152 #define TC_EOF          (1 << 24)
153 #define TC_VARIABLE     (1 << 25)
154 #define TC_ARRAY        (1 << 26)
155 #define TC_FUNCTION     (1 << 27)
156 #define TC_STRING       (1 << 28)
157 #define TC_NUMBER       (1 << 29)
158
159 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
160
161 /* combined token classes */
162 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
165                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166
167 #define TC_STATEMNT (TC_STATX | TC_WHILE)
168 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
169
170 /* word tokens, cannot mean something else if not expected */
171 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
172                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173
174 /* discard newlines after these */
175 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
176                    | TC_BINOP | TC_OPTERM)
177
178 /* what can expression begin with */
179 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
180 /* what can group begin with */
181 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182
183 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184 /* operator is inserted between them */
185 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
186                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
187 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188
/* Operand flags stored in the info word above the operation class byte.
 * NOTE(review): going by the names and by the builtin comment further
 * down, RESn presumably means "resolve operand n", and STRn / NUM1 "...to
 * a string / to a number" - confirm in the evaluator. */
189 #define OF_RES1    0x010000
190 #define OF_RES2    0x020000
191 #define OF_STR1    0x040000
192 #define OF_STR2    0x080000
193 #define OF_NUM1    0x100000
194 #define OF_CHECKED 0x200000
195
196 /* combined operator flags */
/* The first letter of each name is the flag set for operand 1, the second
 * letter the one for operand 2; 'x' contributes no flag. */
197 #define xx      0
198 #define xV      OF_RES2
199 #define xS      (OF_RES2 | OF_STR2)
200 #define Vx      OF_RES1
201 #define VV      (OF_RES1 | OF_RES2)
202 #define Nx      (OF_RES1 | OF_NUM1)
203 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
204 #define Sx      (OF_RES1 | OF_STR1)
205 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
206 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207
208 #define OPCLSMASK 0xFF00     /* selects the OC_ / ST_ operation class byte */
209 #define OPNMASK   0x007F     /* selects the low 7 bits (operator char or builtin id in tokeninfo[]) */
210
211 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
212  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
213  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
214  */
215 #define P(x)      (x << 24)
216 #define PRIMASK   0x7F000000
217 #define PRIMASK2  0x7E000000
218
219 /* Operation classes */
/* Every OC_ and ST_ value occupies only the byte selected by OPCLSMASK
 * (0xFF00), so it can be OR-ed with operand flags, a priority and a low
 * byte operand, as done in tokeninfo[]. */
220
221 #define SHIFT_TIL_THIS  0x0600
222 #define RECUR_FROM_THIS 0x1000
223
224 enum {
225         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
226         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
227
228         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
229         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
230         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
231
232         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
233         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
234         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
235         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
236         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
237         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
238         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
239         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
240         OC_DONE = 0x2800,
241
242         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
243         ST_WHILE = 0x3300
244 };
245
246 /* simple builtins */
/* Ids combined with OC_FBLTIN in tokeninfo[]. Going by the matching
 * tokenlist[] entries: in=int rn=rand co=cos ex=exp lg=log si=sin sq=sqrt
 * sr=srand ti=systime le=length sy=system ff=fflush cl=close. */
247 enum {
248         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
249         F_ti,   F_le,   F_sy,   F_ff,   F_cl
250 };
251
252 /* builtins */
/* Ids combined with OC_BUILTIN (OC_B) in tokeninfo[]. Going by the matching
 * tokenlist[] entries: a2=atan2 ix=index ma=match sp=split ss=substr
 * ti=strftime lo=tolower up=toupper ge=gensub gs=gsub su=sub an=and
 * co=compl ls=lshift or=or rs=rshift xo=xor. */
253 enum {
254         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
255         B_ge,   B_gs,   B_su,
256         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
257 };
258
259 /* tokens and their corresponding info values */
260
261 #define NTC     "\377"  /* switch to next token class (tc<<1) */
262 #define NTCC    '\377'
263
264 #define OC_B    OC_BUILTIN
265
/* Token spellings, grouped by token class.
 * Every entry starts with one byte holding the length of the spelling
 * (written in octal, so "\10continue" is 8 characters), followed by the
 * spelling itself. NTC closes a group; the groups follow the order of the
 * TC_ bits from TC_SEQSTART to TC_END. A zero length byte ends the list.
 * tokeninfo[] holds one value per spelling, in the same order. */
266 static const char tokenlist[] =
267         "\1("       NTC
268         "\1)"       NTC
269         "\1/"       NTC                                 /* REGEXP */
270         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
271         "\2++"      "\2--"      NTC                     /* UOPPOST */
272         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
273         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
274         "\2*="      "\2/="      "\2%="      "\2^="
275         "\1+"       "\1-"       "\3**="     "\2**"
276         "\1/"       "\1%"       "\1^"       "\1*"
277         "\2!="      "\2>="      "\2<="      "\1>"
278         "\1<"       "\2!~"      "\1~"       "\2&&"
279         "\2||"      "\1?"       "\1:"       NTC
280         "\2in"      NTC
281         "\1,"       NTC
282         "\1|"       NTC
283         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
284         "\1]"       NTC
285         "\1{"       NTC
286         "\1}"       NTC
287         "\1;"       NTC
288         "\1\n"      NTC
289         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
290         "\10continue"           "\6delete"  "\5print"
291         "\6printf"  "\4next"    "\10nextfile"
292         "\6return"  "\4exit"    NTC
293         "\5while"   NTC
294         "\4else"    NTC
295
296         "\3and"     "\5compl"   "\6lshift"  "\2or"
297         "\6rshift"  "\3xor"
298         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
299         "\3cos"     "\3exp"     "\3int"     "\3log"
300         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
301         "\6gensub"  "\4gsub"    "\5index"   "\6length"
302         "\5match"   "\5split"   "\7sprintf" "\3sub"
303         "\6substr"  "\7systime" "\10strftime"
304         "\7tolower" "\7toupper" NTC
305         "\7getline" NTC
306         "\4func"    "\10function"   NTC
307         "\5BEGIN"   NTC
308         "\3END"     "\0"
309         ;
310
/* Info word for each spelling of tokenlist[], in the same order (NTC
 * separators have no entry here). A word is built from an OC_ or ST_
 * class, operand flags (xV, NV, Sx, ...), a P() priority and a low byte
 * that is an operator character, a comparison code or a builtin id.
 * Keep this table in step with tokenlist[] when adding tokens. */
311 static const uint32_t tokeninfo[] = {
312         0,
313         0,
314         OC_REGEXP,
315         xS|'a',     xS|'w',     xS|'|',
316         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
317         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
318             OC_FIELD|xV|P(5),
319         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
320             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
321         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
322             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
323         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
324             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
325         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
326             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
327         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
328             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
329         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
330             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
331         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
332             OC_COLON|xx|P(67)|':',
333         OC_IN|SV|P(49),
334         OC_COMMA|SS|P(80),
335         OC_PGETLINE|SV|P(37),
336         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
337             OC_UNARY|xV|P(19)|'!',
338         0,
339         0,
340         0,
341         0,
342         0,
343         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
344         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
345         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
346         OC_RETURN|Vx,   OC_EXIT|Nx,
347         ST_WHILE,
348         0,
349
350         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
357         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
358         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
359         OC_GETLINE|SV|P(0),
360         0,      0,
361         0,
362         0
363 };
364
365 /* internal variable names and their initial values       */
366 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum gives the index of each internal variable in intvar[];
 * vNames[] lists the names in exactly the same order. */
367 enum {
368         CONVFMT,    OFMT,       FS,         OFS,
369         ORS,        RS,         RT,         FILENAME,
370         SUBSEP,     ARGIND,     ARGC,       ARGV,
371         ERRNO,      FNR,
372         NR,         NF,         IGNORECASE,
373         ENVIRON,    F0,         NUM_INTERNAL_VARS
374 };
375
/* NUL-separated names. An asterisk sits right after the NUL of FS, RS, NF,
 * IGNORECASE and $.
 * NOTE(review): presumably the asterisk flags the name just before it as
 * VF_SPECIAL - confirm against the loop that reads this table. */
376 static const char vNames[] =
377         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
378         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
379         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
380         "ERRNO\0"   "FNR\0"
381         "NR\0"      "NF\0*"     "IGNORECASE\0*"
382         "ENVIRON\0" "$\0*"      "\0";
383
/* NUL-separated initial string values for the first nine variables
 * (CONVFMT through SUBSEP), closed by a '\377' byte.
 * NOTE(review): presumably the variables after '\377' get no string
 * value - confirm against the loop that reads this table. */
384 static const char vValues[] =
385         "%.6g\0"    "%.6g\0"    " \0"       " \0"
386         "\n\0"      "\n\0"      "\0"        "\0"
387         "\034\0"
388         "\377";
389
/* Hash table sizes: hash_init() starts a table with FIRST_PRIME buckets and
 * hash_rebuild() moves it to the next entry of PRIMES[] each time it grows.
 * FIRST_PRIME carries no trailing semicolon, so it can be used inside
 * expressions as well as in a plain assignment. */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
393
394
395
396 /* Globals. Split in two parts so that first one is addressed
397  * with (mostly short) negative offsets */
/* This first part is reached through the G1 macro, i.e. it is the
 * struct globals located just below ptr_to_globals. */
398 struct globals {
399         chain beginseq, mainseq, endseq, *seq;
400         node *break_ptr, *continue_ptr;
401         rstream *iF;
402         xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: newvar(), fdhash: newfile(), fnhash: newfunc() */
403         const char *g_progname; /* printed by syntax_error() */
404         int g_lineno;           /* printed by syntax_error(); next_token() copies t_lineno into it */
405         int nfields;
406         int maxfields; /* used in fsrealloc() only */
407         var *Fields;
408         nvblock *g_cb;          /* block nvalloc() / nvfree() currently work on */
409         char *g_pos;            /* where next_token() starts reading */
410         char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
411         smallint icase;
412         smallint exiting;
413         smallint nextrec;
414         smallint nextfile;
415         smallint is_f0_split;
416 };
/* Second part of the globals, reached through the G macro (it starts
 * exactly at ptr_to_globals). Members named func__name used to be static
 * locals of the function named before the double underscore. */
417 struct globals2 {
418         uint32_t t_info; /* often used */
419         uint32_t t_tclass;
420         char *t_string;
421         int t_lineno;   /* advanced by skip_spaces() and next_token() */
422         int t_rollback; /* when set, next_token() only clears it and reads nothing new */
423
424         var *intvar[NUM_INTERNAL_VARS]; /* often used */
425
426         /* former statics from various functions */
427         char *split_f0__fstrings;
428
429         uint32_t next_token__save_tclass;
430         uint32_t next_token__save_info;
431         uint32_t next_token__ltclass;   /* INIT_G() sets it to TC_OPTERM */
432         smallint next_token__concat_inserted;
433
434         smallint next_input_file__files_happen;
435         rstream next_input_file__rsm;
436
437         var *evaluate__fnargs;
438         unsigned evaluate__seed;        /* INIT_G() sets it to 1 */
439         regex_t evaluate__sreg;
440
441         var ptest__v;
442
443         tsplitter exec_builtin__tspl;
444
445         /* biggest and least used members go last */
446         double t_double;
447         tsplitter fsplitter, rsplitter;
448 };
/* Accessors for the two global structures.
 * INIT_G() allocates one zeroed chunk of sizeof(G1) + sizeof(G) bytes and
 * stores the address sizeof(G1) bytes into it, so struct globals (G1) ends
 * right where struct globals2 (G) begins.
 * The short names defined below let the code use the members like plain
 * global variables. */
449 #define G1 (ptr_to_globals[-1])
450 #define G (*(struct globals2 *const)ptr_to_globals)
451 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
452 /* char G1size[sizeof(G1)]; - 0x6c */
453 /* char Gsize[sizeof(G)]; - 0x1cc */
454 /* Trying to keep most of members accessible with short offsets: */
455 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
456 #define beginseq     (G1.beginseq    )
457 #define mainseq      (G1.mainseq     )
458 #define endseq       (G1.endseq      )
459 #define seq          (G1.seq         )
460 #define break_ptr    (G1.break_ptr   )
461 #define continue_ptr (G1.continue_ptr)
462 #define iF           (G1.iF          )
463 #define vhash        (G1.vhash       )
464 #define ahash        (G1.ahash       )
465 #define fdhash       (G1.fdhash      )
466 #define fnhash       (G1.fnhash      )
467 #define g_progname   (G1.g_progname  )
468 #define g_lineno     (G1.g_lineno    )
469 #define nfields      (G1.nfields     )
470 #define maxfields    (G1.maxfields   )
471 #define Fields       (G1.Fields      )
472 #define g_cb         (G1.g_cb        )
473 #define g_pos        (G1.g_pos       )
474 #define g_buf        (G1.g_buf       )
475 #define icase        (G1.icase       )
476 #define exiting      (G1.exiting     )
477 #define nextrec      (G1.nextrec     )
478 #define nextfile     (G1.nextfile    )
479 #define is_f0_split  (G1.is_f0_split )
480 #define t_info       (G.t_info      )
481 #define t_tclass     (G.t_tclass    )
482 #define t_string     (G.t_string    )
483 #define t_double     (G.t_double    )
484 #define t_lineno     (G.t_lineno    )
485 #define t_rollback   (G.t_rollback  )
486 #define intvar       (G.intvar      )
487 #define fsplitter    (G.fsplitter   )
488 #define rsplitter    (G.rsplitter   )
/* Allocate and zero both global structures, then set the two members
 * whose initial value is not zero. */
489 #define INIT_G() do { \
490         PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \
491         G.next_token__ltclass = TC_OPTERM; \
492         G.evaluate__seed = 1; \
493 } while (0)
494
495
496 /* function prototypes: forward declarations for functions that are
 * called before their definition (for instance handle_special() from the
 * setvar_ helpers and fmt_num() from getvar_s()) */
497 static void handle_special(var *);
498 static node *parse_expr(uint32_t);
499 static void chain_group(void);
500 static var *evaluate(node *, var *);
501 static rstream *next_input_file(void);
502 static int fmt_num(char *, int, const char *, double, int);
503 static int awk_exit(int) ATTRIBUTE_NORETURN;
504
505 /* ---- error handling ---- */
506
/* Message texts handed to syntax_error(), which prints them after the
 * program name and line number. */
507 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
508 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
509 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
510 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
511 static const char EMSG_INV_FMT[] = "Invalid format specifier";
512 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
513 static const char EMSG_NOT_ARRAY[] = "Not an array";
514 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
515 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
516 #if !ENABLE_FEATURE_AWK_MATH
517 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
518 #endif
519
520 static void zero_out_var(var * vp)
521 {
522         memset(vp, 0, sizeof(*vp));
523 }
524
525 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
526 static void syntax_error(const char * const message)
527 {
528         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
529 }
530
531 /* ---- hash stuff ---- */
532
/* Hash a NUL-terminated string: for each character c, h = c + h*64 - h
 * (that is, h*63 + c) in unsigned arithmetic. The empty string hashes to 0.
 * Callers reduce the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = *p + (h << 6) - h;

	return h;
}
540
541 /* create new hash */
542 static xhash *hash_init(void)
543 {
544         xhash *newhash;
545
546         newhash = xzalloc(sizeof(xhash));
547         newhash->csize = FIRST_PRIME;
548         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
549
550         return newhash;
551 }
552
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
555 {
556         hash_item *hi;
557
558         hi = hash->items [ hashidx(name) % hash->csize ];
559         while (hi) {
560                 if (strcmp(hi->name, name) == 0)
561                         return &(hi->data);
562                 hi = hi->next;
563         }
564         return NULL;
565 }
566
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
569 {
570         unsigned newsize, i, idx;
571         hash_item **newitems, *hi, *thi;
572
573         if (hash->nprime == ARRAY_SIZE(PRIMES))
574                 return;
575
576         newsize = PRIMES[hash->nprime++];
577         newitems = xzalloc(newsize * sizeof(hash_item *));
578
579         for (i = 0; i < hash->csize; i++) {
580                 hi = hash->items[i];
581                 while (hi) {
582                         thi = hi;
583                         hi = thi->next;
584                         idx = hashidx(thi->name) % newsize;
585                         thi->next = newitems[idx];
586                         newitems[idx] = thi;
587                 }
588         }
589
590         free(hash->items);
591         hash->csize = newsize;
592         hash->items = newitems;
593 }
594
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
597 {
598         hash_item *hi;
599         unsigned idx;
600         int l;
601
602         hi = hash_search(hash, name);
603         if (!hi) {
604                 if (++hash->nel / hash->csize > 10)
605                         hash_rebuild(hash);
606
607                 l = strlen(name) + 1;
608                 hi = xzalloc(sizeof(hash_item) + l);
609                 memcpy(hi->name, name, l);
610
611                 idx = hashidx(name) % hash->csize;
612                 hi->next = hash->items[idx];
613                 hash->items[idx] = hi;
614                 hash->glen += l;
615         }
616         return &(hi->data);
617 }
618
/* Typed wrappers around hash_find(): look the name up, creating a
 * zero-filled entry if it does not exist yet. findvar() works on any
 * table; the others are bound to vhash, fdhash and fnhash. */
619 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
620 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
621 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
623
624 static void hash_remove(xhash *hash, const char *name)
625 {
626         hash_item *hi, **phi;
627
628         phi = &(hash->items[hashidx(name) % hash->csize]);
629         while (*phi) {
630                 hi = *phi;
631                 if (strcmp(hi->name, name) == 0) {
632                         hash->glen -= (strlen(name) + 1);
633                         hash->nel--;
634                         *phi = hi->next;
635                         free(hi);
636                         break;
637                 }
638                 phi = &(hi->next);
639         }
640 }
641
642 /* ------ some useful functions ------ */
643
644 static void skip_spaces(char **s)
645 {
646         char *p = *s;
647
648         while (1) {
649                 if (*p == '\\' && p[1] == '\n') {
650                         p++;
651                         t_lineno++;
652                 } else if (*p != ' ' && *p != '\t') {
653                         break;
654                 }
655                 p++;
656         }
657         *s = p;
658 }
659
/* Return the string *s points at and move *s just past its terminating
 * NUL, i.e. to the start of the next string in a NUL-separated list. */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;

	*s = end + 1;
	return word;
}
668
/* Fetch one character from *s and advance *s past it.
 * A backslash is handed to bb_process_escape_sequence(). If that call
 * returns a backslash without having moved *s, the character following
 * the backslash is returned instead and *s moves past it. */
static char nextchar(char **s)
{
	char ch = *((*s)++);

	if (ch == '\\') {
		char *after_backslash = *s;

		ch = bb_process_escape_sequence((const char**)s);
		if (ch == '\\' && *s == after_backslash)
			ch = *((*s)++);
	}
	return ch;
}
679
680 static int ALWAYS_INLINE isalnum_(int c)
681 {
682         return (isalnum(c) || c == '_');
683 }
684
/* Open path with the given mode through xfopen(); the path "-" yields
 * stdin instead, whatever the mode. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return xfopen(path, mode);
}
689
690 /* -------- working with variables (set/get/copy/etc) -------- */
691
692 static xhash *iamarray(var *v)
693 {
694         var *a = v;
695
696         while (a->type & VF_CHILD)
697                 a = a->x.parent;
698
699         if (!(a->type & VF_ARRAY)) {
700                 a->type |= VF_ARRAY;
701                 a->x.array = hash_init();
702         }
703         return a->x.array;
704 }
705
706 static void clear_array(xhash *array)
707 {
708         unsigned i;
709         hash_item *hi, *thi;
710
711         for (i = 0; i < array->csize; i++) {
712                 hi = array->items[i];
713                 while (hi) {
714                         thi = hi;
715                         hi = hi->next;
716                         free(thi->data.v.string);
717                         free(thi);
718                 }
719                 array->items[i] = NULL;
720         }
721         array->glen = array->nel = 0;
722 }
723
724 /* clear a variable */
725 static var *clrvar(var *v)
726 {
727         if (!(v->type & VF_FSTR))
728                 free(v->string);
729
730         v->type &= VF_DONTTOUCH;
731         v->type |= VF_DIRTY;
732         v->string = NULL;
733         return v;
734 }
735
736 /* assign string value to variable */
737 static var *setvar_p(var *v, char *value)
738 {
739         clrvar(v);
740         v->string = value;
741         handle_special(v);
742         return v;
743 }
744
745 /* same as setvar_p but make a copy of string */
746 static var *setvar_s(var *v, const char *value)
747 {
748         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
749 }
750
751 /* same as setvar_s but set USER flag */
752 static var *setvar_u(var *v, const char *value)
753 {
754         setvar_s(v, value);
755         v->type |= VF_USER;
756         return v;
757 }
758
759 /* set array element to user string */
760 static void setari_u(var *a, int idx, const char *s)
761 {
762         char sidx[sizeof(int)*3 + 1];
763         var *v;
764
765         sprintf(sidx, "%d", idx);
766         v = findvar(iamarray(a), sidx);
767         setvar_u(v, s);
768 }
769
770 /* assign numeric value to variable */
771 static var *setvar_i(var *v, double value)
772 {
773         clrvar(v);
774         v->type |= VF_NUMBER;
775         v->number = value;
776         handle_special(v);
777         return v;
778 }
779
780 static const char *getvar_s(var *v)
781 {
782         /* if v is numeric and has no cached string, convert it to string */
783         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
784                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
785                 v->string = xstrdup(g_buf);
786                 v->type |= VF_CACHED;
787         }
788         return (v->string == NULL) ? "" : v->string;
789 }
790
791 static double getvar_i(var *v)
792 {
793         char *s;
794
795         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
796                 v->number = 0;
797                 s = v->string;
798                 if (s && *s) {
799                         v->number = strtod(s, &s);
800                         if (v->type & VF_USER) {
801                                 skip_spaces(&s);
802                                 if (*s != '\0')
803                                         v->type &= ~VF_USER;
804                         }
805                 } else {
806                         v->type &= ~VF_USER;
807                 }
808                 v->type |= VF_CACHED;
809         }
810         return v->number;
811 }
812
813 static var *copyvar(var *dest, const var *src)
814 {
815         if (dest != src) {
816                 clrvar(dest);
817                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
818                 dest->number = src->number;
819                 if (src->string)
820                         dest->string = xstrdup(src->string);
821         }
822         handle_special(dest);
823         return dest;
824 }
825
826 static var *incvar(var *v)
827 {
828         return setvar_i(v, getvar_i(v)+1.);
829 }
830
831 /* return true if v is number or numeric string */
832 static int is_numeric(var *v)
833 {
834         getvar_i(v);
835         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
836 }
837
838 /* return 1 when value of v corresponds to true, 0 otherwise */
839 static int istrue(var *v)
840 {
841         if (is_numeric(v))
842                 return (v->number == 0) ? 0 : 1;
843         return (v->string && *(v->string)) ? 1 : 0;
844 }
845
846 /* temporary variables allocator. Last allocated should be first freed */
847 static var *nvalloc(int n)
848 {
849         nvblock *pb = NULL;
850         var *v, *r;
851         int size;
852
853         while (g_cb) {
854                 pb = g_cb;
855                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
856                 g_cb = g_cb->next;
857         }
858
859         if (!g_cb) {
860                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
861                 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
862                 g_cb->size = size;
863                 g_cb->pos = g_cb->nv;
864                 g_cb->prev = pb;
865                 g_cb->next = NULL;
866                 if (pb) pb->next = g_cb;
867         }
868
869         v = r = g_cb->pos;
870         g_cb->pos += n;
871
872         while (v < g_cb->pos) {
873                 v->type = 0;
874                 v->string = NULL;
875                 v++;
876         }
877
878         return r;
879 }
880
881 static void nvfree(var *v)
882 {
883         var *p;
884
885         if (v < g_cb->nv || v >= g_cb->pos)
886                 syntax_error(EMSG_INTERNAL_ERROR);
887
888         for (p = v; p < g_cb->pos; p++) {
889                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
890                         clear_array(iamarray(p));
891                         free(p->x.array->items);
892                         free(p->x.array);
893                 }
894                 if (p->type & VF_WALK)
895                         free(p->x.walker);
896
897                 clrvar(p);
898         }
899
900         g_cb->pos = v;
901         while (g_cb->prev && g_cb->pos == g_cb->nv) {
902                 g_cb = g_cb->prev;
903         }
904 }
905
906 /* ------- awk program text parsing ------- */
907
908 /* Parse next token pointed by global pos, place results into global ttt.
909  * If token isn't expected, give away. Return token class
 *
 * 'expected' is a mask of the token classes the caller accepts.
 *
 * Three sources for the returned token:
 *  - t_rollback is set: nothing is scanned; the flag is cleared and the
 *    token already held in t_tclass is checked against 'expected' again.
 *  - the previous call inserted a concatenation operator: the token that
 *    was saved at that point (save_tclass/save_info) is delivered now.
 *  - otherwise the text at g_pos is scanned. '#' comments are skipped,
 *    then a string literal, a regexp (only if TC_REGEXP is in 'expected'),
 *    a number, an entry of tokenlist, or a name is recognized.
 *
 * String, regexp and name text is rewritten in place inside the program
 * buffer and t_string is pointed at it; numbers go to t_double; for
 * tokenlist entries t_info is taken from tokeninfo.
 *
 * Calls syntax_error() when the resulting class is not in 'expected'
 * (and at several points for malformed input).
910  */
911 static uint32_t next_token(uint32_t expected)
912 {
913 #define concat_inserted (G.next_token__concat_inserted)
914 #define save_tclass     (G.next_token__save_tclass)
915 #define save_info       (G.next_token__save_info)
916 /* Initialized to TC_OPTERM: */
917 #define ltclass         (G.next_token__ltclass)
918
919         char *p, *pp, *s;
920         const char *tl;
921         uint32_t tc;
922         const uint32_t *ti;
923         int l;
924
925         if (t_rollback) {
926                 t_rollback = FALSE;
927
928         } else if (concat_inserted) {
929                 concat_inserted = FALSE;
930                 t_tclass = save_tclass;
931                 t_info = save_info;
932
933         } else {
934                 p = g_pos;
935  readnext:
936                 skip_spaces(&p);
937                 g_lineno = t_lineno;
                    /* comment: skip to end of line (newline itself is not consumed) */
938                 if (*p == '#')
939                         while (*p != '\n' && *p != '\0')
940                                 p++;
941
942                 if (*p == '\n')
943                         t_lineno++;
944
945                 if (*p == '\0') {
946                         tc = TC_EOF;
947
948                 } else if (*p == '\"') {
949                         /* it's a string */
                            /* characters are stored back into the same buffer via
                             * nextchar(); s (write) never gets ahead of p (read) */
950                         t_string = s = ++p;
951                         while (*p != '\"') {
952                                 if (*p == '\0' || *p == '\n')
953                                         syntax_error(EMSG_UNEXP_EOS);
954                                 *(s++) = nextchar(&p);
955                         }
956                         p++;
957                         *s = '\0';
958                         tc = TC_STRING;
959
960                 } else if ((expected & TC_REGEXP) && *p == '/') {
961                         /* it's regexp */
962                         t_string = s = ++p;
963                         while (*p != '/') {
964                                 if (*p == '\0' || *p == '\n')
965                                         syntax_error(EMSG_UNEXP_EOS);
966                                 *s = *p++;
                                    /* backslash: the stored byte is replaced by the result
                                     * of bb_process_escape_sequence(). If the escaped char
                                     * was itself a backslash, a second backslash is stored.
                                     * If p was not advanced by the call, the following
                                     * character is copied through as well. */
967                                 if (*s++ == '\\') {
968                                         pp = p;
969                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
970                                         if (*pp == '\\')
971                                                 *s++ = '\\';
972                                         if (p == pp)
973                                                 *s++ = *p++;
974                                 }
975                         }
976                         p++;
977                         *s = '\0';
978                         tc = TC_REGEXP;
979
980                 } else if (*p == '.' || isdigit(*p)) {
981                         /* it's a number */
982                         t_double = strtod(p, &p);
                            /* a '.' right after the parsed number is rejected */
983                         if (*p == '.')
984                                 syntax_error(EMSG_UNEXP_TOKEN);
985                         tc = TC_NUMBER;
986
987                 } else {
988                         /* search for something known */
                            /* tokenlist is walked as a run of length-prefixed strings;
                             * a length byte equal to NTCC carries no string and moves
                             * tc on to the next token-class bit. ti steps through
                             * tokeninfo in parallel with the real entries. */
989                         tl = tokenlist;
990                         tc = 0x00000001;
991                         ti = tokeninfo;
992                         while (*tl) {
993                                 l = *(tl++);
994                                 if (l == NTCC) {
995                                         tc <<= 1;
996                                         continue;
997                                 }
998                                 /* if token class is expected, token
999                                  * matches and it's not a longer word,
1000                                  * then this is what we are looking for
1001                                  */
1002                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1003                                  && *tl == *p && strncmp(p, tl, l) == 0
1004                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1005                                 ) {
1006                                         t_info = *ti;
1007                                         p += l;
1008                                         break;
1009                                 }
1010                                 ti++;
1011                                 tl += l;
1012                         }
1013
                             /* end of tokenlist reached without a match */
1014                         if (!*tl) {
1015                                 /* it's a name (var/array/function),
1016                                  * otherwise it's something wrong
1017                                  */
1018                                 if (!isalnum_(*p))
1019                                         syntax_error(EMSG_UNEXP_TOKEN);
1020
                                     /* the name is moved one byte towards the start of the
                                      * buffer so that a terminating NUL fits after it without
                                      * clobbering the character that follows the name; the
                                      * byte just before the name is overwritten */
1021                                 t_string = --p;
1022                                 while (isalnum_(*(++p))) {
1023                                         *(p-1) = *p;
1024                                 }
1025                                 *(p-1) = '\0';
1026                                 tc = TC_VARIABLE;
1027                                 /* also consume whitespace between functionname and bracket */
1028                                 if (!(expected & TC_VARIABLE))
1029                                         skip_spaces(&p);
                                     /* '(' is left unconsumed for TC_FUNCTION,
                                      * '[' is consumed for TC_ARRAY */
1030                                 if (*p == '(') {
1031                                         tc = TC_FUNCTION;
1032                                 } else {
1033                                         if (*p == '[') {
1034                                                 p++;
1035                                                 tc = TC_ARRAY;
1036                                         }
1037                                 }
1038                         }
1039                 }
1040                 g_pos = p;
1041
1042                 /* skipping newlines in some cases */
1043                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1044                         goto readnext;
1045
1046                 /* insert concatenation operator when needed */
                     /* the token just scanned is parked in save_tclass/save_info and
                      * handed out by the next call; this call returns the synthetic
                      * concatenation operator instead */
1047                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1048                         concat_inserted = TRUE;
1049                         save_tclass = tc;
1050                         save_info = t_info;
1051                         tc = TC_BINOP;
1052                         t_info = OC_CONCAT | SS | P(35);
1053                 }
1054
1055                 t_tclass = tc;
1056         }
1057         ltclass = t_tclass;
1058
1059         /* Are we ready for this? */
1060         if (!(ltclass & expected))
1061                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1062                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1063
1064         return ltclass;
1065 #undef concat_inserted
1066 #undef save_tclass
1067 #undef save_info
1068 #undef ltclass
1069 }
1070
/* Push the current token back: sets t_rollback, which makes the next
 * next_token() call skip scanning and re-check the token already held
 * in t_tclass against its 'expected' mask.
 */
1071 static void rollback_token(void)
1072 {
1073         t_rollback = TRUE;
1074 }
1075
/* Allocate a node with xzalloc(), store 'info' in it and stamp it with
 * the current g_lineno. Returns the new node.
 * NOTE(review): the remaining members are presumably zero-filled by
 * xzalloc - confirm against libbb.
 */
1076 static node *new_node(uint32_t info)
1077 {
1078         node *n;
1079
1080         n = xzalloc(sizeof(node));
1081         n->info = info;
1082         n->lineno = g_lineno;
1083         return n;
1084 }
1085
/* Turn node n into an OC_REGEXP node for pattern s.
 *
 * 're' must point to storage for two regex_t objects: re[0] receives the
 * pattern compiled with REG_EXTENDED (referenced by n->l.re), re[1] the
 * same pattern compiled with REG_EXTENDED | REG_ICASE (referenced by
 * n->r.ire). Returns n.
 */
1086 static node *mk_re_node(const char *s, node *n, regex_t *re)
1087 {
1088         n->info = OC_REGEXP;
1089         n->l.re = re;
1090         n->r.ire = re + 1;
1091         xregcomp(re, s, REG_EXTENDED);
1092         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1093
1094         return n;
1095 }
1096
/* Parse a bracketed expression: requires a TC_SEQSTART token, then parses
 * an expression terminated by TC_SEQTERM and returns its subtree.
 */
1097 static node *condition(void)
1098 {
1099         next_token(TC_SEQSTART);
1100         return parse_expr(TC_SEQTERM);
1101 }
1102
1103 /* parse expression terminated by given argument, return ptr
1104  * to built subtree. Terminator is eaten by parse_expr */
/* 'iexp' is the mask of token classes that end the expression.
 *
 * The tree is grown under a dummy root 'sn' that lives on the stack; the
 * result is whatever ends up in sn.r.n (NULL for an empty expression).
 * 'cn' is the most recently added node, 'xtc' the mask of token classes
 * acceptable as the next token. 'glptr' remembers a getline node so that
 * a following '<' can be treated as input redirection rather than as a
 * comparison.
 *
 * Recurses for array subscripts (TC_ARRTERM) and for parenthesized
 * subexpressions and argument lists (TC_SEQTERM).
 */
1105 static node *parse_expr(uint32_t iexp)
1106 {
1107         node sn;
1108         node *cn = &sn;
1109         node *vn, *glptr;
1110         uint32_t tc, xtc;
1111         var *v;
1112
1113         sn.info = PRIMASK;
1114         sn.r.n = glptr = NULL;
1115         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1116
1117         while (!((tc = next_token(xtc)) & iexp)) {
1118                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1119                         /* input redirection (<) attached to glptr node */
1120                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1121                         cn->a.n = glptr;
1122                         xtc = TC_OPERAND | TC_UOPPRE;
1123                         glptr = NULL;
1124
1125                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1126                         /* for binary and postfix-unary operators, jump back over
1127                          * previous operators with higher priority */
1128                         vn = cn;
1129                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1130                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1131                                 vn = vn->a.n;
1132                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1133                                 t_info += P(6);
                             /* splice the new operator node in between vn and vn's
                              * former parent (a.n is the upward link) */
1134                         cn = vn->a.n->r.n = new_node(t_info);
1135                         cn->a.n = vn->a.n;
1136                         if (tc & TC_BINOP) {
1137                                 cn->l.n = vn;
1138                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1139                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1140                                         /* it's a pipe */
1141                                         next_token(TC_GETLINE);
1142                                         /* give maximum priority to this pipe */
1143                                         cn->info &= ~PRIMASK;
1144                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1145                                 }
1146                         } else {
1147                                 cn->r.n = vn;
1148                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1149                         }
1150                         vn->a.n = cn;
1151
1152                 } else {
1153                         /* for operands and prefix-unary operators, attach them
1154                          * to last node */
1155                         vn = cn;
1156                         cn = vn->r.n = new_node(t_info);
1157                         cn->a.n = vn;
1158                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1159                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1160                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1161                                 /* one should be very careful with switch on tclass -
1162                                  * only simple tclasses should be used! */
1163                                 switch (tc) {
1164                                 case TC_VARIABLE:
1165                                 case TC_ARRAY:
                                             /* a name present in ahash becomes OC_FNARG and
                                              * carries the stored argument index; any other
                                              * name is bound through newvar() */
1166                                         cn->info = OC_VAR;
1167                                         v = hash_search(ahash, t_string);
1168                                         if (v != NULL) {
1169                                                 cn->info = OC_FNARG;
1170                                                 cn->l.i = v->x.aidx;
1171                                         } else {
1172                                                 cn->l.v = newvar(t_string);
1173                                         }
1174                                         if (tc & TC_ARRAY) {
1175                                                 cn->info |= xS;
1176                                                 cn->r.n = parse_expr(TC_ARRTERM);
1177                                         }
1178                                         break;
1179
1180                                 case TC_NUMBER:
1181                                 case TC_STRING:
                                             /* literal: stored in a freshly allocated var */
1182                                         cn->info = OC_VAR;
1183                                         v = cn->l.v = xzalloc(sizeof(var));
1184                                         if (tc & TC_NUMBER)
1185                                                 setvar_i(v, t_double);
1186                                         else
1187                                                 setvar_s(v, t_string);
1188                                         break;
1189
1190                                 case TC_REGEXP:
                                             /* storage for both the case-sensitive and the
                                              * case-insensitive compiled pattern */
1191                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1192                                         break;
1193
1194                                 case TC_FUNCTION:
1195                                         cn->info = OC_FUNC;
1196                                         cn->r.f = newfunc(t_string);
1197                                         cn->l.n = condition();
1198                                         break;
1199
1200                                 case TC_SEQSTART:
                                             /* the node created above is replaced by the
                                              * subtree of the parenthesized expression */
1201                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1202                                         cn->a.n = vn;
1203                                         break;
1204
1205                                 case TC_GETLINE:
1206                                         glptr = cn;
1207                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1208                                         break;
1209
1210                                 case TC_BUILTIN:
1211                                         cn->l.n = condition();
1212                                         break;
1213                                 }
1214                         }
1215                 }
1216         }
1217         return sn.r.n;
1218 }
1219
1220 /* add node to chain. Return ptr to alloc'd node */
/* The chain 'seq' always ends in a spare node. This function lazily
 * creates that spare node for an empty chain, stores 'info' in the
 * current spare node, appends a fresh OC_DONE node as the new tail and
 * returns the node that was just filled in.
 *
 * If seq->programname is not the same pointer as g_progname, the new name
 * is recorded and an OC_NEWSOURCE node carrying a copy of g_progname is
 * chained first (via one level of recursion).
 */
1221 static node *chain_node(uint32_t info)
1222 {
1223         node *n;
1224
1225         if (!seq->first)
1226                 seq->first = seq->last = new_node(0);
1227
             /* pointer comparison, not a string comparison */
1228         if (seq->programname != g_progname) {
1229                 seq->programname = g_progname;
1230                 n = chain_node(OC_NEWSOURCE);
1231                 n->l.s = xstrdup(g_progname);
1232         }
1233
1234         n = seq->last;
1235         n->info = info;
1236         seq->last = n->a.n = new_node(OC_DONE);
1237
1238         return n;
1239 }
1240
/* Chain a node with the given 'info' whose left operand is the expression
 * parsed up to a TC_OPTERM or TC_GRPTERM token. A TC_GRPTERM terminator
 * is pushed back so that the caller sees it again.
 */
1241 static void chain_expr(uint32_t info)
1242 {
1243         node *n;
1244
1245         n = chain_node(info);
1246         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1247         if (t_tclass & TC_GRPTERM)
1248                 rollback_token();
1249 }
1250
/* Chain the skeleton of a loop and parse its body.
 *
 * Layout produced in the current chain:
 *   n   (OC_BR | Vx)    loop head; n->l.n is left for the caller to fill,
 *                       n->r.n is set to the node following the loop
 *   ... body, parsed by chain_group()
 *   n2  (OC_EXEC | Vx)  l.n = nn, successor link (a.n) set back to n
 *
 * While the body is parsed, break_ptr and continue_ptr refer to fresh
 * OC_EXEC nodes; afterwards these are linked to the node following the
 * loop and to n2 respectively. The previous break_ptr/continue_ptr values
 * are saved on entry and restored before returning.
 * Returns n.
 */
1251 static node *chain_loop(node *nn)
1252 {
1253         node *n, *n2, *save_brk, *save_cont;
1254
1255         save_brk = break_ptr;
1256         save_cont = continue_ptr;
1257
1258         n = chain_node(OC_BR | Vx);
1259         continue_ptr = new_node(OC_EXEC);
1260         break_ptr = new_node(OC_EXEC);
1261         chain_group();
1262         n2 = chain_node(OC_EXEC | Vx);
1263         n2->l.n = nn;
1264         n2->a.n = n;
1265         continue_ptr->a.n = n2;
1266         break_ptr->a.n = n->r.n = seq->last;
1267
1268         continue_ptr = save_cont;
1269         break_ptr = save_brk;
1270
1271         return n;
1272 }
1273
1274 /* parse group and attach it to chain */
/* Parses one statement or one brace-delimited group of statements and
 * appends the resulting nodes to the current chain 'seq'. Leading newline
 * tokens are skipped. Recurses for nested groups and for the bodies of
 * if/else and loops (the latter through chain_loop()).
 */
1275 static void chain_group(void)
1276 {
1277         uint32_t c;
1278         node *n, *n2, *n3;
1279
1280         do {
1281                 c = next_token(TC_GRPSEQ);
1282         } while (c & TC_NEWLINE);
1283
1284         if (c & TC_GRPSTART) {
                     /* group: parse members until the group terminator */
1285                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1286                         if (t_tclass & TC_NEWLINE) continue;
1287                         rollback_token();
1288                         chain_group();
1289                 }
1290         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                     /* plain expression statement */
1291                 rollback_token();
1292                 chain_expr(OC_EXEC | Vx);
1293         } else {                                                /* TC_STATEMNT */
1294                 switch (t_info & OPCLSMASK) {
1295                 case ST_IF:
                             /* n: branch node holding the condition; n->r.n is set to
                              * the node following the 'then' part. n2 closes the 'then'
                              * part and, if an else part follows, is linked to the node
                              * after it. */
1296                         n = chain_node(OC_BR | Vx);
1297                         n->l.n = condition();
1298                         chain_group();
1299                         n2 = chain_node(OC_EXEC);
1300                         n->r.n = seq->last;
1301                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1302                                 chain_group();
1303                                 n2->a.n = seq->last;
1304                         } else {
1305                                 rollback_token();
1306                         }
1307                         break;
1308
1309                 case ST_WHILE:
1310                         n2 = condition();
1311                         n = chain_loop(NULL);
1312                         n->l.n = n2;
1313                         break;
1314
1315                 case ST_DO:
                             /* n2 is linked to the node after the loop head, i.e. to the
                              * start of the body; the condition is read after the body */
1316                         n2 = chain_node(OC_EXEC);
1317                         n = chain_loop(NULL);
1318                         n2->a.n = n->a.n;
1319                         next_token(TC_WHILE);
1320                         n->l.n = condition();
1321                         break;
1322
1323                 case ST_FOR:
1324                         next_token(TC_SEQSTART);
1325                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1326                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1327                                 if ((n2->info & OPCLSMASK) != OC_IN)
1328                                         syntax_error(EMSG_UNEXP_TOKEN);
1329                                 n = chain_node(OC_WALKINIT | VV);
1330                                 n->l.n = n2->l.n;
1331                                 n->r.n = n2->r.n;
1332                                 n = chain_loop(NULL);
1333                                 n->info = OC_WALKNEXT | Vx;
1334                                 n->l.n = n2->l.n;
1335                         } else {                        /* for (;;) */
                                     /* init expression, then condition (n2) and the
                                      * step expression (n3, passed to chain_loop) */
1336                                 n = chain_node(OC_EXEC | Vx);
1337                                 n->l.n = n2;
1338                                 n2 = parse_expr(TC_SEMICOL);
1339                                 n3 = parse_expr(TC_SEQTERM);
1340                                 n = chain_loop(n3);
1341                                 n->l.n = n2;
                                     /* empty condition: loop head becomes a plain OC_EXEC */
1342                                 if (!n2)
1343                                         n->info = OC_EXEC;
1344                         }
1345                         break;
1346
1347                 case OC_PRINT:
1348                 case OC_PRINTF:
1349                         n = chain_node(t_info);
1350                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1351                         if (t_tclass & TC_OUTRDR) {
                                     /* output redirection: its info bits are merged into
                                      * the node and the target expression goes to r.n */
1352                                 n->info |= t_info;
1353                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1354                         }
1355                         if (t_tclass & TC_GRPTERM)
1356                                 rollback_token();
1357                         break;
1358
1359                 case OC_BREAK:
1360                         n = chain_node(OC_EXEC);
1361                         n->a.n = break_ptr;
1362                         break;
1363
1364                 case OC_CONTINUE:
1365                         n = chain_node(OC_EXEC);
1366                         n->a.n = continue_ptr;
1367                         break;
1368
1369                 /* delete, next, nextfile, return, exit */
1370                 default:
1371                         chain_expr(t_info);
1372                 }
1373         }
1374 }
1375
/* Parse the awk program text starting at p.
 *
 * Top-level items are dispatched on the class of their first token:
 * BEGIN and END groups go to beginseq and endseq, function declarations
 * to the body chain of the declared function, everything else to mainseq.
 * Stray TC_OPTERM tokens between items are ignored. Parsing stops at
 * TC_EOF. The text is modified in place by next_token().
 */
1376 static void parse_program(char *p)
1377 {
1378         uint32_t tclass;
1379         node *cn;
1380         func *f;
1381         var *v;
1382
1383         g_pos = p;
1384         t_lineno = 1;
1385         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1386                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1387
1388                 if (tclass & TC_OPTERM)
1389                         continue;
1390
1391                 seq = &mainseq;
1392                 if (tclass & TC_BEGIN) {
1393                         seq = &beginseq;
1394                         chain_group();
1395
1396                 } else if (tclass & TC_END) {
1397                         seq = &endseq;
1398                         chain_group();
1399
1400                 } else if (tclass & TC_FUNCDECL) {
1401                         next_token(TC_FUNCTION);
                             /* next_token() leaves the '(' of a TC_FUNCTION unconsumed */
1402                         g_pos++;
1403                         f = newfunc(t_string);
1404                         f->body.first = NULL;
1405                         f->nargs = 0;
                             /* each parameter name is entered into ahash together with
                              * its position in the parameter list */
1406                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1407                                 v = findvar(ahash, t_string);
1408                                 v->x.aidx = (f->nargs)++;
1409
1410                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1411                                         break;
1412                         }
1413                         seq = &(f->body);
1414                         chain_group();
                             /* parameter names are only known while this body is parsed */
1415                         clear_array(ahash);
1416
1417                 } else if (tclass & TC_OPSEQ) {
                             /* pattern, optionally followed by an action group; a
                              * pattern without a group gets an OC_PRINT node instead */
1418                         rollback_token();
1419                         cn = chain_node(OC_TEST);
1420                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1421                         if (t_tclass & TC_GRPSTART) {
1422                                 rollback_token();
1423                                 chain_group();
1424                         } else {
1425                                 chain_node(OC_PRINT);
1426                         }
1427                         cn->r.n = mainseq.last;
1428
1429                 } else /* if (tclass & TC_GRPSTART) */ {
1430                         rollback_token();
1431                         chain_group();
1432                 }
1433         }
1434 }
1435
1436
1437 /* -------- program execution part -------- */
1438
/* (Re)configure splitter 'spl' from separator string s.
 *
 * If the splitter's node currently is an OC_REGEXP node, both of its
 * compiled regexes are released first. A string longer than one character
 * is compiled as a regular expression via mk_re_node(); otherwise the
 * node's info is set to the value of the first byte of s (0 for an empty
 * string). Returns a pointer to the splitter's node.
 */
1439 static node *mk_splitter(const char *s, tsplitter *spl)
1440 {
1441         regex_t *re, *ire;
1442         node *n;
1443
1444         re = &spl->re[0];
1445         ire = &spl->re[1];
1446         n = &spl->n;
1447         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1448                 regfree(re);
1449                 regfree(ire); // TODO: nuke ire, use re+1?
1450         }
1451         if (strlen(s) > 1) {
1452                 mk_re_node(s, n, re);
1453         } else {
1454                 n->info = (uint32_t) *s;
1455         }
1456
1457         return n;
1458 }
1459
1460 /* use node as a regular expression. Supplied with node ptr and regex_t
1461  * storage space. Return ptr to regex (if result points to preg, it should
1462  * be later regfree'd manually)
 *
 * An OC_REGEXP node already holds compiled patterns: the case-insensitive
 * or the case-sensitive one is returned depending on icase, and preg is
 * left untouched. Any other node is evaluated into a temporary, its
 * string value is compiled into preg (with REG_ICASE added when icase is
 * set) and preg is returned.
1463  */
1464 static regex_t *as_regex(node *op, regex_t *preg)
1465 {
1466         var *v;
1467         const char *s;
1468
1469         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1470                 return icase ? op->r.ire : op->l.re;
1471         }
1472         v = nvalloc(1);
1473         s = getvar_s(evaluate(op, v));
1474         xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1475         nvfree(v);
1476         return preg;
1477 }
1478
1479 /* gradually increasing buffer */
/* Make sure buffer *b has room for more than n bytes. If *b is NULL or
 * n >= *size, the buffer is reallocated to n + n/2 + 80 bytes and the new
 * size is stored in *size. *size is not read while *b is NULL, so it may
 * be uninitialized on the first call.
 */
1480 static void qrealloc(char **b, int n, int *size)
1481 {
1482         if (!*b || n >= *size)
1483                 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1484 }
1485
1486 /* resize field storage space */
/* Sets nfields to 'size'.
 *
 * When size >= maxfields, the Fields array is grown to size + 16 entries
 * and every new entry gets type VF_SPECIAL and a NULL string. When the
 * field count shrinks, clrvar() is applied to the entries from 'size' up
 * to the old nfields.
 *
 * NOTE(review): 'size' is not checked for being negative; a negative
 * value would make the shrink loop index Fields below its start.
 */
1487 static void fsrealloc(int size)
1488 {
1489         int i;
1490
1491         if (size >= maxfields) {
1492                 i = maxfields;
1493                 maxfields = size + 16;
1494                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1495                 for (; i < maxfields; i++) {
1496                         Fields[i].type = VF_SPECIAL;
1497                         Fields[i].string = NULL;
1498                 }
1499         }
1500
1501         if (size < nfields) {
1502                 for (i = size; i < nfields; i++) {
1503                         clrvar(Fields + i);
1504                 }
1505         }
1506         nfields = size;
1507 }
1508
/* Split string s using splitter node spl.
 *
 * *slist receives a newly allocated buffer (strlen(s) * 2 + 3 bytes) in
 * which the resulting pieces are stored one after another, each
 * NUL-terminated. Returns the number of pieces.
 *
 * The splitting mode depends on spl->info:
 *  - OC_REGEXP node: split on matches of the regex (the case-insensitive
 *    variant when icase is set);
 *  - character value 0: every character becomes a piece of its own;
 *  - any character other than ' ': split on that character (on both of
 *    its cases when icase is set);
 *  - ' ': split on runs of whitespace, ignoring leading and trailing runs.
 * When RS is the empty string, '\n' additionally acts as a separator in
 * the regex and single-character modes.
 *
 * NOTE(review): nextword() is defined elsewhere; it is assumed to return
 * its argument's current position and advance it past the terminating NUL.
 */
1509 static int awk_split(const char *s, node *spl, char **slist)
1510 {
1511         int l, n = 0;
1512         char c[4];
1513         char *s1;
1514         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1515
1516         /* in worst case, each char would be a separate field */
1517         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1518         strcpy(s1, s);
1519
             /* c[0], c[1]: separator character (two case variants under icase);
              * c[2]: '\n' if RS is empty, else NUL; c[3]: terminator.
              * So 'c' is a strpbrk() set and 'c+2' a strcspn() set. */
1520         c[0] = c[1] = (char)spl->info;
1521         c[2] = c[3] = '\0';
1522         if (*getvar_s(intvar[RS]) == '\0')
1523                 c[2] = '\n';
1524
1525         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1526                 while (*s) {
                             /* l: distance to the next newline separator (if active),
                              * otherwise to the end of the string */
1527                         l = strcspn(s, c+2);
1528                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1529                          && pmatch[0].rm_so <= l
1530                         ) {
1531                                 l = pmatch[0].rm_so;
                                     /* empty match at the very start: take one character
                                      * as the piece so that the loop makes progress */
1532                                 if (pmatch[0].rm_eo == 0) {
1533                                         l++;
1534                                         pmatch[0].rm_eo++;
1535                                 }
1536                         } else {
                                     /* no usable match: piece runs up to l; step over the
                                      * separator character unless at end of string */
1537                                 pmatch[0].rm_eo = l;
1538                                 if (s[l]) pmatch[0].rm_eo++;
1539                         }
1540
1541                         memcpy(s1, s, l);
1542                         s1[l] = '\0';
1543                         nextword(&s1);
1544                         s += pmatch[0].rm_eo;
1545                         n++;
1546                 }
1547         } else if (c[0] == '\0') {              /* null split */
1548                 while (*s) {
1549                         *s1++ = *s++;
1550                         *s1++ = '\0';
1551                         n++;
1552                 }
1553         } else if (c[0] != ' ') {               /* single-character split */
1554                 if (icase) {
1555                         c[0] = toupper(c[0]);
1556                         c[1] = tolower(c[1]);
1557                 }
                     /* works on the copy made by strcpy() above: every separator
                      * found is overwritten with NUL */
1558                 if (*s1) n++;
1559                 while ((s1 = strpbrk(s1, c))) {
1560                         *s1++ = '\0';
1561                         n++;
1562                 }
1563         } else {                                /* space split */
1564                 while (*s) {
1565                         s = skip_whitespace(s);
1566                         if (!*s) break;
1567                         n++;
1568                         while (*s && !isspace(*s))
1569                                 *s1++ = *s++;
1570                         *s1++ = '\0';
1571                 }
1572         }
1573         return n;
1574 }
1575
/* Split $0 into fields, unless that has already been done
 * (is_f0_split set).
 *
 * The previous field string buffer is freed, the field count is reset to
 * zero, the string value of $0 is split with the field splitter, and each
 * resulting piece is attached to the corresponding Fields entry (flagged
 * VF_FSTR | VF_USER | VF_DIRTY). Finally NF is updated by writing its
 * members directly.
 */
1576 static void split_f0(void)
1577 {
1578 #define fstrings (G.split_f0__fstrings)
1579
1580         int i, n;
1581         char *s;
1582
1583         if (is_f0_split)
1584                 return;
1585
1586         is_f0_split = TRUE;
1587         free(fstrings);
1588         fsrealloc(0);
1589         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1590         fsrealloc(n);
1591         s = fstrings;
             /* field strings point into the fstrings buffer, they are not copies */
1592         for (i = 0; i < n; i++) {
1593                 Fields[i].string = nextword(&s);
1594                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1595         }
1596
1597         /* set NF manually to avoid side effects */
1598         clrvar(intvar[NF]);
1599         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1600         intvar[NF]->number = nfields;
1601 #undef fstrings
1602 }
1603
1604 /* perform additional actions when some internal variables changed */
/* Does nothing unless v carries VF_SPECIAL. Otherwise, depending on which
 * variable v is:
 *  - NF: resize the field array to the new count and rebuild $0 by
 *    joining the fields with OFS;
 *  - $0: mark the fields as not split;
 *  - FS / RS: rebuild the field / record splitter from the new value;
 *  - IGNORECASE: refresh the icase flag;
 *  - anything else is taken to be a field $n: NF is raised if the field
 *    lies beyond the current NF.
 *
 * NOTE(review): in the NF branch the value is passed to fsrealloc()
 * without a check for negative numbers.
 */
1605 static void handle_special(var *v)
1606 {
1607         int n;
1608         char *b;
1609         const char *sep, *s;
1610         int sl, l, len, i, bsize;
1611
1612         if (!(v->type & VF_SPECIAL))
1613                 return;
1614
1615         if (v == intvar[NF]) {
1616                 n = (int)getvar_i(v);
1617                 fsrealloc(n);
1618
1619                 /* recalculate $0 */
1620                 sep = getvar_s(intvar[OFS]);
1621                 sl = strlen(sep);
1622                 b = NULL;
1623                 len = 0;
1624                 for (i = 0; i < n; i++) {
1625                         s = getvar_s(&Fields[i]);
1626                         l = strlen(s);
                             /* b is non-NULL from the second field on; room for the
                              * separator was reserved by the previous qrealloc() call,
                              * which asks for len+l+sl */
1627                         if (b) {
1628                                 memcpy(b+len, sep, sl);
1629                                 len += sl;
1630                         }
                             /* bsize is uninitialized on the first call; qrealloc()
                              * does not read it while b is NULL */
1631                         qrealloc(&b, len+l+sl, &bsize);
1632                         memcpy(b+len, s, l);
1633                         len += l;
1634                 }
1635                 if (b)
1636                         b[len] = '\0';
                     /* b stays NULL when n is 0 (loop body never runs) */
1637                 setvar_p(intvar[F0], b);
1638                 is_f0_split = TRUE;
1639
1640         } else if (v == intvar[F0]) {
1641                 is_f0_split = FALSE;
1642
1643         } else if (v == intvar[FS]) {
1644                 mk_splitter(getvar_s(v), &fsplitter);
1645
1646         } else if (v == intvar[RS]) {
1647                 mk_splitter(getvar_s(v), &rsplitter);
1648
1649         } else if (v == intvar[IGNORECASE]) {
1650                 icase = istrue(v);
1651
1652         } else {                                /* $n */
                     /* v-Fields is the zero-based index of the field */
1653                 n = getvar_i(intvar[NF]);
1654                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1655                 /* right here v is invalid. Just to note... */
1656         }
1657 }
1658
1659 /* step through func/builtin/etc arguments */
/* *pn is the remaining argument list. If it is an OC_COMMA node, its left
 * child is returned and *pn is advanced to its right child. Otherwise *pn
 * itself is returned (this may be NULL) and *pn is set to NULL.
 */
1660 static node *nextarg(node **pn)
1661 {
1662         node *n;
1663
1664         n = *pn;
1665         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1666                 *pn = n->r.n;
1667                 n = n->l.n;
1668         } else {
1669                 *pn = NULL;
1670         }
1671         return n;
1672 }
1673
/* Prepare variable v for walking over the names in 'array'.
 *
 * Any walker list already attached to v is freed. A new buffer is then
 * allocated: two char pointers followed by a string area into which the
 * names of all items of the array are copied, one after another. Both
 * pointers start at the beginning of the string area; nextword(w) is
 * applied to w[0] after each copy, while w[1] is not touched here and is
 * the position hashwalk_next() reads from.
 *
 * NOTE(review): array->glen is assumed to be the total size needed for
 * all names including their terminators, and nextword() is assumed to
 * advance the pointer past the string just written - confirm both.
 */
1674 static void hashwalk_init(var *v, xhash *array)
1675 {
1676         char **w;
1677         hash_item *hi;
1678         int i;
1679
1680         if (v->type & VF_WALK)
1681                 free(v->x.walker);
1682
1683         v->type |= VF_WALK;
1684         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1685         w[0] = w[1] = (char *)(w + 2);
             /* visit every bucket and every item chained in it */
1686         for (i = 0; i < array->csize; i++) {
1687                 hi = array->items[i];
1688                 while (hi) {
1689                         strcpy(*w, hi->name);
1690                         nextword(w);
1691                         hi = hi->next;
1692                 }
1693         }
1694 }
1695
/* Advance the walk prepared by hashwalk_init(). Returns FALSE when the
 * read position w[1] has reached w[0]; otherwise the next name is fetched
 * with nextword(), assigned to v as its string value, and TRUE is
 * returned.
 */
1696 static int hashwalk_next(var *v)
1697 {
1698         char **w;
1699
1700         w = v->x.walker;
1701         if (w[1] == w[0])
1702                 return FALSE;
1703
1704         setvar_s(v, nextword(w+1));
1705         return TRUE;
1706 }
1707
1708 /* evaluate node, return 1 when result is true, 0 otherwise */
/* The result of the evaluation is placed in the shared variable
 * G.ptest__v rather than in a local one.
 */
1709 static int ptest(node *pattern)
1710 {
1711         /* ptest__v is "static": to save stack space? */
1712         return istrue(evaluate(pattern, &G.ptest__v));
1713 }
1714
/* Read the next record from stream rsm into variable v.
 *
 * The record separator comes from rsplitter: a regexp node, a single
 * character, or - when that character is NUL - a run of blank lines.
 * The text of the separator that ended the record is stored in RT.
 *
 * Returns 1 when a record was stored, 0 when no more data is available,
 * and -1 when the read failed (ERRNO is set from errno in that case).
 */
static int awk_getline(rstream *rsm, var *v)
{
        char *b;
        regmatch_t pmatch[2];
        int a, p, pp=0, size;
        int fd, so, eo, r, rp;
        char c, *m, *s;

        /* we're using our own buffer since we need access to accumulating
         * characters
         */
        fd = fileno(rsm->F);
        m = rsm->buffer;        /* start of the stream's buffer */
        a = rsm->adv;           /* offset of the unconsumed data inside m */
        p = rsm->pos;           /* number of unconsumed bytes at m+a */
        size = rsm->size;       /* buffer size as tracked by qrealloc() */
        c = (char) rsplitter.n.info;    /* single-char separator (low byte of info) */
        rp = 0;                 /* offset where the record text starts */

        if (!m) qrealloc(&m, 256, &size);
        do {
                b = m + a;
                /* so/eo: start/end of the separator; default is "none found" */
                so = eo = p;
                r = 1;
                if (p > 0) {
                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
                                /* regexp separator */
                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
                                                        b, 1, pmatch, 0) == 0) {
                                        so = pmatch[0].rm_so;
                                        eo = pmatch[0].rm_eo;
                                        /* a match reaching the end of the data is not
                                         * accepted yet: more input is read first */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        } else if (c != '\0') {
                                /* single-char separator: only scan the bytes added by
                                 * the last read (from pp on); a NUL byte inside the
                                 * data also ends the record */
                                s = strchr(b+pp, c);
                                if (!s) s = memchr(b+pp, '\0', p - pp);
                                if (s) {
                                        so = eo = s-b;
                                        eo++;
                                        break;
                                }
                        } else {
                                /* NUL separator char: skip leading newlines, then the
                                 * record ends at the first "\n\n"; the whole newline
                                 * run is the separator */
                                while (b[rp] == '\n')
                                        rp++;
                                s = strstr(b+rp, "\n\n");
                                if (s) {
                                        so = eo = s-b;
                                        while (b[eo] == '\n') eo++;
                                        /* newline run touching the end of data: read more */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        }
                }

                /* no complete record yet: move the pending data (and its NUL)
                 * to the front of the buffer before reading more */
                if (a > 0) {
                        memmove(m, (const void *)(m+a), p+1);
                        b = m;
                        a = 0;
                }

                qrealloc(&m, a+p+128, &size);
                b = m + a;
                pp = p;
                p += safe_read(fd, b+p, size-p-1);
                /* a negative read result makes p drop below pp: read error */
                if (p < pp) {
                        p = 0;
                        r = 0;
                        setvar_i(intvar[ERRNO], errno);
                }
                b[p] = '\0';

        } while (p > pp);       /* stop when the read added nothing */

        if (p == 0) {
                /* nothing buffered: 1 -> 0 (end of data), 0 -> -1 (error) */
                r--;
        } else {
                /* temporarily NUL-terminate the record, then the separator,
                 * to copy them out; the overwritten bytes are restored */
                c = b[so]; b[so] = '\0';
                setvar_s(v, b+rp);
                v->type |= VF_USER;
                b[so] = c;
                c = b[eo]; b[eo] = '\0';
                setvar_s(intvar[RT], b+so);
                b[eo] = c;
        }

        /* save the buffer state; the next call resumes after the separator */
        rsm->buffer = m;
        rsm->adv = a + eo;
        rsm->pos = p - eo;
        rsm->size = size;

        return r;
}
1808
1809 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1810 {
1811         int r = 0;
1812         char c;
1813         const char *s = format;
1814
1815         if (int_as_int && n == (int)n) {
1816                 r = snprintf(b, size, "%d", (int)n);
1817         } else {
1818                 do { c = *s; } while (c && *++s);
1819                 if (strchr("diouxX", c)) {
1820                         r = snprintf(b, size, format, (int)n);
1821                 } else if (strchr("eEfgG", c)) {
1822                         r = snprintf(b, size, format, n);
1823                 } else {
1824                         syntax_error(EMSG_INV_FMT);
1825                 }
1826         }
1827         return r;
1828 }
1829
1830
1831 /* formatted output into an allocated buffer, return ptr to buffer */
1832 static char *awk_printf(node *n)
1833 {
1834         char *b = NULL;
1835         char *fmt, *s, *f;
1836         const char *s1;
1837         int i, j, incr, bsize;
1838         char c, c1;
1839         var *v, *arg;
1840
1841         v = nvalloc(1);
1842         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1843
1844         i = 0;
1845         while (*f) {
1846                 s = f;
1847                 while (*f && (*f != '%' || *(++f) == '%'))
1848                         f++;
1849                 while (*f && !isalpha(*f)) {
1850                         if (*f == '*')
1851                                 syntax_error("%*x formats are not supported");
1852                         f++;
1853                 }
1854
1855                 incr = (f - s) + MAXVARFMT;
1856                 qrealloc(&b, incr + i, &bsize);
1857                 c = *f;
1858                 if (c != '\0') f++;
1859                 c1 = *f;
1860                 *f = '\0';
1861                 arg = evaluate(nextarg(&n), v);
1862
1863                 j = i;
1864                 if (c == 'c' || !c) {
1865                         i += sprintf(b+i, s, is_numeric(arg) ?
1866                                         (char)getvar_i(arg) : *getvar_s(arg));
1867                 } else if (c == 's') {
1868                         s1 = getvar_s(arg);
1869                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1870                         i += sprintf(b+i, s, s1);
1871                 } else {
1872                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1873                 }
1874                 *f = c1;
1875
1876                 /* if there was an error while sprintf, return value is negative */
1877                 if (i < j) i = j;
1878         }
1879
1880         b = xrealloc(b, i + 1);
1881         free(fmt);
1882         nvfree(v);
1883         b[i] = '\0';
1884         return b;
1885 }
1886
/* common substitution routine
 * replace (nm)th substring of (src) that matches (rn) with (repl), store
 * result into (dest), return number of matches processed. If nm=0, replace
 * all matches. If src or dest is NULL, use $0. If ex=TRUE, enable
 * subexpression matching (\1-\9)
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
{
        char *ds = NULL;
        const char *s;
        const char *sp;
        int c, i, j, di, rl, so, eo, nbs, n, dssize;
        regmatch_t pmatch[10];
        regex_t sreg, *re;

        /* as_regex() may hand back &sreg; in that case it is freed below */
        re = as_regex(rn, &sreg);
        if (!src) src = intvar[F0];
        if (!dest) dest = intvar[F0];

        /* i: matches seen so far; di: write offset in the result buffer ds */
        i = di = 0;
        sp = getvar_s(src);
        rl = strlen(repl);
        /* after the first match sp is no longer at the start of the string,
         * so REG_NOTBOL is passed */
        while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
                so = pmatch[0].rm_so;
                eo = pmatch[0].rm_eo;

                /* copy source text up to and including the match */
                qrealloc(&ds, di + eo + rl, &dssize);
                memcpy(ds + di, sp, eo);
                di += eo;
                if (++i >= nm) {
                        /* replace */
                        /* back up over the copied match, then emit repl */
                        di -= (eo - so);
                        nbs = 0;        /* count of backslashes immediately preceding */
                        for (s = repl; *s; s++) {
                                ds[di++] = c = *s;
                                if (c == '\\') {
                                        nbs++;
                                        continue;
                                }
                                if (c == '&' || (ex && c >= '0' && c <= '9')) {
                                        /* drop the special char just written plus
                                         * half (rounded up) of the backslashes before it */
                                        di -= ((nbs + 3) >> 1);
                                        j = 0;
                                        if (c != '&') {
                                                /* digit: selects subexpression j; the extra
                                                 * increment flips the parity test below, so a
                                                 * digit needs an odd number of backslashes to
                                                 * act as a reference */
                                                j = c - '0';
                                                nbs++;
                                        }
                                        if (nbs % 2) {
                                                /* escaped: keep the character literally */
                                                ds[di++] = c;
                                        } else {
                                                /* insert the text of (sub)match j */
                                                n = pmatch[j].rm_eo - pmatch[j].rm_so;
                                                qrealloc(&ds, di + rl + n, &dssize);
                                                memcpy(ds + di, sp + pmatch[j].rm_so, n);
                                                di += n;
                                        }
                                }
                                nbs = 0;
                        }
                }

                sp += eo;
                /* the requested match has been handled: stop searching */
                if (i == nm) break;
                if (eo == so) {
                        /* empty match: copy one source char so the loop makes
                         * progress; stop at the end of the string */
                        ds[di] = *sp++;
                        if (!ds[di++]) break;
                }
        }

        /* append the unmatched tail and hand the buffer over to dest */
        qrealloc(&ds, di + strlen(sp), &dssize);
        strcpy(ds + di, sp);
        setvar_p(dest, ds);
        if (re == &sreg) regfree(re);
        return i;
}
1960
1961 static var *exec_builtin(node *op, var *res)
1962 {
1963 #define tspl (G.exec_builtin__tspl)
1964
1965         int (*to_xxx)(int);
1966         var *tv;
1967         node *an[4];
1968         var *av[4];
1969         const char *as[4];
1970         regmatch_t pmatch[2];
1971         regex_t sreg, *re;
1972         node *spl;
1973         uint32_t isr, info;
1974         int nargs;
1975         time_t tt;
1976         char *s, *s1;
1977         int i, l, ll, n;
1978
1979         tv = nvalloc(4);
1980         isr = info = op->info;
1981         op = op->l.n;
1982
1983         av[2] = av[3] = NULL;
1984         for (i = 0; i < 4 && op; i++) {
1985                 an[i] = nextarg(&op);
1986                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1987                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1988                 isr >>= 1;
1989         }
1990
1991         nargs = i;
1992         if (nargs < (info >> 30))
1993                 syntax_error(EMSG_TOO_FEW_ARGS);
1994
1995         switch (info & OPNMASK) {
1996
1997         case B_a2:
1998 #if ENABLE_FEATURE_AWK_MATH
1999                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2000 #else
2001                 syntax_error(EMSG_NO_MATH);
2002 #endif
2003                 break;
2004
2005         case B_sp:
2006                 if (nargs > 2) {
2007                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2008                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2009                 } else {
2010                         spl = &fsplitter.n;
2011                 }
2012
2013                 n = awk_split(as[0], spl, &s);
2014                 s1 = s;
2015                 clear_array(iamarray(av[1]));
2016                 for (i=1; i<=n; i++)
2017                         setari_u(av[1], i, nextword(&s1));
2018                 free(s);
2019                 setvar_i(res, n);
2020                 break;
2021
2022         case B_ss:
2023                 l = strlen(as[0]);
2024                 i = getvar_i(av[1]) - 1;
2025                 if (i > l) i = l;
2026                 if (i < 0) i = 0;
2027                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2028                 if (n < 0) n = 0;
2029                 s = xmalloc(n+1);
2030                 strncpy(s, as[0]+i, n);
2031                 s[n] = '\0';
2032                 setvar_p(res, s);
2033                 break;
2034
2035         case B_an:
2036                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2037                 break;
2038
2039         case B_co:
2040                 setvar_i(res, ~(long)getvar_i(av[0]));
2041                 break;
2042
2043         case B_ls:
2044                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2045                 break;
2046
2047         case B_or:
2048                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2049                 break;
2050
2051         case B_rs:
2052                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2053                 break;
2054
2055         case B_xo:
2056                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2057                 break;
2058
2059         case B_lo:
2060                 to_xxx = tolower;
2061                 goto lo_cont;
2062
2063         case B_up:
2064                 to_xxx = toupper;
2065  lo_cont:
2066                 s1 = s = xstrdup(as[0]);
2067                 while (*s1) {
2068                         *s1 = (*to_xxx)(*s1);
2069                         s1++;
2070                 }
2071                 setvar_p(res, s);
2072                 break;
2073
2074         case B_ix:
2075                 n = 0;
2076                 ll = strlen(as[1]);
2077                 l = strlen(as[0]) - ll;
2078                 if (ll > 0 && l >= 0) {
2079                         if (!icase) {
2080                                 s = strstr(as[0], as[1]);
2081                                 if (s) n = (s - as[0]) + 1;
2082                         } else {
2083                                 /* this piece of code is terribly slow and
2084                                  * really should be rewritten
2085                                  */
2086                                 for (i=0; i<=l; i++) {
2087                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2088                                                 n = i+1;
2089                                                 break;
2090                                         }
2091                                 }
2092                         }
2093                 }
2094                 setvar_i(res, n);
2095                 break;
2096
2097         case B_ti:
2098                 if (nargs > 1)
2099                         tt = getvar_i(av[1]);
2100                 else
2101                         time(&tt);
2102                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2103                 i = strftime(g_buf, MAXVARFMT,
2104                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2105                         localtime(&tt));
2106                 g_buf[i] = '\0';
2107                 setvar_s(res, g_buf);
2108                 break;
2109
2110         case B_ma:
2111                 re = as_regex(an[1], &sreg);
2112                 n = regexec(re, as[0], 1, pmatch, 0);
2113                 if (n == 0) {
2114                         pmatch[0].rm_so++;
2115                         pmatch[0].rm_eo++;
2116                 } else {
2117                         pmatch[0].rm_so = 0;
2118                         pmatch[0].rm_eo = -1;
2119                 }
2120                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2121                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2122                 setvar_i(res, pmatch[0].rm_so);
2123                 if (re == &sreg) regfree(re);
2124                 break;
2125
2126         case B_ge:
2127                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2128                 break;
2129
2130         case B_gs:
2131                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2132                 break;
2133
2134         case B_su:
2135                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2136                 break;
2137         }
2138
2139         nvfree(tv);
2140         return res;
2141 #undef tspl
2142 }
2143
2144 /*
2145  * Evaluate node - the heart of the program. Supplied with subtree
2146  * and place where to store result. returns ptr to result.
2147  */
2148 #define XC(n) ((n) >> 8)
2149
2150 static var *evaluate(node *op, var *res)
2151 {
2152 /* This procedure is recursive so we should count every byte */
2153 #define fnargs (G.evaluate__fnargs)
2154 /* seed is initialized to 1 */
2155 #define seed   (G.evaluate__seed)
2156 #define sreg   (G.evaluate__sreg)
2157
2158         node *op1;
2159         var *v1;
2160         union {
2161                 var *v;
2162                 const char *s;
2163                 double d;
2164                 int i;
2165         } L, R;
2166         uint32_t opinfo;
2167         int opn;
2168         union {
2169                 char *s;
2170                 rstream *rsm;
2171                 FILE *F;
2172                 var *v;
2173                 regex_t *re;
2174                 uint32_t info;
2175         } X;
2176
2177         if (!op)
2178                 return setvar_s(res, NULL);
2179
2180         v1 = nvalloc(2);
2181
2182         while (op) {
2183                 opinfo = op->info;
2184                 opn = (opinfo & OPNMASK);
2185                 g_lineno = op->lineno;
2186
2187                 /* execute inevitable things */
2188                 op1 = op->l.n;
2189                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2190                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2191                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2192                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2193                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2194
2195                 switch (XC(opinfo & OPCLSMASK)) {
2196
2197                 /* -- iterative node type -- */
2198
2199                 /* test pattern */
2200                 case XC( OC_TEST ):
2201                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2202                                 /* it's range pattern */
2203                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2204                                         op->info |= OF_CHECKED;
2205                                         if (ptest(op1->r.n))
2206                                                 op->info &= ~OF_CHECKED;
2207
2208                                         op = op->a.n;
2209                                 } else {
2210                                         op = op->r.n;
2211                                 }
2212                         } else {
2213                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2214                         }
2215                         break;
2216
2217                 /* just evaluate an expression, also used as unconditional jump */
2218                 case XC( OC_EXEC ):
2219                         break;
2220
2221                 /* branch, used in if-else and various loops */
2222                 case XC( OC_BR ):
2223                         op = istrue(L.v) ? op->a.n : op->r.n;
2224                         break;
2225
2226                 /* initialize for-in loop */
2227                 case XC( OC_WALKINIT ):
2228                         hashwalk_init(L.v, iamarray(R.v));
2229                         break;
2230
2231                 /* get next array item */
2232                 case XC( OC_WALKNEXT ):
2233                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2234                         break;
2235
2236                 case XC( OC_PRINT ):
2237                 case XC( OC_PRINTF ):
2238                         X.F = stdout;
2239                         if (op->r.n) {
2240                                 X.rsm = newfile(R.s);
2241                                 if (!X.rsm->F) {
2242                                         if (opn == '|') {
2243                                                 X.rsm->F = popen(R.s, "w");
2244                                                 if (X.rsm->F == NULL)
2245                                                         bb_perror_msg_and_die("popen");
2246                                                 X.rsm->is_pipe = 1;
2247                                         } else {
2248                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2249                                         }
2250                                 }
2251                                 X.F = X.rsm->F;
2252                         }
2253
2254                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2255                                 if (!op1) {
2256                                         fputs(getvar_s(intvar[F0]), X.F);
2257                                 } else {
2258                                         while (op1) {
2259                                                 L.v = evaluate(nextarg(&op1), v1);
2260                                                 if (L.v->type & VF_NUMBER) {
2261                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2262                                                                         getvar_i(L.v), TRUE);
2263                                                         fputs(g_buf, X.F);
2264                                                 } else {
2265                                                         fputs(getvar_s(L.v), X.F);
2266                                                 }
2267
2268                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2269                                         }
2270                                 }
2271                                 fputs(getvar_s(intvar[ORS]), X.F);
2272
2273                         } else {        /* OC_PRINTF */
2274                                 L.s = awk_printf(op1);
2275                                 fputs(L.s, X.F);
2276                                 free((char*)L.s);
2277                         }
2278                         fflush(X.F);
2279                         break;
2280
2281                 case XC( OC_DELETE ):
2282                         X.info = op1->info & OPCLSMASK;
2283                         if (X.info == OC_VAR) {
2284                                 R.v = op1->l.v;
2285                         } else if (X.info == OC_FNARG) {
2286                                 R.v = &fnargs[op1->l.i];
2287                         } else {
2288                                 syntax_error(EMSG_NOT_ARRAY);
2289                         }
2290
2291                         if (op1->r.n) {
2292                                 clrvar(L.v);
2293                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2294                                 hash_remove(iamarray(R.v), L.s);
2295                         } else {
2296                                 clear_array(iamarray(R.v));
2297                         }
2298                         break;
2299
2300                 case XC( OC_NEWSOURCE ):
2301                         g_progname = op->l.s;
2302                         break;
2303
2304                 case XC( OC_RETURN ):
2305                         copyvar(res, L.v);
2306                         break;
2307
2308                 case XC( OC_NEXTFILE ):
2309                         nextfile = TRUE;
2310                 case XC( OC_NEXT ):
2311                         nextrec = TRUE;
2312                 case XC( OC_DONE ):
2313                         clrvar(res);
2314                         break;
2315
2316                 case XC( OC_EXIT ):
2317                         awk_exit(L.d);
2318
2319                 /* -- recursive node type -- */
2320
2321                 case XC( OC_VAR ):
2322                         L.v = op->l.v;
2323                         if (L.v == intvar[NF])
2324                                 split_f0();
2325                         goto v_cont;
2326
2327                 case XC( OC_FNARG ):
2328                         L.v = &fnargs[op->l.i];
2329  v_cont:
2330                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2331                         break;
2332
2333                 case XC( OC_IN ):
2334                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2335                         break;
2336
2337                 case XC( OC_REGEXP ):
2338                         op1 = op;
2339                         L.s = getvar_s(intvar[F0]);
2340                         goto re_cont;
2341
2342                 case XC( OC_MATCH ):
2343                         op1 = op->r.n;
2344  re_cont:
2345                         X.re = as_regex(op1, &sreg);
2346                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2347                         if (X.re == &sreg) regfree(X.re);
2348                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2349                         break;
2350
2351                 case XC( OC_MOVE ):
2352                         /* if source is a temporary string, jusk relink it to dest */
2353                         if (R.v == v1+1 && R.v->string) {
2354                                 res = setvar_p(L.v, R.v->string);
2355                                 R.v->string = NULL;
2356                         } else {
2357                                 res = copyvar(L.v, R.v);
2358                         }
2359                         break;
2360
2361                 case XC( OC_TERNARY ):
2362                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2363                                 syntax_error(EMSG_POSSIBLE_ERROR);
2364                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2365                         break;
2366
2367                 case XC( OC_FUNC ):
2368                         if (!op->r.f->body.first)
2369                                 syntax_error(EMSG_UNDEF_FUNC);
2370
2371                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2372                         while (op1) {
2373                                 L.v = evaluate(nextarg(&op1), v1);
2374                                 copyvar(R.v, L.v);
2375                                 R.v->type |= VF_CHILD;
2376                                 R.v->x.parent = L.v;
2377                                 if (++R.v - X.v >= op->r.f->nargs)
2378                                         break;
2379                         }
2380
2381                         R.v = fnargs;
2382                         fnargs = X.v;
2383
2384                         L.s = g_progname;
2385                         res = evaluate(op->r.f->body.first, res);
2386                         g_progname = L.s;
2387
2388                         nvfree(fnargs);
2389                         fnargs = R.v;
2390                         break;
2391
2392                 case XC( OC_GETLINE ):
2393                 case XC( OC_PGETLINE ):
2394                         if (op1) {
2395                                 X.rsm = newfile(L.s);
2396                                 if (!X.rsm->F) {
2397                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2398                                                 X.rsm->F = popen(L.s, "r");
2399                                                 X.rsm->is_pipe = TRUE;
2400                                         } else {
2401                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2402                                         }
2403                                 }
2404                         } else {
2405                                 if (!iF) iF = next_input_file();
2406                                 X.rsm = iF;
2407                         }
2408
2409                         if (!X.rsm->F) {
2410                                 setvar_i(intvar[ERRNO], errno);
2411                                 setvar_i(res, -1);
2412                                 break;
2413                         }
2414
2415                         if (!op->r.n)
2416                                 R.v = intvar[F0];
2417
2418                         L.i = awk_getline(X.rsm, R.v);
2419                         if (L.i > 0) {
2420                                 if (!op1) {
2421                                         incvar(intvar[FNR]);
2422                                         incvar(intvar[NR]);
2423                                 }
2424                         }
2425                         setvar_i(res, L.i);
2426                         break;
2427
2428                 /* simple builtins */
2429                 case XC( OC_FBLTIN ):
2430                         switch (opn) {
2431
2432                         case F_in:
2433                                 R.d = (int)L.d;
2434                                 break;
2435
2436                         case F_rn:
2437                                 R.d = (double)rand() / (double)RAND_MAX;
2438                                 break;
2439 #if ENABLE_FEATURE_AWK_MATH
2440                         case F_co:
2441                                 R.d = cos(L.d);
2442                                 break;
2443
2444                         case F_ex:
2445                                 R.d = exp(L.d);
2446                                 break;
2447
2448                         case F_lg:
2449                                 R.d = log(L.d);
2450                                 break;
2451
2452                         case F_si:
2453                                 R.d = sin(L.d);
2454                                 break;
2455
2456                         case F_sq:
2457                                 R.d = sqrt(L.d);
2458                                 break;
2459 #else
2460                         case F_co:
2461                         case F_ex:
2462                         case F_lg:
2463                         case F_si:
2464                         case F_sq:
2465                                 syntax_error(EMSG_NO_MATH);
2466                                 break;
2467 #endif
2468                         case F_sr:
2469                                 R.d = (double)seed;
2470                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2471                                 srand(seed);
2472                                 break;
2473
2474                         case F_ti:
2475                                 R.d = time(NULL);
2476                                 break;
2477
2478                         case F_le:
2479                                 if (!op1)
2480                                         L.s = getvar_s(intvar[F0]);
2481                                 R.d = strlen(L.s);
2482                                 break;
2483
2484                         case F_sy:
2485                                 fflush(NULL);
2486                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2487                                                 ? (system(L.s) >> 8) : 0;
2488                                 break;
2489
2490                         case F_ff:
2491                                 if (!op1)
2492                                         fflush(stdout);
2493                                 else {
2494                                         if (L.s && *L.s) {
2495                                                 X.rsm = newfile(L.s);
2496                                                 fflush(X.rsm->F);
2497                                         } else {
2498                                                 fflush(NULL);
2499                                         }
2500                                 }
2501                                 break;
2502
2503                         case F_cl:
2504                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2505                                 if (X.rsm) {
2506                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2507                                         free(X.rsm->buffer);
2508                                         hash_remove(fdhash, L.s);
2509                                 }
2510                                 if (R.i != 0)
2511                                         setvar_i(intvar[ERRNO], errno);
2512                                 R.d = (double)R.i;
2513                                 break;
2514                         }
2515                         setvar_i(res, R.d);
2516                         break;
2517
2518                 case XC( OC_BUILTIN ):
2519                         res = exec_builtin(op, res);
2520                         break;
2521
2522                 case XC( OC_SPRINTF ):
2523                         setvar_p(res, awk_printf(op1));
2524                         break;
2525
2526                 case XC( OC_UNARY ):
2527                         X.v = R.v;
2528                         L.d = R.d = getvar_i(R.v);
2529                         switch (opn) {
2530                         case 'P':
2531                                 L.d = ++R.d;
2532                                 goto r_op_change;
2533                         case 'p':
2534                                 R.d++;
2535                                 goto r_op_change;
2536                         case 'M':
2537                                 L.d = --R.d;
2538                                 goto r_op_change;
2539                         case 'm':
2540                                 R.d--;
2541                                 goto r_op_change;
2542                         case '!':
2543                                 L.d = istrue(X.v) ? 0 : 1;
2544                                 break;
2545                         case '-':
2546                                 L.d = -R.d;
2547                                 break;
2548  r_op_change:
2549                                 setvar_i(X.v, R.d);
2550                         }
2551                         setvar_i(res, L.d);
2552                         break;
2553
2554                 case XC( OC_FIELD ):
2555                         R.i = (int)getvar_i(R.v);
2556                         if (R.i == 0) {
2557                                 res = intvar[F0];
2558                         } else {
2559                                 split_f0();
2560                                 if (R.i > nfields)
2561                                         fsrealloc(R.i);
2562                                 res = &Fields[R.i - 1];
2563                         }
2564                         break;
2565
2566                 /* concatenation (" ") and index joining (",") */
2567                 case XC( OC_CONCAT ):
2568                 case XC( OC_COMMA ):
2569                         opn = strlen(L.s) + strlen(R.s) + 2;
2570                         X.s = xmalloc(opn);
2571                         strcpy(X.s, L.s);
2572                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2573                                 L.s = getvar_s(intvar[SUBSEP]);
2574                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2575                                 strcat(X.s, L.s);
2576                         }
2577                         strcat(X.s, R.s);
2578                         setvar_p(res, X.s);
2579                         break;
2580
2581                 case XC( OC_LAND ):
2582                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2583                         break;
2584
2585                 case XC( OC_LOR ):
2586                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2587                         break;
2588
2589                 case XC( OC_BINARY ):
2590                 case XC( OC_REPLACE ):
2591                         R.d = getvar_i(R.v);
2592                         switch (opn) {
2593                         case '+':
2594                                 L.d += R.d;
2595                                 break;
2596                         case '-':
2597                                 L.d -= R.d;
2598                                 break;
2599                         case '*':
2600                                 L.d *= R.d;
2601                                 break;
2602                         case '/':
2603                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2604                                 L.d /= R.d;
2605                                 break;
2606                         case '&':
2607 #if ENABLE_FEATURE_AWK_MATH
2608                                 L.d = pow(L.d, R.d);
2609 #else
2610                                 syntax_error(EMSG_NO_MATH);
2611 #endif
2612                                 break;
2613                         case '%':
2614                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2615                                 L.d -= (int)(L.d / R.d) * R.d;
2616                                 break;
2617                         }
2618                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2619                         break;
2620
2621                 case XC( OC_COMPARE ):
2622                         if (is_numeric(L.v) && is_numeric(R.v)) {
2623                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2624                         } else {
2625                                 L.s = getvar_s(L.v);
2626                                 R.s = getvar_s(R.v);
2627                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2628                         }
2629                         switch (opn & 0xfe) {
2630                         case 0:
2631                                 R.i = (L.d > 0);
2632                                 break;
2633                         case 2:
2634                                 R.i = (L.d >= 0);
2635                                 break;
2636                         case 4:
2637                                 R.i = (L.d == 0);
2638                                 break;
2639                         }
2640                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2641                         break;
2642
2643                 default:
2644                         syntax_error(EMSG_POSSIBLE_ERROR);
2645                 }
2646                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2647                         op = op->a.n;
2648                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2649                         break;
2650                 if (nextrec)
2651                         break;
2652         }
2653         nvfree(v1);
2654         return res;
2655 #undef fnargs
2656 #undef seed
2657 #undef sreg
2658 }
2659
2660
2661 /* -------- main & co. -------- */
2662
2663 static int awk_exit(int r)
2664 {
2665         var tv;
2666         unsigned i;
2667         hash_item *hi;
2668
2669         zero_out_var(&tv);
2670
2671         if (!exiting) {
2672                 exiting = TRUE;
2673                 nextrec = FALSE;
2674                 evaluate(endseq.first, &tv);
2675         }
2676
2677         /* waiting for children */
2678         for (i = 0; i < fdhash->csize; i++) {
2679                 hi = fdhash->items[i];
2680                 while (hi) {
2681                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2682                                 pclose(hi->data.rs.F);
2683                         hi = hi->next;
2684                 }
2685         }
2686
2687         exit(r);
2688 }
2689
2690 /* if expr looks like "var=value", perform assignment and return 1,
2691  * otherwise return 0 */
2692 static int is_assignment(const char *expr)
2693 {
2694         char *exprc, *s, *s0, *s1;
2695
2696         exprc = xstrdup(expr);
2697         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2698                 free(exprc);
2699                 return FALSE;
2700         }
2701
2702         *(s++) = '\0';
2703         s0 = s1 = s;
2704         while (*s)
2705                 *(s1++) = nextchar(&s);
2706
2707         *s1 = '\0';
2708         setvar_u(newvar(exprc), s0);
2709         free(exprc);
2710         return TRUE;
2711 }
2712
2713 /* switch to next input file */
2714 static rstream *next_input_file(void)
2715 {
2716 #define rsm          (G.next_input_file__rsm)
2717 #define files_happen (G.next_input_file__files_happen)
2718
2719         FILE *F = NULL;
2720         const char *fname, *ind;
2721
2722         if (rsm.F) fclose(rsm.F);
2723         rsm.F = NULL;
2724         rsm.pos = rsm.adv = 0;
2725
2726         do {
2727                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2728                         if (files_happen)
2729                                 return NULL;
2730                         fname = "-";
2731                         F = stdin;
2732                 } else {
2733                         ind = getvar_s(incvar(intvar[ARGIND]));
2734                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2735                         if (fname && *fname && !is_assignment(fname))
2736                                 F = afopen(fname, "r");
2737                 }
2738         } while (!F);
2739
2740         files_happen = TRUE;
2741         setvar_s(intvar[FILENAME], fname);
2742         rsm.F = F;
2743         return &rsm;
2744 #undef rsm
2745 #undef files_happen
2746 }
2747
2748 int awk_main(int argc, char **argv);
2749 int awk_main(int argc, char **argv)
2750 {
2751         unsigned opt;
2752         char *opt_F, *opt_W;
2753         llist_t *opt_v = NULL;
2754         int i, j, flen;
2755         var *v;
2756         var tv;
2757         char **envp;
2758         char *vnames = (char *)vNames; /* cheat */
2759         char *vvalues = (char *)vValues;
2760
2761         INIT_G();
2762
2763         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2764          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2765         if (ENABLE_LOCALE_SUPPORT)
2766                 setlocale(LC_NUMERIC, "C");
2767
2768         zero_out_var(&tv);
2769
2770         /* allocate global buffer */
2771         g_buf = xmalloc(MAXVARFMT + 1);
2772
2773         vhash = hash_init();
2774         ahash = hash_init();
2775         fdhash = hash_init();
2776         fnhash = hash_init();
2777
2778         /* initialize variables */
2779         for (i = 0; *vnames; i++) {
2780                 intvar[i] = v = newvar(nextword(&vnames));
2781                 if (*vvalues != '\377')
2782                         setvar_s(v, nextword(&vvalues));
2783                 else
2784                         setvar_i(v, 0);
2785
2786                 if (*vnames == '*') {
2787                         v->type |= VF_SPECIAL;
2788                         vnames++;
2789                 }
2790         }
2791
2792         handle_special(intvar[FS]);
2793         handle_special(intvar[RS]);
2794
2795         newfile("/dev/stdin")->F = stdin;
2796         newfile("/dev/stdout")->F = stdout;
2797         newfile("/dev/stderr")->F = stderr;
2798
2799         /* Huh, people report that sometimes environ is NULL. Oh well. */
2800         if (environ) for (envp = environ; *envp; envp++) {
2801                 /* environ is writable, thus we don't strdup it needlessly */
2802                 char *s = *envp;
2803                 char *s1 = strchr(s, '=');
2804                 if (s1) {
2805                         *s1 = '\0';
2806                         /* Both findvar and setvar_u take const char*
2807                          * as 2nd arg -> environment is not trashed */
2808                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2809                         *s1 = '=';
2810                 }
2811         }
2812         opt_complementary = "v::";
2813         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2814         argv += optind;
2815         argc -= optind;
2816         if (opt & 0x1)
2817                 setvar_s(intvar[FS], opt_F); // -F
2818         while (opt_v) { /* -v */
2819                 if (!is_assignment(llist_pop(&opt_v)))
2820                         bb_show_usage();
2821         }
2822         if (opt & 0x4) { // -f
2823                 char *s = s; /* die, gcc, die */
2824                 FILE *from_file = afopen(g_progname, "r");
2825                 /* one byte is reserved for some trick in next_token */
2826                 if (fseek(from_file, 0, SEEK_END) == 0) {
2827                         flen = ftell(from_file);
2828                         s = xmalloc(flen + 4);
2829                         fseek(from_file, 0, SEEK_SET);
2830                         i = 1 + fread(s + 1, 1, flen, from_file);
2831                 } else {
2832                         for (i = j = 1; j > 0; i += j) {
2833                                 s = xrealloc(s, i + 4096);
2834                                 j = fread(s + i, 1, 4094, from_file);
2835                         }
2836                 }
2837                 s[i] = '\0';
2838                 fclose(from_file);
2839                 parse_program(s + 1);
2840                 free(s);
2841         } else { // no -f: take program from 1st parameter
2842                 if (!argc)
2843                         bb_show_usage();
2844                 g_progname = "cmd. line";
2845                 parse_program(*argv++);
2846                 argc--;
2847         }
2848         if (opt & 0x8) // -W
2849                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2850
2851         /* fill in ARGV array */
2852         setvar_i(intvar[ARGC], argc + 1);
2853         setari_u(intvar[ARGV], 0, "awk");
2854         i = 0;
2855         while (*argv)
2856                 setari_u(intvar[ARGV], ++i, *argv++);
2857
2858         evaluate(beginseq.first, &tv);
2859         if (!mainseq.first && !endseq.first)
2860                 awk_exit(EXIT_SUCCESS);
2861
2862         /* input file could already be opened in BEGIN block */
2863         if (!iF) iF = next_input_file();
2864
2865         /* passing through input files */
2866         while (iF) {
2867                 nextfile = FALSE;
2868                 setvar_i(intvar[FNR], 0);
2869
2870                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2871                         nextrec = FALSE;
2872                         incvar(intvar[NR]);
2873                         incvar(intvar[FNR]);
2874                         evaluate(mainseq.first, &tv);
2875
2876                         if (nextfile)
2877                                 break;
2878                 }
2879
2880                 if (i < 0)
2881                         syntax_error(strerror(errno));
2882
2883                 iF = next_input_file();
2884         }
2885
2886         awk_exit(EXIT_SUCCESS);
2887         /*return 0;*/
2888 }