b5bab16af292ac0bc2dae2d7c0a4398fdc213e9a
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13 extern char **environ;
14
15 /* This is a NOEXEC applet. Be very careful! */
16
17
18 #define MAXVARFMT       240     /* passed by getvar_s() to fmt_num() together with g_buf (presumably the buffer size) */
19 #define MINNVBLOCK      64      /* smallest number of vars nvalloc() puts in a newly allocated block */
20
21 /* variable flags (bits of var::type) */
22 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
23 #define VF_ARRAY        0x0002  /* 1 = it's an array */
24
25 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
26 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
27 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
28 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
29 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer (clrvar() does not free it) */
30 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
31 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly (set by clrvar()) */
32
33 /* these flags are static, don't change them when value is changed */
/* (clrvar() masks var::type with this value, so every other bit is reset) */
34 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
35
36 /* Variable: one awk value. Depending on the VF_* bits in 'type' it holds
 * a number, a string, or refers to an array / parent variable.
 */
37 typedef struct var_s {
38         unsigned type;            /* VF_* flag bits */
39         double number;            /* numeric value; see getvar_i() for when it is computed */
40         char *string;             /* string value; NULL is read back as "" by getvar_s() */
        /* Members of x share storage: iamarray() uses x.array with VF_ARRAY and
         * x.parent with VF_CHILD; nvfree() frees x.walker when VF_WALK is set.
         */
41         union {
42                 int aidx;               /* func arg idx (for compilation stage) */
43                 struct xhash_s *array;  /* array ptr */
44                 struct var_s *parent;   /* for func args, ptr to actual parameter */
45                 char **walker;          /* list of array elements (for..in) */
46         } x;
47 } var;
48
49 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
50 typedef struct chain_s {
51         struct node_s *first;           /* presumably the first node of the chain */
52         struct node_s *last;            /* presumably the last node of the chain */
53         const char *programname;        /* NOTE(review): looks like the source name used in messages - confirm */
54 } chain;
55
56 /* Function: one entry of the functions hash (see hash_item_s::data.f) */
57 typedef struct func_s {
58         unsigned nargs;         /* presumably the number of declared arguments */
59         struct chain_s body;    /* node chain holding the function body */
60 } func;
61
62 /* I/O stream: one entry of the redirect streams hash (see hash_item_s::data.rs) */
63 typedef struct rstream_s {
64         FILE *F;                /* underlying stdio stream */
65         char *buffer;           /* NOTE(review): buffer/adv/size/pos are not used in this part of the file; */
66         int adv;                /* they look like read-buffer bookkeeping - confirm against the reader code */
67         int size;
68         int pos;
69         smallint is_pipe;       /* presumably nonzero when F was opened as a pipe */
70 } rstream;
71
/* One hash table entry. 'data' is the first member, and hash_find() stores the
 * void* returned by hash_search() (the address of 'data') in a hash_item*,
 * so 'data' must stay first.
 */
72 typedef struct hash_item_s {
73         union {
74                 struct var_s v;         /* variable/array hash */
75                 struct rstream_s rs;    /* redirect streams hash */
76                 struct func_s f;        /* functions hash */
77         } data;
78         struct hash_item_s *next;       /* next in chain */
79         char name[1];                   /* really it's longer: hash_find() allocates strlen(name)+1 extra bytes */
80 } hash_item;
81
/* Chained hash table keyed by string; buckets are selected with hashidx(name) % csize */
82 typedef struct xhash_s {
83         unsigned nel;           /* num of elements */
84         unsigned csize;         /* current hash size (number of buckets in items[]) */
85         unsigned nprime;        /* next hash size in PRIMES[] (index used by hash_rebuild()) */
86         unsigned glen;          /* summary length of item names (each counted with its NUL) */
87         struct hash_item_s **items;     /* bucket array, csize chain heads */
88 } xhash;
89
90 /* Tree node of the parsed program */
91 typedef struct node_s {
92         uint32_t info;          /* NOTE(review): presumably a tokeninfo[]-style value (class, flags, priority) - confirm */
93         unsigned lineno;        /* presumably the source line this node came from */
        /* l, r and a are three operand slots; which union member is
         * meaningful is not visible in this part of the file.
         */
94         union {
95                 struct node_s *n;
96                 var *v;
97                 int i;
98                 char *s;
99                 regex_t *re;
100         } l;
101         union {
102                 struct node_s *n;
103                 regex_t *ire;
104                 func *f;
105                 int argno;
106         } r;
107         union {
108                 struct node_s *n;
109         } a;
110 } node;
111
112 /* Block of temporary variables; blocks form a doubly linked list managed
 * by nvalloc() and nvfree().
 */
113 typedef struct nvblock_s {
114         int size;                       /* capacity of nv[] in vars */
115         var *pos;                       /* first unused var in nv[] */
116         struct nvblock_s *prev;
117         struct nvblock_s *next;
118         var nv[0];                      /* storage; nvalloc() allocates size * sizeof(var) bytes after the header */
119 } nvblock;
120
/* A node bundled with two regex_t slots; used for the fsplitter/rsplitter and
 * exec_builtin__tspl globals.
 * NOTE(review): presumably re[0]/re[1] are two compiled variants of the same
 * splitter pattern - confirm against the code that fills them.
 */
121 typedef struct tsplitter_s {
122         node n;
123         regex_t re[2];
124 } tsplitter;
125
126 /* simple token classes */
127 /* Order and hex values are very important!!!  See next_token() */
/* Class number k (bit 1 << k) corresponds to the k-th NTC-separated group
 * of tokenlist[].
 */
128 #define TC_SEQSTART      1                              /* ( */
129 #define TC_SEQTERM      (1 << 1)                /* ) */
130 #define TC_REGEXP       (1 << 2)                /* /.../ */
131 #define TC_OUTRDR       (1 << 3)                /* | > >> */
132 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
133 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
134 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
135 #define TC_IN           (1 << 7)                /* in */
136 #define TC_COMMA        (1 << 8)                /* , */
137 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
138 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
139 #define TC_ARRTERM      (1 << 11)               /* ] */
140 #define TC_GRPSTART     (1 << 12)               /* { */
141 #define TC_GRPTERM      (1 << 13)               /* } */
142 #define TC_SEMICOL      (1 << 14)               /* ; */
143 #define TC_NEWLINE      (1 << 15)               /* end of line */
144 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
145 #define TC_WHILE        (1 << 17)               /* while */
146 #define TC_ELSE         (1 << 18)               /* else */
147 #define TC_BUILTIN      (1 << 19)               /* builtin function name */
148 #define TC_GETLINE      (1 << 20)               /* getline */
149 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
150 #define TC_BEGIN        (1 << 22)               /* BEGIN */
151 #define TC_END          (1 << 23)               /* END */
152 #define TC_EOF          (1 << 24)               /* end of program text */
153 #define TC_VARIABLE     (1 << 25)
154 #define TC_ARRAY        (1 << 26)
155 #define TC_FUNCTION     (1 << 27)
156 #define TC_STRING       (1 << 28)               /* "..." literal */
157 #define TC_NUMBER       (1 << 29)
158
159 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
160
161 /* combined token classes */
162 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
163 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
164 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
165                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
166
167 #define TC_STATEMNT (TC_STATX | TC_WHILE)
168 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
169
170 /* word tokens, cannot mean something else if not expected */
171 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
172                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
173
174 /* discard newlines after these */
175 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
176                    | TC_BINOP | TC_OPTERM)
177
178 /* what can expression begin with */
179 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
180 /* what can group begin with */
181 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
182
183 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
184 /* operator is inserted between them */
185 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
186                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
187 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
188
/* Operand flags stored in tokeninfo[] values (bits 16..21).
 * NOTE(review): presumably RESn = evaluate operand n, STRn = fetch it as a
 * string, NUM1 = fetch operand 1 as a number - confirm against evaluate().
 */
189 #define OF_RES1    0x010000
190 #define OF_RES2    0x020000
191 #define OF_STR1    0x040000
192 #define OF_STR2    0x080000
193 #define OF_NUM1    0x100000
194 #define OF_CHECKED 0x200000
195
196 /* combined operator flags */
/* two letters = left/right operand: x = none, V = RES, S = RES|STR, N = RES|NUM */
197 #define xx      0
198 #define xV      OF_RES2
199 #define xS      (OF_RES2 | OF_STR2)
200 #define Vx      OF_RES1
201 #define VV      (OF_RES1 | OF_RES2)
202 #define Nx      (OF_RES1 | OF_NUM1)
203 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
204 #define Sx      (OF_RES1 | OF_STR1)
205 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
206 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
207
208 #define OPCLSMASK 0xFF00        /* selects the OC_* / ST_* operation class byte */
209 #define OPNMASK   0x007F        /* selects the low 7 bits (operation number / character) */
210
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places a priority (or builtin argument descriptor) into bits 24..31.
 * The argument is parenthesized and converted to uint32_t before shifting,
 * so compound arguments expand correctly and values with the top bit set
 * (e.g. P(0x83)) do not overflow a signed int.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000    /* priority without the topmost bit */
#define PRIMASK2  0x7E000000    /* as PRIMASK, also ignoring the grouping (even/odd) bit */
218
219 /* Operation classes */
/* Values occupy the byte selected by OPCLSMASK. */
220
/* SHIFT_TIL_THIS equals OC_WALKINIT and RECUR_FROM_THIS equals OC_BINARY,
 * i.e. they mark boundaries between the groups of the enum below.
 * NOTE(review): how the evaluator uses these boundaries is not visible here.
 */
221 #define SHIFT_TIL_THIS  0x0600
222 #define RECUR_FROM_THIS 0x1000
223
224 enum {
225         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
226         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
227
228         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
229         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
230         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
231
232         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
233         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
234         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
235         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
236         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
237         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
238         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
239         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
240         OC_DONE = 0x2800,
241
        /* statement kinds; tokeninfo[] pairs them with if/do/for/while */
242         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
243         ST_WHILE = 0x3300
244 };
245
246 /* simple builtins */
/* Sub-codes combined with OC_FBLTIN in tokeninfo[]. Per the tokenlist[] pairing:
 * in=int rn=rand co=cos ex=exp lg=log si=sin sq=sqrt sr=srand
 * ti=systime le=length sy=system ff=fflush cl=close
 */
247 enum {
248         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
249         F_ti,   F_le,   F_sy,   F_ff,   F_cl
250 };
251
252 /* builtins */
/* Sub-codes combined with OC_BUILTIN in tokeninfo[]. Per the tokenlist[] pairing:
 * a2=atan2 ix=index ma=match sp=split ss=substr ti=strftime lo=tolower up=toupper
 * ge=gensub gs=gsub su=sub an=and co=compl ls=lshift or=or rs=rshift xo=xor
 */
253 enum {
254         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
255         B_ge,   B_gs,   B_su,
256         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
257 };
258
259 /* tokens and their corresponding info values */
/* tokenlist[] is a packed list of entries, each a length byte (octal escape)
 * followed by that many characters of token text. NTC ends the current token
 * class and starts the next one; a final "\0" entry terminates the list.
 * The n-th token entry here pairs with the n-th element of tokeninfo[].
 */
260
261 #define NTC     "\377"  /* switch to next token class (tc<<1) */
262 #define NTCC    '\377'
263
264 #define OC_B    OC_BUILTIN
265
266 static const char tokenlist[] =
267         "\1("       NTC
268         "\1)"       NTC
269         "\1/"       NTC                                 /* REGEXP */
270         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
271         "\2++"      "\2--"      NTC                     /* UOPPOST */
272         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
273         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
274         "\2*="      "\2/="      "\2%="      "\2^="
275         "\1+"       "\1-"       "\3**="     "\2**"
276         "\1/"       "\1%"       "\1^"       "\1*"
277         "\2!="      "\2>="      "\2<="      "\1>"
278         "\1<"       "\2!~"      "\1~"       "\2&&"
279         "\2||"      "\1?"       "\1:"       NTC
280         "\2in"      NTC                                 /* IN */
281         "\1,"       NTC                                 /* COMMA */
282         "\1|"       NTC                                 /* PIPE */
283         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
284         "\1]"       NTC                                 /* ARRTERM */
285         "\1{"       NTC                                 /* GRPSTART */
286         "\1}"       NTC                                 /* GRPTERM */
287         "\1;"       NTC                                 /* SEMICOL */
288         "\1\n"      NTC                                 /* NEWLINE */
289         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
290         "\10continue"           "\6delete"  "\5print"
291         "\6printf"  "\4next"    "\10nextfile"
292         "\6return"  "\4exit"    NTC
293         "\5while"   NTC                                 /* WHILE */
294         "\4else"    NTC                                 /* ELSE */
295
296         "\3and"     "\5compl"   "\6lshift"  "\2or"
297         "\6rshift"  "\3xor"
298         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
299         "\3cos"     "\3exp"     "\3int"     "\3log"
300         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
301         "\6gensub"  "\4gsub"    "\5index"   "\6length"
302         "\5match"   "\5split"   "\7sprintf" "\3sub"
303         "\6substr"  "\7systime" "\10strftime"
304         "\7tolower" "\7toupper" NTC
305         "\7getline" NTC                                 /* GETLINE */
306         "\4func"    "\10function"   NTC                 /* FUNCDECL */
307         "\5BEGIN"   NTC                                 /* BEGIN */
308         "\3END"     "\0"                                /* END, then list terminator */
309         ;
310
/* Info word for every token of tokenlist[], in the same order (NTC markers
 * have no entry here). A word is built from an OC_* / ST_* operation class,
 * operand flags (xx..SS), P(priority) and a low sub-code (a character or an
 * F_* / B_* constant). Keep both tables in step when adding tokens.
 */
311 static const uint32_t tokeninfo[] = {
312         0,
313         0,
314         OC_REGEXP,
315         xS|'a',     xS|'w',     xS|'|',
316         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
317         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
318             OC_FIELD|xV|P(5),
319         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
320             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
321         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
322             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
323         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
324             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
325         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
326             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
327         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
328             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
329         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
330             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
331         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
332             OC_COLON|xx|P(67)|':',
333         OC_IN|SV|P(49),
334         OC_COMMA|SS|P(80),
335         OC_PGETLINE|SV|P(37),
336         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
337             OC_UNARY|xV|P(19)|'!',
338         0,
339         0,
340         0,
341         0,
342         0,
343         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
344         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
345         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
346         OC_RETURN|Vx,   OC_EXIT|Nx,
347         ST_WHILE,
348         0,
349
        /* builtins: here P() carries the argument descriptor, not a priority */
350         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
351         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
352         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
353         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
354         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
355         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
356         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
357         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
358         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
359         OC_GETLINE|SV|P(0),
360         0,      0,
361         0,
362         0
363 };
364
365 /* internal variable names and their initial values       */
366 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices into intvar[]; the order matches the names in vNames[]. */
367 enum {
368         CONVFMT,    OFMT,       FS,         OFS,
369         ORS,        RS,         RT,         FILENAME,
370         SUBSEP,     ARGIND,     ARGC,       ARGV,
371         ERRNO,      FNR,
372         NR,         NF,         IGNORECASE,
373         ENVIRON,    F0,         NUM_INTERNAL_VARS
374 };
375
/* NUL-separated names in the order of the enum above; a '*' right after a
 * name's NUL marks that variable as SPECIAL (FS, RS, NF, IGNORECASE, $).
 * The list ends with an empty name.
 */
376 static const char vNames[] =
377         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
378         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
379         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
380         "ERRNO\0"   "FNR\0"
381         "NR\0"      "NF\0*"     "IGNORECASE\0*"
382         "ENVIRON\0" "$\0*"      "\0";
383
/* NUL-separated initial string values for the first nine names of vNames[]
 * (CONVFMT .. SUBSEP), followed by a '\377' byte.
 * NOTE(review): presumably '\377' tells the initialisation code that the
 * remaining variables have no string default - confirm there.
 */
384 static const char vValues[] =
385         "%.6g\0"    "%.6g\0"    " \0"       " \0"
386         "\n\0"      "\n\0"      "\0"        "\0"
387         "\034\0"
388         "\377";
389
/* Hash sizes: a table starts with FIRST_PRIME buckets (see hash_init) and
 * hash_rebuild() moves it through PRIMES[] one step at a time.
 * FIRST_PRIME carries no trailing semicolon so that it can be used inside
 * expressions as well as in plain assignments.
 */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
394
395 /* globals */
/* All mutable state of the applet lives in this one struct. INIT_G() allocates
 * it zero-filled and the G macro reaches it through ptr_to_globals.
 */
396
397 struct globals {
398         /* former 'struct t' */
        /* description of the current token, filled in by next_token() */
399         uint32_t t_info; /* often used */
400         uint32_t t_tclass;
401         char *t_string;
402         double t_double;
403         int t_lineno;
404         int t_rollback;         /* when set, next_token() clears it and reads nothing new */
405
406         /* the rest */
407         smallint icase;
408         smallint exiting;
409         smallint nextrec;
410         smallint nextfile;
411         smallint is_f0_split;
412         chain beginseq, mainseq, endseq, *seq;
413         node *break_ptr, *continue_ptr;
414         rstream *iF;
415         xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables, fdhash: streams, fnhash: functions (see newvar/newfile/newfunc) */
416         const char *g_progname; /* program name printed by syntax_error() */
417         int g_lineno;           /* line number printed by syntax_error() */
418         int nfields;
419         int maxfields; /* used in fsrealloc() only */
420         var *Fields;
421         nvblock *g_cb;          /* current block of the temporary-variable allocator (nvalloc/nvfree) */
422         char *g_pos;            /* where next_token() starts reading */
423         char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
424
425         /* former statics from various functions */
426         char *split_f0__fstrings;
427
428         rstream next_input_file__rsm;
429         smallint next_input_file__files_happen;
430
431         smallint next_token__concat_inserted;
432         uint32_t next_token__save_tclass;
433         uint32_t next_token__save_info;
434         uint32_t next_token__ltclass;   /* INIT_G() sets this to TC_OPTERM */
435
436         var *evaluate__fnargs;
437         unsigned evaluate__seed;        /* INIT_G() sets this to 1 */
438         regex_t evaluate__sreg;
439
440         var ptest__v;
441
442         tsplitter exec_builtin__tspl;
443
444         /* biggest members go last */
445         var *intvar[NUM_INTERNAL_VARS]; /* indexed by the CONVFMT..F0 enum */
446         tsplitter fsplitter, rsplitter;
447 };
/* G is the globals struct itself; the short names below expand to its members,
 * so the rest of the file can use them like ordinary global variables.
 */
448 #define G (*ptr_to_globals)
449 /* for debug */
450 /* char Gsize[sizeof(G)];  ~0x240 */
451 /* Trying to keep most of members accessible with short offsets: */
452 /* char Gofs_seed[offsetof(struct globals, evaluate__seed)];  ~0xc0 */
453 #define t_info       (G.t_info      )
454 #define t_tclass     (G.t_tclass    )
455 #define t_string     (G.t_string    )
456 #define t_double     (G.t_double    )
457 #define t_lineno     (G.t_lineno    )
458 #define t_rollback   (G.t_rollback  )
459 #define icase        (G.icase       )
460 #define exiting      (G.exiting     )
461 #define nextrec      (G.nextrec     )
462 #define nextfile     (G.nextfile    )
463 #define is_f0_split  (G.is_f0_split )
464 #define beginseq     (G.beginseq    )
465 #define mainseq      (G.mainseq     )
466 #define endseq       (G.endseq      )
467 #define seq          (G.seq         )
468 #define break_ptr    (G.break_ptr   )
469 #define continue_ptr (G.continue_ptr)
470 #define iF           (G.iF          )
471 #define vhash        (G.vhash       )
472 #define ahash        (G.ahash       )
473 #define fdhash       (G.fdhash      )
474 #define fnhash       (G.fnhash      )
475 #define g_progname   (G.g_progname  )
476 #define g_lineno     (G.g_lineno    )
477 #define nfields      (G.nfields     )
478 #define maxfields    (G.maxfields   )
479 #define Fields       (G.Fields      )
480 #define g_cb         (G.g_cb        )
481 #define g_pos        (G.g_pos       )
482 #define g_buf        (G.g_buf       )
483 #define intvar       (G.intvar      )
484 #define fsplitter    (G.fsplitter   )
485 #define rsplitter    (G.rsplitter   )
/* Allocate the zero-filled globals and set the two members whose initial
 * value is not zero.
 */
486 #define INIT_G() do { \
487         PTR_TO_GLOBALS = xzalloc(sizeof(G)); \
488         G.next_token__ltclass = TC_OPTERM; \
489         G.evaluate__seed = 1; \
490 } while (0)
491
492
493 /* function prototypes */
/* forward declarations of functions that are used before their definitions */
494 static void handle_special(var *);      /* called by the setvar_*() helpers and copyvar() after a value changes */
495 static node *parse_expr(uint32_t);
496 static void chain_group(void);
497 static var *evaluate(node *, var *);
498 static rstream *next_input_file(void);
499 static int fmt_num(char *, int, const char *, double, int);     /* used by getvar_s() to format a number into g_buf */
500 static int awk_exit(int) ATTRIBUTE_NORETURN;
501
502 /* ---- error handling ---- */
/* message texts; in this part of the file they are passed to syntax_error() */
503
504 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
505 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
506 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
507 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
508 static const char EMSG_INV_FMT[] = "Invalid format specifier";
509 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
510 static const char EMSG_NOT_ARRAY[] = "Not an array";
511 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
512 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
/* only defined when math support is configured out */
513 #if !ENABLE_FEATURE_AWK_MATH
514 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
515 #endif
516
517 static void zero_out_var(var * vp)
518 {
519         memset(vp, 0, sizeof(*vp));
520 }
521
522 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
523 static void syntax_error(const char * const message)
524 {
525         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
526 }
527
528 /* ---- hash stuff ---- */
529
/* Hash a NUL-terminated name: for every character c the running value
 * becomes c + 63 * value (written as a shift and a subtraction).
 * The caller reduces the result modulo the table size.
 */
static unsigned hashidx(const char *name)
{
        unsigned hash = 0;
        const char *p;

        for (p = name; *p != '\0'; p++)
                hash = *p + (hash << 6) - hash;

        return hash;
}
537
538 /* create new hash */
539 static xhash *hash_init(void)
540 {
541         xhash *newhash;
542
543         newhash = xzalloc(sizeof(xhash));
544         newhash->csize = FIRST_PRIME;
545         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
546
547         return newhash;
548 }
549
550 /* find item in hash, return ptr to data, NULL if not found */
551 static void *hash_search(xhash *hash, const char *name)
552 {
553         hash_item *hi;
554
555         hi = hash->items [ hashidx(name) % hash->csize ];
556         while (hi) {
557                 if (strcmp(hi->name, name) == 0)
558                         return &(hi->data);
559                 hi = hi->next;
560         }
561         return NULL;
562 }
563
564 /* grow hash if it becomes too big */
565 static void hash_rebuild(xhash *hash)
566 {
567         unsigned newsize, i, idx;
568         hash_item **newitems, *hi, *thi;
569
570         if (hash->nprime == NPRIMES)
571                 return;
572
573         newsize = PRIMES[hash->nprime++];
574         newitems = xzalloc(newsize * sizeof(hash_item *));
575
576         for (i = 0; i < hash->csize; i++) {
577                 hi = hash->items[i];
578                 while (hi) {
579                         thi = hi;
580                         hi = thi->next;
581                         idx = hashidx(thi->name) % newsize;
582                         thi->next = newitems[idx];
583                         newitems[idx] = thi;
584                 }
585         }
586
587         free(hash->items);
588         hash->csize = newsize;
589         hash->items = newitems;
590 }
591
592 /* find item in hash, add it if necessary. Return ptr to data */
593 static void *hash_find(xhash *hash, const char *name)
594 {
595         hash_item *hi;
596         unsigned idx;
597         int l;
598
599         hi = hash_search(hash, name);
600         if (! hi) {
601                 if (++hash->nel / hash->csize > 10)
602                         hash_rebuild(hash);
603
604                 l = strlen(name) + 1;
605                 hi = xzalloc(sizeof(hash_item) + l);
606                 memcpy(hi->name, name, l);
607
608                 idx = hashidx(name) % hash->csize;
609                 hi->next = hash->items[idx];
610                 hash->items[idx] = hi;
611                 hash->glen += l;
612         }
613         return &(hi->data);
614 }
615
/* Typed wrappers around hash_find(): each looks the name up in the given (or
 * the corresponding global) hash, creates a zero-filled entry if it is
 * missing, and casts the returned data pointer to the matching type.
 */
616 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
617 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
618 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
619 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
620
621 static void hash_remove(xhash *hash, const char *name)
622 {
623         hash_item *hi, **phi;
624
625         phi = &(hash->items[hashidx(name) % hash->csize]);
626         while (*phi) {
627                 hi = *phi;
628                 if (strcmp(hi->name, name) == 0) {
629                         hash->glen -= (strlen(name) + 1);
630                         hash->nel--;
631                         *phi = hi->next;
632                         free(hi);
633                         break;
634                 }
635                 phi = &(hi->next);
636         }
637 }
638
639 /* ------ some useful functions ------ */
640
641 static void skip_spaces(char **s)
642 {
643         char *p = *s;
644
645         while (1) {
646                 if (*p == '\\' && p[1] == '\n') {
647                         p++;
648                         t_lineno++;
649                 } else if (*p != ' ' && *p != '\t') {
650                         break;
651                 }
652                 p++;
653         }
654         *s = p;
655 }
656
/* Return the NUL-terminated word *s points at and move *s just past that
 * word's terminating NUL, i.e. to the start of the following word.
 */
static char *nextword(char **s)
{
        char *word = *s;
        char *end = word;

        while (*end != '\0')
                end++;
        *s = end + 1;   /* step over the terminator */

        return word;
}
665
/* Fetch one character from *s and advance *s. A backslash is handed to
 * bb_process_escape_sequence(); if that call returns a backslash without
 * moving *s, the character after the backslash is returned instead.
 */
static char nextchar(char **s)
{
        char *after_backslash;
        char ch = **s;

        (*s)++;
        if (ch != '\\')
                return ch;

        after_backslash = *s;
        ch = bb_process_escape_sequence((const char **)s);
        if (ch == '\\' && *s == after_backslash) {
                ch = **s;
                (*s)++;
        }
        return ch;
}
676
677 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
678 {
679         return (isalnum(c) || c == '_');
680 }
681
/* Open PATH with MODE through xfopen(); the path "-" yields stdin instead. */
static FILE *afopen(const char *path, const char *mode)
{
        if (path[0] == '-' && path[1] == '\0')
                return stdin;

        return xfopen(path, mode);
}
686
687 /* -------- working with variables (set/get/copy/etc) -------- */
688
689 static xhash *iamarray(var *v)
690 {
691         var *a = v;
692
693         while (a->type & VF_CHILD)
694                 a = a->x.parent;
695
696         if (!(a->type & VF_ARRAY)) {
697                 a->type |= VF_ARRAY;
698                 a->x.array = hash_init();
699         }
700         return a->x.array;
701 }
702
703 static void clear_array(xhash *array)
704 {
705         unsigned i;
706         hash_item *hi, *thi;
707
708         for (i = 0; i < array->csize; i++) {
709                 hi = array->items[i];
710                 while (hi) {
711                         thi = hi;
712                         hi = hi->next;
713                         free(thi->data.v.string);
714                         free(thi);
715                 }
716                 array->items[i] = NULL;
717         }
718         array->glen = array->nel = 0;
719 }
720
721 /* clear a variable */
722 static var *clrvar(var *v)
723 {
724         if (!(v->type & VF_FSTR))
725                 free(v->string);
726
727         v->type &= VF_DONTTOUCH;
728         v->type |= VF_DIRTY;
729         v->string = NULL;
730         return v;
731 }
732
733 /* assign string value to variable */
734 static var *setvar_p(var *v, char *value)
735 {
736         clrvar(v);
737         v->string = value;
738         handle_special(v);
739         return v;
740 }
741
742 /* same as setvar_p but make a copy of string */
743 static var *setvar_s(var *v, const char *value)
744 {
745         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
746 }
747
748 /* same as setvar_s but set USER flag */
749 static var *setvar_u(var *v, const char *value)
750 {
751         setvar_s(v, value);
752         v->type |= VF_USER;
753         return v;
754 }
755
756 /* set array element to user string */
757 static void setari_u(var *a, int idx, const char *s)
758 {
759         char sidx[sizeof(int)*3 + 1];
760         var *v;
761
762         sprintf(sidx, "%d", idx);
763         v = findvar(iamarray(a), sidx);
764         setvar_u(v, s);
765 }
766
767 /* assign numeric value to variable */
768 static var *setvar_i(var *v, double value)
769 {
770         clrvar(v);
771         v->type |= VF_NUMBER;
772         v->number = value;
773         handle_special(v);
774         return v;
775 }
776
777 static const char *getvar_s(var *v)
778 {
779         /* if v is numeric and has no cached string, convert it to string */
780         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
781                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
782                 v->string = xstrdup(g_buf);
783                 v->type |= VF_CACHED;
784         }
785         return (v->string == NULL) ? "" : v->string;
786 }
787
788 static double getvar_i(var *v)
789 {
790         char *s;
791
792         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
793                 v->number = 0;
794                 s = v->string;
795                 if (s && *s) {
796                         v->number = strtod(s, &s);
797                         if (v->type & VF_USER) {
798                                 skip_spaces(&s);
799                                 if (*s != '\0')
800                                         v->type &= ~VF_USER;
801                         }
802                 } else {
803                         v->type &= ~VF_USER;
804                 }
805                 v->type |= VF_CACHED;
806         }
807         return v->number;
808 }
809
810 static var *copyvar(var *dest, const var *src)
811 {
812         if (dest != src) {
813                 clrvar(dest);
814                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
815                 dest->number = src->number;
816                 if (src->string)
817                         dest->string = xstrdup(src->string);
818         }
819         handle_special(dest);
820         return dest;
821 }
822
823 static var *incvar(var *v)
824 {
825         return setvar_i(v, getvar_i(v)+1.);
826 }
827
828 /* return true if v is number or numeric string */
829 static int is_numeric(var *v)
830 {
831         getvar_i(v);
832         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
833 }
834
835 /* return 1 when value of v corresponds to true, 0 otherwise */
836 static int istrue(var *v)
837 {
838         if (is_numeric(v))
839                 return (v->number == 0) ? 0 : 1;
840         return (v->string && *(v->string)) ? 1 : 0;
841 }
842
843 /* temporary variables allocator. Last allocated should be first freed */
844 static var *nvalloc(int n)
845 {
846         nvblock *pb = NULL;
847         var *v, *r;
848         int size;
849
850         while (g_cb) {
851                 pb = g_cb;
852                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
853                 g_cb = g_cb->next;
854         }
855
856         if (!g_cb) {
857                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
858                 g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
859                 g_cb->size = size;
860                 g_cb->pos = g_cb->nv;
861                 g_cb->prev = pb;
862                 g_cb->next = NULL;
863                 if (pb) pb->next = g_cb;
864         }
865
866         v = r = g_cb->pos;
867         g_cb->pos += n;
868
869         while (v < g_cb->pos) {
870                 v->type = 0;
871                 v->string = NULL;
872                 v++;
873         }
874
875         return r;
876 }
877
878 static void nvfree(var *v)
879 {
880         var *p;
881
882         if (v < g_cb->nv || v >= g_cb->pos)
883                 syntax_error(EMSG_INTERNAL_ERROR);
884
885         for (p = v; p < g_cb->pos; p++) {
886                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
887                         clear_array(iamarray(p));
888                         free(p->x.array->items);
889                         free(p->x.array);
890                 }
891                 if (p->type & VF_WALK)
892                         free(p->x.walker);
893
894                 clrvar(p);
895         }
896
897         g_cb->pos = v;
898         while (g_cb->prev && g_cb->pos == g_cb->nv) {
899                 g_cb = g_cb->prev;
900         }
901 }
902
903 /* ------- awk program text parsing ------- */
904
905 /* Parse the next token at global g_pos and store the result in the t_* globals.
906  * If the token is not one of the expected classes, give up. Return the token class.
907  */
908 static uint32_t next_token(uint32_t expected)
909 {
910 #define concat_inserted (G.next_token__concat_inserted)
911 #define save_tclass     (G.next_token__save_tclass)
912 #define save_info       (G.next_token__save_info)
913 /* Initialized to TC_OPTERM: */
914 #define ltclass         (G.next_token__ltclass)
915
916         char *p, *pp, *s;
917         const char *tl;
918         uint32_t tc;
919         const uint32_t *ti;
920         int l;
921
922         if (t_rollback) {
923                 t_rollback = FALSE;
924
925         } else if (concat_inserted) {
926                 concat_inserted = FALSE;
927                 t_tclass = save_tclass;
928                 t_info = save_info;
929
930         } else {
931                 p = g_pos;
932  readnext:
933                 skip_spaces(&p);
934                 g_lineno = t_lineno;
935                 if (*p == '#')
936                         while (*p != '\n' && *p != '\0')
937                                 p++;
938
939                 if (*p == '\n')
940                         t_lineno++;
941
942                 if (*p == '\0') {
943                         tc = TC_EOF;
944
945                 } else if (*p == '\"') {
946                         /* it's a string */
947                         t_string = s = ++p;
948                         while (*p != '\"') {
949                                 if (*p == '\0' || *p == '\n')
950                                         syntax_error(EMSG_UNEXP_EOS);
951                                 *(s++) = nextchar(&p);
952                         }
953                         p++;
954                         *s = '\0';
955                         tc = TC_STRING;
956
957                 } else if ((expected & TC_REGEXP) && *p == '/') {
958                         /* it's regexp */
959                         t_string = s = ++p;
960                         while (*p != '/') {
961                                 if (*p == '\0' || *p == '\n')
962                                         syntax_error(EMSG_UNEXP_EOS);
963                                 *s = *p++;
964                                 if (*s++ == '\\') {
965                                         pp = p;
966                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
967                                         if (*pp == '\\')
968                                                 *s++ = '\\';
969                                         if (p == pp)
970                                                 *s++ = *p++;
971                                 }
972                         }
973                         p++;
974                         *s = '\0';
975                         tc = TC_REGEXP;
976
977                 } else if (*p == '.' || isdigit(*p)) {
978                         /* it's a number */
979                         t_double = strtod(p, &p);
980                         if (*p == '.')
981                                 syntax_error(EMSG_UNEXP_TOKEN);
982                         tc = TC_NUMBER;
983
984                 } else {
985                         /* search for something known */
986                         tl = tokenlist;
987                         tc = 0x00000001;
988                         ti = tokeninfo;
989                         while (*tl) {
990                                 l = *(tl++);
991                                 if (l == NTCC) {
992                                         tc <<= 1;
993                                         continue;
994                                 }
995                                 /* if token class is expected, token
996                                  * matches and it's not a longer word,
997                                  * then this is what we are looking for
998                                  */
999                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1000                                  && *tl == *p && strncmp(p, tl, l) == 0
1001                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1002                                 ) {
1003                                         t_info = *ti;
1004                                         p += l;
1005                                         break;
1006                                 }
1007                                 ti++;
1008                                 tl += l;
1009                         }
1010
1011                         if (!*tl) {
1012                                 /* it's a name (var/array/function),
1013                                  * otherwise it's something wrong
1014                                  */
1015                                 if (!isalnum_(*p))
1016                                         syntax_error(EMSG_UNEXP_TOKEN);
1017
1018                                 t_string = --p;
1019                                 while (isalnum_(*(++p))) {
1020                                         *(p-1) = *p;
1021                                 }
1022                                 *(p-1) = '\0';
1023                                 tc = TC_VARIABLE;
1024                                 /* also consume whitespace between functionname and bracket */
1025                                 if (!(expected & TC_VARIABLE))
1026                                         skip_spaces(&p);
1027                                 if (*p == '(') {
1028                                         tc = TC_FUNCTION;
1029                                 } else {
1030                                         if (*p == '[') {
1031                                                 p++;
1032                                                 tc = TC_ARRAY;
1033                                         }
1034                                 }
1035                         }
1036                 }
1037                 g_pos = p;
1038
1039                 /* skipping newlines in some cases */
1040                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1041                         goto readnext;
1042
1043                 /* insert concatenation operator when needed */
1044                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1045                         concat_inserted = TRUE;
1046                         save_tclass = tc;
1047                         save_info = t_info;
1048                         tc = TC_BINOP;
1049                         t_info = OC_CONCAT | SS | P(35);
1050                 }
1051
1052                 t_tclass = tc;
1053         }
1054         ltclass = t_tclass;
1055
1056         /* Are we ready for this? */
1057         if (!(ltclass & expected))
1058                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1059                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1060
1061         return ltclass;
1062 #undef concat_inserted
1063 #undef save_tclass
1064 #undef save_info
1065 #undef ltclass
1066 }
1067
1068 static void rollback_token(void)
1069 {
1070         t_rollback = TRUE;
1071 }
1072
1073 static node *new_node(uint32_t info)
1074 {
1075         node *n;
1076
1077         n = xzalloc(sizeof(node));
1078         n->info = info;
1079         n->lineno = g_lineno;
1080         return n;
1081 }
1082
1083 static node *mk_re_node(const char *s, node *n, regex_t *re)
1084 {
1085         n->info = OC_REGEXP;
1086         n->l.re = re;
1087         n->r.ire = re + 1;
1088         xregcomp(re, s, REG_EXTENDED);
1089         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1090
1091         return n;
1092 }
1093
1094 static node *condition(void)
1095 {
1096         next_token(TC_SEQSTART);
1097         return parse_expr(TC_SEQTERM);
1098 }
1099
1100 /* parse expression terminated by given argument, return ptr
1101  * to built subtree. Terminator is eaten by parse_expr */
1102 static node *parse_expr(uint32_t iexp)
1103 {
1104         node sn;
1105         node *cn = &sn;
1106         node *vn, *glptr;
1107         uint32_t tc, xtc;
1108         var *v;
1109
1110         sn.info = PRIMASK;
1111         sn.r.n = glptr = NULL;
1112         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1113
1114         while (!((tc = next_token(xtc)) & iexp)) {
1115                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1116                         /* input redirection (<) attached to glptr node */
1117                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1118                         cn->a.n = glptr;
1119                         xtc = TC_OPERAND | TC_UOPPRE;
1120                         glptr = NULL;
1121
1122                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1123                         /* for binary and postfix-unary operators, jump back over
1124                          * previous operators with higher priority */
1125                         vn = cn;
1126                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1127                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1128                                 vn = vn->a.n;
1129                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1130                                 t_info += P(6);
1131                         cn = vn->a.n->r.n = new_node(t_info);
1132                         cn->a.n = vn->a.n;
1133                         if (tc & TC_BINOP) {
1134                                 cn->l.n = vn;
1135                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1136                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1137                                         /* it's a pipe */
1138                                         next_token(TC_GETLINE);
1139                                         /* give maximum priority to this pipe */
1140                                         cn->info &= ~PRIMASK;
1141                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1142                                 }
1143                         } else {
1144                                 cn->r.n = vn;
1145                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1146                         }
1147                         vn->a.n = cn;
1148
1149                 } else {
1150                         /* for operands and prefix-unary operators, attach them
1151                          * to last node */
1152                         vn = cn;
1153                         cn = vn->r.n = new_node(t_info);
1154                         cn->a.n = vn;
1155                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1156                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1157                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1158                                 /* one should be very careful with switch on tclass -
1159                                  * only simple tclasses should be used! */
1160                                 switch (tc) {
1161                                 case TC_VARIABLE:
1162                                 case TC_ARRAY:
1163                                         cn->info = OC_VAR;
1164                                         v = hash_search(ahash, t_string);
1165                                         if (v != NULL) {
1166                                                 cn->info = OC_FNARG;
1167                                                 cn->l.i = v->x.aidx;
1168                                         } else {
1169                                                 cn->l.v = newvar(t_string);
1170                                         }
1171                                         if (tc & TC_ARRAY) {
1172                                                 cn->info |= xS;
1173                                                 cn->r.n = parse_expr(TC_ARRTERM);
1174                                         }
1175                                         break;
1176
1177                                 case TC_NUMBER:
1178                                 case TC_STRING:
1179                                         cn->info = OC_VAR;
1180                                         v = cn->l.v = xzalloc(sizeof(var));
1181                                         if (tc & TC_NUMBER)
1182                                                 setvar_i(v, t_double);
1183                                         else
1184                                                 setvar_s(v, t_string);
1185                                         break;
1186
1187                                 case TC_REGEXP:
1188                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1189                                         break;
1190
1191                                 case TC_FUNCTION:
1192                                         cn->info = OC_FUNC;
1193                                         cn->r.f = newfunc(t_string);
1194                                         cn->l.n = condition();
1195                                         break;
1196
1197                                 case TC_SEQSTART:
1198                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1199                                         cn->a.n = vn;
1200                                         break;
1201
1202                                 case TC_GETLINE:
1203                                         glptr = cn;
1204                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1205                                         break;
1206
1207                                 case TC_BUILTIN:
1208                                         cn->l.n = condition();
1209                                         break;
1210                                 }
1211                         }
1212                 }
1213         }
1214         return sn.r.n;
1215 }
1216
1217 /* add node to chain. Return ptr to alloc'd node */
1218 static node *chain_node(uint32_t info)
1219 {
1220         node *n;
1221
1222         if (!seq->first)
1223                 seq->first = seq->last = new_node(0);
1224
1225         if (seq->programname != g_progname) {
1226                 seq->programname = g_progname;
1227                 n = chain_node(OC_NEWSOURCE);
1228                 n->l.s = xstrdup(g_progname);
1229         }
1230
1231         n = seq->last;
1232         n->info = info;
1233         seq->last = n->a.n = new_node(OC_DONE);
1234
1235         return n;
1236 }
1237
1238 static void chain_expr(uint32_t info)
1239 {
1240         node *n;
1241
1242         n = chain_node(info);
1243         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1244         if (t_tclass & TC_GRPTERM)
1245                 rollback_token();
1246 }
1247
1248 static node *chain_loop(node *nn)
1249 {
1250         node *n, *n2, *save_brk, *save_cont;
1251
1252         save_brk = break_ptr;
1253         save_cont = continue_ptr;
1254
1255         n = chain_node(OC_BR | Vx);
1256         continue_ptr = new_node(OC_EXEC);
1257         break_ptr = new_node(OC_EXEC);
1258         chain_group();
1259         n2 = chain_node(OC_EXEC | Vx);
1260         n2->l.n = nn;
1261         n2->a.n = n;
1262         continue_ptr->a.n = n2;
1263         break_ptr->a.n = n->r.n = seq->last;
1264
1265         continue_ptr = save_cont;
1266         break_ptr = save_brk;
1267
1268         return n;
1269 }
1270
1271 /* Parse one statement or one bracketed group of statements and append
      * the resulting nodes to the current chain (seq).
      * The first significant token decides the form: a TC_GRPSTART group,
      * an expression statement, or a keyword statement dispatched on
      * t_info. */
1272 static void chain_group(void)
1273 {
1274         uint32_t c;
1275         node *n, *n2, *n3;
1276
             /* skip leading newlines */
1277         do {
1278                 c = next_token(TC_GRPSEQ);
1279         } while (c & TC_NEWLINE);
1280
1281         if (c & TC_GRPSTART) {
                     /* group: parse statements recursively until TC_GRPTERM,
                      * ignoring newlines between them */
1282                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1283                         if (t_tclass & TC_NEWLINE) continue;
1284                         rollback_token();
1285                         chain_group();
1286                 }
1287         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                     /* expression statement: push the token back and let
                      * chain_expr() parse the whole expression */
1288                 rollback_token();
1289                 chain_expr(OC_EXEC | Vx);
1290         } else {                                                /* TC_STATEMNT */
1291                 switch (t_info & OPCLSMASK) {
1292                 case ST_IF:
                             /* n: branch node holding the condition; its r.n
                              * is set to the chain tail reached after the
                              * body. n2: OC_EXEC node appended after the
                              * body; when an else-part follows, its a.n is
                              * redirected to the tail reached after that
                              * part. */
1293                         n = chain_node(OC_BR | Vx);
1294                         n->l.n = condition();
1295                         chain_group();
1296                         n2 = chain_node(OC_EXEC);
1297                         n->r.n = seq->last;
1298                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1299                                 chain_group();
1300                                 n2->a.n = seq->last;
1301                         } else {
1302                                 rollback_token();
1303                         }
1304                         break;
1305
1306                 case ST_WHILE:
                             /* condition is parsed first, then attached to
                              * the loop head returned by chain_loop() */
1307                         n2 = condition();
1308                         n = chain_loop(NULL);
1309                         n->l.n = n2;
1310                         break;
1311
1312                 case ST_DO:
                             /* n2 is an OC_EXEC node placed before the loop
                              * head; its a.n is set to the head's a.n, so it
                              * bypasses the head node. The condition follows
                              * the body, after a TC_WHILE token. */
1313                         n2 = chain_node(OC_EXEC);
1314                         n = chain_loop(NULL);
1315                         n2->a.n = n->a.n;
1316                         next_token(TC_WHILE);
1317                         n->l.n = condition();
1318                         break;
1319
1320                 case ST_FOR:
1321                         next_token(TC_SEQSTART);
1322                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
                             /* the terminator of the first expression tells
                              * the two forms of "for" apart */
1323                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1324                                 if ((n2->info & OPCLSMASK) != OC_IN)
1325                                         syntax_error(EMSG_UNEXP_TOKEN);
                                     /* OC_WALKINIT gets both operands of the
                                      * "in" expression; the loop head is then
                                      * turned into OC_WALKNEXT on the left
                                      * operand */
1326                                 n = chain_node(OC_WALKINIT | VV);
1327                                 n->l.n = n2->l.n;
1328                                 n->r.n = n2->r.n;
1329                                 n = chain_loop(NULL);
1330                                 n->info = OC_WALKNEXT | Vx;
1331                                 n->l.n = n2->l.n;
1332                         } else {                        /* for (;;) */
                                     /* first expression: plain OC_EXEC node;
                                      * second (n2): loop condition;
                                      * third (n3): handed to chain_loop() */
1333                                 n = chain_node(OC_EXEC | Vx);
1334                                 n->l.n = n2;
1335                                 n2 = parse_expr(TC_SEMICOL);
1336                                 n3 = parse_expr(TC_SEQTERM);
1337                                 n = chain_loop(n3);
1338                                 n->l.n = n2;
                                     /* empty condition: the head becomes a
                                      * plain OC_EXEC node */
1339                                 if (! n2)
1340                                         n->info = OC_EXEC;
1341                         }
1342                         break;
1343
1344                 case OC_PRINT:
1345                 case OC_PRINTF:
1346                         n = chain_node(t_info);
1347                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
                             /* output redirection: merge the redirection
                              * token's info into the node and parse the
                              * target expression */
1348                         if (t_tclass & TC_OUTRDR) {
1349                                 n->info |= t_info;
1350                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1351                         }
1352                         if (t_tclass & TC_GRPTERM)
1353                                 rollback_token();
1354                         break;
1355
1356                 case OC_BREAK:
                             /* link to the current loop's break helper node
                              * (break_ptr is whatever the enclosing
                              * chain_loop() installed) */
1357                         n = chain_node(OC_EXEC);
1358                         n->a.n = break_ptr;
1359                         break;
1360
1361                 case OC_CONTINUE:
1362                         n = chain_node(OC_EXEC);
1363                         n->a.n = continue_ptr;
1364                         break;
1365
1366                 /* delete, next, nextfile, return, exit */
1367                 default:
1368                         chain_expr(t_info);
1369                 }
1370         }
1371 }
1372
1373 static void parse_program(char *p)
1374 {
1375         uint32_t tclass;
1376         node *cn;
1377         func *f;
1378         var *v;
1379
1380         g_pos = p;
1381         t_lineno = 1;
1382         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1383                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1384
1385                 if (tclass & TC_OPTERM)
1386                         continue;
1387
1388                 seq = &mainseq;
1389                 if (tclass & TC_BEGIN) {
1390                         seq = &beginseq;
1391                         chain_group();
1392
1393                 } else if (tclass & TC_END) {
1394                         seq = &endseq;
1395                         chain_group();
1396
1397                 } else if (tclass & TC_FUNCDECL) {
1398                         next_token(TC_FUNCTION);
1399                         g_pos++;
1400                         f = newfunc(t_string);
1401                         f->body.first = NULL;
1402                         f->nargs = 0;
1403                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1404                                 v = findvar(ahash, t_string);
1405                                 v->x.aidx = (f->nargs)++;
1406
1407                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1408                                         break;
1409                         }
1410                         seq = &(f->body);
1411                         chain_group();
1412                         clear_array(ahash);
1413
1414                 } else if (tclass & TC_OPSEQ) {
1415                         rollback_token();
1416                         cn = chain_node(OC_TEST);
1417                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1418                         if (t_tclass & TC_GRPSTART) {
1419                                 rollback_token();
1420                                 chain_group();
1421                         } else {
1422                                 chain_node(OC_PRINT);
1423                         }
1424                         cn->r.n = mainseq.last;
1425
1426                 } else /* if (tclass & TC_GRPSTART) */ {
1427                         rollback_token();
1428                         chain_group();
1429                 }
1430         }
1431 }
1432
1433
1434 /* -------- program execution part -------- */
1435
1436 static node *mk_splitter(const char *s, tsplitter *spl)
1437 {
1438         regex_t *re, *ire;
1439         node *n;
1440
1441         re = &spl->re[0];
1442         ire = &spl->re[1];
1443         n = &spl->n;
1444         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1445                 regfree(re);
1446                 regfree(ire);
1447         }
1448         if (strlen(s) > 1) {
1449                 mk_re_node(s, n, re);
1450         } else {
1451                 n->info = (uint32_t) *s;
1452         }
1453
1454         return n;
1455 }
1456
1457 /* use node as a regular expression. Supplied with node ptr and regex_t
1458  * storage space. Return ptr to regex (if result points to preg, it should
1459  * be later regfree'd manually
1460  */
1461 static regex_t *as_regex(node *op, regex_t *preg)
1462 {
1463         var *v;
1464         const char *s;
1465
1466         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1467                 return icase ? op->r.ire : op->l.re;
1468         }
1469         v = nvalloc(1);
1470         s = getvar_s(evaluate(op, v));
1471         xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1472         nvfree(v);
1473         return preg;
1474 }
1475
/* Gradually growing buffer: make sure *b can hold more than n bytes.
 * When *b is not allocated yet or n has reached *size, the buffer is
 * reallocated to n + n/2 + 80 bytes and *size is updated.
 */
static void qrealloc(char **b, int n, int *size)
{
        if (*b != NULL && n < *size)
                return;

        *size = n + (n >> 1) + 80;
        *b = xrealloc(*b, *size);
}
1482
1483 /* resize field storage space */
1484 static void fsrealloc(int size)
1485 {
1486         int i;
1487
1488         if (size >= maxfields) {
1489                 i = maxfields;
1490                 maxfields = size + 16;
1491                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1492                 for (; i < maxfields; i++) {
1493                         Fields[i].type = VF_SPECIAL;
1494                         Fields[i].string = NULL;
1495                 }
1496         }
1497
1498         if (size < nfields) {
1499                 for (i = size; i < nfields; i++) {
1500                         clrvar(Fields + i);
1501                 }
1502         }
1503         nfields = size;
1504 }
1505
     /* Split string s into fields according to splitter node spl.
      * A freshly allocated buffer is stored in *slist and receives the
      * fields as consecutive NUL-terminated strings; the number of fields
      * is returned. Four modes are chosen from spl->info: regexp, empty
      * separator (one field per character), a single character, or a
      * space (runs of whitespace separate fields). */
1506 static int awk_split(const char *s, node *spl, char **slist)
1507 {
1508         int l, n = 0;
1509         char c[4];
1510         char *s1;
1511         regmatch_t pmatch[2];
1512
1513         /* in worst case, each char would be a separate field */
1514         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1515         strcpy(s1, s);
1516
             /* c[0..1]: the separator character, twice (the two copies
              * become its upper/lower case forms when icase is set);
              * c+2: a string that is empty, or "\n" when RS is empty */
1517         c[0] = c[1] = (char)spl->info;
1518         c[2] = c[3] = '\0';
1519         if (*getvar_s(intvar[RS]) == '\0')
1520                 c[2] = '\n';
1521
1522         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1523                 while (*s) {
                             /* l: length up to the next character from c+2
                              * (or to the end of s); a regex match starting
                              * at or before that point ends the field there */
1524                         l = strcspn(s, c+2);
1525                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1526                          && pmatch[0].rm_so <= l
1527                         ) {
1528                                 l = pmatch[0].rm_so;
                                     /* empty match at the very start: take one
                                      * character so the loop advances */
1529                                 if (pmatch[0].rm_eo == 0) {
1530                                         l++;
1531                                         pmatch[0].rm_eo++;
1532                                 }
1533                         } else {
                                     /* no usable match: the field ends at l;
                                      * also step over the character found
                                      * there, if any */
1534                                 pmatch[0].rm_eo = l;
1535                                 if (s[l]) pmatch[0].rm_eo++;
1536                         }
1537
                             /* store the field; nextword() presumably moves
                              * s1 past the terminating NUL - confirm against
                              * its definition */
1538                         memcpy(s1, s, l);
1539                         s1[l] = '\0';
1540                         nextword(&s1);
1541                         s += pmatch[0].rm_eo;
1542                         n++;
1543                 }
1544         } else if (c[0] == '\0') {              /* null split */
                     /* every character becomes a field of its own */
1545                 while (*s) {
1546                         *s1++ = *s++;
1547                         *s1++ = '\0';
1548                         n++;
1549                 }
1550         } else if (c[0] != ' ') {               /* single-character split */
1551                 if (icase) {
1552                         c[0] = toupper(c[0]);
1553                         c[1] = tolower(c[1]);
1554                 }
                     /* works on the copy made above: each separator found
                      * (any character of c) is overwritten with NUL */
1555                 if (*s1) n++;
1556                 while ((s1 = strpbrk(s1, c))) {
1557                         *s1++ = '\0';
1558                         n++;
1559                 }
1560         } else {                                /* space split */
                     /* fields are runs of non-whitespace characters */
1561                 while (*s) {
1562                         s = skip_whitespace(s);
1563                         if (!*s) break;
1564                         n++;
1565                         while (*s && !isspace(*s))
1566                                 *s1++ = *s++;
1567                         *s1++ = '\0';
1568                 }
1569         }
1570         return n;
1571 }
1572
1573 static void split_f0(void)
1574 {
1575 #define fstrings (G.split_f0__fstrings)
1576
1577         int i, n;
1578         char *s;
1579
1580         if (is_f0_split)
1581                 return;
1582
1583         is_f0_split = TRUE;
1584         free(fstrings);
1585         fsrealloc(0);
1586         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1587         fsrealloc(n);
1588         s = fstrings;
1589         for (i = 0; i < n; i++) {
1590                 Fields[i].string = nextword(&s);
1591                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1592         }
1593
1594         /* set NF manually to avoid side effects */
1595         clrvar(intvar[NF]);
1596         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1597         intvar[NF]->number = nfields;
1598 #undef fstrings
1599 }
1600
1601 /* perform additional actions when some internal variables changed */
1602 static void handle_special(var *v)
1603 {
1604         int n;
1605         char *b;
1606         const char *sep, *s;
1607         int sl, l, len, i, bsize;
1608
1609         if (!(v->type & VF_SPECIAL))
1610                 return;
1611
1612         if (v == intvar[NF]) {
1613                 n = (int)getvar_i(v);
1614                 fsrealloc(n);
1615
1616                 /* recalculate $0 */
1617                 sep = getvar_s(intvar[OFS]);
1618                 sl = strlen(sep);
1619                 b = NULL;
1620                 len = 0;
1621                 for (i = 0; i < n; i++) {
1622                         s = getvar_s(&Fields[i]);
1623                         l = strlen(s);
1624                         if (b) {
1625                                 memcpy(b+len, sep, sl);
1626                                 len += sl;
1627                         }
1628                         qrealloc(&b, len+l+sl, &bsize);
1629                         memcpy(b+len, s, l);
1630                         len += l;
1631                 }
1632                 if (b)
1633                         b[len] = '\0';
1634                 setvar_p(intvar[F0], b);
1635                 is_f0_split = TRUE;
1636
1637         } else if (v == intvar[F0]) {
1638                 is_f0_split = FALSE;
1639
1640         } else if (v == intvar[FS]) {
1641                 mk_splitter(getvar_s(v), &fsplitter);
1642
1643         } else if (v == intvar[RS]) {
1644                 mk_splitter(getvar_s(v), &rsplitter);
1645
1646         } else if (v == intvar[IGNORECASE]) {
1647                 icase = istrue(v);
1648
1649         } else {                                /* $n */
1650                 n = getvar_i(intvar[NF]);
1651                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1652                 /* right here v is invalid. Just to note... */
1653         }
1654 }
1655
1656 /* step through func/builtin/etc arguments */
1657 static node *nextarg(node **pn)
1658 {
1659         node *n;
1660
1661         n = *pn;
1662         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1663                 *pn = n->r.n;
1664                 n = n->l.n;
1665         } else {
1666                 *pn = NULL;
1667         }
1668         return n;
1669 }
1670
1671 static void hashwalk_init(var *v, xhash *array)
1672 {
1673         char **w;
1674         hash_item *hi;
1675         int i;
1676
1677         if (v->type & VF_WALK)
1678                 free(v->x.walker);
1679
1680         v->type |= VF_WALK;
1681         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1682         w[0] = w[1] = (char *)(w + 2);
1683         for (i = 0; i < array->csize; i++) {
1684                 hi = array->items[i];
1685                 while (hi) {
1686                         strcpy(*w, hi->name);
1687                         nextword(w);
1688                         hi = hi->next;
1689                 }
1690         }
1691 }
1692
1693 static int hashwalk_next(var *v)
1694 {
1695         char **w;
1696
1697         w = v->x.walker;
1698         if (w[1] == w[0])
1699                 return FALSE;
1700
1701         setvar_s(v, nextword(w+1));
1702         return TRUE;
1703 }
1704
1705 /* evaluate node, return 1 when result is true, 0 otherwise */
1706 static int ptest(node *pattern)
1707 {
1708         /* ptest__v is "static": to save stack space? */
1709         return istrue(evaluate(pattern, &G.ptest__v));
1710 }
1711
1712 /* read next record from stream rsm into a variable v */
1713 static int awk_getline(rstream *rsm, var *v)
1714 {
1715         char *b;
1716         regmatch_t pmatch[2];
1717         int a, p, pp=0, size;
1718         int fd, so, eo, r, rp;
1719         char c, *m, *s;
1720
1721         /* we're using our own buffer since we need access to accumulating
1722          * characters
1723          */
1724         fd = fileno(rsm->F);
1725         m = rsm->buffer;
1726         a = rsm->adv;
1727         p = rsm->pos;
1728         size = rsm->size;
1729         c = (char) rsplitter.n.info;
1730         rp = 0;
1731
1732         if (! m) qrealloc(&m, 256, &size);
1733         do {
1734                 b = m + a;
1735                 so = eo = p;
1736                 r = 1;
1737                 if (p > 0) {
1738                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1739                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1740                                                         b, 1, pmatch, 0) == 0) {
1741                                         so = pmatch[0].rm_so;
1742                                         eo = pmatch[0].rm_eo;
1743                                         if (b[eo] != '\0')
1744                                                 break;
1745                                 }
1746                         } else if (c != '\0') {
1747                                 s = strchr(b+pp, c);
1748                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1749                                 if (s) {
1750                                         so = eo = s-b;
1751                                         eo++;
1752                                         break;
1753                                 }
1754                         } else {
1755                                 while (b[rp] == '\n')
1756                                         rp++;
1757                                 s = strstr(b+rp, "\n\n");
1758                                 if (s) {
1759                                         so = eo = s-b;
1760                                         while (b[eo] == '\n') eo++;
1761                                         if (b[eo] != '\0')
1762                                                 break;
1763                                 }
1764                         }
1765                 }
1766
1767                 if (a > 0) {
1768                         memmove(m, (const void *)(m+a), p+1);
1769                         b = m;
1770                         a = 0;
1771                 }
1772
1773                 qrealloc(&m, a+p+128, &size);
1774                 b = m + a;
1775                 pp = p;
1776                 p += safe_read(fd, b+p, size-p-1);
1777                 if (p < pp) {
1778                         p = 0;
1779                         r = 0;
1780                         setvar_i(intvar[ERRNO], errno);
1781                 }
1782                 b[p] = '\0';
1783
1784         } while (p > pp);
1785
1786         if (p == 0) {
1787                 r--;
1788         } else {
1789                 c = b[so]; b[so] = '\0';
1790                 setvar_s(v, b+rp);
1791                 v->type |= VF_USER;
1792                 b[so] = c;
1793                 c = b[eo]; b[eo] = '\0';
1794                 setvar_s(intvar[RT], b+so);
1795                 b[eo] = c;
1796         }
1797
1798         rsm->buffer = m;
1799         rsm->adv = a + eo;
1800         rsm->pos = p - eo;
1801         rsm->size = size;
1802
1803         return r;
1804 }
1805
1806 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1807 {
1808         int r = 0;
1809         char c;
1810         const char *s = format;
1811
1812         if (int_as_int && n == (int)n) {
1813                 r = snprintf(b, size, "%d", (int)n);
1814         } else {
1815                 do { c = *s; } while (c && *++s);
1816                 if (strchr("diouxX", c)) {
1817                         r = snprintf(b, size, format, (int)n);
1818                 } else if (strchr("eEfgG", c)) {
1819                         r = snprintf(b, size, format, n);
1820                 } else {
1821                         syntax_error(EMSG_INV_FMT);
1822                 }
1823         }
1824         return r;
1825 }
1826
1827
1828 /* formatted output into an allocated buffer, return ptr to buffer */
1829 static char *awk_printf(node *n)
1830 {
1831         char *b = NULL;
1832         char *fmt, *s, *f;
1833         const char *s1;
1834         int i, j, incr, bsize;
1835         char c, c1;
1836         var *v, *arg;
1837
1838         v = nvalloc(1);
1839         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1840
1841         i = 0;
1842         while (*f) {
1843                 s = f;
1844                 while (*f && (*f != '%' || *(++f) == '%'))
1845                         f++;
1846                 while (*f && !isalpha(*f)) {
1847                         if (*f == '*')
1848                                 syntax_error("%*x formats are not supported");
1849                         f++;
1850                 }
1851
1852                 incr = (f - s) + MAXVARFMT;
1853                 qrealloc(&b, incr + i, &bsize);
1854                 c = *f;
1855                 if (c != '\0') f++;
1856                 c1 = *f;
1857                 *f = '\0';
1858                 arg = evaluate(nextarg(&n), v);
1859
1860                 j = i;
1861                 if (c == 'c' || !c) {
1862                         i += sprintf(b+i, s, is_numeric(arg) ?
1863                                         (char)getvar_i(arg) : *getvar_s(arg));
1864                 } else if (c == 's') {
1865                         s1 = getvar_s(arg);
1866                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1867                         i += sprintf(b+i, s, s1);
1868                 } else {
1869                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1870                 }
1871                 *f = c1;
1872
1873                 /* if there was an error while sprintf, return value is negative */
1874                 if (i < j) i = j;
1875         }
1876
1877         b = xrealloc(b, i + 1);
1878         free(fmt);
1879         nvfree(v);
1880         b[i] = '\0';
1881         return b;
1882 }
1883
1884 /* common substitution routine
1885  * replace (nm) substring of (src) that match (n) with (repl), store
1886  * result into (dest), return number of substitutions. If nm=0, replace
1887  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1888  * subexpression matching (\1-\9)
1889  */
1890 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1891 {
1892         char *ds = NULL;
1893         const char *s;
1894         const char *sp;
1895         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1896         regmatch_t pmatch[10];
1897         regex_t sreg, *re;
1898
1899         re = as_regex(rn, &sreg);
1900         if (! src) src = intvar[F0];
1901         if (! dest) dest = intvar[F0];
1902
1903         i = di = 0;
1904         sp = getvar_s(src);
1905         rl = strlen(repl);
1906         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1907                 so = pmatch[0].rm_so;
1908                 eo = pmatch[0].rm_eo;
1909
1910                 qrealloc(&ds, di + eo + rl, &dssize);
1911                 memcpy(ds + di, sp, eo);
1912                 di += eo;
1913                 if (++i >= nm) {
1914                         /* replace */
1915                         di -= (eo - so);
1916                         nbs = 0;
1917                         for (s = repl; *s; s++) {
1918                                 ds[di++] = c = *s;
1919                                 if (c == '\\') {
1920                                         nbs++;
1921                                         continue;
1922                                 }
1923                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1924                                         di -= ((nbs + 3) >> 1);
1925                                         j = 0;
1926                                         if (c != '&') {
1927                                                 j = c - '0';
1928                                                 nbs++;
1929                                         }
1930                                         if (nbs % 2) {
1931                                                 ds[di++] = c;
1932                                         } else {
1933                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1934                                                 qrealloc(&ds, di + rl + n, &dssize);
1935                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1936                                                 di += n;
1937                                         }
1938                                 }
1939                                 nbs = 0;
1940                         }
1941                 }
1942
1943                 sp += eo;
1944                 if (i == nm) break;
1945                 if (eo == so) {
1946                         if (! (ds[di++] = *sp++)) break;
1947                 }
1948         }
1949
1950         qrealloc(&ds, di + strlen(sp), &dssize);
1951         strcpy(ds + di, sp);
1952         setvar_p(dest, ds);
1953         if (re == &sreg) regfree(re);
1954         return i;
1955 }
1956
1957 static var *exec_builtin(node *op, var *res)
1958 {
1959 #define tspl (G.exec_builtin__tspl)
1960
1961         int (*to_xxx)(int);
1962         var *tv;
1963         node *an[4];
1964         var *av[4];
1965         const char *as[4];
1966         regmatch_t pmatch[2];
1967         regex_t sreg, *re;
1968         node *spl;
1969         uint32_t isr, info;
1970         int nargs;
1971         time_t tt;
1972         char *s, *s1;
1973         int i, l, ll, n;
1974
1975         tv = nvalloc(4);
1976         isr = info = op->info;
1977         op = op->l.n;
1978
1979         av[2] = av[3] = NULL;
1980         for (i = 0; i < 4 && op; i++) {
1981                 an[i] = nextarg(&op);
1982                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1983                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1984                 isr >>= 1;
1985         }
1986
1987         nargs = i;
1988         if (nargs < (info >> 30))
1989                 syntax_error(EMSG_TOO_FEW_ARGS);
1990
1991         switch (info & OPNMASK) {
1992
1993         case B_a2:
1994 #if ENABLE_FEATURE_AWK_MATH
1995                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1996 #else
1997                 syntax_error(EMSG_NO_MATH);
1998 #endif
1999                 break;
2000
2001         case B_sp:
2002                 if (nargs > 2) {
2003                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2004                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2005                 } else {
2006                         spl = &fsplitter.n;
2007                 }
2008
2009                 n = awk_split(as[0], spl, &s);
2010                 s1 = s;
2011                 clear_array(iamarray(av[1]));
2012                 for (i=1; i<=n; i++)
2013                         setari_u(av[1], i, nextword(&s1));
2014                 free(s);
2015                 setvar_i(res, n);
2016                 break;
2017
2018         case B_ss:
2019                 l = strlen(as[0]);
2020                 i = getvar_i(av[1]) - 1;
2021                 if (i > l) i = l;
2022                 if (i < 0) i = 0;
2023                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2024                 if (n < 0) n = 0;
2025                 s = xmalloc(n+1);
2026                 strncpy(s, as[0]+i, n);
2027                 s[n] = '\0';
2028                 setvar_p(res, s);
2029                 break;
2030
2031         case B_an:
2032                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2033                 break;
2034
2035         case B_co:
2036                 setvar_i(res, ~(long)getvar_i(av[0]));
2037                 break;
2038
2039         case B_ls:
2040                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2041                 break;
2042
2043         case B_or:
2044                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2045                 break;
2046
2047         case B_rs:
2048                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2049                 break;
2050
2051         case B_xo:
2052                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2053                 break;
2054
2055         case B_lo:
2056                 to_xxx = tolower;
2057                 goto lo_cont;
2058
2059         case B_up:
2060                 to_xxx = toupper;
2061  lo_cont:
2062                 s1 = s = xstrdup(as[0]);
2063                 while (*s1) {
2064                         *s1 = (*to_xxx)(*s1);
2065                         s1++;
2066                 }
2067                 setvar_p(res, s);
2068                 break;
2069
2070         case B_ix:
2071                 n = 0;
2072                 ll = strlen(as[1]);
2073                 l = strlen(as[0]) - ll;
2074                 if (ll > 0 && l >= 0) {
2075                         if (!icase) {
2076                                 s = strstr(as[0], as[1]);
2077                                 if (s) n = (s - as[0]) + 1;
2078                         } else {
2079                                 /* this piece of code is terribly slow and
2080                                  * really should be rewritten
2081                                  */
2082                                 for (i=0; i<=l; i++) {
2083                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2084                                                 n = i+1;
2085                                                 break;
2086                                         }
2087                                 }
2088                         }
2089                 }
2090                 setvar_i(res, n);
2091                 break;
2092
2093         case B_ti:
2094                 if (nargs > 1)
2095                         tt = getvar_i(av[1]);
2096                 else
2097                         time(&tt);
2098                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2099                 i = strftime(g_buf, MAXVARFMT,
2100                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2101                         localtime(&tt));
2102                 g_buf[i] = '\0';
2103                 setvar_s(res, g_buf);
2104                 break;
2105
2106         case B_ma:
2107                 re = as_regex(an[1], &sreg);
2108                 n = regexec(re, as[0], 1, pmatch, 0);
2109                 if (n == 0) {
2110                         pmatch[0].rm_so++;
2111                         pmatch[0].rm_eo++;
2112                 } else {
2113                         pmatch[0].rm_so = 0;
2114                         pmatch[0].rm_eo = -1;
2115                 }
2116                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2117                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2118                 setvar_i(res, pmatch[0].rm_so);
2119                 if (re == &sreg) regfree(re);
2120                 break;
2121
2122         case B_ge:
2123                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2124                 break;
2125
2126         case B_gs:
2127                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2128                 break;
2129
2130         case B_su:
2131                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2132                 break;
2133         }
2134
2135         nvfree(tv);
2136         return res;
2137 #undef tspl
2138 }
2139
2140 /*
2141  * Evaluate node - the heart of the program. Supplied with subtree
2142  * and place where to store result. returns ptr to result.
2143  */
2144 #define XC(n) ((n) >> 8)
2145
2146 static var *evaluate(node *op, var *res)
2147 {
2148 /* This procedure is recursive so we should count every byte */
2149 #define fnargs (G.evaluate__fnargs)
2150 /* seed is initialized to 1 */
2151 #define seed   (G.evaluate__seed)
2152 #define sreg   (G.evaluate__sreg)
2153
2154         node *op1;
2155         var *v1;
2156         union {
2157                 var *v;
2158                 const char *s;
2159                 double d;
2160                 int i;
2161         } L, R;
2162         uint32_t opinfo;
2163         int opn;
2164         union {
2165                 char *s;
2166                 rstream *rsm;
2167                 FILE *F;
2168                 var *v;
2169                 regex_t *re;
2170                 uint32_t info;
2171         } X;
2172
2173         if (!op)
2174                 return setvar_s(res, NULL);
2175
2176         v1 = nvalloc(2);
2177
2178         while (op) {
2179                 opinfo = op->info;
2180                 opn = (opinfo & OPNMASK);
2181                 g_lineno = op->lineno;
2182
2183                 /* execute inevitable things */
2184                 op1 = op->l.n;
2185                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2186                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2187                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2188                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2189                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2190
2191                 switch (XC(opinfo & OPCLSMASK)) {
2192
2193                 /* -- iterative node type -- */
2194
2195                 /* test pattern */
2196                 case XC( OC_TEST ):
2197                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2198                                 /* it's range pattern */
2199                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2200                                         op->info |= OF_CHECKED;
2201                                         if (ptest(op1->r.n))
2202                                                 op->info &= ~OF_CHECKED;
2203
2204                                         op = op->a.n;
2205                                 } else {
2206                                         op = op->r.n;
2207                                 }
2208                         } else {
2209                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2210                         }
2211                         break;
2212
2213                 /* just evaluate an expression, also used as unconditional jump */
2214                 case XC( OC_EXEC ):
2215                         break;
2216
2217                 /* branch, used in if-else and various loops */
2218                 case XC( OC_BR ):
2219                         op = istrue(L.v) ? op->a.n : op->r.n;
2220                         break;
2221
2222                 /* initialize for-in loop */
2223                 case XC( OC_WALKINIT ):
2224                         hashwalk_init(L.v, iamarray(R.v));
2225                         break;
2226
2227                 /* get next array item */
2228                 case XC( OC_WALKNEXT ):
2229                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2230                         break;
2231
2232                 case XC( OC_PRINT ):
2233                 case XC( OC_PRINTF ):
2234                         X.F = stdout;
2235                         if (op->r.n) {
2236                                 X.rsm = newfile(R.s);
2237                                 if (!X.rsm->F) {
2238                                         if (opn == '|') {
2239                                                 X.rsm->F = popen(R.s, "w");
2240                                                 if (X.rsm->F == NULL)
2241                                                         bb_perror_msg_and_die("popen");
2242                                                 X.rsm->is_pipe = 1;
2243                                         } else {
2244                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2245                                         }
2246                                 }
2247                                 X.F = X.rsm->F;
2248                         }
2249
2250                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2251                                 if (!op1) {
2252                                         fputs(getvar_s(intvar[F0]), X.F);
2253                                 } else {
2254                                         while (op1) {
2255                                                 L.v = evaluate(nextarg(&op1), v1);
2256                                                 if (L.v->type & VF_NUMBER) {
2257                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2258                                                                         getvar_i(L.v), TRUE);
2259                                                         fputs(g_buf, X.F);
2260                                                 } else {
2261                                                         fputs(getvar_s(L.v), X.F);
2262                                                 }
2263
2264                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2265                                         }
2266                                 }
2267                                 fputs(getvar_s(intvar[ORS]), X.F);
2268
2269                         } else {        /* OC_PRINTF */
2270                                 L.s = awk_printf(op1);
2271                                 fputs(L.s, X.F);
2272                                 free((char*)L.s);
2273                         }
2274                         fflush(X.F);
2275                         break;
2276
2277                 case XC( OC_DELETE ):
2278                         X.info = op1->info & OPCLSMASK;
2279                         if (X.info == OC_VAR) {
2280                                 R.v = op1->l.v;
2281                         } else if (X.info == OC_FNARG) {
2282                                 R.v = &fnargs[op1->l.i];
2283                         } else {
2284                                 syntax_error(EMSG_NOT_ARRAY);
2285                         }
2286
2287                         if (op1->r.n) {
2288                                 clrvar(L.v);
2289                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2290                                 hash_remove(iamarray(R.v), L.s);
2291                         } else {
2292                                 clear_array(iamarray(R.v));
2293                         }
2294                         break;
2295
2296                 case XC( OC_NEWSOURCE ):
2297                         g_progname = op->l.s;
2298                         break;
2299
2300                 case XC( OC_RETURN ):
2301                         copyvar(res, L.v);
2302                         break;
2303
2304                 case XC( OC_NEXTFILE ):
2305                         nextfile = TRUE;
2306                 case XC( OC_NEXT ):
2307                         nextrec = TRUE;
2308                 case XC( OC_DONE ):
2309                         clrvar(res);
2310                         break;
2311
2312                 case XC( OC_EXIT ):
2313                         awk_exit(L.d);
2314
2315                 /* -- recursive node type -- */
2316
2317                 case XC( OC_VAR ):
2318                         L.v = op->l.v;
2319                         if (L.v == intvar[NF])
2320                                 split_f0();
2321                         goto v_cont;
2322
2323                 case XC( OC_FNARG ):
2324                         L.v = &fnargs[op->l.i];
2325  v_cont:
2326                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2327                         break;
2328
2329                 case XC( OC_IN ):
2330                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2331                         break;
2332
2333                 case XC( OC_REGEXP ):
2334                         op1 = op;
2335                         L.s = getvar_s(intvar[F0]);
2336                         goto re_cont;
2337
2338                 case XC( OC_MATCH ):
2339                         op1 = op->r.n;
2340  re_cont:
2341                         X.re = as_regex(op1, &sreg);
2342                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2343                         if (X.re == &sreg) regfree(X.re);
2344                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2345                         break;
2346
2347                 case XC( OC_MOVE ):
2348                         /* if source is a temporary string, jusk relink it to dest */
2349                         if (R.v == v1+1 && R.v->string) {
2350                                 res = setvar_p(L.v, R.v->string);
2351                                 R.v->string = NULL;
2352                         } else {
2353                                 res = copyvar(L.v, R.v);
2354                         }
2355                         break;
2356
2357                 case XC( OC_TERNARY ):
2358                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2359                                 syntax_error(EMSG_POSSIBLE_ERROR);
2360                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2361                         break;
2362
2363                 case XC( OC_FUNC ):
2364                         if (!op->r.f->body.first)
2365                                 syntax_error(EMSG_UNDEF_FUNC);
2366
2367                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2368                         while (op1) {
2369                                 L.v = evaluate(nextarg(&op1), v1);
2370                                 copyvar(R.v, L.v);
2371                                 R.v->type |= VF_CHILD;
2372                                 R.v->x.parent = L.v;
2373                                 if (++R.v - X.v >= op->r.f->nargs)
2374                                         break;
2375                         }
2376
2377                         R.v = fnargs;
2378                         fnargs = X.v;
2379
2380                         L.s = g_progname;
2381                         res = evaluate(op->r.f->body.first, res);
2382                         g_progname = L.s;
2383
2384                         nvfree(fnargs);
2385                         fnargs = R.v;
2386                         break;
2387
2388                 case XC( OC_GETLINE ):
2389                 case XC( OC_PGETLINE ):
2390                         if (op1) {
2391                                 X.rsm = newfile(L.s);
2392                                 if (!X.rsm->F) {
2393                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2394                                                 X.rsm->F = popen(L.s, "r");
2395                                                 X.rsm->is_pipe = TRUE;
2396                                         } else {
2397                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2398                                         }
2399                                 }
2400                         } else {
2401                                 if (!iF) iF = next_input_file();
2402                                 X.rsm = iF;
2403                         }
2404
2405                         if (!X.rsm->F) {
2406                                 setvar_i(intvar[ERRNO], errno);
2407                                 setvar_i(res, -1);
2408                                 break;
2409                         }
2410
2411                         if (!op->r.n)
2412                                 R.v = intvar[F0];
2413
2414                         L.i = awk_getline(X.rsm, R.v);
2415                         if (L.i > 0) {
2416                                 if (!op1) {
2417                                         incvar(intvar[FNR]);
2418                                         incvar(intvar[NR]);
2419                                 }
2420                         }
2421                         setvar_i(res, L.i);
2422                         break;
2423
2424                 /* simple builtins */
2425                 case XC( OC_FBLTIN ):
2426                         switch (opn) {
2427
2428                         case F_in:
2429                                 R.d = (int)L.d;
2430                                 break;
2431
2432                         case F_rn:
2433                                 R.d = (double)rand() / (double)RAND_MAX;
2434                                 break;
2435 #if ENABLE_FEATURE_AWK_MATH
2436                         case F_co:
2437                                 R.d = cos(L.d);
2438                                 break;
2439
2440                         case F_ex:
2441                                 R.d = exp(L.d);
2442                                 break;
2443
2444                         case F_lg:
2445                                 R.d = log(L.d);
2446                                 break;
2447
2448                         case F_si:
2449                                 R.d = sin(L.d);
2450                                 break;
2451
2452                         case F_sq:
2453                                 R.d = sqrt(L.d);
2454                                 break;
2455 #else
2456                         case F_co:
2457                         case F_ex:
2458                         case F_lg:
2459                         case F_si:
2460                         case F_sq:
2461                                 syntax_error(EMSG_NO_MATH);
2462                                 break;
2463 #endif
2464                         case F_sr:
2465                                 R.d = (double)seed;
2466                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2467                                 srand(seed);
2468                                 break;
2469
2470                         case F_ti:
2471                                 R.d = time(NULL);
2472                                 break;
2473
2474                         case F_le:
2475                                 if (!op1)
2476                                         L.s = getvar_s(intvar[F0]);
2477                                 R.d = strlen(L.s);
2478                                 break;
2479
2480                         case F_sy:
2481                                 fflush(NULL);
2482                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2483                                                 ? (system(L.s) >> 8) : 0;
2484                                 break;
2485
2486                         case F_ff:
2487                                 if (!op1)
2488                                         fflush(stdout);
2489                                 else {
2490                                         if (L.s && *L.s) {
2491                                                 X.rsm = newfile(L.s);
2492                                                 fflush(X.rsm->F);
2493                                         } else {
2494                                                 fflush(NULL);
2495                                         }
2496                                 }
2497                                 break;
2498
2499                         case F_cl:
2500                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2501                                 if (X.rsm) {
2502                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2503                                         free(X.rsm->buffer);
2504                                         hash_remove(fdhash, L.s);
2505                                 }
2506                                 if (R.i != 0)
2507                                         setvar_i(intvar[ERRNO], errno);
2508                                 R.d = (double)R.i;
2509                                 break;
2510                         }
2511                         setvar_i(res, R.d);
2512                         break;
2513
2514                 case XC( OC_BUILTIN ):
2515                         res = exec_builtin(op, res);
2516                         break;
2517
2518                 case XC( OC_SPRINTF ):
2519                         setvar_p(res, awk_printf(op1));
2520                         break;
2521
2522                 case XC( OC_UNARY ):
2523                         X.v = R.v;
2524                         L.d = R.d = getvar_i(R.v);
2525                         switch (opn) {
2526                         case 'P':
2527                                 L.d = ++R.d;
2528                                 goto r_op_change;
2529                         case 'p':
2530                                 R.d++;
2531                                 goto r_op_change;
2532                         case 'M':
2533                                 L.d = --R.d;
2534                                 goto r_op_change;
2535                         case 'm':
2536                                 R.d--;
2537                                 goto r_op_change;
2538                         case '!':
2539                                 L.d = istrue(X.v) ? 0 : 1;
2540                                 break;
2541                         case '-':
2542                                 L.d = -R.d;
2543                                 break;
2544  r_op_change:
2545                                 setvar_i(X.v, R.d);
2546                         }
2547                         setvar_i(res, L.d);
2548                         break;
2549
2550                 case XC( OC_FIELD ):
2551                         R.i = (int)getvar_i(R.v);
2552                         if (R.i == 0) {
2553                                 res = intvar[F0];
2554                         } else {
2555                                 split_f0();
2556                                 if (R.i > nfields)
2557                                         fsrealloc(R.i);
2558                                 res = &Fields[R.i - 1];
2559                         }
2560                         break;
2561
2562                 /* concatenation (" ") and index joining (",") */
2563                 case XC( OC_CONCAT ):
2564                 case XC( OC_COMMA ):
2565                         opn = strlen(L.s) + strlen(R.s) + 2;
2566                         X.s = xmalloc(opn);
2567                         strcpy(X.s, L.s);
2568                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2569                                 L.s = getvar_s(intvar[SUBSEP]);
2570                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2571                                 strcat(X.s, L.s);
2572                         }
2573                         strcat(X.s, R.s);
2574                         setvar_p(res, X.s);
2575                         break;
2576
2577                 case XC( OC_LAND ):
2578                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2579                         break;
2580
2581                 case XC( OC_LOR ):
2582                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2583                         break;
2584
2585                 case XC( OC_BINARY ):
2586                 case XC( OC_REPLACE ):
2587                         R.d = getvar_i(R.v);
2588                         switch (opn) {
2589                         case '+':
2590                                 L.d += R.d;
2591                                 break;
2592                         case '-':
2593                                 L.d -= R.d;
2594                                 break;
2595                         case '*':
2596                                 L.d *= R.d;
2597                                 break;
2598                         case '/':
2599                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2600                                 L.d /= R.d;
2601                                 break;
2602                         case '&':
2603 #if ENABLE_FEATURE_AWK_MATH
2604                                 L.d = pow(L.d, R.d);
2605 #else
2606                                 syntax_error(EMSG_NO_MATH);
2607 #endif
2608                                 break;
2609                         case '%':
2610                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2611                                 L.d -= (int)(L.d / R.d) * R.d;
2612                                 break;
2613                         }
2614                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2615                         break;
2616
2617                 case XC( OC_COMPARE ):
2618                         if (is_numeric(L.v) && is_numeric(R.v)) {
2619                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2620                         } else {
2621                                 L.s = getvar_s(L.v);
2622                                 R.s = getvar_s(R.v);
2623                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2624                         }
2625                         switch (opn & 0xfe) {
2626                         case 0:
2627                                 R.i = (L.d > 0);
2628                                 break;
2629                         case 2:
2630                                 R.i = (L.d >= 0);
2631                                 break;
2632                         case 4:
2633                                 R.i = (L.d == 0);
2634                                 break;
2635                         }
2636                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2637                         break;
2638
2639                 default:
2640                         syntax_error(EMSG_POSSIBLE_ERROR);
2641                 }
2642                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2643                         op = op->a.n;
2644                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2645                         break;
2646                 if (nextrec)
2647                         break;
2648         }
2649         nvfree(v1);
2650         return res;
2651 #undef fnargs
2652 #undef seed
2653 #undef sreg
2654 }
2655
2656
2657 /* -------- main & co. -------- */
2658
2659 static int awk_exit(int r)
2660 {
2661         var tv;
2662         unsigned i;
2663         hash_item *hi;
2664
2665         zero_out_var(&tv);
2666
2667         if (!exiting) {
2668                 exiting = TRUE;
2669                 nextrec = FALSE;
2670                 evaluate(endseq.first, &tv);
2671         }
2672
2673         /* waiting for children */
2674         for (i = 0; i < fdhash->csize; i++) {
2675                 hi = fdhash->items[i];
2676                 while (hi) {
2677                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2678                                 pclose(hi->data.rs.F);
2679                         hi = hi->next;
2680                 }
2681         }
2682
2683         exit(r);
2684 }
2685
2686 /* if expr looks like "var=value", perform assignment and return 1,
2687  * otherwise return 0 */
2688 static int is_assignment(const char *expr)
2689 {
2690         char *exprc, *s, *s0, *s1;
2691
2692         exprc = xstrdup(expr);
2693         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2694                 free(exprc);
2695                 return FALSE;
2696         }
2697
2698         *(s++) = '\0';
2699         s0 = s1 = s;
2700         while (*s)
2701                 *(s1++) = nextchar(&s);
2702
2703         *s1 = '\0';
2704         setvar_u(newvar(exprc), s0);
2705         free(exprc);
2706         return TRUE;
2707 }
2708
2709 /* switch to next input file */
2710 static rstream *next_input_file(void)
2711 {
2712 #define rsm          (G.next_input_file__rsm)
2713 #define files_happen (G.next_input_file__files_happen)
2714
2715         FILE *F = NULL;
2716         const char *fname, *ind;
2717
2718         if (rsm.F) fclose(rsm.F);
2719         rsm.F = NULL;
2720         rsm.pos = rsm.adv = 0;
2721
2722         do {
2723                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2724                         if (files_happen)
2725                                 return NULL;
2726                         fname = "-";
2727                         F = stdin;
2728                 } else {
2729                         ind = getvar_s(incvar(intvar[ARGIND]));
2730                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2731                         if (fname && *fname && !is_assignment(fname))
2732                                 F = afopen(fname, "r");
2733                 }
2734         } while (!F);
2735
2736         files_happen = TRUE;
2737         setvar_s(intvar[FILENAME], fname);
2738         rsm.F = F;
2739         return &rsm;
2740 #undef rsm
2741 #undef files_happen
2742 }
2743
2744 int awk_main(int argc, char **argv);
2745 int awk_main(int argc, char **argv)
2746 {
2747         unsigned opt;
2748         char *opt_F, *opt_W;
2749         llist_t *opt_v = NULL;
2750         int i, j, flen;
2751         var *v;
2752         var tv;
2753         char **envp;
2754         char *vnames = (char *)vNames; /* cheat */
2755         char *vvalues = (char *)vValues;
2756
2757         INIT_G();
2758
2759         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2760          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2761         if (ENABLE_LOCALE_SUPPORT)
2762                 setlocale(LC_NUMERIC, "C");
2763
2764         zero_out_var(&tv);
2765
2766         /* allocate global buffer */
2767         g_buf = xmalloc(MAXVARFMT + 1);
2768
2769         vhash = hash_init();
2770         ahash = hash_init();
2771         fdhash = hash_init();
2772         fnhash = hash_init();
2773
2774         /* initialize variables */
2775         for (i = 0; *vnames; i++) {
2776                 intvar[i] = v = newvar(nextword(&vnames));
2777                 if (*vvalues != '\377')
2778                         setvar_s(v, nextword(&vvalues));
2779                 else
2780                         setvar_i(v, 0);
2781
2782                 if (*vnames == '*') {
2783                         v->type |= VF_SPECIAL;
2784                         vnames++;
2785                 }
2786         }
2787
2788         handle_special(intvar[FS]);
2789         handle_special(intvar[RS]);
2790
2791         newfile("/dev/stdin")->F = stdin;
2792         newfile("/dev/stdout")->F = stdout;
2793         newfile("/dev/stderr")->F = stderr;
2794
2795         /* Huh, people report that sometimes environ is NULL. Oh well. */
2796         if (environ) for (envp = environ; *envp; envp++) {
2797                 char *s = xstrdup(*envp);
2798                 char *s1 = strchr(s, '=');
2799                 if (s1) {
2800                         *s1++ = '\0';
2801                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1);
2802                 }
2803                 free(s);
2804         }
2805         opt_complementary = "v::";
2806         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
2807         argv += optind;
2808         argc -= optind;
2809         if (opt & 0x1)
2810                 setvar_s(intvar[FS], opt_F); // -F
2811         while (opt_v) { /* -v */
2812                 if (!is_assignment(llist_pop(&opt_v)))
2813                         bb_show_usage();
2814         }
2815         if (opt & 0x4) { // -f
2816                 char *s = s; /* die, gcc, die */
2817                 FILE *from_file = afopen(g_progname, "r");
2818                 /* one byte is reserved for some trick in next_token */
2819                 if (fseek(from_file, 0, SEEK_END) == 0) {
2820                         flen = ftell(from_file);
2821                         s = xmalloc(flen + 4);
2822                         fseek(from_file, 0, SEEK_SET);
2823                         i = 1 + fread(s + 1, 1, flen, from_file);
2824                 } else {
2825                         for (i = j = 1; j > 0; i += j) {
2826                                 s = xrealloc(s, i + 4096);
2827                                 j = fread(s + i, 1, 4094, from_file);
2828                         }
2829                 }
2830                 s[i] = '\0';
2831                 fclose(from_file);
2832                 parse_program(s + 1);
2833                 free(s);
2834         } else { // no -f: take program from 1st parameter
2835                 if (!argc)
2836                         bb_show_usage();
2837                 g_progname = "cmd. line";
2838                 parse_program(*argv++);
2839                 argc--;
2840         }
2841         if (opt & 0x8) // -W
2842                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2843
2844         /* fill in ARGV array */
2845         setvar_i(intvar[ARGC], argc + 1);
2846         setari_u(intvar[ARGV], 0, "awk");
2847         i = 0;
2848         while (*argv)
2849                 setari_u(intvar[ARGV], ++i, *argv++);
2850
2851         evaluate(beginseq.first, &tv);
2852         if (!mainseq.first && !endseq.first)
2853                 awk_exit(EXIT_SUCCESS);
2854
2855         /* input file could already be opened in BEGIN block */
2856         if (!iF) iF = next_input_file();
2857
2858         /* passing through input files */
2859         while (iF) {
2860                 nextfile = FALSE;
2861                 setvar_i(intvar[FNR], 0);
2862
2863                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2864                         nextrec = FALSE;
2865                         incvar(intvar[NR]);
2866                         incvar(intvar[FNR]);
2867                         evaluate(mainseq.first, &tv);
2868
2869                         if (nextfile)
2870                                 break;
2871                 }
2872
2873                 if (i < 0)
2874                         syntax_error(strerror(errno));
2875
2876                 iF = next_input_file();
2877         }
2878
2879         awk_exit(EXIT_SUCCESS);
2880         /*return 0;*/
2881 }