awk: another smallish code shrink
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Buffer size limit that getvar_s() hands to fmt_num() when rendering a number into g_buf */
17 #define MAXVARFMT       240
/* Smallest number of var slots nvalloc() reserves when it allocates a new nvblock */
18 #define MINNVBLOCK      64
19
20 /* variable flags (bits stored in var::type) */
21 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
22 #define VF_ARRAY        0x0002  /* 1 = it's an array */
23
24 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
25 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
27 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
31
32 /* these flags are static, don't change them when value is changed */
/* (clrvar() keeps exactly these bits and drops all others) */
33 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35 /* Variable: one awk value; 'type' holds the VF_* flags that say which members are meaningful */
36 typedef struct var_s {
37         unsigned type;            /* VF_* flags */
38         double number;            /* numeric value, or cached conversion of 'string' (VF_CACHED) */
39         char *string;             /* string value, or cached conversion of 'number'; NULL when unset */
40         union {
41                 int aidx;               /* func arg idx (for compilation stage) */
42                 struct xhash_s *array;  /* array ptr */
43                 struct var_s *parent;   /* for func args, ptr to actual parameter */
44                 char **walker;          /* list of array elements (for..in) */
45         } x;
46 } var;
47
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
50         struct node_s *first;           /* head of the node list */
51         struct node_s *last;            /* tail of the node list */
52         const char *programname;        /* NOTE(review): presumably the name of the program source this chain was parsed from - confirm */
53 } chain;
54
55 /* Function: entry stored in the functions hash (see newfunc()) */
56 typedef struct func_s {
57         unsigned nargs;                 /* number of arguments */
58         struct chain_s body;            /* node chain of the function body */
59 } func;
60
61 /* I/O stream: entry stored in the redirect streams hash (see newfile()) */
62 typedef struct rstream_s {
63         FILE *F;                        /* underlying stdio stream */
64         char *buffer;                   /* NOTE(review): presumably the read buffer; adv/size/pos look like its bookkeeping - confirm against the reader code */
65         int adv;
66         int size;
67         int pos;
68         smallint is_pipe;               /* NOTE(review): presumably set when F was opened as a pipe - confirm */
69 } rstream;
70
/* One hash bucket entry. 'data' must stay the first member: hash_find()
 * treats the data pointer returned by hash_search() as the item pointer. */
71 typedef struct hash_item_s {
72         union {
73                 struct var_s v;         /* variable/array hash */
74                 struct rstream_s rs;    /* redirect streams hash */
75                 struct func_s f;        /* functions hash */
76         } data;
77         struct hash_item_s *next;       /* next in chain */
78         char name[1];                   /* really it's longer: hash_find() allocates room for the whole key */
79 } hash_item;
80
/* Chained hash table keyed by string (used for variables, arrays, streams, functions) */
81 typedef struct xhash_s {
82         unsigned nel;           /* num of elements */
83         unsigned csize;         /* current hash size (number of bucket slots in items[]) */
84         unsigned nprime;        /* next hash size in PRIMES[] (index, see hash_rebuild()) */
85         unsigned glen;          /* summary length of item names (each counted with its NUL) */
86         struct hash_item_s **items;     /* bucket heads */
87 } xhash;
88
89 /* Tree node of the parsed program */
90 typedef struct node_s {
91         uint32_t info;          /* NOTE(review): presumably an OC_xxx / ST_xxx class combined with OF_xxx flags, priority and opcode, as in tokeninfo[] - confirm */
92         unsigned lineno;
93         union {                 /* "l" operand; which member is valid depends on the node - presumably the left operand */
94                 struct node_s *n;
95                 var *v;
96                 int i;
97                 char *s;
98                 regex_t *re;
99         } l;
100         union {                 /* "r" operand - presumably the right operand */
101                 struct node_s *n;
102                 regex_t *ire;
103                 func *f;
104                 int argno;
105         } r;
106         union {                 /* NOTE(review): purpose of "a" is not visible in this chunk - confirm against the parser */
107                 struct node_s *n;
108         } a;
109 } node;
110
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
113         int size;
114         var *pos;
115         struct nvblock_s *prev;
116         struct nvblock_s *next;
117         var nv[0];
118 } nvblock;
119
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): presumably re[0]/re[1] are the case-sensitive and
 * case-insensitive variants that n.l.re / n.r.ire point at - confirm. */
120 typedef struct tsplitter_s {
121         node n;
122         regex_t re[2];
123 } tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!!  See next_token() */
/* Each class is one bit; the groups in tokenlist[] appear in this same order,
 * each NTC marker there stepping to the next bit. */
127 #define TC_SEQSTART      1                              /* ( */
128 #define TC_SEQTERM      (1 << 1)                /* ) */
129 #define TC_REGEXP       (1 << 2)                /* /.../ */
130 #define TC_OUTRDR       (1 << 3)                /* | > >> */
131 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
132 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
133 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
134 #define TC_IN           (1 << 7)
135 #define TC_COMMA        (1 << 8)
136 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
137 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
138 #define TC_ARRTERM      (1 << 11)               /* ] */
139 #define TC_GRPSTART     (1 << 12)               /* { */
140 #define TC_GRPTERM      (1 << 13)               /* } */
141 #define TC_SEMICOL      (1 << 14)
142 #define TC_NEWLINE      (1 << 15)
143 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
144 #define TC_WHILE        (1 << 17)
145 #define TC_ELSE         (1 << 18)
146 #define TC_BUILTIN      (1 << 19)
147 #define TC_GETLINE      (1 << 20)
148 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
149 #define TC_BEGIN        (1 << 22)
150 #define TC_END          (1 << 23)
151 #define TC_EOF          (1 << 24)
152 #define TC_VARIABLE     (1 << 25)
153 #define TC_ARRAY        (1 << 26)
154 #define TC_FUNCTION     (1 << 27)
155 #define TC_STRING       (1 << 28)
156 #define TC_NUMBER       (1 << 29)
157
/* either kind of unary prefix operator */
158 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
159
160 /* combined token classes (bitwise ORs of the simple TC_xxx classes) */
161 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
165
/* statement keywords / statement terminators */
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
168
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
172
173 /* discard newlines after these */
174 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175                    | TC_BINOP | TC_OPTERM)
176
177 /* what can expression begin with */
178 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
181
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operator flags, OR-ed into the info words of tokeninfo[].
 * NOTE(review): the consumer is outside this chunk; judging by the names,
 * RES1/RES2 ask for operand 1/2 to be evaluated and STR/NUM ask for it as a
 * string/number - confirm against evaluate(). */
188 #define OF_RES1    0x010000
189 #define OF_RES2    0x020000
190 #define OF_STR1    0x040000
191 #define OF_STR2    0x080000
192 #define OF_NUM1    0x100000
193 #define OF_CHECKED 0x200000
194
195 /* combined operator flags: first letter describes operand 1, second operand 2
 * (x = flag not set, V = RES only, S = RES+STR, N = RES+NUM) */
196 #define xx      0
197 #define xV      OF_RES2
198 #define xS      (OF_RES2 | OF_STR2)
199 #define Vx      OF_RES1
200 #define VV      (OF_RES1 | OF_RES2)
201 #define Nx      (OF_RES1 | OF_NUM1)
202 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx      (OF_RES1 | OF_STR1)
204 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
/* OPCLSMASK selects the byte holding the OC_xxx / ST_xxx operation class;
 * OPNMASK selects the low 7 bits (operator character or builtin index in tokeninfo[]) */
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK   0x007F
209
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument in bits 24..31. The shift is done on uint32_t:
 * builtin descriptors such as P(0x83) or P(0xd6) do not fit into a signed
 * int once shifted, and overflowing a signed left shift is undefined.
 * The argument is parenthesized so that expressions like P(a | b) shift the
 * whole value.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
217
218 /* Operation classes (stored in the OPCLSMASK byte of an info word) */
219
/* NOTE(review): these two values coincide with OC_WALKINIT and OC_BINARY below;
 * presumably they are range boundaries over the enum ordering used by the
 * evaluator - confirm before renumbering anything. */
220 #define SHIFT_TIL_THIS  0x0600
221 #define RECUR_FROM_THIS 0x1000
222
223 enum {
224         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
225         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
226
227         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
228         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
229         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
230
231         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
232         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
233         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
234         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
235         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
236         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
237         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
238         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
239         OC_DONE = 0x2800,
240
/* statement keywords; tokeninfo[] uses them for if/do/for/while */
241         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
242         ST_WHILE = 0x3300
243 };
244
245 /* simple builtins: indices OR-ed with OC_FBLTIN in tokeninfo[]
 * (there: int, rand, cos, exp, log, sin, sqrt, srand, systime, length,
 * system, fflush, close) */
246 enum {
247         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
248         F_ti,   F_le,   F_sy,   F_ff,   F_cl
249 };
250
251 /* builtins: indices OR-ed with OC_BUILTIN in tokeninfo[]
 * (there: atan2, index, match, split, substr, strftime, tolower, toupper,
 * gensub, gsub, sub, and, compl, lshift, or, rshift, xor) */
252 enum {
253         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
254         B_ge,   B_gs,   B_su,
255         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
256 };
257
258 /* tokens and their corresponding info values */
259
/* NTC is the separator embedded in tokenlist[]; NTCC is the same byte as a char constant */
260 #define NTC     "\377"  /* switch to next token class (tc<<1) */
261 #define NTCC    '\377'
262
/* shorthand that keeps the tokeninfo[] rows short */
263 #define OC_B    OC_BUILTIN
264
/* All fixed tokens, grouped by token class in TC_xxx bit order.
 * Every token is stored as one length byte (octal escape, e.g. "\10" = 8)
 * followed by its text; NTC closes a class and the list ends with a zero
 * length byte. Entries pair up, in order, with the words of tokeninfo[]
 * (NTC separators have no tokeninfo[] entry). */
265 static const char tokenlist[] ALIGN1 =
266         "\1("       NTC
267         "\1)"       NTC
268         "\1/"       NTC                                 /* REGEXP */
269         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
270         "\2++"      "\2--"      NTC                     /* UOPPOST */
271         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
272         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
273         "\2*="      "\2/="      "\2%="      "\2^="
274         "\1+"       "\1-"       "\3**="     "\2**"
275         "\1/"       "\1%"       "\1^"       "\1*"
276         "\2!="      "\2>="      "\2<="      "\1>"
277         "\1<"       "\2!~"      "\1~"       "\2&&"
278         "\2||"      "\1?"       "\1:"       NTC
279         "\2in"      NTC
280         "\1,"       NTC
281         "\1|"       NTC
282         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
283         "\1]"       NTC
284         "\1{"       NTC
285         "\1}"       NTC
286         "\1;"       NTC
287         "\1\n"      NTC
288         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
289         "\10continue"           "\6delete"  "\5print"
290         "\6printf"  "\4next"    "\10nextfile"
291         "\6return"  "\4exit"    NTC
292         "\5while"   NTC
293         "\4else"    NTC
294
295         "\3and"     "\5compl"   "\6lshift"  "\2or"
296         "\6rshift"  "\3xor"
297         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
298         "\3cos"     "\3exp"     "\3int"     "\3log"
299         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
300         "\6gensub"  "\4gsub"    "\5index"   "\6length"
301         "\5match"   "\5split"   "\7sprintf" "\3sub"
302         "\6substr"  "\7systime" "\10strftime"
303         "\7tolower" "\7toupper" NTC
304         "\7getline" NTC
305         "\4func"    "\10function"   NTC
306         "\5BEGIN"   NTC
307         "\3END"     "\0"
308         ;
309
/* Info word for every token of tokenlist[], in the same order.
 * A word combines an OC_xxx / ST_xxx class, OF_xxx operand flags (xV, NV, ...),
 * a P() priority byte (for builtins: the argument descriptor) and, in the
 * low bits, an operator character or an F_xxx / B_xxx builtin index.
 * Keep both tables in step when adding or removing a token. */
310 static const uint32_t tokeninfo[] = {
311         0,
312         0,
313         OC_REGEXP,
314         xS|'a',     xS|'w',     xS|'|',
315         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
316         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
317             OC_FIELD|xV|P(5),
318         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
319             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
320         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
321             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
322         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
323             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
324         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
325             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
326         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
327             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
328         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
329             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
330         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
331             OC_COLON|xx|P(67)|':',
332         OC_IN|SV|P(49),
333         OC_COMMA|SS|P(80),
334         OC_PGETLINE|SV|P(37),
335         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
336             OC_UNARY|xV|P(19)|'!',
337         0,
338         0,
339         0,
340         0,
341         0,
342         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
343         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
344         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
345         OC_RETURN|Vx,   OC_EXIT|Nx,
346         ST_WHILE,
347         0,
348
349         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
356         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
357         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358         OC_GETLINE|SV|P(0),
359         0,      0,
360         0,
361         0
362 };
363
364 /* internal variable names and their initial values       */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enumerators index intvar[]; their order must match the names in vNames[] */
366 enum {
367         CONVFMT,    OFMT,       FS,         OFS,
368         ORS,        RS,         RT,         FILENAME,
369         SUBSEP,     ARGIND,     ARGC,       ARGV,
370         ERRNO,      FNR,
371         NR,         NF,         IGNORECASE,
372         ENVIRON,    F0,         NUM_INTERNAL_VARS
373 };
374
/* NUL-separated names of the internal variables, in enum order.
 * A '*' written directly after a name's terminating NUL marks that name
 * (FS, RS, NF, IGNORECASE, $) as SPECIAL; an empty name ends the list.
 * The code that walks this table is outside this chunk. */
375 static const char vNames[] ALIGN1 =
376         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
377         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
378         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
379         "ERRNO\0"   "FNR\0"
380         "NR\0"      "NF\0*"     "IGNORECASE\0*"
381         "ENVIRON\0" "$\0*"      "\0";
382
/* NUL-separated initial string values for the first nine names above
 * (CONVFMT .. SUBSEP).
 * NOTE(review): "\377" presumably tells the initializer that the remaining
 * variables get no string value - confirm against the init loop. */
383 static const char vValues[] ALIGN1 =
384         "%.6g\0"    "%.6g\0"    " \0"       " \0"
385         "\n\0"      "\n\0"      "\0"        "\0"
386         "\034\0"
387         "\377";
388
389 /* hash size may grow to these values */
/* hash_init() starts every table at FIRST_PRIME slots; hash_rebuild() then
 * steps through PRIMES[] using xhash::nprime as the index and stops growing
 * once the last entry has been used. */
390 #define FIRST_PRIME 61
391 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
392
393
394 /* Globals. Split in two parts so that first one is addressed
395  * with (mostly short) negative offsets */
/* This first part sits directly below ptr_to_globals and is reached through G1 */
396 struct globals {
397         chain beginseq, mainseq, endseq;
398         chain *seq;
399         node *break_ptr, *continue_ptr;
400         rstream *iF;
401         xhash *vhash, *ahash, *fdhash, *fnhash;        /* vhash: variables (newvar), fdhash: streams (newfile), fnhash: functions (newfunc) */
402         const char *g_progname;                         /* printed by syntax_error() */
403         int g_lineno;                                   /* printed by syntax_error(); updated from t_lineno in next_token() */
404         int nfields;
405         int maxfields; /* used in fsrealloc() only */
406         var *Fields;
407         nvblock *g_cb;                                  /* current block of temporaries (nvalloc/nvfree) */
408         char *g_pos;                                    /* read position in the program text (next_token) */
409         char *g_buf;                                    /* scratch buffer; getvar_s() formats numbers into it */
410         smallint icase;
411         smallint exiting;
412         smallint nextrec;
413         smallint nextfile;
414         smallint is_f0_split;
415 };
/* Second part of the globals: starts exactly at ptr_to_globals and is reached through G */
416 struct globals2 {
/* current token as produced by next_token() */
417         uint32_t t_info; /* often used */
418         uint32_t t_tclass;
419         char *t_string;
420         int t_lineno;
421         int t_rollback;         /* when set, next_token() clears it instead of reading a new token */
422
423         var *intvar[NUM_INTERNAL_VARS]; /* often used */
424
425         /* former statics from various functions */
/* (named <function>__<variable>) */
426         char *split_f0__fstrings;
427
428         uint32_t next_token__save_tclass;
429         uint32_t next_token__save_info;
430         uint32_t next_token__ltclass;   /* INIT_G() sets this to TC_OPTERM */
431         smallint next_token__concat_inserted;
432
433         smallint next_input_file__files_happen;
434         rstream next_input_file__rsm;
435
436         var *evaluate__fnargs;
437         unsigned evaluate__seed;        /* INIT_G() sets this to 1 */
438         regex_t evaluate__sreg;
439
440         var ptest__v;
441
442         tsplitter exec_builtin__tspl;
443
444         /* biggest and least used members go last */
445         double t_double;
446         tsplitter fsplitter, rsplitter;
447 };
/* G1 is the struct globals stored just below ptr_to_globals;
 * G is the struct globals2 that starts at ptr_to_globals (see INIT_G) */
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /* char G1size[sizeof(G1)]; - 0x6c */
452 /* char Gsize[sizeof(G)]; - 0x1cc */
453 /* Trying to keep most of members accessible with short offsets: */
454 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthands that let the code use the global members as if they were plain variables */
455 #define beginseq     (G1.beginseq    )
456 #define mainseq      (G1.mainseq     )
457 #define endseq       (G1.endseq      )
458 #define seq          (G1.seq         )
459 #define break_ptr    (G1.break_ptr   )
460 #define continue_ptr (G1.continue_ptr)
461 #define iF           (G1.iF          )
462 #define vhash        (G1.vhash       )
463 #define ahash        (G1.ahash       )
464 #define fdhash       (G1.fdhash      )
465 #define fnhash       (G1.fnhash      )
466 #define g_progname   (G1.g_progname  )
467 #define g_lineno     (G1.g_lineno    )
468 #define nfields      (G1.nfields     )
469 #define maxfields    (G1.maxfields   )
470 #define Fields       (G1.Fields      )
471 #define g_cb         (G1.g_cb        )
472 #define g_pos        (G1.g_pos       )
473 #define g_buf        (G1.g_buf       )
474 #define icase        (G1.icase       )
475 #define exiting      (G1.exiting     )
476 #define nextrec      (G1.nextrec     )
477 #define nextfile     (G1.nextfile    )
478 #define is_f0_split  (G1.is_f0_split )
479 #define t_info       (G.t_info      )
480 #define t_tclass     (G.t_tclass    )
481 #define t_string     (G.t_string    )
482 #define t_double     (G.t_double    )
483 #define t_lineno     (G.t_lineno    )
484 #define t_rollback   (G.t_rollback  )
485 #define intvar       (G.intvar      )
486 #define fsplitter    (G.fsplitter   )
487 #define rsplitter    (G.rsplitter   )
/* Allocate both global structures as one zeroed chunk and point
 * ptr_to_globals at the boundary between them, so that struct globals is
 * reached as ptr_to_globals[-1] (G1) and struct globals2 as *ptr_to_globals (G).
 * The offset is added on a char pointer: arithmetic on the void pointer
 * returned by xzalloc() is a GNU extension, not standard C.
 * Two members need a non-zero start value and are set here.
 */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
        G.next_token__ltclass = TC_OPTERM; \
        G.evaluate__seed = 1; \
} while (0)
493
494
495 /* function prototypes */
/* Forward declarations for functions that are called before their definition
 * (e.g. handle_special() from the setvar_xxx() helpers, fmt_num() from getvar_s()) */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) ATTRIBUTE_NORETURN;
503
504 /* ---- error handling ---- */
505
/* Message texts, passed to syntax_error(), which prefixes them with program name and line number */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only needed when the math builtins are configured out */
515 #if !ENABLE_FEATURE_AWK_MATH
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
518
519 static void zero_out_var(var * vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *const message) ATTRIBUTE_NORETURN;
525 static void syntax_error(const char *const message)
526 {
527         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/* Hash a NUL-terminated name: every character folds in as
 * idx = idx * 63 + c, in wrapping unsigned arithmetic.
 * Callers reduce the result modulo the table size.
 */
static unsigned hashidx(const char *name)
{
        unsigned idx;

        for (idx = 0; *name != '\0'; name++)
                idx = idx * 63 + *name;
        return idx;
}
539
540 /* create new hash */
541 static xhash *hash_init(void)
542 {
543         xhash *newhash;
544
545         newhash = xzalloc(sizeof(xhash));
546         newhash->csize = FIRST_PRIME;
547         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
548
549         return newhash;
550 }
551
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
554 {
555         hash_item *hi;
556
557         hi = hash->items [ hashidx(name) % hash->csize ];
558         while (hi) {
559                 if (strcmp(hi->name, name) == 0)
560                         return &(hi->data);
561                 hi = hi->next;
562         }
563         return NULL;
564 }
565
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
568 {
569         unsigned newsize, i, idx;
570         hash_item **newitems, *hi, *thi;
571
572         if (hash->nprime == ARRAY_SIZE(PRIMES))
573                 return;
574
575         newsize = PRIMES[hash->nprime++];
576         newitems = xzalloc(newsize * sizeof(hash_item *));
577
578         for (i = 0; i < hash->csize; i++) {
579                 hi = hash->items[i];
580                 while (hi) {
581                         thi = hi;
582                         hi = thi->next;
583                         idx = hashidx(thi->name) % newsize;
584                         thi->next = newitems[idx];
585                         newitems[idx] = thi;
586                 }
587         }
588
589         free(hash->items);
590         hash->csize = newsize;
591         hash->items = newitems;
592 }
593
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
596 {
597         hash_item *hi;
598         unsigned idx;
599         int l;
600
601         hi = hash_search(hash, name);
602         if (!hi) {
603                 if (++hash->nel / hash->csize > 10)
604                         hash_rebuild(hash);
605
606                 l = strlen(name) + 1;
607                 hi = xzalloc(sizeof(hash_item) + l);
608                 memcpy(hi->name, name, l);
609
610                 idx = hashidx(name) % hash->csize;
611                 hi->next = hash->items[idx];
612                 hash->items[idx] = hi;
613                 hash->glen += l;
614         }
615         return &(hi->data);
616 }
617
/* Typed wrappers around hash_find(): look the name up and create a zeroed
 * entry when it is missing. newvar/newfile/newfunc use the global
 * vhash/fdhash/fnhash tables; findvar takes the table explicitly. */
618 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
619 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
620 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
622
623 static void hash_remove(xhash *hash, const char *name)
624 {
625         hash_item *hi, **phi;
626
627         phi = &(hash->items[hashidx(name) % hash->csize]);
628         while (*phi) {
629                 hi = *phi;
630                 if (strcmp(hi->name, name) == 0) {
631                         hash->glen -= (strlen(name) + 1);
632                         hash->nel--;
633                         *phi = hi->next;
634                         free(hi);
635                         break;
636                 }
637                 phi = &(hi->next);
638         }
639 }
640
641 /* ------ some useful functions ------ */
642
643 static void skip_spaces(char **s)
644 {
645         char *p = *s;
646
647         while (1) {
648                 if (*p == '\\' && p[1] == '\n') {
649                         p++;
650                         t_lineno++;
651                 } else if (*p != ' ' && *p != '\t') {
652                         break;
653                 }
654                 p++;
655         }
656         *s = p;
657 }
658
/* Return the NUL-terminated word at *s and move *s just past its
 * terminating NUL, i.e. to the start of the next word in a packed list.
 */
static char *nextword(char **s)
{
        char *word = *s;
        char *end = word;

        while (*end != '\0')
                end++;
        *s = end + 1;

        return word;
}
667
/* Fetch the next character at *s, decoding a backslash escape if there is
 * one, and advance *s past everything that was consumed.
 *
 * When the escape is not decoded (bb_process_escape_sequence() hands back
 * a backslash and leaves *s where it was - that is what the check below
 * tests for), the character after the backslash is returned literally, so
 * that e.g. \" inside a string yields ".
 * Fix: if that character is the terminating NUL, it is returned but *s is
 * NOT advanced over it. Previously a trailing backslash moved *s beyond the
 * end of the string, and the caller kept reading past the buffer.
 */
static char nextchar(char **s)
{
        char c, *pps;

        c = *((*s)++);
        pps = *s;
        if (c == '\\')
                c = bb_process_escape_sequence((const char**)s);
        if (c == '\\' && *s == pps) {
                /* escape was not consumed: take the next char as is */
                c = **s;
                if (c != '\0')
                        (*s)++;
        }
        return c;
}
678
679 static ALWAYS_INLINE int isalnum_(int c)
680 {
681         return (isalnum(c) || c == '_');
682 }
683
684 /* -------- working with variables (set/get/copy/etc) -------- */
685
686 static xhash *iamarray(var *v)
687 {
688         var *a = v;
689
690         while (a->type & VF_CHILD)
691                 a = a->x.parent;
692
693         if (!(a->type & VF_ARRAY)) {
694                 a->type |= VF_ARRAY;
695                 a->x.array = hash_init();
696         }
697         return a->x.array;
698 }
699
700 static void clear_array(xhash *array)
701 {
702         unsigned i;
703         hash_item *hi, *thi;
704
705         for (i = 0; i < array->csize; i++) {
706                 hi = array->items[i];
707                 while (hi) {
708                         thi = hi;
709                         hi = hi->next;
710                         free(thi->data.v.string);
711                         free(thi);
712                 }
713                 array->items[i] = NULL;
714         }
715         array->glen = array->nel = 0;
716 }
717
718 /* clear a variable */
719 static var *clrvar(var *v)
720 {
721         if (!(v->type & VF_FSTR))
722                 free(v->string);
723
724         v->type &= VF_DONTTOUCH;
725         v->type |= VF_DIRTY;
726         v->string = NULL;
727         return v;
728 }
729
730 /* assign string value to variable */
731 static var *setvar_p(var *v, char *value)
732 {
733         clrvar(v);
734         v->string = value;
735         handle_special(v);
736         return v;
737 }
738
739 /* same as setvar_p but make a copy of string */
740 static var *setvar_s(var *v, const char *value)
741 {
742         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
743 }
744
745 /* same as setvar_s but set USER flag */
746 static var *setvar_u(var *v, const char *value)
747 {
748         setvar_s(v, value);
749         v->type |= VF_USER;
750         return v;
751 }
752
753 /* set array element to user string */
754 static void setari_u(var *a, int idx, const char *s)
755 {
756         char sidx[sizeof(int)*3 + 1];
757         var *v;
758
759         sprintf(sidx, "%d", idx);
760         v = findvar(iamarray(a), sidx);
761         setvar_u(v, s);
762 }
763
764 /* assign numeric value to variable */
765 static var *setvar_i(var *v, double value)
766 {
767         clrvar(v);
768         v->type |= VF_NUMBER;
769         v->number = value;
770         handle_special(v);
771         return v;
772 }
773
774 static const char *getvar_s(var *v)
775 {
776         /* if v is numeric and has no cached string, convert it to string */
777         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
778                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
779                 v->string = xstrdup(g_buf);
780                 v->type |= VF_CACHED;
781         }
782         return (v->string == NULL) ? "" : v->string;
783 }
784
785 static double getvar_i(var *v)
786 {
787         char *s;
788
789         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
790                 v->number = 0;
791                 s = v->string;
792                 if (s && *s) {
793                         v->number = strtod(s, &s);
794                         if (v->type & VF_USER) {
795                                 skip_spaces(&s);
796                                 if (*s != '\0')
797                                         v->type &= ~VF_USER;
798                         }
799                 } else {
800                         v->type &= ~VF_USER;
801                 }
802                 v->type |= VF_CACHED;
803         }
804         return v->number;
805 }
806
807 static var *copyvar(var *dest, const var *src)
808 {
809         if (dest != src) {
810                 clrvar(dest);
811                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
812                 dest->number = src->number;
813                 if (src->string)
814                         dest->string = xstrdup(src->string);
815         }
816         handle_special(dest);
817         return dest;
818 }
819
820 static var *incvar(var *v)
821 {
822         return setvar_i(v, getvar_i(v) + 1.);
823 }
824
825 /* return true if v is number or numeric string */
826 static int is_numeric(var *v)
827 {
828         getvar_i(v);
829         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
830 }
831
832 /* return 1 when value of v corresponds to true, 0 otherwise */
833 static int istrue(var *v)
834 {
835         if (is_numeric(v))
836                 return (v->number == 0) ? 0 : 1;
837         return (v->string && *(v->string)) ? 1 : 0;
838 }
839
840 /* temporary variables allocator. Last allocated should be first freed */
841 static var *nvalloc(int n)
842 {
843         nvblock *pb = NULL;
844         var *v, *r;
845         int size;
846
847         while (g_cb) {
848                 pb = g_cb;
849                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
850                 g_cb = g_cb->next;
851         }
852
853         if (!g_cb) {
854                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
855                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
856                 g_cb->size = size;
857                 g_cb->pos = g_cb->nv;
858                 g_cb->prev = pb;
859                 /*g_cb->next = NULL; - xzalloc did it */
860                 if (pb) pb->next = g_cb;
861         }
862
863         v = r = g_cb->pos;
864         g_cb->pos += n;
865
866         while (v < g_cb->pos) {
867                 v->type = 0;
868                 v->string = NULL;
869                 v++;
870         }
871
872         return r;
873 }
874
875 static void nvfree(var *v)
876 {
877         var *p;
878
879         if (v < g_cb->nv || v >= g_cb->pos)
880                 syntax_error(EMSG_INTERNAL_ERROR);
881
882         for (p = v; p < g_cb->pos; p++) {
883                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
884                         clear_array(iamarray(p));
885                         free(p->x.array->items);
886                         free(p->x.array);
887                 }
888                 if (p->type & VF_WALK)
889                         free(p->x.walker);
890
891                 clrvar(p);
892         }
893
894         g_cb->pos = v;
895         while (g_cb->prev && g_cb->pos == g_cb->nv) {
896                 g_cb = g_cb->prev;
897         }
898 }
899
900 /* ------- awk program text parsing ------- */
901
902 /* Parse the next token at the global position g_pos and publish it through
903  * the t_* globals (t_tclass, t_info, t_string, t_double). If the token's
904  * class is not in 'expected', raise a syntax error. Returns the token class. */
905 static uint32_t next_token(uint32_t expected)
906 {
    /* State that must survive between calls is kept in the globals struct G */
907 #define concat_inserted (G.next_token__concat_inserted)
908 #define save_tclass     (G.next_token__save_tclass)
909 #define save_info       (G.next_token__save_info)
910 /* Initialized to TC_OPTERM: */
911 #define ltclass         (G.next_token__ltclass)
912
913         char *p, *pp, *s;
914         const char *tl;
915         uint32_t tc;
916         const uint32_t *ti;
917         int l;
918
            /* rollback_token() was called: hand out the previous token again
             * (t_tclass and friends are still intact) */
919         if (t_rollback) {
920                 t_rollback = FALSE;
921
            /* the previous call returned a synthetic concatenation operator;
             * now deliver the real token that was saved back then */
922         } else if (concat_inserted) {
923                 concat_inserted = FALSE;
924                 t_tclass = save_tclass;
925                 t_info = save_info;
926
927         } else {
928                 p = g_pos;
929  readnext:
930                 skip_spaces(&p);
931                 g_lineno = t_lineno;
                    /* a comment runs up to, but not including, end of line */
932                 if (*p == '#')
933                         while (*p != '\n' && *p != '\0')
934                                 p++;
935
936                 if (*p == '\n')
937                         t_lineno++;
938
939                 if (*p == '\0') {
940                         tc = TC_EOF;
941
942                 } else if (*p == '\"') {
943                         /* it's a string */
                            /* the literal is decoded in place: nextchar() output is
                             * written back into the program text starting at t_string */
944                         t_string = s = ++p;
945                         while (*p != '\"') {
946                                 if (*p == '\0' || *p == '\n')
947                                         syntax_error(EMSG_UNEXP_EOS);
948                                 *(s++) = nextchar(&p);
949                         }
950                         p++;
951                         *s = '\0';
952                         tc = TC_STRING;
953
                    /* '/' starts a regexp only when the caller says one may appear */
954                 } else if ((expected & TC_REGEXP) && *p == '/') {
955                         /* it's regexp */
956                         t_string = s = ++p;
957                         while (*p != '/') {
958                                 if (*p == '\0' || *p == '\n')
959                                         syntax_error(EMSG_UNEXP_EOS);
960                                 *s = *p++;
961                                 if (*s++ == '\\') {
                                            /* pp remembers where the escape body starts so we
                                             * can tell whether (and what) was consumed */
962                                         pp = p;
963                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
                                            /* "\\" is kept as two backslashes */
964                                         if (*pp == '\\')
965                                                 *s++ = '\\';
                                            /* nothing consumed: copy the following char as is */
966                                         if (p == pp)
967                                                 *s++ = *p++;
968                                 }
969                         }
970                         p++;
971                         *s = '\0';
972                         tc = TC_REGEXP;
973
974                 } else if (*p == '.' || isdigit(*p)) {
975                         /* it's a number */
976                         t_double = strtod(p, &p);
                            /* a '.' right after the parsed number is rejected */
977                         if (*p == '.')
978                                 syntax_error(EMSG_UNEXP_TOKEN);
979                         tc = TC_NUMBER;
980
981                 } else {
982                         /* search for something known */
                            /* tokenlist is walked as a sequence of entries, each a length
                             * byte followed by the token text; an NTCC length byte moves
                             * on to the next token class (tc is shifted left by one).
                             * tokeninfo is advanced in step with the token entries. */
983                         tl = tokenlist;
984                         tc = 0x00000001;
985                         ti = tokeninfo;
986                         while (*tl) {
987                                 l = *(tl++);
988                                 if (l == NTCC) {
989                                         tc <<= 1;
990                                         continue;
991                                 }
992                                 /* if token class is expected, token
993                                  * matches and it's not a longer word,
994                                  * then this is what we are looking for
995                                  */
996                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
997                                  && *tl == *p && strncmp(p, tl, l) == 0
998                                  && !((tc & TC_WORD) && isalnum_(p[l]))
999                                 ) {
1000                                         t_info = *ti;
1001                                         p += l;
1002                                         break;
1003                                 }
1004                                 ti++;
1005                                 tl += l;
1006                         }
1007
                             /* *tl == 0 here means the table was exhausted without a match */
1008                         if (!*tl) {
1009                                 /* it's a name (var/array/function),
1010                                  * otherwise it's something wrong
1011                                  */
1012                                 if (!isalnum_(*p))
1013                                         syntax_error(EMSG_UNEXP_TOKEN);
1014
                                     /* The name is shifted one byte to the left inside the
                                      * program text so that a terminating NUL can be stored
                                      * without clobbering the character that follows the
                                      * name; the byte just before the name is overwritten. */
1015                                 t_string = --p;
1016                                 while (isalnum_(*(++p))) {
1017                                         *(p-1) = *p;
1018                                 }
1019                                 *(p-1) = '\0';
1020                                 tc = TC_VARIABLE;
1021                                 /* also consume whitespace between functionname and bracket */
1022                                 if (!(expected & TC_VARIABLE))
1023                                         skip_spaces(&p);
                                     /* '(' is left unconsumed, '[' is consumed */
1024                                 if (*p == '(') {
1025                                         tc = TC_FUNCTION;
1026                                 } else {
1027                                         if (*p == '[') {
1028                                                 p++;
1029                                                 tc = TC_ARRAY;
1030                                         }
1031                                 }
1032                         }
1033                 }
1034                 g_pos = p;
1035
1036                 /* skipping newlines in some cases */
                     /* i.e. when the previously returned token is in class TC_NOTERM */
1037                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1038                         goto readnext;
1039
1040                 /* insert concatenation operator when needed */
                     /* The real token is parked in save_tclass/save_info and a
                      * synthetic OC_CONCAT binary operator is returned first. */
1041                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1042                         concat_inserted = TRUE;
1043                         save_tclass = tc;
1044                         save_info = t_info;
1045                         tc = TC_BINOP;
1046                         t_info = OC_CONCAT | SS | P(35);
1047                 }
1048
1049                 t_tclass = tc;
1050         }
             /* remember the class of the token being returned for the next call */
1051         ltclass = t_tclass;
1052
1053         /* Are we ready for this? */
1054         if (!(ltclass & expected))
1055                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1056                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1057
1058         return ltclass;
1059 #undef concat_inserted
1060 #undef save_tclass
1061 #undef save_info
1062 #undef ltclass
1063 }
1064
1065 static void rollback_token(void)
1066 {
1067         t_rollback = TRUE;
1068 }
1069
1070 static node *new_node(uint32_t info)
1071 {
1072         node *n;
1073
1074         n = xzalloc(sizeof(node));
1075         n->info = info;
1076         n->lineno = g_lineno;
1077         return n;
1078 }
1079
1080 static node *mk_re_node(const char *s, node *n, regex_t *re)
1081 {
1082         n->info = OC_REGEXP;
1083         n->l.re = re;
1084         n->r.ire = re + 1;
1085         xregcomp(re, s, REG_EXTENDED);
1086         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1087
1088         return n;
1089 }
1090
1091 static node *condition(void)
1092 {
1093         next_token(TC_SEQSTART);
1094         return parse_expr(TC_SEQTERM);
1095 }
1096
1097 /* parse expression terminated by given argument, return ptr
1098  * to built subtree. Terminator is eaten by parse_expr */
1099 static node *parse_expr(uint32_t iexp)
1100 {
1101         node sn;
1102         node *cn = &sn;
1103         node *vn, *glptr;
1104         uint32_t tc, xtc;
1105         var *v;
1106
1107         sn.info = PRIMASK;
1108         sn.r.n = glptr = NULL;
1109         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1110
1111         while (!((tc = next_token(xtc)) & iexp)) {
1112                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1113                         /* input redirection (<) attached to glptr node */
1114                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1115                         cn->a.n = glptr;
1116                         xtc = TC_OPERAND | TC_UOPPRE;
1117                         glptr = NULL;
1118
1119                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1120                         /* for binary and postfix-unary operators, jump back over
1121                          * previous operators with higher priority */
1122                         vn = cn;
1123                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1124                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1125                                 vn = vn->a.n;
1126                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1127                                 t_info += P(6);
1128                         cn = vn->a.n->r.n = new_node(t_info);
1129                         cn->a.n = vn->a.n;
1130                         if (tc & TC_BINOP) {
1131                                 cn->l.n = vn;
1132                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1133                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1134                                         /* it's a pipe */
1135                                         next_token(TC_GETLINE);
1136                                         /* give maximum priority to this pipe */
1137                                         cn->info &= ~PRIMASK;
1138                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1139                                 }
1140                         } else {
1141                                 cn->r.n = vn;
1142                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1143                         }
1144                         vn->a.n = cn;
1145
1146                 } else {
1147                         /* for operands and prefix-unary operators, attach them
1148                          * to last node */
1149                         vn = cn;
1150                         cn = vn->r.n = new_node(t_info);
1151                         cn->a.n = vn;
1152                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1153                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1154                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1155                                 /* one should be very careful with switch on tclass -
1156                                  * only simple tclasses should be used! */
1157                                 switch (tc) {
1158                                 case TC_VARIABLE:
1159                                 case TC_ARRAY:
1160                                         cn->info = OC_VAR;
1161                                         v = hash_search(ahash, t_string);
1162                                         if (v != NULL) {
1163                                                 cn->info = OC_FNARG;
1164                                                 cn->l.i = v->x.aidx;
1165                                         } else {
1166                                                 cn->l.v = newvar(t_string);
1167                                         }
1168                                         if (tc & TC_ARRAY) {
1169                                                 cn->info |= xS;
1170                                                 cn->r.n = parse_expr(TC_ARRTERM);
1171                                         }
1172                                         break;
1173
1174                                 case TC_NUMBER:
1175                                 case TC_STRING:
1176                                         cn->info = OC_VAR;
1177                                         v = cn->l.v = xzalloc(sizeof(var));
1178                                         if (tc & TC_NUMBER)
1179                                                 setvar_i(v, t_double);
1180                                         else
1181                                                 setvar_s(v, t_string);
1182                                         break;
1183
1184                                 case TC_REGEXP:
1185                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1186                                         break;
1187
1188                                 case TC_FUNCTION:
1189                                         cn->info = OC_FUNC;
1190                                         cn->r.f = newfunc(t_string);
1191                                         cn->l.n = condition();
1192                                         break;
1193
1194                                 case TC_SEQSTART:
1195                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1196                                         cn->a.n = vn;
1197                                         break;
1198
1199                                 case TC_GETLINE:
1200                                         glptr = cn;
1201                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1202                                         break;
1203
1204                                 case TC_BUILTIN:
1205                                         cn->l.n = condition();
1206                                         break;
1207                                 }
1208                         }
1209                 }
1210         }
1211         return sn.r.n;
1212 }
1213
1214 /* add node to chain. Return ptr to alloc'd node */
1215 static node *chain_node(uint32_t info)
1216 {
1217         node *n;
1218
1219         if (!seq->first)
1220                 seq->first = seq->last = new_node(0);
1221
1222         if (seq->programname != g_progname) {
1223                 seq->programname = g_progname;
1224                 n = chain_node(OC_NEWSOURCE);
1225                 n->l.s = xstrdup(g_progname);
1226         }
1227
1228         n = seq->last;
1229         n->info = info;
1230         seq->last = n->a.n = new_node(OC_DONE);
1231
1232         return n;
1233 }
1234
1235 static void chain_expr(uint32_t info)
1236 {
1237         node *n;
1238
1239         n = chain_node(info);
1240         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1241         if (t_tclass & TC_GRPTERM)
1242                 rollback_token();
1243 }
1244
1245 static node *chain_loop(node *nn)
1246 {
1247         node *n, *n2, *save_brk, *save_cont;
1248
1249         save_brk = break_ptr;
1250         save_cont = continue_ptr;
1251
1252         n = chain_node(OC_BR | Vx);
1253         continue_ptr = new_node(OC_EXEC);
1254         break_ptr = new_node(OC_EXEC);
1255         chain_group();
1256         n2 = chain_node(OC_EXEC | Vx);
1257         n2->l.n = nn;
1258         n2->a.n = n;
1259         continue_ptr->a.n = n2;
1260         break_ptr->a.n = n->r.n = seq->last;
1261
1262         continue_ptr = save_cont;
1263         break_ptr = save_brk;
1264
1265         return n;
1266 }
1267
1268 /* parse group and attach it to chain */
1269 static void chain_group(void)
1270 {
1271         uint32_t c;
1272         node *n, *n2, *n3;
1273
1274         do {
1275                 c = next_token(TC_GRPSEQ);
1276         } while (c & TC_NEWLINE);
1277
1278         if (c & TC_GRPSTART) {
1279                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1280                         if (t_tclass & TC_NEWLINE) continue;
1281                         rollback_token();
1282                         chain_group();
1283                 }
1284         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1285                 rollback_token();
1286                 chain_expr(OC_EXEC | Vx);
1287         } else {                                                /* TC_STATEMNT */
1288                 switch (t_info & OPCLSMASK) {
1289                 case ST_IF:
1290                         n = chain_node(OC_BR | Vx);
1291                         n->l.n = condition();
1292                         chain_group();
1293                         n2 = chain_node(OC_EXEC);
1294                         n->r.n = seq->last;
1295                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1296                                 chain_group();
1297                                 n2->a.n = seq->last;
1298                         } else {
1299                                 rollback_token();
1300                         }
1301                         break;
1302
1303                 case ST_WHILE:
1304                         n2 = condition();
1305                         n = chain_loop(NULL);
1306                         n->l.n = n2;
1307                         break;
1308
1309                 case ST_DO:
1310                         n2 = chain_node(OC_EXEC);
1311                         n = chain_loop(NULL);
1312                         n2->a.n = n->a.n;
1313                         next_token(TC_WHILE);
1314                         n->l.n = condition();
1315                         break;
1316
1317                 case ST_FOR:
1318                         next_token(TC_SEQSTART);
1319                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1320                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1321                                 if ((n2->info & OPCLSMASK) != OC_IN)
1322                                         syntax_error(EMSG_UNEXP_TOKEN);
1323                                 n = chain_node(OC_WALKINIT | VV);
1324                                 n->l.n = n2->l.n;
1325                                 n->r.n = n2->r.n;
1326                                 n = chain_loop(NULL);
1327                                 n->info = OC_WALKNEXT | Vx;
1328                                 n->l.n = n2->l.n;
1329                         } else {                        /* for (;;) */
1330                                 n = chain_node(OC_EXEC | Vx);
1331                                 n->l.n = n2;
1332                                 n2 = parse_expr(TC_SEMICOL);
1333                                 n3 = parse_expr(TC_SEQTERM);
1334                                 n = chain_loop(n3);
1335                                 n->l.n = n2;
1336                                 if (!n2)
1337                                         n->info = OC_EXEC;
1338                         }
1339                         break;
1340
1341                 case OC_PRINT:
1342                 case OC_PRINTF:
1343                         n = chain_node(t_info);
1344                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1345                         if (t_tclass & TC_OUTRDR) {
1346                                 n->info |= t_info;
1347                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1348                         }
1349                         if (t_tclass & TC_GRPTERM)
1350                                 rollback_token();
1351                         break;
1352
1353                 case OC_BREAK:
1354                         n = chain_node(OC_EXEC);
1355                         n->a.n = break_ptr;
1356                         break;
1357
1358                 case OC_CONTINUE:
1359                         n = chain_node(OC_EXEC);
1360                         n->a.n = continue_ptr;
1361                         break;
1362
1363                 /* delete, next, nextfile, return, exit */
1364                 default:
1365                         chain_expr(t_info);
1366                 }
1367         }
1368 }
1369
1370 static void parse_program(char *p)
1371 {
1372         uint32_t tclass;
1373         node *cn;
1374         func *f;
1375         var *v;
1376
1377         g_pos = p;
1378         t_lineno = 1;
1379         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1380                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1381
1382                 if (tclass & TC_OPTERM)
1383                         continue;
1384
1385                 seq = &mainseq;
1386                 if (tclass & TC_BEGIN) {
1387                         seq = &beginseq;
1388                         chain_group();
1389
1390                 } else if (tclass & TC_END) {
1391                         seq = &endseq;
1392                         chain_group();
1393
1394                 } else if (tclass & TC_FUNCDECL) {
1395                         next_token(TC_FUNCTION);
1396                         g_pos++;
1397                         f = newfunc(t_string);
1398                         f->body.first = NULL;
1399                         f->nargs = 0;
1400                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1401                                 v = findvar(ahash, t_string);
1402                                 v->x.aidx = (f->nargs)++;
1403
1404                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1405                                         break;
1406                         }
1407                         seq = &(f->body);
1408                         chain_group();
1409                         clear_array(ahash);
1410
1411                 } else if (tclass & TC_OPSEQ) {
1412                         rollback_token();
1413                         cn = chain_node(OC_TEST);
1414                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1415                         if (t_tclass & TC_GRPSTART) {
1416                                 rollback_token();
1417                                 chain_group();
1418                         } else {
1419                                 chain_node(OC_PRINT);
1420                         }
1421                         cn->r.n = mainseq.last;
1422
1423                 } else /* if (tclass & TC_GRPSTART) */ {
1424                         rollback_token();
1425                         chain_group();
1426                 }
1427         }
1428 }
1429
1430
1431 /* -------- program execution part -------- */
1432
1433 static node *mk_splitter(const char *s, tsplitter *spl)
1434 {
1435         regex_t *re, *ire;
1436         node *n;
1437
1438         re = &spl->re[0];
1439         ire = &spl->re[1];
1440         n = &spl->n;
1441         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1442                 regfree(re);
1443                 regfree(ire); // TODO: nuke ire, use re+1?
1444         }
1445         if (strlen(s) > 1) {
1446                 mk_re_node(s, n, re);
1447         } else {
1448                 n->info = (uint32_t) *s;
1449         }
1450
1451         return n;
1452 }
1453
1454 /* use node as a regular expression. Supplied with node ptr and regex_t
1455  * storage space. Return ptr to regex (if result points to preg, it should
1456  * be later regfree'd manually
1457  */
1458 static regex_t *as_regex(node *op, regex_t *preg)
1459 {
1460         var *v;
1461         const char *s;
1462
1463         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1464                 return icase ? op->r.ire : op->l.re;
1465         }
1466         v = nvalloc(1);
1467         s = getvar_s(evaluate(op, v));
1468         xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1469         nvfree(v);
1470         return preg;
1471 }
1472
/* Gradually growing buffer: make sure *b is allocated and can hold more
 * than n bytes. When it has to grow, the new capacity n + n/2 + 80 is
 * stored in *size. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1479
1480 /* resize field storage space */
1481 static void fsrealloc(int size)
1482 {
1483         int i;
1484
1485         if (size >= maxfields) {
1486                 i = maxfields;
1487                 maxfields = size + 16;
1488                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1489                 for (; i < maxfields; i++) {
1490                         Fields[i].type = VF_SPECIAL;
1491                         Fields[i].string = NULL;
1492                 }
1493         }
1494
1495         if (size < nfields) {
1496                 for (i = size; i < nfields; i++) {
1497                         clrvar(Fields + i);
1498                 }
1499         }
1500         nfields = size;
1501 }
1502
/* Split string s into fields according to splitter node spl.
 * A newly allocated buffer is stored in *slist and receives the fields
 * one after another, each NUL-terminated. Returns the number of fields.
 * spl->info selects the mode: a compiled regex (OC_REGEXP class), an
 * empty separator (every character is a field), a single space (runs of
 * whitespace separate fields) or any other single character.
 */
1503 static int awk_split(const char *s, node *spl, char **slist)
1504 {
1505         int l, n = 0;
1506         char c[4];
1507         char *s1;
1508         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1509
1510         /* in worst case, each char would be a separate field */
1511         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1512         strcpy(s1, s);
1513
             /* c holds two NUL-terminated sets: c+0 is the separator character
              * (twice, so icase can store both cases), c+2 is empty unless RS
              * is empty, in which case it is "\n" and newline also separates
              * fields. Since c[2] directly follows c[1], strpbrk(…, c) below
              * sees the newline as part of the set too. */
1514         c[0] = c[1] = (char)spl->info;
1515         c[2] = c[3] = '\0';
1516         if (*getvar_s(intvar[RS]) == '\0')
1517                 c[2] = '\n';
1518
1519         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1520                 if (!*s)
1521                         return n; /* "": zero fields */
1522                 n++; /* at least one field will be there */
1523                 do {
1524                         l = strcspn(s, c+2); /* len till next NUL or \n */
                             /* take the regex match only if it starts no later than
                              * the next newline separator / end of string */
1525                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1526                          && pmatch[0].rm_so <= l
1527                         ) {
1528                                 l = pmatch[0].rm_so;
                                     /* empty match at the very start: consume one
                                      * character so the loop makes progress */
1529                                 if (pmatch[0].rm_eo == 0) {
1530                                         l++;
1531                                         pmatch[0].rm_eo++;
1532                                 }
1533                                 n++; /* we saw yet another delimiter */
1534                         } else {
                                     /* no usable match: the field ends at offset l;
                                      * rm_eo is reused as "how far to advance s" */
1535                                 pmatch[0].rm_eo = l;
1536                                 if (s[l]) pmatch[0].rm_eo++;
1537                         }
1538                         memcpy(s1, s, l);
1539                         s1[l] = '\0';
1540                         nextword(&s1);
1541                         s += pmatch[0].rm_eo;
1542                 } while (*s);
1543                 return n;
1544         }
1545         if (c[0] == '\0') {  /* null split */
                     /* each character becomes its own one-char field */
1546                 while (*s) {
1547                         *s1++ = *s++;
1548                         *s1++ = '\0';
1549                         n++;
1550                 }
1551                 return n;
1552         }
1553         if (c[0] != ' ') {  /* single-character split */
1554                 if (icase) {
1555                         c[0] = toupper(c[0]);
1556                         c[1] = tolower(c[1]);
1557                 }
                     /* works on the copy made by strcpy() above: separators are
                      * overwritten with NUL in place */
1558                 if (*s1) n++;
1559                 while ((s1 = strpbrk(s1, c))) {
1560                         *s1++ = '\0';
1561                         n++;
1562                 }
1563                 return n;
1564         }
1565         /* space split */
1566         while (*s) {
1567                 s = skip_whitespace(s);
1568                 if (!*s) break;
1569                 n++;
1570                 while (*s && !isspace(*s))
1571                         *s1++ = *s++;
1572                 *s1++ = '\0';
1573         }
1574         return n;
1575 }
1576
1577 static void split_f0(void)
1578 {
1579 /* static char *fstrings; */
1580 #define fstrings (G.split_f0__fstrings)
1581
1582         int i, n;
1583         char *s;
1584
1585         if (is_f0_split)
1586                 return;
1587
1588         is_f0_split = TRUE;
1589         free(fstrings);
1590         fsrealloc(0);
1591         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1592         fsrealloc(n);
1593         s = fstrings;
1594         for (i = 0; i < n; i++) {
1595                 Fields[i].string = nextword(&s);
1596                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1597         }
1598
1599         /* set NF manually to avoid side effects */
1600         clrvar(intvar[NF]);
1601         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1602         intvar[NF]->number = nfields;
1603 #undef fstrings
1604 }
1605
1606 /* perform additional actions when some internal variables changed */
1607 static void handle_special(var *v)
1608 {
1609         int n;
1610         char *b;
1611         const char *sep, *s;
1612         int sl, l, len, i, bsize;
1613
1614         if (!(v->type & VF_SPECIAL))
1615                 return;
1616
1617         if (v == intvar[NF]) {
1618                 n = (int)getvar_i(v);
1619                 fsrealloc(n);
1620
1621                 /* recalculate $0 */
1622                 sep = getvar_s(intvar[OFS]);
1623                 sl = strlen(sep);
1624                 b = NULL;
1625                 len = 0;
1626                 for (i = 0; i < n; i++) {
1627                         s = getvar_s(&Fields[i]);
1628                         l = strlen(s);
1629                         if (b) {
1630                                 memcpy(b+len, sep, sl);
1631                                 len += sl;
1632                         }
1633                         qrealloc(&b, len+l+sl, &bsize);
1634                         memcpy(b+len, s, l);
1635                         len += l;
1636                 }
1637                 if (b)
1638                         b[len] = '\0';
1639                 setvar_p(intvar[F0], b);
1640                 is_f0_split = TRUE;
1641
1642         } else if (v == intvar[F0]) {
1643                 is_f0_split = FALSE;
1644
1645         } else if (v == intvar[FS]) {
1646                 mk_splitter(getvar_s(v), &fsplitter);
1647
1648         } else if (v == intvar[RS]) {
1649                 mk_splitter(getvar_s(v), &rsplitter);
1650
1651         } else if (v == intvar[IGNORECASE]) {
1652                 icase = istrue(v);
1653
1654         } else {                                /* $n */
1655                 n = getvar_i(intvar[NF]);
1656                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1657                 /* right here v is invalid. Just to note... */
1658         }
1659 }
1660
1661 /* step through func/builtin/etc arguments */
1662 static node *nextarg(node **pn)
1663 {
1664         node *n;
1665
1666         n = *pn;
1667         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1668                 *pn = n->r.n;
1669                 n = n->l.n;
1670         } else {
1671                 *pn = NULL;
1672         }
1673         return n;
1674 }
1675
1676 static void hashwalk_init(var *v, xhash *array)
1677 {
1678         char **w;
1679         hash_item *hi;
1680         unsigned i;
1681
1682         if (v->type & VF_WALK)
1683                 free(v->x.walker);
1684
1685         v->type |= VF_WALK;
1686         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1687         w[0] = w[1] = (char *)(w + 2);
1688         for (i = 0; i < array->csize; i++) {
1689                 hi = array->items[i];
1690                 while (hi) {
1691                         strcpy(*w, hi->name);
1692                         nextword(w);
1693                         hi = hi->next;
1694                 }
1695         }
1696 }
1697
1698 static int hashwalk_next(var *v)
1699 {
1700         char **w;
1701
1702         w = v->x.walker;
1703         if (w[1] == w[0])
1704                 return FALSE;
1705
1706         setvar_s(v, nextword(w+1));
1707         return TRUE;
1708 }
1709
1710 /* evaluate node, return 1 when result is true, 0 otherwise */
1711 static int ptest(node *pattern)
1712 {
1713         /* ptest__v is "static": to save stack space? */
1714         return istrue(evaluate(pattern, &G.ptest__v));
1715 }
1716
/*
 * Read next record from stream rsm into a variable v.
 *
 * The record separator comes from rsplitter: a regexp when its class is
 * OC_REGEXP, otherwise the character held in the low byte of its info
 * word; when that character is NUL, records are separated by runs of
 * newlines ("\n\n").
 *
 * On success v receives the record text (flagged VF_USER) and RT receives
 * the separator text that ended it.
 *
 * Returns 1 when a record was stored, 0 when no data is left, and -1 when
 * the read failed (ERRNO is set from errno in that case).
 *
 * Buffer state (buffer, adv, pos, size) is loaded from rsm on entry and
 * written back before returning, so unread data is kept for the next call.
 */
static int awk_getline(rstream *rsm, var *v)
{
        char *b;
        regmatch_t pmatch[2];
        int a, p, pp=0, size;
        int fd, so, eo, r, rp;
        char c, *m, *s;

        /* we're using our own buffer since we need access to accumulating
         * characters
         */
        fd = fileno(rsm->F);
        m = rsm->buffer;        /* start of allocated buffer */
        a = rsm->adv;           /* offset of unread data inside m */
        p = rsm->pos;           /* number of unread bytes at m+a */
        size = rsm->size;
        c = (char) rsplitter.n.info;
        rp = 0;                 /* offset of record start within b */

        if (!m) qrealloc(&m, 256, &size);
        do {
                b = m + a;
                /* default: record is everything buffered, empty separator */
                so = eo = p;
                r = 1;
                if (p > 0) {
                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
                                                        b, 1, pmatch, 0) == 0) {
                                        so = pmatch[0].rm_so;
                                        eo = pmatch[0].rm_eo;
                                        /* a match touching the end of buffered data is
                                         * not accepted yet: try to read more first */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        } else if (c != '\0') {
                                /* only the part added by the last read is searched */
                                s = strchr(b+pp, c);
                                /* an embedded NUL in the data also ends the record */
                                if (!s) s = memchr(b+pp, '\0', p - pp);
                                if (s) {
                                        so = eo = s-b;
                                        eo++;
                                        break;
                                }
                        } else {
                                /* skip leading newlines, then look for a blank line */
                                while (b[rp] == '\n')
                                        rp++;
                                s = strstr(b+rp, "\n\n");
                                if (s) {
                                        so = eo = s-b;
                                        while (b[eo] == '\n') eo++;
                                        /* newline run reaching end of data may continue */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        }
                }

                /* no complete record yet: move unread data to buffer start */
                if (a > 0) {
                        memmove(m, (const void *)(m+a), p+1);
                        b = m;
                        a = 0;
                }

                qrealloc(&m, a+p+128, &size);
                b = m + a;
                pp = p;
                p += safe_read(fd, b+p, size-p-1);
                /* p shrinks only when safe_read returned a negative value */
                if (p < pp) {
                        p = 0;
                        r = 0;
                        setvar_i(intvar[ERRNO], errno);
                }
                b[p] = '\0';

        } while (p > pp);       /* stop when the read added nothing */

        if (p == 0) {
                /* nothing buffered: 1 -> 0 (no more data), 0 -> -1 (read error) */
                r--;
        } else {
                /* temporarily NUL-terminate record and separator in place */
                c = b[so]; b[so] = '\0';
                setvar_s(v, b+rp);
                v->type |= VF_USER;
                b[so] = c;
                c = b[eo]; b[eo] = '\0';
                setvar_s(intvar[RT], b+so);
                b[eo] = c;
        }

        /* save state: the next call starts right after the separator */
        rsm->buffer = m;
        rsm->adv = a + eo;
        rsm->pos = p - eo;
        rsm->size = size;

        return r;
}
1810
1811 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1812 {
1813         int r = 0;
1814         char c;
1815         const char *s = format;
1816
1817         if (int_as_int && n == (int)n) {
1818                 r = snprintf(b, size, "%d", (int)n);
1819         } else {
1820                 do { c = *s; } while (c && *++s);
1821                 if (strchr("diouxX", c)) {
1822                         r = snprintf(b, size, format, (int)n);
1823                 } else if (strchr("eEfgG", c)) {
1824                         r = snprintf(b, size, format, n);
1825                 } else {
1826                         syntax_error(EMSG_INV_FMT);
1827                 }
1828         }
1829         return r;
1830 }
1831
1832
1833 /* formatted output into an allocated buffer, return ptr to buffer */
1834 static char *awk_printf(node *n)
1835 {
1836         char *b = NULL;
1837         char *fmt, *s, *f;
1838         const char *s1;
1839         int i, j, incr, bsize;
1840         char c, c1;
1841         var *v, *arg;
1842
1843         v = nvalloc(1);
1844         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1845
1846         i = 0;
1847         while (*f) {
1848                 s = f;
1849                 while (*f && (*f != '%' || *(++f) == '%'))
1850                         f++;
1851                 while (*f && !isalpha(*f)) {
1852                         if (*f == '*')
1853                                 syntax_error("%*x formats are not supported");
1854                         f++;
1855                 }
1856
1857                 incr = (f - s) + MAXVARFMT;
1858                 qrealloc(&b, incr + i, &bsize);
1859                 c = *f;
1860                 if (c != '\0') f++;
1861                 c1 = *f;
1862                 *f = '\0';
1863                 arg = evaluate(nextarg(&n), v);
1864
1865                 j = i;
1866                 if (c == 'c' || !c) {
1867                         i += sprintf(b+i, s, is_numeric(arg) ?
1868                                         (char)getvar_i(arg) : *getvar_s(arg));
1869                 } else if (c == 's') {
1870                         s1 = getvar_s(arg);
1871                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1872                         i += sprintf(b+i, s, s1);
1873                 } else {
1874                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1875                 }
1876                 *f = c1;
1877
1878                 /* if there was an error while sprintf, return value is negative */
1879                 if (i < j) i = j;
1880         }
1881
1882         b = xrealloc(b, i + 1);
1883         free(fmt);
1884         nvfree(v);
1885         b[i] = '\0';
1886         return b;
1887 }
1888
1889 /* common substitution routine
1890  * replace (nm) substring of (src) that match (n) with (repl), store
1891  * result into (dest), return number of substitutions. If nm=0, replace
1892  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1893  * subexpression matching (\1-\9)
1894  */
1895 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1896 {
1897         char *ds = NULL;
1898         const char *s;
1899         const char *sp;
1900         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1901         regmatch_t pmatch[10];
1902         regex_t sreg, *re;
1903
1904         re = as_regex(rn, &sreg);
1905         if (!src) src = intvar[F0];
1906         if (!dest) dest = intvar[F0];
1907
1908         i = di = 0;
1909         sp = getvar_s(src);
1910         rl = strlen(repl);
1911         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1912                 so = pmatch[0].rm_so;
1913                 eo = pmatch[0].rm_eo;
1914
1915                 qrealloc(&ds, di + eo + rl, &dssize);
1916                 memcpy(ds + di, sp, eo);
1917                 di += eo;
1918                 if (++i >= nm) {
1919                         /* replace */
1920                         di -= (eo - so);
1921                         nbs = 0;
1922                         for (s = repl; *s; s++) {
1923                                 ds[di++] = c = *s;
1924                                 if (c == '\\') {
1925                                         nbs++;
1926                                         continue;
1927                                 }
1928                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1929                                         di -= ((nbs + 3) >> 1);
1930                                         j = 0;
1931                                         if (c != '&') {
1932                                                 j = c - '0';
1933                                                 nbs++;
1934                                         }
1935                                         if (nbs % 2) {
1936                                                 ds[di++] = c;
1937                                         } else {
1938                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1939                                                 qrealloc(&ds, di + rl + n, &dssize);
1940                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1941                                                 di += n;
1942                                         }
1943                                 }
1944                                 nbs = 0;
1945                         }
1946                 }
1947
1948                 sp += eo;
1949                 if (i == nm) break;
1950                 if (eo == so) {
1951                         ds[di] = *sp++;
1952                         if (!ds[di++]) break;
1953                 }
1954         }
1955
1956         qrealloc(&ds, di + strlen(sp), &dssize);
1957         strcpy(ds + di, sp);
1958         setvar_p(dest, ds);
1959         if (re == &sreg) regfree(re);
1960         return i;
1961 }
1962
1963 static var *exec_builtin(node *op, var *res)
1964 {
1965 #define tspl (G.exec_builtin__tspl)
1966
1967         int (*to_xxx)(int);
1968         var *tv;
1969         node *an[4];
1970         var *av[4];
1971         const char *as[4];
1972         regmatch_t pmatch[2];
1973         regex_t sreg, *re;
1974         node *spl;
1975         uint32_t isr, info;
1976         int nargs;
1977         time_t tt;
1978         char *s, *s1;
1979         int i, l, ll, n;
1980
1981         tv = nvalloc(4);
1982         isr = info = op->info;
1983         op = op->l.n;
1984
1985         av[2] = av[3] = NULL;
1986         for (i = 0; i < 4 && op; i++) {
1987                 an[i] = nextarg(&op);
1988                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1989                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1990                 isr >>= 1;
1991         }
1992
1993         nargs = i;
1994         if ((uint32_t)nargs < (info >> 30))
1995                 syntax_error(EMSG_TOO_FEW_ARGS);
1996
1997         switch (info & OPNMASK) {
1998
1999         case B_a2:
2000 #if ENABLE_FEATURE_AWK_MATH
2001                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2002 #else
2003                 syntax_error(EMSG_NO_MATH);
2004 #endif
2005                 break;
2006
2007         case B_sp:
2008                 if (nargs > 2) {
2009                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2010                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2011                 } else {
2012                         spl = &fsplitter.n;
2013                 }
2014
2015                 n = awk_split(as[0], spl, &s);
2016                 s1 = s;
2017                 clear_array(iamarray(av[1]));
2018                 for (i=1; i<=n; i++)
2019                         setari_u(av[1], i, nextword(&s1));
2020                 free(s);
2021                 setvar_i(res, n);
2022                 break;
2023
2024         case B_ss:
2025                 l = strlen(as[0]);
2026                 i = getvar_i(av[1]) - 1;
2027                 if (i > l) i = l;
2028                 if (i < 0) i = 0;
2029                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2030                 if (n < 0) n = 0;
2031                 s = xstrndup(as[0]+i, n);
2032                 setvar_p(res, s);
2033                 break;
2034
2035         case B_an:
2036                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2037                 break;
2038
2039         case B_co:
2040                 setvar_i(res, ~(long)getvar_i(av[0]));
2041                 break;
2042
2043         case B_ls:
2044                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2045                 break;
2046
2047         case B_or:
2048                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2049                 break;
2050
2051         case B_rs:
2052                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2053                 break;
2054
2055         case B_xo:
2056                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2057                 break;
2058
2059         case B_lo:
2060                 to_xxx = tolower;
2061                 goto lo_cont;
2062
2063         case B_up:
2064                 to_xxx = toupper;
2065  lo_cont:
2066                 s1 = s = xstrdup(as[0]);
2067                 while (*s1) {
2068                         *s1 = (*to_xxx)(*s1);
2069                         s1++;
2070                 }
2071                 setvar_p(res, s);
2072                 break;
2073
2074         case B_ix:
2075                 n = 0;
2076                 ll = strlen(as[1]);
2077                 l = strlen(as[0]) - ll;
2078                 if (ll > 0 && l >= 0) {
2079                         if (!icase) {
2080                                 s = strstr(as[0], as[1]);
2081                                 if (s) n = (s - as[0]) + 1;
2082                         } else {
2083                                 /* this piece of code is terribly slow and
2084                                  * really should be rewritten
2085                                  */
2086                                 for (i=0; i<=l; i++) {
2087                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2088                                                 n = i+1;
2089                                                 break;
2090                                         }
2091                                 }
2092                         }
2093                 }
2094                 setvar_i(res, n);
2095                 break;
2096
2097         case B_ti:
2098                 if (nargs > 1)
2099                         tt = getvar_i(av[1]);
2100                 else
2101                         time(&tt);
2102                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2103                 i = strftime(g_buf, MAXVARFMT,
2104                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2105                         localtime(&tt));
2106                 g_buf[i] = '\0';
2107                 setvar_s(res, g_buf);
2108                 break;
2109
2110         case B_ma:
2111                 re = as_regex(an[1], &sreg);
2112                 n = regexec(re, as[0], 1, pmatch, 0);
2113                 if (n == 0) {
2114                         pmatch[0].rm_so++;
2115                         pmatch[0].rm_eo++;
2116                 } else {
2117                         pmatch[0].rm_so = 0;
2118                         pmatch[0].rm_eo = -1;
2119                 }
2120                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2121                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2122                 setvar_i(res, pmatch[0].rm_so);
2123                 if (re == &sreg) regfree(re);
2124                 break;
2125
2126         case B_ge:
2127                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2128                 break;
2129
2130         case B_gs:
2131                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2132                 break;
2133
2134         case B_su:
2135                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2136                 break;
2137         }
2138
2139         nvfree(tv);
2140         return res;
2141 #undef tspl
2142 }
2143
2144 /*
2145  * Evaluate node - the heart of the program. Supplied with subtree
2146  * and place where to store result. returns ptr to result.
2147  */
2148 #define XC(n) ((n) >> 8)
2149
2150 static var *evaluate(node *op, var *res)
2151 {
2152 /* This procedure is recursive so we should count every byte */
2153 #define fnargs (G.evaluate__fnargs)
2154 /* seed is initialized to 1 */
2155 #define seed   (G.evaluate__seed)
2156 #define sreg   (G.evaluate__sreg)
2157
2158         node *op1;
2159         var *v1;
2160         union {
2161                 var *v;
2162                 const char *s;
2163                 double d;
2164                 int i;
2165         } L, R;
2166         uint32_t opinfo;
2167         int opn;
2168         union {
2169                 char *s;
2170                 rstream *rsm;
2171                 FILE *F;
2172                 var *v;
2173                 regex_t *re;
2174                 uint32_t info;
2175         } X;
2176
2177         if (!op)
2178                 return setvar_s(res, NULL);
2179
2180         v1 = nvalloc(2);
2181
2182         while (op) {
2183                 opinfo = op->info;
2184                 opn = (opinfo & OPNMASK);
2185                 g_lineno = op->lineno;
2186
2187                 /* execute inevitable things */
2188                 op1 = op->l.n;
2189                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2190                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2191                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2192                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2193                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2194
2195                 switch (XC(opinfo & OPCLSMASK)) {
2196
2197                 /* -- iterative node type -- */
2198
2199                 /* test pattern */
2200                 case XC( OC_TEST ):
2201                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2202                                 /* it's range pattern */
2203                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2204                                         op->info |= OF_CHECKED;
2205                                         if (ptest(op1->r.n))
2206                                                 op->info &= ~OF_CHECKED;
2207
2208                                         op = op->a.n;
2209                                 } else {
2210                                         op = op->r.n;
2211                                 }
2212                         } else {
2213                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2214                         }
2215                         break;
2216
2217                 /* just evaluate an expression, also used as unconditional jump */
2218                 case XC( OC_EXEC ):
2219                         break;
2220
2221                 /* branch, used in if-else and various loops */
2222                 case XC( OC_BR ):
2223                         op = istrue(L.v) ? op->a.n : op->r.n;
2224                         break;
2225
2226                 /* initialize for-in loop */
2227                 case XC( OC_WALKINIT ):
2228                         hashwalk_init(L.v, iamarray(R.v));
2229                         break;
2230
2231                 /* get next array item */
2232                 case XC( OC_WALKNEXT ):
2233                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2234                         break;
2235
2236                 case XC( OC_PRINT ):
2237                 case XC( OC_PRINTF ):
2238                         X.F = stdout;
2239                         if (op->r.n) {
2240                                 X.rsm = newfile(R.s);
2241                                 if (!X.rsm->F) {
2242                                         if (opn == '|') {
2243                                                 X.rsm->F = popen(R.s, "w");
2244                                                 if (X.rsm->F == NULL)
2245                                                         bb_perror_msg_and_die("popen");
2246                                                 X.rsm->is_pipe = 1;
2247                                         } else {
2248                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2249                                         }
2250                                 }
2251                                 X.F = X.rsm->F;
2252                         }
2253
2254                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2255                                 if (!op1) {
2256                                         fputs(getvar_s(intvar[F0]), X.F);
2257                                 } else {
2258                                         while (op1) {
2259                                                 L.v = evaluate(nextarg(&op1), v1);
2260                                                 if (L.v->type & VF_NUMBER) {
2261                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2262                                                                         getvar_i(L.v), TRUE);
2263                                                         fputs(g_buf, X.F);
2264                                                 } else {
2265                                                         fputs(getvar_s(L.v), X.F);
2266                                                 }
2267
2268                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2269                                         }
2270                                 }
2271                                 fputs(getvar_s(intvar[ORS]), X.F);
2272
2273                         } else {        /* OC_PRINTF */
2274                                 L.s = awk_printf(op1);
2275                                 fputs(L.s, X.F);
2276                                 free((char*)L.s);
2277                         }
2278                         fflush(X.F);
2279                         break;
2280
2281                 case XC( OC_DELETE ):
2282                         X.info = op1->info & OPCLSMASK;
2283                         if (X.info == OC_VAR) {
2284                                 R.v = op1->l.v;
2285                         } else if (X.info == OC_FNARG) {
2286                                 R.v = &fnargs[op1->l.i];
2287                         } else {
2288                                 syntax_error(EMSG_NOT_ARRAY);
2289                         }
2290
2291                         if (op1->r.n) {
2292                                 clrvar(L.v);
2293                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2294                                 hash_remove(iamarray(R.v), L.s);
2295                         } else {
2296                                 clear_array(iamarray(R.v));
2297                         }
2298                         break;
2299
2300                 case XC( OC_NEWSOURCE ):
2301                         g_progname = op->l.s;
2302                         break;
2303
2304                 case XC( OC_RETURN ):
2305                         copyvar(res, L.v);
2306                         break;
2307
2308                 case XC( OC_NEXTFILE ):
2309                         nextfile = TRUE;
2310                 case XC( OC_NEXT ):
2311                         nextrec = TRUE;
2312                 case XC( OC_DONE ):
2313                         clrvar(res);
2314                         break;
2315
2316                 case XC( OC_EXIT ):
2317                         awk_exit(L.d);
2318
2319                 /* -- recursive node type -- */
2320
2321                 case XC( OC_VAR ):
2322                         L.v = op->l.v;
2323                         if (L.v == intvar[NF])
2324                                 split_f0();
2325                         goto v_cont;
2326
2327                 case XC( OC_FNARG ):
2328                         L.v = &fnargs[op->l.i];
2329  v_cont:
2330                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2331                         break;
2332
2333                 case XC( OC_IN ):
2334                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2335                         break;
2336
2337                 case XC( OC_REGEXP ):
2338                         op1 = op;
2339                         L.s = getvar_s(intvar[F0]);
2340                         goto re_cont;
2341
2342                 case XC( OC_MATCH ):
2343                         op1 = op->r.n;
2344  re_cont:
2345                         X.re = as_regex(op1, &sreg);
2346                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2347                         if (X.re == &sreg) regfree(X.re);
2348                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2349                         break;
2350
2351                 case XC( OC_MOVE ):
                        /* if source is a temporary string, just relink it to dest */
2353                         if (R.v == v1+1 && R.v->string) {
2354                                 res = setvar_p(L.v, R.v->string);
2355                                 R.v->string = NULL;
2356                         } else {
2357                                 res = copyvar(L.v, R.v);
2358                         }
2359                         break;
2360
2361                 case XC( OC_TERNARY ):
2362                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2363                                 syntax_error(EMSG_POSSIBLE_ERROR);
2364                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2365                         break;
2366
2367                 case XC( OC_FUNC ):
2368                         if (!op->r.f->body.first)
2369                                 syntax_error(EMSG_UNDEF_FUNC);
2370
2371                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2372                         while (op1) {
2373                                 L.v = evaluate(nextarg(&op1), v1);
2374                                 copyvar(R.v, L.v);
2375                                 R.v->type |= VF_CHILD;
2376                                 R.v->x.parent = L.v;
2377                                 if (++R.v - X.v >= op->r.f->nargs)
2378                                         break;
2379                         }
2380
2381                         R.v = fnargs;
2382                         fnargs = X.v;
2383
2384                         L.s = g_progname;
2385                         res = evaluate(op->r.f->body.first, res);
2386                         g_progname = L.s;
2387
2388                         nvfree(fnargs);
2389                         fnargs = R.v;
2390                         break;
2391
2392                 case XC( OC_GETLINE ):
2393                 case XC( OC_PGETLINE ):
2394                         if (op1) {
2395                                 X.rsm = newfile(L.s);
2396                                 if (!X.rsm->F) {
2397                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2398                                                 X.rsm->F = popen(L.s, "r");
2399                                                 X.rsm->is_pipe = TRUE;
2400                                         } else {
2401                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2402                                         }
2403                                 }
2404                         } else {
2405                                 if (!iF) iF = next_input_file();
2406                                 X.rsm = iF;
2407                         }
2408
2409                         if (!X.rsm->F) {
2410                                 setvar_i(intvar[ERRNO], errno);
2411                                 setvar_i(res, -1);
2412                                 break;
2413                         }
2414
2415                         if (!op->r.n)
2416                                 R.v = intvar[F0];
2417
2418                         L.i = awk_getline(X.rsm, R.v);
2419                         if (L.i > 0) {
2420                                 if (!op1) {
2421                                         incvar(intvar[FNR]);
2422                                         incvar(intvar[NR]);
2423                                 }
2424                         }
2425                         setvar_i(res, L.i);
2426                         break;
2427
2428                 /* simple builtins */
2429                 case XC( OC_FBLTIN ):
2430                         switch (opn) {
2431
2432                         case F_in:
2433                                 R.d = (int)L.d;
2434                                 break;
2435
2436                         case F_rn:
2437                                 R.d = (double)rand() / (double)RAND_MAX;
2438                                 break;
2439 #if ENABLE_FEATURE_AWK_MATH
2440                         case F_co:
2441                                 R.d = cos(L.d);
2442                                 break;
2443
2444                         case F_ex:
2445                                 R.d = exp(L.d);
2446                                 break;
2447
2448                         case F_lg:
2449                                 R.d = log(L.d);
2450                                 break;
2451
2452                         case F_si:
2453                                 R.d = sin(L.d);
2454                                 break;
2455
2456                         case F_sq:
2457                                 R.d = sqrt(L.d);
2458                                 break;
2459 #else
2460                         case F_co:
2461                         case F_ex:
2462                         case F_lg:
2463                         case F_si:
2464                         case F_sq:
2465                                 syntax_error(EMSG_NO_MATH);
2466                                 break;
2467 #endif
2468                         case F_sr:
2469                                 R.d = (double)seed;
2470                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2471                                 srand(seed);
2472                                 break;
2473
2474                         case F_ti:
2475                                 R.d = time(NULL);
2476                                 break;
2477
2478                         case F_le:
2479                                 if (!op1)
2480                                         L.s = getvar_s(intvar[F0]);
2481                                 R.d = strlen(L.s);
2482                                 break;
2483
2484                         case F_sy:
2485                                 fflush(NULL);
2486                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2487                                                 ? (system(L.s) >> 8) : 0;
2488                                 break;
2489
2490                         case F_ff:
2491                                 if (!op1)
2492                                         fflush(stdout);
2493                                 else {
2494                                         if (L.s && *L.s) {
2495                                                 X.rsm = newfile(L.s);
2496                                                 fflush(X.rsm->F);
2497                                         } else {
2498                                                 fflush(NULL);
2499                                         }
2500                                 }
2501                                 break;
2502
2503                         case F_cl:
2504                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2505                                 if (X.rsm) {
2506                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2507                                         free(X.rsm->buffer);
2508                                         hash_remove(fdhash, L.s);
2509                                 }
2510                                 if (R.i != 0)
2511                                         setvar_i(intvar[ERRNO], errno);
2512                                 R.d = (double)R.i;
2513                                 break;
2514                         }
2515                         setvar_i(res, R.d);
2516                         break;
2517
2518                 case XC( OC_BUILTIN ):
2519                         res = exec_builtin(op, res);
2520                         break;
2521
2522                 case XC( OC_SPRINTF ):
2523                         setvar_p(res, awk_printf(op1));
2524                         break;
2525
2526                 case XC( OC_UNARY ):
2527                         X.v = R.v;
2528                         L.d = R.d = getvar_i(R.v);
2529                         switch (opn) {
2530                         case 'P':
2531                                 L.d = ++R.d;
2532                                 goto r_op_change;
2533                         case 'p':
2534                                 R.d++;
2535                                 goto r_op_change;
2536                         case 'M':
2537                                 L.d = --R.d;
2538                                 goto r_op_change;
2539                         case 'm':
2540                                 R.d--;
2541                                 goto r_op_change;
2542                         case '!':
2543                                 L.d = istrue(X.v) ? 0 : 1;
2544                                 break;
2545                         case '-':
2546                                 L.d = -R.d;
2547                                 break;
2548  r_op_change:
2549                                 setvar_i(X.v, R.d);
2550                         }
2551                         setvar_i(res, L.d);
2552                         break;
2553
2554                 case XC( OC_FIELD ):
2555                         R.i = (int)getvar_i(R.v);
2556                         if (R.i == 0) {
2557                                 res = intvar[F0];
2558                         } else {
2559                                 split_f0();
2560                                 if (R.i > nfields)
2561                                         fsrealloc(R.i);
2562                                 res = &Fields[R.i - 1];
2563                         }
2564                         break;
2565
2566                 /* concatenation (" ") and index joining (",") */
2567                 case XC( OC_CONCAT ):
2568                 case XC( OC_COMMA ):
2569                         opn = strlen(L.s) + strlen(R.s) + 2;
2570                         X.s = xmalloc(opn);
2571                         strcpy(X.s, L.s);
2572                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2573                                 L.s = getvar_s(intvar[SUBSEP]);
2574                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2575                                 strcat(X.s, L.s);
2576                         }
2577                         strcat(X.s, R.s);
2578                         setvar_p(res, X.s);
2579                         break;
2580
2581                 case XC( OC_LAND ):
2582                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2583                         break;
2584
2585                 case XC( OC_LOR ):
2586                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2587                         break;
2588
2589                 case XC( OC_BINARY ):
2590                 case XC( OC_REPLACE ):
2591                         R.d = getvar_i(R.v);
2592                         switch (opn) {
2593                         case '+':
2594                                 L.d += R.d;
2595                                 break;
2596                         case '-':
2597                                 L.d -= R.d;
2598                                 break;
2599                         case '*':
2600                                 L.d *= R.d;
2601                                 break;
2602                         case '/':
2603                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2604                                 L.d /= R.d;
2605                                 break;
2606                         case '&':
2607 #if ENABLE_FEATURE_AWK_MATH
2608                                 L.d = pow(L.d, R.d);
2609 #else
2610                                 syntax_error(EMSG_NO_MATH);
2611 #endif
2612                                 break;
2613                         case '%':
2614                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2615                                 L.d -= (int)(L.d / R.d) * R.d;
2616                                 break;
2617                         }
2618                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2619                         break;
2620
2621                 case XC( OC_COMPARE ):
2622                         if (is_numeric(L.v) && is_numeric(R.v)) {
2623                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2624                         } else {
2625                                 L.s = getvar_s(L.v);
2626                                 R.s = getvar_s(R.v);
2627                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2628                         }
2629                         switch (opn & 0xfe) {
2630                         case 0:
2631                                 R.i = (L.d > 0);
2632                                 break;
2633                         case 2:
2634                                 R.i = (L.d >= 0);
2635                                 break;
2636                         case 4:
2637                                 R.i = (L.d == 0);
2638                                 break;
2639                         }
2640                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2641                         break;
2642
2643                 default:
2644                         syntax_error(EMSG_POSSIBLE_ERROR);
2645                 }
2646                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2647                         op = op->a.n;
2648                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2649                         break;
2650                 if (nextrec)
2651                         break;
2652         }
2653         nvfree(v1);
2654         return res;
2655 #undef fnargs
2656 #undef seed
2657 #undef sreg
2658 }
2659
2660
2661 /* -------- main & co. -------- */
2662
2663 static int awk_exit(int r)
2664 {
2665         var tv;
2666         unsigned i;
2667         hash_item *hi;
2668
2669         zero_out_var(&tv);
2670
2671         if (!exiting) {
2672                 exiting = TRUE;
2673                 nextrec = FALSE;
2674                 evaluate(endseq.first, &tv);
2675         }
2676
2677         /* waiting for children */
2678         for (i = 0; i < fdhash->csize; i++) {
2679                 hi = fdhash->items[i];
2680                 while (hi) {
2681                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2682                                 pclose(hi->data.rs.F);
2683                         hi = hi->next;
2684                 }
2685         }
2686
2687         exit(r);
2688 }
2689
2690 /* if expr looks like "var=value", perform assignment and return 1,
2691  * otherwise return 0 */
2692 static int is_assignment(const char *expr)
2693 {
2694         char *exprc, *s, *s0, *s1;
2695
2696         exprc = xstrdup(expr);
2697         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2698                 free(exprc);
2699                 return FALSE;
2700         }
2701
2702         *(s++) = '\0';
2703         s0 = s1 = s;
2704         while (*s)
2705                 *(s1++) = nextchar(&s);
2706
2707         *s1 = '\0';
2708         setvar_u(newvar(exprc), s0);
2709         free(exprc);
2710         return TRUE;
2711 }
2712
2713 /* switch to next input file */
2714 static rstream *next_input_file(void)
2715 {
2716 #define rsm          (G.next_input_file__rsm)
2717 #define files_happen (G.next_input_file__files_happen)
2718
2719         FILE *F = NULL;
2720         const char *fname, *ind;
2721
2722         if (rsm.F) fclose(rsm.F);
2723         rsm.F = NULL;
2724         rsm.pos = rsm.adv = 0;
2725
2726         do {
2727                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2728                         if (files_happen)
2729                                 return NULL;
2730                         fname = "-";
2731                         F = stdin;
2732                 } else {
2733                         ind = getvar_s(incvar(intvar[ARGIND]));
2734                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2735                         if (fname && *fname && !is_assignment(fname))
2736                                 F = xfopen_stdin(fname);
2737                 }
2738         } while (!F);
2739
2740         files_happen = TRUE;
2741         setvar_s(intvar[FILENAME], fname);
2742         rsm.F = F;
2743         return &rsm;
2744 #undef rsm
2745 #undef files_happen
2746 }
2747
2748 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2749 int awk_main(int argc, char **argv)
2750 {
2751         unsigned opt;
2752         char *opt_F, *opt_W;
2753         llist_t *list_v = NULL;
2754         llist_t *list_f = NULL;
2755         int i, j;
2756         var *v;
2757         var tv;
2758         char **envp;
2759         char *vnames = (char *)vNames; /* cheat */
2760         char *vvalues = (char *)vValues;
2761
2762         INIT_G();
2763
2764         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2765          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2766         if (ENABLE_LOCALE_SUPPORT)
2767                 setlocale(LC_NUMERIC, "C");
2768
2769         zero_out_var(&tv);
2770
2771         /* allocate global buffer */
2772         g_buf = xmalloc(MAXVARFMT + 1);
2773
2774         vhash = hash_init();
2775         ahash = hash_init();
2776         fdhash = hash_init();
2777         fnhash = hash_init();
2778
2779         /* initialize variables */
2780         for (i = 0; *vnames; i++) {
2781                 intvar[i] = v = newvar(nextword(&vnames));
2782                 if (*vvalues != '\377')
2783                         setvar_s(v, nextword(&vvalues));
2784                 else
2785                         setvar_i(v, 0);
2786
2787                 if (*vnames == '*') {
2788                         v->type |= VF_SPECIAL;
2789                         vnames++;
2790                 }
2791         }
2792
2793         handle_special(intvar[FS]);
2794         handle_special(intvar[RS]);
2795
2796         newfile("/dev/stdin")->F = stdin;
2797         newfile("/dev/stdout")->F = stdout;
2798         newfile("/dev/stderr")->F = stderr;
2799
2800         /* Huh, people report that sometimes environ is NULL. Oh well. */
2801         if (environ) for (envp = environ; *envp; envp++) {
2802                 /* environ is writable, thus we don't strdup it needlessly */
2803                 char *s = *envp;
2804                 char *s1 = strchr(s, '=');
2805                 if (s1) {
2806                         *s1 = '\0';
2807                         /* Both findvar and setvar_u take const char*
2808                          * as 2nd arg -> environment is not trashed */
2809                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2810                         *s1 = '=';
2811                 }
2812         }
2813         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2814         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2815         argv += optind;
2816         argc -= optind;
2817         if (opt & 0x1)
2818                 setvar_s(intvar[FS], opt_F); // -F
2819         while (list_v) { /* -v */
2820                 if (!is_assignment(llist_pop(&list_v)))
2821                         bb_show_usage();
2822         }
2823         if (list_f) { /* -f */
2824                 do {
2825                         char *s = NULL;
2826                         FILE *from_file;
2827
2828                         g_progname = llist_pop(&list_f);
2829                         from_file = xfopen_stdin(g_progname);
2830                         /* one byte is reserved for some trick in next_token */
2831                         for (i = j = 1; j > 0; i += j) {
2832                                 s = xrealloc(s, i + 4096);
2833                                 j = fread(s + i, 1, 4094, from_file);
2834                         }
2835                         s[i] = '\0';
2836                         fclose(from_file);
2837                         parse_program(s + 1);
2838                         free(s);
2839                 } while (list_f);
2840         } else { // no -f: take program from 1st parameter
2841                 if (!argc)
2842                         bb_show_usage();
2843                 g_progname = "cmd. line";
2844                 parse_program(*argv++);
2845                 argc--;
2846         }
2847         if (opt & 0x8) // -W
2848                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2849
2850         /* fill in ARGV array */
2851         setvar_i(intvar[ARGC], argc + 1);
2852         setari_u(intvar[ARGV], 0, "awk");
2853         i = 0;
2854         while (*argv)
2855                 setari_u(intvar[ARGV], ++i, *argv++);
2856
2857         evaluate(beginseq.first, &tv);
2858         if (!mainseq.first && !endseq.first)
2859                 awk_exit(EXIT_SUCCESS);
2860
2861         /* input file could already be opened in BEGIN block */
2862         if (!iF) iF = next_input_file();
2863
2864         /* passing through input files */
2865         while (iF) {
2866                 nextfile = FALSE;
2867                 setvar_i(intvar[FNR], 0);
2868
2869                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2870                         nextrec = FALSE;
2871                         incvar(intvar[NR]);
2872                         incvar(intvar[FNR]);
2873                         evaluate(mainseq.first, &tv);
2874
2875                         if (nextfile)
2876                                 break;
2877                 }
2878
2879                 if (i < 0)
2880                         syntax_error(strerror(errno));
2881
2882                 iF = next_input_file();
2883         }
2884
2885         awk_exit(EXIT_SUCCESS);
2886         /*return 0;*/
2887 }