awk: simple code shrink
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Buffer size handed to fmt_num() when getvar_s() formats a number into g_buf */
17 #define MAXVARFMT       240
/* Smallest number of var slots nvalloc() puts into a newly allocated nvblock */
18 #define MINNVBLOCK      64
19
20 /* variable flags (stored in var::type) */
21 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
22 #define VF_ARRAY        0x0002  /* 1 = it's an array */
23
24 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
25 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
27 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
31
32 /* these flags are static, don't change them when value is changed */
/* (clrvar() keeps exactly these bits and clears all others) */
33 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35 /* Variable: a single awk value plus its VF_* state flags */
36 typedef struct var_s {
37         unsigned type;            /* VF_* flags */
38         double number;            /* numeric value (set by setvar_i, cached by getvar_i) */
39         char *string;             /* string value; not free()d by clrvar when VF_FSTR is set */
40         union {
41                 int aidx;               /* func arg idx (for compilation stage) */
42                 struct xhash_s *array;  /* array ptr */
43                 struct var_s *parent;   /* for func args, ptr to actual parameter */
44                 char **walker;          /* list of array elements (for..in) */
45         } x;
46 } var;
47
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
50         struct node_s *first;           /* first node of the chain */
51         struct node_s *last;            /* last node of the chain */
52         const char *programname;        /* NOTE(review): presumably the source name used in diagnostics - confirm */
53 } chain;
54
55 /* Function (stored in the functions hash, see hash_item_s) */
56 typedef struct func_s {
57         unsigned nargs;                 /* NOTE(review): presumably the declared argument count - confirm */
58         struct chain_s body;            /* node chain of the function body */
59 } func;
60
61 /* I/O stream (stored in the redirect streams hash, see hash_item_s) */
62 typedef struct rstream_s {
63         FILE *F;
64         char *buffer;
/* NOTE(review): adv/size/pos look like read-buffer bookkeeping;
 * their exact meaning is defined by the reader code - confirm there */
65         int adv;
66         int size;
67         int pos;
68         smallint is_pipe;
69 } rstream;
70
/* One hash entry. 'data' must stay the first member: hash_search() returns
 * &item->data and hash_find() uses that pointer as the item pointer. */
71 typedef struct hash_item_s {
72         union {
73                 struct var_s v;         /* variable/array hash */
74                 struct rstream_s rs;    /* redirect streams hash */
75                 struct func_s f;        /* functions hash */
76         } data;
77         struct hash_item_s *next;       /* next in chain */
/* hash_find() allocates strlen(name)+1 extra bytes for the key */
78         char name[1];                   /* really it's longer */
79 } hash_item;
80
/* Chained hash table keyed by NUL-terminated names */
81 typedef struct xhash_s {
82         unsigned nel;           /* num of elements */
83         unsigned csize;         /* current hash size (number of buckets in items[]) */
84         unsigned nprime;        /* next hash size in PRIMES[] (index, bumped by hash_rebuild) */
85         unsigned glen;          /* summary length of item names (each counted as strlen+1) */
86         struct hash_item_s **items;     /* bucket heads */
87 } xhash;
88
89 /* Tree node of the parsed program */
90 typedef struct node_s {
/* NOTE(review): info presumably carries the OC_ and OF_ bits, priority and opcode from tokeninfo[] - confirm */
91         uint32_t info;
92         unsigned lineno;
/* l, r and a are operand slots; which union member is valid depends on the
 * node kind - NOTE(review): confirm against the parser/evaluator */
93         union {
94                 struct node_s *n;
95                 var *v;
96                 int i;
97                 char *s;
98                 regex_t *re;
99         } l;
100         union {
101                 struct node_s *n;
102                 regex_t *ire;
103                 func *f;
104                 int argno;
105         } r;
106         union {
107                 struct node_s *n;
108         } a;
109 } node;
110
111 /* Block of temporary variables (managed by nvalloc/nvfree via g_cb) */
112 typedef struct nvblock_s {
113         int size;                       /* number of var slots in nv[] */
114         var *pos;                       /* first unused slot in nv[] */
115         struct nvblock_s *prev;         /* previous block in the list, NULL for the first */
116         struct nvblock_s *next;         /* next block in the list, NULL for the last */
117         var nv[];                       /* the slots themselves */
118 } nvblock;
119
/* Splitter descriptor: a node plus storage for two compiled regexes.
 * Used for fsplitter/rsplitter and exec_builtin__tspl in struct globals2.
 * NOTE(review): presumably re[0]/re[1] are the case-sensitive and
 * case-insensitive variants - confirm against the code that fills them. */
120 typedef struct tsplitter_s {
121         node n;
122         regex_t re[2];
123 } tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!!  See next_token() */
/* One bit per token class; the bit order matches the class order in tokenlist[] */
127 #define TC_SEQSTART      1                              /* ( */
128 #define TC_SEQTERM      (1 << 1)                /* ) */
129 #define TC_REGEXP       (1 << 2)                /* /.../ */
130 #define TC_OUTRDR       (1 << 3)                /* | > >> */
131 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
132 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
133 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
134 #define TC_IN           (1 << 7)                /* in */
135 #define TC_COMMA        (1 << 8)                /* , */
136 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
137 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
138 #define TC_ARRTERM      (1 << 11)               /* ] */
139 #define TC_GRPSTART     (1 << 12)               /* { */
140 #define TC_GRPTERM      (1 << 13)               /* } */
141 #define TC_SEMICOL      (1 << 14)               /* ; */
142 #define TC_NEWLINE      (1 << 15)               /* \n */
143 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
144 #define TC_WHILE        (1 << 17)
145 #define TC_ELSE         (1 << 18)
146 #define TC_BUILTIN      (1 << 19)
147 #define TC_GETLINE      (1 << 20)
148 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
149 #define TC_BEGIN        (1 << 22)
150 #define TC_END          (1 << 23)
151 #define TC_EOF          (1 << 24)
152 #define TC_VARIABLE     (1 << 25)
153 #define TC_ARRAY        (1 << 26)
154 #define TC_FUNCTION     (1 << 27)
155 #define TC_STRING       (1 << 28)
156 #define TC_NUMBER       (1 << 29)
157
/* either kind of unary prefix operator */
158 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
159
160 /* combined token classes */
161 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
165
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
168
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
172
173 /* discard newlines after these */
174 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175                    | TC_BINOP | TC_OPTERM)
176
177 /* what can expression begin with */
178 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
181
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operator flags, OR-ed into the info words of tokeninfo[].
 * NOTE(review): going by the names and the builtin comment below, RESn
 * presumably means "resolve operand n", STRn "as string", NUM1 "as number" -
 * confirm against evaluate(). */
188 #define OF_RES1    0x010000
189 #define OF_RES2    0x020000
190 #define OF_STR1    0x040000
191 #define OF_STR2    0x080000
192 #define OF_NUM1    0x100000
193 #define OF_CHECKED 0x200000
194
195 /* combined operator flags */
/* two-letter shorthands: first letter is built from the *1 flags, second from the *2 flags */
196 #define xx      0
197 #define xV      OF_RES2
198 #define xS      (OF_RES2 | OF_STR2)
199 #define Vx      OF_RES1
200 #define VV      (OF_RES1 | OF_RES2)
201 #define Nx      (OF_RES1 | OF_NUM1)
202 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx      (OF_RES1 | OF_STR1)
204 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
/* masks for the operation class (OC_ and ST_ values) and the low opcode bits of an info word */
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK   0x007F
209
210 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
211  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
212  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
213  */
214 #define P(x)      (x << 24)
215 #define PRIMASK   0x7F000000
216 #define PRIMASK2  0x7E000000
217
218 /* Operation classes */
219
/* Class boundaries: 0x0600 equals OC_WALKINIT and 0x1000 equals OC_BINARY.
 * NOTE(review): the names suggest thresholds used by the parser/evaluator -
 * confirm where they are compared. */
220 #define SHIFT_TIL_THIS  0x0600
221 #define RECUR_FROM_THIS 0x1000
222
/* Operation class codes; they occupy the bits selected by OPCLSMASK (0xFF00) */
223 enum {
224         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
225         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
226
227         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
228         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
229         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
230
231         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
232         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
233         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
234         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
235         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
236         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
237         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
238         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
239         OC_DONE = 0x2800,
240
241         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
242         ST_WHILE = 0x3300
243 };
244
245 /* simple builtins: opcode values combined with OC_FBLTIN in tokeninfo[] */
246 enum {
247         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
248         F_ti,   F_le,   F_sy,   F_ff,   F_cl
249 };
250
251 /* builtins: opcode values combined with OC_BUILTIN (OC_B) in tokeninfo[] */
252 enum {
253         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
254         B_ge,   B_gs,   B_su,
255         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
256 };
257
258 /* tokens and their corresponding info values */
259
260 #define NTC     "\377"  /* switch to next token class (tc<<1) */
261 #define NTCC    '\377'
262
/* shorthand used in tokeninfo[] */
263 #define OC_B    OC_BUILTIN
264
/* Token table. Each entry is one length byte (written in octal) followed by
 * the token text; NTC ends a token class, so classes appear in the same
 * order as the TC_* bits. The final "\0" is a zero length byte.
 * NOTE(review): presumably that zero length is the end-of-list marker - confirm in next_token(). */
265 static const char tokenlist[] ALIGN1 =
266         "\1("       NTC
267         "\1)"       NTC
268         "\1/"       NTC                                 /* REGEXP */
269         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
270         "\2++"      "\2--"      NTC                     /* UOPPOST */
271         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
272         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
273         "\2*="      "\2/="      "\2%="      "\2^="
274         "\1+"       "\1-"       "\3**="     "\2**"
275         "\1/"       "\1%"       "\1^"       "\1*"
276         "\2!="      "\2>="      "\2<="      "\1>"
277         "\1<"       "\2!~"      "\1~"       "\2&&"
278         "\2||"      "\1?"       "\1:"       NTC
279         "\2in"      NTC
280         "\1,"       NTC
281         "\1|"       NTC
282         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
283         "\1]"       NTC
284         "\1{"       NTC
285         "\1}"       NTC
286         "\1;"       NTC
287         "\1\n"      NTC
288         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
289         "\10continue"           "\6delete"  "\5print"
290         "\6printf"  "\4next"    "\10nextfile"
291         "\6return"  "\4exit"    NTC
292         "\5while"   NTC
293         "\4else"    NTC
294
295         "\3and"     "\5compl"   "\6lshift"  "\2or"
296         "\6rshift"  "\3xor"
297         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
298         "\3cos"     "\3exp"     "\3int"     "\3log"
299         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
300         "\6gensub"  "\4gsub"    "\5index"   "\6length"
301         "\5match"   "\5split"   "\7sprintf" "\3sub"
302         "\6substr"  "\7systime" "\10strftime" "\6mktime"
303         "\7tolower" "\7toupper" NTC
304         "\7getline" NTC
305         "\4func"    "\10function"   NTC
306         "\5BEGIN"   NTC
307         "\3END"     "\0"
308         ;
309
/* Info words for the tokens of tokenlist[]: one entry per token, in the same
 * order (NTC separators have no entry). A word combines an OC_ or ST_
 * operation class, OF_* operand flags (xV, NV, ...), a P() priority byte and
 * a low opcode (a character or an F_ or B_ enum value). */
310 static const uint32_t tokeninfo[] = {
311         0,
312         0,
313         OC_REGEXP,
314         xS|'a',     xS|'w',     xS|'|',
315         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
316         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
317             OC_FIELD|xV|P(5),
318         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
319             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
320         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
321             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
322         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
323             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
324         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
325             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
326         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
327             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
328         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
329             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
330         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
331             OC_COLON|xx|P(67)|':',
332         OC_IN|SV|P(49),
333         OC_COMMA|SS|P(80),
334         OC_PGETLINE|SV|P(37),
335         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
336             OC_UNARY|xV|P(19)|'!',
337         0,
338         0,
339         0,
340         0,
341         0,
342         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
343         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
344         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
345         OC_RETURN|Vx,   OC_EXIT|Nx,
346         ST_WHILE,
347         0,
348
349         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
356         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
357         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358         OC_GETLINE|SV|P(0),
359         0,      0,
360         0,
361         0
362 };
363
364 /* internal variable names and their initial values       */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indexes into intvar[]; the order matches the names in vNames[] */
366 enum {
367         CONVFMT,    OFMT,       FS,         OFS,
368         ORS,        RS,         RT,         FILENAME,
369         SUBSEP,     F0,         ARGIND,     ARGC,
370         ARGV,       ERRNO,      FNR,        NR,
371         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
372 };
373
/* NUL-separated names in enum order. An entry that begins with '*' (it
 * follows the previous name's NUL) is a SPECIAL variable; the list ends with
 * an empty string. */
374 static const char vNames[] ALIGN1 =
375         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
376         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
377         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
378         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
379         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
380
/* NUL-separated initial string values, in the same order as vNames[].
 * NOTE(review): the trailing "\377" presumably marks where the string
 * initializers stop - confirm in the init code. */
381 static const char vValues[] ALIGN1 =
382         "%.6g\0"    "%.6g\0"    " \0"       " \0"
383         "\n\0"      "\n\0"      "\0"        "\0"
384         "\034\0"    "\0"        "\377";
385
386 /* hash size may grow to these values */
/* hash_init() starts every table at FIRST_PRIME buckets; hash_rebuild()
 * then steps through PRIMES[] using xhash::nprime as the index */
387 #define FIRST_PRIME 61
388 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
389
390
391 /* Globals. Split in two parts so that first one is addressed
392  * with (mostly short) negative offsets.
393  * NB: it's unsafe to put members of type "double"
394  * into globals2 (gcc may fail to align them).
395  */
/* First part of the global state, reached through the G1 macro */
396 struct globals {
397         double t_double;
398         chain beginseq, mainseq, endseq;
399         chain *seq;
400         node *break_ptr, *continue_ptr;
401         rstream *iF;
/* vhash is used by newvar(), fdhash by newfile(), fnhash by newfunc() */
402         xhash *vhash, *ahash, *fdhash, *fnhash;
403         const char *g_progname; /* printed by syntax_error() */
404         int g_lineno;           /* printed by syntax_error() */
405         int nfields;
406         int maxfields; /* used in fsrealloc() only */
407         var *Fields;
408         nvblock *g_cb;          /* current block of temporaries (nvalloc/nvfree) */
409         char *g_pos;            /* position in program text read by next_token() */
410         char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
411         smallint icase;
412         smallint exiting;
413         smallint nextrec;
414         smallint nextfile;
415         smallint is_f0_split;
416 };
/* Second part of the global state, reached through the G macro.
 * The t_* members describe the current token (see next_token()). */
417 struct globals2 {
418         uint32_t t_info; /* often used */
419         uint32_t t_tclass;
420         char *t_string;
421         int t_lineno;
422         int t_rollback;
423
/* internal variables, indexed by the CONVFMT..ENVIRON enum */
424         var *intvar[NUM_INTERNAL_VARS]; /* often used */
425
426         /* former statics from various functions */
/* (members are named <function>__<variable>) */
427         char *split_f0__fstrings;
428
429         uint32_t next_token__save_tclass;
430         uint32_t next_token__save_info;
431         uint32_t next_token__ltclass;
432         smallint next_token__concat_inserted;
433
434         smallint next_input_file__files_happen;
435         rstream next_input_file__rsm;
436
437         var *evaluate__fnargs;
438         unsigned evaluate__seed;
439         regex_t evaluate__sreg;
440
441         var ptest__v;
442
443         tsplitter exec_builtin__tspl;
444
445         /* biggest and least used members go last */
446         tsplitter fsplitter, rsplitter;
447 };
/* Both structs live in one allocation (see INIT_G): ptr_to_globals points
 * at the struct globals2 part, and struct globals sits right before it. */
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /*char G1size[sizeof(G1)]; - 0x74 */
452 /*char Gsize[sizeof(G)]; - 0x1c4 */
453 /* Trying to keep most of members accessible with short offsets: */
454 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Short names so the rest of the file can use the globals like plain variables */
455 #define t_double     (G1.t_double    )
456 #define beginseq     (G1.beginseq    )
457 #define mainseq      (G1.mainseq     )
458 #define endseq       (G1.endseq      )
459 #define seq          (G1.seq         )
460 #define break_ptr    (G1.break_ptr   )
461 #define continue_ptr (G1.continue_ptr)
462 #define iF           (G1.iF          )
463 #define vhash        (G1.vhash       )
464 #define ahash        (G1.ahash       )
465 #define fdhash       (G1.fdhash      )
466 #define fnhash       (G1.fnhash      )
467 #define g_progname   (G1.g_progname  )
468 #define g_lineno     (G1.g_lineno    )
469 #define nfields      (G1.nfields     )
470 #define maxfields    (G1.maxfields   )
471 #define Fields       (G1.Fields      )
472 #define g_cb         (G1.g_cb        )
473 #define g_pos        (G1.g_pos       )
474 #define g_buf        (G1.g_buf       )
475 #define icase        (G1.icase       )
476 #define exiting      (G1.exiting     )
477 #define nextrec      (G1.nextrec     )
478 #define nextfile     (G1.nextfile    )
479 #define is_f0_split  (G1.is_f0_split )
480 #define t_info       (G.t_info      )
481 #define t_tclass     (G.t_tclass    )
482 #define t_string     (G.t_string    )
483 #define t_lineno     (G.t_lineno    )
484 #define t_rollback   (G.t_rollback  )
485 #define intvar       (G.intvar      )
486 #define fsplitter    (G.fsplitter   )
487 #define rsplitter    (G.rsplitter   )
/* Allocate zeroed storage for both global structs, point ptr_to_globals just
 * past the first one, then set the two members whose initial value is non-zero */
488 #define INIT_G() do { \
489         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
490         G.next_token__ltclass = TC_OPTERM; \
491         G.evaluate__seed = 1; \
492 } while (0)
493
494
495 /* function prototypes */
/* Forward declarations of functions that are called before their definition */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
503
504 /* ---- error handling ---- */
505
/* Diagnostic texts; syntax_error() prints them after "<program>:<line>: " */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only defined when the build has no libm support */
515 #if !ENABLE_FEATURE_AWK_LIBM
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
518
519 static void zero_out_var(var *vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *message) NORETURN;
525 static void syntax_error(const char *message)
526 {
527         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/* Compute the hash of a NUL-terminated name: for every character the
 * running value is multiplied by 63 (written as shift-and-subtract) and the
 * character is added. Arithmetic is unsigned and wraps. The caller reduces
 * the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	const char *p;
	unsigned h = 0;

	for (p = name; *p != '\0'; p++)
		h = (h << 6) - h + (unsigned)*p;
	return h;
}
540
541 /* create new hash */
542 static xhash *hash_init(void)
543 {
544         xhash *newhash;
545
546         newhash = xzalloc(sizeof(*newhash));
547         newhash->csize = FIRST_PRIME;
548         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
549
550         return newhash;
551 }
552
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
555 {
556         hash_item *hi;
557
558         hi = hash->items[hashidx(name) % hash->csize];
559         while (hi) {
560                 if (strcmp(hi->name, name) == 0)
561                         return &(hi->data);
562                 hi = hi->next;
563         }
564         return NULL;
565 }
566
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
569 {
570         unsigned newsize, i, idx;
571         hash_item **newitems, *hi, *thi;
572
573         if (hash->nprime == ARRAY_SIZE(PRIMES))
574                 return;
575
576         newsize = PRIMES[hash->nprime++];
577         newitems = xzalloc(newsize * sizeof(newitems[0]));
578
579         for (i = 0; i < hash->csize; i++) {
580                 hi = hash->items[i];
581                 while (hi) {
582                         thi = hi;
583                         hi = thi->next;
584                         idx = hashidx(thi->name) % newsize;
585                         thi->next = newitems[idx];
586                         newitems[idx] = thi;
587                 }
588         }
589
590         free(hash->items);
591         hash->csize = newsize;
592         hash->items = newitems;
593 }
594
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
597 {
598         hash_item *hi;
599         unsigned idx;
600         int l;
601
602         hi = hash_search(hash, name);
603         if (!hi) {
604                 if (++hash->nel / hash->csize > 10)
605                         hash_rebuild(hash);
606
607                 l = strlen(name) + 1;
608                 hi = xzalloc(sizeof(*hi) + l);
609                 strcpy(hi->name, name);
610
611                 idx = hashidx(name) % hash->csize;
612                 hi->next = hash->items[idx];
613                 hash->items[idx] = hi;
614                 hash->glen += l;
615         }
616         return &(hi->data);
617 }
618
/* Typed wrappers around hash_find(): look the name up and create the entry
 * if it is missing. newvar/newfile/newfunc use the global vhash/fdhash/fnhash. */
619 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
620 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
621 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
623
624 static void hash_remove(xhash *hash, const char *name)
625 {
626         hash_item *hi, **phi;
627
628         phi = &(hash->items[hashidx(name) % hash->csize]);
629         while (*phi) {
630                 hi = *phi;
631                 if (strcmp(hi->name, name) == 0) {
632                         hash->glen -= (strlen(name) + 1);
633                         hash->nel--;
634                         *phi = hi->next;
635                         free(hi);
636                         break;
637                 }
638                 phi = &(hi->next);
639         }
640 }
641
642 /* ------ some useful functions ------ */
643
644 static void skip_spaces(char **s)
645 {
646         char *p = *s;
647
648         while (1) {
649                 if (*p == '\\' && p[1] == '\n') {
650                         p++;
651                         t_lineno++;
652                 } else if (*p != ' ' && *p != '\t') {
653                         break;
654                 }
655                 p++;
656         }
657         *s = p;
658 }
659
/* Return the NUL-terminated word *s points at and move *s to the byte
 * right after that word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
667
/* Fetch one character from *s and advance *s, decoding a backslash escape
 * via bb_process_escape_sequence(). If that call hands back a backslash
 * without consuming anything, the character after the backslash is
 * returned as is. */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
680
681 static ALWAYS_INLINE int isalnum_(int c)
682 {
683         return (isalnum(c) || c == '_');
684 }
685
/* Convert the number at *pp to double and advance *pp past it.
 *
 * With ENABLE_DESKTOP, text starting with "0x"/"0X" or "0<digit>" is first
 * parsed by strtoull() with base 0, so hex and octal integers are accepted.
 * If that integer parse stops at a digit or at '.', the text was really a
 * decimal or floating number with leading zeros (e.g. "009.5" or "00.5"),
 * so it is re-parsed from the start with strtod() instead of returning a
 * truncated value.
 */
static double my_strtod(char **pp)
{
	char *cp = *pp;

#if ENABLE_DESKTOP
	if (cp[0] == '0') {
		char c = (cp[1] | 0x20);

		if (c == 'x' || isdigit((unsigned char)cp[1])) {
			unsigned long long ull = strtoull(cp, pp, 0);

			if (c == 'x')
				return ull;
			c = **pp;
			if (!isdigit((unsigned char)c) && c != '.')
				return ull;
			/* else: not a pure octal integer, fall through to strtod */
		}
	}
#endif
	return strtod(cp, pp);
}
697
698 /* -------- working with variables (set/get/copy/etc) -------- */
699
700 static xhash *iamarray(var *v)
701 {
702         var *a = v;
703
704         while (a->type & VF_CHILD)
705                 a = a->x.parent;
706
707         if (!(a->type & VF_ARRAY)) {
708                 a->type |= VF_ARRAY;
709                 a->x.array = hash_init();
710         }
711         return a->x.array;
712 }
713
714 static void clear_array(xhash *array)
715 {
716         unsigned i;
717         hash_item *hi, *thi;
718
719         for (i = 0; i < array->csize; i++) {
720                 hi = array->items[i];
721                 while (hi) {
722                         thi = hi;
723                         hi = hi->next;
724                         free(thi->data.v.string);
725                         free(thi);
726                 }
727                 array->items[i] = NULL;
728         }
729         array->glen = array->nel = 0;
730 }
731
732 /* clear a variable */
733 static var *clrvar(var *v)
734 {
735         if (!(v->type & VF_FSTR))
736                 free(v->string);
737
738         v->type &= VF_DONTTOUCH;
739         v->type |= VF_DIRTY;
740         v->string = NULL;
741         return v;
742 }
743
744 /* assign string value to variable */
745 static var *setvar_p(var *v, char *value)
746 {
747         clrvar(v);
748         v->string = value;
749         handle_special(v);
750         return v;
751 }
752
753 /* same as setvar_p but make a copy of string */
754 static var *setvar_s(var *v, const char *value)
755 {
756         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
757 }
758
759 /* same as setvar_s but sets USER flag */
760 static var *setvar_u(var *v, const char *value)
761 {
762         v = setvar_s(v, value);
763         v->type |= VF_USER;
764         return v;
765 }
766
767 /* set array element to user string */
768 static void setari_u(var *a, int idx, const char *s)
769 {
770         var *v;
771
772         v = findvar(iamarray(a), itoa(idx));
773         setvar_u(v, s);
774 }
775
776 /* assign numeric value to variable */
777 static var *setvar_i(var *v, double value)
778 {
779         clrvar(v);
780         v->type |= VF_NUMBER;
781         v->number = value;
782         handle_special(v);
783         return v;
784 }
785
786 static const char *getvar_s(var *v)
787 {
788         /* if v is numeric and has no cached string, convert it to string */
789         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
791                 v->string = xstrdup(g_buf);
792                 v->type |= VF_CACHED;
793         }
794         return (v->string == NULL) ? "" : v->string;
795 }
796
797 static double getvar_i(var *v)
798 {
799         char *s;
800
801         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
802                 v->number = 0;
803                 s = v->string;
804                 if (s && *s) {
805                         v->number = my_strtod(&s);
806                         if (v->type & VF_USER) {
807                                 skip_spaces(&s);
808                                 if (*s != '\0')
809                                         v->type &= ~VF_USER;
810                         }
811                 } else {
812                         v->type &= ~VF_USER;
813                 }
814                 v->type |= VF_CACHED;
815         }
816         return v->number;
817 }
818
819 /* Used for operands of bitwise ops */
820 static unsigned long getvar_i_int(var *v)
821 {
822         double d = getvar_i(v);
823
824         /* Casting doubles to longs is undefined for values outside
825          * of target type range. Try to widen it as much as possible */
826         if (d >= 0)
827                 return (unsigned long)d;
828         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
829         return - (long) (unsigned long) (-d);
830 }
831
832 static var *copyvar(var *dest, const var *src)
833 {
834         if (dest != src) {
835                 clrvar(dest);
836                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837                 dest->number = src->number;
838                 if (src->string)
839                         dest->string = xstrdup(src->string);
840         }
841         handle_special(dest);
842         return dest;
843 }
844
845 static var *incvar(var *v)
846 {
847         return setvar_i(v, getvar_i(v) + 1.0);
848 }
849
850 /* return true if v is number or numeric string */
851 static int is_numeric(var *v)
852 {
853         getvar_i(v);
854         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
855 }
856
857 /* return 1 when value of v corresponds to true, 0 otherwise */
858 static int istrue(var *v)
859 {
860         if (is_numeric(v))
861                 return (v->number != 0);
862         return (v->string && v->string[0]);
863 }
864
865 /* temporary variables allocator. Last allocated should be first freed */
866 static var *nvalloc(int n)
867 {
868         nvblock *pb = NULL;
869         var *v, *r;
870         int size;
871
872         while (g_cb) {
873                 pb = g_cb;
874                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
875                         break;
876                 g_cb = g_cb->next;
877         }
878
879         if (!g_cb) {
880                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
881                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
882                 g_cb->size = size;
883                 g_cb->pos = g_cb->nv;
884                 g_cb->prev = pb;
885                 /*g_cb->next = NULL; - xzalloc did it */
886                 if (pb)
887                         pb->next = g_cb;
888         }
889
890         v = r = g_cb->pos;
891         g_cb->pos += n;
892
893         while (v < g_cb->pos) {
894                 v->type = 0;
895                 v->string = NULL;
896                 v++;
897         }
898
899         return r;
900 }
901
902 static void nvfree(var *v)
903 {
904         var *p;
905
906         if (v < g_cb->nv || v >= g_cb->pos)
907                 syntax_error(EMSG_INTERNAL_ERROR);
908
909         for (p = v; p < g_cb->pos; p++) {
910                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
911                         clear_array(iamarray(p));
912                         free(p->x.array->items);
913                         free(p->x.array);
914                 }
915                 if (p->type & VF_WALK) {
916                         //bb_error_msg("free(walker@%p:%p) #1", &p->x.walker, p->x.walker);
917                         free(p->x.walker);
918                 }
919
920                 clrvar(p);
921         }
922
923         g_cb->pos = v;
924         while (g_cb->prev && g_cb->pos == g_cb->nv) {
925                 g_cb = g_cb->prev;
926         }
927 }
928
929 /* ------- awk program text parsing ------- */
930
931 /* Parse next token pointed to by global g_pos and publish it through the
 *     t_* globals: t_tclass (class), t_info for table-driven tokens,
 *     t_string for strings/regexps/names and t_double for numbers.
 *     A token pushed back by rollback_token(), or one held back while a
 *     concatenation operator was synthesized, is delivered before any new
 *     text is scanned.
932  * If the token's class is not in 'expected', raise a syntax error.
 *     Returns the token class.
933  */
934 static uint32_t next_token(uint32_t expected)
935 {
936 #define concat_inserted (G.next_token__concat_inserted)
937 #define save_tclass     (G.next_token__save_tclass)
938 #define save_info       (G.next_token__save_info)
939 /* Initialized to TC_OPTERM: */
940 #define ltclass         (G.next_token__ltclass)
941
942         char *p, *pp, *s;
943         const char *tl;
944         uint32_t tc;
945         const uint32_t *ti;
946         int l;
947
948         if (t_rollback) {
            /* re-deliver the token still stored in t_tclass */
949                 t_rollback = FALSE;
950
951         } else if (concat_inserted) {
            /* previous call returned a synthesized concatenation;
             * now hand out the real token that was saved then */
952                 concat_inserted = FALSE;
953                 t_tclass = save_tclass;
954                 t_info = save_info;
955
956         } else {
957                 p = g_pos;
958  readnext:
959                 skip_spaces(&p);
960                 g_lineno = t_lineno;
961                 if (*p == '#') /* comment: skip up to (not including) end of line */
962                         while (*p != '\n' && *p != '\0')
963                                 p++;
964
965                 if (*p == '\n')
966                         t_lineno++;
967
968                 if (*p == '\0') {
969                         tc = TC_EOF;
970
971                 } else if (*p == '\"') {
972                         /* it's a string */
                    /* the chars returned by nextchar() are written back
                     * into the program buffer, starting right after the quote */
973                         t_string = s = ++p;
974                         while (*p != '\"') {
975                                 if (*p == '\0' || *p == '\n')
976                                         syntax_error(EMSG_UNEXP_EOS);
977                                 *(s++) = nextchar(&p);
978                         }
979                         p++;
980                         *s = '\0';
981                         tc = TC_STRING;
982
983                 } else if ((expected & TC_REGEXP) && *p == '/') {
984                         /* it's regexp */
                    /* '/' starts a regexp only where the caller expects one */
985                         t_string = s = ++p;
986                         while (*p != '/') {
987                                 if (*p == '\0' || *p == '\n')
988                                         syntax_error(EMSG_UNEXP_EOS);
989                                 *s = *p++;
990                                 if (*s++ == '\\') {
                                    /* backslash: store the char produced by
                                     * bb_process_escape_sequence() in its place;
                                     * an escaped backslash is kept as two backslashes;
                                     * if nothing was consumed (p == pp) the following
                                     * char is copied verbatim after the backslash */
991                                         pp = p;
992                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
993                                         if (*pp == '\\')
994                                                 *s++ = '\\';
995                                         if (p == pp)
996                                                 *s++ = *p++;
997                                 }
998                         }
999                         p++;
1000                         *s = '\0';
1001                         tc = TC_REGEXP;
1002
1003                 } else if (*p == '.' || isdigit(*p)) {
1004                         /* it's a number */
1005                         t_double = my_strtod(&p);
1006                         if (*p == '.') /* a '.' directly after a number is rejected */
1007                                 syntax_error(EMSG_UNEXP_TOKEN);
1008                         tc = TC_NUMBER;
1009
1010                 } else {
1011                         /* search for something known */
                     /* tokenlist is walked as length-prefixed entries; an NTCC
                      * entry switches to the next token class (tc is shifted
                      * left), and ti advances through tokeninfo in step with
                      * the non-NTCC entries */
1012                         tl = tokenlist;
1013                         tc = 0x00000001;
1014                         ti = tokeninfo;
1015                         while (*tl) {
1016                                 l = *(tl++);
1017                                 if (l == NTCC) {
1018                                         tc <<= 1;
1019                                         continue;
1020                                 }
1021                                 /* if token class is expected, token
1022                                  * matches and it's not a longer word,
1023                                  * then this is what we are looking for
1024                                  */
1025                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1026                                  && *tl == *p && strncmp(p, tl, l) == 0
1027                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1028                                 ) {
1029                                         t_info = *ti;
1030                                         p += l;
1031                                         break;
1032                                 }
1033                                 ti++;
1034                                 tl += l;
1035                         }
1036
1037                         if (!*tl) { /* table exhausted without a match */
1038                                 /* it's a name (var/array/function),
1039                                  * otherwise it's something wrong
1040                                  */
1041                                 if (!isalnum_(*p))
1042                                         syntax_error(EMSG_UNEXP_TOKEN);
1043
                             /* the name is shifted one char to the left inside
                              * the program buffer so that a terminating NUL fits
                              * without overwriting the char that follows it */
1044                                 t_string = --p;
1045                                 while (isalnum_(*(++p))) {
1046                                         *(p-1) = *p;
1047                                 }
1048                                 *(p-1) = '\0';
1049                                 tc = TC_VARIABLE;
1050                                 /* also consume whitespace between functionname and bracket */
1051                                 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1052                                         skip_spaces(&p);
1053                                 if (*p == '(') { /* '(' is left in the input */
1054                                         tc = TC_FUNCTION;
1055                                 } else {
1056                                         if (*p == '[') { /* '[' is consumed */
1057                                                 p++;
1058                                                 tc = TC_ARRAY;
1059                                         }
1060                                 }
1061                         }
1062                 }
1063                 g_pos = p;
1064
1065                 /* skipping newlines in some cases */
             /* i.e. when the class of the previous token is in TC_NOTERM */
1066                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1067                         goto readnext;
1068
1069                 /* insert concatenation operator when needed */
             /* the real token is saved and returned by the next call */
1070                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1071                         concat_inserted = TRUE;
1072                         save_tclass = tc;
1073                         save_info = t_info;
1074                         tc = TC_BINOP;
1075                         t_info = OC_CONCAT | SS | P(35);
1076                 }
1077
1078                 t_tclass = tc;
1079         }
1080         ltclass = t_tclass; /* remembered across calls as the "last token class" */
1081
1082         /* Are we ready for this? */
1083         if (!(ltclass & expected))
1084                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1085                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1086
1087         return ltclass;
1088 #undef concat_inserted
1089 #undef save_tclass
1090 #undef save_info
1091 #undef ltclass
1092 }
1093
1094 static void rollback_token(void)
1095 {
1096         t_rollback = TRUE;
1097 }
1098
1099 static node *new_node(uint32_t info)
1100 {
1101         node *n;
1102
1103         n = xzalloc(sizeof(node));
1104         n->info = info;
1105         n->lineno = g_lineno;
1106         return n;
1107 }
1108
1109 static node *mk_re_node(const char *s, node *n, regex_t *re)
1110 {
1111         n->info = OC_REGEXP;
1112         n->l.re = re;
1113         n->r.ire = re + 1;
1114         xregcomp(re, s, REG_EXTENDED);
1115         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1116
1117         return n;
1118 }
1119
1120 static node *condition(void)
1121 {
1122         next_token(TC_SEQSTART);
1123         return parse_expr(TC_SEQTERM);
1124 }
1125
1126 /* parse expression terminated by given argument, return ptr
1127  * to built subtree. Terminator is eaten by parse_expr */
1128 static node *parse_expr(uint32_t iexp)
1129 {
1130         node sn;
1131         node *cn = &sn;
1132         node *vn, *glptr;
1133         uint32_t tc, xtc;
1134         var *v;
1135
1136         sn.info = PRIMASK;
1137         sn.r.n = glptr = NULL;
1138         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1139
1140         while (!((tc = next_token(xtc)) & iexp)) {
1141                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1142                         /* input redirection (<) attached to glptr node */
1143                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1144                         cn->a.n = glptr;
1145                         xtc = TC_OPERAND | TC_UOPPRE;
1146                         glptr = NULL;
1147
1148                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1149                         /* for binary and postfix-unary operators, jump back over
1150                          * previous operators with higher priority */
1151                         vn = cn;
1152                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1153                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1154                         ) {
1155                                 vn = vn->a.n;
1156                         }
1157                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1158                                 t_info += P(6);
1159                         cn = vn->a.n->r.n = new_node(t_info);
1160                         cn->a.n = vn->a.n;
1161                         if (tc & TC_BINOP) {
1162                                 cn->l.n = vn;
1163                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1164                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1165                                         /* it's a pipe */
1166                                         next_token(TC_GETLINE);
1167                                         /* give maximum priority to this pipe */
1168                                         cn->info &= ~PRIMASK;
1169                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1170                                 }
1171                         } else {
1172                                 cn->r.n = vn;
1173                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1174                         }
1175                         vn->a.n = cn;
1176
1177                 } else {
1178                         /* for operands and prefix-unary operators, attach them
1179                          * to last node */
1180                         vn = cn;
1181                         cn = vn->r.n = new_node(t_info);
1182                         cn->a.n = vn;
1183                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1184                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1185                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1186                                 /* one should be very careful with switch on tclass -
1187                                  * only simple tclasses should be used! */
1188                                 switch (tc) {
1189                                 case TC_VARIABLE:
1190                                 case TC_ARRAY:
1191                                         cn->info = OC_VAR;
1192                                         v = hash_search(ahash, t_string);
1193                                         if (v != NULL) {
1194                                                 cn->info = OC_FNARG;
1195                                                 cn->l.i = v->x.aidx;
1196                                         } else {
1197                                                 cn->l.v = newvar(t_string);
1198                                         }
1199                                         if (tc & TC_ARRAY) {
1200                                                 cn->info |= xS;
1201                                                 cn->r.n = parse_expr(TC_ARRTERM);
1202                                         }
1203                                         break;
1204
1205                                 case TC_NUMBER:
1206                                 case TC_STRING:
1207                                         cn->info = OC_VAR;
1208                                         v = cn->l.v = xzalloc(sizeof(var));
1209                                         if (tc & TC_NUMBER)
1210                                                 setvar_i(v, t_double);
1211                                         else
1212                                                 setvar_s(v, t_string);
1213                                         break;
1214
1215                                 case TC_REGEXP:
1216                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1217                                         break;
1218
1219                                 case TC_FUNCTION:
1220                                         cn->info = OC_FUNC;
1221                                         cn->r.f = newfunc(t_string);
1222                                         cn->l.n = condition();
1223                                         break;
1224
1225                                 case TC_SEQSTART:
1226                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1227                                         cn->a.n = vn;
1228                                         break;
1229
1230                                 case TC_GETLINE:
1231                                         glptr = cn;
1232                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1233                                         break;
1234
1235                                 case TC_BUILTIN:
1236                                         cn->l.n = condition();
1237                                         break;
1238                                 }
1239                         }
1240                 }
1241         }
1242         return sn.r.n;
1243 }
1244
1245 /* add node to chain. Return ptr to alloc'd node */
1246 static node *chain_node(uint32_t info)
1247 {
1248         node *n;
1249
1250         if (!seq->first)
1251                 seq->first = seq->last = new_node(0);
1252
1253         if (seq->programname != g_progname) {
1254                 seq->programname = g_progname;
1255                 n = chain_node(OC_NEWSOURCE);
1256                 n->l.s = xstrdup(g_progname);
1257         }
1258
1259         n = seq->last;
1260         n->info = info;
1261         seq->last = n->a.n = new_node(OC_DONE);
1262
1263         return n;
1264 }
1265
1266 static void chain_expr(uint32_t info)
1267 {
1268         node *n;
1269
1270         n = chain_node(info);
1271         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1272         if (t_tclass & TC_GRPTERM)
1273                 rollback_token();
1274 }
1275
1276 static node *chain_loop(node *nn)
1277 {
1278         node *n, *n2, *save_brk, *save_cont;
1279
1280         save_brk = break_ptr;
1281         save_cont = continue_ptr;
1282
1283         n = chain_node(OC_BR | Vx);
1284         continue_ptr = new_node(OC_EXEC);
1285         break_ptr = new_node(OC_EXEC);
1286         chain_group();
1287         n2 = chain_node(OC_EXEC | Vx);
1288         n2->l.n = nn;
1289         n2->a.n = n;
1290         continue_ptr->a.n = n2;
1291         break_ptr->a.n = n->r.n = seq->last;
1292
1293         continue_ptr = save_cont;
1294         break_ptr = save_brk;
1295
1296         return n;
1297 }
1298
1299 /* parse group and attach it to chain */
1300 static void chain_group(void)
1301 {
1302         uint32_t c;
1303         node *n, *n2, *n3;
1304
1305         do {
1306                 c = next_token(TC_GRPSEQ);
1307         } while (c & TC_NEWLINE);
1308
1309         if (c & TC_GRPSTART) {
1310                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1311                         if (t_tclass & TC_NEWLINE) continue;
1312                         rollback_token();
1313                         chain_group();
1314                 }
1315         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1316                 rollback_token();
1317                 chain_expr(OC_EXEC | Vx);
1318         } else {                                                /* TC_STATEMNT */
1319                 switch (t_info & OPCLSMASK) {
1320                 case ST_IF:
1321                         n = chain_node(OC_BR | Vx);
1322                         n->l.n = condition();
1323                         chain_group();
1324                         n2 = chain_node(OC_EXEC);
1325                         n->r.n = seq->last;
1326                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1327                                 chain_group();
1328                                 n2->a.n = seq->last;
1329                         } else {
1330                                 rollback_token();
1331                         }
1332                         break;
1333
1334                 case ST_WHILE:
1335                         n2 = condition();
1336                         n = chain_loop(NULL);
1337                         n->l.n = n2;
1338                         break;
1339
1340                 case ST_DO:
1341                         n2 = chain_node(OC_EXEC);
1342                         n = chain_loop(NULL);
1343                         n2->a.n = n->a.n;
1344                         next_token(TC_WHILE);
1345                         n->l.n = condition();
1346                         break;
1347
1348                 case ST_FOR:
1349                         next_token(TC_SEQSTART);
1350                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1351                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1352                                 if ((n2->info & OPCLSMASK) != OC_IN)
1353                                         syntax_error(EMSG_UNEXP_TOKEN);
1354                                 n = chain_node(OC_WALKINIT | VV);
1355                                 n->l.n = n2->l.n;
1356                                 n->r.n = n2->r.n;
1357                                 n = chain_loop(NULL);
1358                                 n->info = OC_WALKNEXT | Vx;
1359                                 n->l.n = n2->l.n;
1360                         } else {                        /* for (;;) */
1361                                 n = chain_node(OC_EXEC | Vx);
1362                                 n->l.n = n2;
1363                                 n2 = parse_expr(TC_SEMICOL);
1364                                 n3 = parse_expr(TC_SEQTERM);
1365                                 n = chain_loop(n3);
1366                                 n->l.n = n2;
1367                                 if (!n2)
1368                                         n->info = OC_EXEC;
1369                         }
1370                         break;
1371
1372                 case OC_PRINT:
1373                 case OC_PRINTF:
1374                         n = chain_node(t_info);
1375                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1376                         if (t_tclass & TC_OUTRDR) {
1377                                 n->info |= t_info;
1378                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1379                         }
1380                         if (t_tclass & TC_GRPTERM)
1381                                 rollback_token();
1382                         break;
1383
1384                 case OC_BREAK:
1385                         n = chain_node(OC_EXEC);
1386                         n->a.n = break_ptr;
1387                         break;
1388
1389                 case OC_CONTINUE:
1390                         n = chain_node(OC_EXEC);
1391                         n->a.n = continue_ptr;
1392                         break;
1393
1394                 /* delete, next, nextfile, return, exit */
1395                 default:
1396                         chain_expr(t_info);
1397                 }
1398         }
1399 }
1400
1401 static void parse_program(char *p)
1402 {
1403         uint32_t tclass;
1404         node *cn;
1405         func *f;
1406         var *v;
1407
1408         g_pos = p;
1409         t_lineno = 1;
1410         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1411                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1412
1413                 if (tclass & TC_OPTERM)
1414                         continue;
1415
1416                 seq = &mainseq;
1417                 if (tclass & TC_BEGIN) {
1418                         seq = &beginseq;
1419                         chain_group();
1420
1421                 } else if (tclass & TC_END) {
1422                         seq = &endseq;
1423                         chain_group();
1424
1425                 } else if (tclass & TC_FUNCDECL) {
1426                         next_token(TC_FUNCTION);
1427                         g_pos++;
1428                         f = newfunc(t_string);
1429                         f->body.first = NULL;
1430                         f->nargs = 0;
1431                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1432                                 v = findvar(ahash, t_string);
1433                                 v->x.aidx = (f->nargs)++;
1434
1435                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1436                                         break;
1437                         }
1438                         seq = &(f->body);
1439                         chain_group();
1440                         clear_array(ahash);
1441
1442                 } else if (tclass & TC_OPSEQ) {
1443                         rollback_token();
1444                         cn = chain_node(OC_TEST);
1445                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1446                         if (t_tclass & TC_GRPSTART) {
1447                                 rollback_token();
1448                                 chain_group();
1449                         } else {
1450                                 chain_node(OC_PRINT);
1451                         }
1452                         cn->r.n = mainseq.last;
1453
1454                 } else /* if (tclass & TC_GRPSTART) */ {
1455                         rollback_token();
1456                         chain_group();
1457                 }
1458         }
1459 }
1460
1461
1462 /* -------- program execution part -------- */
1463
1464 static node *mk_splitter(const char *s, tsplitter *spl)
1465 {
1466         regex_t *re, *ire;
1467         node *n;
1468
1469         re = &spl->re[0];
1470         ire = &spl->re[1];
1471         n = &spl->n;
1472         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1473                 regfree(re);
1474                 regfree(ire); // TODO: nuke ire, use re+1?
1475         }
1476         if (strlen(s) > 1) {
1477                 mk_re_node(s, n, re);
1478         } else {
1479                 n->info = (uint32_t) *s;
1480         }
1481
1482         return n;
1483 }
1484
1485 /* use node as a regular expression. Supplied with node ptr and regex_t
1486  * storage space. Return ptr to regex (if result points to preg, it should
1487  * be later regfree'd manually
1488  */
1489 static regex_t *as_regex(node *op, regex_t *preg)
1490 {
1491         int cflags;
1492         var *v;
1493         const char *s;
1494
1495         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1496                 return icase ? op->r.ire : op->l.re;
1497         }
1498         v = nvalloc(1);
1499         s = getvar_s(evaluate(op, v));
1500
1501         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1502         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1503          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1504          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1505          * (maybe gsub is not supposed to use REG_EXTENDED?).
1506          */
1507         if (regcomp(preg, s, cflags)) {
1508                 cflags &= ~REG_EXTENDED;
1509                 xregcomp(preg, s, cflags);
1510         }
1511         nvfree(v);
1512         return preg;
1513 }
1514
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * When b is NULL or n >= *size, *size becomes n + n/2 + 80 and the buffer
 * is (re)allocated to that size. Returns the possibly moved buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1524
1525 /* resize field storage space */
1526 static void fsrealloc(int size)
1527 {
1528         int i;
1529
1530         if (size >= maxfields) {
1531                 i = maxfields;
1532                 maxfields = size + 16;
1533                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1534                 for (; i < maxfields; i++) {
1535                         Fields[i].type = VF_SPECIAL;
1536                         Fields[i].string = NULL;
1537                 }
1538         }
1539
1540         if (size < nfields) {
1541                 for (i = size; i < nfields; i++) {
1542                         clrvar(Fields + i);
1543                 }
1544         }
1545         nfields = size;
1546 }
1547
1548 static int awk_split(const char *s, node *spl, char **slist)
1549 {
1550         int l, n = 0;
1551         char c[4];
1552         char *s1;
1553         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1554
1555         /* in worst case, each char would be a separate field */
1556         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1557         strcpy(s1, s);
1558
1559         c[0] = c[1] = (char)spl->info;
1560         c[2] = c[3] = '\0';
1561         if (*getvar_s(intvar[RS]) == '\0')
1562                 c[2] = '\n';
1563
1564         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1565                 if (!*s)
1566                         return n; /* "": zero fields */
1567                 n++; /* at least one field will be there */
1568                 do {
1569                         l = strcspn(s, c+2); /* len till next NUL or \n */
1570                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1571                          && pmatch[0].rm_so <= l
1572                         ) {
1573                                 l = pmatch[0].rm_so;
1574                                 if (pmatch[0].rm_eo == 0) {
1575                                         l++;
1576                                         pmatch[0].rm_eo++;
1577                                 }
1578                                 n++; /* we saw yet another delimiter */
1579                         } else {
1580                                 pmatch[0].rm_eo = l;
1581                                 if (s[l])
1582                                         pmatch[0].rm_eo++;
1583                         }
1584                         memcpy(s1, s, l);
1585                         /* make sure we remove *all* of the separator chars */
1586                         do {
1587                                 s1[l] = '\0';
1588                         } while (++l < pmatch[0].rm_eo);
1589                         nextword(&s1);
1590                         s += pmatch[0].rm_eo;
1591                 } while (*s);
1592                 return n;
1593         }
1594         if (c[0] == '\0') {  /* null split */
1595                 while (*s) {
1596                         *s1++ = *s++;
1597                         *s1++ = '\0';
1598                         n++;
1599                 }
1600                 return n;
1601         }
1602         if (c[0] != ' ') {  /* single-character split */
1603                 if (icase) {
1604                         c[0] = toupper(c[0]);
1605                         c[1] = tolower(c[1]);
1606                 }
1607                 if (*s1) n++;
1608                 while ((s1 = strpbrk(s1, c))) {
1609                         *s1++ = '\0';
1610                         n++;
1611                 }
1612                 return n;
1613         }
1614         /* space split */
1615         while (*s) {
1616                 s = skip_whitespace(s);
1617                 if (!*s) break;
1618                 n++;
1619                 while (*s && !isspace(*s))
1620                         *s1++ = *s++;
1621                 *s1++ = '\0';
1622         }
1623         return n;
1624 }
1625
1626 static void split_f0(void)
1627 {
1628 /* static char *fstrings; */
1629 #define fstrings (G.split_f0__fstrings)
1630
1631         int i, n;
1632         char *s;
1633
1634         if (is_f0_split)
1635                 return;
1636
1637         is_f0_split = TRUE;
1638         free(fstrings);
1639         fsrealloc(0);
1640         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1641         fsrealloc(n);
1642         s = fstrings;
1643         for (i = 0; i < n; i++) {
1644                 Fields[i].string = nextword(&s);
1645                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1646         }
1647
1648         /* set NF manually to avoid side effects */
1649         clrvar(intvar[NF]);
1650         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1651         intvar[NF]->number = nfields;
1652 #undef fstrings
1653 }
1654
1655 /* perform additional actions when some internal variables changed */
1656 static void handle_special(var *v)
1657 {
1658         int n;
1659         char *b;
1660         const char *sep, *s;
1661         int sl, l, len, i, bsize;
1662
1663         if (!(v->type & VF_SPECIAL))
1664                 return;
1665
1666         if (v == intvar[NF]) {
1667                 n = (int)getvar_i(v);
1668                 fsrealloc(n);
1669
1670                 /* recalculate $0 */
1671                 sep = getvar_s(intvar[OFS]);
1672                 sl = strlen(sep);
1673                 b = NULL;
1674                 len = 0;
1675                 for (i = 0; i < n; i++) {
1676                         s = getvar_s(&Fields[i]);
1677                         l = strlen(s);
1678                         if (b) {
1679                                 memcpy(b+len, sep, sl);
1680                                 len += sl;
1681                         }
1682                         b = qrealloc(b, len+l+sl, &bsize);
1683                         memcpy(b+len, s, l);
1684                         len += l;
1685                 }
1686                 if (b)
1687                         b[len] = '\0';
1688                 setvar_p(intvar[F0], b);
1689                 is_f0_split = TRUE;
1690
1691         } else if (v == intvar[F0]) {
1692                 is_f0_split = FALSE;
1693
1694         } else if (v == intvar[FS]) {
1695                 mk_splitter(getvar_s(v), &fsplitter);
1696
1697         } else if (v == intvar[RS]) {
1698                 mk_splitter(getvar_s(v), &rsplitter);
1699
1700         } else if (v == intvar[IGNORECASE]) {
1701                 icase = istrue(v);
1702
1703         } else {                                /* $n */
1704                 n = getvar_i(intvar[NF]);
1705                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1706                 /* right here v is invalid. Just to note... */
1707         }
1708 }
1709
1710 /* step through func/builtin/etc arguments */
1711 static node *nextarg(node **pn)
1712 {
1713         node *n;
1714
1715         n = *pn;
1716         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1717                 *pn = n->r.n;
1718                 n = n->l.n;
1719         } else {
1720                 *pn = NULL;
1721         }
1722         return n;
1723 }
1724
1725 static void hashwalk_init(var *v, xhash *array)
1726 {
1727         char **w;
1728         hash_item *hi;
1729         unsigned i;
1730         char **prev_walker = (v->type & VF_WALK) ? v->x.walker : NULL;
1731
1732         v->type |= VF_WALK;
1733
1734         /* walker structure is: "[ptr2end][ptr2start][prev]<word1>NUL<word2>NUL" */
1735         w = v->x.walker = xzalloc(2 + 3*sizeof(char *) + array->glen);
1736         //bb_error_msg("walker@%p=%p", &v->x.walker, v->x.walker);
1737         w[0] = w[1] = (char *)(w + 3);
1738         w[2] = (char *)prev_walker;
1739         for (i = 0; i < array->csize; i++) {
1740                 hi = array->items[i];
1741                 while (hi) {
1742                         strcpy(w[0], hi->name);
1743                         nextword(&w[0]);
1744                         hi = hi->next;
1745                 }
1746         }
1747 }
1748
1749 static int hashwalk_next(var *v)
1750 {
1751         char **w;
1752
1753         w = v->x.walker;
1754         if (w[1] == w[0]) {
1755                 char **prev_walker = (char**)w[2];
1756
1757                 //bb_error_msg("free(walker@%p:%p) #3, restoring to %p", &v->x.walker, v->x.walker, prev_walker);
1758                 free(v->x.walker);
1759                 v->x.walker = prev_walker;
1760                 return FALSE;
1761         }
1762
1763         setvar_s(v, nextword(&w[1]));
1764         return TRUE;
1765 }
1766
1767 /* evaluate node, return 1 when result is true, 0 otherwise */
1768 static int ptest(node *pattern)
1769 {
1770         /* ptest__v is "static": to save stack space? */
1771         return istrue(evaluate(pattern, &G.ptest__v));
1772 }
1773
1774 /* read next record from stream rsm into a variable v */
     /* Returns r: 1 when a record was stored into v, 0 when no data is left,
      * -1 when safe_read() failed (ERRNO is set in that case).
      * The text that terminated the record is stored into RT.
      * Buffer pointer, offsets and capacity are kept in *rsm between calls.
      */
1775 static int awk_getline(rstream *rsm, var *v)
1776 {
1777         char *b;
1778         regmatch_t pmatch[2];
1779         int a, p, pp=0, size;
1780         int fd, so, eo, r, rp;
1781         char c, *m, *s;
1782
1783         /* we're using our own buffer since we need access to accumulating
1784          * characters
1785          */
1786         fd = fileno(rsm->F);
             /* m = buffer, a = offset of the unconsumed data inside it,
              * p = number of unconsumed bytes, size = buffer capacity */
1787         m = rsm->buffer;
1788         a = rsm->adv;
1789         p = rsm->pos;
1790         size = rsm->size;
             /* low byte of the splitter info: used below as the separator
              * character when the splitter is not a regexp */
1791         c = (char) rsplitter.n.info;
             /* rp = number of leading newlines to skip (only advanced in the
              * c == '\0' branch below) */
1792         rp = 0;
1793
1794         if (!m)
1795                 m = qrealloc(m, 256, &size);
1796         do {
1797                 b = m + a;
                     /* so/eo = start/end offset of the separator within b;
                      * default: no separator, record runs to the end of data */
1798                 so = eo = p;
1799                 r = 1;
1800                 if (p > 0) {
1801                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1802                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1803                                                         b, 1, pmatch, 0) == 0) {
1804                                         so = pmatch[0].rm_so;
1805                                         eo = pmatch[0].rm_eo;
                                             /* a match touching the end of the buffered
                                              * data is not accepted yet: read more first */
1806                                         if (b[eo] != '\0')
1807                                                 break;
1808                                 }
1809                         } else if (c != '\0') {
                                     /* single-character separator; search starts at
                                      * pp, the amount of data present before the last read */
1810                                 s = strchr(b+pp, c);
                                     /* no separator: an embedded NUL in the newly read
                                      * part also ends the record */
1811                                 if (!s) s = memchr(b+pp, '\0', p - pp);
1812                                 if (s) {
1813                                         so = eo = s-b;
1814                                         eo++;
1815                                         break;
1816                                 }
1817                         } else {
                                     /* c == '\0': records are separated by "\n\n";
                                      * leading newlines are skipped via rp */
1818                                 while (b[rp] == '\n')
1819                                         rp++;
1820                                 s = strstr(b+rp, "\n\n");
1821                                 if (s) {
1822                                         so = eo = s-b;
                                             /* the separator swallows the whole newline run */
1823                                         while (b[eo] == '\n') eo++;
1824                                         if (b[eo] != '\0')
1825                                                 break;
1826                                 }
1827                         }
1828                 }
1829
                     /* no usable separator yet: move the unconsumed data
                      * (p bytes plus one more, presumably the NUL) to the front */
1830                 if (a > 0) {
1831                         memmove(m, (const void *)(m+a), p+1);
1832                         b = m;
1833                         a = 0;
1834                 }
1835
                     /* make room for more input and read it after the
                      * existing data */
1836                 m = qrealloc(m, a+p+128, &size);
1837                 b = m + a;
1838                 pp = p;
1839                 p += safe_read(fd, b+p, size-p-1);
                     /* p shrank: safe_read() returned a negative value */
1840                 if (p < pp) {
1841                         p = 0;
1842                         r = 0;
1843                         setvar_i(intvar[ERRNO], errno);
1844                 }
1845                 b[p] = '\0';
1846
             /* loop again only if the read delivered new bytes */
1847         } while (p > pp);
1848
1849         if (p == 0) {
                     /* nothing to return: r becomes 0 (no data) or -1 (read error) */
1850                 r--;
1851         } else {
                     /* temporarily NUL-terminate at so, then at eo, to copy out
                      * the record text and the separator text */
1852                 c = b[so]; b[so] = '\0';
1853                 setvar_s(v, b+rp);
1854                 v->type |= VF_USER;
1855                 b[so] = c;
1856                 c = b[eo]; b[eo] = '\0';
1857                 setvar_s(intvar[RT], b+so);
1858                 b[eo] = c;
1859         }
1860
             /* remember the buffer state; the record and its separator
              * (eo bytes) are now consumed */
1861         rsm->buffer = m;
1862         rsm->adv = a + eo;
1863         rsm->pos = p - eo;
1864         rsm->size = size;
1865
1866         return r;
1867 }
1868
1869 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1870 {
1871         int r = 0;
1872         char c;
1873         const char *s = format;
1874
1875         if (int_as_int && n == (int)n) {
1876                 r = snprintf(b, size, "%d", (int)n);
1877         } else {
1878                 do { c = *s; } while (c && *++s);
1879                 if (strchr("diouxX", c)) {
1880                         r = snprintf(b, size, format, (int)n);
1881                 } else if (strchr("eEfgG", c)) {
1882                         r = snprintf(b, size, format, n);
1883                 } else {
1884                         syntax_error(EMSG_INV_FMT);
1885                 }
1886         }
1887         return r;
1888 }
1889
1890 /* formatted output into an allocated buffer, return ptr to buffer */
1891 static char *awk_printf(node *n)
1892 {
1893         char *b = NULL;
1894         char *fmt, *s, *f;
1895         const char *s1;
1896         int i, j, incr, bsize;
1897         char c, c1;
1898         var *v, *arg;
1899
1900         v = nvalloc(1);
1901         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1902
1903         i = 0;
1904         while (*f) {
1905                 s = f;
1906                 while (*f && (*f != '%' || *(++f) == '%'))
1907                         f++;
1908                 while (*f && !isalpha(*f)) {
1909                         if (*f == '*')
1910                                 syntax_error("%*x formats are not supported");
1911                         f++;
1912                 }
1913
1914                 incr = (f - s) + MAXVARFMT;
1915                 b = qrealloc(b, incr + i, &bsize);
1916                 c = *f;
1917                 if (c != '\0') f++;
1918                 c1 = *f;
1919                 *f = '\0';
1920                 arg = evaluate(nextarg(&n), v);
1921
1922                 j = i;
1923                 if (c == 'c' || !c) {
1924                         i += sprintf(b+i, s, is_numeric(arg) ?
1925                                         (char)getvar_i(arg) : *getvar_s(arg));
1926                 } else if (c == 's') {
1927                         s1 = getvar_s(arg);
1928                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1929                         i += sprintf(b+i, s, s1);
1930                 } else {
1931                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1932                 }
1933                 *f = c1;
1934
1935                 /* if there was an error while sprintf, return value is negative */
1936                 if (i < j) i = j;
1937         }
1938
1939         b = xrealloc(b, i + 1);
1940         free(fmt);
1941         nvfree(v);
1942         b[i] = '\0';
1943         return b;
1944 }
1945
1946 /* common substitution routine
1947  * replace (nm) substring of (src) that match (n) with (repl), store
1948  * result into (dest), return number of substitutions. If nm=0, replace
1949  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1950  * subexpression matching (\1-\9)
1951  */
     /* Notes on the code below:
      * - the regex node parameter is named rn; the value returned is i, which
      *   counts every match seen, including those skipped before the nm-th
      * - with nm > 0 only the nm-th match is replaced and scanning stops there
      * - with ex set, any digit 0-9 preceded by a backslash is accepted;
      *   index 0 refers to the whole match
      */
1952 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1953 {
1954         char *ds = NULL;
1955         const char *s;
1956         const char *sp;
1957         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1958         regmatch_t pmatch[10];
1959         regex_t sreg, *re;
1960
             /* re == &sreg afterwards means the regex was compiled into sreg
              * and is released at the end of this function */
1961         re = as_regex(rn, &sreg);
1962         if (!src) src = intvar[F0];
1963         if (!dest) dest = intvar[F0];
1964
             /* i = matches seen so far, di = length of the output built in ds,
              * sp = unscanned remainder of the source string */
1965         i = di = 0;
1966         sp = getvar_s(src);
1967         rl = strlen(repl);
             /* REG_NOTBOL for every search that does not start at the
              * beginning of the source string */
1968         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1969                 so = pmatch[0].rm_so;
1970                 eo = pmatch[0].rm_eo;
1971
                     /* copy everything up to the end of the match verbatim */
1972                 ds = qrealloc(ds, di + eo + rl, &dssize);
1973                 memcpy(ds + di, sp, eo);
1974                 di += eo;
1975                 if (++i >= nm) {
1976                         /* replace */
                             /* drop the matched text that was just copied */
1977                         di -= (eo - so);
                             /* nbs = number of consecutive backslashes seen
                              * immediately before the current character */
1978                         nbs = 0;
1979                         for (s = repl; *s; s++) {
1980                                 ds[di++] = c = *s;
1981                                 if (c == '\\') {
1982                                         nbs++;
1983                                         continue;
1984                                 }
1985                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
                                             /* take back the character just written plus
                                              * (roughly) half of the backslashes before it */
1986                                         di -= ((nbs + 3) >> 1);
1987                                         j = 0;
1988                                         if (c != '&') {
                                                     /* digit: j selects the subexpression; the
                                                      * extra count flips the parity test below,
                                                      * so a digit is special only after a backslash */
1989                                                 j = c - '0';
1990                                                 nbs++;
1991                                         }
1992                                         if (nbs % 2) {
                                                     /* odd: the character is literal */
1993                                                 ds[di++] = c;
1994                                         } else {
                                                     /* even: insert the text of match j */
1995                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1996                                                 ds = qrealloc(ds, di + rl + n, &dssize);
1997                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1998                                                 di += n;
1999                                         }
2000                                 }
2001                                 nbs = 0;
2002                         }
2003                 }
2004
2005                 sp += eo;
2006                 if (i == nm)
2007                         break;
                     /* empty match: copy one source character so the scan moves
                      * on; stop when that character was the terminating NUL */
2008                 if (eo == so) {
2009                         ds[di] = *sp++;
2010                         if (!ds[di++])
2011                                 break;
2012                 }
2013         }
2014
             /* append whatever is left of the source and hand ds over to dest */
2015         ds = qrealloc(ds, di + strlen(sp), &dssize);
2016         strcpy(ds + di, sp);
2017         setvar_p(dest, ds);
2018         if (re == &sreg)
2019                 regfree(re);
2020         return i;
2021 }
2022
2023 static NOINLINE int do_mktime(const char *ds)
2024 {
2025         struct tm then;
2026         int count;
2027
2028         /*memset(&then, 0, sizeof(then)); - not needed */
2029         then.tm_isdst = -1; /* default is unknown */
2030
2031         /* manpage of mktime says these fields are ints,
2032          * so we can sscanf stuff directly into them */
2033         count = sscanf(ds, "%u %u %u %u %u %u %d",
2034                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2035                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2036                 &then.tm_isdst);
2037
2038         if (count < 6
2039          || (unsigned)then.tm_mon < 1
2040          || (unsigned)then.tm_year < 1900
2041         ) {
2042                 return -1;
2043         }
2044
2045         then.tm_mon -= 1;
2046         then.tm_year -= 1900;
2047
2048         return mktime(&then);
2049 }
2050
     /*
      * Execute the builtin described by node op and store its value in res.
      * Up to four arguments are collected from op->l.n; op->info supplies the
      * per-argument handling flags, the minimum argument count (top two bits)
      * and the builtin selector (info & OPNMASK). Returns res.
      */
2051 static NOINLINE var *exec_builtin(node *op, var *res)
2052 {
2053 #define tspl (G.exec_builtin__tspl)
2054
2055         var *tv;
             /* per argument: an[] = node, av[] = evaluated value,
              * as[] = string value */
2056         node *an[4];
2057         var *av[4];
2058         const char *as[4];
2059         regmatch_t pmatch[2];
2060         regex_t sreg, *re;
2061         node *spl;
2062         uint32_t isr, info;
2063         int nargs;
2064         time_t tt;
2065         char *s, *s1;
2066         int i, l, ll, n;
2067
             /* four temporaries, one per possible argument */
2068         tv = nvalloc(4);
2069         isr = info = op->info;
2070         op = op->l.n;
2071
2072         av[2] = av[3] = NULL;
             /* isr is shifted right once per argument, so the two masks below
              * test a different pair of info bits for each argument:
              * either bit set -> evaluate it; 0x08000000 set -> also fetch
              * its string value */
2073         for (i = 0; i < 4 && op; i++) {
2074                 an[i] = nextarg(&op);
2075                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2076                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2077                 isr >>= 1;
2078         }
2079
2080         nargs = i;
             /* top two bits of info: minimum number of arguments */
2081         if ((uint32_t)nargs < (info >> 30))
2082                 syntax_error(EMSG_TOO_FEW_ARGS);
2083
2084         info &= OPNMASK;
2085         switch (info) {
2086
             /* atan2 of the first two arguments */
2087         case B_a2:
2088 #if ENABLE_FEATURE_AWK_LIBM
2089                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2090 #else
2091                 syntax_error(EMSG_NO_MATH);
2092 #endif
2093                 break;
2094
             /* split string as[0] into array av[1]; result is the piece count.
              * The splitter is the third argument (a regexp node is used
              * directly, anything else is compiled from its string value),
              * or the field splitter when no third argument was given */
2095         case B_sp:
2096                 if (nargs > 2) {
2097                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2098                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2099                 } else {
2100                         spl = &fsplitter.n;
2101                 }
2102
2103                 n = awk_split(as[0], spl, &s);
2104                 s1 = s;
2105                 clear_array(iamarray(av[1]));
                     /* array elements are indexed 1..n */
2106                 for (i = 1; i <= n; i++)
2107                         setari_u(av[1], i, nextword(&s1));
2108                 free(s);
2109                 setvar_i(res, n);
2110                 break;
2111
             /* substring of as[0]: start position av[1] is 1-based and is
              * clamped into [0, length]; the optional length av[2] is clamped
              * at 0 and defaults to the rest of the string */
2112         case B_ss:
2113                 l = strlen(as[0]);
2114                 i = getvar_i(av[1]) - 1;
2115                 if (i > l) i = l;
2116                 if (i < 0) i = 0;
2117                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2118                 if (n < 0) n = 0;
2119                 s = xstrndup(as[0]+i, n);
2120                 setvar_p(res, s);
2121                 break;
2122
2123         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2124          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2125         case B_an:
2126                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2127                 break;
2128
2129         case B_co:
2130                 setvar_i(res, ~getvar_i_int(av[0]));
2131                 break;
2132
2133         case B_ls:
2134                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2135                 break;
2136
2137         case B_or:
2138                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2139                 break;
2140
2141         case B_rs:
2142                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2143                 break;
2144
2145         case B_xo:
2146                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2147                 break;
2148
             /* case conversion of a copy of as[0]; only the ASCII letters
              * a-z / A-Z are touched (bit 0x20 is set or cleared) */
2149         case B_lo:
2150         case B_up:
2151                 s1 = s = xstrdup(as[0]);
2152                 while (*s1) {
2153                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2154                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2155                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2156                         s1++;
2157                 }
2158                 setvar_p(res, s);
2159                 break;
2160
             /* 1-based position of as[1] inside as[0], 0 when it does not
              * occur or when as[1] is empty; case-insensitive when icase is set */
2161         case B_ix:
2162                 n = 0;
2163                 ll = strlen(as[1]);
                     /* l = last offset at which as[1] could still fit */
2164                 l = strlen(as[0]) - ll;
2165                 if (ll > 0 && l >= 0) {
2166                         if (!icase) {
2167                                 s = strstr(as[0], as[1]);
2168                                 if (s) n = (s - as[0]) + 1;
2169                         } else {
2170                                 /* this piece of code is terribly slow and
2171                                  * really should be rewritten
2172                                  */
2173                                 for (i=0; i<=l; i++) {
2174                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2175                                                 n = i+1;
2176                                                 break;
2177                                         }
2178                                 }
2179                         }
2180                 }
2181                 setvar_i(res, n);
2182                 break;
2183
             /* format a timestamp with strftime(): time is the second
              * argument or "now", format is the first argument or a default;
              * output is limited to MAXVARFMT bytes in g_buf */
2184         case B_ti:
2185                 if (nargs > 1)
2186                         tt = getvar_i(av[1]);
2187                 else
2188                         time(&tt);
2189                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2190                 i = strftime(g_buf, MAXVARFMT,
2191                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2192                         localtime(&tt));
2193                 g_buf[i] = '\0';
2194                 setvar_s(res, g_buf);
2195                 break;
2196
             /* date specification string -> timestamp, see do_mktime() */
2197         case B_mt:
2198                 setvar_i(res, do_mktime(as[0]));
2199                 break;
2200
             /* regex match of an[1] against as[0]; sets the RSTART and RLENGTH
              * variables and returns RSTART */
2201         case B_ma:
2202                 re = as_regex(an[1], &sreg);
2203                 n = regexec(re, as[0], 1, pmatch, 0);
2204                 if (n == 0) {
                             /* matched: turn the 0-based offsets into 1-based ones */
2205                         pmatch[0].rm_so++;
2206                         pmatch[0].rm_eo++;
2207                 } else {
                             /* no match: RSTART becomes 0 and RLENGTH -1 */
2208                         pmatch[0].rm_so = 0;
2209                         pmatch[0].rm_eo = -1;
2210                 }
2211                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2212                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2213                 setvar_i(res, pmatch[0].rm_so);
2214                 if (re == &sreg) regfree(re);
2215                 break;
2216
             /* substitution with subexpression references enabled; the
              * resulting string goes into res, the source av[3] is not
              * written (awk_sub() falls back to $0 when av[3] is NULL) */
2217         case B_ge:
2218                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2219                 break;
2220
             /* replace all matches in place in av[2]; result is awk_sub()'s count */
2221         case B_gs:
2222                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2223                 break;
2224
             /* replace the first match in place in av[2]; result is awk_sub()'s count */
2225         case B_su:
2226                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2227                 break;
2228         }
2229
2230         nvfree(tv);
2231         return res;
2232 #undef tspl
2233 }
2234
2235 /*
2236  * Evaluate node - the heart of the program. Supplied with subtree
2237  * and place where to store result. returns ptr to result.
2238  */
2239 #define XC(n) ((n) >> 8)
2240
2241 static var *evaluate(node *op, var *res)
2242 {
2243 /* This procedure is recursive so we should count every byte */
2244 #define fnargs (G.evaluate__fnargs)
2245 /* seed is initialized to 1 */
2246 #define seed   (G.evaluate__seed)
2247 #define sreg   (G.evaluate__sreg)
2248
2249         node *op1;
2250         var *v1;
2251         union {
2252                 var *v;
2253                 const char *s;
2254                 double d;
2255                 int i;
2256         } L, R;
2257         uint32_t opinfo;
2258         int opn;
2259         union {
2260                 char *s;
2261                 rstream *rsm;
2262                 FILE *F;
2263                 var *v;
2264                 regex_t *re;
2265                 uint32_t info;
2266         } X;
2267
2268         if (!op)
2269                 return setvar_s(res, NULL);
2270
2271         v1 = nvalloc(2);
2272
2273         while (op) {
2274                 opinfo = op->info;
2275                 opn = (opinfo & OPNMASK);
2276                 g_lineno = op->lineno;
2277
2278                 /* execute inevitable things */
2279                 op1 = op->l.n;
2280                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2281                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2282                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2283                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2284                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2285
2286                 switch (XC(opinfo & OPCLSMASK)) {
2287
2288                 /* -- iterative node type -- */
2289
2290                 /* test pattern */
2291                 case XC( OC_TEST ):
2292                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2293                                 /* it's range pattern */
2294                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2295                                         op->info |= OF_CHECKED;
2296                                         if (ptest(op1->r.n))
2297                                                 op->info &= ~OF_CHECKED;
2298
2299                                         op = op->a.n;
2300                                 } else {
2301                                         op = op->r.n;
2302                                 }
2303                         } else {
2304                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2305                         }
2306                         break;
2307
2308                 /* just evaluate an expression, also used as unconditional jump */
2309                 case XC( OC_EXEC ):
2310                         break;
2311
2312                 /* branch, used in if-else and various loops */
2313                 case XC( OC_BR ):
2314                         op = istrue(L.v) ? op->a.n : op->r.n;
2315                         break;
2316
2317                 /* initialize for-in loop */
2318                 case XC( OC_WALKINIT ):
2319                         hashwalk_init(L.v, iamarray(R.v));
2320                         break;
2321
2322                 /* get next array item */
2323                 case XC( OC_WALKNEXT ):
2324                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2325                         break;
2326
2327                 case XC( OC_PRINT ):
2328                 case XC( OC_PRINTF ):
2329                         X.F = stdout;
2330                         if (op->r.n) {
2331                                 X.rsm = newfile(R.s);
2332                                 if (!X.rsm->F) {
2333                                         if (opn == '|') {
2334                                                 X.rsm->F = popen(R.s, "w");
2335                                                 if (X.rsm->F == NULL)
2336                                                         bb_perror_msg_and_die("popen");
2337                                                 X.rsm->is_pipe = 1;
2338                                         } else {
2339                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2340                                         }
2341                                 }
2342                                 X.F = X.rsm->F;
2343                         }
2344
2345                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2346                                 if (!op1) {
2347                                         fputs(getvar_s(intvar[F0]), X.F);
2348                                 } else {
2349                                         while (op1) {
2350                                                 L.v = evaluate(nextarg(&op1), v1);
2351                                                 if (L.v->type & VF_NUMBER) {
2352                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2353                                                                         getvar_i(L.v), TRUE);
2354                                                         fputs(g_buf, X.F);
2355                                                 } else {
2356                                                         fputs(getvar_s(L.v), X.F);
2357                                                 }
2358
2359                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2360                                         }
2361                                 }
2362                                 fputs(getvar_s(intvar[ORS]), X.F);
2363
2364                         } else {        /* OC_PRINTF */
2365                                 L.s = awk_printf(op1);
2366                                 fputs(L.s, X.F);
2367                                 free((char*)L.s);
2368                         }
2369                         fflush(X.F);
2370                         break;
2371
2372                 case XC( OC_DELETE ):
2373                         X.info = op1->info & OPCLSMASK;
2374                         if (X.info == OC_VAR) {
2375                                 R.v = op1->l.v;
2376                         } else if (X.info == OC_FNARG) {
2377                                 R.v = &fnargs[op1->l.i];
2378                         } else {
2379                                 syntax_error(EMSG_NOT_ARRAY);
2380                         }
2381
2382                         if (op1->r.n) {
2383                                 clrvar(L.v);
2384                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2385                                 hash_remove(iamarray(R.v), L.s);
2386                         } else {
2387                                 clear_array(iamarray(R.v));
2388                         }
2389                         break;
2390
2391                 case XC( OC_NEWSOURCE ):
2392                         g_progname = op->l.s;
2393                         break;
2394
2395                 case XC( OC_RETURN ):
2396                         copyvar(res, L.v);
2397                         break;
2398
2399                 case XC( OC_NEXTFILE ):
2400                         nextfile = TRUE;
2401                 case XC( OC_NEXT ):
2402                         nextrec = TRUE;
2403                 case XC( OC_DONE ):
2404                         clrvar(res);
2405                         break;
2406
2407                 case XC( OC_EXIT ):
2408                         awk_exit(L.d);
2409
2410                 /* -- recursive node type -- */
2411
2412                 case XC( OC_VAR ):
2413                         L.v = op->l.v;
2414                         if (L.v == intvar[NF])
2415                                 split_f0();
2416                         goto v_cont;
2417
2418                 case XC( OC_FNARG ):
2419                         L.v = &fnargs[op->l.i];
2420  v_cont:
2421                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2422                         break;
2423
2424                 case XC( OC_IN ):
2425                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2426                         break;
2427
2428                 case XC( OC_REGEXP ):
2429                         op1 = op;
2430                         L.s = getvar_s(intvar[F0]);
2431                         goto re_cont;
2432
2433                 case XC( OC_MATCH ):
2434                         op1 = op->r.n;
2435  re_cont:
2436                         X.re = as_regex(op1, &sreg);
2437                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2438                         if (X.re == &sreg) regfree(X.re);
2439                         setvar_i(res, (R.i == 0) ^ (opn == '!'));
2440                         break;
2441
2442                 case XC( OC_MOVE ):
2443                         /* if source is a temporary string, jusk relink it to dest */
2444 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2445 //then L.v ends up being a string, which is wrong
2446 //                      if (R.v == v1+1 && R.v->string) {
2447 //                              res = setvar_p(L.v, R.v->string);
2448 //                              R.v->string = NULL;
2449 //                      } else {
2450                                 res = copyvar(L.v, R.v);
2451 //                      }
2452                         break;
2453
2454                 case XC( OC_TERNARY ):
2455                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2456                                 syntax_error(EMSG_POSSIBLE_ERROR);
2457                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2458                         break;
2459
2460                 case XC( OC_FUNC ):
2461                         if (!op->r.f->body.first)
2462                                 syntax_error(EMSG_UNDEF_FUNC);
2463
2464                         X.v = R.v = nvalloc(op->r.f->nargs + 1);
2465                         while (op1) {
2466                                 L.v = evaluate(nextarg(&op1), v1);
2467                                 copyvar(R.v, L.v);
2468                                 R.v->type |= VF_CHILD;
2469                                 R.v->x.parent = L.v;
2470                                 if (++R.v - X.v >= op->r.f->nargs)
2471                                         break;
2472                         }
2473
2474                         R.v = fnargs;
2475                         fnargs = X.v;
2476
2477                         L.s = g_progname;
2478                         res = evaluate(op->r.f->body.first, res);
2479                         g_progname = L.s;
2480
2481                         nvfree(fnargs);
2482                         fnargs = R.v;
2483                         break;
2484
2485                 case XC( OC_GETLINE ):
2486                 case XC( OC_PGETLINE ):
2487                         if (op1) {
2488                                 X.rsm = newfile(L.s);
2489                                 if (!X.rsm->F) {
2490                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2491                                                 X.rsm->F = popen(L.s, "r");
2492                                                 X.rsm->is_pipe = TRUE;
2493                                         } else {
2494                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2495                                         }
2496                                 }
2497                         } else {
2498                                 if (!iF) iF = next_input_file();
2499                                 X.rsm = iF;
2500                         }
2501
2502                         if (!X.rsm->F) {
2503                                 setvar_i(intvar[ERRNO], errno);
2504                                 setvar_i(res, -1);
2505                                 break;
2506                         }
2507
2508                         if (!op->r.n)
2509                                 R.v = intvar[F0];
2510
2511                         L.i = awk_getline(X.rsm, R.v);
2512                         if (L.i > 0) {
2513                                 if (!op1) {
2514                                         incvar(intvar[FNR]);
2515                                         incvar(intvar[NR]);
2516                                 }
2517                         }
2518                         setvar_i(res, L.i);
2519                         break;
2520
2521                 /* simple builtins */
2522                 case XC( OC_FBLTIN ):
2523                         switch (opn) {
2524
2525                         case F_in:
2526                                 R.d = (int)L.d;
2527                                 break;
2528
2529                         case F_rn:
2530                                 R.d = (double)rand() / (double)RAND_MAX;
2531                                 break;
2532 #if ENABLE_FEATURE_AWK_LIBM
2533                         case F_co:
2534                                 R.d = cos(L.d);
2535                                 break;
2536
2537                         case F_ex:
2538                                 R.d = exp(L.d);
2539                                 break;
2540
2541                         case F_lg:
2542                                 R.d = log(L.d);
2543                                 break;
2544
2545                         case F_si:
2546                                 R.d = sin(L.d);
2547                                 break;
2548
2549                         case F_sq:
2550                                 R.d = sqrt(L.d);
2551                                 break;
2552 #else
2553                         case F_co:
2554                         case F_ex:
2555                         case F_lg:
2556                         case F_si:
2557                         case F_sq:
2558                                 syntax_error(EMSG_NO_MATH);
2559                                 break;
2560 #endif
2561                         case F_sr:
2562                                 R.d = (double)seed;
2563                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2564                                 srand(seed);
2565                                 break;
2566
2567                         case F_ti:
2568                                 R.d = time(NULL);
2569                                 break;
2570
2571                         case F_le:
2572                                 if (!op1)
2573                                         L.s = getvar_s(intvar[F0]);
2574                                 R.d = strlen(L.s);
2575                                 break;
2576
2577                         case F_sy:
2578                                 fflush_all();
2579                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2580                                                 ? (system(L.s) >> 8) : 0;
2581                                 break;
2582
2583                         case F_ff:
2584                                 if (!op1)
2585                                         fflush(stdout);
2586                                 else {
2587                                         if (L.s && *L.s) {
2588                                                 X.rsm = newfile(L.s);
2589                                                 fflush(X.rsm->F);
2590                                         } else {
2591                                                 fflush_all();
2592                                         }
2593                                 }
2594                                 break;
2595
2596                         case F_cl:
2597                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2598                                 if (X.rsm) {
2599                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2600                                         free(X.rsm->buffer);
2601                                         hash_remove(fdhash, L.s);
2602                                 }
2603                                 if (R.i != 0)
2604                                         setvar_i(intvar[ERRNO], errno);
2605                                 R.d = (double)R.i;
2606                                 break;
2607                         }
2608                         setvar_i(res, R.d);
2609                         break;
2610
2611                 case XC( OC_BUILTIN ):
2612                         res = exec_builtin(op, res);
2613                         break;
2614
2615                 case XC( OC_SPRINTF ):
2616                         setvar_p(res, awk_printf(op1));
2617                         break;
2618
2619                 case XC( OC_UNARY ):
2620                         X.v = R.v;
2621                         L.d = R.d = getvar_i(R.v);
2622                         switch (opn) {
2623                         case 'P':
2624                                 L.d = ++R.d;
2625                                 goto r_op_change;
2626                         case 'p':
2627                                 R.d++;
2628                                 goto r_op_change;
2629                         case 'M':
2630                                 L.d = --R.d;
2631                                 goto r_op_change;
2632                         case 'm':
2633                                 R.d--;
2634                                 goto r_op_change;
2635                         case '!':
2636                                 L.d = !istrue(X.v);
2637                                 break;
2638                         case '-':
2639                                 L.d = -R.d;
2640                                 break;
2641  r_op_change:
2642                                 setvar_i(X.v, R.d);
2643                         }
2644                         setvar_i(res, L.d);
2645                         break;
2646
2647                 case XC( OC_FIELD ):
2648                         R.i = (int)getvar_i(R.v);
2649                         if (R.i == 0) {
2650                                 res = intvar[F0];
2651                         } else {
2652                                 split_f0();
2653                                 if (R.i > nfields)
2654                                         fsrealloc(R.i);
2655                                 res = &Fields[R.i - 1];
2656                         }
2657                         break;
2658
2659                 /* concatenation (" ") and index joining (",") */
2660                 case XC( OC_CONCAT ):
2661                 case XC( OC_COMMA ):
2662                         opn = strlen(L.s) + strlen(R.s) + 2;
2663                         X.s = xmalloc(opn);
2664                         strcpy(X.s, L.s);
2665                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2666                                 L.s = getvar_s(intvar[SUBSEP]);
2667                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2668                                 strcat(X.s, L.s);
2669                         }
2670                         strcat(X.s, R.s);
2671                         setvar_p(res, X.s);
2672                         break;
2673
2674                 case XC( OC_LAND ):
2675                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2676                         break;
2677
2678                 case XC( OC_LOR ):
2679                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2680                         break;
2681
2682                 case XC( OC_BINARY ):
2683                 case XC( OC_REPLACE ):
2684                         R.d = getvar_i(R.v);
2685                         switch (opn) {
2686                         case '+':
2687                                 L.d += R.d;
2688                                 break;
2689                         case '-':
2690                                 L.d -= R.d;
2691                                 break;
2692                         case '*':
2693                                 L.d *= R.d;
2694                                 break;
2695                         case '/':
2696                                 if (R.d == 0)
2697                                         syntax_error(EMSG_DIV_BY_ZERO);
2698                                 L.d /= R.d;
2699                                 break;
2700                         case '&':
2701 #if ENABLE_FEATURE_AWK_LIBM
2702                                 L.d = pow(L.d, R.d);
2703 #else
2704                                 syntax_error(EMSG_NO_MATH);
2705 #endif
2706                                 break;
2707                         case '%':
2708                                 if (R.d == 0)
2709                                         syntax_error(EMSG_DIV_BY_ZERO);
2710                                 L.d -= (int)(L.d / R.d) * R.d;
2711                                 break;
2712                         }
2713                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2714                         break;
2715
2716                 case XC( OC_COMPARE ):
2717                         if (is_numeric(L.v) && is_numeric(R.v)) {
2718                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2719                         } else {
2720                                 L.s = getvar_s(L.v);
2721                                 R.s = getvar_s(R.v);
2722                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2723                         }
2724                         switch (opn & 0xfe) {
2725                         case 0:
2726                                 R.i = (L.d > 0);
2727                                 break;
2728                         case 2:
2729                                 R.i = (L.d >= 0);
2730                                 break;
2731                         case 4:
2732                                 R.i = (L.d == 0);
2733                                 break;
2734                         }
2735                         setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2736                         break;
2737
2738                 default:
2739                         syntax_error(EMSG_POSSIBLE_ERROR);
2740                 }
2741                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2742                         op = op->a.n;
2743                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2744                         break;
2745                 if (nextrec)
2746                         break;
2747         }
2748         nvfree(v1);
2749         return res;
2750 #undef fnargs
2751 #undef seed
2752 #undef sreg
2753 }
2754
2755
2756 /* -------- main & co. -------- */
2757
2758 static int awk_exit(int r)
2759 {
2760         var tv;
2761         unsigned i;
2762         hash_item *hi;
2763
2764         zero_out_var(&tv);
2765
2766         if (!exiting) {
2767                 exiting = TRUE;
2768                 nextrec = FALSE;
2769                 evaluate(endseq.first, &tv);
2770         }
2771
2772         /* waiting for children */
2773         for (i = 0; i < fdhash->csize; i++) {
2774                 hi = fdhash->items[i];
2775                 while (hi) {
2776                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2777                                 pclose(hi->data.rs.F);
2778                         hi = hi->next;
2779                 }
2780         }
2781
2782         exit(r);
2783 }
2784
2785 /* if expr looks like "var=value", perform assignment and return 1,
2786  * otherwise return 0 */
2787 static int is_assignment(const char *expr)
2788 {
2789         char *exprc, *s, *s0, *s1;
2790
2791         exprc = xstrdup(expr);
2792         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2793                 free(exprc);
2794                 return FALSE;
2795         }
2796
2797         *(s++) = '\0';
2798         s0 = s1 = s;
2799         while (*s)
2800                 *(s1++) = nextchar(&s);
2801
2802         *s1 = '\0';
2803         setvar_u(newvar(exprc), s0);
2804         free(exprc);
2805         return TRUE;
2806 }
2807
2808 /* switch to next input file */
2809 static rstream *next_input_file(void)
2810 {
2811 #define rsm          (G.next_input_file__rsm)
2812 #define files_happen (G.next_input_file__files_happen)
2813
2814         FILE *F = NULL;
2815         const char *fname, *ind;
2816
2817         if (rsm.F)
2818                 fclose(rsm.F);
2819         rsm.F = NULL;
2820         rsm.pos = rsm.adv = 0;
2821
2822         do {
2823                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2824                         if (files_happen)
2825                                 return NULL;
2826                         fname = "-";
2827                         F = stdin;
2828                 } else {
2829                         ind = getvar_s(incvar(intvar[ARGIND]));
2830                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2831                         if (fname && *fname && !is_assignment(fname))
2832                                 F = xfopen_stdin(fname);
2833                 }
2834         } while (!F);
2835
2836         files_happen = TRUE;
2837         setvar_s(intvar[FILENAME], fname);
2838         rsm.F = F;
2839         return &rsm;
2840 #undef rsm
2841 #undef files_happen
2842 }
2843
2844 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2845 int awk_main(int argc, char **argv)
2846 {
2847         unsigned opt;
2848         char *opt_F, *opt_W;
2849         llist_t *list_v = NULL;
2850         llist_t *list_f = NULL;
2851         int i, j;
2852         var *v;
2853         var tv;
2854         char **envp;
2855         char *vnames = (char *)vNames; /* cheat */
2856         char *vvalues = (char *)vValues;
2857
2858         INIT_G();
2859
2860         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2861          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2862         if (ENABLE_LOCALE_SUPPORT)
2863                 setlocale(LC_NUMERIC, "C");
2864
2865         zero_out_var(&tv);
2866
2867         /* allocate global buffer */
2868         g_buf = xmalloc(MAXVARFMT + 1);
2869
2870         vhash = hash_init();
2871         ahash = hash_init();
2872         fdhash = hash_init();
2873         fnhash = hash_init();
2874
2875         /* initialize variables */
2876         for (i = 0; *vnames; i++) {
2877                 intvar[i] = v = newvar(nextword(&vnames));
2878                 if (*vvalues != '\377')
2879                         setvar_s(v, nextword(&vvalues));
2880                 else
2881                         setvar_i(v, 0);
2882
2883                 if (*vnames == '*') {
2884                         v->type |= VF_SPECIAL;
2885                         vnames++;
2886                 }
2887         }
2888
2889         handle_special(intvar[FS]);
2890         handle_special(intvar[RS]);
2891
2892         newfile("/dev/stdin")->F = stdin;
2893         newfile("/dev/stdout")->F = stdout;
2894         newfile("/dev/stderr")->F = stderr;
2895
2896         /* Huh, people report that sometimes environ is NULL. Oh well. */
2897         if (environ) for (envp = environ; *envp; envp++) {
2898                 /* environ is writable, thus we don't strdup it needlessly */
2899                 char *s = *envp;
2900                 char *s1 = strchr(s, '=');
2901                 if (s1) {
2902                         *s1 = '\0';
2903                         /* Both findvar and setvar_u take const char*
2904                          * as 2nd arg -> environment is not trashed */
2905                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2906                         *s1 = '=';
2907                 }
2908         }
2909         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2910         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2911         argv += optind;
2912         argc -= optind;
2913         if (opt & 0x1)
2914                 setvar_s(intvar[FS], opt_F); // -F
2915         while (list_v) { /* -v */
2916                 if (!is_assignment(llist_pop(&list_v)))
2917                         bb_show_usage();
2918         }
2919         if (list_f) { /* -f */
2920                 do {
2921                         char *s = NULL;
2922                         FILE *from_file;
2923
2924                         g_progname = llist_pop(&list_f);
2925                         from_file = xfopen_stdin(g_progname);
2926                         /* one byte is reserved for some trick in next_token */
2927                         for (i = j = 1; j > 0; i += j) {
2928                                 s = xrealloc(s, i + 4096);
2929                                 j = fread(s + i, 1, 4094, from_file);
2930                         }
2931                         s[i] = '\0';
2932                         fclose(from_file);
2933                         parse_program(s + 1);
2934                         free(s);
2935                 } while (list_f);
2936                 argc++;
2937         } else { // no -f: take program from 1st parameter
2938                 if (!argc)
2939                         bb_show_usage();
2940                 g_progname = "cmd. line";
2941                 parse_program(*argv++);
2942         }
2943         if (opt & 0x8) // -W
2944                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2945
2946         /* fill in ARGV array */
2947         setvar_i(intvar[ARGC], argc);
2948         setari_u(intvar[ARGV], 0, "awk");
2949         i = 0;
2950         while (*argv)
2951                 setari_u(intvar[ARGV], ++i, *argv++);
2952
2953         evaluate(beginseq.first, &tv);
2954         if (!mainseq.first && !endseq.first)
2955                 awk_exit(EXIT_SUCCESS);
2956
2957         /* input file could already be opened in BEGIN block */
2958         if (!iF)
2959                 iF = next_input_file();
2960
2961         /* passing through input files */
2962         while (iF) {
2963                 nextfile = FALSE;
2964                 setvar_i(intvar[FNR], 0);
2965
2966                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2967                         nextrec = FALSE;
2968                         incvar(intvar[NR]);
2969                         incvar(intvar[FNR]);
2970                         evaluate(mainseq.first, &tv);
2971
2972                         if (nextfile)
2973                                 break;
2974                 }
2975
2976                 if (i < 0)
2977                         syntax_error(strerror(errno));
2978
2979                 iF = next_input_file();
2980         }
2981
2982         awk_exit(EXIT_SUCCESS);
2983         /*return 0;*/
2984 }