d15dbb3e5dedc8a6e659fd8373cdf82065366561
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Size limit passed to fmt_num() when getvar_s() formats a number into g_buf */
#define MAXVARFMT       240
/* Smallest number of var slots nvalloc() puts into a newly allocated nvblock */
#define MINNVBLOCK      64
19
/* variable flags (bits of var::type) */
#define VF_NUMBER       0x0001  /* 1 = primary type is number */
#define VF_ARRAY        0x0002  /* 1 = it's an array */

#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */

/* These flags are static: clrvar() preserves them when the value is reset,
 * and copyvar() never copies them from the source variable. */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
/* Variable: a value that can be viewed both as a number and as a string */
typedef struct var_s {
        unsigned type;            /* flags (VF_xxx) */
        double number;            /* numeric value, or cached conversion of string */
        char *string;             /* string value; free()d by clrvar() unless VF_FSTR */
        union {
                int aidx;               /* func arg idx (for compilation stage) */
                struct xhash_s *array;  /* array ptr */
                struct var_s *parent;   /* for func args, ptr to actual parameter */
                char **walker;          /* list of array elements (for..in) */
        } x;
} var;
47
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
        struct node_s *first;     /* NOTE(review): presumably head of the node list - confirm */
        struct node_s *last;      /* NOTE(review): presumably tail of the node list - confirm */
        const char *programname;
} chain;
54
/* Function: stored in the fnhash table (see newfunc()) */
typedef struct func_s {
        unsigned nargs;
        struct chain_s body;      /* node chain of the function body */
} func;
60
/* I/O stream: stored in the fdhash table (see newfile()) */
typedef struct rstream_s {
        FILE *F;
        char *buffer;
        int adv;
        int size;
        int pos;
        smallint is_pipe;
} rstream;
70
/* Hash table entry. "data" must stay the first member: hash_search() and
 * hash_find() hand out &data to callers as the payload pointer. */
typedef struct hash_item_s {
        union {
                struct var_s v;         /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;        /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        /* hash_find() allocates sizeof(hash_item) + strlen(name) + 1 bytes */
        char name[1];                   /* really it's longer */
} hash_item;
80
/* Chained hash table keyed by string name */
typedef struct xhash_s {
        unsigned nel;           /* num of elements */
        unsigned csize;         /* current hash size (starts at FIRST_PRIME) */
        unsigned nprime;        /* next hash size in PRIMES[] (index) */
        unsigned glen;          /* summary length of item names (incl. NULs) */
        struct hash_item_s **items;     /* csize bucket heads */
} xhash;
88
/* Tree node of the parsed program. Which member of the l/r/a unions is
 * valid is not encoded here; NOTE(review): presumably it follows from the
 * operation class stored in info - confirm against the parser/evaluator. */
typedef struct node_s {
        uint32_t info;
        unsigned lineno;
        union {
                struct node_s *n;
                var *v;
                int i;
                char *s;
                regex_t *re;
        } l;
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
                int argno;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
110
/* Block of temporary variables; blocks form a doubly linked list
 * managed by nvalloc()/nvfree() */
typedef struct nvblock_s {
        int size;                 /* capacity of nv[], in vars */
        var *pos;                 /* first unused slot in nv[] */
        struct nvblock_s *prev;
        struct nvblock_s *next;
        var nv[];
} nvblock;
119
/* A node bundled with storage for two regexes.
 * NOTE(review): presumably a field/record splitter (see fsplitter and
 * rsplitter in struct globals2) - usage is not visible here. */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
124
/* simple token classes */
/* Each class is a single bit. The order must match the order of the
 * NTC-separated groups in tokenlist[] (NTC switches to tc<<1). */
/* Order and hex values are very important!!!  See next_token() */
#define TC_SEQSTART      1                              /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* either kind of unary prefix operator */
#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
159
/* combined token classes (bitwise unions of the simple classes) */
#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* If the previous token class is in CONCAT1 and the next one is in CONCAT2, */
/* a concatenation operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operator flags, stored in bits 16..21 of a token info word.
 * NOTE(review): judging by the names and the builtin comment near P(),
 * RESn presumably means "resolve operand n", STRn "as string" and NUM1
 * "as number" - confirm against evaluate(), which is not visible here. */
#define OF_RES1    0x010000
#define OF_RES2    0x020000
#define OF_STR1    0x040000
#define OF_STR2    0x080000
#define OF_NUM1    0x100000
#define OF_CHECKED 0x200000

/* combined operator flags */
/* first letter describes operand 1, second letter operand 2 */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* OPCLSMASK selects the OC_xxx/ST_xxx operation class byte of an info word;
 * OPNMASK selects the low 7 bits (operator character or F_xx/B_xx index) */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F
209
/* Operator priority lives in the highest byte of the info word
 * (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * The shift is done in uint32_t: builtin values such as 0x83 or 0xd6 do not
 * fit into a signed 32-bit int once shifted left by 24. The argument is
 * parenthesized so that a compound expression is shifted as a whole.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
217
/* Operation classes */

/* Boundary values within the OC_xxx numbering below: SHIFT_TIL_THIS is
 * numerically equal to OC_WALKINIT and RECUR_FROM_THIS to OC_BINARY.
 * NOTE(review): how the evaluator uses these boundaries is not visible here. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

/* Operation class codes; they occupy the OPCLSMASK byte of an info word */
enum {
        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
        OC_DONE = 0x2800,

        /* statement keywords (if/do/for/while entries of tokeninfo[]) */
        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
        ST_WHILE = 0x3300
};
244
/* simple builtins (OC_FBLTIN entries of tokeninfo[]):
 * F_in=int    F_rn=rand   F_co=cos     F_ex=exp     F_lg=log   F_si=sin
 * F_sq=sqrt   F_sr=srand  F_ti=systime F_le=length  F_sy=system
 * F_ff=fflush F_cl=close
 */
enum {
        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
        F_ti,   F_le,   F_sy,   F_ff,   F_cl
};
250
/* builtins (OC_BUILTIN entries of tokeninfo[]):
 * B_a2=atan2   B_ix=index   B_ma=match   B_sp=split   B_ss=substr
 * B_ti=strftime B_mt=mktime B_lo=tolower B_up=toupper
 * B_ge=gensub  B_gs=gsub    B_su=sub
 * B_an=and     B_co=compl   B_ls=lshift  B_or=or      B_rs=rshift  B_xo=xor
 */
enum {
        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
        B_ge,   B_gs,   B_su,
        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
};
257
/* tokens and their corresponding info values */

/* NTC is the separator byte placed inside tokenlist[]; NTCC is the same
 * byte as a character constant for comparisons */
#define NTC     "\377"  /* switch to next token class (tc<<1) */
#define NTCC    '\377'

/* shorthand used in tokeninfo[] */
#define OC_B    OC_BUILTIN
264
/* Token spellings. Each token is stored as one length byte followed by its
 * text. Groups are separated by NTC, one group per TC_xxx class in
 * ascending bit order; the list ends with an empty ("\0") entry.
 * tokeninfo[] holds one info value per token, in the same order. */
static const char tokenlist[] ALIGN1 =
        "\1("       NTC
        "\1)"       NTC
        "\1/"       NTC                                 /* REGEXP */
        "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
        "\2++"      "\2--"      NTC                     /* UOPPOST */
        "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
        "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
        "\2*="      "\2/="      "\2%="      "\2^="
        "\1+"       "\1-"       "\3**="     "\2**"
        "\1/"       "\1%"       "\1^"       "\1*"
        "\2!="      "\2>="      "\2<="      "\1>"
        "\1<"       "\2!~"      "\1~"       "\2&&"
        "\2||"      "\1?"       "\1:"       NTC
        "\2in"      NTC
        "\1,"       NTC
        "\1|"       NTC
        "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
        "\1]"       NTC
        "\1{"       NTC
        "\1}"       NTC
        "\1;"       NTC
        "\1\n"      NTC
        "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
        "\10continue"           "\6delete"  "\5print"
        "\6printf"  "\4next"    "\10nextfile"
        "\6return"  "\4exit"    NTC
        "\5while"   NTC
        "\4else"    NTC

        /* the BUILTIN class starts here (bitwise builtins come first) */
        "\3and"     "\5compl"   "\6lshift"  "\2or"
        "\6rshift"  "\3xor"
        "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
        "\3cos"     "\3exp"     "\3int"     "\3log"
        "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
        "\6gensub"  "\4gsub"    "\5index"   "\6length"
        "\5match"   "\5split"   "\7sprintf" "\3sub"
        "\6substr"  "\7systime" "\10strftime" "\6mktime"
        "\7tolower" "\7toupper" NTC
        "\7getline" NTC
        "\4func"    "\10function"   NTC
        "\5BEGIN"   NTC
        "\3END"     "\0"
        ;
309
/* Info word for every token of tokenlist[], in the same order.
 * A word combines an OC_xxx/ST_xxx operation class, OF_xxx operand flags,
 * a P() priority byte (or builtin argument descriptor) and a low-byte
 * operator character or F_xx/B_xx index. Tokens that need no info are 0. */
static const uint32_t tokeninfo[] = {
        0,
        0,
        OC_REGEXP,
        xS|'a',     xS|'w',     xS|'|',
        OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
        OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
            OC_FIELD|xV|P(5),
        OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
            OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
        OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
            OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
        OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
            OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
        OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
            OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
        OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
            OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
        OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
            OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
        OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
            OC_COLON|xx|P(67)|':',
        OC_IN|SV|P(49),
        OC_COMMA|SS|P(80),
        OC_PGETLINE|SV|P(37),
        OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
            OC_UNARY|xV|P(19)|'!',
        0,
        0,
        0,
        0,
        0,
        ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
        OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
        OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
        OC_RETURN|Vx,   OC_EXIT|Nx,
        ST_WHILE,
        0,

        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
        OC_GETLINE|SV|P(0),
        0,      0,
        0,
        0
};
363
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* These constants index intvar[]; their order matches vNames[] */
enum {
        CONVFMT,    OFMT,       FS,         OFS,
        ORS,        RS,         RT,         FILENAME,
        SUBSEP,     F0,         ARGIND,     ARGC,
        ARGV,       ERRNO,      FNR,        NR,
        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
};
373
/* NUL-separated names of the internal variables, in enum order.
 * A '*' at the start of the following entry marks the preceding name as
 * SPECIAL; the list ends with an empty string. */
static const char vNames[] ALIGN1 =
        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";

/* NUL-separated initial string values for the leading entries of vNames[]
 * (CONVFMT .. F0), followed by a "\377" byte.
 * NOTE(review): the "\377" presumably tells the initialization code where
 * the string values end - confirm against the code that consumes it. */
static const char vValues[] ALIGN1 =
        "%.6g\0"    "%.6g\0"    " \0"       " \0"
        "\n\0"      "\n\0"      "\0"        "\0"
        "\034\0"    "\0"        "\377";
385
/* hash size may grow to these values */
/* A table starts with FIRST_PRIME buckets (hash_init()); hash_rebuild()
 * steps through PRIMES[] and stops growing after the last entry. */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
389
390
/* Globals. Split in two parts so that the first one is addressed
 * with (mostly short) negative offsets.
 * NB: it's unsafe to put members of type "double"
 * into globals2 (gcc may fail to align them).
 */
/* First part; accessed through the G1 macro and the per-member #defines */
struct globals {
        double t_double;
        chain beginseq, mainseq, endseq;
        chain *seq;
        node *break_ptr, *continue_ptr;
        rstream *iF;
        xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables, fdhash: streams, fnhash: functions */
        const char *g_progname; /* printed by syntax_error() */
        int g_lineno;           /* printed by syntax_error() */
        int nfields;
        int maxfields; /* used in fsrealloc() only */
        var *Fields;
        nvblock *g_cb;          /* current block of temporary variables (nvalloc/nvfree) */
        char *g_pos;            /* current parse position, read by next_token() */
        char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
        smallint icase;
        smallint exiting;
        smallint nextrec;
        smallint nextfile;
        smallint is_f0_split;
};
/* Second part of the globals; accessed through the G macro.
 * Members named func__name are former function-local statics. */
struct globals2 {
        uint32_t t_info; /* often used */
        uint32_t t_tclass;
        char *t_string;
        int t_lineno;
        int t_rollback;

        var *intvar[NUM_INTERNAL_VARS]; /* often used */

        /* former statics from various functions */
        char *split_f0__fstrings;

        uint32_t next_token__save_tclass;
        uint32_t next_token__save_info;
        uint32_t next_token__ltclass;   /* set to TC_OPTERM by INIT_G() */
        smallint next_token__concat_inserted;

        smallint next_input_file__files_happen;
        rstream next_input_file__rsm;

        var *evaluate__fnargs;
        unsigned evaluate__seed;        /* set to 1 by INIT_G() */
        regex_t evaluate__sreg;

        var ptest__v;

        tsplitter exec_builtin__tspl;

        /* biggest and least used members go last */
        tsplitter fsplitter, rsplitter;
};
/* INIT_G() allocates both structs in one zeroed chunk and points
 * ptr_to_globals just past the first one, so that struct globals sits
 * immediately below the pointer (G1) and struct globals2 starts at it (G). */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthands that let the code use the globals as if they were plain variables */
#define t_double     (G1.t_double    )
#define beginseq     (G1.beginseq    )
#define mainseq      (G1.mainseq     )
#define endseq       (G1.endseq      )
#define seq          (G1.seq         )
#define break_ptr    (G1.break_ptr   )
#define continue_ptr (G1.continue_ptr)
#define iF           (G1.iF          )
#define vhash        (G1.vhash       )
#define ahash        (G1.ahash       )
#define fdhash       (G1.fdhash      )
#define fnhash       (G1.fnhash      )
#define g_progname   (G1.g_progname  )
#define g_lineno     (G1.g_lineno    )
#define nfields      (G1.nfields     )
#define maxfields    (G1.maxfields   )
#define Fields       (G1.Fields      )
#define g_cb         (G1.g_cb        )
#define g_pos        (G1.g_pos       )
#define g_buf        (G1.g_buf       )
#define icase        (G1.icase       )
#define exiting      (G1.exiting     )
#define nextrec      (G1.nextrec     )
#define nextfile     (G1.nextfile    )
#define is_f0_split  (G1.is_f0_split )
#define t_info       (G.t_info      )
#define t_tclass     (G.t_tclass    )
#define t_string     (G.t_string    )
#define t_lineno     (G.t_lineno    )
#define t_rollback   (G.t_rollback  )
#define intvar       (G.intvar      )
#define fsplitter    (G.fsplitter   )
#define rsplitter    (G.rsplitter   )
/* Allocate the zero-filled globals and set the two non-zero defaults */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
        G.next_token__ltclass = TC_OPTERM; \
        G.evaluate__seed = 1; \
} while (0)
493
494
/* function prototypes */
/* Forward declarations for functions that are used before their definition */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;
503
504 /* ---- error handling ---- */
505
/* Message texts; syntax_error() prefixes them with program name and line number */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only needed when the math builtins are compiled out */
#if !ENABLE_FEATURE_AWK_LIBM
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
#endif
518
519 static void zero_out_var(var * vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
/* Print "<g_progname>:<g_lineno>: <message>" through
 * bb_error_msg_and_die(); declared NORETURN. */
static void syntax_error(const char *message) NORETURN;
static void syntax_error(const char *message)
{
        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
}
529
530 /* ---- hash stuff ---- */
531
/* Compute the hash of a NUL-terminated name: for every character,
 * h = c + h*63 (written as a shift and a subtraction), in unsigned
 * arithmetic. The caller reduces the result modulo the table size. */
static unsigned hashidx(const char *name)
{
        unsigned h = 0;
        const char *p;

        for (p = name; *p != '\0'; p++)
                h = *p + (h << 6) - h;
        return h;
}
539
540 /* create new hash */
541 static xhash *hash_init(void)
542 {
543         xhash *newhash;
544
545         newhash = xzalloc(sizeof(xhash));
546         newhash->csize = FIRST_PRIME;
547         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
548
549         return newhash;
550 }
551
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
554 {
555         hash_item *hi;
556
557         hi = hash->items [ hashidx(name) % hash->csize ];
558         while (hi) {
559                 if (strcmp(hi->name, name) == 0)
560                         return &(hi->data);
561                 hi = hi->next;
562         }
563         return NULL;
564 }
565
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
568 {
569         unsigned newsize, i, idx;
570         hash_item **newitems, *hi, *thi;
571
572         if (hash->nprime == ARRAY_SIZE(PRIMES))
573                 return;
574
575         newsize = PRIMES[hash->nprime++];
576         newitems = xzalloc(newsize * sizeof(hash_item *));
577
578         for (i = 0; i < hash->csize; i++) {
579                 hi = hash->items[i];
580                 while (hi) {
581                         thi = hi;
582                         hi = thi->next;
583                         idx = hashidx(thi->name) % newsize;
584                         thi->next = newitems[idx];
585                         newitems[idx] = thi;
586                 }
587         }
588
589         free(hash->items);
590         hash->csize = newsize;
591         hash->items = newitems;
592 }
593
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
596 {
597         hash_item *hi;
598         unsigned idx;
599         int l;
600
601         hi = hash_search(hash, name);
602         if (!hi) {
603                 if (++hash->nel / hash->csize > 10)
604                         hash_rebuild(hash);
605
606                 l = strlen(name) + 1;
607                 hi = xzalloc(sizeof(*hi) + l);
608                 strcpy(hi->name, name);
609
610                 idx = hashidx(name) % hash->csize;
611                 hi->next = hash->items[idx];
612                 hash->items[idx] = hi;
613                 hash->glen += l;
614         }
615         return &(hi->data);
616 }
617
/* Typed wrappers around hash_find(): each returns the existing entry or a
 * freshly created zero-filled one. newvar/newfile/newfunc operate on the
 * global vhash/fdhash/fnhash tables respectively. */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
622
623 static void hash_remove(xhash *hash, const char *name)
624 {
625         hash_item *hi, **phi;
626
627         phi = &(hash->items[hashidx(name) % hash->csize]);
628         while (*phi) {
629                 hi = *phi;
630                 if (strcmp(hi->name, name) == 0) {
631                         hash->glen -= (strlen(name) + 1);
632                         hash->nel--;
633                         *phi = hi->next;
634                         free(hi);
635                         break;
636                 }
637                 phi = &(hi->next);
638         }
639 }
640
641 /* ------ some useful functions ------ */
642
643 static void skip_spaces(char **s)
644 {
645         char *p = *s;
646
647         while (1) {
648                 if (*p == '\\' && p[1] == '\n') {
649                         p++;
650                         t_lineno++;
651                 } else if (*p != ' ' && *p != '\t') {
652                         break;
653                 }
654                 p++;
655         }
656         *s = p;
657 }
658
/* Return the NUL-terminated word *s points to and move *s just past
 * that word's terminating NUL. */
static char *nextword(char **s)
{
        char *word = *s;
        char *end = word;

        while (*end != '\0')
                end++;
        *s = end + 1;

        return word;
}
667
/* Fetch the next character from *s and advance *s. A backslash is handed
 * to bb_process_escape_sequence(); if that returns a backslash without
 * consuming anything, the character after the backslash is returned as is. */
static char nextchar(char **s)
{
        char c = *(*s)++;
        char *after_backslash = *s;

        if (c != '\\')
                return c;

        c = bb_process_escape_sequence((const char**)s);
        if (c == '\\' && *s == after_backslash)
                c = *(*s)++;
        return c;
}
678
679 static ALWAYS_INLINE int isalnum_(int c)
680 {
681         return (isalnum(c) || c == '_');
682 }
683
/* Convert the number at *pp to double and advance *pp past it.
 *
 * With ENABLE_DESKTOP, a leading "0x"/"0X" or "0<digit>" is first tried as
 * a hex or octal integer via strtoull(..., 0). Hex results are returned
 * directly. For the octal attempt, if parsing stopped on a digit or on
 * '.', the text was really a decimal/floating number with leading zeros
 * (e.g. "009.5" or "00.5"), so it is re-parsed from the start by strtod()
 * instead of returning a truncated value.
 */
static double my_strtod(char **pp)
{
        char *cp = *pp;

#if ENABLE_DESKTOP
        if (cp[0] == '0') {
                /* Might be hex or octal integer: 0x123abc or 07777 */
                char c = (cp[1] | 0x20);

                if (c == 'x' || isdigit((unsigned char)cp[1])) {
                        unsigned long long ull = strtoull(cp, pp, 0);

                        if (c == 'x')
                                return ull;
                        c = **pp;
                        if (!isdigit((unsigned char)c) && c != '.')
                                return ull;
                        /* else: it may be a floating number, e.g.
                         * 009.123 (*pp points to '9') or
                         * 000.123 (*pp points to '.');
                         * fall through to strtod, which resets *pp.
                         */
                }
        }
#endif
        return strtod(cp, pp);
}
695
696 /* -------- working with variables (set/get/copy/etc) -------- */
697
698 static xhash *iamarray(var *v)
699 {
700         var *a = v;
701
702         while (a->type & VF_CHILD)
703                 a = a->x.parent;
704
705         if (!(a->type & VF_ARRAY)) {
706                 a->type |= VF_ARRAY;
707                 a->x.array = hash_init();
708         }
709         return a->x.array;
710 }
711
712 static void clear_array(xhash *array)
713 {
714         unsigned i;
715         hash_item *hi, *thi;
716
717         for (i = 0; i < array->csize; i++) {
718                 hi = array->items[i];
719                 while (hi) {
720                         thi = hi;
721                         hi = hi->next;
722                         free(thi->data.v.string);
723                         free(thi);
724                 }
725                 array->items[i] = NULL;
726         }
727         array->glen = array->nel = 0;
728 }
729
730 /* clear a variable */
731 static var *clrvar(var *v)
732 {
733         if (!(v->type & VF_FSTR))
734                 free(v->string);
735
736         v->type &= VF_DONTTOUCH;
737         v->type |= VF_DIRTY;
738         v->string = NULL;
739         return v;
740 }
741
742 /* assign string value to variable */
743 static var *setvar_p(var *v, char *value)
744 {
745         clrvar(v);
746         v->string = value;
747         handle_special(v);
748         return v;
749 }
750
751 /* same as setvar_p but make a copy of string */
752 static var *setvar_s(var *v, const char *value)
753 {
754         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
755 }
756
757 /* same as setvar_s but set USER flag */
758 static var *setvar_u(var *v, const char *value)
759 {
760         setvar_s(v, value);
761         v->type |= VF_USER;
762         return v;
763 }
764
765 /* set array element to user string */
766 static void setari_u(var *a, int idx, const char *s)
767 {
768         char sidx[sizeof(int)*3 + 1];
769         var *v;
770
771         sprintf(sidx, "%d", idx);
772         v = findvar(iamarray(a), sidx);
773         setvar_u(v, s);
774 }
775
776 /* assign numeric value to variable */
777 static var *setvar_i(var *v, double value)
778 {
779         clrvar(v);
780         v->type |= VF_NUMBER;
781         v->number = value;
782         handle_special(v);
783         return v;
784 }
785
786 static const char *getvar_s(var *v)
787 {
788         /* if v is numeric and has no cached string, convert it to string */
789         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
791                 v->string = xstrdup(g_buf);
792                 v->type |= VF_CACHED;
793         }
794         return (v->string == NULL) ? "" : v->string;
795 }
796
797 static double getvar_i(var *v)
798 {
799         char *s;
800
801         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
802                 v->number = 0;
803                 s = v->string;
804                 if (s && *s) {
805                         v->number = my_strtod(&s);
806                         if (v->type & VF_USER) {
807                                 skip_spaces(&s);
808                                 if (*s != '\0')
809                                         v->type &= ~VF_USER;
810                         }
811                 } else {
812                         v->type &= ~VF_USER;
813                 }
814                 v->type |= VF_CACHED;
815         }
816         return v->number;
817 }
818
819 /* Used for operands of bitwise ops */
820 static unsigned long getvar_i_int(var *v)
821 {
822         double d = getvar_i(v);
823
824         /* Casting doubles to longs is undefined for values outside
825          * of target type range. Try to widen it as much as possible */
826         if (d >= 0)
827                 return (unsigned long)d;
828         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
829         return - (long) (unsigned long) (-d);
830 }
831
832 static var *copyvar(var *dest, const var *src)
833 {
834         if (dest != src) {
835                 clrvar(dest);
836                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837                 dest->number = src->number;
838                 if (src->string)
839                         dest->string = xstrdup(src->string);
840         }
841         handle_special(dest);
842         return dest;
843 }
844
845 static var *incvar(var *v)
846 {
847         return setvar_i(v, getvar_i(v) + 1.);
848 }
849
850 /* return true if v is number or numeric string */
851 static int is_numeric(var *v)
852 {
853         getvar_i(v);
854         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
855 }
856
857 /* return 1 when value of v corresponds to true, 0 otherwise */
858 static int istrue(var *v)
859 {
860         if (is_numeric(v))
861                 return (v->number == 0) ? 0 : 1;
862         return (v->string && *(v->string)) ? 1 : 0;
863 }
864
865 /* temporary variables allocator. Last allocated should be first freed */
866 static var *nvalloc(int n)
867 {
868         nvblock *pb = NULL;
869         var *v, *r;
870         int size;
871
872         while (g_cb) {
873                 pb = g_cb;
874                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
875                 g_cb = g_cb->next;
876         }
877
878         if (!g_cb) {
879                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
880                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
881                 g_cb->size = size;
882                 g_cb->pos = g_cb->nv;
883                 g_cb->prev = pb;
884                 /*g_cb->next = NULL; - xzalloc did it */
885                 if (pb) pb->next = g_cb;
886         }
887
888         v = r = g_cb->pos;
889         g_cb->pos += n;
890
891         while (v < g_cb->pos) {
892                 v->type = 0;
893                 v->string = NULL;
894                 v++;
895         }
896
897         return r;
898 }
899
900 static void nvfree(var *v)
901 {
902         var *p;
903
904         if (v < g_cb->nv || v >= g_cb->pos)
905                 syntax_error(EMSG_INTERNAL_ERROR);
906
907         for (p = v; p < g_cb->pos; p++) {
908                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
909                         clear_array(iamarray(p));
910                         free(p->x.array->items);
911                         free(p->x.array);
912                 }
913                 if (p->type & VF_WALK)
914                         free(p->x.walker);
915
916                 clrvar(p);
917         }
918
919         g_cb->pos = v;
920         while (g_cb->prev && g_cb->pos == g_cb->nv) {
921                 g_cb = g_cb->prev;
922         }
923 }
924
925 /* ------- awk program text parsing ------- */
926
927 /* Parse next token pointed by global pos, place results into global ttt.
928  * If token isn't expected, give away. Return token class
929  */
    /* expected: bitmask of token classes the caller accepts.
     * Returns the class of the delivered token (also stored in t_tclass);
     * syntax_error() is called when that class is not part of 'expected'.
     *
     * Results are left in globals: t_info for tokens found in tokenlist,
     * t_string for strings, regexps and names, t_double for numbers.
     * The program text is modified in place: strings, regexps and names
     * are rewritten and NUL-terminated inside the source buffer.
     */
930 static uint32_t next_token(uint32_t expected)
931 {
932 #define concat_inserted (G.next_token__concat_inserted)
933 #define save_tclass     (G.next_token__save_tclass)
934 #define save_info       (G.next_token__save_info)
935 /* Initialized to TC_OPTERM: */
936 #define ltclass         (G.next_token__ltclass)
937
938         char *p, *pp, *s;
939         const char *tl;
940         uint32_t tc;
941         const uint32_t *ti;
942         int l;
943
            /* rollback_token() was called: hand out the current token again */
944         if (t_rollback) {
945                 t_rollback = FALSE;
946
            /* the previous call returned a synthetic concatenation operator;
             * now deliver the real token that was put aside at that time */
947         } else if (concat_inserted) {
948                 concat_inserted = FALSE;
949                 t_tclass = save_tclass;
950                 t_info = save_info;
951
952         } else {
953                 p = g_pos;
954  readnext:
955                 skip_spaces(&p);
956                 g_lineno = t_lineno;
                    /* a '#' comment runs up to, but not including, the newline */
957                 if (*p == '#')
958                         while (*p != '\n' && *p != '\0')
959                                 p++;
960
961                 if (*p == '\n')
962                         t_lineno++;
963
964                 if (*p == '\0') {
965                         tc = TC_EOF;
966
967                 } else if (*p == '\"') {
968                         /* it's a string */
                            /* rewritten in place: each char produced by nextchar()
                             * is stored back into the program buffer */
969                         t_string = s = ++p;
970                         while (*p != '\"') {
971                                 if (*p == '\0' || *p == '\n')
972                                         syntax_error(EMSG_UNEXP_EOS);
973                                 *(s++) = nextchar(&p);
974                         }
975                         p++;
976                         *s = '\0';
977                         tc = TC_STRING;
978
                    /* '/' starts a regexp only when the caller expects one */
979                 } else if ((expected & TC_REGEXP) && *p == '/') {
980                         /* it's regexp */
981                         t_string = s = ++p;
982                         while (*p != '/') {
983                                 if (*p == '\0' || *p == '\n')
984                                         syntax_error(EMSG_UNEXP_EOS);
985                                 *s = *p++;
986                                 if (*s++ == '\\') {
987                                         pp = p;
988                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
                                            /* an escaped backslash is stored as two backslashes */
989                                         if (*pp == '\\')
990                                                 *s++ = '\\';
                                            /* nothing was consumed after the backslash:
                                             * copy the following char as is */
991                                         if (p == pp)
992                                                 *s++ = *p++;
993                                 }
994                         }
995                         p++;
996                         *s = '\0';
997                         tc = TC_REGEXP;
998
999                 } else if (*p == '.' || isdigit(*p)) {
1000                         /* it's a number */
1001                         t_double = my_strtod(&p);
                             /* a '.' right after a number is rejected */
1002                         if (*p == '.')
1003                                 syntax_error(EMSG_UNEXP_TOKEN);
1004                         tc = TC_NUMBER;
1005
1006                 } else {
1007                         /* search for something known */
                             /* tokenlist is walked as a run of length-prefixed entries;
                              * a length byte equal to NTCC names no token but moves tc
                              * to the next class bit. ti advances through tokeninfo
                              * once per real entry. */
1008                         tl = tokenlist;
1009                         tc = 0x00000001;
1010                         ti = tokeninfo;
1011                         while (*tl) {
1012                                 l = *(tl++);
1013                                 if (l == NTCC) {
1014                                         tc <<= 1;
1015                                         continue;
1016                                 }
1017                                 /* if token class is expected, token
1018                                  * matches and it's not a longer word,
1019                                  * then this is what we are looking for
1020                                  */
1021                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1022                                  && *tl == *p && strncmp(p, tl, l) == 0
1023                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1024                                 ) {
1025                                         t_info = *ti;
1026                                         p += l;
1027                                         break;
1028                                 }
1029                                 ti++;
1030                                 tl += l;
1031                         }
1032
                             /* the end of tokenlist was reached without a match */
1033                         if (!*tl) {
1034                                 /* it's a name (var/array/function),
1035                                  * otherwise it's something wrong
1036                                  */
1037                                 if (!isalnum_(*p))
1038                                         syntax_error(EMSG_UNEXP_TOKEN);
1039
                                     /* shift the name one byte to the left so that the
                                      * terminating NUL fits without overwriting the char
                                      * that follows the name; the byte in front of the
                                      * name is overwritten instead */
1040                                 t_string = --p;
1041                                 while (isalnum_(*(++p))) {
1042                                         *(p-1) = *p;
1043                                 }
1044                                 *(p-1) = '\0';
1045                                 tc = TC_VARIABLE;
1046                                 /* also consume whitespace between functionname and bracket */
1047                                 if (!(expected & TC_VARIABLE))
1048                                         skip_spaces(&p);
                                     /* '(' is left in the input for the caller,
                                      * whereas '[' is consumed here */
1049                                 if (*p == '(') {
1050                                         tc = TC_FUNCTION;
1051                                 } else {
1052                                         if (*p == '[') {
1053                                                 p++;
1054                                                 tc = TC_ARRAY;
1055                                         }
1056                                 }
1057                         }
1058                 }
1059                 g_pos = p;
1060
1061                 /* skipping newlines in some cases */
                     /* i.e. when the class of the previous token is in TC_NOTERM */
1062                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1063                         goto readnext;
1064
1065                 /* insert concatenation operator when needed */
                     /* the real token is put aside in save_tclass/save_info and
                      * is delivered by the next call */
1066                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1067                         concat_inserted = TRUE;
1068                         save_tclass = tc;
1069                         save_info = t_info;
1070                         tc = TC_BINOP;
1071                         t_info = OC_CONCAT | SS | P(35);
1072                 }
1073
1074                 t_tclass = tc;
1075         }
             /* remember the class just delivered for the next call */
1076         ltclass = t_tclass;
1077
1078         /* Are we ready for this? */
1079         if (!(ltclass & expected))
1080                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1081                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1082
1083         return ltclass;
1084 #undef concat_inserted
1085 #undef save_tclass
1086 #undef save_info
1087 #undef ltclass
1088 }
1089
1090 static void rollback_token(void)
1091 {
1092         t_rollback = TRUE;
1093 }
1094
1095 static node *new_node(uint32_t info)
1096 {
1097         node *n;
1098
1099         n = xzalloc(sizeof(node));
1100         n->info = info;
1101         n->lineno = g_lineno;
1102         return n;
1103 }
1104
1105 static node *mk_re_node(const char *s, node *n, regex_t *re)
1106 {
1107         n->info = OC_REGEXP;
1108         n->l.re = re;
1109         n->r.ire = re + 1;
1110         xregcomp(re, s, REG_EXTENDED);
1111         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1112
1113         return n;
1114 }
1115
1116 static node *condition(void)
1117 {
1118         next_token(TC_SEQSTART);
1119         return parse_expr(TC_SEQTERM);
1120 }
1121
1122 /* parse expression terminated by given argument, return ptr
1123  * to built subtree. Terminator is eaten by parse_expr */
1124 static node *parse_expr(uint32_t iexp)
1125 {
1126         node sn;
1127         node *cn = &sn;
1128         node *vn, *glptr;
1129         uint32_t tc, xtc;
1130         var *v;
1131
1132         sn.info = PRIMASK;
1133         sn.r.n = glptr = NULL;
1134         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1135
1136         while (!((tc = next_token(xtc)) & iexp)) {
1137                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1138                         /* input redirection (<) attached to glptr node */
1139                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1140                         cn->a.n = glptr;
1141                         xtc = TC_OPERAND | TC_UOPPRE;
1142                         glptr = NULL;
1143
1144                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1145                         /* for binary and postfix-unary operators, jump back over
1146                          * previous operators with higher priority */
1147                         vn = cn;
1148                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1149                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1150                                 vn = vn->a.n;
1151                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1152                                 t_info += P(6);
1153                         cn = vn->a.n->r.n = new_node(t_info);
1154                         cn->a.n = vn->a.n;
1155                         if (tc & TC_BINOP) {
1156                                 cn->l.n = vn;
1157                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1159                                         /* it's a pipe */
1160                                         next_token(TC_GETLINE);
1161                                         /* give maximum priority to this pipe */
1162                                         cn->info &= ~PRIMASK;
1163                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1164                                 }
1165                         } else {
1166                                 cn->r.n = vn;
1167                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1168                         }
1169                         vn->a.n = cn;
1170
1171                 } else {
1172                         /* for operands and prefix-unary operators, attach them
1173                          * to last node */
1174                         vn = cn;
1175                         cn = vn->r.n = new_node(t_info);
1176                         cn->a.n = vn;
1177                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1178                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1179                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1180                                 /* one should be very careful with switch on tclass -
1181                                  * only simple tclasses should be used! */
1182                                 switch (tc) {
1183                                 case TC_VARIABLE:
1184                                 case TC_ARRAY:
1185                                         cn->info = OC_VAR;
1186                                         v = hash_search(ahash, t_string);
1187                                         if (v != NULL) {
1188                                                 cn->info = OC_FNARG;
1189                                                 cn->l.i = v->x.aidx;
1190                                         } else {
1191                                                 cn->l.v = newvar(t_string);
1192                                         }
1193                                         if (tc & TC_ARRAY) {
1194                                                 cn->info |= xS;
1195                                                 cn->r.n = parse_expr(TC_ARRTERM);
1196                                         }
1197                                         break;
1198
1199                                 case TC_NUMBER:
1200                                 case TC_STRING:
1201                                         cn->info = OC_VAR;
1202                                         v = cn->l.v = xzalloc(sizeof(var));
1203                                         if (tc & TC_NUMBER)
1204                                                 setvar_i(v, t_double);
1205                                         else
1206                                                 setvar_s(v, t_string);
1207                                         break;
1208
1209                                 case TC_REGEXP:
1210                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1211                                         break;
1212
1213                                 case TC_FUNCTION:
1214                                         cn->info = OC_FUNC;
1215                                         cn->r.f = newfunc(t_string);
1216                                         cn->l.n = condition();
1217                                         break;
1218
1219                                 case TC_SEQSTART:
1220                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1221                                         cn->a.n = vn;
1222                                         break;
1223
1224                                 case TC_GETLINE:
1225                                         glptr = cn;
1226                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1227                                         break;
1228
1229                                 case TC_BUILTIN:
1230                                         cn->l.n = condition();
1231                                         break;
1232                                 }
1233                         }
1234                 }
1235         }
1236         return sn.r.n;
1237 }
1238
1239 /* add node to chain. Return ptr to alloc'd node */
1240 static node *chain_node(uint32_t info)
1241 {
1242         node *n;
1243
1244         if (!seq->first)
1245                 seq->first = seq->last = new_node(0);
1246
1247         if (seq->programname != g_progname) {
1248                 seq->programname = g_progname;
1249                 n = chain_node(OC_NEWSOURCE);
1250                 n->l.s = xstrdup(g_progname);
1251         }
1252
1253         n = seq->last;
1254         n->info = info;
1255         seq->last = n->a.n = new_node(OC_DONE);
1256
1257         return n;
1258 }
1259
1260 static void chain_expr(uint32_t info)
1261 {
1262         node *n;
1263
1264         n = chain_node(info);
1265         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266         if (t_tclass & TC_GRPTERM)
1267                 rollback_token();
1268 }
1269
1270 static node *chain_loop(node *nn)
1271 {
1272         node *n, *n2, *save_brk, *save_cont;
1273
1274         save_brk = break_ptr;
1275         save_cont = continue_ptr;
1276
1277         n = chain_node(OC_BR | Vx);
1278         continue_ptr = new_node(OC_EXEC);
1279         break_ptr = new_node(OC_EXEC);
1280         chain_group();
1281         n2 = chain_node(OC_EXEC | Vx);
1282         n2->l.n = nn;
1283         n2->a.n = n;
1284         continue_ptr->a.n = n2;
1285         break_ptr->a.n = n->r.n = seq->last;
1286
1287         continue_ptr = save_cont;
1288         break_ptr = save_brk;
1289
1290         return n;
1291 }
1292
1293 /* parse group and attach it to chain */
1294 static void chain_group(void)
1295 {
1296         uint32_t c;
1297         node *n, *n2, *n3;
1298
1299         do {
1300                 c = next_token(TC_GRPSEQ);
1301         } while (c & TC_NEWLINE);
1302
1303         if (c & TC_GRPSTART) {
1304                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1305                         if (t_tclass & TC_NEWLINE) continue;
1306                         rollback_token();
1307                         chain_group();
1308                 }
1309         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1310                 rollback_token();
1311                 chain_expr(OC_EXEC | Vx);
1312         } else {                                                /* TC_STATEMNT */
1313                 switch (t_info & OPCLSMASK) {
1314                 case ST_IF:
1315                         n = chain_node(OC_BR | Vx);
1316                         n->l.n = condition();
1317                         chain_group();
1318                         n2 = chain_node(OC_EXEC);
1319                         n->r.n = seq->last;
1320                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1321                                 chain_group();
1322                                 n2->a.n = seq->last;
1323                         } else {
1324                                 rollback_token();
1325                         }
1326                         break;
1327
1328                 case ST_WHILE:
1329                         n2 = condition();
1330                         n = chain_loop(NULL);
1331                         n->l.n = n2;
1332                         break;
1333
1334                 case ST_DO:
1335                         n2 = chain_node(OC_EXEC);
1336                         n = chain_loop(NULL);
1337                         n2->a.n = n->a.n;
1338                         next_token(TC_WHILE);
1339                         n->l.n = condition();
1340                         break;
1341
1342                 case ST_FOR:
1343                         next_token(TC_SEQSTART);
1344                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1345                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1346                                 if ((n2->info & OPCLSMASK) != OC_IN)
1347                                         syntax_error(EMSG_UNEXP_TOKEN);
1348                                 n = chain_node(OC_WALKINIT | VV);
1349                                 n->l.n = n2->l.n;
1350                                 n->r.n = n2->r.n;
1351                                 n = chain_loop(NULL);
1352                                 n->info = OC_WALKNEXT | Vx;
1353                                 n->l.n = n2->l.n;
1354                         } else {                        /* for (;;) */
1355                                 n = chain_node(OC_EXEC | Vx);
1356                                 n->l.n = n2;
1357                                 n2 = parse_expr(TC_SEMICOL);
1358                                 n3 = parse_expr(TC_SEQTERM);
1359                                 n = chain_loop(n3);
1360                                 n->l.n = n2;
1361                                 if (!n2)
1362                                         n->info = OC_EXEC;
1363                         }
1364                         break;
1365
1366                 case OC_PRINT:
1367                 case OC_PRINTF:
1368                         n = chain_node(t_info);
1369                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1370                         if (t_tclass & TC_OUTRDR) {
1371                                 n->info |= t_info;
1372                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1373                         }
1374                         if (t_tclass & TC_GRPTERM)
1375                                 rollback_token();
1376                         break;
1377
1378                 case OC_BREAK:
1379                         n = chain_node(OC_EXEC);
1380                         n->a.n = break_ptr;
1381                         break;
1382
1383                 case OC_CONTINUE:
1384                         n = chain_node(OC_EXEC);
1385                         n->a.n = continue_ptr;
1386                         break;
1387
1388                 /* delete, next, nextfile, return, exit */
1389                 default:
1390                         chain_expr(t_info);
1391                 }
1392         }
1393 }
1394
1395 static void parse_program(char *p)
1396 {
1397         uint32_t tclass;
1398         node *cn;
1399         func *f;
1400         var *v;
1401
1402         g_pos = p;
1403         t_lineno = 1;
1404         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1405                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1406
1407                 if (tclass & TC_OPTERM)
1408                         continue;
1409
1410                 seq = &mainseq;
1411                 if (tclass & TC_BEGIN) {
1412                         seq = &beginseq;
1413                         chain_group();
1414
1415                 } else if (tclass & TC_END) {
1416                         seq = &endseq;
1417                         chain_group();
1418
1419                 } else if (tclass & TC_FUNCDECL) {
1420                         next_token(TC_FUNCTION);
1421                         g_pos++;
1422                         f = newfunc(t_string);
1423                         f->body.first = NULL;
1424                         f->nargs = 0;
1425                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1426                                 v = findvar(ahash, t_string);
1427                                 v->x.aidx = (f->nargs)++;
1428
1429                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1430                                         break;
1431                         }
1432                         seq = &(f->body);
1433                         chain_group();
1434                         clear_array(ahash);
1435
1436                 } else if (tclass & TC_OPSEQ) {
1437                         rollback_token();
1438                         cn = chain_node(OC_TEST);
1439                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1440                         if (t_tclass & TC_GRPSTART) {
1441                                 rollback_token();
1442                                 chain_group();
1443                         } else {
1444                                 chain_node(OC_PRINT);
1445                         }
1446                         cn->r.n = mainseq.last;
1447
1448                 } else /* if (tclass & TC_GRPSTART) */ {
1449                         rollback_token();
1450                         chain_group();
1451                 }
1452         }
1453 }
1454
1455
1456 /* -------- program execution part -------- */
1457
1458 static node *mk_splitter(const char *s, tsplitter *spl)
1459 {
1460         regex_t *re, *ire;
1461         node *n;
1462
1463         re = &spl->re[0];
1464         ire = &spl->re[1];
1465         n = &spl->n;
1466         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1467                 regfree(re);
1468                 regfree(ire); // TODO: nuke ire, use re+1?
1469         }
1470         if (strlen(s) > 1) {
1471                 mk_re_node(s, n, re);
1472         } else {
1473                 n->info = (uint32_t) *s;
1474         }
1475
1476         return n;
1477 }
1478
1479 /* use node as a regular expression. Supplied with node ptr and regex_t
1480  * storage space. Return ptr to regex (if result points to preg, it should
1481  * be later regfree'd manually
1482  */
1483 static regex_t *as_regex(node *op, regex_t *preg)
1484 {
1485         int cflags;
1486         var *v;
1487         const char *s;
1488
1489         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1490                 return icase ? op->r.ire : op->l.re;
1491         }
1492         v = nvalloc(1);
1493         s = getvar_s(evaluate(op, v));
1494
1495         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1496         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1497          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1498          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1499          * (maybe gsub is not supposed to use REG_EXTENDED?).
1500          */
1501         if (regcomp(preg, s, cflags)) {
1502                 cflags &= ~REG_EXTENDED;
1503                 xregcomp(preg, s, cflags);
1504         }
1505         nvfree(v);
1506         return preg;
1507 }
1508
/* Gradually growing buffer: make sure *b exists and holds more than n
 * bytes. When it does not, it is reallocated to n + n/2 + 80 bytes and
 * *size is updated. *size is not read while *b is still NULL. */
static void qrealloc(char **b, int n, int *size)
{
        if (*b != NULL && n < *size)
                return;

        *size = n + (n>>1) + 80;
        *b = xrealloc(*b, *size);
}
1517
1518 /* resize field storage space */
1519 static void fsrealloc(int size)
1520 {
1521         int i;
1522
1523         if (size >= maxfields) {
1524                 i = maxfields;
1525                 maxfields = size + 16;
1526                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1527                 for (; i < maxfields; i++) {
1528                         Fields[i].type = VF_SPECIAL;
1529                         Fields[i].string = NULL;
1530                 }
1531         }
1532
1533         if (size < nfields) {
1534                 for (i = size; i < nfields; i++) {
1535                         clrvar(Fields + i);
1536                 }
1537         }
1538         nfields = size;
1539 }
1540
/* Split string s into fields according to splitter node spl.
 *
 * *slist receives a freshly allocated buffer that holds the fields one
 * after another, each terminated by NUL; the caller owns that buffer.
 * Returns the number of fields.
 *
 * Four modes, selected by spl->info:
 *  - OC_REGEXP node: fields are separated by regexp matches
 *  - low byte '\0': every character is a field of its own
 *  - low byte ' ': fields are runs of non-whitespace characters
 *  - any other char: fields are separated by that single character
 *    (by both of its cases when icase is set)
 * When RS is the empty string, newline acts as an additional separator
 * in the regexp and single-character modes.
 */
1541 static int awk_split(const char *s, node *spl, char **slist)
1542 {
1543         int l, n = 0;
1544         char c[4];
1545         char *s1;
1546         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1547
1548         /* in worst case, each char would be a separate field */
1549         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1550         strcpy(s1, s);
1551
             /* c doubles as two strings: c itself is the separator set for
              * strpbrk(), c+2 is the terminator set for strcspn() (empty,
              * or "\n" when RS is empty) */
1552         c[0] = c[1] = (char)spl->info;
1553         c[2] = c[3] = '\0';
1554         if (*getvar_s(intvar[RS]) == '\0')
1555                 c[2] = '\n';
1556
1557         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1558                 if (!*s)
1559                         return n; /* "": zero fields */
1560                 n++; /* at least one field will be there */
1561                 do {
1562                         l = strcspn(s, c+2); /* len till next NUL or \n */
1563                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1564                          && pmatch[0].rm_so <= l
1565                         ) {
1566                                 l = pmatch[0].rm_so;
                                     /* empty match at the very start: treat the
                                      * first char as the field and step over it */
1567                                 if (pmatch[0].rm_eo == 0) {
1568                                         l++;
1569                                         pmatch[0].rm_eo++;
1570                                 }
1571                                 n++; /* we saw yet another delimiter */
1572                         } else {
                                     /* no usable match: the field runs up to the
                                      * terminator, which is skipped if it is '\n' */
1573                                 pmatch[0].rm_eo = l;
1574                                 if (s[l])
1575                                         pmatch[0].rm_eo++;
1576                         }
1577                         memcpy(s1, s, l);
1578                         /* make sure we remove *all* of the separator chars */
1579                         do {
1580                                 s1[l] = '\0';
1581                         } while (++l < pmatch[0].rm_eo);
1582                         nextword(&s1);
1583                         s += pmatch[0].rm_eo;
1584                 } while (*s);
1585                 return n;
1586         }
1587         if (c[0] == '\0') {  /* null split */
1588                 while (*s) {
1589                         *s1++ = *s++;
1590                         *s1++ = '\0';
1591                         n++;
1592                 }
1593                 return n;
1594         }
1595         if (c[0] != ' ') {  /* single-character split */
1596                 if (icase) {
1597                         c[0] = toupper(c[0]);
1598                         c[1] = tolower(c[1]);
1599                 }
                     /* works on the copy made above: separators are
                      * replaced by NUL in place */
1600                 if (*s1) n++;
1601                 while ((s1 = strpbrk(s1, c))) {
1602                         *s1++ = '\0';
1603                         n++;
1604                 }
1605                 return n;
1606         }
1607         /* space split */
             /* words are copied one by one over the copy made above */
1608         while (*s) {
1609                 s = skip_whitespace(s);
1610                 if (!*s) break;
1611                 n++;
1612                 while (*s && !isspace(*s))
1613                         *s1++ = *s++;
1614                 *s1++ = '\0';
1615         }
1616         return n;
1617 }
1618
1619 static void split_f0(void)
1620 {
1621 /* static char *fstrings; */
1622 #define fstrings (G.split_f0__fstrings)
1623
1624         int i, n;
1625         char *s;
1626
1627         if (is_f0_split)
1628                 return;
1629
1630         is_f0_split = TRUE;
1631         free(fstrings);
1632         fsrealloc(0);
1633         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1634         fsrealloc(n);
1635         s = fstrings;
1636         for (i = 0; i < n; i++) {
1637                 Fields[i].string = nextword(&s);
1638                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1639         }
1640
1641         /* set NF manually to avoid side effects */
1642         clrvar(intvar[NF]);
1643         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1644         intvar[NF]->number = nfields;
1645 #undef fstrings
1646 }
1647
1648 /* perform additional actions when some internal variables changed */
1649 static void handle_special(var *v)
1650 {
1651         int n;
1652         char *b;
1653         const char *sep, *s;
1654         int sl, l, len, i, bsize;
1655
1656         if (!(v->type & VF_SPECIAL))
1657                 return;
1658
1659         if (v == intvar[NF]) {
1660                 n = (int)getvar_i(v);
1661                 fsrealloc(n);
1662
1663                 /* recalculate $0 */
1664                 sep = getvar_s(intvar[OFS]);
1665                 sl = strlen(sep);
1666                 b = NULL;
1667                 len = 0;
1668                 for (i = 0; i < n; i++) {
1669                         s = getvar_s(&Fields[i]);
1670                         l = strlen(s);
1671                         if (b) {
1672                                 memcpy(b+len, sep, sl);
1673                                 len += sl;
1674                         }
1675                         qrealloc(&b, len+l+sl, &bsize);
1676                         memcpy(b+len, s, l);
1677                         len += l;
1678                 }
1679                 if (b)
1680                         b[len] = '\0';
1681                 setvar_p(intvar[F0], b);
1682                 is_f0_split = TRUE;
1683
1684         } else if (v == intvar[F0]) {
1685                 is_f0_split = FALSE;
1686
1687         } else if (v == intvar[FS]) {
1688                 mk_splitter(getvar_s(v), &fsplitter);
1689
1690         } else if (v == intvar[RS]) {
1691                 mk_splitter(getvar_s(v), &rsplitter);
1692
1693         } else if (v == intvar[IGNORECASE]) {
1694                 icase = istrue(v);
1695
1696         } else {                                /* $n */
1697                 n = getvar_i(intvar[NF]);
1698                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1699                 /* right here v is invalid. Just to note... */
1700         }
1701 }
1702
1703 /* step through func/builtin/etc arguments */
1704 static node *nextarg(node **pn)
1705 {
1706         node *n;
1707
1708         n = *pn;
1709         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1710                 *pn = n->r.n;
1711                 n = n->l.n;
1712         } else {
1713                 *pn = NULL;
1714         }
1715         return n;
1716 }
1717
1718 static void hashwalk_init(var *v, xhash *array)
1719 {
1720         char **w;
1721         hash_item *hi;
1722         unsigned i;
1723
1724         if (v->type & VF_WALK)
1725                 free(v->x.walker);
1726
1727         v->type |= VF_WALK;
1728         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1729         w[0] = w[1] = (char *)(w + 2);
1730         for (i = 0; i < array->csize; i++) {
1731                 hi = array->items[i];
1732                 while (hi) {
1733                         strcpy(*w, hi->name);
1734                         nextword(w);
1735                         hi = hi->next;
1736                 }
1737         }
1738 }
1739
1740 static int hashwalk_next(var *v)
1741 {
1742         char **w;
1743
1744         w = v->x.walker;
1745         if (w[1] == w[0])
1746                 return FALSE;
1747
1748         setvar_s(v, nextword(w+1));
1749         return TRUE;
1750 }
1751
1752 /* evaluate node, return 1 when result is true, 0 otherwise */
1753 static int ptest(node *pattern)
1754 {
1755         /* ptest__v is "static": to save stack space? */
1756         return istrue(evaluate(pattern, &G.ptest__v));
1757 }
1758
1759 /* read next record from stream rsm into a variable v */
1760 static int awk_getline(rstream *rsm, var *v)
1761 {
1762         char *b;
1763         regmatch_t pmatch[2];
1764         int a, p, pp=0, size;
1765         int fd, so, eo, r, rp;
1766         char c, *m, *s;
1767
1768         /* we're using our own buffer since we need access to accumulating
1769          * characters
1770          */
1771         fd = fileno(rsm->F);
1772         m = rsm->buffer;
1773         a = rsm->adv;
1774         p = rsm->pos;
1775         size = rsm->size;
1776         c = (char) rsplitter.n.info;
1777         rp = 0;
1778
1779         if (!m) qrealloc(&m, 256, &size);
1780         do {
1781                 b = m + a;
1782                 so = eo = p;
1783                 r = 1;
1784                 if (p > 0) {
1785                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1786                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1787                                                         b, 1, pmatch, 0) == 0) {
1788                                         so = pmatch[0].rm_so;
1789                                         eo = pmatch[0].rm_eo;
1790                                         if (b[eo] != '\0')
1791                                                 break;
1792                                 }
1793                         } else if (c != '\0') {
1794                                 s = strchr(b+pp, c);
1795                                 if (!s) s = memchr(b+pp, '\0', p - pp);
1796                                 if (s) {
1797                                         so = eo = s-b;
1798                                         eo++;
1799                                         break;
1800                                 }
1801                         } else {
1802                                 while (b[rp] == '\n')
1803                                         rp++;
1804                                 s = strstr(b+rp, "\n\n");
1805                                 if (s) {
1806                                         so = eo = s-b;
1807                                         while (b[eo] == '\n') eo++;
1808                                         if (b[eo] != '\0')
1809                                                 break;
1810                                 }
1811                         }
1812                 }
1813
1814                 if (a > 0) {
1815                         memmove(m, (const void *)(m+a), p+1);
1816                         b = m;
1817                         a = 0;
1818                 }
1819
1820                 qrealloc(&m, a+p+128, &size);
1821                 b = m + a;
1822                 pp = p;
1823                 p += safe_read(fd, b+p, size-p-1);
1824                 if (p < pp) {
1825                         p = 0;
1826                         r = 0;
1827                         setvar_i(intvar[ERRNO], errno);
1828                 }
1829                 b[p] = '\0';
1830
1831         } while (p > pp);
1832
1833         if (p == 0) {
1834                 r--;
1835         } else {
1836                 c = b[so]; b[so] = '\0';
1837                 setvar_s(v, b+rp);
1838                 v->type |= VF_USER;
1839                 b[so] = c;
1840                 c = b[eo]; b[eo] = '\0';
1841                 setvar_s(intvar[RT], b+so);
1842                 b[eo] = c;
1843         }
1844
1845         rsm->buffer = m;
1846         rsm->adv = a + eo;
1847         rsm->pos = p - eo;
1848         rsm->size = size;
1849
1850         return r;
1851 }
1852
1853 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1854 {
1855         int r = 0;
1856         char c;
1857         const char *s = format;
1858
1859         if (int_as_int && n == (int)n) {
1860                 r = snprintf(b, size, "%d", (int)n);
1861         } else {
1862                 do { c = *s; } while (c && *++s);
1863                 if (strchr("diouxX", c)) {
1864                         r = snprintf(b, size, format, (int)n);
1865                 } else if (strchr("eEfgG", c)) {
1866                         r = snprintf(b, size, format, n);
1867                 } else {
1868                         syntax_error(EMSG_INV_FMT);
1869                 }
1870         }
1871         return r;
1872 }
1873
1874 /* formatted output into an allocated buffer, return ptr to buffer */
1875 static char *awk_printf(node *n)
1876 {
1877         char *b = NULL;
1878         char *fmt, *s, *f;
1879         const char *s1;
1880         int i, j, incr, bsize;
1881         char c, c1;
1882         var *v, *arg;
1883
1884         v = nvalloc(1);
1885         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1886
1887         i = 0;
1888         while (*f) {
1889                 s = f;
1890                 while (*f && (*f != '%' || *(++f) == '%'))
1891                         f++;
1892                 while (*f && !isalpha(*f)) {
1893                         if (*f == '*')
1894                                 syntax_error("%*x formats are not supported");
1895                         f++;
1896                 }
1897
1898                 incr = (f - s) + MAXVARFMT;
1899                 qrealloc(&b, incr + i, &bsize);
1900                 c = *f;
1901                 if (c != '\0') f++;
1902                 c1 = *f;
1903                 *f = '\0';
1904                 arg = evaluate(nextarg(&n), v);
1905
1906                 j = i;
1907                 if (c == 'c' || !c) {
1908                         i += sprintf(b+i, s, is_numeric(arg) ?
1909                                         (char)getvar_i(arg) : *getvar_s(arg));
1910                 } else if (c == 's') {
1911                         s1 = getvar_s(arg);
1912                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1913                         i += sprintf(b+i, s, s1);
1914                 } else {
1915                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1916                 }
1917                 *f = c1;
1918
1919                 /* if there was an error while sprintf, return value is negative */
1920                 if (i < j) i = j;
1921         }
1922
1923         b = xrealloc(b, i + 1);
1924         free(fmt);
1925         nvfree(v);
1926         b[i] = '\0';
1927         return b;
1928 }
1929
1930 /* common substitution routine
1931  * replace (nm) substring of (src) that match (n) with (repl), store
1932  * result into (dest), return number of substitutions. If nm=0, replace
1933  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1934  * subexpression matching (\1-\9)
1935  */
1936 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1937 {
1938         char *ds = NULL;
1939         const char *s;
1940         const char *sp;
1941         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1942         regmatch_t pmatch[10];
1943         regex_t sreg, *re;
1944
1945         re = as_regex(rn, &sreg);
1946         if (!src) src = intvar[F0];
1947         if (!dest) dest = intvar[F0];
1948
1949         i = di = 0;
1950         sp = getvar_s(src);
1951         rl = strlen(repl);
1952         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1953                 so = pmatch[0].rm_so;
1954                 eo = pmatch[0].rm_eo;
1955
1956                 qrealloc(&ds, di + eo + rl, &dssize);
1957                 memcpy(ds + di, sp, eo);
1958                 di += eo;
1959                 if (++i >= nm) {
1960                         /* replace */
1961                         di -= (eo - so);
1962                         nbs = 0;
1963                         for (s = repl; *s; s++) {
1964                                 ds[di++] = c = *s;
1965                                 if (c == '\\') {
1966                                         nbs++;
1967                                         continue;
1968                                 }
1969                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1970                                         di -= ((nbs + 3) >> 1);
1971                                         j = 0;
1972                                         if (c != '&') {
1973                                                 j = c - '0';
1974                                                 nbs++;
1975                                         }
1976                                         if (nbs % 2) {
1977                                                 ds[di++] = c;
1978                                         } else {
1979                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1980                                                 qrealloc(&ds, di + rl + n, &dssize);
1981                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1982                                                 di += n;
1983                                         }
1984                                 }
1985                                 nbs = 0;
1986                         }
1987                 }
1988
1989                 sp += eo;
1990                 if (i == nm) break;
1991                 if (eo == so) {
1992                         ds[di] = *sp++;
1993                         if (!ds[di++]) break;
1994                 }
1995         }
1996
1997         qrealloc(&ds, di + strlen(sp), &dssize);
1998         strcpy(ds + di, sp);
1999         setvar_p(dest, ds);
2000         if (re == &sreg) regfree(re);
2001         return i;
2002 }
2003
2004 static NOINLINE int do_mktime(const char *ds)
2005 {
2006         struct tm then;
2007         int count;
2008
2009         /*memset(&then, 0, sizeof(then)); - not needed */
2010         then.tm_isdst = -1; /* default is unknown */
2011
2012         /* manpage of mktime says these fields are ints,
2013          * so we can sscanf stuff directly into them */
2014         count = sscanf(ds, "%u %u %u %u %u %u %d",
2015                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2016                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2017                 &then.tm_isdst);
2018
2019         if (count < 6
2020          || (unsigned)then.tm_mon < 1
2021          || (unsigned)then.tm_year < 1900
2022         ) {
2023                 return -1;
2024         }
2025
2026         then.tm_mon -= 1;
2027         then.tm_year -= 1900;
2028
2029         return mktime(&then);
2030 }
2031
2032 static NOINLINE var *exec_builtin(node *op, var *res)
2033 {
2034 #define tspl (G.exec_builtin__tspl)
2035
2036         int (*to_xxx)(int);
2037         var *tv;
2038         node *an[4];
2039         var *av[4];
2040         const char *as[4];
2041         regmatch_t pmatch[2];
2042         regex_t sreg, *re;
2043         node *spl;
2044         uint32_t isr, info;
2045         int nargs;
2046         time_t tt;
2047         char *s, *s1;
2048         int i, l, ll, n;
2049
2050         tv = nvalloc(4);
2051         isr = info = op->info;
2052         op = op->l.n;
2053
2054         av[2] = av[3] = NULL;
2055         for (i = 0; i < 4 && op; i++) {
2056                 an[i] = nextarg(&op);
2057                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2058                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2059                 isr >>= 1;
2060         }
2061
2062         nargs = i;
2063         if ((uint32_t)nargs < (info >> 30))
2064                 syntax_error(EMSG_TOO_FEW_ARGS);
2065
2066         switch (info & OPNMASK) {
2067
2068         case B_a2:
2069 #if ENABLE_FEATURE_AWK_LIBM
2070                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2071 #else
2072                 syntax_error(EMSG_NO_MATH);
2073 #endif
2074                 break;
2075
2076         case B_sp:
2077                 if (nargs > 2) {
2078                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2079                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2080                 } else {
2081                         spl = &fsplitter.n;
2082                 }
2083
2084                 n = awk_split(as[0], spl, &s);
2085                 s1 = s;
2086                 clear_array(iamarray(av[1]));
2087                 for (i = 1; i <= n; i++)
2088                         setari_u(av[1], i, nextword(&s1));
2089                 free(s);
2090                 setvar_i(res, n);
2091                 break;
2092
2093         case B_ss:
2094                 l = strlen(as[0]);
2095                 i = getvar_i(av[1]) - 1;
2096                 if (i > l) i = l;
2097                 if (i < 0) i = 0;
2098                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2099                 if (n < 0) n = 0;
2100                 s = xstrndup(as[0]+i, n);
2101                 setvar_p(res, s);
2102                 break;
2103
2104         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2105          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2106         case B_an:
2107                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2108                 break;
2109
2110         case B_co:
2111                 setvar_i(res, ~getvar_i_int(av[0]));
2112                 break;
2113
2114         case B_ls:
2115                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2116                 break;
2117
2118         case B_or:
2119                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2120                 break;
2121
2122         case B_rs:
2123                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2124                 break;
2125
2126         case B_xo:
2127                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2128                 break;
2129
2130         case B_lo:
2131                 to_xxx = tolower;
2132                 goto lo_cont;
2133
2134         case B_up:
2135                 to_xxx = toupper;
2136  lo_cont:
2137                 s1 = s = xstrdup(as[0]);
2138                 while (*s1) {
2139                         *s1 = (*to_xxx)(*s1);
2140                         s1++;
2141                 }
2142                 setvar_p(res, s);
2143                 break;
2144
2145         case B_ix:
2146                 n = 0;
2147                 ll = strlen(as[1]);
2148                 l = strlen(as[0]) - ll;
2149                 if (ll > 0 && l >= 0) {
2150                         if (!icase) {
2151                                 s = strstr(as[0], as[1]);
2152                                 if (s) n = (s - as[0]) + 1;
2153                         } else {
2154                                 /* this piece of code is terribly slow and
2155                                  * really should be rewritten
2156                                  */
2157                                 for (i=0; i<=l; i++) {
2158                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2159                                                 n = i+1;
2160                                                 break;
2161                                         }
2162                                 }
2163                         }
2164                 }
2165                 setvar_i(res, n);
2166                 break;
2167
2168         case B_ti:
2169                 if (nargs > 1)
2170                         tt = getvar_i(av[1]);
2171                 else
2172                         time(&tt);
2173                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2174                 i = strftime(g_buf, MAXVARFMT,
2175                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2176                         localtime(&tt));
2177                 g_buf[i] = '\0';
2178                 setvar_s(res, g_buf);
2179                 break;
2180
2181         case B_mt:
2182                 setvar_i(res, do_mktime(as[0]));
2183                 break;
2184
2185         case B_ma:
2186                 re = as_regex(an[1], &sreg);
2187                 n = regexec(re, as[0], 1, pmatch, 0);
2188                 if (n == 0) {
2189                         pmatch[0].rm_so++;
2190                         pmatch[0].rm_eo++;
2191                 } else {
2192                         pmatch[0].rm_so = 0;
2193                         pmatch[0].rm_eo = -1;
2194                 }
2195                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2196                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2197                 setvar_i(res, pmatch[0].rm_so);
2198                 if (re == &sreg) regfree(re);
2199                 break;
2200
2201         case B_ge:
2202                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2203                 break;
2204
2205         case B_gs:
2206                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2207                 break;
2208
2209         case B_su:
2210                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2211                 break;
2212         }
2213
2214         nvfree(tv);
2215         return res;
2216 #undef tspl
2217 }
2218
2219 /*
2220  * Evaluate node - the heart of the program. Supplied with subtree
2221  * and place where to store result. returns ptr to result.
2222  */
2223 #define XC(n) ((n) >> 8)
2224
2225 static var *evaluate(node *op, var *res)
2226 {
2227 /* This procedure is recursive so we should count every byte */
2228 #define fnargs (G.evaluate__fnargs)
2229 /* seed is initialized to 1 */
2230 #define seed   (G.evaluate__seed)
2231 #define sreg   (G.evaluate__sreg)
2232
2233         node *op1;
2234         var *v1;
2235         union {
2236                 var *v;
2237                 const char *s;
2238                 double d;
2239                 int i;
2240         } L, R;
2241         uint32_t opinfo;
2242         int opn;
2243         union {
2244                 char *s;
2245                 rstream *rsm;
2246                 FILE *F;
2247                 var *v;
2248                 regex_t *re;
2249                 uint32_t info;
2250         } X;
2251
2252         if (!op)
2253                 return setvar_s(res, NULL);
2254
2255         v1 = nvalloc(2);
2256
2257         while (op) {
2258                 opinfo = op->info;
2259                 opn = (opinfo & OPNMASK);
2260                 g_lineno = op->lineno;
2261
2262                 /* execute inevitable things */
2263                 op1 = op->l.n;
2264                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2265                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2266                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2267                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2268                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2269
2270                 switch (XC(opinfo & OPCLSMASK)) {
2271
2272                 /* -- iterative node type -- */
2273
2274                 /* test pattern */
2275                 case XC( OC_TEST ):
2276                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2277                                 /* it's range pattern */
2278                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2279                                         op->info |= OF_CHECKED;
2280                                         if (ptest(op1->r.n))
2281                                                 op->info &= ~OF_CHECKED;
2282
2283                                         op = op->a.n;
2284                                 } else {
2285                                         op = op->r.n;
2286                                 }
2287                         } else {
2288                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2289                         }
2290                         break;
2291
2292                 /* just evaluate an expression, also used as unconditional jump */
2293                 case XC( OC_EXEC ):
2294                         break;
2295
2296                 /* branch, used in if-else and various loops */
2297                 case XC( OC_BR ):
2298                         op = istrue(L.v) ? op->a.n : op->r.n;
2299                         break;
2300
2301                 /* initialize for-in loop */
2302                 case XC( OC_WALKINIT ):
2303                         hashwalk_init(L.v, iamarray(R.v));
2304                         break;
2305
2306                 /* get next array item */
2307                 case XC( OC_WALKNEXT ):
2308                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2309                         break;
2310
2311                 case XC( OC_PRINT ):
2312                 case XC( OC_PRINTF ):
2313                         X.F = stdout;
2314                         if (op->r.n) {
2315                                 X.rsm = newfile(R.s);
2316                                 if (!X.rsm->F) {
2317                                         if (opn == '|') {
2318                                                 X.rsm->F = popen(R.s, "w");
2319                                                 if (X.rsm->F == NULL)
2320                                                         bb_perror_msg_and_die("popen");
2321                                                 X.rsm->is_pipe = 1;
2322                                         } else {
2323                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2324                                         }
2325                                 }
2326                                 X.F = X.rsm->F;
2327                         }
2328
2329                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2330                                 if (!op1) {
2331                                         fputs(getvar_s(intvar[F0]), X.F);
2332                                 } else {
2333                                         while (op1) {
2334                                                 L.v = evaluate(nextarg(&op1), v1);
2335                                                 if (L.v->type & VF_NUMBER) {
2336                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2337                                                                         getvar_i(L.v), TRUE);
2338                                                         fputs(g_buf, X.F);
2339                                                 } else {
2340                                                         fputs(getvar_s(L.v), X.F);
2341                                                 }
2342
2343                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2344                                         }
2345                                 }
2346                                 fputs(getvar_s(intvar[ORS]), X.F);
2347
2348                         } else {        /* OC_PRINTF */
2349                                 L.s = awk_printf(op1);
2350                                 fputs(L.s, X.F);
2351                                 free((char*)L.s);
2352                         }
2353                         fflush(X.F);
2354                         break;
2355
2356                 case XC( OC_DELETE ):
2357                         X.info = op1->info & OPCLSMASK;
2358                         if (X.info == OC_VAR) {
2359                                 R.v = op1->l.v;
2360                         } else if (X.info == OC_FNARG) {
2361                                 R.v = &fnargs[op1->l.i];
2362                         } else {
2363                                 syntax_error(EMSG_NOT_ARRAY);
2364                         }
2365
2366                         if (op1->r.n) {
2367                                 clrvar(L.v);
2368                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2369                                 hash_remove(iamarray(R.v), L.s);
2370                         } else {
2371                                 clear_array(iamarray(R.v));
2372                         }
2373                         break;
2374
2375                 case XC( OC_NEWSOURCE ):
2376                         g_progname = op->l.s;
2377                         break;
2378
2379                 case XC( OC_RETURN ):
2380                         copyvar(res, L.v);
2381                         break;
2382
2383                 case XC( OC_NEXTFILE ):
2384                         nextfile = TRUE;
2385                 case XC( OC_NEXT ):
2386                         nextrec = TRUE;
2387                 case XC( OC_DONE ):
2388                         clrvar(res);
2389                         break;
2390
2391                 case XC( OC_EXIT ):
2392                         awk_exit(L.d);
2393
2394                 /* -- recursive node type -- */
2395
2396                 case XC( OC_VAR ):
2397                         L.v = op->l.v;
2398                         if (L.v == intvar[NF])
2399                                 split_f0();
2400                         goto v_cont;
2401
2402                 case XC( OC_FNARG ):
2403                         L.v = &fnargs[op->l.i];
2404  v_cont:
2405                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2406                         break;
2407
2408                 case XC( OC_IN ):
2409                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2410                         break;
2411
2412                 case XC( OC_REGEXP ):
2413                         op1 = op;
2414                         L.s = getvar_s(intvar[F0]);
2415                         goto re_cont;
2416
2417                 case XC( OC_MATCH ):
2418                         op1 = op->r.n;
2419  re_cont:
2420                         X.re = as_regex(op1, &sreg);
2421                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2422                         if (X.re == &sreg) regfree(X.re);
2423                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2424                         break;
2425
2426                 case XC( OC_MOVE ):
2427                         /* if source is a temporary string, jusk relink it to dest */
2428                         if (R.v == v1+1 && R.v->string) {
2429                                 res = setvar_p(L.v, R.v->string);
2430                                 R.v->string = NULL;
2431                         } else {
2432                                 res = copyvar(L.v, R.v);
2433                         }
2434                         break;
2435
2436                 case XC( OC_TERNARY ):
2437                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2438                                 syntax_error(EMSG_POSSIBLE_ERROR);
2439                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2440                         break;
2441
2442                 case XC( OC_FUNC ):
2443                         if (!op->r.f->body.first)
2444                                 syntax_error(EMSG_UNDEF_FUNC);
2445
2446                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2447                         while (op1) {
2448                                 L.v = evaluate(nextarg(&op1), v1);
2449                                 copyvar(R.v, L.v);
2450                                 R.v->type |= VF_CHILD;
2451                                 R.v->x.parent = L.v;
2452                                 if (++R.v - X.v >= op->r.f->nargs)
2453                                         break;
2454                         }
2455
2456                         R.v = fnargs;
2457                         fnargs = X.v;
2458
2459                         L.s = g_progname;
2460                         res = evaluate(op->r.f->body.first, res);
2461                         g_progname = L.s;
2462
2463                         nvfree(fnargs);
2464                         fnargs = R.v;
2465                         break;
2466
2467                 case XC( OC_GETLINE ):
2468                 case XC( OC_PGETLINE ):
2469                         if (op1) {
2470                                 X.rsm = newfile(L.s);
2471                                 if (!X.rsm->F) {
2472                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2473                                                 X.rsm->F = popen(L.s, "r");
2474                                                 X.rsm->is_pipe = TRUE;
2475                                         } else {
2476                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2477                                         }
2478                                 }
2479                         } else {
2480                                 if (!iF) iF = next_input_file();
2481                                 X.rsm = iF;
2482                         }
2483
2484                         if (!X.rsm->F) {
2485                                 setvar_i(intvar[ERRNO], errno);
2486                                 setvar_i(res, -1);
2487                                 break;
2488                         }
2489
2490                         if (!op->r.n)
2491                                 R.v = intvar[F0];
2492
2493                         L.i = awk_getline(X.rsm, R.v);
2494                         if (L.i > 0) {
2495                                 if (!op1) {
2496                                         incvar(intvar[FNR]);
2497                                         incvar(intvar[NR]);
2498                                 }
2499                         }
2500                         setvar_i(res, L.i);
2501                         break;
2502
2503                 /* simple builtins */
2504                 case XC( OC_FBLTIN ):
2505                         switch (opn) {
2506
2507                         case F_in:
2508                                 R.d = (int)L.d;
2509                                 break;
2510
2511                         case F_rn:
2512                                 R.d = (double)rand() / (double)RAND_MAX;
2513                                 break;
2514 #if ENABLE_FEATURE_AWK_LIBM
2515                         case F_co:
2516                                 R.d = cos(L.d);
2517                                 break;
2518
2519                         case F_ex:
2520                                 R.d = exp(L.d);
2521                                 break;
2522
2523                         case F_lg:
2524                                 R.d = log(L.d);
2525                                 break;
2526
2527                         case F_si:
2528                                 R.d = sin(L.d);
2529                                 break;
2530
2531                         case F_sq:
2532                                 R.d = sqrt(L.d);
2533                                 break;
2534 #else
2535                         case F_co:
2536                         case F_ex:
2537                         case F_lg:
2538                         case F_si:
2539                         case F_sq:
2540                                 syntax_error(EMSG_NO_MATH);
2541                                 break;
2542 #endif
2543                         case F_sr:
2544                                 R.d = (double)seed;
2545                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2546                                 srand(seed);
2547                                 break;
2548
2549                         case F_ti:
2550                                 R.d = time(NULL);
2551                                 break;
2552
2553                         case F_le:
2554                                 if (!op1)
2555                                         L.s = getvar_s(intvar[F0]);
2556                                 R.d = strlen(L.s);
2557                                 break;
2558
2559                         case F_sy:
2560                                 fflush(NULL);
2561                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2562                                                 ? (system(L.s) >> 8) : 0;
2563                                 break;
2564
2565                         case F_ff:
2566                                 if (!op1)
2567                                         fflush(stdout);
2568                                 else {
2569                                         if (L.s && *L.s) {
2570                                                 X.rsm = newfile(L.s);
2571                                                 fflush(X.rsm->F);
2572                                         } else {
2573                                                 fflush(NULL);
2574                                         }
2575                                 }
2576                                 break;
2577
2578                         case F_cl:
2579                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2580                                 if (X.rsm) {
2581                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2582                                         free(X.rsm->buffer);
2583                                         hash_remove(fdhash, L.s);
2584                                 }
2585                                 if (R.i != 0)
2586                                         setvar_i(intvar[ERRNO], errno);
2587                                 R.d = (double)R.i;
2588                                 break;
2589                         }
2590                         setvar_i(res, R.d);
2591                         break;
2592
2593                 case XC( OC_BUILTIN ):
2594                         res = exec_builtin(op, res);
2595                         break;
2596
2597                 case XC( OC_SPRINTF ):
2598                         setvar_p(res, awk_printf(op1));
2599                         break;
2600
2601                 case XC( OC_UNARY ):
2602                         X.v = R.v;
2603                         L.d = R.d = getvar_i(R.v);
2604                         switch (opn) {
2605                         case 'P':
2606                                 L.d = ++R.d;
2607                                 goto r_op_change;
2608                         case 'p':
2609                                 R.d++;
2610                                 goto r_op_change;
2611                         case 'M':
2612                                 L.d = --R.d;
2613                                 goto r_op_change;
2614                         case 'm':
2615                                 R.d--;
2616                                 goto r_op_change;
2617                         case '!':
2618                                 L.d = istrue(X.v) ? 0 : 1;
2619                                 break;
2620                         case '-':
2621                                 L.d = -R.d;
2622                                 break;
2623  r_op_change:
2624                                 setvar_i(X.v, R.d);
2625                         }
2626                         setvar_i(res, L.d);
2627                         break;
2628
2629                 case XC( OC_FIELD ):
2630                         R.i = (int)getvar_i(R.v);
2631                         if (R.i == 0) {
2632                                 res = intvar[F0];
2633                         } else {
2634                                 split_f0();
2635                                 if (R.i > nfields)
2636                                         fsrealloc(R.i);
2637                                 res = &Fields[R.i - 1];
2638                         }
2639                         break;
2640
2641                 /* concatenation (" ") and index joining (",") */
2642                 case XC( OC_CONCAT ):
2643                 case XC( OC_COMMA ):
2644                         opn = strlen(L.s) + strlen(R.s) + 2;
2645                         X.s = xmalloc(opn);
2646                         strcpy(X.s, L.s);
2647                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2648                                 L.s = getvar_s(intvar[SUBSEP]);
2649                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2650                                 strcat(X.s, L.s);
2651                         }
2652                         strcat(X.s, R.s);
2653                         setvar_p(res, X.s);
2654                         break;
2655
2656                 case XC( OC_LAND ):
2657                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2658                         break;
2659
2660                 case XC( OC_LOR ):
2661                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2662                         break;
2663
2664                 case XC( OC_BINARY ):
2665                 case XC( OC_REPLACE ):
2666                         R.d = getvar_i(R.v);
2667                         switch (opn) {
2668                         case '+':
2669                                 L.d += R.d;
2670                                 break;
2671                         case '-':
2672                                 L.d -= R.d;
2673                                 break;
2674                         case '*':
2675                                 L.d *= R.d;
2676                                 break;
2677                         case '/':
2678                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2679                                 L.d /= R.d;
2680                                 break;
2681                         case '&':
2682 #if ENABLE_FEATURE_AWK_LIBM
2683                                 L.d = pow(L.d, R.d);
2684 #else
2685                                 syntax_error(EMSG_NO_MATH);
2686 #endif
2687                                 break;
2688                         case '%':
2689                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2690                                 L.d -= (int)(L.d / R.d) * R.d;
2691                                 break;
2692                         }
2693                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2694                         break;
2695
2696                 case XC( OC_COMPARE ):
2697                         if (is_numeric(L.v) && is_numeric(R.v)) {
2698                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2699                         } else {
2700                                 L.s = getvar_s(L.v);
2701                                 R.s = getvar_s(R.v);
2702                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2703                         }
2704                         switch (opn & 0xfe) {
2705                         case 0:
2706                                 R.i = (L.d > 0);
2707                                 break;
2708                         case 2:
2709                                 R.i = (L.d >= 0);
2710                                 break;
2711                         case 4:
2712                                 R.i = (L.d == 0);
2713                                 break;
2714                         }
2715                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2716                         break;
2717
2718                 default:
2719                         syntax_error(EMSG_POSSIBLE_ERROR);
2720                 }
2721                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2722                         op = op->a.n;
2723                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2724                         break;
2725                 if (nextrec)
2726                         break;
2727         }
2728         nvfree(v1);
2729         return res;
2730 #undef fnargs
2731 #undef seed
2732 #undef sreg
2733 }
2734
2735
2736 /* -------- main & co. -------- */
2737
2738 static int awk_exit(int r)
2739 {
2740         var tv;
2741         unsigned i;
2742         hash_item *hi;
2743
2744         zero_out_var(&tv);
2745
2746         if (!exiting) {
2747                 exiting = TRUE;
2748                 nextrec = FALSE;
2749                 evaluate(endseq.first, &tv);
2750         }
2751
2752         /* waiting for children */
2753         for (i = 0; i < fdhash->csize; i++) {
2754                 hi = fdhash->items[i];
2755                 while (hi) {
2756                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2757                                 pclose(hi->data.rs.F);
2758                         hi = hi->next;
2759                 }
2760         }
2761
2762         exit(r);
2763 }
2764
2765 /* if expr looks like "var=value", perform assignment and return 1,
2766  * otherwise return 0 */
2767 static int is_assignment(const char *expr)
2768 {
2769         char *exprc, *s, *s0, *s1;
2770
2771         exprc = xstrdup(expr);
2772         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2773                 free(exprc);
2774                 return FALSE;
2775         }
2776
2777         *(s++) = '\0';
2778         s0 = s1 = s;
2779         while (*s)
2780                 *(s1++) = nextchar(&s);
2781
2782         *s1 = '\0';
2783         setvar_u(newvar(exprc), s0);
2784         free(exprc);
2785         return TRUE;
2786 }
2787
2788 /* switch to next input file */
2789 static rstream *next_input_file(void)
2790 {
2791 #define rsm          (G.next_input_file__rsm)
2792 #define files_happen (G.next_input_file__files_happen)
2793
2794         FILE *F = NULL;
2795         const char *fname, *ind;
2796
2797         if (rsm.F) fclose(rsm.F);
2798         rsm.F = NULL;
2799         rsm.pos = rsm.adv = 0;
2800
2801         do {
2802                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2803                         if (files_happen)
2804                                 return NULL;
2805                         fname = "-";
2806                         F = stdin;
2807                 } else {
2808                         ind = getvar_s(incvar(intvar[ARGIND]));
2809                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2810                         if (fname && *fname && !is_assignment(fname))
2811                                 F = xfopen_stdin(fname);
2812                 }
2813         } while (!F);
2814
2815         files_happen = TRUE;
2816         setvar_s(intvar[FILENAME], fname);
2817         rsm.F = F;
2818         return &rsm;
2819 #undef rsm
2820 #undef files_happen
2821 }
2822
2823 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2824 int awk_main(int argc, char **argv)
2825 {
2826         unsigned opt;
2827         char *opt_F, *opt_W;
2828         llist_t *list_v = NULL;
2829         llist_t *list_f = NULL;
2830         int i, j;
2831         var *v;
2832         var tv;
2833         char **envp;
2834         char *vnames = (char *)vNames; /* cheat */
2835         char *vvalues = (char *)vValues;
2836
2837         INIT_G();
2838
2839         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2840          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2841         if (ENABLE_LOCALE_SUPPORT)
2842                 setlocale(LC_NUMERIC, "C");
2843
2844         zero_out_var(&tv);
2845
2846         /* allocate global buffer */
2847         g_buf = xmalloc(MAXVARFMT + 1);
2848
2849         vhash = hash_init();
2850         ahash = hash_init();
2851         fdhash = hash_init();
2852         fnhash = hash_init();
2853
2854         /* initialize variables */
2855         for (i = 0; *vnames; i++) {
2856                 intvar[i] = v = newvar(nextword(&vnames));
2857                 if (*vvalues != '\377')
2858                         setvar_s(v, nextword(&vvalues));
2859                 else
2860                         setvar_i(v, 0);
2861
2862                 if (*vnames == '*') {
2863                         v->type |= VF_SPECIAL;
2864                         vnames++;
2865                 }
2866         }
2867
2868         handle_special(intvar[FS]);
2869         handle_special(intvar[RS]);
2870
2871         newfile("/dev/stdin")->F = stdin;
2872         newfile("/dev/stdout")->F = stdout;
2873         newfile("/dev/stderr")->F = stderr;
2874
2875         /* Huh, people report that sometimes environ is NULL. Oh well. */
2876         if (environ) for (envp = environ; *envp; envp++) {
2877                 /* environ is writable, thus we don't strdup it needlessly */
2878                 char *s = *envp;
2879                 char *s1 = strchr(s, '=');
2880                 if (s1) {
2881                         *s1 = '\0';
2882                         /* Both findvar and setvar_u take const char*
2883                          * as 2nd arg -> environment is not trashed */
2884                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2885                         *s1 = '=';
2886                 }
2887         }
2888         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2889         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2890         argv += optind;
2891         argc -= optind;
2892         if (opt & 0x1)
2893                 setvar_s(intvar[FS], opt_F); // -F
2894         while (list_v) { /* -v */
2895                 if (!is_assignment(llist_pop(&list_v)))
2896                         bb_show_usage();
2897         }
2898         if (list_f) { /* -f */
2899                 do {
2900                         char *s = NULL;
2901                         FILE *from_file;
2902
2903                         g_progname = llist_pop(&list_f);
2904                         from_file = xfopen_stdin(g_progname);
2905                         /* one byte is reserved for some trick in next_token */
2906                         for (i = j = 1; j > 0; i += j) {
2907                                 s = xrealloc(s, i + 4096);
2908                                 j = fread(s + i, 1, 4094, from_file);
2909                         }
2910                         s[i] = '\0';
2911                         fclose(from_file);
2912                         parse_program(s + 1);
2913                         free(s);
2914                 } while (list_f);
2915                 argc++;
2916         } else { // no -f: take program from 1st parameter
2917                 if (!argc)
2918                         bb_show_usage();
2919                 g_progname = "cmd. line";
2920                 parse_program(*argv++);
2921         }
2922         if (opt & 0x8) // -W
2923                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2924
2925         /* fill in ARGV array */
2926         setvar_i(intvar[ARGC], argc);
2927         setari_u(intvar[ARGV], 0, "awk");
2928         i = 0;
2929         while (*argv)
2930                 setari_u(intvar[ARGV], ++i, *argv++);
2931
2932         evaluate(beginseq.first, &tv);
2933         if (!mainseq.first && !endseq.first)
2934                 awk_exit(EXIT_SUCCESS);
2935
2936         /* input file could already be opened in BEGIN block */
2937         if (!iF) iF = next_input_file();
2938
2939         /* passing through input files */
2940         while (iF) {
2941                 nextfile = FALSE;
2942                 setvar_i(intvar[FNR], 0);
2943
2944                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2945                         nextrec = FALSE;
2946                         incvar(intvar[NR]);
2947                         incvar(intvar[FNR]);
2948                         evaluate(mainseq.first, &tv);
2949
2950                         if (nextfile)
2951                                 break;
2952                 }
2953
2954                 if (i < 0)
2955                         syntax_error(strerror(errno));
2956
2957                 iF = next_input_file();
2958         }
2959
2960         awk_exit(EXIT_SUCCESS);
2961         /*return 0;*/
2962 }