b9bc01f1653dd6ec8b6e1b22465c69ae1b5ff315
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Sizing constants */
#define MAXVARFMT       240     /* size argument given to fmt_num() when formatting into g_buf */
#define MINNVBLOCK      64      /* minimum number of vars held by one nvblock */

/* var::type bits describing the current value */
#define VF_NUMBER       (1 << 0)   /* primary type is number */
#define VF_ARRAY        (1 << 1)   /* variable is an array */

#define VF_CACHED       (1 << 8)   /* num/str value has a cached str/num equivalent */
#define VF_USER         (1 << 9)   /* user input (may be a numeric string) */
#define VF_SPECIAL      (1 << 10)  /* needs extra handling when changed */
#define VF_WALK         (1 << 11)  /* x.walker list is allocated */
#define VF_FSTR         (1 << 12)  /* var::string points into the fstring buffer */
#define VF_CHILD        (1 << 13)  /* function arg; x.parent points to the source */
#define VF_DIRTY        (1 << 14)  /* variable was set explicitly */

/* Bits that survive a value change (clrvar() masks type with this) */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35 /* Variable */
/*
 * A single awk value. 'type' carries the VF_* bits; 'number' and 'string'
 * hold the numeric and string forms (one may be a cached conversion of the
 * other when VF_CACHED is set). 'string' is free()d by clrvar() unless
 * VF_FSTR is set.
 */
typedef struct var_s {
        unsigned type;            /* VF_* flags */
        double number;            /* numeric value (or cached conversion) */
        char *string;             /* string value (or cached conversion), may be NULL */
        union {
                int aidx;               /* func arg idx (for compilation stage) */
                struct xhash_s *array;  /* array ptr (valid when VF_ARRAY is set) */
                struct var_s *parent;   /* for func args (VF_CHILD), ptr to actual parameter */
                char **walker;          /* list of array elements (for..in), see VF_WALK */
        } x;
} var;
47
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* A sequence of nodes, tracked by its first and last element. */
typedef struct chain_s {
        struct node_s *first;           /* head of the chain */
        struct node_s *last;            /* tail of the chain */
        const char *programname;        /* NOTE(review): presumably the source name used in diagnostics - confirm */
} chain;
54
55 /* Function */
/* A user-defined function: argument count plus the node chain of its body. */
typedef struct func_s {
        unsigned nargs;         /* number of arguments */
        struct chain_s body;    /* function body */
} func;
60
61 /* I/O stream */
/*
 * State of one I/O stream. The code using these fields is outside
 * this part of the file, so the notes below are assumptions.
 */
typedef struct rstream_s {
        FILE *F;                /* underlying stdio stream */
        char *buffer;           /* NOTE(review): presumably the read buffer - confirm */
        int adv;                /* NOTE(review): presumably bytes to advance past the last record - confirm */
        int size;               /* NOTE(review): presumably the buffer capacity - confirm */
        int pos;                /* NOTE(review): presumably the current offset in buffer - confirm */
        smallint is_pipe;       /* nonzero if the stream is a pipe (per its name) */
} rstream;
70
/*
 * One entry of an xhash bucket chain.
 * 'data' must stay the first member: hash_search()/hash_find() hand out
 * &item->data, and hash_find() stores hash_search()'s result back into a
 * hash_item pointer.
 * 'name' is the key; hash_find() allocates sizeof(hash_item) + strlen(key) + 1
 * bytes so the full key fits.
 */
typedef struct hash_item_s {
        union {
                struct var_s v;         /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;        /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        char name[1];                   /* really it's longer */
} hash_item;
80
/* Chained hash table keyed by NUL-terminated strings. */
typedef struct xhash_s {
        unsigned nel;           /* number of elements */
        unsigned csize;         /* current number of buckets */
        unsigned nprime;        /* index into PRIMES[] of the next table size */
        unsigned glen;          /* total length of all item names, including their NULs */
        struct hash_item_s **items;     /* bucket array, csize entries */
} xhash;
88
89 /* Tree node */
/*
 * Parse tree node. 'info' uses the same encoding as the tokeninfo[] entries
 * (operation class, OF_* flags, priority, opcode). The unions 'l', 'r' and
 * 'a' hold the operands/links; which member is valid depends on the
 * operation (decided by code outside this part of the file).
 */
typedef struct node_s {
        uint32_t info;          /* operation info bits */
        unsigned lineno;        /* NOTE(review): presumably the source line of the node - confirm */
        union {
                struct node_s *n;
                var *v;
                int i;
                char *s;
                regex_t *re;
        } l;
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
                int argno;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
110
111 /* Block of temporary variables */
/*
 * One block of the temporary-variable allocator (see nvalloc()/nvfree()).
 * Blocks form a doubly linked list; vars are handed out from nv[] in
 * stack order.
 */
typedef struct nvblock_s {
        int size;                       /* capacity of nv[] in vars */
        var *pos;                       /* first unused slot in nv[] */
        struct nvblock_s *prev;         /* previous block, NULL for the first one */
        struct nvblock_s *next;         /* next block, NULL for the last one */
        var nv[];                       /* the variables themselves */
} nvblock;
119
/*
 * A splitter: a node plus storage for two compiled regexes.
 * NOTE(review): the two slots presumably pair with node's l.re / r.ire
 * members - confirm against the code that builds splitters.
 */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!!  See next_token() */
/* Each simple class is one bit; the bit order matches the order of the
 * NTC-separated groups in tokenlist[] (NTC means "tc<<1"). */
#define TC_SEQSTART      1                              /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)                /* in */
#define TC_COMMA        (1 << 8)                /* , */
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)               /* ; */
#define TC_NEWLINE      (1 << 15)               /* \n */
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* any unary prefix operator */
#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* what an expression can begin with */
#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what a group can begin with */
#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if the previous token class is in CONCAT1 and the next one is in CONCAT2, */
/* a concatenation operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/*
 * Operand-handling flags stored in the info word. Their exact effect is
 * implemented by the evaluator, which is not part of this section; the
 * "1"/"2" suffixes match the first/second letter of the shorthands below.
 */
#define OF_RES1    0x010000
#define OF_RES2    0x020000
#define OF_STR1    0x040000
#define OF_STR2    0x080000
#define OF_NUM1    0x100000
#define OF_CHECKED 0x200000

/* Two-letter shorthands: first letter = operand 1, second = operand 2
 * (x: no flag, V: RES, S: RES+STR, N: RES+NUM) */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define Nx      (OF_RES1 | OF_NUM1)
#define Sx      (OF_RES1 | OF_STR1)
#define VV      (Vx | xV)
#define NV      (Nx | xV)
#define SV      (Sx | xV)
#define SS      (Sx | xS)
206
/* Masks for the info word: operation class (OC_xxx / ST_xxx) and opcode */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F

/* Operator priority is stored in the highest byte (even: r->l, odd: l->r
 * grouping). For builtins that byte has a different meaning:
 * n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places a value into that byte. The argument is parenthesized and
 * converted to uint32_t before shifting: values >= 0x80 (e.g. P(0x83))
 * would otherwise shift a signed int into its sign bit, which is undefined.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
217
218 /* Operation classes */
219
/* Boundaries within the class numbering below: SHIFT_TIL_THIS equals
 * OC_WALKINIT and RECUR_FROM_THIS equals OC_BINARY. The code that tests
 * against them is not in this section of the file. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

/* Operation class lives in bits 8..15 of the info word (see OPCLSMASK) */
enum {
	OC_DELETE    = 0x0100,
	OC_EXEC      = 0x0200,
	OC_NEWSOURCE = 0x0300,
	OC_PRINT     = 0x0400,
	OC_PRINTF    = 0x0500,
	OC_WALKINIT  = 0x0600,

	OC_BR        = 0x0700,
	OC_BREAK     = 0x0800,
	OC_CONTINUE  = 0x0900,
	OC_EXIT      = 0x0a00,
	OC_NEXT      = 0x0b00,
	OC_NEXTFILE  = 0x0c00,
	OC_TEST      = 0x0d00,
	OC_WALKNEXT  = 0x0e00,

	OC_BINARY    = 0x1000,
	OC_BUILTIN   = 0x1100,
	OC_COLON     = 0x1200,
	OC_COMMA     = 0x1300,
	OC_COMPARE   = 0x1400,
	OC_CONCAT    = 0x1500,
	OC_FBLTIN    = 0x1600,
	OC_FIELD     = 0x1700,
	OC_FNARG     = 0x1800,
	OC_FUNC      = 0x1900,
	OC_GETLINE   = 0x1a00,
	OC_IN        = 0x1b00,
	OC_LAND      = 0x1c00,
	OC_LOR       = 0x1d00,
	OC_MATCH     = 0x1e00,
	OC_MOVE      = 0x1f00,
	OC_PGETLINE  = 0x2000,
	OC_REGEXP    = 0x2100,
	OC_REPLACE   = 0x2200,
	OC_RETURN    = 0x2300,
	OC_SPRINTF   = 0x2400,
	OC_TERNARY   = 0x2500,
	OC_UNARY     = 0x2600,
	OC_VAR       = 0x2700,
	OC_DONE      = 0x2800,

	ST_IF        = 0x3000,
	ST_DO        = 0x3100,
	ST_FOR       = 0x3200,
	ST_WHILE     = 0x3300
};
244
245 /* simple builtins */
/* Opcodes of OC_FBLTIN entries. The names in the comments are the
 * tokenlist[] words at the positions where tokeninfo[] uses each code. */
enum {
	F_in,   /* int */
	F_rn,   /* rand */
	F_co,   /* cos */
	F_ex,   /* exp */
	F_lg,   /* log */
	F_si,   /* sin */
	F_sq,   /* sqrt */
	F_sr,   /* srand */
	F_ti,   /* systime */
	F_le,   /* length */
	F_sy,   /* system */
	F_ff,   /* fflush */
	F_cl    /* close */
};
250
251 /* builtins */
/* Opcodes of OC_BUILTIN entries. The names in the comments are the
 * tokenlist[] words at the positions where tokeninfo[] uses each code. */
enum {
	B_a2,   /* atan2 */
	B_ix,   /* index */
	B_ma,   /* match */
	B_sp,   /* split */
	B_ss,   /* substr */
	B_ti,   /* strftime */
	B_mt,   /* mktime */
	B_lo,   /* tolower */
	B_up,   /* toupper */
	B_ge,   /* gensub */
	B_gs,   /* gsub */
	B_su,   /* sub */
	B_an,   /* and */
	B_co,   /* compl */
	B_ls,   /* lshift */
	B_or,   /* or */
	B_rs,   /* rshift */
	B_xo,   /* xor */
};
257
258 /* tokens and their corresponding info values */
259
#define NTC     "\377"  /* switch to next token class (tc<<1) */
#define NTCC    '\377'  /* same marker as a character constant */

#define OC_B    OC_BUILTIN

/*
 * Token table. Each entry is one length byte (written as an octal escape,
 * e.g. "\10continue" = 8 characters) followed by the token text. NTC ends
 * the current class, so the groups appear in the same order as the TC_xxx
 * bits. The final "\0" entry (zero length) ends the table.
 * Entries must stay in step with tokeninfo[], which holds one info word
 * per entry in the same order.
 */
static const char tokenlist[] ALIGN1 =
        "\1("       NTC
        "\1)"       NTC
        "\1/"       NTC                                 /* REGEXP */
        "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
        "\2++"      "\2--"      NTC                     /* UOPPOST */
        "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
        "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
        "\2*="      "\2/="      "\2%="      "\2^="
        "\1+"       "\1-"       "\3**="     "\2**"
        "\1/"       "\1%"       "\1^"       "\1*"
        "\2!="      "\2>="      "\2<="      "\1>"
        "\1<"       "\2!~"      "\1~"       "\2&&"
        "\2||"      "\1?"       "\1:"       NTC
        "\2in"      NTC
        "\1,"       NTC
        "\1|"       NTC
        "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
        "\1]"       NTC
        "\1{"       NTC
        "\1}"       NTC
        "\1;"       NTC
        "\1\n"      NTC
        "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
        "\10continue"           "\6delete"  "\5print"
        "\6printf"  "\4next"    "\10nextfile"
        "\6return"  "\4exit"    NTC
        "\5while"   NTC
        "\4else"    NTC

        "\3and"     "\5compl"   "\6lshift"  "\2or"
        "\6rshift"  "\3xor"
        "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
        "\3cos"     "\3exp"     "\3int"     "\3log"
        "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
        "\6gensub"  "\4gsub"    "\5index"   "\6length"
        "\5match"   "\5split"   "\7sprintf" "\3sub"
        "\6substr"  "\7systime" "\10strftime" "\6mktime"
        "\7tolower" "\7toupper" NTC
        "\7getline" NTC
        "\4func"    "\10function"   NTC
        "\5BEGIN"   NTC
        "\3END"     "\0"
        ;
309
/*
 * Info word for every tokenlist[] entry, in the same order (one element
 * per token; class switches take no slot). A word combines an operation
 * class (OC_xxx / ST_xxx), OF_xxx operand flags (xV, NV, ...), the P()
 * byte and a low-byte opcode (a character or an F_xxx / B_xxx code).
 */
static const uint32_t tokeninfo[] = {
        0,
        0,
        OC_REGEXP,
        xS|'a',     xS|'w',     xS|'|',
        OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
        OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
            OC_FIELD|xV|P(5),
        OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
            OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
        OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
            OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
        OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
            OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
        OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
            OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
        OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
            OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
        OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
            OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
        OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
            OC_COLON|xx|P(67)|':',
        OC_IN|SV|P(49),
        OC_COMMA|SS|P(80),
        OC_PGETLINE|SV|P(37),
        OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
            OC_UNARY|xV|P(19)|'!',
        0,
        0,
        0,
        0,
        0,
        ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
        OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
        OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
        OC_RETURN|Vx,   OC_EXIT|Nx,
        ST_WHILE,
        0,

        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
        OC_GETLINE|SV|P(0),
        0,      0,
        0,
        0
};
363
364 /* internal variable names and their initial values       */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices into intvar[]; the order must match the names in vNames[] */
enum {
	CONVFMT,
	OFMT,
	FS,
	OFS,
	ORS,
	RS,
	RT,
	FILENAME,
	SUBSEP,
	F0,
	ARGIND,
	ARGC,
	ARGV,
	ERRNO,
	FNR,
	NR,
	NF,
	IGNORECASE,
	ENVIRON,
	NUM_INTERNAL_VARS       /* count of the entries above */
};
373
/*
 * NUL-separated names of the internal variables, in enum order; the empty
 * string at the end terminates the list. A '*' right after a name's NUL
 * marks that name as SPECIAL (FS, RS, $, NF and IGNORECASE here).
 * NOTE(review): the code that consumes the '*' is outside this section -
 * confirm before changing the layout.
 */
static const char vNames[] ALIGN1 =
        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
380
/*
 * NUL-separated initial string values for the first internal variables,
 * in the same order as vNames[] (CONVFMT .. F0). "\034" is the SUBSEP
 * default. The trailing "\377" is a marker byte.
 * NOTE(review): presumably it tells the init code that the remaining
 * variables have no string default - confirm against that code.
 */
static const char vValues[] ALIGN1 =
        "%.6g\0"    "%.6g\0"    " \0"       " \0"
        "\n\0"      "\n\0"      "\0"        "\0"
        "\034\0"    "\0"        "\377";
385
386 /* hash size may grow to these values */
/* FIRST_PRIME is the bucket count of a fresh hash (see hash_init());
 * hash_rebuild() then steps through PRIMES[] via xhash::nprime and stops
 * growing once the list is exhausted. */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
389
390
391 /* Globals. Split in two parts so that first one is addressed
392  * with (mostly short) negative offsets.
393  * NB: it's unsafe to put members of type "double"
394  * into globals2 (gcc may fail to align them).
395  */
/*
 * First part of the global state. It is placed immediately before the
 * address stored in ptr_to_globals (see G1 and INIT_G), so its members are
 * reached with negative offsets. Members are accessed through the
 * same-named macros defined below.
 */
struct globals {
        double t_double;
        chain beginseq, mainseq, endseq;
        chain *seq;
        node *break_ptr, *continue_ptr;
        rstream *iF;
        xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables, fdhash: streams, fnhash: functions (see newvar/newfile/newfunc) */
        const char *g_progname;         /* printed by syntax_error() */
        int g_lineno;                   /* printed by syntax_error() */
        int nfields;
        int maxfields; /* used in fsrealloc() only */
        var *Fields;
        nvblock *g_cb;                  /* current block of the nvalloc() allocator */
        char *g_pos;                    /* current parse position (read by next_token()) */
        char *g_buf;                    /* scratch buffer, used by getvar_s() for number formatting */
        smallint icase;
        smallint exiting;
        smallint nextrec;
        smallint nextfile;
        smallint is_f0_split;
};
/*
 * Second part of the global state; ptr_to_globals points at its start
 * (see G and INIT_G). Members named func__var are former function-local
 * statics of 'func'.
 */
struct globals2 {
        uint32_t t_info; /* often used */
        uint32_t t_tclass;
        char *t_string;
        int t_lineno;
        int t_rollback;         /* when set, next_token() clears it without reading a new token */

        var *intvar[NUM_INTERNAL_VARS]; /* often used */

        /* former statics from various functions */
        char *split_f0__fstrings;

        uint32_t next_token__save_tclass;
        uint32_t next_token__save_info;
        uint32_t next_token__ltclass;   /* initialized to TC_OPTERM by INIT_G() */
        smallint next_token__concat_inserted;

        smallint next_input_file__files_happen;
        rstream next_input_file__rsm;

        var *evaluate__fnargs;
        unsigned evaluate__seed;        /* initialized to 1 by INIT_G() */
        regex_t evaluate__sreg;

        var ptest__v;

        tsplitter exec_builtin__tspl;

        /* biggest and least used members go last */
        tsplitter fsplitter, rsplitter;
};
/* G1 is the struct globals stored just before *ptr_to_globals;
 * G is the struct globals2 that starts at ptr_to_globals. */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthands so the rest of the file can use the plain member names */
#define t_double     (G1.t_double    )
#define beginseq     (G1.beginseq    )
#define mainseq      (G1.mainseq     )
#define endseq       (G1.endseq      )
#define seq          (G1.seq         )
#define break_ptr    (G1.break_ptr   )
#define continue_ptr (G1.continue_ptr)
#define iF           (G1.iF          )
#define vhash        (G1.vhash       )
#define ahash        (G1.ahash       )
#define fdhash       (G1.fdhash      )
#define fnhash       (G1.fnhash      )
#define g_progname   (G1.g_progname  )
#define g_lineno     (G1.g_lineno    )
#define nfields      (G1.nfields     )
#define maxfields    (G1.maxfields   )
#define Fields       (G1.Fields      )
#define g_cb         (G1.g_cb        )
#define g_pos        (G1.g_pos       )
#define g_buf        (G1.g_buf       )
#define icase        (G1.icase       )
#define exiting      (G1.exiting     )
#define nextrec      (G1.nextrec     )
#define nextfile     (G1.nextfile    )
#define is_f0_split  (G1.is_f0_split )
#define t_info       (G.t_info      )
#define t_tclass     (G.t_tclass    )
#define t_string     (G.t_string    )
#define t_lineno     (G.t_lineno    )
#define t_rollback   (G.t_rollback  )
#define intvar       (G.intvar      )
#define fsplitter    (G.fsplitter   )
#define rsplitter    (G.rsplitter   )
/* Allocate both global structs as one zeroed chunk, point ptr_to_globals
 * at the second one (so that G1 sits right before it) and set the two
 * members whose initial value is not zero. */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
        G.next_token__ltclass = TC_OPTERM; \
        G.evaluate__seed = 1; \
} while (0)
493
494
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
503
504 /* ---- error handling ---- */
505
/* Diagnostic texts, passed to syntax_error() as its message argument */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only needed when the math builtins are compiled out */
#if !ENABLE_FEATURE_AWK_LIBM
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
#endif
518
519 static void zero_out_var(var *vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *message) NORETURN;
525 static void syntax_error(const char *message)
526 {
527         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/*
 * Hash a NUL-terminated string: h = h * 63 + c for every character,
 * in unsigned arithmetic (wraps on overflow). The caller reduces the
 * result modulo the table size.
 */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63 + *p;
	return h;
}
540
541 /* create new hash */
542 static xhash *hash_init(void)
543 {
544         xhash *newhash;
545
546         newhash = xzalloc(sizeof(*newhash));
547         newhash->csize = FIRST_PRIME;
548         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
549
550         return newhash;
551 }
552
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
555 {
556         hash_item *hi;
557
558         hi = hash->items[hashidx(name) % hash->csize];
559         while (hi) {
560                 if (strcmp(hi->name, name) == 0)
561                         return &(hi->data);
562                 hi = hi->next;
563         }
564         return NULL;
565 }
566
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
569 {
570         unsigned newsize, i, idx;
571         hash_item **newitems, *hi, *thi;
572
573         if (hash->nprime == ARRAY_SIZE(PRIMES))
574                 return;
575
576         newsize = PRIMES[hash->nprime++];
577         newitems = xzalloc(newsize * sizeof(newitems[0]));
578
579         for (i = 0; i < hash->csize; i++) {
580                 hi = hash->items[i];
581                 while (hi) {
582                         thi = hi;
583                         hi = thi->next;
584                         idx = hashidx(thi->name) % newsize;
585                         thi->next = newitems[idx];
586                         newitems[idx] = thi;
587                 }
588         }
589
590         free(hash->items);
591         hash->csize = newsize;
592         hash->items = newitems;
593 }
594
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
597 {
598         hash_item *hi;
599         unsigned idx;
600         int l;
601
602         hi = hash_search(hash, name);
603         if (!hi) {
604                 if (++hash->nel / hash->csize > 10)
605                         hash_rebuild(hash);
606
607                 l = strlen(name) + 1;
608                 hi = xzalloc(sizeof(*hi) + l);
609                 strcpy(hi->name, name);
610
611                 idx = hashidx(name) % hash->csize;
612                 hi->next = hash->items[idx];
613                 hash->items[idx] = hi;
614                 hash->glen += l;
615         }
616         return &(hi->data);
617 }
618
/* Typed wrappers around hash_find(): look the name up, creating the entry
 * if needed. newvar/newfile/newfunc use the global vhash/fdhash/fnhash. */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
623
624 static void hash_remove(xhash *hash, const char *name)
625 {
626         hash_item *hi, **phi;
627
628         phi = &(hash->items[hashidx(name) % hash->csize]);
629         while (*phi) {
630                 hi = *phi;
631                 if (strcmp(hi->name, name) == 0) {
632                         hash->glen -= (strlen(name) + 1);
633                         hash->nel--;
634                         *phi = hi->next;
635                         free(hi);
636                         break;
637                 }
638                 phi = &(hi->next);
639         }
640 }
641
642 /* ------ some useful functions ------ */
643
644 static void skip_spaces(char **s)
645 {
646         char *p = *s;
647
648         while (1) {
649                 if (*p == '\\' && p[1] == '\n') {
650                         p++;
651                         t_lineno++;
652                 } else if (*p != ' ' && *p != '\t') {
653                         break;
654                 }
655                 p++;
656         }
657         *s = p;
658 }
659
/*
 * Return the string *s points to and move *s just past its terminating
 * NUL, i.e. to the start of the next string of a NUL-separated list.
 */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;
	return word;
}
667
/*
 * Fetch the next character from *s and advance *s past it. After a
 * backslash, bb_process_escape_sequence() is called to decode the escape.
 * If that call yields a backslash without moving *s, the character after
 * the backslash is returned as-is and consumed.
 */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	(*s)++;
	after = *s;
	if (ch == '\\') {
		ch = bb_process_escape_sequence((const char**)s);
		if (ch == '\\' && *s == after) {
			ch = **s;
			(*s)++;
		}
	}
	return ch;
}
680
681 static ALWAYS_INLINE int isalnum_(int c)
682 {
683         return (isalnum(c) || c == '_');
684 }
685
/*
 * Parse a number at *pp and advance *pp past the consumed characters.
 *
 * With ENABLE_DESKTOP, input starting with "0x"/"0X" or "0<digit>" is
 * first tried as a hex/octal integer via strtoull(). For the octal case
 * the integer result is used only if parsing did not stop at a digit or
 * a '.': otherwise the text is really a zero-prefixed decimal or float
 * (e.g. "009.5", "00.5", "08"), which is handed to strtod() instead of
 * being truncated to the octal prefix.
 *
 * Returns the parsed value (0 if nothing could be parsed).
 */
static double my_strtod(char **pp)
{
#if ENABLE_DESKTOP
	char *cp = *pp;

	if (cp[0] == '0') {
		int is_hex = ((cp[1] | 0x20) == 'x');

		if (is_hex || isdigit((unsigned char)cp[1])) {
			char *end;
			unsigned long long ull = strtoull(cp, &end, 0);

			if (is_hex
			 || (!isdigit((unsigned char)*end) && *end != '.')
			) {
				*pp = end;
				return ull;
			}
			/* e.g. 009.123 (end at '9') or 000.123 (end at '.'):
			 * not an octal integer, fall through to strtod */
		}
	}
#endif
	return strtod(*pp, pp);
}
697
698 /* -------- working with variables (set/get/copy/etc) -------- */
699
700 static xhash *iamarray(var *v)
701 {
702         var *a = v;
703
704         while (a->type & VF_CHILD)
705                 a = a->x.parent;
706
707         if (!(a->type & VF_ARRAY)) {
708                 a->type |= VF_ARRAY;
709                 a->x.array = hash_init();
710         }
711         return a->x.array;
712 }
713
714 static void clear_array(xhash *array)
715 {
716         unsigned i;
717         hash_item *hi, *thi;
718
719         for (i = 0; i < array->csize; i++) {
720                 hi = array->items[i];
721                 while (hi) {
722                         thi = hi;
723                         hi = hi->next;
724                         free(thi->data.v.string);
725                         free(thi);
726                 }
727                 array->items[i] = NULL;
728         }
729         array->glen = array->nel = 0;
730 }
731
732 /* clear a variable */
733 static var *clrvar(var *v)
734 {
735         if (!(v->type & VF_FSTR))
736                 free(v->string);
737
738         v->type &= VF_DONTTOUCH;
739         v->type |= VF_DIRTY;
740         v->string = NULL;
741         return v;
742 }
743
744 /* assign string value to variable */
745 static var *setvar_p(var *v, char *value)
746 {
747         clrvar(v);
748         v->string = value;
749         handle_special(v);
750         return v;
751 }
752
753 /* same as setvar_p but make a copy of string */
754 static var *setvar_s(var *v, const char *value)
755 {
756         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
757 }
758
759 /* same as setvar_s but sets USER flag */
760 static var *setvar_u(var *v, const char *value)
761 {
762         v = setvar_s(v, value);
763         v->type |= VF_USER;
764         return v;
765 }
766
767 /* set array element to user string */
768 static void setari_u(var *a, int idx, const char *s)
769 {
770         var *v;
771
772         v = findvar(iamarray(a), itoa(idx));
773         setvar_u(v, s);
774 }
775
776 /* assign numeric value to variable */
777 static var *setvar_i(var *v, double value)
778 {
779         clrvar(v);
780         v->type |= VF_NUMBER;
781         v->number = value;
782         handle_special(v);
783         return v;
784 }
785
786 static const char *getvar_s(var *v)
787 {
788         /* if v is numeric and has no cached string, convert it to string */
789         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
791                 v->string = xstrdup(g_buf);
792                 v->type |= VF_CACHED;
793         }
794         return (v->string == NULL) ? "" : v->string;
795 }
796
797 static double getvar_i(var *v)
798 {
799         char *s;
800
801         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
802                 v->number = 0;
803                 s = v->string;
804                 if (s && *s) {
805                         v->number = my_strtod(&s);
806                         if (v->type & VF_USER) {
807                                 skip_spaces(&s);
808                                 if (*s != '\0')
809                                         v->type &= ~VF_USER;
810                         }
811                 } else {
812                         v->type &= ~VF_USER;
813                 }
814                 v->type |= VF_CACHED;
815         }
816         return v->number;
817 }
818
819 /* Used for operands of bitwise ops */
820 static unsigned long getvar_i_int(var *v)
821 {
822         double d = getvar_i(v);
823
824         /* Casting doubles to longs is undefined for values outside
825          * of target type range. Try to widen it as much as possible */
826         if (d >= 0)
827                 return (unsigned long)d;
828         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
829         return - (long) (unsigned long) (-d);
830 }
831
832 static var *copyvar(var *dest, const var *src)
833 {
834         if (dest != src) {
835                 clrvar(dest);
836                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837                 dest->number = src->number;
838                 if (src->string)
839                         dest->string = xstrdup(src->string);
840         }
841         handle_special(dest);
842         return dest;
843 }
844
845 static var *incvar(var *v)
846 {
847         return setvar_i(v, getvar_i(v) + 1.0);
848 }
849
850 /* return true if v is number or numeric string */
851 static int is_numeric(var *v)
852 {
853         getvar_i(v);
854         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
855 }
856
857 /* return 1 when value of v corresponds to true, 0 otherwise */
858 static int istrue(var *v)
859 {
860         if (is_numeric(v))
861                 return (v->number != 0);
862         return (v->string && v->string[0]);
863 }
864
865 /* temporary variables allocator. Last allocated should be first freed */
866 static var *nvalloc(int n)
867 {
868         nvblock *pb = NULL;
869         var *v, *r;
870         int size;
871
872         while (g_cb) {
873                 pb = g_cb;
874                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
875                         break;
876                 g_cb = g_cb->next;
877         }
878
879         if (!g_cb) {
880                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
881                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
882                 g_cb->size = size;
883                 g_cb->pos = g_cb->nv;
884                 g_cb->prev = pb;
885                 /*g_cb->next = NULL; - xzalloc did it */
886                 if (pb)
887                         pb->next = g_cb;
888         }
889
890         v = r = g_cb->pos;
891         g_cb->pos += n;
892
893         while (v < g_cb->pos) {
894                 v->type = 0;
895                 v->string = NULL;
896                 v++;
897         }
898
899         return r;
900 }
901
902 static void nvfree(var *v)
903 {
904         var *p;
905
906         if (v < g_cb->nv || v >= g_cb->pos)
907                 syntax_error(EMSG_INTERNAL_ERROR);
908
909         for (p = v; p < g_cb->pos; p++) {
910                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
911                         clear_array(iamarray(p));
912                         free(p->x.array->items);
913                         free(p->x.array);
914                 }
915                 if (p->type & VF_WALK)
916                         free(p->x.walker);
917
918                 clrvar(p);
919         }
920
921         g_cb->pos = v;
922         while (g_cb->prev && g_cb->pos == g_cb->nv) {
923                 g_cb = g_cb->prev;
924         }
925 }
926
927 /* ------- awk program text parsing ------- */
928
/*
 * next_token(expected) - deliver the class of the next token.
 *
 * Three sources, checked in this order:
 *  - t_rollback is set: the flag is cleared and the token already held
 *    in t_tclass is delivered once more;
 *  - a concatenation operator was synthesized by the previous call: the
 *    token that was put aside then (save_tclass/save_info) is delivered;
 *  - otherwise the program text at g_pos is scanned and g_pos is moved
 *    past the token.
 *
 * Strings, regexps and names are NUL-terminated in place, inside the
 * program text, and published through t_string; numbers go to t_double;
 * tokens found in the token table set t_info.
 *
 * The class is stored in t_tclass, remembered in ltclass and returned.
 * If it is not part of `expected`, syntax_error() is called.
 */
929 /* Parse next token pointed by global pos, place results into global ttt.
930  * If token isn't expected, give away. Return token class
931  */
932 static uint32_t next_token(uint32_t expected)
933 {
934 #define concat_inserted (G.next_token__concat_inserted)
935 #define save_tclass     (G.next_token__save_tclass)
936 #define save_info       (G.next_token__save_info)
937 /* Initialized to TC_OPTERM: */
938 #define ltclass         (G.next_token__ltclass)
939
940         char *p, *pp, *s;
941         const char *tl;
942         uint32_t tc;
943         const uint32_t *ti;
944         int l;
945
946         if (t_rollback) {
947                 t_rollback = FALSE;
948
949         } else if (concat_inserted) {
950                 concat_inserted = FALSE;
951                 t_tclass = save_tclass;
952                 t_info = save_info;
953
954         } else {
955                 p = g_pos;
956  readnext:
957                 skip_spaces(&p);
958                 g_lineno = t_lineno;
                /* '#' comment: skip up to, but not over, the end of line */
959                 if (*p == '#')
960                         while (*p != '\n' && *p != '\0')
961                                 p++;
962
963                 if (*p == '\n')
964                         t_lineno++;
965
966                 if (*p == '\0') {
967                         tc = TC_EOF;
968
969                 } else if (*p == '\"') {
970                         /* it's a string */
                        /* characters returned by nextchar() are written
                         * back over the source text, starting right after
                         * the opening quote */
971                         t_string = s = ++p;
972                         while (*p != '\"') {
973                                 if (*p == '\0' || *p == '\n')
974                                         syntax_error(EMSG_UNEXP_EOS);
975                                 *(s++) = nextchar(&p);
976                         }
977                         p++;
978                         *s = '\0';
979                         tc = TC_STRING;
980
981                 } else if ((expected & TC_REGEXP) && *p == '/') {
982                         /* it's regexp */
983                         t_string = s = ++p;
984                         while (*p != '/') {
985                                 if (*p == '\0' || *p == '\n')
986                                         syntax_error(EMSG_UNEXP_EOS);
987                                 *s = *p++;
988                                 if (*s++ == '\\') {
                                        /* the backslash just copied is
                                         * replaced by the decoded escape;
                                         * "\\" stays doubled, and when the
                                         * decoder consumed nothing (p == pp)
                                         * the following character is copied
                                         * unchanged */
989                                         pp = p;
990                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
991                                         if (*pp == '\\')
992                                                 *s++ = '\\';
993                                         if (p == pp)
994                                                 *s++ = *p++;
995                                 }
996                         }
997                         p++;
998                         *s = '\0';
999                         tc = TC_REGEXP;
1000
1001                 } else if (*p == '.' || isdigit(*p)) {
1002                         /* it's a number */
1003                         t_double = my_strtod(&p);
                        /* a '.' directly after the number is rejected */
1004                         if (*p == '.')
1005                                 syntax_error(EMSG_UNEXP_TOKEN);
1006                         tc = TC_NUMBER;
1007
1008                 } else {
1009                         /* search for something known */
                        /* tokenlist is walked as a sequence of entries:
                         * one length byte followed by that many characters.
                         * A length byte equal to NTCC carries no text and
                         * moves tc on to the next class bit. ti steps
                         * through tokeninfo in parallel with the real
                         * entries. */
1010                         tl = tokenlist;
1011                         tc = 0x00000001;
1012                         ti = tokeninfo;
1013                         while (*tl) {
1014                                 l = *(tl++);
1015                                 if (l == NTCC) {
1016                                         tc <<= 1;
1017                                         continue;
1018                                 }
1019                                 /* if token class is expected, token
1020                                  * matches and it's not a longer word,
1021                                  * then this is what we are looking for
1022                                  */
1023                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1024                                  && *tl == *p && strncmp(p, tl, l) == 0
1025                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1026                                 ) {
1027                                         t_info = *ti;
1028                                         p += l;
1029                                         break;
1030                                 }
1031                                 ti++;
1032                                 tl += l;
1033                         }
1034
                        /* *tl == 0 here means the table was exhausted
                         * without a match */
1035                         if (!*tl) {
1036                                 /* it's a name (var/array/function),
1037                                  * otherwise it's something wrong
1038                                  */
1039                                 if (!isalnum_(*p))
1040                                         syntax_error(EMSG_UNEXP_TOKEN);
1041
                                /* the name is moved one byte to the left,
                                 * over the character preceding it, so the
                                 * terminating NUL fits without overwriting
                                 * the character that follows the name */
1042                                 t_string = --p;
1043                                 while (isalnum_(*(++p))) {
1044                                         *(p-1) = *p;
1045                                 }
1046                                 *(p-1) = '\0';
1047                                 tc = TC_VARIABLE;
1048                                 /* also consume whitespace between functionname and bracket */
1049                                 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1050                                         skip_spaces(&p);
                                /* '(' is left unconsumed, '[' is eaten */
1051                                 if (*p == '(') {
1052                                         tc = TC_FUNCTION;
1053                                 } else {
1054                                         if (*p == '[') {
1055                                                 p++;
1056                                                 tc = TC_ARRAY;
1057                                         }
1058                                 }
1059                         }
1060                 }
1061                 g_pos = p;
1062
1063                 /* skipping newlines in some cases */
1064                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1065                         goto readnext;
1066
1067                 /* insert concatenation operator when needed */
                /* the real token is put aside and handed out by the
                 * next call (see the concat_inserted branch above) */
1068                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1069                         concat_inserted = TRUE;
1070                         save_tclass = tc;
1071                         save_info = t_info;
1072                         tc = TC_BINOP;
1073                         t_info = OC_CONCAT | SS | P(35);
1074                 }
1075
1076                 t_tclass = tc;
1077         }
1078         ltclass = t_tclass;
1079
1080         /* Are we ready for this? */
1081         if (!(ltclass & expected))
1082                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1083                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1084
1085         return ltclass;
1086 #undef concat_inserted
1087 #undef save_tclass
1088 #undef save_info
1089 #undef ltclass
1090 }
1091
1092 static void rollback_token(void)
1093 {
1094         t_rollback = TRUE;
1095 }
1096
1097 static node *new_node(uint32_t info)
1098 {
1099         node *n;
1100
1101         n = xzalloc(sizeof(node));
1102         n->info = info;
1103         n->lineno = g_lineno;
1104         return n;
1105 }
1106
1107 static node *mk_re_node(const char *s, node *n, regex_t *re)
1108 {
1109         n->info = OC_REGEXP;
1110         n->l.re = re;
1111         n->r.ire = re + 1;
1112         xregcomp(re, s, REG_EXTENDED);
1113         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1114
1115         return n;
1116 }
1117
1118 static node *condition(void)
1119 {
1120         next_token(TC_SEQSTART);
1121         return parse_expr(TC_SEQTERM);
1122 }
1123
/*
 * parse_expr(iexp) - build an expression tree from the token stream.
 *
 * iexp is the set of token classes that terminate the expression; the
 * terminating token is consumed. The tree hangs off a stack-local
 * sentinel node `sn` (info = PRIMASK) and sn.r.n is returned, which is
 * NULL when the terminator came first.
 *
 * Locals:
 *   cn    - node created most recently;
 *   xtc   - token classes acceptable at the next step;
 *   glptr - last getline operand node, kept so that a directly following
 *           '<' is taken as input redirection.
 * While parsing, a.n of each new node is set to the node it was hung
 * under; the operator branch walks these links back up.
 */
1124 /* parse expression terminated by given argument, return ptr
1125  * to built subtree. Terminator is eaten by parse_expr */
1126 static node *parse_expr(uint32_t iexp)
1127 {
1128         node sn;
1129         node *cn = &sn;
1130         node *vn, *glptr;
1131         uint32_t tc, xtc;
1132         var *v;
1133
1134         sn.info = PRIMASK;
1135         sn.r.n = glptr = NULL;
1136         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1137
1138         while (!((tc = next_token(xtc)) & iexp)) {
                /* the '<' operator is recognized by its exact t_info value */
1139                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1140                         /* input redirection (<) attached to glptr node */
1141                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1142                         cn->a.n = glptr;
1143                         xtc = TC_OPERAND | TC_UOPPRE;
1144                         glptr = NULL;
1145
1146                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1147                         /* for binary and postfix-unary operators, jump back over
1148                          * previous operators with higher priority */
1149                         vn = cn;
1150                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1151                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1152                         ) {
1153                                 vn = vn->a.n;
1154                         }
1155                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1156                                 t_info += P(6);
                        /* the new operator node takes vn's place under
                         * vn's former parent; vn becomes its left operand
                         * (binary) or its right operand (postfix) */
1157                         cn = vn->a.n->r.n = new_node(t_info);
1158                         cn->a.n = vn->a.n;
1159                         if (tc & TC_BINOP) {
1160                                 cn->l.n = vn;
1161                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1162                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1163                                         /* it's a pipe */
1164                                         next_token(TC_GETLINE);
1165                                         /* give maximum priority to this pipe */
1166                                         cn->info &= ~PRIMASK;
1167                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1168                                 }
1169                         } else {
1170                                 cn->r.n = vn;
1171                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1172                         }
1173                         vn->a.n = cn;
1174
1175                 } else {
1176                         /* for operands and prefix-unary operators, attach them
1177                          * to last node */
1178                         vn = cn;
1179                         cn = vn->r.n = new_node(t_info);
1180                         cn->a.n = vn;
1181                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1182                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1183                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1184                                 /* one should be very careful with switch on tclass -
1185                                  * only simple tclasses should be used! */
1186                                 switch (tc) {
1187                                 case TC_VARIABLE:
1188                                 case TC_ARRAY:
                                        /* a name found in ahash becomes
                                         * OC_FNARG with its stored index;
                                         * any other name is an OC_VAR bound
                                         * via newvar() */
1189                                         cn->info = OC_VAR;
1190                                         v = hash_search(ahash, t_string);
1191                                         if (v != NULL) {
1192                                                 cn->info = OC_FNARG;
1193                                                 cn->l.i = v->x.aidx;
1194                                         } else {
1195                                                 cn->l.v = newvar(t_string);
1196                                         }
1197                                         if (tc & TC_ARRAY) {
1198                                                 cn->info |= xS;
1199                                                 cn->r.n = parse_expr(TC_ARRTERM);
1200                                         }
1201                                         break;
1202
1203                                 case TC_NUMBER:
1204                                 case TC_STRING:
                                        /* literal: stored in a freshly
                                         * allocated var of its own */
1205                                         cn->info = OC_VAR;
1206                                         v = cn->l.v = xzalloc(sizeof(var));
1207                                         if (tc & TC_NUMBER)
1208                                                 setvar_i(v, t_double);
1209                                         else
1210                                                 setvar_s(v, t_string);
1211                                         break;
1212
1213                                 case TC_REGEXP:
1214                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1215                                         break;
1216
1217                                 case TC_FUNCTION:
1218                                         cn->info = OC_FUNC;
1219                                         cn->r.f = newfunc(t_string);
1220                                         cn->l.n = condition();
1221                                         break;
1222
1223                                 case TC_SEQSTART:
                                        /* parenthesized subexpression: the
                                         * subtree replaces the node that was
                                         * just attached under vn */
1224                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1225                                         cn->a.n = vn;
1226                                         break;
1227
1228                                 case TC_GETLINE:
1229                                         glptr = cn;
1230                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1231                                         break;
1232
1233                                 case TC_BUILTIN:
1234                                         cn->l.n = condition();
1235                                         break;
1236                                 }
1237                         }
1238                 }
1239         }
1240         return sn.r.n;
1241 }
1242
1243 /* add node to chain. Return ptr to alloc'd node */
1244 static node *chain_node(uint32_t info)
1245 {
1246         node *n;
1247
1248         if (!seq->first)
1249                 seq->first = seq->last = new_node(0);
1250
1251         if (seq->programname != g_progname) {
1252                 seq->programname = g_progname;
1253                 n = chain_node(OC_NEWSOURCE);
1254                 n->l.s = xstrdup(g_progname);
1255         }
1256
1257         n = seq->last;
1258         n->info = info;
1259         seq->last = n->a.n = new_node(OC_DONE);
1260
1261         return n;
1262 }
1263
1264 static void chain_expr(uint32_t info)
1265 {
1266         node *n;
1267
1268         n = chain_node(info);
1269         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1270         if (t_tclass & TC_GRPTERM)
1271                 rollback_token();
1272 }
1273
1274 static node *chain_loop(node *nn)
1275 {
1276         node *n, *n2, *save_brk, *save_cont;
1277
1278         save_brk = break_ptr;
1279         save_cont = continue_ptr;
1280
1281         n = chain_node(OC_BR | Vx);
1282         continue_ptr = new_node(OC_EXEC);
1283         break_ptr = new_node(OC_EXEC);
1284         chain_group();
1285         n2 = chain_node(OC_EXEC | Vx);
1286         n2->l.n = nn;
1287         n2->a.n = n;
1288         continue_ptr->a.n = n2;
1289         break_ptr->a.n = n->r.n = seq->last;
1290
1291         continue_ptr = save_cont;
1292         break_ptr = save_brk;
1293
1294         return n;
1295 }
1296
/*
 * chain_group() - parse one statement or one brace-delimited group and
 * append the resulting nodes to the current chain (seq).
 *
 * Leading newlines are skipped. Then, depending on the first token:
 *  - TC_GRPSTART: members are parsed recursively until TC_GRPTERM;
 *  - TC_OPSEQ / TC_OPTERM: the token is pushed back and an expression
 *    statement is chained;
 *  - otherwise (a statement keyword): handled by the switch below.
 */
1297 /* parse group and attach it to chain */
1298 static void chain_group(void)
1299 {
1300         uint32_t c;
1301         node *n, *n2, *n3;
1302
1303         do {
1304                 c = next_token(TC_GRPSEQ);
1305         } while (c & TC_NEWLINE);
1306
1307         if (c & TC_GRPSTART) {
1308                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1309                         if (t_tclass & TC_NEWLINE) continue;
1310                         rollback_token();
1311                         chain_group();
1312                 }
1313         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1314                 rollback_token();
1315                 chain_expr(OC_EXEC | Vx);
1316         } else {                                                /* TC_STATEMNT */
1317                 switch (t_info & OPCLSMASK) {
1318                 case ST_IF:
                        /* n: branch node holding the condition; its r.n is
                         * linked to the node following the "then" body.
                         * n2: OC_EXEC node chained right after the "then"
                         * body; when an else part exists, its a.n is
                         * re-linked to the node following the else body. */
1319                         n = chain_node(OC_BR | Vx);
1320                         n->l.n = condition();
1321                         chain_group();
1322                         n2 = chain_node(OC_EXEC);
1323                         n->r.n = seq->last;
1324                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1325                                 chain_group();
1326                                 n2->a.n = seq->last;
1327                         } else {
1328                                 rollback_token();
1329                         }
1330                         break;
1331
1332                 case ST_WHILE:
                        /* the condition is parsed before the loop is
                         * chained and then stored in the loop head */
1333                         n2 = condition();
1334                         n = chain_loop(NULL);
1335                         n->l.n = n2;
1336                         break;
1337
1338                 case ST_DO:
                        /* n2 precedes the loop head and is linked to the
                         * node after the head, i.e. the first pass does not
                         * go through the head node; the condition follows
                         * the body in the source */
1339                         n2 = chain_node(OC_EXEC);
1340                         n = chain_loop(NULL);
1341                         n2->a.n = n->a.n;
1342                         next_token(TC_WHILE);
1343                         n->l.n = condition();
1344                         break;
1345
1346                 case ST_FOR:
1347                         next_token(TC_SEQSTART);
1348                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1349                         if (t_tclass & TC_SEQTERM) {    /* for-in */
                                /* the bracketed expression must be an
                                 * OC_IN node; its operands feed OC_WALKINIT
                                 * and the loop head becomes OC_WALKNEXT */
1350                                 if ((n2->info & OPCLSMASK) != OC_IN)
1351                                         syntax_error(EMSG_UNEXP_TOKEN);
1352                                 n = chain_node(OC_WALKINIT | VV);
1353                                 n->l.n = n2->l.n;
1354                                 n->r.n = n2->r.n;
1355                                 n = chain_loop(NULL);
1356                                 n->info = OC_WALKNEXT | Vx;
1357                                 n->l.n = n2->l.n;
1358                         } else {                        /* for (;;) */
                                /* first expression: chained as a plain
                                 * statement; second: loop condition;
                                 * third: passed to chain_loop() */
1359                                 n = chain_node(OC_EXEC | Vx);
1360                                 n->l.n = n2;
1361                                 n2 = parse_expr(TC_SEMICOL);
1362                                 n3 = parse_expr(TC_SEQTERM);
1363                                 n = chain_loop(n3);
1364                                 n->l.n = n2;
                                /* empty condition: the head is turned into
                                 * a plain OC_EXEC node */
1365                                 if (!n2)
1366                                         n->info = OC_EXEC;
1367                         }
1368                         break;
1369
1370                 case OC_PRINT:
1371                 case OC_PRINTF:
                        /* left operand: the argument expression; when an
                         * output redirection token ends it, that token's
                         * t_info is OR-ed into the node and the target
                         * expression becomes the right operand */
1372                         n = chain_node(t_info);
1373                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1374                         if (t_tclass & TC_OUTRDR) {
1375                                 n->info |= t_info;
1376                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1377                         }
1378                         if (t_tclass & TC_GRPTERM)
1379                                 rollback_token();
1380                         break;
1381
1382                 case OC_BREAK:
1383                         n = chain_node(OC_EXEC);
1384                         n->a.n = break_ptr;
1385                         break;
1386
1387                 case OC_CONTINUE:
1388                         n = chain_node(OC_EXEC);
1389                         n->a.n = continue_ptr;
1390                         break;
1391
1392                 /* delete, next, nextfile, return, exit */
1393                 default:
1394                         chain_expr(t_info);
1395                 }
1396         }
1397 }
1398
1399 static void parse_program(char *p)
1400 {
1401         uint32_t tclass;
1402         node *cn;
1403         func *f;
1404         var *v;
1405
1406         g_pos = p;
1407         t_lineno = 1;
1408         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1409                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1410
1411                 if (tclass & TC_OPTERM)
1412                         continue;
1413
1414                 seq = &mainseq;
1415                 if (tclass & TC_BEGIN) {
1416                         seq = &beginseq;
1417                         chain_group();
1418
1419                 } else if (tclass & TC_END) {
1420                         seq = &endseq;
1421                         chain_group();
1422
1423                 } else if (tclass & TC_FUNCDECL) {
1424                         next_token(TC_FUNCTION);
1425                         g_pos++;
1426                         f = newfunc(t_string);
1427                         f->body.first = NULL;
1428                         f->nargs = 0;
1429                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1430                                 v = findvar(ahash, t_string);
1431                                 v->x.aidx = (f->nargs)++;
1432
1433                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1434                                         break;
1435                         }
1436                         seq = &(f->body);
1437                         chain_group();
1438                         clear_array(ahash);
1439
1440                 } else if (tclass & TC_OPSEQ) {
1441                         rollback_token();
1442                         cn = chain_node(OC_TEST);
1443                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1444                         if (t_tclass & TC_GRPSTART) {
1445                                 rollback_token();
1446                                 chain_group();
1447                         } else {
1448                                 chain_node(OC_PRINT);
1449                         }
1450                         cn->r.n = mainseq.last;
1451
1452                 } else /* if (tclass & TC_GRPSTART) */ {
1453                         rollback_token();
1454                         chain_group();
1455                 }
1456         }
1457 }
1458
1459
1460 /* -------- program execution part -------- */
1461
1462 static node *mk_splitter(const char *s, tsplitter *spl)
1463 {
1464         regex_t *re, *ire;
1465         node *n;
1466
1467         re = &spl->re[0];
1468         ire = &spl->re[1];
1469         n = &spl->n;
1470         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1471                 regfree(re);
1472                 regfree(ire); // TODO: nuke ire, use re+1?
1473         }
1474         if (strlen(s) > 1) {
1475                 mk_re_node(s, n, re);
1476         } else {
1477                 n->info = (uint32_t) *s;
1478         }
1479
1480         return n;
1481 }
1482
1483 /* use node as a regular expression. Supplied with node ptr and regex_t
1484  * storage space. Return ptr to regex (if result points to preg, it should
1485  * be later regfree'd manually
1486  */
1487 static regex_t *as_regex(node *op, regex_t *preg)
1488 {
1489         int cflags;
1490         var *v;
1491         const char *s;
1492
1493         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1494                 return icase ? op->r.ire : op->l.re;
1495         }
1496         v = nvalloc(1);
1497         s = getvar_s(evaluate(op, v));
1498
1499         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1500         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1501          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1502          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1503          * (maybe gsub is not supposed to use REG_EXTENDED?).
1504          */
1505         if (regcomp(preg, s, cflags)) {
1506                 cflags &= ~REG_EXTENDED;
1507                 xregcomp(preg, s, cflags);
1508         }
1509         nvfree(v);
1510         return preg;
1511 }
1512
1513 /* gradually increasing buffer */
1514 static void qrealloc(char **b, int n, int *size)
1515 {
1516         if (!*b || n >= *size) {
1517                 *size = n + (n>>1) + 80;
1518                 *b = xrealloc(*b, *size);
1519         }
1520 }
1521
1522 /* resize field storage space */
1523 static void fsrealloc(int size)
1524 {
1525         int i;
1526
1527         if (size >= maxfields) {
1528                 i = maxfields;
1529                 maxfields = size + 16;
1530                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1531                 for (; i < maxfields; i++) {
1532                         Fields[i].type = VF_SPECIAL;
1533                         Fields[i].string = NULL;
1534                 }
1535         }
1536
1537         if (size < nfields) {
1538                 for (i = size; i < nfields; i++) {
1539                         clrvar(Fields + i);
1540                 }
1541         }
1542         nfields = size;
1543 }
1544
/*
 * awk_split(s, spl, slist) - split string s as directed by splitter
 * node spl.
 *
 * A buffer of strlen(s)*2+3 bytes is allocated and handed back through
 * *slist; it receives the fields as consecutive NUL-terminated words.
 * Returns the number of fields.
 *
 * Mode is chosen from spl->info:
 *  - class OC_REGEXP: regexp split (spl->l.re, or spl->r.ire with icase);
 *  - character '\0':  every character becomes a field;
 *  - character other than ' ': split on that character (both cases of
 *    it when icase is set);
 *  - ' ': split on runs of whitespace, leading whitespace skipped.
 *
 * c[] holds the separator set: c[0]/c[1] the separator character,
 * c[2] is '\n' when RS is the empty string, else NUL. Hence c+2 is the
 * (possibly empty) set of extra line-ending characters.
 */
1545 static int awk_split(const char *s, node *spl, char **slist)
1546 {
1547         int l, n = 0;
1548         char c[4];
1549         char *s1;
1550         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1551
1552         /* in worst case, each char would be a separate field */
1553         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1554         strcpy(s1, s);
1555
1556         c[0] = c[1] = (char)spl->info;
1557         c[2] = c[3] = '\0';
1558         if (*getvar_s(intvar[RS]) == '\0')
1559                 c[2] = '\n';
1560
1561         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1562                 if (!*s)
1563                         return n; /* "": zero fields */
1564                 n++; /* at least one field will be there */
1565                 do {
1566                         l = strcspn(s, c+2); /* len till next NUL or \n */
                        /* a match is only used when it starts no later
                         * than position l */
1567                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1568                          && pmatch[0].rm_so <= l
1569                         ) {
1570                                 l = pmatch[0].rm_so;
                                /* a match ending at offset 0 is widened
                                 * to one character so that s advances */
1571                                 if (pmatch[0].rm_eo == 0) {
1572                                         l++;
1573                                         pmatch[0].rm_eo++;
1574                                 }
1575                                 n++; /* we saw yet another delimiter */
1576                         } else {
                                /* no usable match: the field runs to
                                 * position l; a character found there
                                 * (not NUL) is consumed as well */
1577                                 pmatch[0].rm_eo = l;
1578                                 if (s[l])
1579                                         pmatch[0].rm_eo++;
1580                         }
1581                         memcpy(s1, s, l);
1582                         /* make sure we remove *all* of the separator chars */
1583                         do {
1584                                 s1[l] = '\0';
1585                         } while (++l < pmatch[0].rm_eo);
1586                         nextword(&s1);
1587                         s += pmatch[0].rm_eo;
1588                 } while (*s);
1589                 return n;
1590         }
1591         if (c[0] == '\0') {  /* null split */
1592                 while (*s) {
1593                         *s1++ = *s++;
1594                         *s1++ = '\0';
1595                         n++;
1596                 }
1597                 return n;
1598         }
1599         if (c[0] != ' ') {  /* single-character split */
1600                 if (icase) {
1601                         c[0] = toupper(c[0]);
1602                         c[1] = tolower(c[1]);
1603                 }
                /* operates on the copy of s made above; a non-empty
                 * string counts as one field before any separator */
1604                 if (*s1) n++;
1605                 while ((s1 = strpbrk(s1, c))) {
1606                         *s1++ = '\0';
1607                         n++;
1608                 }
1609                 return n;
1610         }
1611         /* space split */
1612         while (*s) {
1613                 s = skip_whitespace(s);
1614                 if (!*s) break;
1615                 n++;
1616                 while (*s && !isspace(*s))
1617                         *s1++ = *s++;
1618                 *s1++ = '\0';
1619         }
1620         return n;
1621 }
1622
1623 static void split_f0(void)
1624 {
1625 /* static char *fstrings; */
1626 #define fstrings (G.split_f0__fstrings)
1627
1628         int i, n;
1629         char *s;
1630
1631         if (is_f0_split)
1632                 return;
1633
1634         is_f0_split = TRUE;
1635         free(fstrings);
1636         fsrealloc(0);
1637         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1638         fsrealloc(n);
1639         s = fstrings;
1640         for (i = 0; i < n; i++) {
1641                 Fields[i].string = nextword(&s);
1642                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1643         }
1644
1645         /* set NF manually to avoid side effects */
1646         clrvar(intvar[NF]);
1647         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1648         intvar[NF]->number = nfields;
1649 #undef fstrings
1650 }
1651
1652 /* perform additional actions when some internal variables changed */
1653 static void handle_special(var *v)
1654 {
1655         int n;
1656         char *b;
1657         const char *sep, *s;
1658         int sl, l, len, i, bsize;
1659
1660         if (!(v->type & VF_SPECIAL))
1661                 return;
1662
1663         if (v == intvar[NF]) {
1664                 n = (int)getvar_i(v);
1665                 fsrealloc(n);
1666
1667                 /* recalculate $0 */
1668                 sep = getvar_s(intvar[OFS]);
1669                 sl = strlen(sep);
1670                 b = NULL;
1671                 len = 0;
1672                 for (i = 0; i < n; i++) {
1673                         s = getvar_s(&Fields[i]);
1674                         l = strlen(s);
1675                         if (b) {
1676                                 memcpy(b+len, sep, sl);
1677                                 len += sl;
1678                         }
1679                         qrealloc(&b, len+l+sl, &bsize);
1680                         memcpy(b+len, s, l);
1681                         len += l;
1682                 }
1683                 if (b)
1684                         b[len] = '\0';
1685                 setvar_p(intvar[F0], b);
1686                 is_f0_split = TRUE;
1687
1688         } else if (v == intvar[F0]) {
1689                 is_f0_split = FALSE;
1690
1691         } else if (v == intvar[FS]) {
1692                 mk_splitter(getvar_s(v), &fsplitter);
1693
1694         } else if (v == intvar[RS]) {
1695                 mk_splitter(getvar_s(v), &rsplitter);
1696
1697         } else if (v == intvar[IGNORECASE]) {
1698                 icase = istrue(v);
1699
1700         } else {                                /* $n */
1701                 n = getvar_i(intvar[NF]);
1702                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1703                 /* right here v is invalid. Just to note... */
1704         }
1705 }
1706
1707 /* step through func/builtin/etc arguments */
1708 static node *nextarg(node **pn)
1709 {
1710         node *n;
1711
1712         n = *pn;
1713         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1714                 *pn = n->r.n;
1715                 n = n->l.n;
1716         } else {
1717                 *pn = NULL;
1718         }
1719         return n;
1720 }
1721
1722 static void hashwalk_init(var *v, xhash *array)
1723 {
1724         char **w;
1725         hash_item *hi;
1726         unsigned i;
1727
1728         if (v->type & VF_WALK)
1729                 free(v->x.walker);
1730
1731         v->type |= VF_WALK;
1732         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1733         w[0] = w[1] = (char *)(w + 2);
1734         for (i = 0; i < array->csize; i++) {
1735                 hi = array->items[i];
1736                 while (hi) {
1737                         strcpy(*w, hi->name);
1738                         nextword(w);
1739                         hi = hi->next;
1740                 }
1741         }
1742 }
1743
1744 static int hashwalk_next(var *v)
1745 {
1746         char **w;
1747
1748         w = v->x.walker;
1749         if (w[1] == w[0])
1750                 return FALSE;
1751
1752         setvar_s(v, nextword(w+1));
1753         return TRUE;
1754 }
1755
1756 /* evaluate node, return 1 when result is true, 0 otherwise */
1757 static int ptest(node *pattern)
1758 {
1759         /* ptest__v is "static": to save stack space? */
1760         return istrue(evaluate(pattern, &G.ptest__v));
1761 }
1762
1763 /* read next record from stream rsm into a variable v */
1764 static int awk_getline(rstream *rsm, var *v)
1765 {
1766         char *b;
1767         regmatch_t pmatch[2];
1768         int a, p, pp=0, size;
1769         int fd, so, eo, r, rp;
1770         char c, *m, *s;
1771
1772         /* we're using our own buffer since we need access to accumulating
1773          * characters
1774          */
1775         fd = fileno(rsm->F);
1776         m = rsm->buffer;
1777         a = rsm->adv;
1778         p = rsm->pos;
1779         size = rsm->size;
1780         c = (char) rsplitter.n.info;
1781         rp = 0;
1782
1783         if (!m) qrealloc(&m, 256, &size);
1784         do {
1785                 b = m + a;
1786                 so = eo = p;
1787                 r = 1;
1788                 if (p > 0) {
1789                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1790                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1791                                                         b, 1, pmatch, 0) == 0) {
1792                                         so = pmatch[0].rm_so;
1793                                         eo = pmatch[0].rm_eo;
1794                                         if (b[eo] != '\0')
1795                                                 break;
1796                                 }
1797                         } else if (c != '\0') {
1798                                 s = strchr(b+pp, c);
1799                                 if (!s) s = memchr(b+pp, '\0', p - pp);
1800                                 if (s) {
1801                                         so = eo = s-b;
1802                                         eo++;
1803                                         break;
1804                                 }
1805                         } else {
1806                                 while (b[rp] == '\n')
1807                                         rp++;
1808                                 s = strstr(b+rp, "\n\n");
1809                                 if (s) {
1810                                         so = eo = s-b;
1811                                         while (b[eo] == '\n') eo++;
1812                                         if (b[eo] != '\0')
1813                                                 break;
1814                                 }
1815                         }
1816                 }
1817
1818                 if (a > 0) {
1819                         memmove(m, (const void *)(m+a), p+1);
1820                         b = m;
1821                         a = 0;
1822                 }
1823
1824                 qrealloc(&m, a+p+128, &size);
1825                 b = m + a;
1826                 pp = p;
1827                 p += safe_read(fd, b+p, size-p-1);
1828                 if (p < pp) {
1829                         p = 0;
1830                         r = 0;
1831                         setvar_i(intvar[ERRNO], errno);
1832                 }
1833                 b[p] = '\0';
1834
1835         } while (p > pp);
1836
1837         if (p == 0) {
1838                 r--;
1839         } else {
1840                 c = b[so]; b[so] = '\0';
1841                 setvar_s(v, b+rp);
1842                 v->type |= VF_USER;
1843                 b[so] = c;
1844                 c = b[eo]; b[eo] = '\0';
1845                 setvar_s(intvar[RT], b+so);
1846                 b[eo] = c;
1847         }
1848
1849         rsm->buffer = m;
1850         rsm->adv = a + eo;
1851         rsm->pos = p - eo;
1852         rsm->size = size;
1853
1854         return r;
1855 }
1856
1857 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1858 {
1859         int r = 0;
1860         char c;
1861         const char *s = format;
1862
1863         if (int_as_int && n == (int)n) {
1864                 r = snprintf(b, size, "%d", (int)n);
1865         } else {
1866                 do { c = *s; } while (c && *++s);
1867                 if (strchr("diouxX", c)) {
1868                         r = snprintf(b, size, format, (int)n);
1869                 } else if (strchr("eEfgG", c)) {
1870                         r = snprintf(b, size, format, n);
1871                 } else {
1872                         syntax_error(EMSG_INV_FMT);
1873                 }
1874         }
1875         return r;
1876 }
1877
1878 /* formatted output into an allocated buffer, return ptr to buffer */
1879 static char *awk_printf(node *n)
1880 {
1881         char *b = NULL;
1882         char *fmt, *s, *f;
1883         const char *s1;
1884         int i, j, incr, bsize;
1885         char c, c1;
1886         var *v, *arg;
1887
1888         v = nvalloc(1);
1889         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1890
1891         i = 0;
1892         while (*f) {
1893                 s = f;
1894                 while (*f && (*f != '%' || *(++f) == '%'))
1895                         f++;
1896                 while (*f && !isalpha(*f)) {
1897                         if (*f == '*')
1898                                 syntax_error("%*x formats are not supported");
1899                         f++;
1900                 }
1901
1902                 incr = (f - s) + MAXVARFMT;
1903                 qrealloc(&b, incr + i, &bsize);
1904                 c = *f;
1905                 if (c != '\0') f++;
1906                 c1 = *f;
1907                 *f = '\0';
1908                 arg = evaluate(nextarg(&n), v);
1909
1910                 j = i;
1911                 if (c == 'c' || !c) {
1912                         i += sprintf(b+i, s, is_numeric(arg) ?
1913                                         (char)getvar_i(arg) : *getvar_s(arg));
1914                 } else if (c == 's') {
1915                         s1 = getvar_s(arg);
1916                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1917                         i += sprintf(b+i, s, s1);
1918                 } else {
1919                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1920                 }
1921                 *f = c1;
1922
1923                 /* if there was an error while sprintf, return value is negative */
1924                 if (i < j) i = j;
1925         }
1926
1927         b = xrealloc(b, i + 1);
1928         free(fmt);
1929         nvfree(v);
1930         b[i] = '\0';
1931         return b;
1932 }
1933
1934 /* common substitution routine
1935  * replace (nm) substring of (src) that match (n) with (repl), store
1936  * result into (dest), return number of substitutions. If nm=0, replace
1937  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1938  * subexpression matching (\1-\9)
1939  */
1940 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1941 {
1942         char *ds = NULL;
1943         const char *s;
1944         const char *sp;
1945         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1946         regmatch_t pmatch[10];
1947         regex_t sreg, *re;
1948
1949         re = as_regex(rn, &sreg);
1950         if (!src) src = intvar[F0];
1951         if (!dest) dest = intvar[F0];
1952
1953         i = di = 0;
1954         sp = getvar_s(src);
1955         rl = strlen(repl);
1956         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1957                 so = pmatch[0].rm_so;
1958                 eo = pmatch[0].rm_eo;
1959
1960                 qrealloc(&ds, di + eo + rl, &dssize);
1961                 memcpy(ds + di, sp, eo);
1962                 di += eo;
1963                 if (++i >= nm) {
1964                         /* replace */
1965                         di -= (eo - so);
1966                         nbs = 0;
1967                         for (s = repl; *s; s++) {
1968                                 ds[di++] = c = *s;
1969                                 if (c == '\\') {
1970                                         nbs++;
1971                                         continue;
1972                                 }
1973                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1974                                         di -= ((nbs + 3) >> 1);
1975                                         j = 0;
1976                                         if (c != '&') {
1977                                                 j = c - '0';
1978                                                 nbs++;
1979                                         }
1980                                         if (nbs % 2) {
1981                                                 ds[di++] = c;
1982                                         } else {
1983                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1984                                                 qrealloc(&ds, di + rl + n, &dssize);
1985                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1986                                                 di += n;
1987                                         }
1988                                 }
1989                                 nbs = 0;
1990                         }
1991                 }
1992
1993                 sp += eo;
1994                 if (i == nm)
1995                         break;
1996                 if (eo == so) {
1997                         ds[di] = *sp++;
1998                         if (!ds[di++])
1999                                 break;
2000                 }
2001         }
2002
2003         qrealloc(&ds, di + strlen(sp), &dssize);
2004         strcpy(ds + di, sp);
2005         setvar_p(dest, ds);
2006         if (re == &sreg)
2007                 regfree(re);
2008         return i;
2009 }
2010
2011 static NOINLINE int do_mktime(const char *ds)
2012 {
2013         struct tm then;
2014         int count;
2015
2016         /*memset(&then, 0, sizeof(then)); - not needed */
2017         then.tm_isdst = -1; /* default is unknown */
2018
2019         /* manpage of mktime says these fields are ints,
2020          * so we can sscanf stuff directly into them */
2021         count = sscanf(ds, "%u %u %u %u %u %u %d",
2022                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2023                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2024                 &then.tm_isdst);
2025
2026         if (count < 6
2027          || (unsigned)then.tm_mon < 1
2028          || (unsigned)then.tm_year < 1900
2029         ) {
2030                 return -1;
2031         }
2032
2033         then.tm_mon -= 1;
2034         then.tm_year -= 1900;
2035
2036         return mktime(&then);
2037 }
2038
2039 static NOINLINE var *exec_builtin(node *op, var *res)
2040 {
2041 #define tspl (G.exec_builtin__tspl)
2042
2043         var *tv;
2044         node *an[4];
2045         var *av[4];
2046         const char *as[4];
2047         regmatch_t pmatch[2];
2048         regex_t sreg, *re;
2049         node *spl;
2050         uint32_t isr, info;
2051         int nargs;
2052         time_t tt;
2053         char *s, *s1;
2054         int i, l, ll, n;
2055
2056         tv = nvalloc(4);
2057         isr = info = op->info;
2058         op = op->l.n;
2059
2060         av[2] = av[3] = NULL;
2061         for (i = 0; i < 4 && op; i++) {
2062                 an[i] = nextarg(&op);
2063                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2064                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2065                 isr >>= 1;
2066         }
2067
2068         nargs = i;
2069         if ((uint32_t)nargs < (info >> 30))
2070                 syntax_error(EMSG_TOO_FEW_ARGS);
2071
2072         info &= OPNMASK;
2073         switch (info) {
2074
2075         case B_a2:
2076 #if ENABLE_FEATURE_AWK_LIBM
2077                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2078 #else
2079                 syntax_error(EMSG_NO_MATH);
2080 #endif
2081                 break;
2082
2083         case B_sp:
2084                 if (nargs > 2) {
2085                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2086                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2087                 } else {
2088                         spl = &fsplitter.n;
2089                 }
2090
2091                 n = awk_split(as[0], spl, &s);
2092                 s1 = s;
2093                 clear_array(iamarray(av[1]));
2094                 for (i = 1; i <= n; i++)
2095                         setari_u(av[1], i, nextword(&s1));
2096                 free(s);
2097                 setvar_i(res, n);
2098                 break;
2099
2100         case B_ss:
2101                 l = strlen(as[0]);
2102                 i = getvar_i(av[1]) - 1;
2103                 if (i > l) i = l;
2104                 if (i < 0) i = 0;
2105                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2106                 if (n < 0) n = 0;
2107                 s = xstrndup(as[0]+i, n);
2108                 setvar_p(res, s);
2109                 break;
2110
2111         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2112          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2113         case B_an:
2114                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2115                 break;
2116
2117         case B_co:
2118                 setvar_i(res, ~getvar_i_int(av[0]));
2119                 break;
2120
2121         case B_ls:
2122                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2123                 break;
2124
2125         case B_or:
2126                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2127                 break;
2128
2129         case B_rs:
2130                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2131                 break;
2132
2133         case B_xo:
2134                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2135                 break;
2136
2137         case B_lo:
2138         case B_up:
2139                 s1 = s = xstrdup(as[0]);
2140                 while (*s1) {
2141                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2142                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2143                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2144                         s1++;
2145                 }
2146                 setvar_p(res, s);
2147                 break;
2148
2149         case B_ix:
2150                 n = 0;
2151                 ll = strlen(as[1]);
2152                 l = strlen(as[0]) - ll;
2153                 if (ll > 0 && l >= 0) {
2154                         if (!icase) {
2155                                 s = strstr(as[0], as[1]);
2156                                 if (s) n = (s - as[0]) + 1;
2157                         } else {
2158                                 /* this piece of code is terribly slow and
2159                                  * really should be rewritten
2160                                  */
2161                                 for (i=0; i<=l; i++) {
2162                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2163                                                 n = i+1;
2164                                                 break;
2165                                         }
2166                                 }
2167                         }
2168                 }
2169                 setvar_i(res, n);
2170                 break;
2171
2172         case B_ti:
2173                 if (nargs > 1)
2174                         tt = getvar_i(av[1]);
2175                 else
2176                         time(&tt);
2177                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2178                 i = strftime(g_buf, MAXVARFMT,
2179                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2180                         localtime(&tt));
2181                 g_buf[i] = '\0';
2182                 setvar_s(res, g_buf);
2183                 break;
2184
2185         case B_mt:
2186                 setvar_i(res, do_mktime(as[0]));
2187                 break;
2188
2189         case B_ma:
2190                 re = as_regex(an[1], &sreg);
2191                 n = regexec(re, as[0], 1, pmatch, 0);
2192                 if (n == 0) {
2193                         pmatch[0].rm_so++;
2194                         pmatch[0].rm_eo++;
2195                 } else {
2196                         pmatch[0].rm_so = 0;
2197                         pmatch[0].rm_eo = -1;
2198                 }
2199                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2200                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2201                 setvar_i(res, pmatch[0].rm_so);
2202                 if (re == &sreg) regfree(re);
2203                 break;
2204
2205         case B_ge:
2206                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2207                 break;
2208
2209         case B_gs:
2210                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2211                 break;
2212
2213         case B_su:
2214                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2215                 break;
2216         }
2217
2218         nvfree(tv);
2219         return res;
2220 #undef tspl
2221 }
2222
2223 /*
2224  * Evaluate node - the heart of the program. Supplied with subtree
2225  * and place where to store result. returns ptr to result.
2226  */
2227 #define XC(n) ((n) >> 8)
2228
2229 static var *evaluate(node *op, var *res)
2230 {
2231 /* This procedure is recursive so we should count every byte */
2232 #define fnargs (G.evaluate__fnargs)
2233 /* seed is initialized to 1 */
2234 #define seed   (G.evaluate__seed)
2235 #define sreg   (G.evaluate__sreg)
2236
2237         node *op1;
2238         var *v1;
2239         union {
2240                 var *v;
2241                 const char *s;
2242                 double d;
2243                 int i;
2244         } L, R;
2245         uint32_t opinfo;
2246         int opn;
2247         union {
2248                 char *s;
2249                 rstream *rsm;
2250                 FILE *F;
2251                 var *v;
2252                 regex_t *re;
2253                 uint32_t info;
2254         } X;
2255
2256         if (!op)
2257                 return setvar_s(res, NULL);
2258
2259         v1 = nvalloc(2);
2260
2261         while (op) {
2262                 opinfo = op->info;
2263                 opn = (opinfo & OPNMASK);
2264                 g_lineno = op->lineno;
2265
2266                 /* execute inevitable things */
2267                 op1 = op->l.n;
2268                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2269                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2270                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2271                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2272                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2273
2274                 switch (XC(opinfo & OPCLSMASK)) {
2275
2276                 /* -- iterative node type -- */
2277
2278                 /* test pattern */
2279                 case XC( OC_TEST ):
2280                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2281                                 /* it's range pattern */
2282                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2283                                         op->info |= OF_CHECKED;
2284                                         if (ptest(op1->r.n))
2285                                                 op->info &= ~OF_CHECKED;
2286
2287                                         op = op->a.n;
2288                                 } else {
2289                                         op = op->r.n;
2290                                 }
2291                         } else {
2292                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2293                         }
2294                         break;
2295
2296                 /* just evaluate an expression, also used as unconditional jump */
2297                 case XC( OC_EXEC ):
2298                         break;
2299
2300                 /* branch, used in if-else and various loops */
2301                 case XC( OC_BR ):
2302                         op = istrue(L.v) ? op->a.n : op->r.n;
2303                         break;
2304
2305                 /* initialize for-in loop */
2306                 case XC( OC_WALKINIT ):
2307                         hashwalk_init(L.v, iamarray(R.v));
2308                         break;
2309
2310                 /* get next array item */
2311                 case XC( OC_WALKNEXT ):
2312                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2313                         break;
2314
2315                 case XC( OC_PRINT ):
2316                 case XC( OC_PRINTF ):
2317                         X.F = stdout;
2318                         if (op->r.n) {
2319                                 X.rsm = newfile(R.s);
2320                                 if (!X.rsm->F) {
2321                                         if (opn == '|') {
2322                                                 X.rsm->F = popen(R.s, "w");
2323                                                 if (X.rsm->F == NULL)
2324                                                         bb_perror_msg_and_die("popen");
2325                                                 X.rsm->is_pipe = 1;
2326                                         } else {
2327                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2328                                         }
2329                                 }
2330                                 X.F = X.rsm->F;
2331                         }
2332
2333                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2334                                 if (!op1) {
2335                                         fputs(getvar_s(intvar[F0]), X.F);
2336                                 } else {
2337                                         while (op1) {
2338                                                 L.v = evaluate(nextarg(&op1), v1);
2339                                                 if (L.v->type & VF_NUMBER) {
2340                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2341                                                                         getvar_i(L.v), TRUE);
2342                                                         fputs(g_buf, X.F);
2343                                                 } else {
2344                                                         fputs(getvar_s(L.v), X.F);
2345                                                 }
2346
2347                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2348                                         }
2349                                 }
2350                                 fputs(getvar_s(intvar[ORS]), X.F);
2351
2352                         } else {        /* OC_PRINTF */
2353                                 L.s = awk_printf(op1);
2354                                 fputs(L.s, X.F);
2355                                 free((char*)L.s);
2356                         }
2357                         fflush(X.F);
2358                         break;
2359
2360                 case XC( OC_DELETE ):
2361                         X.info = op1->info & OPCLSMASK;
2362                         if (X.info == OC_VAR) {
2363                                 R.v = op1->l.v;
2364                         } else if (X.info == OC_FNARG) {
2365                                 R.v = &fnargs[op1->l.i];
2366                         } else {
2367                                 syntax_error(EMSG_NOT_ARRAY);
2368                         }
2369
2370                         if (op1->r.n) {
2371                                 clrvar(L.v);
2372                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2373                                 hash_remove(iamarray(R.v), L.s);
2374                         } else {
2375                                 clear_array(iamarray(R.v));
2376                         }
2377                         break;
2378
2379                 case XC( OC_NEWSOURCE ):
2380                         g_progname = op->l.s;
2381                         break;
2382
2383                 case XC( OC_RETURN ):
2384                         copyvar(res, L.v);
2385                         break;
2386
2387                 case XC( OC_NEXTFILE ):
2388                         nextfile = TRUE;
2389                 case XC( OC_NEXT ):
2390                         nextrec = TRUE;
2391                 case XC( OC_DONE ):
2392                         clrvar(res);
2393                         break;
2394
2395                 case XC( OC_EXIT ):
2396                         awk_exit(L.d);
2397
2398                 /* -- recursive node type -- */
2399
2400                 case XC( OC_VAR ):
2401                         L.v = op->l.v;
2402                         if (L.v == intvar[NF])
2403                                 split_f0();
2404                         goto v_cont;
2405
2406                 case XC( OC_FNARG ):
2407                         L.v = &fnargs[op->l.i];
2408  v_cont:
2409                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2410                         break;
2411
2412                 case XC( OC_IN ):
2413                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2414                         break;
2415
2416                 case XC( OC_REGEXP ):
2417                         op1 = op;
2418                         L.s = getvar_s(intvar[F0]);
2419                         goto re_cont;
2420
2421                 case XC( OC_MATCH ):
2422                         op1 = op->r.n;
2423  re_cont:
2424                         X.re = as_regex(op1, &sreg);
2425                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2426                         if (X.re == &sreg) regfree(X.re);
2427                         setvar_i(res, (R.i == 0) ^ (opn == '!'));
2428                         break;
2429
2430                 case XC( OC_MOVE ):
2431                         /* if source is a temporary string, jusk relink it to dest */
2432 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2433 //then L.v ends up being a string, which is wrong
2434 //                      if (R.v == v1+1 && R.v->string) {
2435 //                              res = setvar_p(L.v, R.v->string);
2436 //                              R.v->string = NULL;
2437 //                      } else {
2438                                 res = copyvar(L.v, R.v);
2439 //                      }
2440                         break;
2441
2442                 case XC( OC_TERNARY ):
2443                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2444                                 syntax_error(EMSG_POSSIBLE_ERROR);
2445                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2446                         break;
2447
2448                 case XC( OC_FUNC ):
2449                         if (!op->r.f->body.first)
2450                                 syntax_error(EMSG_UNDEF_FUNC);
2451
2452                         X.v = R.v = nvalloc(op->r.f->nargs + 1);
2453                         while (op1) {
2454                                 L.v = evaluate(nextarg(&op1), v1);
2455                                 copyvar(R.v, L.v);
2456                                 R.v->type |= VF_CHILD;
2457                                 R.v->x.parent = L.v;
2458                                 if (++R.v - X.v >= op->r.f->nargs)
2459                                         break;
2460                         }
2461
2462                         R.v = fnargs;
2463                         fnargs = X.v;
2464
2465                         L.s = g_progname;
2466                         res = evaluate(op->r.f->body.first, res);
2467                         g_progname = L.s;
2468
2469                         nvfree(fnargs);
2470                         fnargs = R.v;
2471                         break;
2472
2473                 case XC( OC_GETLINE ):
2474                 case XC( OC_PGETLINE ):
2475                         if (op1) {
2476                                 X.rsm = newfile(L.s);
2477                                 if (!X.rsm->F) {
2478                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2479                                                 X.rsm->F = popen(L.s, "r");
2480                                                 X.rsm->is_pipe = TRUE;
2481                                         } else {
2482                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2483                                         }
2484                                 }
2485                         } else {
2486                                 if (!iF) iF = next_input_file();
2487                                 X.rsm = iF;
2488                         }
2489
2490                         if (!X.rsm->F) {
2491                                 setvar_i(intvar[ERRNO], errno);
2492                                 setvar_i(res, -1);
2493                                 break;
2494                         }
2495
2496                         if (!op->r.n)
2497                                 R.v = intvar[F0];
2498
2499                         L.i = awk_getline(X.rsm, R.v);
2500                         if (L.i > 0) {
2501                                 if (!op1) {
2502                                         incvar(intvar[FNR]);
2503                                         incvar(intvar[NR]);
2504                                 }
2505                         }
2506                         setvar_i(res, L.i);
2507                         break;
2508
2509                 /* simple builtins */
2510                 case XC( OC_FBLTIN ):
2511                         switch (opn) {
2512
2513                         case F_in:
2514                                 R.d = (int)L.d;
2515                                 break;
2516
2517                         case F_rn:
2518                                 R.d = (double)rand() / (double)RAND_MAX;
2519                                 break;
2520 #if ENABLE_FEATURE_AWK_LIBM
2521                         case F_co:
2522                                 R.d = cos(L.d);
2523                                 break;
2524
2525                         case F_ex:
2526                                 R.d = exp(L.d);
2527                                 break;
2528
2529                         case F_lg:
2530                                 R.d = log(L.d);
2531                                 break;
2532
2533                         case F_si:
2534                                 R.d = sin(L.d);
2535                                 break;
2536
2537                         case F_sq:
2538                                 R.d = sqrt(L.d);
2539                                 break;
2540 #else
2541                         case F_co:
2542                         case F_ex:
2543                         case F_lg:
2544                         case F_si:
2545                         case F_sq:
2546                                 syntax_error(EMSG_NO_MATH);
2547                                 break;
2548 #endif
2549                         case F_sr:
2550                                 R.d = (double)seed;
2551                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2552                                 srand(seed);
2553                                 break;
2554
2555                         case F_ti:
2556                                 R.d = time(NULL);
2557                                 break;
2558
2559                         case F_le:
2560                                 if (!op1)
2561                                         L.s = getvar_s(intvar[F0]);
2562                                 R.d = strlen(L.s);
2563                                 break;
2564
2565                         case F_sy:
2566                                 fflush_all();
2567                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2568                                                 ? (system(L.s) >> 8) : 0;
2569                                 break;
2570
2571                         case F_ff:
2572                                 if (!op1)
2573                                         fflush(stdout);
2574                                 else {
2575                                         if (L.s && *L.s) {
2576                                                 X.rsm = newfile(L.s);
2577                                                 fflush(X.rsm->F);
2578                                         } else {
2579                                                 fflush_all();
2580                                         }
2581                                 }
2582                                 break;
2583
2584                         case F_cl:
2585                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2586                                 if (X.rsm) {
2587                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2588                                         free(X.rsm->buffer);
2589                                         hash_remove(fdhash, L.s);
2590                                 }
2591                                 if (R.i != 0)
2592                                         setvar_i(intvar[ERRNO], errno);
2593                                 R.d = (double)R.i;
2594                                 break;
2595                         }
2596                         setvar_i(res, R.d);
2597                         break;
2598
2599                 case XC( OC_BUILTIN ):
2600                         res = exec_builtin(op, res);
2601                         break;
2602
2603                 case XC( OC_SPRINTF ):
2604                         setvar_p(res, awk_printf(op1));
2605                         break;
2606
2607                 case XC( OC_UNARY ):
2608                         X.v = R.v;
2609                         L.d = R.d = getvar_i(R.v);
2610                         switch (opn) {
2611                         case 'P':
2612                                 L.d = ++R.d;
2613                                 goto r_op_change;
2614                         case 'p':
2615                                 R.d++;
2616                                 goto r_op_change;
2617                         case 'M':
2618                                 L.d = --R.d;
2619                                 goto r_op_change;
2620                         case 'm':
2621                                 R.d--;
2622                                 goto r_op_change;
2623                         case '!':
2624                                 L.d = !istrue(X.v);
2625                                 break;
2626                         case '-':
2627                                 L.d = -R.d;
2628                                 break;
2629  r_op_change:
2630                                 setvar_i(X.v, R.d);
2631                         }
2632                         setvar_i(res, L.d);
2633                         break;
2634
2635                 case XC( OC_FIELD ):
2636                         R.i = (int)getvar_i(R.v);
2637                         if (R.i == 0) {
2638                                 res = intvar[F0];
2639                         } else {
2640                                 split_f0();
2641                                 if (R.i > nfields)
2642                                         fsrealloc(R.i);
2643                                 res = &Fields[R.i - 1];
2644                         }
2645                         break;
2646
2647                 /* concatenation (" ") and index joining (",") */
2648                 case XC( OC_CONCAT ):
2649                 case XC( OC_COMMA ):
2650                         opn = strlen(L.s) + strlen(R.s) + 2;
2651                         X.s = xmalloc(opn);
2652                         strcpy(X.s, L.s);
2653                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2654                                 L.s = getvar_s(intvar[SUBSEP]);
2655                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2656                                 strcat(X.s, L.s);
2657                         }
2658                         strcat(X.s, R.s);
2659                         setvar_p(res, X.s);
2660                         break;
2661
2662                 case XC( OC_LAND ):
2663                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2664                         break;
2665
2666                 case XC( OC_LOR ):
2667                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2668                         break;
2669
2670                 case XC( OC_BINARY ):
2671                 case XC( OC_REPLACE ):
2672                         R.d = getvar_i(R.v);
2673                         switch (opn) {
2674                         case '+':
2675                                 L.d += R.d;
2676                                 break;
2677                         case '-':
2678                                 L.d -= R.d;
2679                                 break;
2680                         case '*':
2681                                 L.d *= R.d;
2682                                 break;
2683                         case '/':
2684                                 if (R.d == 0)
2685                                         syntax_error(EMSG_DIV_BY_ZERO);
2686                                 L.d /= R.d;
2687                                 break;
2688                         case '&':
2689 #if ENABLE_FEATURE_AWK_LIBM
2690                                 L.d = pow(L.d, R.d);
2691 #else
2692                                 syntax_error(EMSG_NO_MATH);
2693 #endif
2694                                 break;
2695                         case '%':
2696                                 if (R.d == 0)
2697                                         syntax_error(EMSG_DIV_BY_ZERO);
2698                                 L.d -= (int)(L.d / R.d) * R.d;
2699                                 break;
2700                         }
2701                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2702                         break;
2703
2704                 case XC( OC_COMPARE ):
2705                         if (is_numeric(L.v) && is_numeric(R.v)) {
2706                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2707                         } else {
2708                                 L.s = getvar_s(L.v);
2709                                 R.s = getvar_s(R.v);
2710                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2711                         }
2712                         switch (opn & 0xfe) {
2713                         case 0:
2714                                 R.i = (L.d > 0);
2715                                 break;
2716                         case 2:
2717                                 R.i = (L.d >= 0);
2718                                 break;
2719                         case 4:
2720                                 R.i = (L.d == 0);
2721                                 break;
2722                         }
2723                         setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2724                         break;
2725
2726                 default:
2727                         syntax_error(EMSG_POSSIBLE_ERROR);
2728                 }
2729                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2730                         op = op->a.n;
2731                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2732                         break;
2733                 if (nextrec)
2734                         break;
2735         }
2736         nvfree(v1);
2737         return res;
2738 #undef fnargs
2739 #undef seed
2740 #undef sreg
2741 }
2742
2743
2744 /* -------- main & co. -------- */
2745
2746 static int awk_exit(int r)
2747 {
2748         var tv;
2749         unsigned i;
2750         hash_item *hi;
2751
2752         zero_out_var(&tv);
2753
2754         if (!exiting) {
2755                 exiting = TRUE;
2756                 nextrec = FALSE;
2757                 evaluate(endseq.first, &tv);
2758         }
2759
2760         /* waiting for children */
2761         for (i = 0; i < fdhash->csize; i++) {
2762                 hi = fdhash->items[i];
2763                 while (hi) {
2764                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2765                                 pclose(hi->data.rs.F);
2766                         hi = hi->next;
2767                 }
2768         }
2769
2770         exit(r);
2771 }
2772
2773 /* if expr looks like "var=value", perform assignment and return 1,
2774  * otherwise return 0 */
2775 static int is_assignment(const char *expr)
2776 {
2777         char *exprc, *s, *s0, *s1;
2778
2779         exprc = xstrdup(expr);
2780         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2781                 free(exprc);
2782                 return FALSE;
2783         }
2784
2785         *(s++) = '\0';
2786         s0 = s1 = s;
2787         while (*s)
2788                 *(s1++) = nextchar(&s);
2789
2790         *s1 = '\0';
2791         setvar_u(newvar(exprc), s0);
2792         free(exprc);
2793         return TRUE;
2794 }
2795
2796 /* switch to next input file */
2797 static rstream *next_input_file(void)
2798 {
2799 #define rsm          (G.next_input_file__rsm)
2800 #define files_happen (G.next_input_file__files_happen)
2801
2802         FILE *F = NULL;
2803         const char *fname, *ind;
2804
2805         if (rsm.F)
2806                 fclose(rsm.F);
2807         rsm.F = NULL;
2808         rsm.pos = rsm.adv = 0;
2809
2810         do {
2811                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2812                         if (files_happen)
2813                                 return NULL;
2814                         fname = "-";
2815                         F = stdin;
2816                 } else {
2817                         ind = getvar_s(incvar(intvar[ARGIND]));
2818                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2819                         if (fname && *fname && !is_assignment(fname))
2820                                 F = xfopen_stdin(fname);
2821                 }
2822         } while (!F);
2823
2824         files_happen = TRUE;
2825         setvar_s(intvar[FILENAME], fname);
2826         rsm.F = F;
2827         return &rsm;
2828 #undef rsm
2829 #undef files_happen
2830 }
2831
2832 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2833 int awk_main(int argc, char **argv)
2834 {
2835         unsigned opt;
2836         char *opt_F, *opt_W;
2837         llist_t *list_v = NULL;
2838         llist_t *list_f = NULL;
2839         int i, j;
2840         var *v;
2841         var tv;
2842         char **envp;
2843         char *vnames = (char *)vNames; /* cheat */
2844         char *vvalues = (char *)vValues;
2845
2846         INIT_G();
2847
2848         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2849          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2850         if (ENABLE_LOCALE_SUPPORT)
2851                 setlocale(LC_NUMERIC, "C");
2852
2853         zero_out_var(&tv);
2854
2855         /* allocate global buffer */
2856         g_buf = xmalloc(MAXVARFMT + 1);
2857
2858         vhash = hash_init();
2859         ahash = hash_init();
2860         fdhash = hash_init();
2861         fnhash = hash_init();
2862
2863         /* initialize variables */
2864         for (i = 0; *vnames; i++) {
2865                 intvar[i] = v = newvar(nextword(&vnames));
2866                 if (*vvalues != '\377')
2867                         setvar_s(v, nextword(&vvalues));
2868                 else
2869                         setvar_i(v, 0);
2870
2871                 if (*vnames == '*') {
2872                         v->type |= VF_SPECIAL;
2873                         vnames++;
2874                 }
2875         }
2876
2877         handle_special(intvar[FS]);
2878         handle_special(intvar[RS]);
2879
2880         newfile("/dev/stdin")->F = stdin;
2881         newfile("/dev/stdout")->F = stdout;
2882         newfile("/dev/stderr")->F = stderr;
2883
2884         /* Huh, people report that sometimes environ is NULL. Oh well. */
2885         if (environ) for (envp = environ; *envp; envp++) {
2886                 /* environ is writable, thus we don't strdup it needlessly */
2887                 char *s = *envp;
2888                 char *s1 = strchr(s, '=');
2889                 if (s1) {
2890                         *s1 = '\0';
2891                         /* Both findvar and setvar_u take const char*
2892                          * as 2nd arg -> environment is not trashed */
2893                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2894                         *s1 = '=';
2895                 }
2896         }
2897         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2898         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2899         argv += optind;
2900         argc -= optind;
2901         if (opt & 0x1)
2902                 setvar_s(intvar[FS], opt_F); // -F
2903         while (list_v) { /* -v */
2904                 if (!is_assignment(llist_pop(&list_v)))
2905                         bb_show_usage();
2906         }
2907         if (list_f) { /* -f */
2908                 do {
2909                         char *s = NULL;
2910                         FILE *from_file;
2911
2912                         g_progname = llist_pop(&list_f);
2913                         from_file = xfopen_stdin(g_progname);
2914                         /* one byte is reserved for some trick in next_token */
2915                         for (i = j = 1; j > 0; i += j) {
2916                                 s = xrealloc(s, i + 4096);
2917                                 j = fread(s + i, 1, 4094, from_file);
2918                         }
2919                         s[i] = '\0';
2920                         fclose(from_file);
2921                         parse_program(s + 1);
2922                         free(s);
2923                 } while (list_f);
2924                 argc++;
2925         } else { // no -f: take program from 1st parameter
2926                 if (!argc)
2927                         bb_show_usage();
2928                 g_progname = "cmd. line";
2929                 parse_program(*argv++);
2930         }
2931         if (opt & 0x8) // -W
2932                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2933
2934         /* fill in ARGV array */
2935         setvar_i(intvar[ARGC], argc);
2936         setari_u(intvar[ARGV], 0, "awk");
2937         i = 0;
2938         while (*argv)
2939                 setari_u(intvar[ARGV], ++i, *argv++);
2940
2941         evaluate(beginseq.first, &tv);
2942         if (!mainseq.first && !endseq.first)
2943                 awk_exit(EXIT_SUCCESS);
2944
2945         /* input file could already be opened in BEGIN block */
2946         if (!iF)
2947                 iF = next_input_file();
2948
2949         /* passing through input files */
2950         while (iF) {
2951                 nextfile = FALSE;
2952                 setvar_i(intvar[FNR], 0);
2953
2954                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2955                         nextrec = FALSE;
2956                         incvar(intvar[NR]);
2957                         incvar(intvar[FNR]);
2958                         evaluate(mainseq.first, &tv);
2959
2960                         if (nextfile)
2961                                 break;
2962                 }
2963
2964                 if (i < 0)
2965                         syntax_error(strerror(errno));
2966
2967                 iF = next_input_file();
2968         }
2969
2970         awk_exit(EXIT_SUCCESS);
2971         /*return 0;*/
2972 }