awk: fix the case where nested "for" loops with the same variable misbehave
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
17 #define MAXVARFMT       240
18 #define MINNVBLOCK      64
19
20 /* variable flags */
21 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
22 #define VF_ARRAY        0x0002  /* 1 = it's an array */
23
24 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
25 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
27 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
31
32 /* these flags are static, don't change them when value is changed */
33 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
/* An awk variable (scalar, array handle, or function-argument proxy) */
struct var_s {
	unsigned int type;      /* VF_* flag bits */
	double number;          /* numeric value */
	char *string;           /* string value */
	union {
		int aidx;               /* index of a function argument (compilation stage) */
		struct xhash_s *array;  /* the hash backing an array */
		struct var_s *parent;   /* function argument: the actual parameter */
		char **walker;          /* array element list used by for..in */
	} x;
};
typedef struct var_s var;
47
/* A chain of nodes: a pattern-action chain, BEGIN, END, or a function body */
struct chain_s {
	struct node_s *first, *last;
	const char *programname;
};
typedef struct chain_s chain;
54
55 /* Function */
56 typedef struct func_s {
57         unsigned nargs;
58         struct chain_s body;
59 } func;
60
61 /* I/O stream */
62 typedef struct rstream_s {
63         FILE *F;
64         char *buffer;
65         int adv;
66         int size;
67         int pos;
68         smallint is_pipe;
69 } rstream;
70
71 typedef struct hash_item_s {
72         union {
73                 struct var_s v;         /* variable/array hash */
74                 struct rstream_s rs;    /* redirect streams hash */
75                 struct func_s f;        /* functions hash */
76         } data;
77         struct hash_item_s *next;       /* next in chain */
78         char name[1];                   /* really it's longer */
79 } hash_item;
80
/* Chained hash table */
struct xhash_s {
	unsigned int nel;       /* number of stored elements */
	unsigned int csize;     /* current number of buckets */
	unsigned int nprime;    /* index into PRIMES[] of the next table size */
	unsigned int glen;      /* total length of all item names */
	struct hash_item_s **items;     /* bucket array */
};
typedef struct xhash_s xhash;
88
89 /* Tree node */
90 typedef struct node_s {
91         uint32_t info;
92         unsigned lineno;
93         union {
94                 struct node_s *n;
95                 var *v;
96                 int i;
97                 char *s;
98                 regex_t *re;
99         } l;
100         union {
101                 struct node_s *n;
102                 regex_t *ire;
103                 func *f;
104                 int argno;
105         } r;
106         union {
107                 struct node_s *n;
108         } a;
109 } node;
110
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
113         int size;
114         var *pos;
115         struct nvblock_s *prev;
116         struct nvblock_s *next;
117         var nv[];
118 } nvblock;
119
120 typedef struct tsplitter_s {
121         node n;
122         regex_t re[2];
123 } tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!!  See next_token() */
127 #define TC_SEQSTART      1                              /* ( */
128 #define TC_SEQTERM      (1 << 1)                /* ) */
129 #define TC_REGEXP       (1 << 2)                /* /.../ */
130 #define TC_OUTRDR       (1 << 3)                /* | > >> */
131 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
132 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
133 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
134 #define TC_IN           (1 << 7)
135 #define TC_COMMA        (1 << 8)
136 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
137 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
138 #define TC_ARRTERM      (1 << 11)               /* ] */
139 #define TC_GRPSTART     (1 << 12)               /* { */
140 #define TC_GRPTERM      (1 << 13)               /* } */
141 #define TC_SEMICOL      (1 << 14)
142 #define TC_NEWLINE      (1 << 15)
143 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
144 #define TC_WHILE        (1 << 17)
145 #define TC_ELSE         (1 << 18)
146 #define TC_BUILTIN      (1 << 19)
147 #define TC_GETLINE      (1 << 20)
148 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
149 #define TC_BEGIN        (1 << 22)
150 #define TC_END          (1 << 23)
151 #define TC_EOF          (1 << 24)
152 #define TC_VARIABLE     (1 << 25)
153 #define TC_ARRAY        (1 << 26)
154 #define TC_FUNCTION     (1 << 27)
155 #define TC_STRING       (1 << 28)
156 #define TC_NUMBER       (1 << 29)
157
158 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
159
160 /* combined token classes */
161 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
165
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
168
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
172
173 /* discard newlines after these */
174 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175                    | TC_BINOP | TC_OPTERM)
176
177 /* what can expression begin with */
178 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
181
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
188 #define OF_RES1    0x010000
189 #define OF_RES2    0x020000
190 #define OF_STR1    0x040000
191 #define OF_STR2    0x080000
192 #define OF_NUM1    0x100000
193 #define OF_CHECKED 0x200000
194
195 /* combined operator flags */
196 #define xx      0
197 #define xV      OF_RES2
198 #define xS      (OF_RES2 | OF_STR2)
199 #define Vx      OF_RES1
200 #define VV      (OF_RES1 | OF_RES2)
201 #define Nx      (OF_RES1 | OF_NUM1)
202 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx      (OF_RES1 | OF_STR1)
204 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK   0x007F
209
/* Operator priority lives in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() shifts in uint32_t: values of 0x80 and above would otherwise be
 * shifted into the sign bit of an int, which is undefined behaviour.
 * The argument is parenthesized so that expressions can be passed safely.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
217
218 /* Operation classes */
219
220 #define SHIFT_TIL_THIS  0x0600
221 #define RECUR_FROM_THIS 0x1000
222
223 enum {
224         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
225         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
226
227         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
228         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
229         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
230
231         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
232         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
233         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
234         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
235         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
236         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
237         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
238         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
239         OC_DONE = 0x2800,
240
241         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
242         ST_WHILE = 0x3300
243 };
244
245 /* simple builtins */
246 enum {
247         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
248         F_ti,   F_le,   F_sy,   F_ff,   F_cl
249 };
250
251 /* builtins */
252 enum {
253         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
254         B_ge,   B_gs,   B_su,
255         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
256 };
257
258 /* tokens and their corresponding info values */
259
260 #define NTC     "\377"  /* switch to next token class (tc<<1) */
261 #define NTCC    '\377'
262
263 #define OC_B    OC_BUILTIN
264
265 static const char tokenlist[] ALIGN1 =
266         "\1("       NTC
267         "\1)"       NTC
268         "\1/"       NTC                                 /* REGEXP */
269         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
270         "\2++"      "\2--"      NTC                     /* UOPPOST */
271         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
272         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
273         "\2*="      "\2/="      "\2%="      "\2^="
274         "\1+"       "\1-"       "\3**="     "\2**"
275         "\1/"       "\1%"       "\1^"       "\1*"
276         "\2!="      "\2>="      "\2<="      "\1>"
277         "\1<"       "\2!~"      "\1~"       "\2&&"
278         "\2||"      "\1?"       "\1:"       NTC
279         "\2in"      NTC
280         "\1,"       NTC
281         "\1|"       NTC
282         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
283         "\1]"       NTC
284         "\1{"       NTC
285         "\1}"       NTC
286         "\1;"       NTC
287         "\1\n"      NTC
288         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
289         "\10continue"           "\6delete"  "\5print"
290         "\6printf"  "\4next"    "\10nextfile"
291         "\6return"  "\4exit"    NTC
292         "\5while"   NTC
293         "\4else"    NTC
294
295         "\3and"     "\5compl"   "\6lshift"  "\2or"
296         "\6rshift"  "\3xor"
297         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
298         "\3cos"     "\3exp"     "\3int"     "\3log"
299         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
300         "\6gensub"  "\4gsub"    "\5index"   "\6length"
301         "\5match"   "\5split"   "\7sprintf" "\3sub"
302         "\6substr"  "\7systime" "\10strftime" "\6mktime"
303         "\7tolower" "\7toupper" NTC
304         "\7getline" NTC
305         "\4func"    "\10function"   NTC
306         "\5BEGIN"   NTC
307         "\3END"     "\0"
308         ;
309
310 static const uint32_t tokeninfo[] = {
311         0,
312         0,
313         OC_REGEXP,
314         xS|'a',     xS|'w',     xS|'|',
315         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
316         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
317             OC_FIELD|xV|P(5),
318         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
319             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
320         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
321             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
322         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
323             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
324         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
325             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
326         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
327             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
328         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
329             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
330         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
331             OC_COLON|xx|P(67)|':',
332         OC_IN|SV|P(49),
333         OC_COMMA|SS|P(80),
334         OC_PGETLINE|SV|P(37),
335         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
336             OC_UNARY|xV|P(19)|'!',
337         0,
338         0,
339         0,
340         0,
341         0,
342         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
343         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
344         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
345         OC_RETURN|Vx,   OC_EXIT|Nx,
346         ST_WHILE,
347         0,
348
349         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
356         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
357         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358         OC_GETLINE|SV|P(0),
359         0,      0,
360         0,
361         0
362 };
363
364 /* internal variable names and their initial values       */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
366 enum {
367         CONVFMT,    OFMT,       FS,         OFS,
368         ORS,        RS,         RT,         FILENAME,
369         SUBSEP,     F0,         ARGIND,     ARGC,
370         ARGV,       ERRNO,      FNR,        NR,
371         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
372 };
373
374 static const char vNames[] ALIGN1 =
375         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
376         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
377         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
378         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
379         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
380
381 static const char vValues[] ALIGN1 =
382         "%.6g\0"    "%.6g\0"    " \0"       " \0"
383         "\n\0"      "\n\0"      "\0"        "\0"
384         "\034\0"    "\0"        "\377";
385
386 /* hash size may grow to these values */
387 #define FIRST_PRIME 61
388 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
389
390
391 /* Globals. Split in two parts so that first one is addressed
392  * with (mostly short) negative offsets.
393  * NB: it's unsafe to put members of type "double"
394  * into globals2 (gcc may fail to align them).
395  */
396 struct globals {
397         double t_double;
398         chain beginseq, mainseq, endseq;
399         chain *seq;
400         node *break_ptr, *continue_ptr;
401         rstream *iF;
402         xhash *vhash, *ahash, *fdhash, *fnhash;
403         const char *g_progname;
404         int g_lineno;
405         int nfields;
406         int maxfields; /* used in fsrealloc() only */
407         var *Fields;
408         nvblock *g_cb;
409         char *g_pos;
410         char *g_buf;
411         smallint icase;
412         smallint exiting;
413         smallint nextrec;
414         smallint nextfile;
415         smallint is_f0_split;
416 };
417 struct globals2 {
418         uint32_t t_info; /* often used */
419         uint32_t t_tclass;
420         char *t_string;
421         int t_lineno;
422         int t_rollback;
423
424         var *intvar[NUM_INTERNAL_VARS]; /* often used */
425
426         /* former statics from various functions */
427         char *split_f0__fstrings;
428
429         uint32_t next_token__save_tclass;
430         uint32_t next_token__save_info;
431         uint32_t next_token__ltclass;
432         smallint next_token__concat_inserted;
433
434         smallint next_input_file__files_happen;
435         rstream next_input_file__rsm;
436
437         var *evaluate__fnargs;
438         unsigned evaluate__seed;
439         regex_t evaluate__sreg;
440
441         var ptest__v;
442
443         tsplitter exec_builtin__tspl;
444
445         /* biggest and least used members go last */
446         tsplitter fsplitter, rsplitter;
447 };
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /*char G1size[sizeof(G1)]; - 0x74 */
452 /*char Gsize[sizeof(G)]; - 0x1c4 */
453 /* Trying to keep most of members accessible with short offsets: */
454 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
455 #define t_double     (G1.t_double    )
456 #define beginseq     (G1.beginseq    )
457 #define mainseq      (G1.mainseq     )
458 #define endseq       (G1.endseq      )
459 #define seq          (G1.seq         )
460 #define break_ptr    (G1.break_ptr   )
461 #define continue_ptr (G1.continue_ptr)
462 #define iF           (G1.iF          )
463 #define vhash        (G1.vhash       )
464 #define ahash        (G1.ahash       )
465 #define fdhash       (G1.fdhash      )
466 #define fnhash       (G1.fnhash      )
467 #define g_progname   (G1.g_progname  )
468 #define g_lineno     (G1.g_lineno    )
469 #define nfields      (G1.nfields     )
470 #define maxfields    (G1.maxfields   )
471 #define Fields       (G1.Fields      )
472 #define g_cb         (G1.g_cb        )
473 #define g_pos        (G1.g_pos       )
474 #define g_buf        (G1.g_buf       )
475 #define icase        (G1.icase       )
476 #define exiting      (G1.exiting     )
477 #define nextrec      (G1.nextrec     )
478 #define nextfile     (G1.nextfile    )
479 #define is_f0_split  (G1.is_f0_split )
480 #define t_info       (G.t_info      )
481 #define t_tclass     (G.t_tclass    )
482 #define t_string     (G.t_string    )
483 #define t_lineno     (G.t_lineno    )
484 #define t_rollback   (G.t_rollback  )
485 #define intvar       (G.intvar      )
486 #define fsplitter    (G.fsplitter   )
487 #define rsplitter    (G.rsplitter   )
488 #define INIT_G() do { \
489         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
490         G.next_token__ltclass = TC_OPTERM; \
491         G.evaluate__seed = 1; \
492 } while (0)
493
494
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
503
504 /* ---- error handling ---- */
505
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515 #if !ENABLE_FEATURE_AWK_LIBM
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
518
519 static void zero_out_var(var *vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *message) NORETURN;
525 static void syntax_error(const char *message)
526 {
527         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/* Hash a NUL-terminated name: h = h * 63 + ch for each character,
 * in unsigned (wrapping) arithmetic. The empty string hashes to 0. */
static unsigned hashidx(const char *name)
{
	unsigned h;

	for (h = 0; *name != '\0'; name++)
		h = (h << 6) - h + *name;
	return h;
}
540
541 /* create new hash */
542 static xhash *hash_init(void)
543 {
544         xhash *newhash;
545
546         newhash = xzalloc(sizeof(*newhash));
547         newhash->csize = FIRST_PRIME;
548         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
549
550         return newhash;
551 }
552
553 /* find item in hash, return ptr to data, NULL if not found */
554 static void *hash_search(xhash *hash, const char *name)
555 {
556         hash_item *hi;
557
558         hi = hash->items[hashidx(name) % hash->csize];
559         while (hi) {
560                 if (strcmp(hi->name, name) == 0)
561                         return &(hi->data);
562                 hi = hi->next;
563         }
564         return NULL;
565 }
566
567 /* grow hash if it becomes too big */
568 static void hash_rebuild(xhash *hash)
569 {
570         unsigned newsize, i, idx;
571         hash_item **newitems, *hi, *thi;
572
573         if (hash->nprime == ARRAY_SIZE(PRIMES))
574                 return;
575
576         newsize = PRIMES[hash->nprime++];
577         newitems = xzalloc(newsize * sizeof(newitems[0]));
578
579         for (i = 0; i < hash->csize; i++) {
580                 hi = hash->items[i];
581                 while (hi) {
582                         thi = hi;
583                         hi = thi->next;
584                         idx = hashidx(thi->name) % newsize;
585                         thi->next = newitems[idx];
586                         newitems[idx] = thi;
587                 }
588         }
589
590         free(hash->items);
591         hash->csize = newsize;
592         hash->items = newitems;
593 }
594
595 /* find item in hash, add it if necessary. Return ptr to data */
596 static void *hash_find(xhash *hash, const char *name)
597 {
598         hash_item *hi;
599         unsigned idx;
600         int l;
601
602         hi = hash_search(hash, name);
603         if (!hi) {
604                 if (++hash->nel / hash->csize > 10)
605                         hash_rebuild(hash);
606
607                 l = strlen(name) + 1;
608                 hi = xzalloc(sizeof(*hi) + l);
609                 strcpy(hi->name, name);
610
611                 idx = hashidx(name) % hash->csize;
612                 hi->next = hash->items[idx];
613                 hash->items[idx] = hi;
614                 hash->glen += l;
615         }
616         return &(hi->data);
617 }
618
619 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
620 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
621 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
622 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
623
624 static void hash_remove(xhash *hash, const char *name)
625 {
626         hash_item *hi, **phi;
627
628         phi = &(hash->items[hashidx(name) % hash->csize]);
629         while (*phi) {
630                 hi = *phi;
631                 if (strcmp(hi->name, name) == 0) {
632                         hash->glen -= (strlen(name) + 1);
633                         hash->nel--;
634                         *phi = hi->next;
635                         free(hi);
636                         break;
637                 }
638                 phi = &(hi->next);
639         }
640 }
641
642 /* ------ some useful functions ------ */
643
644 static void skip_spaces(char **s)
645 {
646         char *p = *s;
647
648         while (1) {
649                 if (*p == '\\' && p[1] == '\n') {
650                         p++;
651                         t_lineno++;
652                 } else if (*p != ' ' && *p != '\t') {
653                         break;
654                 }
655                 p++;
656         }
657         *s = p;
658 }
659
/* Return the string at *s and move *s just past its terminating NUL,
 * i.e. to the start of the next string in a NUL-separated list. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
667
/* Fetch the next character from *s, decoding a backslash escape through
 * bb_process_escape_sequence(), and advance *s past what was consumed.
 *
 * If the escape helper hands back '\\' without moving *s (which this code
 * treats as "escape not recognized"), the character after the backslash is
 * returned literally, so that e.g. \" yields ".
 * A backslash that is the last character before NUL returns NUL and leaves
 * *s on the terminator, so the caller cannot run past the end of the buffer.
 */
static char nextchar(char **s)
{
	char c, *pps;

	c = *(*s)++;
	pps = *s;
	if (c == '\\')
		c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == pps) {
		/* unrecognized escape: take the following char as-is */
		c = **s;
		if (c != '\0')
			(*s)++;	/* never advance over the terminator */
	}
	return c;
}
680
681 static ALWAYS_INLINE int isalnum_(int c)
682 {
683         return (isalnum(c) || c == '_');
684 }
685
/* Convert the number at *pp to double and advance *pp past the text consumed.
 *
 * With ENABLE_DESKTOP, a leading "0x"/"0X" or "0<digit>" is first tried as a
 * hex/octal integer via strtoull(base 0). A zero-led string is not always
 * octal, though: for "08", "009.5" or "00.5" the integer parse stops on a
 * digit or on '.', and returning its result would truncate the value.
 * In that case the text is handed to strtod() instead.
 */
static double my_strtod(char **pp)
{
	char *cp = *pp;

#if ENABLE_DESKTOP
	if (cp[0] == '0') {
		char c = cp[1] | 0x20;

		if (c == 'x' || isdigit((unsigned char)cp[1])) {
			char *end;
			unsigned long long ull = strtoull(cp, &end, 0);

			if (c == 'x'
			 || (!isdigit((unsigned char)*end) && *end != '.')
			) {
				*pp = end;
				return ull;
			}
			/* else: 009.123 (end at '9') or 000.123 (end at '.'):
			 * really a decimal number, let strtod handle it */
		}
	}
#endif
	return strtod(cp, pp);
}
697
698 /* -------- working with variables (set/get/copy/etc) -------- */
699
700 static xhash *iamarray(var *v)
701 {
702         var *a = v;
703
704         while (a->type & VF_CHILD)
705                 a = a->x.parent;
706
707         if (!(a->type & VF_ARRAY)) {
708                 a->type |= VF_ARRAY;
709                 a->x.array = hash_init();
710         }
711         return a->x.array;
712 }
713
714 static void clear_array(xhash *array)
715 {
716         unsigned i;
717         hash_item *hi, *thi;
718
719         for (i = 0; i < array->csize; i++) {
720                 hi = array->items[i];
721                 while (hi) {
722                         thi = hi;
723                         hi = hi->next;
724                         free(thi->data.v.string);
725                         free(thi);
726                 }
727                 array->items[i] = NULL;
728         }
729         array->glen = array->nel = 0;
730 }
731
732 /* clear a variable */
733 static var *clrvar(var *v)
734 {
735         if (!(v->type & VF_FSTR))
736                 free(v->string);
737
738         v->type &= VF_DONTTOUCH;
739         v->type |= VF_DIRTY;
740         v->string = NULL;
741         return v;
742 }
743
744 /* assign string value to variable */
745 static var *setvar_p(var *v, char *value)
746 {
747         clrvar(v);
748         v->string = value;
749         handle_special(v);
750         return v;
751 }
752
753 /* same as setvar_p but make a copy of string */
754 static var *setvar_s(var *v, const char *value)
755 {
756         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
757 }
758
759 /* same as setvar_s but sets USER flag */
760 static var *setvar_u(var *v, const char *value)
761 {
762         v = setvar_s(v, value);
763         v->type |= VF_USER;
764         return v;
765 }
766
767 /* set array element to user string */
768 static void setari_u(var *a, int idx, const char *s)
769 {
770         var *v;
771
772         v = findvar(iamarray(a), itoa(idx));
773         setvar_u(v, s);
774 }
775
776 /* assign numeric value to variable */
777 static var *setvar_i(var *v, double value)
778 {
779         clrvar(v);
780         v->type |= VF_NUMBER;
781         v->number = value;
782         handle_special(v);
783         return v;
784 }
785
786 static const char *getvar_s(var *v)
787 {
788         /* if v is numeric and has no cached string, convert it to string */
789         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
791                 v->string = xstrdup(g_buf);
792                 v->type |= VF_CACHED;
793         }
794         return (v->string == NULL) ? "" : v->string;
795 }
796
797 static double getvar_i(var *v)
798 {
799         char *s;
800
801         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
802                 v->number = 0;
803                 s = v->string;
804                 if (s && *s) {
805                         v->number = my_strtod(&s);
806                         if (v->type & VF_USER) {
807                                 skip_spaces(&s);
808                                 if (*s != '\0')
809                                         v->type &= ~VF_USER;
810                         }
811                 } else {
812                         v->type &= ~VF_USER;
813                 }
814                 v->type |= VF_CACHED;
815         }
816         return v->number;
817 }
818
819 /* Used for operands of bitwise ops */
820 static unsigned long getvar_i_int(var *v)
821 {
822         double d = getvar_i(v);
823
824         /* Casting doubles to longs is undefined for values outside
825          * of target type range. Try to widen it as much as possible */
826         if (d >= 0)
827                 return (unsigned long)d;
828         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
829         return - (long) (unsigned long) (-d);
830 }
831
832 static var *copyvar(var *dest, const var *src)
833 {
834         if (dest != src) {
835                 clrvar(dest);
836                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837                 dest->number = src->number;
838                 if (src->string)
839                         dest->string = xstrdup(src->string);
840         }
841         handle_special(dest);
842         return dest;
843 }
844
845 static var *incvar(var *v)
846 {
847         return setvar_i(v, getvar_i(v) + 1.0);
848 }
849
850 /* return true if v is number or numeric string */
851 static int is_numeric(var *v)
852 {
853         getvar_i(v);
854         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
855 }
856
857 /* return 1 when value of v corresponds to true, 0 otherwise */
858 static int istrue(var *v)
859 {
860         if (is_numeric(v))
861                 return (v->number != 0);
862         return (v->string && v->string[0]);
863 }
864
865 /* temporary variables allocator. Last allocated should be first freed */
866 static var *nvalloc(int n)
867 {
868         nvblock *pb = NULL;
869         var *v, *r;
870         int size;
871
872         while (g_cb) {
873                 pb = g_cb;
874                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
875                         break;
876                 g_cb = g_cb->next;
877         }
878
879         if (!g_cb) {
880                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
881                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
882                 g_cb->size = size;
883                 g_cb->pos = g_cb->nv;
884                 g_cb->prev = pb;
885                 /*g_cb->next = NULL; - xzalloc did it */
886                 if (pb)
887                         pb->next = g_cb;
888         }
889
890         v = r = g_cb->pos;
891         g_cb->pos += n;
892
893         while (v < g_cb->pos) {
894                 v->type = 0;
895                 v->string = NULL;
896                 v++;
897         }
898
899         return r;
900 }
901
902 static void nvfree(var *v)
903 {
904         var *p;
905
906         if (v < g_cb->nv || v >= g_cb->pos)
907                 syntax_error(EMSG_INTERNAL_ERROR);
908
909         for (p = v; p < g_cb->pos; p++) {
910                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
911                         clear_array(iamarray(p));
912                         free(p->x.array->items);
913                         free(p->x.array);
914                 }
915                 if (p->type & VF_WALK) {
916                         //bb_error_msg("free(walker@%p:%p) #1", &p->x.walker, p->x.walker);
917                         free(p->x.walker);
918                 }
919
920                 clrvar(p);
921         }
922
923         g_cb->pos = v;
924         while (g_cb->prev && g_cb->pos == g_cb->nv) {
925                 g_cb = g_cb->prev;
926         }
927 }
928
929 /* ------- awk program text parsing ------- */
930
931 /* Parse next token pointed by global pos, place results into global ttt.
932  * If token isn't expected, give away. Return token class
933  */
934 static uint32_t next_token(uint32_t expected)
935 {
936 #define concat_inserted (G.next_token__concat_inserted)
937 #define save_tclass     (G.next_token__save_tclass)
938 #define save_info       (G.next_token__save_info)
939 /* Initialized to TC_OPTERM: */
940 #define ltclass         (G.next_token__ltclass)
941
942         char *p, *pp, *s;
943         const char *tl;
944         uint32_t tc;
945         const uint32_t *ti;
946         int l;
947
948         if (t_rollback) {
949                 t_rollback = FALSE;
950
951         } else if (concat_inserted) {
952                 concat_inserted = FALSE;
953                 t_tclass = save_tclass;
954                 t_info = save_info;
955
956         } else {
957                 p = g_pos;
958  readnext:
959                 skip_spaces(&p);
960                 g_lineno = t_lineno;
961                 if (*p == '#')
962                         while (*p != '\n' && *p != '\0')
963                                 p++;
964
965                 if (*p == '\n')
966                         t_lineno++;
967
968                 if (*p == '\0') {
969                         tc = TC_EOF;
970
971                 } else if (*p == '\"') {
972                         /* it's a string */
973                         t_string = s = ++p;
974                         while (*p != '\"') {
975                                 if (*p == '\0' || *p == '\n')
976                                         syntax_error(EMSG_UNEXP_EOS);
977                                 *(s++) = nextchar(&p);
978                         }
979                         p++;
980                         *s = '\0';
981                         tc = TC_STRING;
982
983                 } else if ((expected & TC_REGEXP) && *p == '/') {
984                         /* it's regexp */
985                         t_string = s = ++p;
986                         while (*p != '/') {
987                                 if (*p == '\0' || *p == '\n')
988                                         syntax_error(EMSG_UNEXP_EOS);
989                                 *s = *p++;
990                                 if (*s++ == '\\') {
991                                         pp = p;
992                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
993                                         if (*pp == '\\')
994                                                 *s++ = '\\';
995                                         if (p == pp)
996                                                 *s++ = *p++;
997                                 }
998                         }
999                         p++;
1000                         *s = '\0';
1001                         tc = TC_REGEXP;
1002
1003                 } else if (*p == '.' || isdigit(*p)) {
1004                         /* it's a number */
1005                         t_double = my_strtod(&p);
1006                         if (*p == '.')
1007                                 syntax_error(EMSG_UNEXP_TOKEN);
1008                         tc = TC_NUMBER;
1009
1010                 } else {
1011                         /* search for something known */
1012                         tl = tokenlist;
1013                         tc = 0x00000001;
1014                         ti = tokeninfo;
1015                         while (*tl) {
1016                                 l = *(tl++);
1017                                 if (l == NTCC) {
1018                                         tc <<= 1;
1019                                         continue;
1020                                 }
1021                                 /* if token class is expected, token
1022                                  * matches and it's not a longer word,
1023                                  * then this is what we are looking for
1024                                  */
1025                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1026                                  && *tl == *p && strncmp(p, tl, l) == 0
1027                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1028                                 ) {
1029                                         t_info = *ti;
1030                                         p += l;
1031                                         break;
1032                                 }
1033                                 ti++;
1034                                 tl += l;
1035                         }
1036
1037                         if (!*tl) {
1038                                 /* it's a name (var/array/function),
1039                                  * otherwise it's something wrong
1040                                  */
1041                                 if (!isalnum_(*p))
1042                                         syntax_error(EMSG_UNEXP_TOKEN);
1043
1044                                 t_string = --p;
1045                                 while (isalnum_(*(++p))) {
1046                                         *(p-1) = *p;
1047                                 }
1048                                 *(p-1) = '\0';
1049                                 tc = TC_VARIABLE;
1050                                 /* also consume whitespace between functionname and bracket */
1051                                 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1052                                         skip_spaces(&p);
1053                                 if (*p == '(') {
1054                                         tc = TC_FUNCTION;
1055                                 } else {
1056                                         if (*p == '[') {
1057                                                 p++;
1058                                                 tc = TC_ARRAY;
1059                                         }
1060                                 }
1061                         }
1062                 }
1063                 g_pos = p;
1064
1065                 /* skipping newlines in some cases */
1066                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1067                         goto readnext;
1068
1069                 /* insert concatenation operator when needed */
1070                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1071                         concat_inserted = TRUE;
1072                         save_tclass = tc;
1073                         save_info = t_info;
1074                         tc = TC_BINOP;
1075                         t_info = OC_CONCAT | SS | P(35);
1076                 }
1077
1078                 t_tclass = tc;
1079         }
1080         ltclass = t_tclass;
1081
1082         /* Are we ready for this? */
1083         if (!(ltclass & expected))
1084                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1085                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1086
1087         return ltclass;
1088 #undef concat_inserted
1089 #undef save_tclass
1090 #undef save_info
1091 #undef ltclass
1092 }
1093
1094 static void rollback_token(void)
1095 {
1096         t_rollback = TRUE;
1097 }
1098
1099 static node *new_node(uint32_t info)
1100 {
1101         node *n;
1102
1103         n = xzalloc(sizeof(node));
1104         n->info = info;
1105         n->lineno = g_lineno;
1106         return n;
1107 }
1108
1109 static node *mk_re_node(const char *s, node *n, regex_t *re)
1110 {
1111         n->info = OC_REGEXP;
1112         n->l.re = re;
1113         n->r.ire = re + 1;
1114         xregcomp(re, s, REG_EXTENDED);
1115         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1116
1117         return n;
1118 }
1119
1120 static node *condition(void)
1121 {
1122         next_token(TC_SEQSTART);
1123         return parse_expr(TC_SEQTERM);
1124 }
1125
1126 /* parse expression terminated by given argument, return ptr
1127  * to built subtree. Terminator is eaten by parse_expr */
1128 static node *parse_expr(uint32_t iexp)
1129 {
1130         node sn;
1131         node *cn = &sn;
1132         node *vn, *glptr;
1133         uint32_t tc, xtc;
1134         var *v;
1135
1136         sn.info = PRIMASK;
1137         sn.r.n = glptr = NULL;
1138         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1139
1140         while (!((tc = next_token(xtc)) & iexp)) {
1141                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1142                         /* input redirection (<) attached to glptr node */
1143                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1144                         cn->a.n = glptr;
1145                         xtc = TC_OPERAND | TC_UOPPRE;
1146                         glptr = NULL;
1147
1148                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1149                         /* for binary and postfix-unary operators, jump back over
1150                          * previous operators with higher priority */
1151                         vn = cn;
1152                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1153                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1154                         ) {
1155                                 vn = vn->a.n;
1156                         }
1157                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1158                                 t_info += P(6);
1159                         cn = vn->a.n->r.n = new_node(t_info);
1160                         cn->a.n = vn->a.n;
1161                         if (tc & TC_BINOP) {
1162                                 cn->l.n = vn;
1163                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1164                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1165                                         /* it's a pipe */
1166                                         next_token(TC_GETLINE);
1167                                         /* give maximum priority to this pipe */
1168                                         cn->info &= ~PRIMASK;
1169                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1170                                 }
1171                         } else {
1172                                 cn->r.n = vn;
1173                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1174                         }
1175                         vn->a.n = cn;
1176
1177                 } else {
1178                         /* for operands and prefix-unary operators, attach them
1179                          * to last node */
1180                         vn = cn;
1181                         cn = vn->r.n = new_node(t_info);
1182                         cn->a.n = vn;
1183                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1184                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1185                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1186                                 /* one should be very careful with switch on tclass -
1187                                  * only simple tclasses should be used! */
1188                                 switch (tc) {
1189                                 case TC_VARIABLE:
1190                                 case TC_ARRAY:
1191                                         cn->info = OC_VAR;
1192                                         v = hash_search(ahash, t_string);
1193                                         if (v != NULL) {
1194                                                 cn->info = OC_FNARG;
1195                                                 cn->l.i = v->x.aidx;
1196                                         } else {
1197                                                 cn->l.v = newvar(t_string);
1198                                         }
1199                                         if (tc & TC_ARRAY) {
1200                                                 cn->info |= xS;
1201                                                 cn->r.n = parse_expr(TC_ARRTERM);
1202                                         }
1203                                         break;
1204
1205                                 case TC_NUMBER:
1206                                 case TC_STRING:
1207                                         cn->info = OC_VAR;
1208                                         v = cn->l.v = xzalloc(sizeof(var));
1209                                         if (tc & TC_NUMBER)
1210                                                 setvar_i(v, t_double);
1211                                         else
1212                                                 setvar_s(v, t_string);
1213                                         break;
1214
1215                                 case TC_REGEXP:
1216                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1217                                         break;
1218
1219                                 case TC_FUNCTION:
1220                                         cn->info = OC_FUNC;
1221                                         cn->r.f = newfunc(t_string);
1222                                         cn->l.n = condition();
1223                                         break;
1224
1225                                 case TC_SEQSTART:
1226                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1227                                         cn->a.n = vn;
1228                                         break;
1229
1230                                 case TC_GETLINE:
1231                                         glptr = cn;
1232                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1233                                         break;
1234
1235                                 case TC_BUILTIN:
1236                                         cn->l.n = condition();
1237                                         break;
1238                                 }
1239                         }
1240                 }
1241         }
1242         return sn.r.n;
1243 }
1244
1245 /* add node to chain. Return ptr to alloc'd node */
1246 static node *chain_node(uint32_t info)
1247 {
1248         node *n;
1249
1250         if (!seq->first)
1251                 seq->first = seq->last = new_node(0);
1252
1253         if (seq->programname != g_progname) {
1254                 seq->programname = g_progname;
1255                 n = chain_node(OC_NEWSOURCE);
1256                 n->l.s = xstrdup(g_progname);
1257         }
1258
1259         n = seq->last;
1260         n->info = info;
1261         seq->last = n->a.n = new_node(OC_DONE);
1262
1263         return n;
1264 }
1265
1266 static void chain_expr(uint32_t info)
1267 {
1268         node *n;
1269
1270         n = chain_node(info);
1271         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1272         if (t_tclass & TC_GRPTERM)
1273                 rollback_token();
1274 }
1275
1276 static node *chain_loop(node *nn)
1277 {
1278         node *n, *n2, *save_brk, *save_cont;
1279
1280         save_brk = break_ptr;
1281         save_cont = continue_ptr;
1282
1283         n = chain_node(OC_BR | Vx);
1284         continue_ptr = new_node(OC_EXEC);
1285         break_ptr = new_node(OC_EXEC);
1286         chain_group();
1287         n2 = chain_node(OC_EXEC | Vx);
1288         n2->l.n = nn;
1289         n2->a.n = n;
1290         continue_ptr->a.n = n2;
1291         break_ptr->a.n = n->r.n = seq->last;
1292
1293         continue_ptr = save_cont;
1294         break_ptr = save_brk;
1295
1296         return n;
1297 }
1298
1299 /* parse group and attach it to chain */
1300 static void chain_group(void)
1301 {
1302         uint32_t c;
1303         node *n, *n2, *n3;
1304
1305         do {
1306                 c = next_token(TC_GRPSEQ);
1307         } while (c & TC_NEWLINE);
1308
1309         if (c & TC_GRPSTART) {
1310                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1311                         if (t_tclass & TC_NEWLINE) continue;
1312                         rollback_token();
1313                         chain_group();
1314                 }
1315         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1316                 rollback_token();
1317                 chain_expr(OC_EXEC | Vx);
1318         } else {                                                /* TC_STATEMNT */
1319                 switch (t_info & OPCLSMASK) {
1320                 case ST_IF:
1321                         n = chain_node(OC_BR | Vx);
1322                         n->l.n = condition();
1323                         chain_group();
1324                         n2 = chain_node(OC_EXEC);
1325                         n->r.n = seq->last;
1326                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1327                                 chain_group();
1328                                 n2->a.n = seq->last;
1329                         } else {
1330                                 rollback_token();
1331                         }
1332                         break;
1333
1334                 case ST_WHILE:
1335                         n2 = condition();
1336                         n = chain_loop(NULL);
1337                         n->l.n = n2;
1338                         break;
1339
1340                 case ST_DO:
1341                         n2 = chain_node(OC_EXEC);
1342                         n = chain_loop(NULL);
1343                         n2->a.n = n->a.n;
1344                         next_token(TC_WHILE);
1345                         n->l.n = condition();
1346                         break;
1347
1348                 case ST_FOR:
1349                         next_token(TC_SEQSTART);
1350                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1351                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1352                                 if ((n2->info & OPCLSMASK) != OC_IN)
1353                                         syntax_error(EMSG_UNEXP_TOKEN);
1354                                 n = chain_node(OC_WALKINIT | VV);
1355                                 n->l.n = n2->l.n;
1356                                 n->r.n = n2->r.n;
1357                                 n = chain_loop(NULL);
1358                                 n->info = OC_WALKNEXT | Vx;
1359                                 n->l.n = n2->l.n;
1360                         } else {                        /* for (;;) */
1361                                 n = chain_node(OC_EXEC | Vx);
1362                                 n->l.n = n2;
1363                                 n2 = parse_expr(TC_SEMICOL);
1364                                 n3 = parse_expr(TC_SEQTERM);
1365                                 n = chain_loop(n3);
1366                                 n->l.n = n2;
1367                                 if (!n2)
1368                                         n->info = OC_EXEC;
1369                         }
1370                         break;
1371
1372                 case OC_PRINT:
1373                 case OC_PRINTF:
1374                         n = chain_node(t_info);
1375                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1376                         if (t_tclass & TC_OUTRDR) {
1377                                 n->info |= t_info;
1378                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1379                         }
1380                         if (t_tclass & TC_GRPTERM)
1381                                 rollback_token();
1382                         break;
1383
1384                 case OC_BREAK:
1385                         n = chain_node(OC_EXEC);
1386                         n->a.n = break_ptr;
1387                         break;
1388
1389                 case OC_CONTINUE:
1390                         n = chain_node(OC_EXEC);
1391                         n->a.n = continue_ptr;
1392                         break;
1393
1394                 /* delete, next, nextfile, return, exit */
1395                 default:
1396                         chain_expr(t_info);
1397                 }
1398         }
1399 }
1400
1401 static void parse_program(char *p)
1402 {
1403         uint32_t tclass;
1404         node *cn;
1405         func *f;
1406         var *v;
1407
1408         g_pos = p;
1409         t_lineno = 1;
1410         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1411                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1412
1413                 if (tclass & TC_OPTERM)
1414                         continue;
1415
1416                 seq = &mainseq;
1417                 if (tclass & TC_BEGIN) {
1418                         seq = &beginseq;
1419                         chain_group();
1420
1421                 } else if (tclass & TC_END) {
1422                         seq = &endseq;
1423                         chain_group();
1424
1425                 } else if (tclass & TC_FUNCDECL) {
1426                         next_token(TC_FUNCTION);
1427                         g_pos++;
1428                         f = newfunc(t_string);
1429                         f->body.first = NULL;
1430                         f->nargs = 0;
1431                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1432                                 v = findvar(ahash, t_string);
1433                                 v->x.aidx = (f->nargs)++;
1434
1435                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1436                                         break;
1437                         }
1438                         seq = &(f->body);
1439                         chain_group();
1440                         clear_array(ahash);
1441
1442                 } else if (tclass & TC_OPSEQ) {
1443                         rollback_token();
1444                         cn = chain_node(OC_TEST);
1445                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1446                         if (t_tclass & TC_GRPSTART) {
1447                                 rollback_token();
1448                                 chain_group();
1449                         } else {
1450                                 chain_node(OC_PRINT);
1451                         }
1452                         cn->r.n = mainseq.last;
1453
1454                 } else /* if (tclass & TC_GRPSTART) */ {
1455                         rollback_token();
1456                         chain_group();
1457                 }
1458         }
1459 }
1460
1461
1462 /* -------- program execution part -------- */
1463
1464 static node *mk_splitter(const char *s, tsplitter *spl)
1465 {
1466         regex_t *re, *ire;
1467         node *n;
1468
1469         re = &spl->re[0];
1470         ire = &spl->re[1];
1471         n = &spl->n;
1472         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1473                 regfree(re);
1474                 regfree(ire); // TODO: nuke ire, use re+1?
1475         }
1476         if (strlen(s) > 1) {
1477                 mk_re_node(s, n, re);
1478         } else {
1479                 n->info = (uint32_t) *s;
1480         }
1481
1482         return n;
1483 }
1484
1485 /* use node as a regular expression. Supplied with node ptr and regex_t
1486  * storage space. Return ptr to regex (if result points to preg, it should
1487  * be later regfree'd manually
1488  */
1489 static regex_t *as_regex(node *op, regex_t *preg)
1490 {
1491         int cflags;
1492         var *v;
1493         const char *s;
1494
1495         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1496                 return icase ? op->r.ire : op->l.re;
1497         }
1498         v = nvalloc(1);
1499         s = getvar_s(evaluate(op, v));
1500
1501         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1502         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1503          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1504          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1505          * (maybe gsub is not supposed to use REG_EXTENDED?).
1506          */
1507         if (regcomp(preg, s, cflags)) {
1508                 cflags &= ~REG_EXTENDED;
1509                 xregcomp(preg, s, cflags);
1510         }
1511         nvfree(v);
1512         return preg;
1513 }
1514
/* Gradually growing buffer: make sure *b exists and holds more than n
 * bytes. *size is only read when *b is already allocated; on growth it
 * is set to n + n/2 + 80.
 */
static void qrealloc(char **b, int n, int *size)
{
        /* existing buffer that is still large enough: nothing to do */
        if (*b != NULL && n < *size)
                return;

        *size = n + (n >> 1) + 80;
        *b = xrealloc(*b, *size);
}
1523
1524 /* resize field storage space */
1525 static void fsrealloc(int size)
1526 {
1527         int i;
1528
1529         if (size >= maxfields) {
1530                 i = maxfields;
1531                 maxfields = size + 16;
1532                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1533                 for (; i < maxfields; i++) {
1534                         Fields[i].type = VF_SPECIAL;
1535                         Fields[i].string = NULL;
1536                 }
1537         }
1538
1539         if (size < nfields) {
1540                 for (i = size; i < nfields; i++) {
1541                         clrvar(Fields + i);
1542                 }
1543         }
1544         nfields = size;
1545 }
1546
1547 static int awk_split(const char *s, node *spl, char **slist)
1548 {
1549         int l, n = 0;
1550         char c[4];
1551         char *s1;
1552         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1553
1554         /* in worst case, each char would be a separate field */
1555         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1556         strcpy(s1, s);
1557
1558         c[0] = c[1] = (char)spl->info;
1559         c[2] = c[3] = '\0';
1560         if (*getvar_s(intvar[RS]) == '\0')
1561                 c[2] = '\n';
1562
1563         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1564                 if (!*s)
1565                         return n; /* "": zero fields */
1566                 n++; /* at least one field will be there */
1567                 do {
1568                         l = strcspn(s, c+2); /* len till next NUL or \n */
1569                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1570                          && pmatch[0].rm_so <= l
1571                         ) {
1572                                 l = pmatch[0].rm_so;
1573                                 if (pmatch[0].rm_eo == 0) {
1574                                         l++;
1575                                         pmatch[0].rm_eo++;
1576                                 }
1577                                 n++; /* we saw yet another delimiter */
1578                         } else {
1579                                 pmatch[0].rm_eo = l;
1580                                 if (s[l])
1581                                         pmatch[0].rm_eo++;
1582                         }
1583                         memcpy(s1, s, l);
1584                         /* make sure we remove *all* of the separator chars */
1585                         do {
1586                                 s1[l] = '\0';
1587                         } while (++l < pmatch[0].rm_eo);
1588                         nextword(&s1);
1589                         s += pmatch[0].rm_eo;
1590                 } while (*s);
1591                 return n;
1592         }
1593         if (c[0] == '\0') {  /* null split */
1594                 while (*s) {
1595                         *s1++ = *s++;
1596                         *s1++ = '\0';
1597                         n++;
1598                 }
1599                 return n;
1600         }
1601         if (c[0] != ' ') {  /* single-character split */
1602                 if (icase) {
1603                         c[0] = toupper(c[0]);
1604                         c[1] = tolower(c[1]);
1605                 }
1606                 if (*s1) n++;
1607                 while ((s1 = strpbrk(s1, c))) {
1608                         *s1++ = '\0';
1609                         n++;
1610                 }
1611                 return n;
1612         }
1613         /* space split */
1614         while (*s) {
1615                 s = skip_whitespace(s);
1616                 if (!*s) break;
1617                 n++;
1618                 while (*s && !isspace(*s))
1619                         *s1++ = *s++;
1620                 *s1++ = '\0';
1621         }
1622         return n;
1623 }
1624
1625 static void split_f0(void)
1626 {
1627 /* static char *fstrings; */
1628 #define fstrings (G.split_f0__fstrings)
1629
1630         int i, n;
1631         char *s;
1632
1633         if (is_f0_split)
1634                 return;
1635
1636         is_f0_split = TRUE;
1637         free(fstrings);
1638         fsrealloc(0);
1639         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1640         fsrealloc(n);
1641         s = fstrings;
1642         for (i = 0; i < n; i++) {
1643                 Fields[i].string = nextword(&s);
1644                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1645         }
1646
1647         /* set NF manually to avoid side effects */
1648         clrvar(intvar[NF]);
1649         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1650         intvar[NF]->number = nfields;
1651 #undef fstrings
1652 }
1653
1654 /* perform additional actions when some internal variables changed */
1655 static void handle_special(var *v)
1656 {
1657         int n;
1658         char *b;
1659         const char *sep, *s;
1660         int sl, l, len, i, bsize;
1661
1662         if (!(v->type & VF_SPECIAL))
1663                 return;
1664
1665         if (v == intvar[NF]) {
1666                 n = (int)getvar_i(v);
1667                 fsrealloc(n);
1668
1669                 /* recalculate $0 */
1670                 sep = getvar_s(intvar[OFS]);
1671                 sl = strlen(sep);
1672                 b = NULL;
1673                 len = 0;
1674                 for (i = 0; i < n; i++) {
1675                         s = getvar_s(&Fields[i]);
1676                         l = strlen(s);
1677                         if (b) {
1678                                 memcpy(b+len, sep, sl);
1679                                 len += sl;
1680                         }
1681                         qrealloc(&b, len+l+sl, &bsize);
1682                         memcpy(b+len, s, l);
1683                         len += l;
1684                 }
1685                 if (b)
1686                         b[len] = '\0';
1687                 setvar_p(intvar[F0], b);
1688                 is_f0_split = TRUE;
1689
1690         } else if (v == intvar[F0]) {
1691                 is_f0_split = FALSE;
1692
1693         } else if (v == intvar[FS]) {
1694                 mk_splitter(getvar_s(v), &fsplitter);
1695
1696         } else if (v == intvar[RS]) {
1697                 mk_splitter(getvar_s(v), &rsplitter);
1698
1699         } else if (v == intvar[IGNORECASE]) {
1700                 icase = istrue(v);
1701
1702         } else {                                /* $n */
1703                 n = getvar_i(intvar[NF]);
1704                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1705                 /* right here v is invalid. Just to note... */
1706         }
1707 }
1708
1709 /* step through func/builtin/etc arguments */
1710 static node *nextarg(node **pn)
1711 {
1712         node *n;
1713
1714         n = *pn;
1715         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1716                 *pn = n->r.n;
1717                 n = n->l.n;
1718         } else {
1719                 *pn = NULL;
1720         }
1721         return n;
1722 }
1723
1724 static void hashwalk_init(var *v, xhash *array)
1725 {
1726         char **w;
1727         hash_item *hi;
1728         unsigned i;
1729         char **prev_walker = (v->type & VF_WALK) ? v->x.walker : NULL;
1730
1731         v->type |= VF_WALK;
1732
1733         /* walker structure is: "[ptr2end][ptr2start][prev]<word1>NUL<word2>NUL" */
1734         w = v->x.walker = xzalloc(2 + 3*sizeof(char *) + array->glen);
1735         //bb_error_msg("walker@%p=%p", &v->x.walker, v->x.walker);
1736         w[0] = w[1] = (char *)(w + 3);
1737         w[2] = (char *)prev_walker;
1738         for (i = 0; i < array->csize; i++) {
1739                 hi = array->items[i];
1740                 while (hi) {
1741                         strcpy(w[0], hi->name);
1742                         nextword(&w[0]);
1743                         hi = hi->next;
1744                 }
1745         }
1746 }
1747
1748 static int hashwalk_next(var *v)
1749 {
1750         char **w;
1751
1752         w = v->x.walker;
1753         if (w[1] == w[0]) {
1754                 char **prev_walker = (char**)w[2];
1755
1756                 //bb_error_msg("free(walker@%p:%p) #3, restoring to %p", &v->x.walker, v->x.walker, prev_walker);
1757                 free(v->x.walker);
1758                 v->x.walker = prev_walker;
1759                 return FALSE;
1760         }
1761
1762         setvar_s(v, nextword(&w[1]));
1763         return TRUE;
1764 }
1765
1766 /* evaluate node, return 1 when result is true, 0 otherwise */
1767 static int ptest(node *pattern)
1768 {
1769         /* ptest__v is "static": to save stack space? */
1770         return istrue(evaluate(pattern, &G.ptest__v));
1771 }
1772
/*
 * Read the next record from stream rsm into variable v.
 *
 * Data is accumulated in the stream's private buffer until a record
 * separator is found (or no more input arrives). The separator is taken
 * from rsplitter and handled in one of three ways: a regular expression,
 * a single character, or - when that character is NUL - a run of
 * blank lines ("\n\n").
 *
 * On success v receives the record text (flagged VF_USER) and RT receives
 * the separator text that terminated it.
 *
 * Returns 1 when a record was read, 0 when no data is left, and -1 when
 * the read failed (ERRNO is set in that case).
 */
static int awk_getline(rstream *rsm, var *v)
{
        char *b;
        regmatch_t pmatch[2];
        int a, p, pp=0, size;
        int fd, so, eo, r, rp;
        char c, *m, *s;

        /* we're using our own buffer since we need access to accumulating
         * characters
         */
        fd = fileno(rsm->F);
        m = rsm->buffer;        /* start of the allocation */
        a = rsm->adv;           /* offset of the unconsumed data inside m */
        p = rsm->pos;           /* number of unconsumed bytes at m+a */
        size = rsm->size;       /* allocated size of m */
        c = (char) rsplitter.n.info;    /* low byte of the splitter info: used as the separator char */
        rp = 0;                 /* leading newlines skipped (blank-line mode only) */

        if (!m) qrealloc(&m, 256, &size);
        do {
                b = m + a;
                /* default: no separator seen, the record is everything buffered */
                so = eo = p;
                r = 1;
                if (p > 0) {
                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
                                                        b, 1, pmatch, 0) == 0) {
                                        so = pmatch[0].rm_so;
                                        eo = pmatch[0].rm_eo;
                                        /* a match touching the end of the buffered data
                                         * might grow with more input: keep reading */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        } else if (c != '\0') {
                                /* only the bytes added by the last read (from pp on)
                                 * need to be searched */
                                s = strchr(b+pp, c);
                                if (!s) s = memchr(b+pp, '\0', p - pp);
                                if (s) {
                                        so = eo = s-b;
                                        eo++;
                                        break;
                                }
                        } else {
                                /* separator is a run of blank lines */
                                while (b[rp] == '\n')
                                        rp++;
                                s = strstr(b+rp, "\n\n");
                                if (s) {
                                        so = eo = s-b;
                                        while (b[eo] == '\n') eo++;
                                        /* as above: a run ending at the end of the
                                         * buffered data may still continue */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        }
                }

                /* no complete record yet: move the pending data (and its NUL)
                 * to the front of the buffer before reading more */
                if (a > 0) {
                        memmove(m, (const void *)(m+a), p+1);
                        b = m;
                        a = 0;
                }

                qrealloc(&m, a+p+128, &size);
                b = m + a;
                pp = p;
                p += safe_read(fd, b+p, size-p-1);
                if (p < pp) {
                        /* safe_read returned a negative value: read error */
                        p = 0;
                        r = 0;
                        setvar_i(intvar[ERRNO], errno);
                }
                b[p] = '\0';

        } while (p > pp);       /* loop for as long as the read delivered new bytes */

        if (p == 0) {
                /* nothing buffered: 1 -> 0 (no more data), 0 -> -1 (read error) */
                r--;
        } else {
                /* temporarily NUL-terminate the record, then the separator,
                 * to copy them out; the original bytes are put back afterwards */
                c = b[so]; b[so] = '\0';
                setvar_s(v, b+rp);
                v->type |= VF_USER;
                b[so] = c;
                c = b[eo]; b[eo] = '\0';
                setvar_s(intvar[RT], b+so);
                b[eo] = c;
        }

        /* store the buffer state back; the next call resumes after the separator */
        rsm->buffer = m;
        rsm->adv = a + eo;
        rsm->pos = p - eo;
        rsm->size = size;

        return r;
}
1866
1867 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1868 {
1869         int r = 0;
1870         char c;
1871         const char *s = format;
1872
1873         if (int_as_int && n == (int)n) {
1874                 r = snprintf(b, size, "%d", (int)n);
1875         } else {
1876                 do { c = *s; } while (c && *++s);
1877                 if (strchr("diouxX", c)) {
1878                         r = snprintf(b, size, format, (int)n);
1879                 } else if (strchr("eEfgG", c)) {
1880                         r = snprintf(b, size, format, n);
1881                 } else {
1882                         syntax_error(EMSG_INV_FMT);
1883                 }
1884         }
1885         return r;
1886 }
1887
1888 /* formatted output into an allocated buffer, return ptr to buffer */
1889 static char *awk_printf(node *n)
1890 {
1891         char *b = NULL;
1892         char *fmt, *s, *f;
1893         const char *s1;
1894         int i, j, incr, bsize;
1895         char c, c1;
1896         var *v, *arg;
1897
1898         v = nvalloc(1);
1899         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1900
1901         i = 0;
1902         while (*f) {
1903                 s = f;
1904                 while (*f && (*f != '%' || *(++f) == '%'))
1905                         f++;
1906                 while (*f && !isalpha(*f)) {
1907                         if (*f == '*')
1908                                 syntax_error("%*x formats are not supported");
1909                         f++;
1910                 }
1911
1912                 incr = (f - s) + MAXVARFMT;
1913                 qrealloc(&b, incr + i, &bsize);
1914                 c = *f;
1915                 if (c != '\0') f++;
1916                 c1 = *f;
1917                 *f = '\0';
1918                 arg = evaluate(nextarg(&n), v);
1919
1920                 j = i;
1921                 if (c == 'c' || !c) {
1922                         i += sprintf(b+i, s, is_numeric(arg) ?
1923                                         (char)getvar_i(arg) : *getvar_s(arg));
1924                 } else if (c == 's') {
1925                         s1 = getvar_s(arg);
1926                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1927                         i += sprintf(b+i, s, s1);
1928                 } else {
1929                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1930                 }
1931                 *f = c1;
1932
1933                 /* if there was an error while sprintf, return value is negative */
1934                 if (i < j) i = j;
1935         }
1936
1937         b = xrealloc(b, i + 1);
1938         free(fmt);
1939         nvfree(v);
1940         b[i] = '\0';
1941         return b;
1942 }
1943
/* common substitution routine
 * replace (nm) substring of (src) that match (n) with (repl), store
 * result into (dest), return number of substitutions. If nm=0, replace
 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
 * subexpression matching (\1-\9)
 *
 * In (repl), '&' stands for the matched text; an odd number of
 * backslashes in front of it makes it a literal '&'. With ex=TRUE a
 * digit preceded by an odd number of backslashes inserts the text of
 * that subexpression (digit 0 being the whole match).
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
{
        char *ds = NULL;
        const char *s;
        const char *sp;
        int c, i, j, di, rl, so, eo, nbs, n, dssize;
        regmatch_t pmatch[10];
        regex_t sreg, *re;

        /* when as_regex() hands back &sreg, the regex is released below */
        re = as_regex(rn, &sreg);
        if (!src) src = intvar[F0];
        if (!dest) dest = intvar[F0];

        /* i: matches seen so far; di: bytes written to ds;
         * sp: unprocessed rest of the source string */
        i = di = 0;
        sp = getvar_s(src);
        rl = strlen(repl);
        /* past the start of the string, '^' must no longer match */
        while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
                so = pmatch[0].rm_so;
                eo = pmatch[0].rm_eo;

                /* copy everything up to and including the match */
                qrealloc(&ds, di + eo + rl, &dssize);
                memcpy(ds + di, sp, eo);
                di += eo;
                if (++i >= nm) {
                        /* replace */
                        /* step back over the matched text just copied */
                        di -= (eo - so);
                        /* nbs: number of consecutive backslashes just emitted */
                        nbs = 0;
                        for (s = repl; *s; s++) {
                                ds[di++] = c = *s;
                                if (c == '\\') {
                                        nbs++;
                                        continue;
                                }
                                if (c == '&' || (ex && c >= '0' && c <= '9')) {
                                        /* drop this char and keep only half
                                         * (rounded down) of the backslashes */
                                        di -= ((nbs + 3) >> 1);
                                        j = 0;
                                        if (c != '&') {
                                                j = c - '0';
                                                /* flips the parity: for digits it is
                                                 * an odd backslash count that selects
                                                 * the substitution */
                                                nbs++;
                                        }
                                        if (nbs % 2) {
                                                /* escaped: emit the char literally */
                                                ds[di++] = c;
                                        } else {
                                                /* insert the text of (sub)match j */
                                                n = pmatch[j].rm_eo - pmatch[j].rm_so;
                                                qrealloc(&ds, di + rl + n, &dssize);
                                                memcpy(ds + di, sp + pmatch[j].rm_so, n);
                                                di += n;
                                        }
                                }
                                nbs = 0;
                        }
                }

                sp += eo;
                /* the requested match has been replaced (never true for nm=0) */
                if (i == nm)
                        break;
                if (eo == so) {
                        /* empty match: copy one char so that the next search
                         * starts further on; stop at the end of the string */
                        ds[di] = *sp++;
                        if (!ds[di++])
                                break;
                }
        }

        /* append whatever follows the last processed match */
        qrealloc(&ds, di + strlen(sp), &dssize);
        strcpy(ds + di, sp);
        setvar_p(dest, ds);
        if (re == &sreg)
                regfree(re);
        return i;
}
2020
/*
 * Convert a date string of the form "YYYY MM DD HH MM SS [DST]" into
 * seconds since the epoch using mktime().
 * Returns -1 when fewer than six numbers could be parsed, when the
 * month is 0, or when the year is before 1900.
 */
static NOINLINE int do_mktime(const char *ds)
{
        struct tm broken_down;
        int nparsed;

        /* no memset of the struct: every field used below is either
         * filled in by sscanf or preset here */
        broken_down.tm_isdst = -1; /* unknown unless the string says otherwise */

        /* manpage of mktime says these fields are ints,
         * so we can sscanf stuff directly into them */
        nparsed = sscanf(ds, "%u %u %u %u %u %u %d",
                &broken_down.tm_year, &broken_down.tm_mon, &broken_down.tm_mday,
                &broken_down.tm_hour, &broken_down.tm_min, &broken_down.tm_sec,
                &broken_down.tm_isdst);

        if (nparsed < 6)
                return -1;
        if ((unsigned)broken_down.tm_mon < 1)
                return -1;
        if ((unsigned)broken_down.tm_year < 1900)
                return -1;

        /* struct tm counts months from 0 and years from 1900 */
        broken_down.tm_mon--;
        broken_down.tm_year -= 1900;

        return mktime(&broken_down);
}
2048
/*
 * Execute a built-in function node and store its value into res.
 * Returns res.
 *
 * Up to four arguments are collected from the node's argument list.
 * Flag bits in op->info decide, per argument, whether it is evaluated
 * (av[i]) and whether its string value is fetched as well (as[i]); the
 * two top bits of op->info give the minimum number of arguments.
 * The low bits (OPNMASK) select the function.
 */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)

        var *tv;
        node *an[4];
        var *av[4];
        const char *as[4];
        regmatch_t pmatch[2];
        regex_t sreg, *re;
        node *spl;
        uint32_t isr, info;
        int nargs;
        time_t tt;
        char *s, *s1;
        int i, l, ll, n;

        /* scratch variables that receive the evaluated arguments */
        tv = nvalloc(4);
        isr = info = op->info;
        op = op->l.n;

        /* the optional 3rd/4th arguments are passed on below even when absent */
        av[2] = av[3] = NULL;
        for (i = 0; i < 4 && op; i++) {
                an[i] = nextarg(&op);
                /* isr is shifted right after every argument, so the same two
                 * masks test the flag bits belonging to argument i */
                if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
                if (isr & 0x08000000) as[i] = getvar_s(av[i]);
                isr >>= 1;
        }

        nargs = i;
        if ((uint32_t)nargs < (info >> 30))
                syntax_error(EMSG_TOO_FEW_ARGS);

        info &= OPNMASK;
        switch (info) {

        /* arc tangent of two arguments */
        case B_a2:
#if ENABLE_FEATURE_AWK_LIBM
                setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
#else
                syntax_error(EMSG_NO_MATH);
#endif
                break;

        /* split a string into an array; result is the number of pieces */
        case B_sp:
                /* splitter: a regexp node is used as is, any other 3rd argument
                 * is turned into a splitter; without one the FS splitter applies */
                if (nargs > 2) {
                        spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
                                an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
                } else {
                        spl = &fsplitter.n;
                }

                n = awk_split(as[0], spl, &s);
                s1 = s;
                clear_array(iamarray(av[1]));
                /* array elements are numbered from 1 */
                for (i = 1; i <= n; i++)
                        setari_u(av[1], i, nextword(&s1));
                free(s);
                setvar_i(res, n);
                break;

        /* substring: 1-based start in av[1], optional length in av[2] */
        case B_ss:
                l = strlen(as[0]);
                i = getvar_i(av[1]) - 1;
                /* clamp the start offset into [0, l] */
                if (i > l) i = l;
                if (i < 0) i = 0;
                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
                if (n < 0) n = 0;
                s = xstrndup(as[0]+i, n);
                setvar_p(res, s);
                break;

        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
        /* bitwise AND */
        case B_an:
                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
                break;

        /* bitwise complement */
        case B_co:
                setvar_i(res, ~getvar_i_int(av[0]));
                break;

        /* left shift */
        case B_ls:
                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
                break;

        /* bitwise OR */
        case B_or:
                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
                break;

        /* right shift */
        case B_rs:
                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
                break;

        /* bitwise XOR */
        case B_xo:
                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
                break;

        /* lower-case / upper-case copy of a string (ASCII letters only) */
        case B_lo:
        case B_up:
                s1 = s = xstrdup(as[0]);
                while (*s1) {
                        //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
                        /* bit 0x20 is what separates 'A'-'Z' from 'a'-'z' */
                        if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
                                *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
                        s1++;
                }
                setvar_p(res, s);
                break;

        /* 1-based position of as[1] inside as[0]; 0 when it does not occur */
        case B_ix:
                n = 0;
                ll = strlen(as[1]);
                /* l: the last offset at which as[1] could still fit */
                l = strlen(as[0]) - ll;
                if (ll > 0 && l >= 0) {
                        if (!icase) {
                                s = strstr(as[0], as[1]);
                                if (s) n = (s - as[0]) + 1;
                        } else {
                                /* this piece of code is terribly slow and
                                 * really should be rewritten
                                 */
                                for (i=0; i<=l; i++) {
                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
                                                n = i+1;
                                                break;
                                        }
                                }
                        }
                }
                setvar_i(res, n);
                break;

        /* format a time: optional format in as[0], optional timestamp in av[1]
         * (the current time is used when it is absent) */
        case B_ti:
                if (nargs > 1)
                        tt = getvar_i(av[1]);
                else
                        time(&tt);
                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
                i = strftime(g_buf, MAXVARFMT,
                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
                        localtime(&tt));
                g_buf[i] = '\0';
                setvar_s(res, g_buf);
                break;

        /* convert a date string to a timestamp */
        case B_mt:
                setvar_i(res, do_mktime(as[0]));
                break;

        /* regex match: result and RSTART are the 1-based match position
         * (0 if none), RLENGTH is the match length (-1 if none) */
        case B_ma:
                re = as_regex(an[1], &sreg);
                n = regexec(re, as[0], 1, pmatch, 0);
                if (n == 0) {
                        /* convert the 0-based offsets to 1-based positions */
                        pmatch[0].rm_so++;
                        pmatch[0].rm_eo++;
                } else {
                        pmatch[0].rm_so = 0;
                        pmatch[0].rm_eo = -1;
                }
                setvar_i(newvar("RSTART"), pmatch[0].rm_so);
                setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
                setvar_i(res, pmatch[0].rm_so);
                if (re == &sreg) regfree(re);
                break;

        /* substitution with subexpression support: the match number comes
         * from av[2], the source from av[3], the new string goes into res */
        case B_ge:
                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
                break;

        /* replace all matches in place; result is the substitution count */
        case B_gs:
                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
                break;

        /* replace the first match in place; result is the substitution count */
        case B_su:
                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
                break;
        }

        nvfree(tv);
        return res;
#undef tspl
}
2232
2233 /*
2234  * Evaluate node - the heart of the program. Supplied with subtree
2235  * and place where to store result. returns ptr to result.
2236  */
2237 #define XC(n) ((n) >> 8)
2238
2239 static var *evaluate(node *op, var *res)
2240 {
2241 /* This procedure is recursive so we should count every byte */
2242 #define fnargs (G.evaluate__fnargs)
2243 /* seed is initialized to 1 */
2244 #define seed   (G.evaluate__seed)
2245 #define sreg   (G.evaluate__sreg)
2246
2247         node *op1;
2248         var *v1;
2249         union {
2250                 var *v;
2251                 const char *s;
2252                 double d;
2253                 int i;
2254         } L, R;
2255         uint32_t opinfo;
2256         int opn;
2257         union {
2258                 char *s;
2259                 rstream *rsm;
2260                 FILE *F;
2261                 var *v;
2262                 regex_t *re;
2263                 uint32_t info;
2264         } X;
2265
2266         if (!op)
2267                 return setvar_s(res, NULL);
2268
2269         v1 = nvalloc(2);
2270
2271         while (op) {
2272                 opinfo = op->info;
2273                 opn = (opinfo & OPNMASK);
2274                 g_lineno = op->lineno;
2275
2276                 /* execute inevitable things */
2277                 op1 = op->l.n;
2278                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2279                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2280                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2281                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2282                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2283
2284                 switch (XC(opinfo & OPCLSMASK)) {
2285
2286                 /* -- iterative node type -- */
2287
2288                 /* test pattern */
2289                 case XC( OC_TEST ):
2290                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2291                                 /* it's range pattern */
2292                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2293                                         op->info |= OF_CHECKED;
2294                                         if (ptest(op1->r.n))
2295                                                 op->info &= ~OF_CHECKED;
2296
2297                                         op = op->a.n;
2298                                 } else {
2299                                         op = op->r.n;
2300                                 }
2301                         } else {
2302                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2303                         }
2304                         break;
2305
2306                 /* just evaluate an expression, also used as unconditional jump */
2307                 case XC( OC_EXEC ):
2308                         break;
2309
2310                 /* branch, used in if-else and various loops */
2311                 case XC( OC_BR ):
2312                         op = istrue(L.v) ? op->a.n : op->r.n;
2313                         break;
2314
2315                 /* initialize for-in loop */
2316                 case XC( OC_WALKINIT ):
2317                         hashwalk_init(L.v, iamarray(R.v));
2318                         break;
2319
2320                 /* get next array item */
2321                 case XC( OC_WALKNEXT ):
2322                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2323                         break;
2324
2325                 case XC( OC_PRINT ):
2326                 case XC( OC_PRINTF ):
2327                         X.F = stdout;
2328                         if (op->r.n) {
2329                                 X.rsm = newfile(R.s);
2330                                 if (!X.rsm->F) {
2331                                         if (opn == '|') {
2332                                                 X.rsm->F = popen(R.s, "w");
2333                                                 if (X.rsm->F == NULL)
2334                                                         bb_perror_msg_and_die("popen");
2335                                                 X.rsm->is_pipe = 1;
2336                                         } else {
2337                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2338                                         }
2339                                 }
2340                                 X.F = X.rsm->F;
2341                         }
2342
2343                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2344                                 if (!op1) {
2345                                         fputs(getvar_s(intvar[F0]), X.F);
2346                                 } else {
2347                                         while (op1) {
2348                                                 L.v = evaluate(nextarg(&op1), v1);
2349                                                 if (L.v->type & VF_NUMBER) {
2350                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2351                                                                         getvar_i(L.v), TRUE);
2352                                                         fputs(g_buf, X.F);
2353                                                 } else {
2354                                                         fputs(getvar_s(L.v), X.F);
2355                                                 }
2356
2357                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2358                                         }
2359                                 }
2360                                 fputs(getvar_s(intvar[ORS]), X.F);
2361
2362                         } else {        /* OC_PRINTF */
2363                                 L.s = awk_printf(op1);
2364                                 fputs(L.s, X.F);
2365                                 free((char*)L.s);
2366                         }
2367                         fflush(X.F);
2368                         break;
2369
2370                 case XC( OC_DELETE ):
2371                         X.info = op1->info & OPCLSMASK;
2372                         if (X.info == OC_VAR) {
2373                                 R.v = op1->l.v;
2374                         } else if (X.info == OC_FNARG) {
2375                                 R.v = &fnargs[op1->l.i];
2376                         } else {
2377                                 syntax_error(EMSG_NOT_ARRAY);
2378                         }
2379
2380                         if (op1->r.n) {
2381                                 clrvar(L.v);
2382                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2383                                 hash_remove(iamarray(R.v), L.s);
2384                         } else {
2385                                 clear_array(iamarray(R.v));
2386                         }
2387                         break;
2388
2389                 case XC( OC_NEWSOURCE ):
2390                         g_progname = op->l.s;
2391                         break;
2392
2393                 case XC( OC_RETURN ):
2394                         copyvar(res, L.v);
2395                         break;
2396
2397                 case XC( OC_NEXTFILE ):
2398                         nextfile = TRUE;
2399                 case XC( OC_NEXT ):
2400                         nextrec = TRUE;
2401                 case XC( OC_DONE ):
2402                         clrvar(res);
2403                         break;
2404
2405                 case XC( OC_EXIT ):
2406                         awk_exit(L.d);
2407
2408                 /* -- recursive node type -- */
2409
2410                 case XC( OC_VAR ):
2411                         L.v = op->l.v;
2412                         if (L.v == intvar[NF])
2413                                 split_f0();
2414                         goto v_cont;
2415
2416                 case XC( OC_FNARG ):
2417                         L.v = &fnargs[op->l.i];
2418  v_cont:
2419                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2420                         break;
2421
2422                 case XC( OC_IN ):
2423                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2424                         break;
2425
2426                 case XC( OC_REGEXP ):
2427                         op1 = op;
2428                         L.s = getvar_s(intvar[F0]);
2429                         goto re_cont;
2430
2431                 case XC( OC_MATCH ):
2432                         op1 = op->r.n;
2433  re_cont:
2434                         X.re = as_regex(op1, &sreg);
2435                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2436                         if (X.re == &sreg) regfree(X.re);
2437                         setvar_i(res, (R.i == 0) ^ (opn == '!'));
2438                         break;
2439
2440                 case XC( OC_MOVE ):
2441                         /* if source is a temporary string, jusk relink it to dest */
2442 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2443 //then L.v ends up being a string, which is wrong
2444 //                      if (R.v == v1+1 && R.v->string) {
2445 //                              res = setvar_p(L.v, R.v->string);
2446 //                              R.v->string = NULL;
2447 //                      } else {
2448                                 res = copyvar(L.v, R.v);
2449 //                      }
2450                         break;
2451
2452                 case XC( OC_TERNARY ):
2453                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2454                                 syntax_error(EMSG_POSSIBLE_ERROR);
2455                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2456                         break;
2457
2458                 case XC( OC_FUNC ):
2459                         if (!op->r.f->body.first)
2460                                 syntax_error(EMSG_UNDEF_FUNC);
2461
2462                         X.v = R.v = nvalloc(op->r.f->nargs + 1);
2463                         while (op1) {
2464                                 L.v = evaluate(nextarg(&op1), v1);
2465                                 copyvar(R.v, L.v);
2466                                 R.v->type |= VF_CHILD;
2467                                 R.v->x.parent = L.v;
2468                                 if (++R.v - X.v >= op->r.f->nargs)
2469                                         break;
2470                         }
2471
2472                         R.v = fnargs;
2473                         fnargs = X.v;
2474
2475                         L.s = g_progname;
2476                         res = evaluate(op->r.f->body.first, res);
2477                         g_progname = L.s;
2478
2479                         nvfree(fnargs);
2480                         fnargs = R.v;
2481                         break;
2482
2483                 case XC( OC_GETLINE ):
2484                 case XC( OC_PGETLINE ):
2485                         if (op1) {
2486                                 X.rsm = newfile(L.s);
2487                                 if (!X.rsm->F) {
2488                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2489                                                 X.rsm->F = popen(L.s, "r");
2490                                                 X.rsm->is_pipe = TRUE;
2491                                         } else {
2492                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2493                                         }
2494                                 }
2495                         } else {
2496                                 if (!iF) iF = next_input_file();
2497                                 X.rsm = iF;
2498                         }
2499
2500                         if (!X.rsm->F) {
2501                                 setvar_i(intvar[ERRNO], errno);
2502                                 setvar_i(res, -1);
2503                                 break;
2504                         }
2505
2506                         if (!op->r.n)
2507                                 R.v = intvar[F0];
2508
2509                         L.i = awk_getline(X.rsm, R.v);
2510                         if (L.i > 0) {
2511                                 if (!op1) {
2512                                         incvar(intvar[FNR]);
2513                                         incvar(intvar[NR]);
2514                                 }
2515                         }
2516                         setvar_i(res, L.i);
2517                         break;
2518
2519                 /* simple builtins */
2520                 case XC( OC_FBLTIN ):
2521                         switch (opn) {
2522
2523                         case F_in:
2524                                 R.d = (int)L.d;
2525                                 break;
2526
2527                         case F_rn:
2528                                 R.d = (double)rand() / (double)RAND_MAX;
2529                                 break;
2530 #if ENABLE_FEATURE_AWK_LIBM
2531                         case F_co:
2532                                 R.d = cos(L.d);
2533                                 break;
2534
2535                         case F_ex:
2536                                 R.d = exp(L.d);
2537                                 break;
2538
2539                         case F_lg:
2540                                 R.d = log(L.d);
2541                                 break;
2542
2543                         case F_si:
2544                                 R.d = sin(L.d);
2545                                 break;
2546
2547                         case F_sq:
2548                                 R.d = sqrt(L.d);
2549                                 break;
2550 #else
2551                         case F_co:
2552                         case F_ex:
2553                         case F_lg:
2554                         case F_si:
2555                         case F_sq:
2556                                 syntax_error(EMSG_NO_MATH);
2557                                 break;
2558 #endif
2559                         case F_sr:
2560                                 R.d = (double)seed;
2561                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2562                                 srand(seed);
2563                                 break;
2564
2565                         case F_ti:
2566                                 R.d = time(NULL);
2567                                 break;
2568
2569                         case F_le:
2570                                 if (!op1)
2571                                         L.s = getvar_s(intvar[F0]);
2572                                 R.d = strlen(L.s);
2573                                 break;
2574
2575                         case F_sy:
2576                                 fflush_all();
2577                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2578                                                 ? (system(L.s) >> 8) : 0;
2579                                 break;
2580
2581                         case F_ff:
2582                                 if (!op1)
2583                                         fflush(stdout);
2584                                 else {
2585                                         if (L.s && *L.s) {
2586                                                 X.rsm = newfile(L.s);
2587                                                 fflush(X.rsm->F);
2588                                         } else {
2589                                                 fflush_all();
2590                                         }
2591                                 }
2592                                 break;
2593
2594                         case F_cl:
2595                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2596                                 if (X.rsm) {
2597                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2598                                         free(X.rsm->buffer);
2599                                         hash_remove(fdhash, L.s);
2600                                 }
2601                                 if (R.i != 0)
2602                                         setvar_i(intvar[ERRNO], errno);
2603                                 R.d = (double)R.i;
2604                                 break;
2605                         }
2606                         setvar_i(res, R.d);
2607                         break;
2608
2609                 case XC( OC_BUILTIN ):
2610                         res = exec_builtin(op, res);
2611                         break;
2612
2613                 case XC( OC_SPRINTF ):
2614                         setvar_p(res, awk_printf(op1));
2615                         break;
2616
2617                 case XC( OC_UNARY ):
2618                         X.v = R.v;
2619                         L.d = R.d = getvar_i(R.v);
2620                         switch (opn) {
2621                         case 'P':
2622                                 L.d = ++R.d;
2623                                 goto r_op_change;
2624                         case 'p':
2625                                 R.d++;
2626                                 goto r_op_change;
2627                         case 'M':
2628                                 L.d = --R.d;
2629                                 goto r_op_change;
2630                         case 'm':
2631                                 R.d--;
2632                                 goto r_op_change;
2633                         case '!':
2634                                 L.d = !istrue(X.v);
2635                                 break;
2636                         case '-':
2637                                 L.d = -R.d;
2638                                 break;
2639  r_op_change:
2640                                 setvar_i(X.v, R.d);
2641                         }
2642                         setvar_i(res, L.d);
2643                         break;
2644
2645                 case XC( OC_FIELD ):
2646                         R.i = (int)getvar_i(R.v);
2647                         if (R.i == 0) {
2648                                 res = intvar[F0];
2649                         } else {
2650                                 split_f0();
2651                                 if (R.i > nfields)
2652                                         fsrealloc(R.i);
2653                                 res = &Fields[R.i - 1];
2654                         }
2655                         break;
2656
2657                 /* concatenation (" ") and index joining (",") */
2658                 case XC( OC_CONCAT ):
2659                 case XC( OC_COMMA ):
2660                         opn = strlen(L.s) + strlen(R.s) + 2;
2661                         X.s = xmalloc(opn);
2662                         strcpy(X.s, L.s);
2663                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2664                                 L.s = getvar_s(intvar[SUBSEP]);
2665                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2666                                 strcat(X.s, L.s);
2667                         }
2668                         strcat(X.s, R.s);
2669                         setvar_p(res, X.s);
2670                         break;
2671
2672                 case XC( OC_LAND ):
2673                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2674                         break;
2675
2676                 case XC( OC_LOR ):
2677                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2678                         break;
2679
2680                 case XC( OC_BINARY ):
2681                 case XC( OC_REPLACE ):
2682                         R.d = getvar_i(R.v);
2683                         switch (opn) {
2684                         case '+':
2685                                 L.d += R.d;
2686                                 break;
2687                         case '-':
2688                                 L.d -= R.d;
2689                                 break;
2690                         case '*':
2691                                 L.d *= R.d;
2692                                 break;
2693                         case '/':
2694                                 if (R.d == 0)
2695                                         syntax_error(EMSG_DIV_BY_ZERO);
2696                                 L.d /= R.d;
2697                                 break;
2698                         case '&':
2699 #if ENABLE_FEATURE_AWK_LIBM
2700                                 L.d = pow(L.d, R.d);
2701 #else
2702                                 syntax_error(EMSG_NO_MATH);
2703 #endif
2704                                 break;
2705                         case '%':
2706                                 if (R.d == 0)
2707                                         syntax_error(EMSG_DIV_BY_ZERO);
2708                                 L.d -= (int)(L.d / R.d) * R.d;
2709                                 break;
2710                         }
2711                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2712                         break;
2713
2714                 case XC( OC_COMPARE ):
2715                         if (is_numeric(L.v) && is_numeric(R.v)) {
2716                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2717                         } else {
2718                                 L.s = getvar_s(L.v);
2719                                 R.s = getvar_s(R.v);
2720                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2721                         }
2722                         switch (opn & 0xfe) {
2723                         case 0:
2724                                 R.i = (L.d > 0);
2725                                 break;
2726                         case 2:
2727                                 R.i = (L.d >= 0);
2728                                 break;
2729                         case 4:
2730                                 R.i = (L.d == 0);
2731                                 break;
2732                         }
2733                         setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2734                         break;
2735
2736                 default:
2737                         syntax_error(EMSG_POSSIBLE_ERROR);
2738                 }
2739                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2740                         op = op->a.n;
2741                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2742                         break;
2743                 if (nextrec)
2744                         break;
2745         }
2746         nvfree(v1);
2747         return res;
2748 #undef fnargs
2749 #undef seed
2750 #undef sreg
2751 }
2752
2753
2754 /* -------- main & co. -------- */
2755
2756 static int awk_exit(int r)
2757 {
2758         var tv;
2759         unsigned i;
2760         hash_item *hi;
2761
2762         zero_out_var(&tv);
2763
2764         if (!exiting) {
2765                 exiting = TRUE;
2766                 nextrec = FALSE;
2767                 evaluate(endseq.first, &tv);
2768         }
2769
2770         /* waiting for children */
2771         for (i = 0; i < fdhash->csize; i++) {
2772                 hi = fdhash->items[i];
2773                 while (hi) {
2774                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2775                                 pclose(hi->data.rs.F);
2776                         hi = hi->next;
2777                 }
2778         }
2779
2780         exit(r);
2781 }
2782
2783 /* if expr looks like "var=value", perform assignment and return 1,
2784  * otherwise return 0 */
2785 static int is_assignment(const char *expr)
2786 {
2787         char *exprc, *s, *s0, *s1;
2788
2789         exprc = xstrdup(expr);
2790         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2791                 free(exprc);
2792                 return FALSE;
2793         }
2794
2795         *(s++) = '\0';
2796         s0 = s1 = s;
2797         while (*s)
2798                 *(s1++) = nextchar(&s);
2799
2800         *s1 = '\0';
2801         setvar_u(newvar(exprc), s0);
2802         free(exprc);
2803         return TRUE;
2804 }
2805
2806 /* switch to next input file */
2807 static rstream *next_input_file(void)
2808 {
2809 #define rsm          (G.next_input_file__rsm)
2810 #define files_happen (G.next_input_file__files_happen)
2811
2812         FILE *F = NULL;
2813         const char *fname, *ind;
2814
2815         if (rsm.F)
2816                 fclose(rsm.F);
2817         rsm.F = NULL;
2818         rsm.pos = rsm.adv = 0;
2819
2820         do {
2821                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2822                         if (files_happen)
2823                                 return NULL;
2824                         fname = "-";
2825                         F = stdin;
2826                 } else {
2827                         ind = getvar_s(incvar(intvar[ARGIND]));
2828                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2829                         if (fname && *fname && !is_assignment(fname))
2830                                 F = xfopen_stdin(fname);
2831                 }
2832         } while (!F);
2833
2834         files_happen = TRUE;
2835         setvar_s(intvar[FILENAME], fname);
2836         rsm.F = F;
2837         return &rsm;
2838 #undef rsm
2839 #undef files_happen
2840 }
2841
2842 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2843 int awk_main(int argc, char **argv)
2844 {
2845         unsigned opt;
2846         char *opt_F, *opt_W;
2847         llist_t *list_v = NULL;
2848         llist_t *list_f = NULL;
2849         int i, j;
2850         var *v;
2851         var tv;
2852         char **envp;
2853         char *vnames = (char *)vNames; /* cheat */
2854         char *vvalues = (char *)vValues;
2855
2856         INIT_G();
2857
2858         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2859          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2860         if (ENABLE_LOCALE_SUPPORT)
2861                 setlocale(LC_NUMERIC, "C");
2862
2863         zero_out_var(&tv);
2864
2865         /* allocate global buffer */
2866         g_buf = xmalloc(MAXVARFMT + 1);
2867
2868         vhash = hash_init();
2869         ahash = hash_init();
2870         fdhash = hash_init();
2871         fnhash = hash_init();
2872
2873         /* initialize variables */
2874         for (i = 0; *vnames; i++) {
2875                 intvar[i] = v = newvar(nextword(&vnames));
2876                 if (*vvalues != '\377')
2877                         setvar_s(v, nextword(&vvalues));
2878                 else
2879                         setvar_i(v, 0);
2880
2881                 if (*vnames == '*') {
2882                         v->type |= VF_SPECIAL;
2883                         vnames++;
2884                 }
2885         }
2886
2887         handle_special(intvar[FS]);
2888         handle_special(intvar[RS]);
2889
2890         newfile("/dev/stdin")->F = stdin;
2891         newfile("/dev/stdout")->F = stdout;
2892         newfile("/dev/stderr")->F = stderr;
2893
2894         /* Huh, people report that sometimes environ is NULL. Oh well. */
2895         if (environ) for (envp = environ; *envp; envp++) {
2896                 /* environ is writable, thus we don't strdup it needlessly */
2897                 char *s = *envp;
2898                 char *s1 = strchr(s, '=');
2899                 if (s1) {
2900                         *s1 = '\0';
2901                         /* Both findvar and setvar_u take const char*
2902                          * as 2nd arg -> environment is not trashed */
2903                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2904                         *s1 = '=';
2905                 }
2906         }
2907         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2908         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2909         argv += optind;
2910         argc -= optind;
2911         if (opt & 0x1)
2912                 setvar_s(intvar[FS], opt_F); // -F
2913         while (list_v) { /* -v */
2914                 if (!is_assignment(llist_pop(&list_v)))
2915                         bb_show_usage();
2916         }
2917         if (list_f) { /* -f */
2918                 do {
2919                         char *s = NULL;
2920                         FILE *from_file;
2921
2922                         g_progname = llist_pop(&list_f);
2923                         from_file = xfopen_stdin(g_progname);
2924                         /* one byte is reserved for some trick in next_token */
2925                         for (i = j = 1; j > 0; i += j) {
2926                                 s = xrealloc(s, i + 4096);
2927                                 j = fread(s + i, 1, 4094, from_file);
2928                         }
2929                         s[i] = '\0';
2930                         fclose(from_file);
2931                         parse_program(s + 1);
2932                         free(s);
2933                 } while (list_f);
2934                 argc++;
2935         } else { // no -f: take program from 1st parameter
2936                 if (!argc)
2937                         bb_show_usage();
2938                 g_progname = "cmd. line";
2939                 parse_program(*argv++);
2940         }
2941         if (opt & 0x8) // -W
2942                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2943
2944         /* fill in ARGV array */
2945         setvar_i(intvar[ARGC], argc);
2946         setari_u(intvar[ARGV], 0, "awk");
2947         i = 0;
2948         while (*argv)
2949                 setari_u(intvar[ARGV], ++i, *argv++);
2950
2951         evaluate(beginseq.first, &tv);
2952         if (!mainseq.first && !endseq.first)
2953                 awk_exit(EXIT_SUCCESS);
2954
2955         /* input file could already be opened in BEGIN block */
2956         if (!iF)
2957                 iF = next_input_file();
2958
2959         /* passing through input files */
2960         while (iF) {
2961                 nextfile = FALSE;
2962                 setvar_i(intvar[FNR], 0);
2963
2964                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2965                         nextrec = FALSE;
2966                         incvar(intvar[NR]);
2967                         incvar(intvar[FNR]);
2968                         evaluate(mainseq.first, &tv);
2969
2970                         if (nextfile)
2971                                 break;
2972                 }
2973
2974                 if (i < 0)
2975                         syntax_error(strerror(errno));
2976
2977                 iF = next_input_file();
2978         }
2979
2980         awk_exit(EXIT_SUCCESS);
2981         /*return 0;*/
2982 }