awk: bitwise ops cast operands and results to unsigned long,
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Size limit handed to fmt_num() when getvar_s() formats a number into g_buf */
17 #define MAXVARFMT       240
/* Smallest capacity (counted in vars) nvalloc() gives a newly allocated nvblock */
18 #define MINNVBLOCK      64
19
20 /* variable flags */
21 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
22 #define VF_ARRAY        0x0002  /* 1 = it's an array */
23
24 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
25 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
27 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
29 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
31
32 /* these flags are static, don't change them when value is changed */
/* clrvar() keeps exactly these bits; copyvar() never copies them from the source */
33 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35 /* Variable */
/* A single awk value. 'type' holds VF_* flags; 'number' and 'string' hold the
 * numeric and string forms, one of which may be a cached conversion of the
 * other (VF_CACHED). A NULL 'string' is reported as "" by getvar_s().
 * The members of 'x' share storage; the VF_ARRAY / VF_CHILD / VF_WALK flags
 * are what the visible code checks before touching array / parent / walker. */
36 typedef struct var_s {
37         unsigned type;            /* flags */
38         double number;
39         char *string;
40         union {
41                 int aidx;               /* func arg idx (for compilation stage) */
42                 struct xhash_s *array;  /* array ptr */
43                 struct var_s *parent;   /* for func args, ptr to actual parameter */
44                 char **walker;          /* list of array elements (for..in) */
45         } x;
46 } var;
47
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* NOTE(review): the code that builds and walks chains is outside this excerpt;
 * 'first'/'last' are presumably the chain's end nodes and 'programname' the
 * source name used in diagnostics - confirm at the users. */
49 typedef struct chain_s {
50         struct node_s *first;
51         struct node_s *last;
52         const char *programname;
53 } chain;
54
55 /* Function */
/* Entry stored in the functions hash (see hash_item.data.f and newfunc()). */
56 typedef struct func_s {
57         unsigned nargs;         /* NOTE(review): presumably the declared parameter count - confirm */
58         struct chain_s body;    /* node chain of the function body */
59 } func;
60
61 /* I/O stream */
/* Entry stored in the redirect-streams hash (see hash_item.data.rs and newfile()).
 * NOTE(review): the readers/writers of these fields are outside this excerpt;
 * buffer/adv/size/pos look like read-buffer bookkeeping - confirm before relying on it. */
62 typedef struct rstream_s {
63         FILE *F;
64         char *buffer;
65         int adv;
66         int size;
67         int pos;
68         smallint is_pipe;
69 } rstream;
70
/* One entry of an xhash bucket chain.
 * 'data' has to remain the first member: hash_search() hands out &hi->data and
 * hash_find() stores that pointer back into a hash_item * variable.
 * hash_find() allocates sizeof(hash_item) + strlen(name) + 1 bytes, so the
 * whole NUL-terminated key fits in the trailing 'name' array. */
71 typedef struct hash_item_s {
72         union {
73                 struct var_s v;         /* variable/array hash */
74                 struct rstream_s rs;    /* redirect streams hash */
75                 struct func_s f;        /* functions hash */
76         } data;
77         struct hash_item_s *next;       /* next in chain */
78         char name[1];                   /* really it's longer */
79 } hash_item;
80
/* Chained hash table keyed by NUL-terminated names.
 * hash_init() starts with FIRST_PRIME buckets; hash_find() calls hash_rebuild()
 * once nel / csize exceeds 10, which moves the items to PRIMES[nprime] buckets. */
81 typedef struct xhash_s {
82         unsigned nel;           /* num of elements */
83         unsigned csize;         /* current hash size */
84         unsigned nprime;        /* next hash size in PRIMES[] */
85         unsigned glen;          /* summary length of item names */
                                   /* (each name counts strlen + 1, see hash_find/hash_remove) */
86         struct hash_item_s **items;
87 } xhash;
88
89 /* Tree node */
/* NOTE(review): nodes are created and evaluated outside this excerpt. 'info'
 * presumably carries the same OC_ / OF_ / P() bits as tokeninfo[] entries, and
 * l/r/a are the left, right and additional operands - confirm at the users. */
90 typedef struct node_s {
91         uint32_t info;
92         unsigned lineno;
93         union {
94                 struct node_s *n;
95                 var *v;
96                 int i;
97                 char *s;
98                 regex_t *re;
99         } l;
100         union {
101                 struct node_s *n;
102                 regex_t *ire;
103                 func *f;
104                 int argno;
105         } r;
106         union {
107                 struct node_s *n;
108         } a;
109 } node;
110
111 /* Block of temporary variables */
112 typedef struct nvblock_s {
113         int size;
114         var *pos;
115         struct nvblock_s *prev;
116         struct nvblock_s *next;
117         var nv[0];
118 } nvblock;
119
/* A node bundled with two regex_t slots; used for the fsplitter / rsplitter
 * globals and for exec_builtin__tspl.
 * NOTE(review): why two regexes are kept is not visible in this excerpt
 * (possibly case-sensitive and case-insensitive variants) - confirm. */
120 typedef struct tsplitter_s {
121         node n;
122         regex_t re[2];
123 } tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!!  See next_token() */
/* Every class is one bit. The NTC marker in tokenlist[] is documented as
 * "switch to next token class (tc<<1)", so the bit order below has to follow
 * the order of the NTC-separated groups in tokenlist[]. */
127 #define TC_SEQSTART      1                              /* ( */
128 #define TC_SEQTERM      (1 << 1)                /* ) */
129 #define TC_REGEXP       (1 << 2)                /* /.../ */
130 #define TC_OUTRDR       (1 << 3)                /* | > >> */
131 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
132 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
133 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
134 #define TC_IN           (1 << 7)
135 #define TC_COMMA        (1 << 8)
136 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
137 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
138 #define TC_ARRTERM      (1 << 11)               /* ] */
139 #define TC_GRPSTART     (1 << 12)               /* { */
140 #define TC_GRPTERM      (1 << 13)               /* } */
141 #define TC_SEMICOL      (1 << 14)
142 #define TC_NEWLINE      (1 << 15)
143 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
144 #define TC_WHILE        (1 << 17)
145 #define TC_ELSE         (1 << 18)
146 #define TC_BUILTIN      (1 << 19)
147 #define TC_GETLINE      (1 << 20)
148 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
149 #define TC_BEGIN        (1 << 22)
150 #define TC_END          (1 << 23)
151 #define TC_EOF          (1 << 24)
/* The classes from TC_EOF on have no group in tokenlist[] */
152 #define TC_VARIABLE     (1 << 25)
153 #define TC_ARRAY        (1 << 26)
154 #define TC_FUNCTION     (1 << 27)
155 #define TC_STRING       (1 << 28)
156 #define TC_NUMBER       (1 << 29)
157
158 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
159
160 /* combined token classes */
161 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
165
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
168
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
172
173 /* discard newlines after these */
174 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175                    | TC_BINOP | TC_OPTERM)
176
177 /* what can expression begin with */
178 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
181
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operand-handling flags, OR-ed into tokeninfo[] entries.
 * NOTE(review): their consumer is outside this excerpt. Going by the names and
 * by the builtin-priority comment further down, RESn presumably means
 * "resolve operand n", STRn "as string" and NUM1 "as number" - confirm. */
188 #define OF_RES1    0x010000
189 #define OF_RES2    0x020000
190 #define OF_STR1    0x040000
191 #define OF_STR2    0x080000
192 #define OF_NUM1    0x100000
193 #define OF_CHECKED 0x200000
194
195 /* combined operator flags */
/* Two-letter names: first letter describes operand 1, second operand 2
 * (x = no flag, V = RES only, S = RES + STR, N = RES + NUM). */
196 #define xx      0
197 #define xV      OF_RES2
198 #define xS      (OF_RES2 | OF_STR2)
199 #define Vx      OF_RES1
200 #define VV      (OF_RES1 | OF_RES2)
201 #define Nx      (OF_RES1 | OF_NUM1)
202 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx      (OF_RES1 | OF_STR1)
204 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
/* OPCLSMASK covers the byte in which every OC_ / ST_ value below is encoded;
 * OPNMASK covers the low seven bits (operator character or F_ / B_ index). */
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK   0x007F
209
/* Operator priority lives in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins the byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string.
 *
 * P() places a value into that byte. The shift is done in uint32_t because
 * builtin values such as 0x83 do not fit a signed int once shifted by 24
 * (signed overflow is undefined), and the argument is parenthesized so any
 * expression can be passed safely.
 * PRIMASK extracts the priority without the top bit; PRIMASK2 additionally
 * drops the lowest priority bit (the grouping-direction bit).
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
217
218 /* Operation classes */
219
/* Each class value occupies the OPCLSMASK byte of an info word.
 * SHIFT_TIL_THIS has the value of OC_WALKINIT and RECUR_FROM_THIS the value of
 * OC_BINARY, i.e. they mark boundaries inside the enum ordering below.
 * NOTE(review): how the evaluator uses these boundaries is outside this excerpt. */
220 #define SHIFT_TIL_THIS  0x0600
221 #define RECUR_FROM_THIS 0x1000
222
223 enum {
224         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
225         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
226
227         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
228         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
229         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
230
231         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
232         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
233         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
234         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
235         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
236         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
237         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
238         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
239         OC_DONE = 0x2800,
240
/* Statement keywords (if/do/for/while) as they appear in tokeninfo[] */
241         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
242         ST_WHILE = 0x3300
243 };
244
245 /* simple builtins */
/* Low-byte index combined with OC_FBLTIN in tokeninfo[]. The abbreviations
 * line up with the builtin names in tokenlist[]: int rand cos exp log sin sqrt
 * srand systime length system fflush close. */
246 enum {
247         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
248         F_ti,   F_le,   F_sy,   F_ff,   F_cl
249 };
250
251 /* builtins */
/* Low-byte index combined with OC_BUILTIN (OC_B) in tokeninfo[]: atan2 index
 * match split substr strftime tolower toupper / gensub gsub sub /
 * and compl lshift or rshift xor. */
252 enum {
253         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
254         B_ge,   B_gs,   B_su,
255         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
256 };
257
258 /* tokens and their corresponding info values */
259
260 #define NTC     "\377"  /* switch to next token class (tc<<1) */
261 #define NTCC    '\377'
262
263 #define OC_B    OC_BUILTIN
264
/* Token table. Every entry is one length byte (written as an octal escape)
 * followed by that many characters of token text, e.g. "\10continue" is 8 + "continue".
 * NTC ends the group belonging to one token class; groups appear in the order
 * of the TC_ bits (TC_SEQSTART first). A zero length byte ends the table.
 * tokeninfo[] below lists one info word per entry, in the same order. */
265 static const char tokenlist[] ALIGN1 =
266         "\1("       NTC
267         "\1)"       NTC
268         "\1/"       NTC                                 /* REGEXP */
269         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
270         "\2++"      "\2--"      NTC                     /* UOPPOST */
271         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
272         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
273         "\2*="      "\2/="      "\2%="      "\2^="
274         "\1+"       "\1-"       "\3**="     "\2**"
275         "\1/"       "\1%"       "\1^"       "\1*"
276         "\2!="      "\2>="      "\2<="      "\1>"
277         "\1<"       "\2!~"      "\1~"       "\2&&"
278         "\2||"      "\1?"       "\1:"       NTC
279         "\2in"      NTC
280         "\1,"       NTC
281         "\1|"       NTC
282         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
283         "\1]"       NTC
284         "\1{"       NTC
285         "\1}"       NTC
286         "\1;"       NTC
287         "\1\n"      NTC
288         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
289         "\10continue"           "\6delete"  "\5print"
290         "\6printf"  "\4next"    "\10nextfile"
291         "\6return"  "\4exit"    NTC
292         "\5while"   NTC
293         "\4else"    NTC
294
295         "\3and"     "\5compl"   "\6lshift"  "\2or"
296         "\6rshift"  "\3xor"
297         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
298         "\3cos"     "\3exp"     "\3int"     "\3log"
299         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
300         "\6gensub"  "\4gsub"    "\5index"   "\6length"
301         "\5match"   "\5split"   "\7sprintf" "\3sub"
302         "\6substr"  "\7systime" "\10strftime"
303         "\7tolower" "\7toupper" NTC
304         "\7getline" NTC
305         "\4func"    "\10function"   NTC
306         "\5BEGIN"   NTC
307         "\3END"     "\0"
308         ;
309
/* Info word for each tokenlist[] entry, in the same order (the NTC markers
 * have no entry here). A word ORs together an operation class (OC_ / ST_),
 * operand flags (xx..SS), a P() byte and a low-byte operator character or
 * F_ / B_ index.
 * Following the table order, "^", "**" and their assignment forms all carry
 * the character '&' in the low byte.
 * NOTE(review): the code interpreting these words is outside this excerpt. */
310 static const uint32_t tokeninfo[] = {
311         0,
312         0,
313         OC_REGEXP,
314         xS|'a',     xS|'w',     xS|'|',
315         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
316         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
317             OC_FIELD|xV|P(5),
318         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
319             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
320         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
321             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
322         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
323             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
324         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
325             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
326         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
327             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
328         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
329             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
330         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
331             OC_COLON|xx|P(67)|':',
332         OC_IN|SV|P(49),
333         OC_COMMA|SS|P(80),
334         OC_PGETLINE|SV|P(37),
335         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
336             OC_UNARY|xV|P(19)|'!',
337         0,
338         0,
339         0,
340         0,
341         0,
342         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
343         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
344         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
345         OC_RETURN|Vx,   OC_EXIT|Nx,
346         ST_WHILE,
347         0,
348
349         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
356         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
357         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358         OC_GETLINE|SV|P(0),
359         0,      0,
360         0,
361         0
362 };
363
364 /* internal variable names and their initial values       */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum indexes intvar[]; vNames lists the names in the same order. */
366 enum {
367         CONVFMT,    OFMT,       FS,         OFS,
368         ORS,        RS,         RT,         FILENAME,
369         SUBSEP,     ARGIND,     ARGC,       ARGV,
370         ERRNO,      FNR,
371         NR,         NF,         IGNORECASE,
372         ENVIRON,    F0,         NUM_INTERNAL_VARS
373 };
374
/* NUL-separated names ended by an empty name. As the literals are grouped,
 * the '*' sits right after the terminator of FS, RS, NF, IGNORECASE and $.
 * NOTE(review): the loop consuming this table is outside this excerpt. */
375 static const char vNames[] ALIGN1 =
376         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
377         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
378         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
379         "ERRNO\0"   "FNR\0"
380         "NR\0"      "NF\0*"     "IGNORECASE\0*"
381         "ENVIRON\0" "$\0*"      "\0";
382
/* Initial string values for the first nine names (CONVFMT .. SUBSEP).
 * NOTE(review): the trailing '\377' presumably tells the initializer that the
 * remaining variables have no string default - confirm at the consumer. */
383 static const char vValues[] ALIGN1 =
384         "%.6g\0"    "%.6g\0"    " \0"       " \0"
385         "\n\0"      "\n\0"      "\0"        "\0"
386         "\034\0"
387         "\377";
388
389 /* hash size may grow to these values */
/* hash_init() starts every table with FIRST_PRIME buckets; hash_rebuild() then
 * steps through PRIMES[] via xhash.nprime and stops growing after the last entry. */
390 #define FIRST_PRIME 61
391 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
392
393
394 /* Globals. Split in two parts so that first one is addressed
395  * with (mostly short) negative offsets */
/* INIT_G() allocates both structs in one zeroed region, struct globals first,
 * and points ptr_to_globals at the boundary: G1 is the element just below the
 * pointer, G is what the pointer addresses (reinterpreted as struct globals2).
 * The bare-name macros further down make the members look like plain globals
 * to the rest of the file. */
396 struct globals {
397         chain beginseq, mainseq, endseq;
398         chain *seq;
399         node *break_ptr, *continue_ptr;
400         rstream *iF;
401         xhash *vhash, *ahash, *fdhash, *fnhash;
402         const char *g_progname;
403         int g_lineno;
404         int nfields;
405         int maxfields; /* used in fsrealloc() only */
406         var *Fields;
407         nvblock *g_cb;
408         char *g_pos;
409         char *g_buf;
410         smallint icase;
411         smallint exiting;
412         smallint nextrec;
413         smallint nextfile;
414         smallint is_f0_split;
415 };
416 struct globals2 {
417         uint32_t t_info; /* often used */
418         uint32_t t_tclass;
419         char *t_string;
420         int t_lineno;
421         int t_rollback;
422
423         var *intvar[NUM_INTERNAL_VARS]; /* often used */
424
425         /* former statics from various functions */
426         char *split_f0__fstrings;
427
428         uint32_t next_token__save_tclass;
429         uint32_t next_token__save_info;
430         uint32_t next_token__ltclass;
431         smallint next_token__concat_inserted;
432
433         smallint next_input_file__files_happen;
434         rstream next_input_file__rsm;
435
436         var *evaluate__fnargs;
437         unsigned evaluate__seed;
438         regex_t evaluate__sreg;
439
440         var ptest__v;
441
442         tsplitter exec_builtin__tspl;
443
444         /* biggest and least used members go last */
445         double t_double;
446         tsplitter fsplitter, rsplitter;
447 };
/* G1 relies on ptr_to_globals indexing in units of struct globals
 * (NOTE(review): its declaration is outside this file excerpt - confirm). */
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /* char G1size[sizeof(G1)]; - 0x6c */
452 /* char Gsize[sizeof(G)]; - 0x1cc */
453 /* Trying to keep most of members accessible with short offsets: */
454 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
455 #define beginseq     (G1.beginseq    )
456 #define mainseq      (G1.mainseq     )
457 #define endseq       (G1.endseq      )
458 #define seq          (G1.seq         )
459 #define break_ptr    (G1.break_ptr   )
460 #define continue_ptr (G1.continue_ptr)
461 #define iF           (G1.iF          )
462 #define vhash        (G1.vhash       )
463 #define ahash        (G1.ahash       )
464 #define fdhash       (G1.fdhash      )
465 #define fnhash       (G1.fnhash      )
466 #define g_progname   (G1.g_progname  )
467 #define g_lineno     (G1.g_lineno    )
468 #define nfields      (G1.nfields     )
469 #define maxfields    (G1.maxfields   )
470 #define Fields       (G1.Fields      )
471 #define g_cb         (G1.g_cb        )
472 #define g_pos        (G1.g_pos       )
473 #define g_buf        (G1.g_buf       )
474 #define icase        (G1.icase       )
475 #define exiting      (G1.exiting     )
476 #define nextrec      (G1.nextrec     )
477 #define nextfile     (G1.nextfile    )
478 #define is_f0_split  (G1.is_f0_split )
479 #define t_info       (G.t_info      )
480 #define t_tclass     (G.t_tclass    )
481 #define t_string     (G.t_string    )
482 #define t_double     (G.t_double    )
483 #define t_lineno     (G.t_lineno    )
484 #define t_rollback   (G.t_rollback  )
485 #define intvar       (G.intvar      )
486 #define fsplitter    (G.fsplitter   )
487 #define rsplitter    (G.rsplitter   )
/* Everything starts zeroed (xzalloc); only the last-token class and the
 * evaluate__seed member get non-zero initial values. */
488 #define INIT_G() do { \
489         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
490         G.next_token__ltclass = TC_OPTERM; \
491         G.evaluate__seed = 1; \
492 } while (0)
493
494
495 /* function prototypes */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
503
504 /* ---- error handling ---- */
505
/* Diagnostic texts. In the visible code they are passed to syntax_error(),
 * which prefixes them with the program name and line number
 * (e.g. EMSG_INTERNAL_ERROR in nvfree(), EMSG_UNEXP_EOS in next_token()). */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* Only present when math support is configured out */
515 #if !ENABLE_FEATURE_AWK_MATH
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
518
519 static void zero_out_var(var * vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *const message) NORETURN;
525 static void syntax_error(const char *const message)
526 {
527         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/* Hash a NUL-terminated name: for every character, h = h * 63 + character
 * (written as a shift and a subtraction), wrapping in unsigned arithmetic.
 * The caller reduces the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = (h << 6) - h + *p;

	return h;
}
539
540 /* create new hash */
541 static xhash *hash_init(void)
542 {
543         xhash *newhash;
544
545         newhash = xzalloc(sizeof(xhash));
546         newhash->csize = FIRST_PRIME;
547         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
548
549         return newhash;
550 }
551
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
554 {
555         hash_item *hi;
556
557         hi = hash->items [ hashidx(name) % hash->csize ];
558         while (hi) {
559                 if (strcmp(hi->name, name) == 0)
560                         return &(hi->data);
561                 hi = hi->next;
562         }
563         return NULL;
564 }
565
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
568 {
569         unsigned newsize, i, idx;
570         hash_item **newitems, *hi, *thi;
571
572         if (hash->nprime == ARRAY_SIZE(PRIMES))
573                 return;
574
575         newsize = PRIMES[hash->nprime++];
576         newitems = xzalloc(newsize * sizeof(hash_item *));
577
578         for (i = 0; i < hash->csize; i++) {
579                 hi = hash->items[i];
580                 while (hi) {
581                         thi = hi;
582                         hi = thi->next;
583                         idx = hashidx(thi->name) % newsize;
584                         thi->next = newitems[idx];
585                         newitems[idx] = thi;
586                 }
587         }
588
589         free(hash->items);
590         hash->csize = newsize;
591         hash->items = newitems;
592 }
593
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
596 {
597         hash_item *hi;
598         unsigned idx;
599         int l;
600
601         hi = hash_search(hash, name);
602         if (!hi) {
603                 if (++hash->nel / hash->csize > 10)
604                         hash_rebuild(hash);
605
606                 l = strlen(name) + 1;
607                 hi = xzalloc(sizeof(hash_item) + l);
608                 memcpy(hi->name, name, l);
609
610                 idx = hashidx(name) % hash->csize;
611                 hi->next = hash->items[idx];
612                 hash->items[idx] = hi;
613                 hash->glen += l;
614         }
615         return &(hi->data);
616 }
617
/* Typed front ends for hash_find(): each returns the (possibly newly created,
 * zero-filled) entry for 'name', cast to the member type stored in that hash.
 * newvar/newfile/newfunc are bound to the global vhash/fdhash/fnhash tables. */
618 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
619 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
620 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
622
623 static void hash_remove(xhash *hash, const char *name)
624 {
625         hash_item *hi, **phi;
626
627         phi = &(hash->items[hashidx(name) % hash->csize]);
628         while (*phi) {
629                 hi = *phi;
630                 if (strcmp(hi->name, name) == 0) {
631                         hash->glen -= (strlen(name) + 1);
632                         hash->nel--;
633                         *phi = hi->next;
634                         free(hi);
635                         break;
636                 }
637                 phi = &(hi->next);
638         }
639 }
640
641 /* ------ some useful functions ------ */
642
643 static void skip_spaces(char **s)
644 {
645         char *p = *s;
646
647         while (1) {
648                 if (*p == '\\' && p[1] == '\n') {
649                         p++;
650                         t_lineno++;
651                 } else if (*p != ' ' && *p != '\t') {
652                         break;
653                 }
654                 p++;
655         }
656         *s = p;
657 }
658
/* Return the NUL-terminated word *s points at and move *s to the byte
 * following that word's terminator (i.e. to the next word of a
 * NUL-separated list). */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
667
/* Fetch one character from *s and advance *s.
 * A backslash is handed to bb_process_escape_sequence(); if that returns a
 * backslash without having moved *s, the character after the
 * backslash is returned as is and consumed. */
static char nextchar(char **s)
{
	char ch = **s;
	char *after;

	(*s)++;
	after = *s;
	if (ch == '\\') {
		ch = bb_process_escape_sequence((const char**)s);
		if (ch == '\\' && *s == after) {
			ch = **s;
			(*s)++;
		}
	}
	return ch;
}
678
679 static ALWAYS_INLINE int isalnum_(int c)
680 {
681         return (isalnum(c) || c == '_');
682 }
683
684 /* -------- working with variables (set/get/copy/etc) -------- */
685
686 static xhash *iamarray(var *v)
687 {
688         var *a = v;
689
690         while (a->type & VF_CHILD)
691                 a = a->x.parent;
692
693         if (!(a->type & VF_ARRAY)) {
694                 a->type |= VF_ARRAY;
695                 a->x.array = hash_init();
696         }
697         return a->x.array;
698 }
699
700 static void clear_array(xhash *array)
701 {
702         unsigned i;
703         hash_item *hi, *thi;
704
705         for (i = 0; i < array->csize; i++) {
706                 hi = array->items[i];
707                 while (hi) {
708                         thi = hi;
709                         hi = hi->next;
710                         free(thi->data.v.string);
711                         free(thi);
712                 }
713                 array->items[i] = NULL;
714         }
715         array->glen = array->nel = 0;
716 }
717
718 /* clear a variable */
719 static var *clrvar(var *v)
720 {
721         if (!(v->type & VF_FSTR))
722                 free(v->string);
723
724         v->type &= VF_DONTTOUCH;
725         v->type |= VF_DIRTY;
726         v->string = NULL;
727         return v;
728 }
729
730 /* assign string value to variable */
731 static var *setvar_p(var *v, char *value)
732 {
733         clrvar(v);
734         v->string = value;
735         handle_special(v);
736         return v;
737 }
738
739 /* same as setvar_p but make a copy of string */
740 static var *setvar_s(var *v, const char *value)
741 {
742         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
743 }
744
745 /* same as setvar_s but set USER flag */
746 static var *setvar_u(var *v, const char *value)
747 {
748         setvar_s(v, value);
749         v->type |= VF_USER;
750         return v;
751 }
752
753 /* set array element to user string */
754 static void setari_u(var *a, int idx, const char *s)
755 {
756         char sidx[sizeof(int)*3 + 1];
757         var *v;
758
759         sprintf(sidx, "%d", idx);
760         v = findvar(iamarray(a), sidx);
761         setvar_u(v, s);
762 }
763
764 /* assign numeric value to variable */
765 static var *setvar_i(var *v, double value)
766 {
767         clrvar(v);
768         v->type |= VF_NUMBER;
769         v->number = value;
770         handle_special(v);
771         return v;
772 }
773
774 static const char *getvar_s(var *v)
775 {
776         /* if v is numeric and has no cached string, convert it to string */
777         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
778                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
779                 v->string = xstrdup(g_buf);
780                 v->type |= VF_CACHED;
781         }
782         return (v->string == NULL) ? "" : v->string;
783 }
784
785 static double getvar_i(var *v)
786 {
787         char *s;
788
789         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
790                 v->number = 0;
791                 s = v->string;
792                 if (s && *s) {
793                         v->number = strtod(s, &s);
794                         if (v->type & VF_USER) {
795                                 skip_spaces(&s);
796                                 if (*s != '\0')
797                                         v->type &= ~VF_USER;
798                         }
799                 } else {
800                         v->type &= ~VF_USER;
801                 }
802                 v->type |= VF_CACHED;
803         }
804         return v->number;
805 }
806
807 static var *copyvar(var *dest, const var *src)
808 {
809         if (dest != src) {
810                 clrvar(dest);
811                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
812                 dest->number = src->number;
813                 if (src->string)
814                         dest->string = xstrdup(src->string);
815         }
816         handle_special(dest);
817         return dest;
818 }
819
820 static var *incvar(var *v)
821 {
822         return setvar_i(v, getvar_i(v) + 1.);
823 }
824
825 /* return true if v is number or numeric string */
826 static int is_numeric(var *v)
827 {
828         getvar_i(v);
829         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
830 }
831
832 /* return 1 when value of v corresponds to true, 0 otherwise */
833 static int istrue(var *v)
834 {
835         if (is_numeric(v))
836                 return (v->number == 0) ? 0 : 1;
837         return (v->string && *(v->string)) ? 1 : 0;
838 }
839
840 /* temporary variables allocator. Last allocated should be first freed */
841 static var *nvalloc(int n)
842 {
843         nvblock *pb = NULL;
844         var *v, *r;
845         int size;
846
847         while (g_cb) {
848                 pb = g_cb;
849                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
850                 g_cb = g_cb->next;
851         }
852
853         if (!g_cb) {
854                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
855                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
856                 g_cb->size = size;
857                 g_cb->pos = g_cb->nv;
858                 g_cb->prev = pb;
859                 /*g_cb->next = NULL; - xzalloc did it */
860                 if (pb) pb->next = g_cb;
861         }
862
863         v = r = g_cb->pos;
864         g_cb->pos += n;
865
866         while (v < g_cb->pos) {
867                 v->type = 0;
868                 v->string = NULL;
869                 v++;
870         }
871
872         return r;
873 }
874
875 static void nvfree(var *v)
876 {
877         var *p;
878
879         if (v < g_cb->nv || v >= g_cb->pos)
880                 syntax_error(EMSG_INTERNAL_ERROR);
881
882         for (p = v; p < g_cb->pos; p++) {
883                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
884                         clear_array(iamarray(p));
885                         free(p->x.array->items);
886                         free(p->x.array);
887                 }
888                 if (p->type & VF_WALK)
889                         free(p->x.walker);
890
891                 clrvar(p);
892         }
893
894         g_cb->pos = v;
895         while (g_cb->prev && g_cb->pos == g_cb->nv) {
896                 g_cb = g_cb->prev;
897         }
898 }
899
900 /* ------- awk program text parsing ------- */
901
902 /* Parse next token pointed to by global g_pos, place results into global ttt
903  * (presumably what the t_tclass / t_info / t_string / t_double names used
904  * below refer to - confirm against their definitions).
     * `expected` is a mask of acceptable token classes; when the class of the
     * token found is not in that mask, syntax_error() is called.
     * Returns the token class.
     */
905 static uint32_t next_token(uint32_t expected)
906 {
907 #define concat_inserted (G.next_token__concat_inserted)
908 #define save_tclass     (G.next_token__save_tclass)
909 #define save_info       (G.next_token__save_info)
910 /* Initialized to TC_OPTERM: */
911 #define ltclass         (G.next_token__ltclass)
912
913         char *p, *pp, *s;
914         const char *tl;
915         uint32_t tc;
916         const uint32_t *ti;
917         int l;
918
919         if (t_rollback) {
            /* a token was pushed back: t_tclass is left as is and
             * gets handed out again below */
920                 t_rollback = FALSE;
921
922         } else if (concat_inserted) {
            /* the previous call returned a synthetic concatenation
             * operator; now deliver the real token that was saved then */
923                 concat_inserted = FALSE;
924                 t_tclass = save_tclass;
925                 t_info = save_info;
926
927         } else {
928                 p = g_pos;
929  readnext:
930                 skip_spaces(&p);
931                 g_lineno = t_lineno;
                    /* '#' comment: skip up to (not including) the newline or NUL */
932                 if (*p == '#')
933                         while (*p != '\n' && *p != '\0')
934                                 p++;
935
936                 if (*p == '\n')
937                         t_lineno++;
938
939                 if (*p == '\0') {
940                         tc = TC_EOF;
941
942                 } else if (*p == '\"') {
943                         /* it's a string */
                            /* rewritten in place: whatever nextchar() yields is
                             * stored back over the program text, starting right
                             * after the opening quote */
944                         t_string = s = ++p;
945                         while (*p != '\"') {
946                                 if (*p == '\0' || *p == '\n')
947                                         syntax_error(EMSG_UNEXP_EOS);
948                                 *(s++) = nextchar(&p);
949                         }
950                         p++;
951                         *s = '\0';
952                         tc = TC_STRING;
953
954                 } else if ((expected & TC_REGEXP) && *p == '/') {
955                         /* it's regexp */
                            /* '/' starts a regexp only when the caller says one is
                             * acceptable here; the text is likewise rewritten in place */
956                         t_string = s = ++p;
957                         while (*p != '/') {
958                                 if (*p == '\0' || *p == '\n')
959                                         syntax_error(EMSG_UNEXP_EOS);
960                                 *s = *p++;
961                                 if (*s++ == '\\') {
                                            /* replace the stored backslash with the
                                             * result of bb_process_escape_sequence() */
962                                         pp = p;
963                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
                                            /* the char after the backslash was itself a
                                             * backslash: store one more backslash */
964                                         if (*pp == '\\')
965                                                 *s++ = '\\';
                                            /* nothing was consumed: copy the following
                                             * char through verbatim */
966                                         if (p == pp)
967                                                 *s++ = *p++;
968                                 }
969                         }
970                         p++;
971                         *s = '\0';
972                         tc = TC_REGEXP;
973
974                 } else if (*p == '.' || isdigit(*p)) {
975                         /* it's a number */
976 #if ENABLE_DESKTOP
                            /* "0x"/"0X" prefix: parsed as an integer by strtoll (base 0) */
977                         if (p[0] == '0' && (p[1] | 0x20) == 'x')
978                                 t_double = strtoll(p, &p, 0);
979                         else
980 #endif
981                                 t_double = strtod(p, &p);
                            /* a '.' right where number parsing stopped is an error */
982                         if (*p == '.')
983                                 syntax_error(EMSG_UNEXP_TOKEN);
984                         tc = TC_NUMBER;
985
986                 } else {
987                         /* search for something known */
                            /* tokenlist is walked as a sequence of entries, each a
                             * length byte followed by that many chars; an NTCC in the
                             * length position moves on to the next token class bit.
                             * ti advances through tokeninfo in step with the entries. */
988                         tl = tokenlist;
989                         tc = 0x00000001;
990                         ti = tokeninfo;
991                         while (*tl) {
992                                 l = *(tl++);
993                                 if (l == NTCC) {
994                                         tc <<= 1;
995                                         continue;
996                                 }
997                                 /* if token class is expected, token
998                                  * matches and it's not a longer word,
999                                  * then this is what we are looking for
1000                                  */
1001                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1002                                  && *tl == *p && strncmp(p, tl, l) == 0
1003                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1004                                 ) {
1005                                         t_info = *ti;
1006                                         p += l;
1007                                         break;
1008                                 }
1009                                 ti++;
1010                                 tl += l;
1011                         }
1012
1013                         if (!*tl) {
1014                                 /* it's a name (var/array/function),
1015                                  * otherwise it's something wrong
1016                                  */
1017                                 if (!isalnum_(*p))
1018                                         syntax_error(EMSG_UNEXP_TOKEN);
1019
                                     /* the name is shifted one byte to the left (over the
                                      * char that precedes it) so that a terminating NUL can
                                      * be stored without overwriting the char following
                                      * the name */
1020                                 t_string = --p;
1021                                 while (isalnum_(*(++p))) {
1022                                         *(p-1) = *p;
1023                                 }
1024                                 *(p-1) = '\0';
1025                                 tc = TC_VARIABLE;
1026                                 /* also consume whitespace between functionname and bracket */
1027                                 if (!(expected & TC_VARIABLE))
1028                                         skip_spaces(&p);
                                     /* '(' is left unconsumed for a function; '[' of an
                                      * array reference is consumed here */
1029                                 if (*p == '(') {
1030                                         tc = TC_FUNCTION;
1031                                 } else {
1032                                         if (*p == '[') {
1033                                                 p++;
1034                                                 tc = TC_ARRAY;
1035                                         }
1036                                 }
1037                         }
1038                 }
1039                 g_pos = p;
1040
1041                 /* skipping newlines in some cases */
1042                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1043                         goto readnext;
1044
1045                 /* insert concatenation operator when needed */
                     /* the real token is parked in save_tclass/save_info and is
                      * returned by the next call (concat_inserted branch above) */
1046                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1047                         concat_inserted = TRUE;
1048                         save_tclass = tc;
1049                         save_info = t_info;
1050                         tc = TC_BINOP;
1051                         t_info = OC_CONCAT | SS | P(35);
1052                 }
1053
1054                 t_tclass = tc;
1055         }
             /* remember the class handed out, for the newline-skipping and
              * concatenation decisions of the next call */
1056         ltclass = t_tclass;
1057
1058         /* Are we ready for this? */
1059         if (!(ltclass & expected))
1060                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1061                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1062
1063         return ltclass;
1064 #undef concat_inserted
1065 #undef save_tclass
1066 #undef save_info
1067 #undef ltclass
1068 }
1069
/* Push the current token back: sets t_rollback, which makes the following
 * next_token() call take its rollback branch (re-delivering t_tclass)
 * instead of scanning new input. */
1070 static void rollback_token(void)
1071 {
1072         t_rollback = TRUE;
1073 }
1074
1075 static node *new_node(uint32_t info)
1076 {
1077         node *n;
1078
1079         n = xzalloc(sizeof(node));
1080         n->info = info;
1081         n->lineno = g_lineno;
1082         return n;
1083 }
1084
1085 static node *mk_re_node(const char *s, node *n, regex_t *re)
1086 {
1087         n->info = OC_REGEXP;
1088         n->l.re = re;
1089         n->r.ire = re + 1;
1090         xregcomp(re, s, REG_EXTENDED);
1091         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1092
1093         return n;
1094 }
1095
1096 static node *condition(void)
1097 {
1098         next_token(TC_SEQSTART);
1099         return parse_expr(TC_SEQTERM);
1100 }
1101
1102 /* parse expression terminated by given argument, return ptr
1103  * to built subtree. Terminator is eaten by parse_expr.
      *
      * iexp is the mask of token classes that end the expression.
      * While building, a node's a.n field is used as a link to its parent
      * (every new node gets a.n set to the node it was attached under),
      * l.n / r.n are the left / right operands. */
1104 static node *parse_expr(uint32_t iexp)
1105 {
              /* sn is a dummy root living on the stack; the finished tree is
               * whatever ends up as its right child (sn.r.n) */
1106         node sn;
1107         node *cn = &sn;
1108         node *vn, *glptr;
1109         uint32_t tc, xtc;
1110         var *v;
1111
1112         sn.info = PRIMASK;
1113         sn.r.n = glptr = NULL;
              /* xtc: token classes acceptable as the next token */
1114         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1115
1116         while (!((tc = next_token(xtc)) & iexp)) {
1117                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1118                         /* input redirection (<) attached to glptr node */
1119                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1120                         cn->a.n = glptr;
1121                         xtc = TC_OPERAND | TC_UOPPRE;
1122                         glptr = NULL;
1123
1124                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1125                         /* for binary and postfix-unary operators, jump back over
1126                          * previous operators with higher priority */
1127                         vn = cn;
1128                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1129                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1130                                 vn = vn->a.n;
1131                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1132                                 t_info += P(6);
                              /* splice the new operator node in between vn and
                               * vn's parent, taking vn's place as right child */
1133                         cn = vn->a.n->r.n = new_node(t_info);
1134                         cn->a.n = vn->a.n;
1135                         if (tc & TC_BINOP) {
                                      /* binary: vn becomes the left operand, the
                                       * right one is yet to be parsed */
1136                                 cn->l.n = vn;
1137                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1138                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1139                                         /* it's a pipe */
1140                                         next_token(TC_GETLINE);
1141                                         /* give maximum priority to this pipe */
1142                                         cn->info &= ~PRIMASK;
1143                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1144                                 }
1145                         } else {
                                      /* postfix unary: vn is the (right) operand */
1146                                 cn->r.n = vn;
1147                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1148                         }
1149                         vn->a.n = cn;
1150
1151                 } else {
1152                         /* for operands and prefix-unary operators, attach them
1153                          * to last node */
1154                         vn = cn;
1155                         cn = vn->r.n = new_node(t_info);
1156                         cn->a.n = vn;
1157                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1159                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1160                                 /* one should be very careful with switch on tclass -
1161                                  * only simple tclasses should be used! */
1162                                 switch (tc) {
1163                                 case TC_VARIABLE:
1164                                 case TC_ARRAY:
                                              /* a name found in ahash is a function
                                               * argument and is referenced by index;
                                               * anything else goes through newvar() */
1165                                         cn->info = OC_VAR;
1166                                         v = hash_search(ahash, t_string);
1167                                         if (v != NULL) {
1168                                                 cn->info = OC_FNARG;
1169                                                 cn->l.i = v->x.aidx;
1170                                         } else {
1171                                                 cn->l.v = newvar(t_string);
1172                                         }
1173                                         if (tc & TC_ARRAY) {
                                                      /* subscript expression, up to the
                                                       * TC_ARRTERM token */
1174                                                 cn->info |= xS;
1175                                                 cn->r.n = parse_expr(TC_ARRTERM);
1176                                         }
1177                                         break;
1178
1179                                 case TC_NUMBER:
1180                                 case TC_STRING:
                                              /* constant: stored in a freshly allocated var */
1181                                         cn->info = OC_VAR;
1182                                         v = cn->l.v = xzalloc(sizeof(var));
1183                                         if (tc & TC_NUMBER)
1184                                                 setvar_i(v, t_double);
1185                                         else
1186                                                 setvar_s(v, t_string);
1187                                         break;
1188
1189                                 case TC_REGEXP:
                                              /* storage for two regex_t: plain and icase */
1190                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1191                                         break;
1192
1193                                 case TC_FUNCTION:
                                              /* call: argument list is parsed by condition() */
1194                                         cn->info = OC_FUNC;
1195                                         cn->r.f = newfunc(t_string);
1196                                         cn->l.n = condition();
1197                                         break;
1198
1199                                 case TC_SEQSTART:
                                              /* parenthesized subexpression replaces the
                                               * node created above */
1200                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1201                                         cn->a.n = vn;
1202                                         break;
1203
1204                                 case TC_GETLINE:
                                              /* remember the node so that a following '<'
                                               * can be attached to it (see loop top) */
1205                                         glptr = cn;
1206                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1207                                         break;
1208
1209                                 case TC_BUILTIN:
1210                                         cn->l.n = condition();
1211                                         break;
1212                                 }
1213                         }
1214                 }
1215         }
1216         return sn.r.n;
1217 }
1218
1219 /* add node to chain. Return ptr to alloc'd node */
1220 static node *chain_node(uint32_t info)
1221 {
1222         node *n;
1223
1224         if (!seq->first)
1225                 seq->first = seq->last = new_node(0);
1226
1227         if (seq->programname != g_progname) {
1228                 seq->programname = g_progname;
1229                 n = chain_node(OC_NEWSOURCE);
1230                 n->l.s = xstrdup(g_progname);
1231         }
1232
1233         n = seq->last;
1234         n->info = info;
1235         seq->last = n->a.n = new_node(OC_DONE);
1236
1237         return n;
1238 }
1239
1240 static void chain_expr(uint32_t info)
1241 {
1242         node *n;
1243
1244         n = chain_node(info);
1245         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1246         if (t_tclass & TC_GRPTERM)
1247                 rollback_token();
1248 }
1249
1250 static node *chain_loop(node *nn)
1251 {
1252         node *n, *n2, *save_brk, *save_cont;
1253
1254         save_brk = break_ptr;
1255         save_cont = continue_ptr;
1256
1257         n = chain_node(OC_BR | Vx);
1258         continue_ptr = new_node(OC_EXEC);
1259         break_ptr = new_node(OC_EXEC);
1260         chain_group();
1261         n2 = chain_node(OC_EXEC | Vx);
1262         n2->l.n = nn;
1263         n2->a.n = n;
1264         continue_ptr->a.n = n2;
1265         break_ptr->a.n = n->r.n = seq->last;
1266
1267         continue_ptr = save_cont;
1268         break_ptr = save_brk;
1269
1270         return n;
1271 }
1272
1273 /* parse group and attach it to chain
      *
      * A "group" here is one of: a brace-enclosed list of groups, a plain
      * expression statement, or a statement introduced by a keyword.
      * Nodes are appended to the current chain (seq) via chain_node(). */
1274 static void chain_group(void)
1275 {
1276         uint32_t c;
1277         node *n, *n2, *n3;
1278
              /* skip newlines in front of the group */
1279         do {
1280                 c = next_token(TC_GRPSEQ);
1281         } while (c & TC_NEWLINE);
1282
1283         if (c & TC_GRPSTART) {
                      /* block: chain nested groups until the group terminator */
1284                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1285                         if (t_tclass & TC_NEWLINE) continue;
1286                         rollback_token();
1287                         chain_group();
1288                 }
1289         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                      /* expression statement (possibly empty) */
1290                 rollback_token();
1291                 chain_expr(OC_EXEC | Vx);
1292         } else {                                                /* TC_STATEMNT */
1293                 switch (t_info & OPCLSMASK) {
1294                 case ST_IF:
                              /* n: branch on condition; n2: node chained after the
                               * "then" part, re-linked past the "else" part if any */
1295                         n = chain_node(OC_BR | Vx);
1296                         n->l.n = condition();
1297                         chain_group();
1298                         n2 = chain_node(OC_EXEC);
1299                         n->r.n = seq->last;
1300                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1301                                 chain_group();
1302                                 n2->a.n = seq->last;
1303                         } else {
1304                                 rollback_token();
1305                         }
1306                         break;
1307
1308                 case ST_WHILE:
                              /* condition is parsed first, then attached to the
                               * loop head returned by chain_loop() */
1309                         n2 = condition();
1310                         n = chain_loop(NULL);
1311                         n->l.n = n2;
1312                         break;
1313
1314                 case ST_DO:
                              /* n2 is chained in front of the loop and re-linked to
                               * the node following the loop head; the condition
                               * comes after the body, behind a TC_WHILE token */
1315                         n2 = chain_node(OC_EXEC);
1316                         n = chain_loop(NULL);
1317                         n2->a.n = n->a.n;
1318                         next_token(TC_WHILE);
1319                         n->l.n = condition();
1320                         break;
1321
1322                 case ST_FOR:
1323                         next_token(TC_SEQSTART);
1324                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1325                         if (t_tclass & TC_SEQTERM) {    /* for-in */
                                      /* the only expression allowed here is "x in arr" */
1326                                 if ((n2->info & OPCLSMASK) != OC_IN)
1327                                         syntax_error(EMSG_UNEXP_TOKEN);
1328                                 n = chain_node(OC_WALKINIT | VV);
1329                                 n->l.n = n2->l.n;
1330                                 n->r.n = n2->r.n;
1331                                 n = chain_loop(NULL);
1332                                 n->info = OC_WALKNEXT | Vx;
1333                                 n->l.n = n2->l.n;
1334                         } else {                        /* for (;;) */
                                      /* n2: init (chained as a statement), then
                                       * n2: condition, n3: increment (run by the
                                       * loop's tail node) */
1335                                 n = chain_node(OC_EXEC | Vx);
1336                                 n->l.n = n2;
1337                                 n2 = parse_expr(TC_SEMICOL);
1338                                 n3 = parse_expr(TC_SEQTERM);
1339                                 n = chain_loop(n3);
1340                                 n->l.n = n2;
                                      /* no condition: loop head becomes a plain OC_EXEC */
1341                                 if (!n2)
1342                                         n->info = OC_EXEC;
1343                         }
1344                         break;
1345
1346                 case OC_PRINT:
1347                 case OC_PRINTF:
                              /* l.n: argument expression; with an output redirection
                               * token, its info bits are merged in and r.n holds the
                               * redirection target expression */
1348                         n = chain_node(t_info);
1349                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1350                         if (t_tclass & TC_OUTRDR) {
1351                                 n->info |= t_info;
1352                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1353                         }
1354                         if (t_tclass & TC_GRPTERM)
1355                                 rollback_token();
1356                         break;
1357
1358                 case OC_BREAK:
                              /* link to the enclosing loop's break target */
1359                         n = chain_node(OC_EXEC);
1360                         n->a.n = break_ptr;
1361                         break;
1362
1363                 case OC_CONTINUE:
                              /* link to the enclosing loop's continue target */
1364                         n = chain_node(OC_EXEC);
1365                         n->a.n = continue_ptr;
1366                         break;
1367
1368                 /* delete, next, nextfile, return, exit */
1369                 default:
1370                         chain_expr(t_info);
1371                 }
1372         }
1373 }
1374
1375 static void parse_program(char *p)
1376 {
1377         uint32_t tclass;
1378         node *cn;
1379         func *f;
1380         var *v;
1381
1382         g_pos = p;
1383         t_lineno = 1;
1384         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1385                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1386
1387                 if (tclass & TC_OPTERM)
1388                         continue;
1389
1390                 seq = &mainseq;
1391                 if (tclass & TC_BEGIN) {
1392                         seq = &beginseq;
1393                         chain_group();
1394
1395                 } else if (tclass & TC_END) {
1396                         seq = &endseq;
1397                         chain_group();
1398
1399                 } else if (tclass & TC_FUNCDECL) {
1400                         next_token(TC_FUNCTION);
1401                         g_pos++;
1402                         f = newfunc(t_string);
1403                         f->body.first = NULL;
1404                         f->nargs = 0;
1405                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1406                                 v = findvar(ahash, t_string);
1407                                 v->x.aidx = (f->nargs)++;
1408
1409                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1410                                         break;
1411                         }
1412                         seq = &(f->body);
1413                         chain_group();
1414                         clear_array(ahash);
1415
1416                 } else if (tclass & TC_OPSEQ) {
1417                         rollback_token();
1418                         cn = chain_node(OC_TEST);
1419                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1420                         if (t_tclass & TC_GRPSTART) {
1421                                 rollback_token();
1422                                 chain_group();
1423                         } else {
1424                                 chain_node(OC_PRINT);
1425                         }
1426                         cn->r.n = mainseq.last;
1427
1428                 } else /* if (tclass & TC_GRPSTART) */ {
1429                         rollback_token();
1430                         chain_group();
1431                 }
1432         }
1433 }
1434
1435
1436 /* -------- program execution part -------- */
1437
1438 static node *mk_splitter(const char *s, tsplitter *spl)
1439 {
1440         regex_t *re, *ire;
1441         node *n;
1442
1443         re = &spl->re[0];
1444         ire = &spl->re[1];
1445         n = &spl->n;
1446         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1447                 regfree(re);
1448                 regfree(ire); // TODO: nuke ire, use re+1?
1449         }
1450         if (strlen(s) > 1) {
1451                 mk_re_node(s, n, re);
1452         } else {
1453                 n->info = (uint32_t) *s;
1454         }
1455
1456         return n;
1457 }
1458
1459 /* use node as a regular expression. Supplied with node ptr and regex_t
1460  * storage space. Return ptr to regex (if result points to preg, it should
1461  * be later regfree'd manually
1462  */
1463 static regex_t *as_regex(node *op, regex_t *preg)
1464 {
1465         var *v;
1466         const char *s;
1467
1468         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1469                 return icase ? op->r.ire : op->l.re;
1470         }
1471         v = nvalloc(1);
1472         s = getvar_s(evaluate(op, v));
1473         xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1474         nvfree(v);
1475         return preg;
1476 }
1477
/* Gradually growing buffer: make sure *b can hold more than n bytes.
 * If *b is NULL or n has reached *size, the buffer is (re)allocated to
 * n + n/2 + 80 bytes and *size is updated. *size is not read while *b
 * is NULL.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1486
1487 /* resize field storage space */
1488 static void fsrealloc(int size)
1489 {
1490         int i;
1491
1492         if (size >= maxfields) {
1493                 i = maxfields;
1494                 maxfields = size + 16;
1495                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1496                 for (; i < maxfields; i++) {
1497                         Fields[i].type = VF_SPECIAL;
1498                         Fields[i].string = NULL;
1499                 }
1500         }
1501
1502         if (size < nfields) {
1503                 for (i = size; i < nfields; i++) {
1504                         clrvar(Fields + i);
1505                 }
1506         }
1507         nfields = size;
1508 }
1509
/* Split string s according to splitter node spl.
 * *slist receives a newly xzalloc'ed buffer into which the pieces are
 * written as consecutive NUL-terminated strings (the caller owns it).
 * Returns the number of fields.
 * Four modes, selected by spl->info: regexp, empty separator (one field per
 * char), a single separator char other than ' ', and ' ' (runs of
 * whitespace separate fields, leading whitespace is skipped).
 */
1510 static int awk_split(const char *s, node *spl, char **slist)
1511 {
1512         int l, n = 0;
1513         char c[4];
1514         char *s1;
1515         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1516
1517         /* in worst case, each char would be a separate field */
1518         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1519         strcpy(s1, s);
1520
              /* c[0..1]: separator char (low byte of spl->info) followed by a NUL
               * in c[2] or, when RS is the empty string, by '\n'; c+2 thus is
               * the strcspn() reject set used by the regexp branch */
1521         c[0] = c[1] = (char)spl->info;
1522         c[2] = c[3] = '\0';
1523         if (*getvar_s(intvar[RS]) == '\0')
1524                 c[2] = '\n';
1525
1526         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1527                 if (!*s)
1528                         return n; /* "": zero fields */
1529                 n++; /* at least one field will be there */
1530                 do {
1531                         l = strcspn(s, c+2); /* len till next NUL or \n */
1532                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1533                          && pmatch[0].rm_so <= l
1534                         ) {
1535                                 l = pmatch[0].rm_so;
                                      /* empty match at the very start: take one
                                       * char as the field and step over it */
1536                                 if (pmatch[0].rm_eo == 0) {
1537                                         l++;
1538                                         pmatch[0].rm_eo++;
1539                                 }
1540                                 n++; /* we saw yet another delimiter */
1541                         } else {
                                      /* no usable match: the field runs up to the
                                       * NUL / '\n'; a '\n' there is skipped as well */
1542                                 pmatch[0].rm_eo = l;
1543                                 if (s[l]) pmatch[0].rm_eo++;
1544                         }
                              /* store the field; nextword() presumably advances s1
                               * past its terminating NUL - confirm */
1545                         memcpy(s1, s, l);
1546                         s1[l] = '\0';
1547                         nextword(&s1);
1548                         s += pmatch[0].rm_eo;
1549                 } while (*s);
1550                 return n;
1551         }
1552         if (c[0] == '\0') {  /* null split */
                      /* every char becomes a field of its own */
1553                 while (*s) {
1554                         *s1++ = *s++;
1555                         *s1++ = '\0';
1556                         n++;
1557                 }
1558                 return n;
1559         }
1560         if (c[0] != ' ') {  /* single-character split */
                      /* with icase, both the upper and the lower case form of
                       * the separator are accepted */
1561                 if (icase) {
1562                         c[0] = toupper(c[0]);
1563                         c[1] = tolower(c[1]);
1564                 }
                      /* works on the copy of s made above: each separator found
                       * by strpbrk() is overwritten with a NUL */
1565                 if (*s1) n++;
1566                 while ((s1 = strpbrk(s1, c))) {
1567                         *s1++ = '\0';
1568                         n++;
1569                 }
1570                 return n;
1571         }
1572         /* space split */
1573         while (*s) {
1574                 s = skip_whitespace(s);
1575                 if (!*s) break;
1576                 n++;
1577                 while (*s && !isspace(*s))
1578                         *s1++ = *s++;
1579                 *s1++ = '\0';
1580         }
1581         return n;
1582 }
1583
1584 static void split_f0(void)
1585 {
1586 /* static char *fstrings; */
1587 #define fstrings (G.split_f0__fstrings)
1588
1589         int i, n;
1590         char *s;
1591
1592         if (is_f0_split)
1593                 return;
1594
1595         is_f0_split = TRUE;
1596         free(fstrings);
1597         fsrealloc(0);
1598         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1599         fsrealloc(n);
1600         s = fstrings;
1601         for (i = 0; i < n; i++) {
1602                 Fields[i].string = nextword(&s);
1603                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1604         }
1605
1606         /* set NF manually to avoid side effects */
1607         clrvar(intvar[NF]);
1608         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1609         intvar[NF]->number = nfields;
1610 #undef fstrings
1611 }
1612
1613 /* perform additional actions when some internal variables changed */
1614 static void handle_special(var *v)
1615 {
1616         int n;
1617         char *b;
1618         const char *sep, *s;
1619         int sl, l, len, i, bsize;
1620
1621         if (!(v->type & VF_SPECIAL))
1622                 return;
1623
1624         if (v == intvar[NF]) {
1625                 n = (int)getvar_i(v);
1626                 fsrealloc(n);
1627
1628                 /* recalculate $0 */
1629                 sep = getvar_s(intvar[OFS]);
1630                 sl = strlen(sep);
1631                 b = NULL;
1632                 len = 0;
1633                 for (i = 0; i < n; i++) {
1634                         s = getvar_s(&Fields[i]);
1635                         l = strlen(s);
1636                         if (b) {
1637                                 memcpy(b+len, sep, sl);
1638                                 len += sl;
1639                         }
1640                         qrealloc(&b, len+l+sl, &bsize);
1641                         memcpy(b+len, s, l);
1642                         len += l;
1643                 }
1644                 if (b)
1645                         b[len] = '\0';
1646                 setvar_p(intvar[F0], b);
1647                 is_f0_split = TRUE;
1648
1649         } else if (v == intvar[F0]) {
1650                 is_f0_split = FALSE;
1651
1652         } else if (v == intvar[FS]) {
1653                 mk_splitter(getvar_s(v), &fsplitter);
1654
1655         } else if (v == intvar[RS]) {
1656                 mk_splitter(getvar_s(v), &rsplitter);
1657
1658         } else if (v == intvar[IGNORECASE]) {
1659                 icase = istrue(v);
1660
1661         } else {                                /* $n */
1662                 n = getvar_i(intvar[NF]);
1663                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1664                 /* right here v is invalid. Just to note... */
1665         }
1666 }
1667
1668 /* step through func/builtin/etc arguments */
1669 static node *nextarg(node **pn)
1670 {
1671         node *n;
1672
1673         n = *pn;
1674         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1675                 *pn = n->r.n;
1676                 n = n->l.n;
1677         } else {
1678                 *pn = NULL;
1679         }
1680         return n;
1681 }
1682
1683 static void hashwalk_init(var *v, xhash *array)
1684 {
1685         char **w;
1686         hash_item *hi;
1687         unsigned i;
1688
1689         if (v->type & VF_WALK)
1690                 free(v->x.walker);
1691
1692         v->type |= VF_WALK;
1693         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1694         w[0] = w[1] = (char *)(w + 2);
1695         for (i = 0; i < array->csize; i++) {
1696                 hi = array->items[i];
1697                 while (hi) {
1698                         strcpy(*w, hi->name);
1699                         nextword(w);
1700                         hi = hi->next;
1701                 }
1702         }
1703 }
1704
1705 static int hashwalk_next(var *v)
1706 {
1707         char **w;
1708
1709         w = v->x.walker;
1710         if (w[1] == w[0])
1711                 return FALSE;
1712
1713         setvar_s(v, nextword(w+1));
1714         return TRUE;
1715 }
1716
1717 /* evaluate node, return 1 when result is true, 0 otherwise */
1718 static int ptest(node *pattern)
1719 {
1720         /* ptest__v is "static": to save stack space? */
1721         return istrue(evaluate(pattern, &G.ptest__v));
1722 }
1723
1724 /* Read the next record from stream rsm into variable v.
 *
 * Input is read with safe_read() straight from the stream's file descriptor
 * into a growable buffer whose state (buffer, adv, pos, size) is loaded from
 * rsm on entry and stored back on exit. The record terminator is taken from
 * rsplitter: a regex when its node class is OC_REGEXP, otherwise the single
 * character c, or - when c is NUL - a "\n\n" sequence.
 *
 * When data is available, v receives the record text (and is flagged
 * VF_USER) and RT receives the terminator text that was consumed.
 *
 * Returns 1 when a record was stored, 0 when no data was left, and -1 when
 * safe_read() failed (ERRNO is set from errno in that case).
 */
1725 static int awk_getline(rstream *rsm, var *v)
1726 {
1727         char *b;
1728         regmatch_t pmatch[2];
1729         int a, p, pp=0, size;
1730         int fd, so, eo, r, rp;
1731         char c, *m, *s;
1732
1733         /* we're using our own buffer since we need access to accumulating
1734          * characters
1735          */
1736         fd = fileno(rsm->F);
1737         m = rsm->buffer;        /* start of the allocation */
1738         a = rsm->adv;           /* offset of the unread data inside m */
1739         p = rsm->pos;           /* number of unread bytes at m + a */
1740         size = rsm->size;
1741         c = (char) rsplitter.n.info;    /* used below as single-char separator */
1742         rp = 0;                 /* record start offset; only moved in "\n\n" mode */
1743
1744         if (!m) qrealloc(&m, 256, &size);       /* no buffer attached to the stream yet */
1745         do {
1746                 b = m + a;
1747                 so = eo = p;    /* default: record is all buffered data, empty terminator */
1748                 r = 1;
1749                 if (p > 0) {
1750                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1751                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1752                                                         b, 1, pmatch, 0) == 0) {
1753                                         so = pmatch[0].rm_so;
1754                                         eo = pmatch[0].rm_eo;
                                        /* a match ending at the NUL is not accepted yet:
                                         * fall through and try to read more first */
1755                                         if (b[eo] != '\0')
1756                                                 break;
1757                                 }
1758                         } else if (c != '\0') {
                                /* only the bytes added by the last read (from pp on) are scanned */
1759                                 s = strchr(b+pp, c);
1760                                 if (!s) s = memchr(b+pp, '\0', p - pp);        /* NUL byte inside the data also ends the record */
1761                                 if (s) {
1762                                         so = eo = s-b;
1763                                         eo++;
1764                                         break;
1765                                 }
1766                         } else {
                                /* c is NUL: skip leading newlines, then look for "\n\n" */
1767                                 while (b[rp] == '\n')
1768                                         rp++;
1769                                 s = strstr(b+rp, "\n\n");
1770                                 if (s) {
1771                                         so = eo = s-b;
1772                                         while (b[eo] == '\n') eo++;
1773                                         if (b[eo] != '\0')
1774                                                 break;
1775                                 }
1776                         }
1777                 }
1778
                /* no usable terminator so far: make room and read more */
1779                 if (a > 0) {
                        /* slide the unread bytes (and the byte after them) to the buffer start */
1780                         memmove(m, (const void *)(m+a), p+1);
1781                         b = m;
1782                         a = 0;
1783                 }
1784
1785                 qrealloc(&m, a+p+128, &size);
1786                 b = m + a;
1787                 pp = p;
1788                 p += safe_read(fd, b+p, size-p-1);
1789                 if (p < pp) {   /* safe_read() returned a negative value */
1790                         p = 0;
1791                         r = 0;
1792                         setvar_i(intvar[ERRNO], errno);
1793                 }
1794                 b[p] = '\0';
1795
1796         } while (p > pp);       /* loop again only if the read added data */
1797
1798         if (p == 0) {
1799                 r--;            /* 1 -> 0 (no data), 0 -> -1 (read error) */
1800         } else {
                /* temporarily NUL-terminate at so to copy the record text ... */
1801                 c = b[so]; b[so] = '\0';
1802                 setvar_s(v, b+rp);
1803                 v->type |= VF_USER;
1804                 b[so] = c;
                /* ... and at eo to copy the terminator text into RT */
1805                 c = b[eo]; b[eo] = '\0';
1806                 setvar_s(intvar[RT], b+so);
1807                 b[eo] = c;
1808         }
1809
        /* store the buffer state back, with record and terminator consumed */
1810         rsm->buffer = m;
1811         rsm->adv = a + eo;
1812         rsm->pos = p - eo;
1813         rsm->size = size;
1814
1815         return r;
1816 }
1817
1818 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1819 {
1820         int r = 0;
1821         char c;
1822         const char *s = format;
1823
1824         if (int_as_int && n == (int)n) {
1825                 r = snprintf(b, size, "%d", (int)n);
1826         } else {
1827                 do { c = *s; } while (c && *++s);
1828                 if (strchr("diouxX", c)) {
1829                         r = snprintf(b, size, format, (int)n);
1830                 } else if (strchr("eEfgG", c)) {
1831                         r = snprintf(b, size, format, n);
1832                 } else {
1833                         syntax_error(EMSG_INV_FMT);
1834                 }
1835         }
1836         return r;
1837 }
1838
1839
1840 /* formatted output into an allocated buffer, return ptr to buffer */
1841 static char *awk_printf(node *n)
1842 {
1843         char *b = NULL;
1844         char *fmt, *s, *f;
1845         const char *s1;
1846         int i, j, incr, bsize;
1847         char c, c1;
1848         var *v, *arg;
1849
1850         v = nvalloc(1);
1851         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1852
1853         i = 0;
1854         while (*f) {
1855                 s = f;
1856                 while (*f && (*f != '%' || *(++f) == '%'))
1857                         f++;
1858                 while (*f && !isalpha(*f)) {
1859                         if (*f == '*')
1860                                 syntax_error("%*x formats are not supported");
1861                         f++;
1862                 }
1863
1864                 incr = (f - s) + MAXVARFMT;
1865                 qrealloc(&b, incr + i, &bsize);
1866                 c = *f;
1867                 if (c != '\0') f++;
1868                 c1 = *f;
1869                 *f = '\0';
1870                 arg = evaluate(nextarg(&n), v);
1871
1872                 j = i;
1873                 if (c == 'c' || !c) {
1874                         i += sprintf(b+i, s, is_numeric(arg) ?
1875                                         (char)getvar_i(arg) : *getvar_s(arg));
1876                 } else if (c == 's') {
1877                         s1 = getvar_s(arg);
1878                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1879                         i += sprintf(b+i, s, s1);
1880                 } else {
1881                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1882                 }
1883                 *f = c1;
1884
1885                 /* if there was an error while sprintf, return value is negative */
1886                 if (i < j) i = j;
1887         }
1888
1889         b = xrealloc(b, i + 1);
1890         free(fmt);
1891         nvfree(v);
1892         b[i] = '\0';
1893         return b;
1894 }
1895
1896 /* Common substitution routine.
 *
 * Replaces text in (src) matching the regex given by node (rn) with (repl)
 * and stores the result into (dest). If nm=0 every match is replaced;
 * otherwise only match number nm is replaced and earlier matches are copied
 * through unchanged. If src or dest is NULL, $0 is used for it.
 *
 * In (repl), '&' inserts the matched text. If ex=TRUE, a digit 0-9 preceded
 * by a backslash inserts the corresponding subexpression (0 = whole match).
 * Backslashes in front of these characters decide between the literal
 * character and the inserted text.
 *
 * Returns the number of matches visited, which equals the number of
 * replacements made when nm is 0 or 1.
 */
1902 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1903 {
1904         char *ds = NULL;
1905         const char *s;
1906         const char *sp;
1907         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1908         regmatch_t pmatch[10];
1909         regex_t sreg, *re;
1910
1911         re = as_regex(rn, &sreg);
1912         if (!src) src = intvar[F0];
1913         if (!dest) dest = intvar[F0];
1914
1915         i = di = 0;     /* i: matches seen so far; di: write offset into ds */
1916         sp = getvar_s(src);
1917         rl = strlen(repl);
        /* REG_NOTBOL is passed once sp has moved past the start of the source */
1918         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1919                 so = pmatch[0].rm_so;
1920                 eo = pmatch[0].rm_eo;
1921
                /* copy everything up to the end of the match first */
1922                 qrealloc(&ds, di + eo + rl, &dssize);
1923                 memcpy(ds + di, sp, eo);
1924                 di += eo;
1925                 if (++i >= nm) {
1926                         /* replace */
1927                         di -= (eo - so);        /* drop the matched text just copied */
1928                         nbs = 0;                /* length of the current run of backslashes */
1929                         for (s = repl; *s; s++) {
1930                                 ds[di++] = c = *s;
1931                                 if (c == '\\') {
1932                                         nbs++;
1933                                         continue;
1934                                 }
1935                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
                                        /* back up over this character and part of the
                                         * backslash run that was copied before it */
1936                                         di -= ((nbs + 3) >> 1);
1937                                         j = 0;
1938                                         if (c != '&') {
1939                                                 j = c - '0';
1940                                                 nbs++;
1941                                         }
                                        /* odd count: literal character;
                                         * even count: insert the text of submatch j */
1942                                         if (nbs % 2) {
1943                                                 ds[di++] = c;
1944                                         } else {
1945                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1946                                                 qrealloc(&ds, di + rl + n, &dssize);
1947                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1948                                                 di += n;
1949                                         }
1950                                 }
1951                                 nbs = 0;
1952                         }
1953                 }
1954
1955                 sp += eo;
1956                 if (i == nm) break;     /* the single requested match has been replaced */
1957                 if (eo == so) {
                        /* empty match: copy one character through so the scan
                         * advances; stop if that character was the NUL */
1958                         ds[di] = *sp++;
1959                         if (!ds[di++]) break;
1960                 }
1961         }
1962
        /* append whatever follows the last processed match */
1963         qrealloc(&ds, di + strlen(sp), &dssize);
1964         strcpy(ds + di, sp);
1965         setvar_p(dest, ds);
1966         if (re == &sreg) regfree(re);   /* free only if as_regex handed back our local regex */
1967         return i;
1968 }
1969
1970 static var *exec_builtin(node *op, var *res)
1971 {
1972 #define tspl (G.exec_builtin__tspl)
1973
1974         int (*to_xxx)(int);
1975         var *tv;
1976         node *an[4];
1977         var *av[4];
1978         const char *as[4];
1979         regmatch_t pmatch[2];
1980         regex_t sreg, *re;
1981         node *spl;
1982         uint32_t isr, info;
1983         int nargs;
1984         time_t tt;
1985         char *s, *s1;
1986         int i, l, ll, n;
1987
1988         tv = nvalloc(4);
1989         isr = info = op->info;
1990         op = op->l.n;
1991
1992         av[2] = av[3] = NULL;
1993         for (i = 0; i < 4 && op; i++) {
1994                 an[i] = nextarg(&op);
1995                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1996                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1997                 isr >>= 1;
1998         }
1999
2000         nargs = i;
2001         if ((uint32_t)nargs < (info >> 30))
2002                 syntax_error(EMSG_TOO_FEW_ARGS);
2003
2004         switch (info & OPNMASK) {
2005
2006         case B_a2:
2007 #if ENABLE_FEATURE_AWK_MATH
2008                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2009 #else
2010                 syntax_error(EMSG_NO_MATH);
2011 #endif
2012                 break;
2013
2014         case B_sp:
2015                 if (nargs > 2) {
2016                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2017                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2018                 } else {
2019                         spl = &fsplitter.n;
2020                 }
2021
2022                 n = awk_split(as[0], spl, &s);
2023                 s1 = s;
2024                 clear_array(iamarray(av[1]));
2025                 for (i=1; i<=n; i++)
2026                         setari_u(av[1], i, nextword(&s1));
2027                 free(s);
2028                 setvar_i(res, n);
2029                 break;
2030
2031         case B_ss:
2032                 l = strlen(as[0]);
2033                 i = getvar_i(av[1]) - 1;
2034                 if (i > l) i = l;
2035                 if (i < 0) i = 0;
2036                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2037                 if (n < 0) n = 0;
2038                 s = xstrndup(as[0]+i, n);
2039                 setvar_p(res, s);
2040                 break;
2041
2042         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2043          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2044         case B_an:
2045                 setvar_i(res, (unsigned long)getvar_i(av[0]) & (unsigned long)getvar_i(av[1]));
2046                 break;
2047
2048         case B_co:
2049                 setvar_i(res, ~(unsigned long)getvar_i(av[0]));
2050                 break;
2051
2052         case B_ls:
2053                 setvar_i(res, (unsigned long)getvar_i(av[0]) << (unsigned long)getvar_i(av[1]));
2054                 break;
2055
2056         case B_or:
2057                 setvar_i(res, (unsigned long)getvar_i(av[0]) | (unsigned long)getvar_i(av[1]));
2058                 break;
2059
2060         case B_rs:
2061                 setvar_i(res, (unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1]));
2062                 break;
2063
2064         case B_xo:
2065                 setvar_i(res, (unsigned long)getvar_i(av[0]) ^ (unsigned long)getvar_i(av[1]));
2066                 break;
2067
2068         case B_lo:
2069                 to_xxx = tolower;
2070                 goto lo_cont;
2071
2072         case B_up:
2073                 to_xxx = toupper;
2074  lo_cont:
2075                 s1 = s = xstrdup(as[0]);
2076                 while (*s1) {
2077                         *s1 = (*to_xxx)(*s1);
2078                         s1++;
2079                 }
2080                 setvar_p(res, s);
2081                 break;
2082
2083         case B_ix:
2084                 n = 0;
2085                 ll = strlen(as[1]);
2086                 l = strlen(as[0]) - ll;
2087                 if (ll > 0 && l >= 0) {
2088                         if (!icase) {
2089                                 s = strstr(as[0], as[1]);
2090                                 if (s) n = (s - as[0]) + 1;
2091                         } else {
2092                                 /* this piece of code is terribly slow and
2093                                  * really should be rewritten
2094                                  */
2095                                 for (i=0; i<=l; i++) {
2096                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2097                                                 n = i+1;
2098                                                 break;
2099                                         }
2100                                 }
2101                         }
2102                 }
2103                 setvar_i(res, n);
2104                 break;
2105
2106         case B_ti:
2107                 if (nargs > 1)
2108                         tt = getvar_i(av[1]);
2109                 else
2110                         time(&tt);
2111                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2112                 i = strftime(g_buf, MAXVARFMT,
2113                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2114                         localtime(&tt));
2115                 g_buf[i] = '\0';
2116                 setvar_s(res, g_buf);
2117                 break;
2118
2119         case B_ma:
2120                 re = as_regex(an[1], &sreg);
2121                 n = regexec(re, as[0], 1, pmatch, 0);
2122                 if (n == 0) {
2123                         pmatch[0].rm_so++;
2124                         pmatch[0].rm_eo++;
2125                 } else {
2126                         pmatch[0].rm_so = 0;
2127                         pmatch[0].rm_eo = -1;
2128                 }
2129                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2130                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2131                 setvar_i(res, pmatch[0].rm_so);
2132                 if (re == &sreg) regfree(re);
2133                 break;
2134
2135         case B_ge:
2136                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2137                 break;
2138
2139         case B_gs:
2140                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2141                 break;
2142
2143         case B_su:
2144                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2145                 break;
2146         }
2147
2148         nvfree(tv);
2149         return res;
2150 #undef tspl
2151 }
2152
2153 /*
2154  * Evaluate node - the heart of the program. Supplied with subtree
2155  * and place where to store result. returns ptr to result.
2156  */
2157 #define XC(n) ((n) >> 8)
2158
2159 static var *evaluate(node *op, var *res)
2160 {
2161 /* This procedure is recursive so we should count every byte */
2162 #define fnargs (G.evaluate__fnargs)
2163 /* seed is initialized to 1 */
2164 #define seed   (G.evaluate__seed)
2165 #define sreg   (G.evaluate__sreg)
2166
2167         node *op1;
2168         var *v1;
2169         union {
2170                 var *v;
2171                 const char *s;
2172                 double d;
2173                 int i;
2174         } L, R;
2175         uint32_t opinfo;
2176         int opn;
2177         union {
2178                 char *s;
2179                 rstream *rsm;
2180                 FILE *F;
2181                 var *v;
2182                 regex_t *re;
2183                 uint32_t info;
2184         } X;
2185
2186         if (!op)
2187                 return setvar_s(res, NULL);
2188
2189         v1 = nvalloc(2);
2190
2191         while (op) {
2192                 opinfo = op->info;
2193                 opn = (opinfo & OPNMASK);
2194                 g_lineno = op->lineno;
2195
2196                 /* execute inevitable things */
2197                 op1 = op->l.n;
2198                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2199                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2200                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2201                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2202                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2203
2204                 switch (XC(opinfo & OPCLSMASK)) {
2205
2206                 /* -- iterative node type -- */
2207
2208                 /* test pattern */
2209                 case XC( OC_TEST ):
2210                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2211                                 /* it's range pattern */
2212                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2213                                         op->info |= OF_CHECKED;
2214                                         if (ptest(op1->r.n))
2215                                                 op->info &= ~OF_CHECKED;
2216
2217                                         op = op->a.n;
2218                                 } else {
2219                                         op = op->r.n;
2220                                 }
2221                         } else {
2222                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2223                         }
2224                         break;
2225
2226                 /* just evaluate an expression, also used as unconditional jump */
2227                 case XC( OC_EXEC ):
2228                         break;
2229
2230                 /* branch, used in if-else and various loops */
2231                 case XC( OC_BR ):
2232                         op = istrue(L.v) ? op->a.n : op->r.n;
2233                         break;
2234
2235                 /* initialize for-in loop */
2236                 case XC( OC_WALKINIT ):
2237                         hashwalk_init(L.v, iamarray(R.v));
2238                         break;
2239
2240                 /* get next array item */
2241                 case XC( OC_WALKNEXT ):
2242                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2243                         break;
2244
2245                 case XC( OC_PRINT ):
2246                 case XC( OC_PRINTF ):
2247                         X.F = stdout;
2248                         if (op->r.n) {
2249                                 X.rsm = newfile(R.s);
2250                                 if (!X.rsm->F) {
2251                                         if (opn == '|') {
2252                                                 X.rsm->F = popen(R.s, "w");
2253                                                 if (X.rsm->F == NULL)
2254                                                         bb_perror_msg_and_die("popen");
2255                                                 X.rsm->is_pipe = 1;
2256                                         } else {
2257                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2258                                         }
2259                                 }
2260                                 X.F = X.rsm->F;
2261                         }
2262
2263                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2264                                 if (!op1) {
2265                                         fputs(getvar_s(intvar[F0]), X.F);
2266                                 } else {
2267                                         while (op1) {
2268                                                 L.v = evaluate(nextarg(&op1), v1);
2269                                                 if (L.v->type & VF_NUMBER) {
2270                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2271                                                                         getvar_i(L.v), TRUE);
2272                                                         fputs(g_buf, X.F);
2273                                                 } else {
2274                                                         fputs(getvar_s(L.v), X.F);
2275                                                 }
2276
2277                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2278                                         }
2279                                 }
2280                                 fputs(getvar_s(intvar[ORS]), X.F);
2281
2282                         } else {        /* OC_PRINTF */
2283                                 L.s = awk_printf(op1);
2284                                 fputs(L.s, X.F);
2285                                 free((char*)L.s);
2286                         }
2287                         fflush(X.F);
2288                         break;
2289
2290                 case XC( OC_DELETE ):
2291                         X.info = op1->info & OPCLSMASK;
2292                         if (X.info == OC_VAR) {
2293                                 R.v = op1->l.v;
2294                         } else if (X.info == OC_FNARG) {
2295                                 R.v = &fnargs[op1->l.i];
2296                         } else {
2297                                 syntax_error(EMSG_NOT_ARRAY);
2298                         }
2299
2300                         if (op1->r.n) {
2301                                 clrvar(L.v);
2302                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2303                                 hash_remove(iamarray(R.v), L.s);
2304                         } else {
2305                                 clear_array(iamarray(R.v));
2306                         }
2307                         break;
2308
2309                 case XC( OC_NEWSOURCE ):
2310                         g_progname = op->l.s;
2311                         break;
2312
2313                 case XC( OC_RETURN ):
2314                         copyvar(res, L.v);
2315                         break;
2316
2317                 case XC( OC_NEXTFILE ):
2318                         nextfile = TRUE;
2319                 case XC( OC_NEXT ):
2320                         nextrec = TRUE;
2321                 case XC( OC_DONE ):
2322                         clrvar(res);
2323                         break;
2324
2325                 case XC( OC_EXIT ):
2326                         awk_exit(L.d);
2327
2328                 /* -- recursive node type -- */
2329
2330                 case XC( OC_VAR ):
2331                         L.v = op->l.v;
2332                         if (L.v == intvar[NF])
2333                                 split_f0();
2334                         goto v_cont;
2335
2336                 case XC( OC_FNARG ):
2337                         L.v = &fnargs[op->l.i];
2338  v_cont:
2339                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2340                         break;
2341
2342                 case XC( OC_IN ):
2343                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2344                         break;
2345
2346                 case XC( OC_REGEXP ):
2347                         op1 = op;
2348                         L.s = getvar_s(intvar[F0]);
2349                         goto re_cont;
2350
2351                 case XC( OC_MATCH ):
2352                         op1 = op->r.n;
2353  re_cont:
2354                         X.re = as_regex(op1, &sreg);
2355                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2356                         if (X.re == &sreg) regfree(X.re);
2357                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2358                         break;
2359
2360                 case XC( OC_MOVE ):
2361                         /* if source is a temporary string, jusk relink it to dest */
2362                         if (R.v == v1+1 && R.v->string) {
2363                                 res = setvar_p(L.v, R.v->string);
2364                                 R.v->string = NULL;
2365                         } else {
2366                                 res = copyvar(L.v, R.v);
2367                         }
2368                         break;
2369
2370                 case XC( OC_TERNARY ):
2371                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2372                                 syntax_error(EMSG_POSSIBLE_ERROR);
2373                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2374                         break;
2375
2376                 case XC( OC_FUNC ):
2377                         if (!op->r.f->body.first)
2378                                 syntax_error(EMSG_UNDEF_FUNC);
2379
2380                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2381                         while (op1) {
2382                                 L.v = evaluate(nextarg(&op1), v1);
2383                                 copyvar(R.v, L.v);
2384                                 R.v->type |= VF_CHILD;
2385                                 R.v->x.parent = L.v;
2386                                 if (++R.v - X.v >= op->r.f->nargs)
2387                                         break;
2388                         }
2389
2390                         R.v = fnargs;
2391                         fnargs = X.v;
2392
2393                         L.s = g_progname;
2394                         res = evaluate(op->r.f->body.first, res);
2395                         g_progname = L.s;
2396
2397                         nvfree(fnargs);
2398                         fnargs = R.v;
2399                         break;
2400
2401                 case XC( OC_GETLINE ):
2402                 case XC( OC_PGETLINE ):
2403                         if (op1) {
2404                                 X.rsm = newfile(L.s);
2405                                 if (!X.rsm->F) {
2406                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2407                                                 X.rsm->F = popen(L.s, "r");
2408                                                 X.rsm->is_pipe = TRUE;
2409                                         } else {
2410                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2411                                         }
2412                                 }
2413                         } else {
2414                                 if (!iF) iF = next_input_file();
2415                                 X.rsm = iF;
2416                         }
2417
2418                         if (!X.rsm->F) {
2419                                 setvar_i(intvar[ERRNO], errno);
2420                                 setvar_i(res, -1);
2421                                 break;
2422                         }
2423
2424                         if (!op->r.n)
2425                                 R.v = intvar[F0];
2426
2427                         L.i = awk_getline(X.rsm, R.v);
2428                         if (L.i > 0) {
2429                                 if (!op1) {
2430                                         incvar(intvar[FNR]);
2431                                         incvar(intvar[NR]);
2432                                 }
2433                         }
2434                         setvar_i(res, L.i);
2435                         break;
2436
2437                 /* simple builtins */
2438                 case XC( OC_FBLTIN ):
2439                         switch (opn) {
2440
2441                         case F_in:
2442                                 R.d = (int)L.d;
2443                                 break;
2444
2445                         case F_rn:
2446                                 R.d = (double)rand() / (double)RAND_MAX;
2447                                 break;
2448 #if ENABLE_FEATURE_AWK_MATH
2449                         case F_co:
2450                                 R.d = cos(L.d);
2451                                 break;
2452
2453                         case F_ex:
2454                                 R.d = exp(L.d);
2455                                 break;
2456
2457                         case F_lg:
2458                                 R.d = log(L.d);
2459                                 break;
2460
2461                         case F_si:
2462                                 R.d = sin(L.d);
2463                                 break;
2464
2465                         case F_sq:
2466                                 R.d = sqrt(L.d);
2467                                 break;
2468 #else
2469                         case F_co:
2470                         case F_ex:
2471                         case F_lg:
2472                         case F_si:
2473                         case F_sq:
2474                                 syntax_error(EMSG_NO_MATH);
2475                                 break;
2476 #endif
2477                         case F_sr:
2478                                 R.d = (double)seed;
2479                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2480                                 srand(seed);
2481                                 break;
2482
2483                         case F_ti:
2484                                 R.d = time(NULL);
2485                                 break;
2486
2487                         case F_le:
2488                                 if (!op1)
2489                                         L.s = getvar_s(intvar[F0]);
2490                                 R.d = strlen(L.s);
2491                                 break;
2492
2493                         case F_sy:
2494                                 fflush(NULL);
2495                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2496                                                 ? (system(L.s) >> 8) : 0;
2497                                 break;
2498
2499                         case F_ff:
2500                                 if (!op1)
2501                                         fflush(stdout);
2502                                 else {
2503                                         if (L.s && *L.s) {
2504                                                 X.rsm = newfile(L.s);
2505                                                 fflush(X.rsm->F);
2506                                         } else {
2507                                                 fflush(NULL);
2508                                         }
2509                                 }
2510                                 break;
2511
2512                         case F_cl:
2513                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2514                                 if (X.rsm) {
2515                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2516                                         free(X.rsm->buffer);
2517                                         hash_remove(fdhash, L.s);
2518                                 }
2519                                 if (R.i != 0)
2520                                         setvar_i(intvar[ERRNO], errno);
2521                                 R.d = (double)R.i;
2522                                 break;
2523                         }
2524                         setvar_i(res, R.d);
2525                         break;
2526
2527                 case XC( OC_BUILTIN ):
2528                         res = exec_builtin(op, res);
2529                         break;
2530
2531                 case XC( OC_SPRINTF ):
2532                         setvar_p(res, awk_printf(op1));
2533                         break;
2534
2535                 case XC( OC_UNARY ):
2536                         X.v = R.v;
2537                         L.d = R.d = getvar_i(R.v);
2538                         switch (opn) {
2539                         case 'P':
2540                                 L.d = ++R.d;
2541                                 goto r_op_change;
2542                         case 'p':
2543                                 R.d++;
2544                                 goto r_op_change;
2545                         case 'M':
2546                                 L.d = --R.d;
2547                                 goto r_op_change;
2548                         case 'm':
2549                                 R.d--;
2550                                 goto r_op_change;
2551                         case '!':
2552                                 L.d = istrue(X.v) ? 0 : 1;
2553                                 break;
2554                         case '-':
2555                                 L.d = -R.d;
2556                                 break;
2557  r_op_change:
2558                                 setvar_i(X.v, R.d);
2559                         }
2560                         setvar_i(res, L.d);
2561                         break;
2562
2563                 case XC( OC_FIELD ):
2564                         R.i = (int)getvar_i(R.v);
2565                         if (R.i == 0) {
2566                                 res = intvar[F0];
2567                         } else {
2568                                 split_f0();
2569                                 if (R.i > nfields)
2570                                         fsrealloc(R.i);
2571                                 res = &Fields[R.i - 1];
2572                         }
2573                         break;
2574
2575                 /* concatenation (" ") and index joining (",") */
2576                 case XC( OC_CONCAT ):
2577                 case XC( OC_COMMA ):
2578                         opn = strlen(L.s) + strlen(R.s) + 2;
2579                         X.s = xmalloc(opn);
2580                         strcpy(X.s, L.s);
2581                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2582                                 L.s = getvar_s(intvar[SUBSEP]);
2583                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2584                                 strcat(X.s, L.s);
2585                         }
2586                         strcat(X.s, R.s);
2587                         setvar_p(res, X.s);
2588                         break;
2589
2590                 case XC( OC_LAND ):
2591                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2592                         break;
2593
2594                 case XC( OC_LOR ):
2595                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2596                         break;
2597
2598                 case XC( OC_BINARY ):
2599                 case XC( OC_REPLACE ):
2600                         R.d = getvar_i(R.v);
2601                         switch (opn) {
2602                         case '+':
2603                                 L.d += R.d;
2604                                 break;
2605                         case '-':
2606                                 L.d -= R.d;
2607                                 break;
2608                         case '*':
2609                                 L.d *= R.d;
2610                                 break;
2611                         case '/':
2612                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2613                                 L.d /= R.d;
2614                                 break;
2615                         case '&':
2616 #if ENABLE_FEATURE_AWK_MATH
2617                                 L.d = pow(L.d, R.d);
2618 #else
2619                                 syntax_error(EMSG_NO_MATH);
2620 #endif
2621                                 break;
2622                         case '%':
2623                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2624                                 L.d -= (int)(L.d / R.d) * R.d;
2625                                 break;
2626                         }
2627                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2628                         break;
2629
2630                 case XC( OC_COMPARE ):
2631                         if (is_numeric(L.v) && is_numeric(R.v)) {
2632                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2633                         } else {
2634                                 L.s = getvar_s(L.v);
2635                                 R.s = getvar_s(R.v);
2636                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2637                         }
2638                         switch (opn & 0xfe) {
2639                         case 0:
2640                                 R.i = (L.d > 0);
2641                                 break;
2642                         case 2:
2643                                 R.i = (L.d >= 0);
2644                                 break;
2645                         case 4:
2646                                 R.i = (L.d == 0);
2647                                 break;
2648                         }
2649                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2650                         break;
2651
2652                 default:
2653                         syntax_error(EMSG_POSSIBLE_ERROR);
2654                 }
2655                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2656                         op = op->a.n;
2657                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2658                         break;
2659                 if (nextrec)
2660                         break;
2661         }
2662         nvfree(v1);
2663         return res;
2664 #undef fnargs
2665 #undef seed
2666 #undef sreg
2667 }
2668
2669
2670 /* -------- main & co. -------- */
2671
2672 static int awk_exit(int r)
2673 {
2674         var tv;
2675         unsigned i;
2676         hash_item *hi;
2677
2678         zero_out_var(&tv);
2679
2680         if (!exiting) {
2681                 exiting = TRUE;
2682                 nextrec = FALSE;
2683                 evaluate(endseq.first, &tv);
2684         }
2685
2686         /* waiting for children */
2687         for (i = 0; i < fdhash->csize; i++) {
2688                 hi = fdhash->items[i];
2689                 while (hi) {
2690                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2691                                 pclose(hi->data.rs.F);
2692                         hi = hi->next;
2693                 }
2694         }
2695
2696         exit(r);
2697 }
2698
2699 /* if expr looks like "var=value", perform assignment and return 1,
2700  * otherwise return 0 */
2701 static int is_assignment(const char *expr)
2702 {
2703         char *exprc, *s, *s0, *s1;
2704
2705         exprc = xstrdup(expr);
2706         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2707                 free(exprc);
2708                 return FALSE;
2709         }
2710
2711         *(s++) = '\0';
2712         s0 = s1 = s;
2713         while (*s)
2714                 *(s1++) = nextchar(&s);
2715
2716         *s1 = '\0';
2717         setvar_u(newvar(exprc), s0);
2718         free(exprc);
2719         return TRUE;
2720 }
2721
2722 /* switch to next input file */
2723 static rstream *next_input_file(void)
2724 {
2725 #define rsm          (G.next_input_file__rsm)
2726 #define files_happen (G.next_input_file__files_happen)
2727
2728         FILE *F = NULL;
2729         const char *fname, *ind;
2730
2731         if (rsm.F) fclose(rsm.F);
2732         rsm.F = NULL;
2733         rsm.pos = rsm.adv = 0;
2734
2735         do {
2736                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2737                         if (files_happen)
2738                                 return NULL;
2739                         fname = "-";
2740                         F = stdin;
2741                 } else {
2742                         ind = getvar_s(incvar(intvar[ARGIND]));
2743                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2744                         if (fname && *fname && !is_assignment(fname))
2745                                 F = xfopen_stdin(fname);
2746                 }
2747         } while (!F);
2748
2749         files_happen = TRUE;
2750         setvar_s(intvar[FILENAME], fname);
2751         rsm.F = F;
2752         return &rsm;
2753 #undef rsm
2754 #undef files_happen
2755 }
2756
2757 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2758 int awk_main(int argc, char **argv)
2759 {
2760         unsigned opt;
2761         char *opt_F, *opt_W;
2762         llist_t *list_v = NULL;
2763         llist_t *list_f = NULL;
2764         int i, j;
2765         var *v;
2766         var tv;
2767         char **envp;
2768         char *vnames = (char *)vNames; /* cheat */
2769         char *vvalues = (char *)vValues;
2770
2771         INIT_G();
2772
2773         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2774          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2775         if (ENABLE_LOCALE_SUPPORT)
2776                 setlocale(LC_NUMERIC, "C");
2777
2778         zero_out_var(&tv);
2779
2780         /* allocate global buffer */
2781         g_buf = xmalloc(MAXVARFMT + 1);
2782
2783         vhash = hash_init();
2784         ahash = hash_init();
2785         fdhash = hash_init();
2786         fnhash = hash_init();
2787
2788         /* initialize variables */
2789         for (i = 0; *vnames; i++) {
2790                 intvar[i] = v = newvar(nextword(&vnames));
2791                 if (*vvalues != '\377')
2792                         setvar_s(v, nextword(&vvalues));
2793                 else
2794                         setvar_i(v, 0);
2795
2796                 if (*vnames == '*') {
2797                         v->type |= VF_SPECIAL;
2798                         vnames++;
2799                 }
2800         }
2801
2802         handle_special(intvar[FS]);
2803         handle_special(intvar[RS]);
2804
2805         newfile("/dev/stdin")->F = stdin;
2806         newfile("/dev/stdout")->F = stdout;
2807         newfile("/dev/stderr")->F = stderr;
2808
2809         /* Huh, people report that sometimes environ is NULL. Oh well. */
2810         if (environ) for (envp = environ; *envp; envp++) {
2811                 /* environ is writable, thus we don't strdup it needlessly */
2812                 char *s = *envp;
2813                 char *s1 = strchr(s, '=');
2814                 if (s1) {
2815                         *s1 = '\0';
2816                         /* Both findvar and setvar_u take const char*
2817                          * as 2nd arg -> environment is not trashed */
2818                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2819                         *s1 = '=';
2820                 }
2821         }
2822         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2823         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2824         argv += optind;
2825         argc -= optind;
2826         if (opt & 0x1)
2827                 setvar_s(intvar[FS], opt_F); // -F
2828         while (list_v) { /* -v */
2829                 if (!is_assignment(llist_pop(&list_v)))
2830                         bb_show_usage();
2831         }
2832         if (list_f) { /* -f */
2833                 do {
2834                         char *s = NULL;
2835                         FILE *from_file;
2836
2837                         g_progname = llist_pop(&list_f);
2838                         from_file = xfopen_stdin(g_progname);
2839                         /* one byte is reserved for some trick in next_token */
2840                         for (i = j = 1; j > 0; i += j) {
2841                                 s = xrealloc(s, i + 4096);
2842                                 j = fread(s + i, 1, 4094, from_file);
2843                         }
2844                         s[i] = '\0';
2845                         fclose(from_file);
2846                         parse_program(s + 1);
2847                         free(s);
2848                 } while (list_f);
2849         } else { // no -f: take program from 1st parameter
2850                 if (!argc)
2851                         bb_show_usage();
2852                 g_progname = "cmd. line";
2853                 parse_program(*argv++);
2854                 argc--;
2855         }
2856         if (opt & 0x8) // -W
2857                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2858
2859         /* fill in ARGV array */
2860         setvar_i(intvar[ARGC], argc + 1);
2861         setari_u(intvar[ARGV], 0, "awk");
2862         i = 0;
2863         while (*argv)
2864                 setari_u(intvar[ARGV], ++i, *argv++);
2865
2866         evaluate(beginseq.first, &tv);
2867         if (!mainseq.first && !endseq.first)
2868                 awk_exit(EXIT_SUCCESS);
2869
2870         /* input file could already be opened in BEGIN block */
2871         if (!iF) iF = next_input_file();
2872
2873         /* passing through input files */
2874         while (iF) {
2875                 nextfile = FALSE;
2876                 setvar_i(intvar[FNR], 0);
2877
2878                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2879                         nextrec = FALSE;
2880                         incvar(intvar[NR]);
2881                         incvar(intvar[FNR]);
2882                         evaluate(mainseq.first, &tv);
2883
2884                         if (nextfile)
2885                                 break;
2886                 }
2887
2888                 if (i < 0)
2889                         syntax_error(strerror(errno));
2890
2891                 iF = next_input_file();
2892         }
2893
2894         awk_exit(EXIT_SUCCESS);
2895         /*return 0;*/
2896 }