libbb: [x]fopen_for_{read,write} introduced and used.
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Size limit handed to fmt_num() when getvar_s() formats a number into g_buf */
17 #define MAXVARFMT       240
/* Smallest number of var slots nvalloc() puts into a newly allocated block */
18 #define MINNVBLOCK      64
19
20 /* variable flags (bits of var::type) */
21 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
22 #define VF_ARRAY        0x0002  /* 1 = it's an array */
23
24 #define VF_CACHED       0x0100  /* 1 = num/str value has a cached str/num equivalent */
25 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
26 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
27 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
28 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer (clrvar() does not free it) */
29 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
30 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
31
32 /* these flags are static: clrvar() keeps them when the value is changed */
33 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35 /* Variable: one awk value cell. The VF_xxx bits in 'type' tell which of
 * 'number' / 'string' is primary and which (if any) is a cached conversion;
 * the union 'x' member in use depends on the VF_ARRAY/VF_CHILD/VF_WALK bits. */
36 typedef struct var_s {
37         unsigned type;            /* VF_xxx flags */
38         double number;            /* numeric value (returned by getvar_i()) */
39         char *string;             /* string value; NULL is read as "" by getvar_s() */
40         union {
41                 int aidx;               /* func arg idx (for compilation stage) */
42                 struct xhash_s *array;  /* array ptr */
43                 struct var_s *parent;   /* for func args, ptr to actual parameter */
44                 char **walker;          /* list of array elements (for..in) */
45         } x;
46 } var;
47
48 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
49 typedef struct chain_s {
50         struct node_s *first;           /* first node of the chain */
51         struct node_s *last;            /* last node of the chain */
52         const char *programname;        /* NOTE(review): presumably the source name used in diagnostics - confirm */
53 } chain;
54
55 /* Function: entry stored in the functions hash (see hash_item.data.f) */
56 typedef struct func_s {
57         unsigned nargs;                 /* number of arguments */
58         struct chain_s body;            /* node chain of the function body */
59 } func;
60
61 /* I/O stream: entry stored in the redirect streams hash (see hash_item.data.rs).
 * NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping; the code
 * using them is outside this part of the file - confirm before relying on it. */
62 typedef struct rstream_s {
63         FILE *F;
64         char *buffer;
65         int adv;
66         int size;
67         int pos;
68         smallint is_pipe;
69 } rstream;
70
/* One hash table entry. 'data' must remain the first member: hash_find()
 * stores the &data pointer returned by hash_search() in a hash_item pointer. */
71 typedef struct hash_item_s {
72         union {
73                 struct var_s v;         /* variable/array hash */
74                 struct rstream_s rs;    /* redirect streams hash */
75                 struct func_s f;        /* functions hash */
76         } data;
77         struct hash_item_s *next;       /* next in chain */
78         char name[1];                   /* really it's longer: hash_find() allocates room for the whole key */
79 } hash_item;
80
/* Chained hash table keyed by NUL-terminated names */
81 typedef struct xhash_s {
82         unsigned nel;           /* num of elements */
83         unsigned csize;         /* current hash size (number of buckets) */
84         unsigned nprime;        /* index of the next hash size in PRIMES[] */
85         unsigned glen;          /* total length of item names, each counted with its NUL */
86         struct hash_item_s **items;     /* bucket array of csize chain heads */
87 } xhash;
88
89 /* Tree node. 'l', 'r' and 'a' are three operand slots; which union member is
 * meaningful depends on the node's 'info'.
 * NOTE(review): 'info' presumably carries the tokeninfo[] style word (operation
 * class, OF_xxx flags, priority) - the code that fills it is outside this view. */
90 typedef struct node_s {
91         uint32_t info;
92         unsigned lineno;
93         union {
94                 struct node_s *n;
95                 var *v;
96                 int i;
97                 char *s;
98                 regex_t *re;
99         } l;
100         union {
101                 struct node_s *n;
102                 regex_t *ire;
103                 func *f;
104                 int argno;
105         } r;
106         union {
107                 struct node_s *n;
108         } a;
109 } node;
110
111 /* Block of temporary variables, managed by nvalloc()/nvfree() */
112 typedef struct nvblock_s {
113         int size;                       /* capacity of nv[], in vars */
114         var *pos;                       /* first unused slot in nv[] */
115         struct nvblock_s *prev;         /* previous block in the list */
116         struct nvblock_s *next;         /* next block in the list */
117         var nv[0];                      /* storage; allocated together with the header */
118 } nvblock;
119
/* A node bundled with two regex_t objects.
 * NOTE(review): presumably a field/record splitter (see fsplitter/rsplitter in
 * struct globals2); the code using it is outside this view. */
120 typedef struct tsplitter_s {
121         node n;
122         regex_t re[2];
123 } tsplitter;
124
125 /* simple token classes */
126 /* Order and hex values are very important!!!  See next_token() */
/* Each class is a single bit; the groups in tokenlist[] are laid out in the
 * same order, separated by NTC ("switch to next token class (tc<<1)"). */
127 #define TC_SEQSTART      1                              /* ( */
128 #define TC_SEQTERM      (1 << 1)                /* ) */
129 #define TC_REGEXP       (1 << 2)                /* /.../ */
130 #define TC_OUTRDR       (1 << 3)                /* | > >> */
131 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
132 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
133 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
134 #define TC_IN           (1 << 7)
135 #define TC_COMMA        (1 << 8)
136 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
137 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
138 #define TC_ARRTERM      (1 << 11)               /* ] */
139 #define TC_GRPSTART     (1 << 12)               /* { */
140 #define TC_GRPTERM      (1 << 13)               /* } */
141 #define TC_SEMICOL      (1 << 14)
142 #define TC_NEWLINE      (1 << 15)
143 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
144 #define TC_WHILE        (1 << 17)
145 #define TC_ELSE         (1 << 18)
146 #define TC_BUILTIN      (1 << 19)
147 #define TC_GETLINE      (1 << 20)
148 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
149 #define TC_BEGIN        (1 << 22)
150 #define TC_END          (1 << 23)
151 #define TC_EOF          (1 << 24)
152 #define TC_VARIABLE     (1 << 25)
153 #define TC_ARRAY        (1 << 26)
154 #define TC_FUNCTION     (1 << 27)
155 #define TC_STRING       (1 << 28)
156 #define TC_NUMBER       (1 << 29)
157
158 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
159
160 /* combined token classes */
161 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
162 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
163 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
164                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
165
166 #define TC_STATEMNT (TC_STATX | TC_WHILE)
167 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
168
169 /* word tokens, cannot mean something else if not expected */
170 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
171                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
172
173 /* discard newlines after these */
174 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
175                    | TC_BINOP | TC_OPTERM)
176
177 /* what can expression begin with */
178 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
179 /* what can group begin with */
180 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
181
182 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
183 /* operator is inserted between them */
184 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
185                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
186 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operator flags, OR-ed into tokeninfo[] entries next to the operation class.
 * NOTE(review): judging by the combined names below, RESn = "resolve operand n",
 * STRn/NUM1 = "as string / as number" - confirm against evaluate(). */
188 #define OF_RES1    0x010000
189 #define OF_RES2    0x020000
190 #define OF_STR1    0x040000
191 #define OF_STR2    0x080000
192 #define OF_NUM1    0x100000
193 #define OF_CHECKED 0x200000
194
195 /* combined operator flags: first letter describes operand 1, second operand 2 */
196 #define xx      0
197 #define xV      OF_RES2
198 #define xS      (OF_RES2 | OF_STR2)
199 #define Vx      OF_RES1
200 #define VV      (OF_RES1 | OF_RES2)
201 #define Nx      (OF_RES1 | OF_NUM1)
202 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
203 #define Sx      (OF_RES1 | OF_STR1)
204 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
205 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
/* masks selecting the operation class (OC_xxx/ST_xxx) and the low 7 bits of an info word */
207 #define OPCLSMASK 0xFF00
208 #define OPNMASK   0x007F
209
/* Operator priority is stored in the highest byte of an info word
 * (even: r->l, odd: l->r grouping).
 * For builtins the byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places a value into that byte. The shift is done in uint32_t and the
 * argument is parenthesized: builtin values such as P(0x83) set bit 31,
 * which a plain signed int shift cannot represent.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
217
218 /* Operation classes: stored in bits 8..15 of an info word (see OPCLSMASK) */
219
/* NOTE(review): the two thresholds below coincide with OC_WALKINIT (0x0600)
 * and OC_BINARY (0x1000); their use is outside this part of the file. */
220 #define SHIFT_TIL_THIS  0x0600
221 #define RECUR_FROM_THIS 0x1000
222
223 enum {
224         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
225         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
226
227         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
228         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
229         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
230
231         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
232         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
233         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
234         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
235         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
236         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
237         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
238         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
239         OC_DONE = 0x2800,
240
/* statement kinds produced for the if/do/for/while keywords in tokeninfo[] */
241         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
242         ST_WHILE = 0x3300
243 };
244
245 /* simple builtins (OC_FBLTIN entries of tokeninfo[]). Matching keywords in
 * tokenlist[]: F_in=int F_rn=rand F_co=cos F_ex=exp F_lg=log F_si=sin F_sq=sqrt
 * F_sr=srand F_ti=systime F_le=length F_sy=system F_ff=fflush F_cl=close */
246 enum {
247         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
248         F_ti,   F_le,   F_sy,   F_ff,   F_cl
249 };
250
251 /* builtins (OC_BUILTIN entries of tokeninfo[]). Matching keywords in
 * tokenlist[]: B_a2=atan2 B_ix=index B_ma=match B_sp=split B_ss=substr
 * B_ti=strftime B_lo=tolower B_up=toupper B_ge=gensub B_gs=gsub B_su=sub
 * B_an=and B_co=compl B_ls=lshift B_or=or B_rs=rshift B_xo=xor */
252 enum {
253         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
254         B_ge,   B_gs,   B_su,
255         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
256 };
257
258 /* tokens and their corresponding info values */
/* tokenlist[] layout: every token is one length byte (octal escape) followed
 * by that many characters; NTC closes the current token class and the next
 * group belongs to the next TC_xxx bit; a "\0" length byte ends the list.
 * tokeninfo[] holds one entry per token, in the same order (NTC has no entry). */
259
260 #define NTC     "\377"  /* switch to next token class (tc<<1) */
261 #define NTCC    '\377'
262
263 #define OC_B    OC_BUILTIN
264
265 static const char tokenlist[] ALIGN1 =
266         "\1("       NTC
267         "\1)"       NTC
268         "\1/"       NTC                                 /* REGEXP */
269         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
270         "\2++"      "\2--"      NTC                     /* UOPPOST */
271         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
272         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
273         "\2*="      "\2/="      "\2%="      "\2^="
274         "\1+"       "\1-"       "\3**="     "\2**"
275         "\1/"       "\1%"       "\1^"       "\1*"
276         "\2!="      "\2>="      "\2<="      "\1>"
277         "\1<"       "\2!~"      "\1~"       "\2&&"
278         "\2||"      "\1?"       "\1:"       NTC
279         "\2in"      NTC
280         "\1,"       NTC
281         "\1|"       NTC
282         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
283         "\1]"       NTC
284         "\1{"       NTC
285         "\1}"       NTC
286         "\1;"       NTC
287         "\1\n"      NTC
288         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
289         "\10continue"           "\6delete"  "\5print"
290         "\6printf"  "\4next"    "\10nextfile"
291         "\6return"  "\4exit"    NTC
292         "\5while"   NTC
293         "\4else"    NTC
294
295         "\3and"     "\5compl"   "\6lshift"  "\2or"
296         "\6rshift"  "\3xor"
297         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
298         "\3cos"     "\3exp"     "\3int"     "\3log"
299         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
300         "\6gensub"  "\4gsub"    "\5index"   "\6length"
301         "\5match"   "\5split"   "\7sprintf" "\3sub"
302         "\6substr"  "\7systime" "\10strftime"
303         "\7tolower" "\7toupper" NTC
304         "\7getline" NTC
305         "\4func"    "\10function"   NTC
306         "\5BEGIN"   NTC
307         "\3END"     "\0"
308         ;
309
/* Info word for each token of tokenlist[], in the same order. A word combines
 * an operation class (OC_xxx/ST_xxx), operand flags (xx..SS), P(priority) and
 * a low-byte sub-opcode; 0 means the token carries no extra info. */
310 static const uint32_t tokeninfo[] = {
311         0,
312         0,
313         OC_REGEXP,
314         xS|'a',     xS|'w',     xS|'|',
315         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
316         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
317             OC_FIELD|xV|P(5),
318         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
319             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
320         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
321             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
322         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
323             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
324         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
325             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
326         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
327             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
328         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
329             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
330         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
331             OC_COLON|xx|P(67)|':',
332         OC_IN|SV|P(49),
333         OC_COMMA|SS|P(80),
334         OC_PGETLINE|SV|P(37),
335         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
336             OC_UNARY|xV|P(19)|'!',
337         0,
338         0,
339         0,
340         0,
341         0,
342         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
343         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
344         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
345         OC_RETURN|Vx,   OC_EXIT|Nx,
346         ST_WHILE,
347         0,
348
349         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
356         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
357         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358         OC_GETLINE|SV|P(0),
359         0,      0,
360         0,
361         0
362 };
363
364 /* internal variable names and their initial values       */
365 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum gives the index of each variable in intvar[]; vNames[] lists the
 * names in the same order as NUL-terminated words, ended by an empty word.
 * NOTE(review): an asterisk sits right after the NUL of some names; presumably
 * it flags the name just before it (FS, RS, NF, IGNORECASE, $) - the code that
 * consumes these tables is outside this part of the file. */
366 enum {
367         CONVFMT,    OFMT,       FS,         OFS,
368         ORS,        RS,         RT,         FILENAME,
369         SUBSEP,     ARGIND,     ARGC,       ARGV,
370         ERRNO,      FNR,
371         NR,         NF,         IGNORECASE,
372         ENVIRON,    F0,         NUM_INTERNAL_VARS
373 };
374
375 static const char vNames[] ALIGN1 =
376         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
377         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
378         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
379         "ERRNO\0"   "FNR\0"
380         "NR\0"      "NF\0*"     "IGNORECASE\0*"
381         "ENVIRON\0" "$\0*"      "\0";
382
/* Initial string values for the first variables of vNames[] (CONVFMT..SUBSEP),
 * as NUL-terminated words followed by a \377 byte.
 * NOTE(review): \377 presumably means "no more string defaults" - confirm. */
383 static const char vValues[] ALIGN1 =
384         "%.6g\0"    "%.6g\0"    " \0"       " \0"
385         "\n\0"      "\n\0"      "\0"        "\0"
386         "\034\0"
387         "\377";
388
389 /* hash size may grow to these values: hash_init() starts a table with
 * FIRST_PRIME buckets, hash_rebuild() then steps through PRIMES[] in order */
390 #define FIRST_PRIME 61
391 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
392
393
394 /* Globals. Split in two parts so that first one is addressed
395  * with (mostly short) negative offsets.
 * This first part is reached through the G1 macro (ptr_to_globals[-1]). */
396 struct globals {
397         chain beginseq, mainseq, endseq;
398         chain *seq;
399         node *break_ptr, *continue_ptr;
400         rstream *iF;
401         xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables, fdhash: streams, fnhash: functions (see newvar/newfile/newfunc) */
402         const char *g_progname; /* printed by syntax_error() */
403         int g_lineno;           /* printed by syntax_error() */
404         int nfields;
405         int maxfields; /* used in fsrealloc() only */
406         var *Fields;
407         nvblock *g_cb;          /* current block of nvalloc()/nvfree() */
408         char *g_pos;            /* parse position read by next_token() */
409         char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
410         smallint icase;
411         smallint exiting;
412         smallint nextrec;
413         smallint nextfile;
414         smallint is_f0_split;
415 };
/* Second part of the globals, reached through the G macro (positive offsets
 * from ptr_to_globals). The t_* members describe the current token. */
416 struct globals2 {
417         uint32_t t_info; /* often used */
418         uint32_t t_tclass;
419         char *t_string;
420         int t_lineno;
421         int t_rollback;
422
423         var *intvar[NUM_INTERNAL_VARS]; /* often used */
424
425         /* former statics from various functions (named <function>__<variable>) */
426         char *split_f0__fstrings;
427
428         uint32_t next_token__save_tclass;
429         uint32_t next_token__save_info;
430         uint32_t next_token__ltclass;   /* set to TC_OPTERM by INIT_G() */
431         smallint next_token__concat_inserted;
432
433         smallint next_input_file__files_happen;
434         rstream next_input_file__rsm;
435
436         var *evaluate__fnargs;
437         unsigned evaluate__seed;        /* set to 1 by INIT_G() */
438         regex_t evaluate__sreg;
439
440         var ptest__v;
441
442         tsplitter exec_builtin__tspl;
443
444         /* biggest and least used members go last */
445         double t_double;
446         tsplitter fsplitter, rsplitter;
447 };
/* INIT_G() allocates both structs as one zeroed chunk and points
 * ptr_to_globals just past struct globals: G1 is therefore the element
 * before the pointer and G starts at the pointer itself. */
448 #define G1 (ptr_to_globals[-1])
449 #define G (*(struct globals2 *)ptr_to_globals)
450 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
451 /* char G1size[sizeof(G1)]; - 0x6c */
452 /* char Gsize[sizeof(G)]; - 0x1cc */
453 /* Trying to keep most of members accessible with short offsets: */
454 /* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Short names for the global members, so the code can use them like plain variables */
455 #define beginseq     (G1.beginseq    )
456 #define mainseq      (G1.mainseq     )
457 #define endseq       (G1.endseq      )
458 #define seq          (G1.seq         )
459 #define break_ptr    (G1.break_ptr   )
460 #define continue_ptr (G1.continue_ptr)
461 #define iF           (G1.iF          )
462 #define vhash        (G1.vhash       )
463 #define ahash        (G1.ahash       )
464 #define fdhash       (G1.fdhash      )
465 #define fnhash       (G1.fnhash      )
466 #define g_progname   (G1.g_progname  )
467 #define g_lineno     (G1.g_lineno    )
468 #define nfields      (G1.nfields     )
469 #define maxfields    (G1.maxfields   )
470 #define Fields       (G1.Fields      )
471 #define g_cb         (G1.g_cb        )
472 #define g_pos        (G1.g_pos       )
473 #define g_buf        (G1.g_buf       )
474 #define icase        (G1.icase       )
475 #define exiting      (G1.exiting     )
476 #define nextrec      (G1.nextrec     )
477 #define nextfile     (G1.nextfile    )
478 #define is_f0_split  (G1.is_f0_split )
479 #define t_info       (G.t_info      )
480 #define t_tclass     (G.t_tclass    )
481 #define t_string     (G.t_string    )
482 #define t_double     (G.t_double    )
483 #define t_lineno     (G.t_lineno    )
484 #define t_rollback   (G.t_rollback  )
485 #define intvar       (G.intvar      )
486 #define fsplitter    (G.fsplitter   )
487 #define rsplitter    (G.rsplitter   )
/* Allocate the globals and set the two members that must not start as zero */
488 #define INIT_G() do { \
489         SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
490         G.next_token__ltclass = TC_OPTERM; \
491         G.evaluate__seed = 1; \
492 } while (0)
493
494
495 /* function prototypes: forward declarations for functions that are called
 * before their definition appears in the file */
496 static void handle_special(var *);
497 static node *parse_expr(uint32_t);
498 static void chain_group(void);
499 static var *evaluate(node *, var *);
500 static rstream *next_input_file(void);
501 static int fmt_num(char *, int, const char *, double, int);
502 static int awk_exit(int) NORETURN;
503
504 /* ---- error handling ---- */
505
/* Message texts for syntax_error(), which prints them after "<program>:<line>: " */
506 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only needed when math support is configured out */
515 #if !ENABLE_FEATURE_AWK_MATH
516 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517 #endif
518
519 static void zero_out_var(var * vp)
520 {
521         memset(vp, 0, sizeof(*vp));
522 }
523
524 static void syntax_error(const char *const message) NORETURN;
525 static void syntax_error(const char *const message)
526 {
527         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528 }
529
530 /* ---- hash stuff ---- */
531
/* Hash a NUL-terminated name. Each character updates the value as
 * h = c + h * 63 (written as a shift and a subtraction), with the usual
 * unsigned wraparound. The empty string hashes to 0. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = *p + (h << 6) - h;
	return h;
}
539
540 /* create new hash */
541 static xhash *hash_init(void)
542 {
543         xhash *newhash;
544
545         newhash = xzalloc(sizeof(xhash));
546         newhash->csize = FIRST_PRIME;
547         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
548
549         return newhash;
550 }
551
552 /* find item in hash, return ptr to data, NULL if not found */
553 static void *hash_search(xhash *hash, const char *name)
554 {
555         hash_item *hi;
556
557         hi = hash->items [ hashidx(name) % hash->csize ];
558         while (hi) {
559                 if (strcmp(hi->name, name) == 0)
560                         return &(hi->data);
561                 hi = hi->next;
562         }
563         return NULL;
564 }
565
566 /* grow hash if it becomes too big */
567 static void hash_rebuild(xhash *hash)
568 {
569         unsigned newsize, i, idx;
570         hash_item **newitems, *hi, *thi;
571
572         if (hash->nprime == ARRAY_SIZE(PRIMES))
573                 return;
574
575         newsize = PRIMES[hash->nprime++];
576         newitems = xzalloc(newsize * sizeof(hash_item *));
577
578         for (i = 0; i < hash->csize; i++) {
579                 hi = hash->items[i];
580                 while (hi) {
581                         thi = hi;
582                         hi = thi->next;
583                         idx = hashidx(thi->name) % newsize;
584                         thi->next = newitems[idx];
585                         newitems[idx] = thi;
586                 }
587         }
588
589         free(hash->items);
590         hash->csize = newsize;
591         hash->items = newitems;
592 }
593
594 /* find item in hash, add it if necessary. Return ptr to data */
595 static void *hash_find(xhash *hash, const char *name)
596 {
597         hash_item *hi;
598         unsigned idx;
599         int l;
600
601         hi = hash_search(hash, name);
602         if (!hi) {
603                 if (++hash->nel / hash->csize > 10)
604                         hash_rebuild(hash);
605
606                 l = strlen(name) + 1;
607                 hi = xzalloc(sizeof(hash_item) + l);
608                 memcpy(hi->name, name, l);
609
610                 idx = hashidx(name) % hash->csize;
611                 hi->next = hash->items[idx];
612                 hash->items[idx] = hi;
613                 hash->glen += l;
614         }
615         return &(hi->data);
616 }
617
/* Typed wrappers around hash_find() (which creates the entry when missing):
 * findvar works on any hash; newvar/newfile/newfunc use the global
 * vhash/fdhash/fnhash tables and cast the result to the matching type. */
618 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
619 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
620 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
621 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
622
623 static void hash_remove(xhash *hash, const char *name)
624 {
625         hash_item *hi, **phi;
626
627         phi = &(hash->items[hashidx(name) % hash->csize]);
628         while (*phi) {
629                 hi = *phi;
630                 if (strcmp(hi->name, name) == 0) {
631                         hash->glen -= (strlen(name) + 1);
632                         hash->nel--;
633                         *phi = hi->next;
634                         free(hi);
635                         break;
636                 }
637                 phi = &(hi->next);
638         }
639 }
640
641 /* ------ some useful functions ------ */
642
643 static void skip_spaces(char **s)
644 {
645         char *p = *s;
646
647         while (1) {
648                 if (*p == '\\' && p[1] == '\n') {
649                         p++;
650                         t_lineno++;
651                 } else if (*p != ' ' && *p != '\t') {
652                         break;
653                 }
654                 p++;
655         }
656         *s = p;
657 }
658
/* Return the NUL-terminated word *s points at and move *s to the byte
 * right after that word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;

	return word;
}
667
/* Fetch one character from *s and advance *s. A backslash is handed to
 * bb_process_escape_sequence(); if that call returns a backslash without
 * moving *s, the character after the backslash is returned as-is. */
static char nextchar(char **s)
{
	char *after;
	char ch = **s;

	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
678
679 static ALWAYS_INLINE int isalnum_(int c)
680 {
681         return (isalnum(c) || c == '_');
682 }
683
684 /* -------- working with variables (set/get/copy/etc) -------- */
685
686 static xhash *iamarray(var *v)
687 {
688         var *a = v;
689
690         while (a->type & VF_CHILD)
691                 a = a->x.parent;
692
693         if (!(a->type & VF_ARRAY)) {
694                 a->type |= VF_ARRAY;
695                 a->x.array = hash_init();
696         }
697         return a->x.array;
698 }
699
700 static void clear_array(xhash *array)
701 {
702         unsigned i;
703         hash_item *hi, *thi;
704
705         for (i = 0; i < array->csize; i++) {
706                 hi = array->items[i];
707                 while (hi) {
708                         thi = hi;
709                         hi = hi->next;
710                         free(thi->data.v.string);
711                         free(thi);
712                 }
713                 array->items[i] = NULL;
714         }
715         array->glen = array->nel = 0;
716 }
717
718 /* clear a variable */
719 static var *clrvar(var *v)
720 {
721         if (!(v->type & VF_FSTR))
722                 free(v->string);
723
724         v->type &= VF_DONTTOUCH;
725         v->type |= VF_DIRTY;
726         v->string = NULL;
727         return v;
728 }
729
730 /* assign string value to variable */
731 static var *setvar_p(var *v, char *value)
732 {
733         clrvar(v);
734         v->string = value;
735         handle_special(v);
736         return v;
737 }
738
739 /* same as setvar_p but make a copy of string */
740 static var *setvar_s(var *v, const char *value)
741 {
742         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
743 }
744
745 /* same as setvar_s but set USER flag */
746 static var *setvar_u(var *v, const char *value)
747 {
748         setvar_s(v, value);
749         v->type |= VF_USER;
750         return v;
751 }
752
753 /* set array element to user string */
754 static void setari_u(var *a, int idx, const char *s)
755 {
756         char sidx[sizeof(int)*3 + 1];
757         var *v;
758
759         sprintf(sidx, "%d", idx);
760         v = findvar(iamarray(a), sidx);
761         setvar_u(v, s);
762 }
763
764 /* assign numeric value to variable */
765 static var *setvar_i(var *v, double value)
766 {
767         clrvar(v);
768         v->type |= VF_NUMBER;
769         v->number = value;
770         handle_special(v);
771         return v;
772 }
773
774 static const char *getvar_s(var *v)
775 {
776         /* if v is numeric and has no cached string, convert it to string */
777         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
778                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
779                 v->string = xstrdup(g_buf);
780                 v->type |= VF_CACHED;
781         }
782         return (v->string == NULL) ? "" : v->string;
783 }
784
785 static double getvar_i(var *v)
786 {
787         char *s;
788
789         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
790                 v->number = 0;
791                 s = v->string;
792                 if (s && *s) {
793                         v->number = strtod(s, &s);
794                         if (v->type & VF_USER) {
795                                 skip_spaces(&s);
796                                 if (*s != '\0')
797                                         v->type &= ~VF_USER;
798                         }
799                 } else {
800                         v->type &= ~VF_USER;
801                 }
802                 v->type |= VF_CACHED;
803         }
804         return v->number;
805 }
806
807 static var *copyvar(var *dest, const var *src)
808 {
809         if (dest != src) {
810                 clrvar(dest);
811                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
812                 dest->number = src->number;
813                 if (src->string)
814                         dest->string = xstrdup(src->string);
815         }
816         handle_special(dest);
817         return dest;
818 }
819
820 static var *incvar(var *v)
821 {
822         return setvar_i(v, getvar_i(v) + 1.);
823 }
824
825 /* return true if v is number or numeric string */
826 static int is_numeric(var *v)
827 {
828         getvar_i(v);
829         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
830 }
831
832 /* return 1 when value of v corresponds to true, 0 otherwise */
833 static int istrue(var *v)
834 {
835         if (is_numeric(v))
836                 return (v->number == 0) ? 0 : 1;
837         return (v->string && *(v->string)) ? 1 : 0;
838 }
839
840 /* temporary variables allocator. Last allocated should be first freed */
841 static var *nvalloc(int n)
842 {
843         nvblock *pb = NULL;
844         var *v, *r;
845         int size;
846
847         while (g_cb) {
848                 pb = g_cb;
849                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
850                 g_cb = g_cb->next;
851         }
852
853         if (!g_cb) {
854                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
855                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
856                 g_cb->size = size;
857                 g_cb->pos = g_cb->nv;
858                 g_cb->prev = pb;
859                 /*g_cb->next = NULL; - xzalloc did it */
860                 if (pb) pb->next = g_cb;
861         }
862
863         v = r = g_cb->pos;
864         g_cb->pos += n;
865
866         while (v < g_cb->pos) {
867                 v->type = 0;
868                 v->string = NULL;
869                 v++;
870         }
871
872         return r;
873 }
874
875 static void nvfree(var *v)
876 {
877         var *p;
878
879         if (v < g_cb->nv || v >= g_cb->pos)
880                 syntax_error(EMSG_INTERNAL_ERROR);
881
882         for (p = v; p < g_cb->pos; p++) {
883                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
884                         clear_array(iamarray(p));
885                         free(p->x.array->items);
886                         free(p->x.array);
887                 }
888                 if (p->type & VF_WALK)
889                         free(p->x.walker);
890
891                 clrvar(p);
892         }
893
894         g_cb->pos = v;
895         while (g_cb->prev && g_cb->pos == g_cb->nv) {
896                 g_cb = g_cb->prev;
897         }
898 }
899
900 /* ------- awk program text parsing ------- */
901
902 /* Parse the next token at the global position g_pos and place the results
903  * into the t_* globals. If the token isn't expected, give up. Return token class
904  */
905 static uint32_t next_token(uint32_t expected)
906 {
907 #define concat_inserted (G.next_token__concat_inserted)
908 #define save_tclass     (G.next_token__save_tclass)
909 #define save_info       (G.next_token__save_info)
910 /* Initialized to TC_OPTERM: */
911 #define ltclass         (G.next_token__ltclass)
912
913         char *p, *pp, *s;
914         const char *tl;
915         uint32_t tc;
916         const uint32_t *ti;
917         int l;
918
919         if (t_rollback) {
920                 t_rollback = FALSE;
921
922         } else if (concat_inserted) {
923                 concat_inserted = FALSE;
924                 t_tclass = save_tclass;
925                 t_info = save_info;
926
927         } else {
928                 p = g_pos;
929  readnext:
930                 skip_spaces(&p);
931                 g_lineno = t_lineno;
932                 if (*p == '#')
933                         while (*p != '\n' && *p != '\0')
934                                 p++;
935
936                 if (*p == '\n')
937                         t_lineno++;
938
939                 if (*p == '\0') {
940                         tc = TC_EOF;
941
942                 } else if (*p == '\"') {
943                         /* it's a string */
944                         t_string = s = ++p;
945                         while (*p != '\"') {
946                                 if (*p == '\0' || *p == '\n')
947                                         syntax_error(EMSG_UNEXP_EOS);
948                                 *(s++) = nextchar(&p);
949                         }
950                         p++;
951                         *s = '\0';
952                         tc = TC_STRING;
953
954                 } else if ((expected & TC_REGEXP) && *p == '/') {
955                         /* it's regexp */
956                         t_string = s = ++p;
957                         while (*p != '/') {
958                                 if (*p == '\0' || *p == '\n')
959                                         syntax_error(EMSG_UNEXP_EOS);
960                                 *s = *p++;
961                                 if (*s++ == '\\') {
962                                         pp = p;
963                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
964                                         if (*pp == '\\')
965                                                 *s++ = '\\';
966                                         if (p == pp)
967                                                 *s++ = *p++;
968                                 }
969                         }
970                         p++;
971                         *s = '\0';
972                         tc = TC_REGEXP;
973
974                 } else if (*p == '.' || isdigit(*p)) {
975                         /* it's a number */
976                         t_double = strtod(p, &p);
977                         if (*p == '.')
978                                 syntax_error(EMSG_UNEXP_TOKEN);
979                         tc = TC_NUMBER;
980
981                 } else {
982                         /* search for something known */
983                         tl = tokenlist;
984                         tc = 0x00000001;
985                         ti = tokeninfo;
986                         while (*tl) {
987                                 l = *(tl++);
988                                 if (l == NTCC) {
989                                         tc <<= 1;
990                                         continue;
991                                 }
992                                 /* if token class is expected, token
993                                  * matches and it's not a longer word,
994                                  * then this is what we are looking for
995                                  */
996                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
997                                  && *tl == *p && strncmp(p, tl, l) == 0
998                                  && !((tc & TC_WORD) && isalnum_(p[l]))
999                                 ) {
1000                                         t_info = *ti;
1001                                         p += l;
1002                                         break;
1003                                 }
1004                                 ti++;
1005                                 tl += l;
1006                         }
1007
1008                         if (!*tl) {
1009                                 /* it's a name (var/array/function),
1010                                  * otherwise it's something wrong
1011                                  */
1012                                 if (!isalnum_(*p))
1013                                         syntax_error(EMSG_UNEXP_TOKEN);
1014
1015                                 t_string = --p;
1016                                 while (isalnum_(*(++p))) {
1017                                         *(p-1) = *p;
1018                                 }
1019                                 *(p-1) = '\0';
1020                                 tc = TC_VARIABLE;
1021                                 /* also consume whitespace between functionname and bracket */
1022                                 if (!(expected & TC_VARIABLE))
1023                                         skip_spaces(&p);
1024                                 if (*p == '(') {
1025                                         tc = TC_FUNCTION;
1026                                 } else {
1027                                         if (*p == '[') {
1028                                                 p++;
1029                                                 tc = TC_ARRAY;
1030                                         }
1031                                 }
1032                         }
1033                 }
1034                 g_pos = p;
1035
1036                 /* skipping newlines in some cases */
1037                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1038                         goto readnext;
1039
1040                 /* insert concatenation operator when needed */
1041                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1042                         concat_inserted = TRUE;
1043                         save_tclass = tc;
1044                         save_info = t_info;
1045                         tc = TC_BINOP;
1046                         t_info = OC_CONCAT | SS | P(35);
1047                 }
1048
1049                 t_tclass = tc;
1050         }
1051         ltclass = t_tclass;
1052
1053         /* Are we ready for this? */
1054         if (!(ltclass & expected))
1055                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1056                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1057
1058         return ltclass;
1059 #undef concat_inserted
1060 #undef save_tclass
1061 #undef save_info
1062 #undef ltclass
1063 }
1064
1065 static void rollback_token(void)
1066 {
1067         t_rollback = TRUE;
1068 }
1069
1070 static node *new_node(uint32_t info)
1071 {
1072         node *n;
1073
1074         n = xzalloc(sizeof(node));
1075         n->info = info;
1076         n->lineno = g_lineno;
1077         return n;
1078 }
1079
1080 static node *mk_re_node(const char *s, node *n, regex_t *re)
1081 {
1082         n->info = OC_REGEXP;
1083         n->l.re = re;
1084         n->r.ire = re + 1;
1085         xregcomp(re, s, REG_EXTENDED);
1086         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1087
1088         return n;
1089 }
1090
1091 static node *condition(void)
1092 {
1093         next_token(TC_SEQSTART);
1094         return parse_expr(TC_SEQTERM);
1095 }
1096
1097 /* Parse an expression and return a pointer to the subtree built for it.
 *
 * iexp is the set of token classes that terminate the expression; the
 * terminator is consumed (callers inspect t_tclass afterwards to see which
 * one it was). An empty expression (terminator seen immediately) yields
 * NULL, because nothing was ever attached to the sentinel root.
 *
 * Node links as used here: l.n / r.n are the operands, a.n points back to
 * the node this one is attached to.
 */
1099 static node *parse_expr(uint32_t iexp)
1100 {
             /* sn: sentinel root living on the stack; the finished tree is
              *     sn.r.n
              * cn: node most recently added
              * glptr: last plain getline node, candidate for a following
              *     '<' redirection
              * xtc: token classes acceptable as the next token */
1101         node sn;
1102         node *cn = &sn;
1103         node *vn, *glptr;
1104         uint32_t tc, xtc;
1105         var *v;
1106
1107         sn.info = PRIMASK;
1108         sn.r.n = glptr = NULL;
1109         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1110
1111         while (!((tc = next_token(xtc)) & iexp)) {
1112                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1113                         /* input redirection (<) attached to glptr node */
1114                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1115                         cn->a.n = glptr;
1116                         xtc = TC_OPERAND | TC_UOPPRE;
1117                         glptr = NULL;
1118
1119                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1120                         /* for binary and postfix-unary operators, jump back over
1121                          * previous operators with higher priority */
1122                         vn = cn;
1123                         while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1124                          || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1125                                 vn = vn->a.n;
                             /* NOTE(review): the ternary's priority field is
                              * bumped only after the climb above; presumably
                              * this affects how later operators bind relative
                              * to it - confirm against the priority table */
1126                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1127                                 t_info += P(6);
                             /* the new operator takes vn's place as right
                              * child of vn's parent ... */
1128                         cn = vn->a.n->r.n = new_node(t_info);
1129                         cn->a.n = vn->a.n;
1130                         if (tc & TC_BINOP) {
                                     /* ... and vn becomes its left operand */
1131                                 cn->l.n = vn;
1132                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1133                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1134                                         /* it's a pipe */
1135                                         next_token(TC_GETLINE);
1136                                         /* give maximum priority to this pipe */
1137                                         cn->info &= ~PRIMASK;
1138                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1139                                 }
1140                         } else {
                                     /* postfix operator: vn is its only operand */
1141                                 cn->r.n = vn;
1142                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1143                         }
1144                         vn->a.n = cn;
1145
1146                 } else {
1147                         /* for operands and prefix-unary operators, attach them
1148                          * to last node */
1149                         vn = cn;
1150                         cn = vn->r.n = new_node(t_info);
1151                         cn->a.n = vn;
1152                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1153                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1154                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1155                                 /* one should be very careful with switch on tclass -
1156                                  * only simple tclasses should be used! */
1157                                 switch (tc) {
1158                                 case TC_VARIABLE:
1159                                 case TC_ARRAY:
                                             /* a name found in ahash is a
                                              * function argument and is
                                              * referenced by index; any other
                                              * name goes through newvar() */
1160                                         cn->info = OC_VAR;
1161                                         v = hash_search(ahash, t_string);
1162                                         if (v != NULL) {
1163                                                 cn->info = OC_FNARG;
1164                                                 cn->l.i = v->x.aidx;
1165                                         } else {
1166                                                 cn->l.v = newvar(t_string);
1167                                         }
                                             /* subscript: parsed recursively up
                                              * to the TC_ARRTERM token */
1168                                         if (tc & TC_ARRAY) {
1169                                                 cn->info |= xS;
1170                                                 cn->r.n = parse_expr(TC_ARRTERM);
1171                                         }
1172                                         break;
1173
1174                                 case TC_NUMBER:
1175                                 case TC_STRING:
                                             /* literal: stored in a freshly
                                              * allocated var owned by the node */
1176                                         cn->info = OC_VAR;
1177                                         v = cn->l.v = xzalloc(sizeof(var));
1178                                         if (tc & TC_NUMBER)
1179                                                 setvar_i(v, t_double);
1180                                         else
1181                                                 setvar_s(v, t_string);
1182                                         break;
1183
1184                                 case TC_REGEXP:
                                             /* room for two regex_t: plain and
                                              * case-insensitive compilation */
1185                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1186                                         break;
1187
1188                                 case TC_FUNCTION:
                                             /* user function call: arguments are
                                              * the parenthesized expression */
1189                                         cn->info = OC_FUNC;
1190                                         cn->r.f = newfunc(t_string);
1191                                         cn->l.n = condition();
1192                                         break;
1193
1194                                 case TC_SEQSTART:
                                             /* parenthesized subexpression: its
                                              * tree replaces the node created
                                              * above */
1195                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1196                                         cn->a.n = vn;
1197                                         break;
1198
1199                                 case TC_GETLINE:
                                             /* remember it so that a following
                                              * '<' is taken as redirection */
1200                                         glptr = cn;
1201                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1202                                         break;
1203
1204                                 case TC_BUILTIN:
1205                                         cn->l.n = condition();
1206                                         break;
1207                                 }
1208                         }
1209                 }
1210         }
1211         return sn.r.n;
1212 }
1213
1214 /* add node to chain. Return ptr to alloc'd node */
1215 static node *chain_node(uint32_t info)
1216 {
1217         node *n;
1218
1219         if (!seq->first)
1220                 seq->first = seq->last = new_node(0);
1221
1222         if (seq->programname != g_progname) {
1223                 seq->programname = g_progname;
1224                 n = chain_node(OC_NEWSOURCE);
1225                 n->l.s = xstrdup(g_progname);
1226         }
1227
1228         n = seq->last;
1229         n->info = info;
1230         seq->last = n->a.n = new_node(OC_DONE);
1231
1232         return n;
1233 }
1234
1235 static void chain_expr(uint32_t info)
1236 {
1237         node *n;
1238
1239         n = chain_node(info);
1240         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1241         if (t_tclass & TC_GRPTERM)
1242                 rollback_token();
1243 }
1244
1245 static node *chain_loop(node *nn)
1246 {
1247         node *n, *n2, *save_brk, *save_cont;
1248
1249         save_brk = break_ptr;
1250         save_cont = continue_ptr;
1251
1252         n = chain_node(OC_BR | Vx);
1253         continue_ptr = new_node(OC_EXEC);
1254         break_ptr = new_node(OC_EXEC);
1255         chain_group();
1256         n2 = chain_node(OC_EXEC | Vx);
1257         n2->l.n = nn;
1258         n2->a.n = n;
1259         continue_ptr->a.n = n2;
1260         break_ptr->a.n = n->r.n = seq->last;
1261
1262         continue_ptr = save_cont;
1263         break_ptr = save_brk;
1264
1265         return n;
1266 }
1267
1268 /* parse group and attach it to chain */
1269 static void chain_group(void)
1270 {
1271         uint32_t c;
1272         node *n, *n2, *n3;
1273
1274         do {
1275                 c = next_token(TC_GRPSEQ);
1276         } while (c & TC_NEWLINE);
1277
1278         if (c & TC_GRPSTART) {
1279                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1280                         if (t_tclass & TC_NEWLINE) continue;
1281                         rollback_token();
1282                         chain_group();
1283                 }
1284         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1285                 rollback_token();
1286                 chain_expr(OC_EXEC | Vx);
1287         } else {                                                /* TC_STATEMNT */
1288                 switch (t_info & OPCLSMASK) {
1289                 case ST_IF:
1290                         n = chain_node(OC_BR | Vx);
1291                         n->l.n = condition();
1292                         chain_group();
1293                         n2 = chain_node(OC_EXEC);
1294                         n->r.n = seq->last;
1295                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1296                                 chain_group();
1297                                 n2->a.n = seq->last;
1298                         } else {
1299                                 rollback_token();
1300                         }
1301                         break;
1302
1303                 case ST_WHILE:
1304                         n2 = condition();
1305                         n = chain_loop(NULL);
1306                         n->l.n = n2;
1307                         break;
1308
1309                 case ST_DO:
1310                         n2 = chain_node(OC_EXEC);
1311                         n = chain_loop(NULL);
1312                         n2->a.n = n->a.n;
1313                         next_token(TC_WHILE);
1314                         n->l.n = condition();
1315                         break;
1316
1317                 case ST_FOR:
1318                         next_token(TC_SEQSTART);
1319                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1320                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1321                                 if ((n2->info & OPCLSMASK) != OC_IN)
1322                                         syntax_error(EMSG_UNEXP_TOKEN);
1323                                 n = chain_node(OC_WALKINIT | VV);
1324                                 n->l.n = n2->l.n;
1325                                 n->r.n = n2->r.n;
1326                                 n = chain_loop(NULL);
1327                                 n->info = OC_WALKNEXT | Vx;
1328                                 n->l.n = n2->l.n;
1329                         } else {                        /* for (;;) */
1330                                 n = chain_node(OC_EXEC | Vx);
1331                                 n->l.n = n2;
1332                                 n2 = parse_expr(TC_SEMICOL);
1333                                 n3 = parse_expr(TC_SEQTERM);
1334                                 n = chain_loop(n3);
1335                                 n->l.n = n2;
1336                                 if (!n2)
1337                                         n->info = OC_EXEC;
1338                         }
1339                         break;
1340
1341                 case OC_PRINT:
1342                 case OC_PRINTF:
1343                         n = chain_node(t_info);
1344                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1345                         if (t_tclass & TC_OUTRDR) {
1346                                 n->info |= t_info;
1347                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1348                         }
1349                         if (t_tclass & TC_GRPTERM)
1350                                 rollback_token();
1351                         break;
1352
1353                 case OC_BREAK:
1354                         n = chain_node(OC_EXEC);
1355                         n->a.n = break_ptr;
1356                         break;
1357
1358                 case OC_CONTINUE:
1359                         n = chain_node(OC_EXEC);
1360                         n->a.n = continue_ptr;
1361                         break;
1362
1363                 /* delete, next, nextfile, return, exit */
1364                 default:
1365                         chain_expr(t_info);
1366                 }
1367         }
1368 }
1369
1370 static void parse_program(char *p)
1371 {
1372         uint32_t tclass;
1373         node *cn;
1374         func *f;
1375         var *v;
1376
1377         g_pos = p;
1378         t_lineno = 1;
1379         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1380                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1381
1382                 if (tclass & TC_OPTERM)
1383                         continue;
1384
1385                 seq = &mainseq;
1386                 if (tclass & TC_BEGIN) {
1387                         seq = &beginseq;
1388                         chain_group();
1389
1390                 } else if (tclass & TC_END) {
1391                         seq = &endseq;
1392                         chain_group();
1393
1394                 } else if (tclass & TC_FUNCDECL) {
1395                         next_token(TC_FUNCTION);
1396                         g_pos++;
1397                         f = newfunc(t_string);
1398                         f->body.first = NULL;
1399                         f->nargs = 0;
1400                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1401                                 v = findvar(ahash, t_string);
1402                                 v->x.aidx = (f->nargs)++;
1403
1404                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1405                                         break;
1406                         }
1407                         seq = &(f->body);
1408                         chain_group();
1409                         clear_array(ahash);
1410
1411                 } else if (tclass & TC_OPSEQ) {
1412                         rollback_token();
1413                         cn = chain_node(OC_TEST);
1414                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1415                         if (t_tclass & TC_GRPSTART) {
1416                                 rollback_token();
1417                                 chain_group();
1418                         } else {
1419                                 chain_node(OC_PRINT);
1420                         }
1421                         cn->r.n = mainseq.last;
1422
1423                 } else /* if (tclass & TC_GRPSTART) */ {
1424                         rollback_token();
1425                         chain_group();
1426                 }
1427         }
1428 }
1429
1430
1431 /* -------- program execution part -------- */
1432
1433 static node *mk_splitter(const char *s, tsplitter *spl)
1434 {
1435         regex_t *re, *ire;
1436         node *n;
1437
1438         re = &spl->re[0];
1439         ire = &spl->re[1];
1440         n = &spl->n;
1441         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1442                 regfree(re);
1443                 regfree(ire); // TODO: nuke ire, use re+1?
1444         }
1445         if (strlen(s) > 1) {
1446                 mk_re_node(s, n, re);
1447         } else {
1448                 n->info = (uint32_t) *s;
1449         }
1450
1451         return n;
1452 }
1453
1454 /* use node as a regular expression. Supplied with node ptr and regex_t
1455  * storage space. Return ptr to regex (if result points to preg, it should
1456  * be later regfree'd manually
1457  */
1458 static regex_t *as_regex(node *op, regex_t *preg)
1459 {
1460         var *v;
1461         const char *s;
1462
1463         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1464                 return icase ? op->r.ire : op->l.re;
1465         }
1466         v = nvalloc(1);
1467         s = getvar_s(evaluate(op, v));
1468         xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1469         nvfree(v);
1470         return preg;
1471 }
1472
/* Make sure buffer *b can hold more than n bytes.
 *
 * Nothing happens when the buffer exists and n is below *size. Otherwise
 * *size becomes n + n/2 + 80 and the buffer is (re)allocated to that size
 * with xrealloc(). *size is not read while *b is NULL.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1481
1482 /* resize field storage space */
1483 static void fsrealloc(int size)
1484 {
1485         int i;
1486
1487         if (size >= maxfields) {
1488                 i = maxfields;
1489                 maxfields = size + 16;
1490                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1491                 for (; i < maxfields; i++) {
1492                         Fields[i].type = VF_SPECIAL;
1493                         Fields[i].string = NULL;
1494                 }
1495         }
1496
1497         if (size < nfields) {
1498                 for (i = size; i < nfields; i++) {
1499                         clrvar(Fields + i);
1500                 }
1501         }
1502         nfields = size;
1503 }
1504
/* Split string s into fields according to splitter node spl.
 *
 * A buffer is allocated and stored in *slist; the fields are written into
 * it one after another, each terminated by NUL. The return value is the
 * number of fields. The caller owns (and must free) *slist.
 *
 * Depending on spl->info the split is done by regexp, per character
 * (separator char is NUL), by a single separator character, or on runs of
 * whitespace (separator char is ' ').
 */
1505 static int awk_split(const char *s, node *spl, char **slist)
1506 {
1507         int l, n = 0;
             /* c[0], c[1]: separator char (upper/lower case variants when
              *     icase is set)
              * c[2]: '\n' when RS is empty, else NUL
              * c[3]: NUL, terminates the set
              * "c" is used as strpbrk() set, "c+2" as strcspn() set */
1508         char c[4];
1509         char *s1;
1510         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1511
1512         /* in worst case, each char would be a separate field */
1513         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1514         strcpy(s1, s);
1515
1516         c[0] = c[1] = (char)spl->info;
1517         c[2] = c[3] = '\0';
1518         if (*getvar_s(intvar[RS]) == '\0')
1519                 c[2] = '\n';
1520
1521         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1522                 if (!*s)
1523                         return n; /* "": zero fields */
1524                 n++; /* at least one field will be there */
1525                 do {
1526                         l = strcspn(s, c+2); /* len till next NUL or \n */
                             /* use the match only if it starts no later than
                              * offset l */
1527                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1528                          && pmatch[0].rm_so <= l
1529                         ) {
1530                                 l = pmatch[0].rm_so;
                                     /* match ends at offset 0, i.e. it is empty:
                                      * take one character so that s advances */
1531                                 if (pmatch[0].rm_eo == 0) {
1532                                         l++;
1533                                         pmatch[0].rm_eo++;
1534                                 }
1535                                 n++; /* we saw yet another delimiter */
1536                         } else {
                                     /* no usable delimiter: the field runs up to
                                      * offset l; also step over the char found
                                      * there unless it is the final NUL */
1537                                 pmatch[0].rm_eo = l;
1538                                 if (s[l]) pmatch[0].rm_eo++;
1539                         }
                             /* emit s[0..l) as a field; nextword() presumably
                              * moves s1 past the terminating NUL - confirm */
1540                         memcpy(s1, s, l);
1541                         s1[l] = '\0';
1542                         nextword(&s1);
1543                         s += pmatch[0].rm_eo;
1544                 } while (*s);
1545                 return n;
1546         }
1547         if (c[0] == '\0') {  /* null split */
                     /* every character becomes a field of its own */
1548                 while (*s) {
1549                         *s1++ = *s++;
1550                         *s1++ = '\0';
1551                         n++;
1552                 }
1553                 return n;
1554         }
1555         if (c[0] != ' ') {  /* single-character split */
1556                 if (icase) {
1557                         c[0] = toupper(c[0]);
1558                         c[1] = tolower(c[1]);
1559                 }
                     /* s1 still holds the copy of s made above: replace each
                      * separator in it by NUL. A non-empty string has one
                      * field more than it has separators. */
1560                 if (*s1) n++;
1561                 while ((s1 = strpbrk(s1, c))) {
1562                         *s1++ = '\0';
1563                         n++;
1564                 }
1565                 return n;
1566         }
1567         /* space split */
             /* leading, trailing and repeated whitespace produce no fields */
1568         while (*s) {
1569                 s = skip_whitespace(s);
1570                 if (!*s) break;
1571                 n++;
1572                 while (*s && !isspace(*s))
1573                         *s1++ = *s++;
1574                 *s1++ = '\0';
1575         }
1576         return n;
1577 }
1578
1579 static void split_f0(void)
1580 {
1581 /* static char *fstrings; */
1582 #define fstrings (G.split_f0__fstrings)
1583
1584         int i, n;
1585         char *s;
1586
1587         if (is_f0_split)
1588                 return;
1589
1590         is_f0_split = TRUE;
1591         free(fstrings);
1592         fsrealloc(0);
1593         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1594         fsrealloc(n);
1595         s = fstrings;
1596         for (i = 0; i < n; i++) {
1597                 Fields[i].string = nextword(&s);
1598                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1599         }
1600
1601         /* set NF manually to avoid side effects */
1602         clrvar(intvar[NF]);
1603         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1604         intvar[NF]->number = nfields;
1605 #undef fstrings
1606 }
1607
1608 /* perform additional actions when some internal variables changed */
1609 static void handle_special(var *v)
1610 {
1611         int n;
1612         char *b;
1613         const char *sep, *s;
1614         int sl, l, len, i, bsize;
1615
1616         if (!(v->type & VF_SPECIAL))
1617                 return;
1618
1619         if (v == intvar[NF]) {
1620                 n = (int)getvar_i(v);
1621                 fsrealloc(n);
1622
1623                 /* recalculate $0 */
1624                 sep = getvar_s(intvar[OFS]);
1625                 sl = strlen(sep);
1626                 b = NULL;
1627                 len = 0;
1628                 for (i = 0; i < n; i++) {
1629                         s = getvar_s(&Fields[i]);
1630                         l = strlen(s);
1631                         if (b) {
1632                                 memcpy(b+len, sep, sl);
1633                                 len += sl;
1634                         }
1635                         qrealloc(&b, len+l+sl, &bsize);
1636                         memcpy(b+len, s, l);
1637                         len += l;
1638                 }
1639                 if (b)
1640                         b[len] = '\0';
1641                 setvar_p(intvar[F0], b);
1642                 is_f0_split = TRUE;
1643
1644         } else if (v == intvar[F0]) {
1645                 is_f0_split = FALSE;
1646
1647         } else if (v == intvar[FS]) {
1648                 mk_splitter(getvar_s(v), &fsplitter);
1649
1650         } else if (v == intvar[RS]) {
1651                 mk_splitter(getvar_s(v), &rsplitter);
1652
1653         } else if (v == intvar[IGNORECASE]) {
1654                 icase = istrue(v);
1655
1656         } else {                                /* $n */
1657                 n = getvar_i(intvar[NF]);
1658                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1659                 /* right here v is invalid. Just to note... */
1660         }
1661 }
1662
1663 /* step through func/builtin/etc arguments */
1664 static node *nextarg(node **pn)
1665 {
1666         node *n;
1667
1668         n = *pn;
1669         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1670                 *pn = n->r.n;
1671                 n = n->l.n;
1672         } else {
1673                 *pn = NULL;
1674         }
1675         return n;
1676 }
1677
1678 static void hashwalk_init(var *v, xhash *array)
1679 {
1680         char **w;
1681         hash_item *hi;
1682         unsigned i;
1683
1684         if (v->type & VF_WALK)
1685                 free(v->x.walker);
1686
1687         v->type |= VF_WALK;
1688         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1689         w[0] = w[1] = (char *)(w + 2);
1690         for (i = 0; i < array->csize; i++) {
1691                 hi = array->items[i];
1692                 while (hi) {
1693                         strcpy(*w, hi->name);
1694                         nextword(w);
1695                         hi = hi->next;
1696                 }
1697         }
1698 }
1699
1700 static int hashwalk_next(var *v)
1701 {
1702         char **w;
1703
1704         w = v->x.walker;
1705         if (w[1] == w[0])
1706                 return FALSE;
1707
1708         setvar_s(v, nextword(w+1));
1709         return TRUE;
1710 }
1711
1712 /* evaluate node, return 1 when result is true, 0 otherwise */
1713 static int ptest(node *pattern)
1714 {
1715         /* ptest__v is "static": to save stack space? */
1716         return istrue(evaluate(pattern, &G.ptest__v));
1717 }
1718
/*
 * Read the next record from stream rsm into variable v.
 *
 * The stream keeps its own buffer (rsm->buffer, rsm->size).  rsm->adv is
 * the offset of the first unconsumed byte and rsm->pos the number of
 * unconsumed bytes; both are updated before returning.
 *
 * The record separator is taken from rsplitter:
 *   - a regexp node: the record ends at the first match;
 *   - otherwise the low byte of rsplitter.n.info (c) when non-zero:
 *     the record ends at that character (or at an embedded NUL);
 *   - c == '\0': leading newlines are skipped and the record ends at
 *     the first run of two or more newlines.
 *
 * On success v receives the record text (flagged VF_USER) and
 * intvar[RT] receives the text that terminated it.
 *
 * Returns 1 when a record was stored, 0 when no data is left, and -1
 * when the read failed (intvar[ERRNO] is then set from errno).
 */
static int awk_getline(rstream *rsm, var *v)
{
        char *b;
        regmatch_t pmatch[2];
        int a, p, pp=0, size;
        int fd, so, eo, r, rp;
        char c, *m, *s;

        /* we're using our own buffer since we need access to accumulating
         * characters
         */
        fd = fileno(rsm->F);
        m = rsm->buffer;
        a = rsm->adv;
        p = rsm->pos;
        size = rsm->size;
        c = (char) rsplitter.n.info;
        rp = 0;

        if (!m) qrealloc(&m, 256, &size);
        do {
                /* b: start of unconsumed data; by default the record
                 * covers all p bytes and has an empty terminator */
                b = m + a;
                so = eo = p;
                r = 1;
                if (p > 0) {
                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
                                                        b, 1, pmatch, 0) == 0) {
                                        so = pmatch[0].rm_so;
                                        eo = pmatch[0].rm_eo;
                                        /* a match touching the end of the data
                                         * might still grow: read more first */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        } else if (c != '\0') {
                                /* only the bytes added by the last read
                                 * (from pp on) need to be searched */
                                s = strchr(b+pp, c);
                                if (!s) s = memchr(b+pp, '\0', p - pp);
                                if (s) {
                                        so = eo = s-b;
                                        eo++;
                                        break;
                                }
                        } else {
                                /* rp: record text starts after leading newlines */
                                while (b[rp] == '\n')
                                        rp++;
                                s = strstr(b+rp, "\n\n");
                                if (s) {
                                        so = eo = s-b;
                                        while (b[eo] == '\n') eo++;
                                        /* newline run reaching the end of the
                                         * data might continue: read more first */
                                        if (b[eo] != '\0')
                                                break;
                                }
                        }
                }

                /* no complete record yet: move the unconsumed data
                 * (and its terminating NUL) to the buffer start */
                if (a > 0) {
                        memmove(m, (const void *)(m+a), p+1);
                        b = m;
                        a = 0;
                }

                qrealloc(&m, a+p+128, &size);
                b = m + a;
                pp = p;
                p += safe_read(fd, b+p, size-p-1);
                if (p < pp) {
                        /* read returned a negative value: report failure
                         * and drop the buffered data */
                        p = 0;
                        r = 0;
                        setvar_i(intvar[ERRNO], errno);
                }
                b[p] = '\0';

        } while (p > pp);       /* loop while the last read added data */

        if (p == 0) {
                /* nothing to return: 1 -> 0 (no data), 0 -> -1 (error) */
                r--;
        } else {
                /* temporarily NUL-terminate record and terminator in place */
                c = b[so]; b[so] = '\0';
                setvar_s(v, b+rp);
                v->type |= VF_USER;
                b[so] = c;
                c = b[eo]; b[eo] = '\0';
                setvar_s(intvar[RT], b+so);
                b[eo] = c;
        }

        /* remember what is left for the next call */
        rsm->buffer = m;
        rsm->adv = a + eo;
        rsm->pos = p - eo;
        rsm->size = size;

        return r;
}
1812
1813 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1814 {
1815         int r = 0;
1816         char c;
1817         const char *s = format;
1818
1819         if (int_as_int && n == (int)n) {
1820                 r = snprintf(b, size, "%d", (int)n);
1821         } else {
1822                 do { c = *s; } while (c && *++s);
1823                 if (strchr("diouxX", c)) {
1824                         r = snprintf(b, size, format, (int)n);
1825                 } else if (strchr("eEfgG", c)) {
1826                         r = snprintf(b, size, format, n);
1827                 } else {
1828                         syntax_error(EMSG_INV_FMT);
1829                 }
1830         }
1831         return r;
1832 }
1833
1834
1835 /* formatted output into an allocated buffer, return ptr to buffer */
1836 static char *awk_printf(node *n)
1837 {
1838         char *b = NULL;
1839         char *fmt, *s, *f;
1840         const char *s1;
1841         int i, j, incr, bsize;
1842         char c, c1;
1843         var *v, *arg;
1844
1845         v = nvalloc(1);
1846         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1847
1848         i = 0;
1849         while (*f) {
1850                 s = f;
1851                 while (*f && (*f != '%' || *(++f) == '%'))
1852                         f++;
1853                 while (*f && !isalpha(*f)) {
1854                         if (*f == '*')
1855                                 syntax_error("%*x formats are not supported");
1856                         f++;
1857                 }
1858
1859                 incr = (f - s) + MAXVARFMT;
1860                 qrealloc(&b, incr + i, &bsize);
1861                 c = *f;
1862                 if (c != '\0') f++;
1863                 c1 = *f;
1864                 *f = '\0';
1865                 arg = evaluate(nextarg(&n), v);
1866
1867                 j = i;
1868                 if (c == 'c' || !c) {
1869                         i += sprintf(b+i, s, is_numeric(arg) ?
1870                                         (char)getvar_i(arg) : *getvar_s(arg));
1871                 } else if (c == 's') {
1872                         s1 = getvar_s(arg);
1873                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1874                         i += sprintf(b+i, s, s1);
1875                 } else {
1876                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1877                 }
1878                 *f = c1;
1879
1880                 /* if there was an error while sprintf, return value is negative */
1881                 if (i < j) i = j;
1882         }
1883
1884         b = xrealloc(b, i + 1);
1885         free(fmt);
1886         nvfree(v);
1887         b[i] = '\0';
1888         return b;
1889 }
1890
1891 /* common substitution routine
1892  * replace (nm) substring of (src) that match (n) with (repl), store
1893  * result into (dest), return number of substitutions. If nm=0, replace
1894  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1895  * subexpression matching (\1-\9)
1896  */
1897 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1898 {
1899         char *ds = NULL;
1900         const char *s;
1901         const char *sp;
1902         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1903         regmatch_t pmatch[10];
1904         regex_t sreg, *re;
1905
1906         re = as_regex(rn, &sreg);
1907         if (!src) src = intvar[F0];
1908         if (!dest) dest = intvar[F0];
1909
1910         i = di = 0;
1911         sp = getvar_s(src);
1912         rl = strlen(repl);
1913         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1914                 so = pmatch[0].rm_so;
1915                 eo = pmatch[0].rm_eo;
1916
1917                 qrealloc(&ds, di + eo + rl, &dssize);
1918                 memcpy(ds + di, sp, eo);
1919                 di += eo;
1920                 if (++i >= nm) {
1921                         /* replace */
1922                         di -= (eo - so);
1923                         nbs = 0;
1924                         for (s = repl; *s; s++) {
1925                                 ds[di++] = c = *s;
1926                                 if (c == '\\') {
1927                                         nbs++;
1928                                         continue;
1929                                 }
1930                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1931                                         di -= ((nbs + 3) >> 1);
1932                                         j = 0;
1933                                         if (c != '&') {
1934                                                 j = c - '0';
1935                                                 nbs++;
1936                                         }
1937                                         if (nbs % 2) {
1938                                                 ds[di++] = c;
1939                                         } else {
1940                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1941                                                 qrealloc(&ds, di + rl + n, &dssize);
1942                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1943                                                 di += n;
1944                                         }
1945                                 }
1946                                 nbs = 0;
1947                         }
1948                 }
1949
1950                 sp += eo;
1951                 if (i == nm) break;
1952                 if (eo == so) {
1953                         ds[di] = *sp++;
1954                         if (!ds[di++]) break;
1955                 }
1956         }
1957
1958         qrealloc(&ds, di + strlen(sp), &dssize);
1959         strcpy(ds + di, sp);
1960         setvar_p(dest, ds);
1961         if (re == &sreg) regfree(re);
1962         return i;
1963 }
1964
1965 static var *exec_builtin(node *op, var *res)
1966 {
1967 #define tspl (G.exec_builtin__tspl)
1968
1969         int (*to_xxx)(int);
1970         var *tv;
1971         node *an[4];
1972         var *av[4];
1973         const char *as[4];
1974         regmatch_t pmatch[2];
1975         regex_t sreg, *re;
1976         node *spl;
1977         uint32_t isr, info;
1978         int nargs;
1979         time_t tt;
1980         char *s, *s1;
1981         int i, l, ll, n;
1982
1983         tv = nvalloc(4);
1984         isr = info = op->info;
1985         op = op->l.n;
1986
1987         av[2] = av[3] = NULL;
1988         for (i = 0; i < 4 && op; i++) {
1989                 an[i] = nextarg(&op);
1990                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1991                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1992                 isr >>= 1;
1993         }
1994
1995         nargs = i;
1996         if ((uint32_t)nargs < (info >> 30))
1997                 syntax_error(EMSG_TOO_FEW_ARGS);
1998
1999         switch (info & OPNMASK) {
2000
2001         case B_a2:
2002 #if ENABLE_FEATURE_AWK_MATH
2003                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
2004 #else
2005                 syntax_error(EMSG_NO_MATH);
2006 #endif
2007                 break;
2008
2009         case B_sp:
2010                 if (nargs > 2) {
2011                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2012                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2013                 } else {
2014                         spl = &fsplitter.n;
2015                 }
2016
2017                 n = awk_split(as[0], spl, &s);
2018                 s1 = s;
2019                 clear_array(iamarray(av[1]));
2020                 for (i=1; i<=n; i++)
2021                         setari_u(av[1], i, nextword(&s1));
2022                 free(s);
2023                 setvar_i(res, n);
2024                 break;
2025
2026         case B_ss:
2027                 l = strlen(as[0]);
2028                 i = getvar_i(av[1]) - 1;
2029                 if (i > l) i = l;
2030                 if (i < 0) i = 0;
2031                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2032                 if (n < 0) n = 0;
2033                 s = xstrndup(as[0]+i, n);
2034                 setvar_p(res, s);
2035                 break;
2036
2037         case B_an:
2038                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
2039                 break;
2040
2041         case B_co:
2042                 setvar_i(res, ~(long)getvar_i(av[0]));
2043                 break;
2044
2045         case B_ls:
2046                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
2047                 break;
2048
2049         case B_or:
2050                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
2051                 break;
2052
2053         case B_rs:
2054                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
2055                 break;
2056
2057         case B_xo:
2058                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
2059                 break;
2060
2061         case B_lo:
2062                 to_xxx = tolower;
2063                 goto lo_cont;
2064
2065         case B_up:
2066                 to_xxx = toupper;
2067  lo_cont:
2068                 s1 = s = xstrdup(as[0]);
2069                 while (*s1) {
2070                         *s1 = (*to_xxx)(*s1);
2071                         s1++;
2072                 }
2073                 setvar_p(res, s);
2074                 break;
2075
2076         case B_ix:
2077                 n = 0;
2078                 ll = strlen(as[1]);
2079                 l = strlen(as[0]) - ll;
2080                 if (ll > 0 && l >= 0) {
2081                         if (!icase) {
2082                                 s = strstr(as[0], as[1]);
2083                                 if (s) n = (s - as[0]) + 1;
2084                         } else {
2085                                 /* this piece of code is terribly slow and
2086                                  * really should be rewritten
2087                                  */
2088                                 for (i=0; i<=l; i++) {
2089                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2090                                                 n = i+1;
2091                                                 break;
2092                                         }
2093                                 }
2094                         }
2095                 }
2096                 setvar_i(res, n);
2097                 break;
2098
2099         case B_ti:
2100                 if (nargs > 1)
2101                         tt = getvar_i(av[1]);
2102                 else
2103                         time(&tt);
2104                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2105                 i = strftime(g_buf, MAXVARFMT,
2106                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2107                         localtime(&tt));
2108                 g_buf[i] = '\0';
2109                 setvar_s(res, g_buf);
2110                 break;
2111
2112         case B_ma:
2113                 re = as_regex(an[1], &sreg);
2114                 n = regexec(re, as[0], 1, pmatch, 0);
2115                 if (n == 0) {
2116                         pmatch[0].rm_so++;
2117                         pmatch[0].rm_eo++;
2118                 } else {
2119                         pmatch[0].rm_so = 0;
2120                         pmatch[0].rm_eo = -1;
2121                 }
2122                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2123                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2124                 setvar_i(res, pmatch[0].rm_so);
2125                 if (re == &sreg) regfree(re);
2126                 break;
2127
2128         case B_ge:
2129                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2130                 break;
2131
2132         case B_gs:
2133                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2134                 break;
2135
2136         case B_su:
2137                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2138                 break;
2139         }
2140
2141         nvfree(tv);
2142         return res;
2143 #undef tspl
2144 }
2145
2146 /*
2147  * Evaluate node - the heart of the program. Supplied with subtree
2148  * and place where to store result. returns ptr to result.
2149  */
/* Shift a value right by 8 bits. evaluate() applies it both to the
 * masked opcode class it switches on and to every OC_xxx case label,
 * so the two always compare in the same scaled-down form
 * (presumably to keep the case values small -- confirm against the
 * OC_xxx definitions). */
#define XC(n) ((n) >> 8)
2151
2152 static var *evaluate(node *op, var *res)
2153 {
2154 /* This procedure is recursive so we should count every byte */
2155 #define fnargs (G.evaluate__fnargs)
2156 /* seed is initialized to 1 */
2157 #define seed   (G.evaluate__seed)
2158 #define sreg   (G.evaluate__sreg)
2159
2160         node *op1;
2161         var *v1;
2162         union {
2163                 var *v;
2164                 const char *s;
2165                 double d;
2166                 int i;
2167         } L, R;
2168         uint32_t opinfo;
2169         int opn;
2170         union {
2171                 char *s;
2172                 rstream *rsm;
2173                 FILE *F;
2174                 var *v;
2175                 regex_t *re;
2176                 uint32_t info;
2177         } X;
2178
2179         if (!op)
2180                 return setvar_s(res, NULL);
2181
2182         v1 = nvalloc(2);
2183
2184         while (op) {
2185                 opinfo = op->info;
2186                 opn = (opinfo & OPNMASK);
2187                 g_lineno = op->lineno;
2188
2189                 /* execute inevitable things */
2190                 op1 = op->l.n;
2191                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2192                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2193                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2194                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2195                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2196
2197                 switch (XC(opinfo & OPCLSMASK)) {
2198
2199                 /* -- iterative node type -- */
2200
2201                 /* test pattern */
2202                 case XC( OC_TEST ):
2203                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2204                                 /* it's range pattern */
2205                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2206                                         op->info |= OF_CHECKED;
2207                                         if (ptest(op1->r.n))
2208                                                 op->info &= ~OF_CHECKED;
2209
2210                                         op = op->a.n;
2211                                 } else {
2212                                         op = op->r.n;
2213                                 }
2214                         } else {
2215                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2216                         }
2217                         break;
2218
2219                 /* just evaluate an expression, also used as unconditional jump */
2220                 case XC( OC_EXEC ):
2221                         break;
2222
2223                 /* branch, used in if-else and various loops */
2224                 case XC( OC_BR ):
2225                         op = istrue(L.v) ? op->a.n : op->r.n;
2226                         break;
2227
2228                 /* initialize for-in loop */
2229                 case XC( OC_WALKINIT ):
2230                         hashwalk_init(L.v, iamarray(R.v));
2231                         break;
2232
2233                 /* get next array item */
2234                 case XC( OC_WALKNEXT ):
2235                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2236                         break;
2237
2238                 case XC( OC_PRINT ):
2239                 case XC( OC_PRINTF ):
2240                         X.F = stdout;
2241                         if (op->r.n) {
2242                                 X.rsm = newfile(R.s);
2243                                 if (!X.rsm->F) {
2244                                         if (opn == '|') {
2245                                                 X.rsm->F = popen(R.s, "w");
2246                                                 if (X.rsm->F == NULL)
2247                                                         bb_perror_msg_and_die("popen");
2248                                                 X.rsm->is_pipe = 1;
2249                                         } else {
2250                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2251                                         }
2252                                 }
2253                                 X.F = X.rsm->F;
2254                         }
2255
2256                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2257                                 if (!op1) {
2258                                         fputs(getvar_s(intvar[F0]), X.F);
2259                                 } else {
2260                                         while (op1) {
2261                                                 L.v = evaluate(nextarg(&op1), v1);
2262                                                 if (L.v->type & VF_NUMBER) {
2263                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2264                                                                         getvar_i(L.v), TRUE);
2265                                                         fputs(g_buf, X.F);
2266                                                 } else {
2267                                                         fputs(getvar_s(L.v), X.F);
2268                                                 }
2269
2270                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2271                                         }
2272                                 }
2273                                 fputs(getvar_s(intvar[ORS]), X.F);
2274
2275                         } else {        /* OC_PRINTF */
2276                                 L.s = awk_printf(op1);
2277                                 fputs(L.s, X.F);
2278                                 free((char*)L.s);
2279                         }
2280                         fflush(X.F);
2281                         break;
2282
2283                 case XC( OC_DELETE ):
2284                         X.info = op1->info & OPCLSMASK;
2285                         if (X.info == OC_VAR) {
2286                                 R.v = op1->l.v;
2287                         } else if (X.info == OC_FNARG) {
2288                                 R.v = &fnargs[op1->l.i];
2289                         } else {
2290                                 syntax_error(EMSG_NOT_ARRAY);
2291                         }
2292
2293                         if (op1->r.n) {
2294                                 clrvar(L.v);
2295                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2296                                 hash_remove(iamarray(R.v), L.s);
2297                         } else {
2298                                 clear_array(iamarray(R.v));
2299                         }
2300                         break;
2301
2302                 case XC( OC_NEWSOURCE ):
2303                         g_progname = op->l.s;
2304                         break;
2305
2306                 case XC( OC_RETURN ):
2307                         copyvar(res, L.v);
2308                         break;
2309
2310                 case XC( OC_NEXTFILE ):
2311                         nextfile = TRUE;
2312                 case XC( OC_NEXT ):
2313                         nextrec = TRUE;
2314                 case XC( OC_DONE ):
2315                         clrvar(res);
2316                         break;
2317
2318                 case XC( OC_EXIT ):
2319                         awk_exit(L.d);
2320
2321                 /* -- recursive node type -- */
2322
2323                 case XC( OC_VAR ):
2324                         L.v = op->l.v;
2325                         if (L.v == intvar[NF])
2326                                 split_f0();
2327                         goto v_cont;
2328
2329                 case XC( OC_FNARG ):
2330                         L.v = &fnargs[op->l.i];
2331  v_cont:
2332                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2333                         break;
2334
2335                 case XC( OC_IN ):
2336                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2337                         break;
2338
2339                 case XC( OC_REGEXP ):
2340                         op1 = op;
2341                         L.s = getvar_s(intvar[F0]);
2342                         goto re_cont;
2343
2344                 case XC( OC_MATCH ):
2345                         op1 = op->r.n;
2346  re_cont:
2347                         X.re = as_regex(op1, &sreg);
2348                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2349                         if (X.re == &sreg) regfree(X.re);
2350                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2351                         break;
2352
2353                 case XC( OC_MOVE ):
2354                         /* if source is a temporary string, jusk relink it to dest */
2355                         if (R.v == v1+1 && R.v->string) {
2356                                 res = setvar_p(L.v, R.v->string);
2357                                 R.v->string = NULL;
2358                         } else {
2359                                 res = copyvar(L.v, R.v);
2360                         }
2361                         break;
2362
2363                 case XC( OC_TERNARY ):
2364                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2365                                 syntax_error(EMSG_POSSIBLE_ERROR);
2366                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2367                         break;
2368
2369                 case XC( OC_FUNC ):
2370                         if (!op->r.f->body.first)
2371                                 syntax_error(EMSG_UNDEF_FUNC);
2372
2373                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2374                         while (op1) {
2375                                 L.v = evaluate(nextarg(&op1), v1);
2376                                 copyvar(R.v, L.v);
2377                                 R.v->type |= VF_CHILD;
2378                                 R.v->x.parent = L.v;
2379                                 if (++R.v - X.v >= op->r.f->nargs)
2380                                         break;
2381                         }
2382
2383                         R.v = fnargs;
2384                         fnargs = X.v;
2385
2386                         L.s = g_progname;
2387                         res = evaluate(op->r.f->body.first, res);
2388                         g_progname = L.s;
2389
2390                         nvfree(fnargs);
2391                         fnargs = R.v;
2392                         break;
2393
2394                 case XC( OC_GETLINE ):
2395                 case XC( OC_PGETLINE ):
2396                         if (op1) {
2397                                 X.rsm = newfile(L.s);
2398                                 if (!X.rsm->F) {
2399                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2400                                                 X.rsm->F = popen(L.s, "r");
2401                                                 X.rsm->is_pipe = TRUE;
2402                                         } else {
2403                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2404                                         }
2405                                 }
2406                         } else {
2407                                 if (!iF) iF = next_input_file();
2408                                 X.rsm = iF;
2409                         }
2410
2411                         if (!X.rsm->F) {
2412                                 setvar_i(intvar[ERRNO], errno);
2413                                 setvar_i(res, -1);
2414                                 break;
2415                         }
2416
2417                         if (!op->r.n)
2418                                 R.v = intvar[F0];
2419
2420                         L.i = awk_getline(X.rsm, R.v);
2421                         if (L.i > 0) {
2422                                 if (!op1) {
2423                                         incvar(intvar[FNR]);
2424                                         incvar(intvar[NR]);
2425                                 }
2426                         }
2427                         setvar_i(res, L.i);
2428                         break;
2429
2430                 /* simple builtins */
2431                 case XC( OC_FBLTIN ):
2432                         switch (opn) {
2433
2434                         case F_in:
2435                                 R.d = (int)L.d;
2436                                 break;
2437
2438                         case F_rn:
2439                                 R.d = (double)rand() / (double)RAND_MAX;
2440                                 break;
2441 #if ENABLE_FEATURE_AWK_MATH
2442                         case F_co:
2443                                 R.d = cos(L.d);
2444                                 break;
2445
2446                         case F_ex:
2447                                 R.d = exp(L.d);
2448                                 break;
2449
2450                         case F_lg:
2451                                 R.d = log(L.d);
2452                                 break;
2453
2454                         case F_si:
2455                                 R.d = sin(L.d);
2456                                 break;
2457
2458                         case F_sq:
2459                                 R.d = sqrt(L.d);
2460                                 break;
2461 #else
2462                         case F_co:
2463                         case F_ex:
2464                         case F_lg:
2465                         case F_si:
2466                         case F_sq:
2467                                 syntax_error(EMSG_NO_MATH);
2468                                 break;
2469 #endif
2470                         case F_sr:
2471                                 R.d = (double)seed;
2472                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2473                                 srand(seed);
2474                                 break;
2475
2476                         case F_ti:
2477                                 R.d = time(NULL);
2478                                 break;
2479
2480                         case F_le:
2481                                 if (!op1)
2482                                         L.s = getvar_s(intvar[F0]);
2483                                 R.d = strlen(L.s);
2484                                 break;
2485
2486                         case F_sy:
2487                                 fflush(NULL);
2488                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2489                                                 ? (system(L.s) >> 8) : 0;
2490                                 break;
2491
2492                         case F_ff:
2493                                 if (!op1)
2494                                         fflush(stdout);
2495                                 else {
2496                                         if (L.s && *L.s) {
2497                                                 X.rsm = newfile(L.s);
2498                                                 fflush(X.rsm->F);
2499                                         } else {
2500                                                 fflush(NULL);
2501                                         }
2502                                 }
2503                                 break;
2504
2505                         case F_cl:
2506                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2507                                 if (X.rsm) {
2508                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2509                                         free(X.rsm->buffer);
2510                                         hash_remove(fdhash, L.s);
2511                                 }
2512                                 if (R.i != 0)
2513                                         setvar_i(intvar[ERRNO], errno);
2514                                 R.d = (double)R.i;
2515                                 break;
2516                         }
2517                         setvar_i(res, R.d);
2518                         break;
2519
2520                 case XC( OC_BUILTIN ):
2521                         res = exec_builtin(op, res);
2522                         break;
2523
2524                 case XC( OC_SPRINTF ):
2525                         setvar_p(res, awk_printf(op1));
2526                         break;
2527
2528                 case XC( OC_UNARY ):
2529                         X.v = R.v;
2530                         L.d = R.d = getvar_i(R.v);
2531                         switch (opn) {
2532                         case 'P':
2533                                 L.d = ++R.d;
2534                                 goto r_op_change;
2535                         case 'p':
2536                                 R.d++;
2537                                 goto r_op_change;
2538                         case 'M':
2539                                 L.d = --R.d;
2540                                 goto r_op_change;
2541                         case 'm':
2542                                 R.d--;
2543                                 goto r_op_change;
2544                         case '!':
2545                                 L.d = istrue(X.v) ? 0 : 1;
2546                                 break;
2547                         case '-':
2548                                 L.d = -R.d;
2549                                 break;
2550  r_op_change:
2551                                 setvar_i(X.v, R.d);
2552                         }
2553                         setvar_i(res, L.d);
2554                         break;
2555
2556                 case XC( OC_FIELD ):
2557                         R.i = (int)getvar_i(R.v);
2558                         if (R.i == 0) {
2559                                 res = intvar[F0];
2560                         } else {
2561                                 split_f0();
2562                                 if (R.i > nfields)
2563                                         fsrealloc(R.i);
2564                                 res = &Fields[R.i - 1];
2565                         }
2566                         break;
2567
2568                 /* concatenation (" ") and index joining (",") */
2569                 case XC( OC_CONCAT ):
2570                 case XC( OC_COMMA ):
2571                         opn = strlen(L.s) + strlen(R.s) + 2;
2572                         X.s = xmalloc(opn);
2573                         strcpy(X.s, L.s);
2574                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2575                                 L.s = getvar_s(intvar[SUBSEP]);
2576                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2577                                 strcat(X.s, L.s);
2578                         }
2579                         strcat(X.s, R.s);
2580                         setvar_p(res, X.s);
2581                         break;
2582
2583                 case XC( OC_LAND ):
2584                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2585                         break;
2586
2587                 case XC( OC_LOR ):
2588                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2589                         break;
2590
2591                 case XC( OC_BINARY ):
2592                 case XC( OC_REPLACE ):
2593                         R.d = getvar_i(R.v);
2594                         switch (opn) {
2595                         case '+':
2596                                 L.d += R.d;
2597                                 break;
2598                         case '-':
2599                                 L.d -= R.d;
2600                                 break;
2601                         case '*':
2602                                 L.d *= R.d;
2603                                 break;
2604                         case '/':
2605                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2606                                 L.d /= R.d;
2607                                 break;
2608                         case '&':
2609 #if ENABLE_FEATURE_AWK_MATH
2610                                 L.d = pow(L.d, R.d);
2611 #else
2612                                 syntax_error(EMSG_NO_MATH);
2613 #endif
2614                                 break;
2615                         case '%':
2616                                 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2617                                 L.d -= (int)(L.d / R.d) * R.d;
2618                                 break;
2619                         }
2620                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2621                         break;
2622
2623                 case XC( OC_COMPARE ):
2624                         if (is_numeric(L.v) && is_numeric(R.v)) {
2625                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2626                         } else {
2627                                 L.s = getvar_s(L.v);
2628                                 R.s = getvar_s(R.v);
2629                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2630                         }
2631                         switch (opn & 0xfe) {
2632                         case 0:
2633                                 R.i = (L.d > 0);
2634                                 break;
2635                         case 2:
2636                                 R.i = (L.d >= 0);
2637                                 break;
2638                         case 4:
2639                                 R.i = (L.d == 0);
2640                                 break;
2641                         }
2642                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2643                         break;
2644
2645                 default:
2646                         syntax_error(EMSG_POSSIBLE_ERROR);
2647                 }
2648                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2649                         op = op->a.n;
2650                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2651                         break;
2652                 if (nextrec)
2653                         break;
2654         }
2655         nvfree(v1);
2656         return res;
2657 #undef fnargs
2658 #undef seed
2659 #undef sreg
2660 }
2661
2662
2663 /* -------- main & co. -------- */
2664
2665 static int awk_exit(int r)
2666 {
2667         var tv;
2668         unsigned i;
2669         hash_item *hi;
2670
2671         zero_out_var(&tv);
2672
2673         if (!exiting) {
2674                 exiting = TRUE;
2675                 nextrec = FALSE;
2676                 evaluate(endseq.first, &tv);
2677         }
2678
2679         /* waiting for children */
2680         for (i = 0; i < fdhash->csize; i++) {
2681                 hi = fdhash->items[i];
2682                 while (hi) {
2683                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2684                                 pclose(hi->data.rs.F);
2685                         hi = hi->next;
2686                 }
2687         }
2688
2689         exit(r);
2690 }
2691
2692 /* if expr looks like "var=value", perform assignment and return 1,
2693  * otherwise return 0 */
2694 static int is_assignment(const char *expr)
2695 {
2696         char *exprc, *s, *s0, *s1;
2697
2698         exprc = xstrdup(expr);
2699         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2700                 free(exprc);
2701                 return FALSE;
2702         }
2703
2704         *(s++) = '\0';
2705         s0 = s1 = s;
2706         while (*s)
2707                 *(s1++) = nextchar(&s);
2708
2709         *s1 = '\0';
2710         setvar_u(newvar(exprc), s0);
2711         free(exprc);
2712         return TRUE;
2713 }
2714
2715 /* switch to next input file */
2716 static rstream *next_input_file(void)
2717 {
2718 #define rsm          (G.next_input_file__rsm)
2719 #define files_happen (G.next_input_file__files_happen)
2720
2721         FILE *F = NULL;
2722         const char *fname, *ind;
2723
2724         if (rsm.F) fclose(rsm.F);
2725         rsm.F = NULL;
2726         rsm.pos = rsm.adv = 0;
2727
2728         do {
2729                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2730                         if (files_happen)
2731                                 return NULL;
2732                         fname = "-";
2733                         F = stdin;
2734                 } else {
2735                         ind = getvar_s(incvar(intvar[ARGIND]));
2736                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2737                         if (fname && *fname && !is_assignment(fname))
2738                                 F = xfopen_stdin(fname);
2739                 }
2740         } while (!F);
2741
2742         files_happen = TRUE;
2743         setvar_s(intvar[FILENAME], fname);
2744         rsm.F = F;
2745         return &rsm;
2746 #undef rsm
2747 #undef files_happen
2748 }
2749
2750 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2751 int awk_main(int argc, char **argv)
2752 {
2753         unsigned opt;
2754         char *opt_F, *opt_W;
2755         llist_t *list_v = NULL;
2756         llist_t *list_f = NULL;
2757         int i, j;
2758         var *v;
2759         var tv;
2760         char **envp;
2761         char *vnames = (char *)vNames; /* cheat */
2762         char *vvalues = (char *)vValues;
2763
2764         INIT_G();
2765
2766         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2767          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2768         if (ENABLE_LOCALE_SUPPORT)
2769                 setlocale(LC_NUMERIC, "C");
2770
2771         zero_out_var(&tv);
2772
2773         /* allocate global buffer */
2774         g_buf = xmalloc(MAXVARFMT + 1);
2775
2776         vhash = hash_init();
2777         ahash = hash_init();
2778         fdhash = hash_init();
2779         fnhash = hash_init();
2780
2781         /* initialize variables */
2782         for (i = 0; *vnames; i++) {
2783                 intvar[i] = v = newvar(nextword(&vnames));
2784                 if (*vvalues != '\377')
2785                         setvar_s(v, nextword(&vvalues));
2786                 else
2787                         setvar_i(v, 0);
2788
2789                 if (*vnames == '*') {
2790                         v->type |= VF_SPECIAL;
2791                         vnames++;
2792                 }
2793         }
2794
2795         handle_special(intvar[FS]);
2796         handle_special(intvar[RS]);
2797
2798         newfile("/dev/stdin")->F = stdin;
2799         newfile("/dev/stdout")->F = stdout;
2800         newfile("/dev/stderr")->F = stderr;
2801
2802         /* Huh, people report that sometimes environ is NULL. Oh well. */
2803         if (environ) for (envp = environ; *envp; envp++) {
2804                 /* environ is writable, thus we don't strdup it needlessly */
2805                 char *s = *envp;
2806                 char *s1 = strchr(s, '=');
2807                 if (s1) {
2808                         *s1 = '\0';
2809                         /* Both findvar and setvar_u take const char*
2810                          * as 2nd arg -> environment is not trashed */
2811                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2812                         *s1 = '=';
2813                 }
2814         }
2815         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2816         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2817         argv += optind;
2818         argc -= optind;
2819         if (opt & 0x1)
2820                 setvar_s(intvar[FS], opt_F); // -F
2821         while (list_v) { /* -v */
2822                 if (!is_assignment(llist_pop(&list_v)))
2823                         bb_show_usage();
2824         }
2825         if (list_f) { /* -f */
2826                 do {
2827                         char *s = NULL;
2828                         FILE *from_file;
2829
2830                         g_progname = llist_pop(&list_f);
2831                         from_file = xfopen_stdin(g_progname);
2832                         /* one byte is reserved for some trick in next_token */
2833                         for (i = j = 1; j > 0; i += j) {
2834                                 s = xrealloc(s, i + 4096);
2835                                 j = fread(s + i, 1, 4094, from_file);
2836                         }
2837                         s[i] = '\0';
2838                         fclose(from_file);
2839                         parse_program(s + 1);
2840                         free(s);
2841                 } while (list_f);
2842         } else { // no -f: take program from 1st parameter
2843                 if (!argc)
2844                         bb_show_usage();
2845                 g_progname = "cmd. line";
2846                 parse_program(*argv++);
2847                 argc--;
2848         }
2849         if (opt & 0x8) // -W
2850                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2851
2852         /* fill in ARGV array */
2853         setvar_i(intvar[ARGC], argc + 1);
2854         setari_u(intvar[ARGV], 0, "awk");
2855         i = 0;
2856         while (*argv)
2857                 setari_u(intvar[ARGV], ++i, *argv++);
2858
2859         evaluate(beginseq.first, &tv);
2860         if (!mainseq.first && !endseq.first)
2861                 awk_exit(EXIT_SUCCESS);
2862
2863         /* input file could already be opened in BEGIN block */
2864         if (!iF) iF = next_input_file();
2865
2866         /* passing through input files */
2867         while (iF) {
2868                 nextfile = FALSE;
2869                 setvar_i(intvar[FNR], 0);
2870
2871                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2872                         nextrec = FALSE;
2873                         incvar(intvar[NR]);
2874                         incvar(intvar[FNR]);
2875                         evaluate(mainseq.first, &tv);
2876
2877                         if (nextfile)
2878                                 break;
2879                 }
2880
2881                 if (i < 0)
2882                         syntax_error(strerror(errno));
2883
2884                 iF = next_input_file();
2885         }
2886
2887         awk_exit(EXIT_SUCCESS);
2888         /*return 0;*/
2889 }