awk: be more paranoid when freeing loop variable; make code less obfuscated
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
17 /* If you comment out one of these below, it will be #defined later
18  * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...)  do {} while (0)
20
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
23 #endif
24
25
26
27 #define MAXVARFMT       240
28 #define MINNVBLOCK      64
29
30 /* variable flags */
31 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
32 #define VF_ARRAY        0x0002  /* 1 = it's an array */
33
34 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
35 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
37 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
41
42 /* these flags are static, don't change them when value is changed */
43 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
44
/* One node of the list hung off a variable that has VF_WALK set
 * (see var.x.walker). Nodes are chained through 'prev'; nvfree()
 * follows that chain and free()s every node.
 */
typedef struct walker_list {
        char *end;      /* presumably end of the data stored in wbuf - TODO confirm */
        char *cur;      /* presumably current read position in wbuf - TODO confirm */
        struct walker_list *prev;       /* older list in the chain, or NULL */
        char wbuf[1];   /* presumably longer than declared, like hash_item.name - TODO confirm */
} walker_list;
51
/* Variable.
 * 'type' is a bit set of the VF_xxx flags. 'number' and 'string' hold the
 * numeric and string values; VF_CACHED says that the non-primary one
 * is a cached conversion of the primary one (see getvar_s/getvar_i).
 * Which member of 'x' is meaningful is selected by the flags:
 * VF_ARRAY -> array, VF_CHILD -> parent, VF_WALK -> walker.
 */
typedef struct var_s {
        unsigned type;            /* flags */
        double number;
        char *string;             /* NULL stands for the empty string */
        union {
                int aidx;               /* func arg idx (for compilation stage) */
                struct xhash_s *array;  /* array ptr */
                struct var_s *parent;   /* for func args, ptr to actual parameter */
                walker_list *walker;    /* list of array elements (for..in) */
        } x;
} var;
64
/* Node chain (pattern-action chain, BEGIN, END, function bodies).
 * Only the two end points of the chain are stored here.
 */
typedef struct chain_s {
        struct node_s *first;           /* first node of the chain */
        struct node_s *last;            /* last node of the chain */
        const char *programname;        /* presumably the program source name used in diagnostics - TODO confirm */
} chain;
71
/* Function: an argument count plus the node chain of its body.
 * Stored as hash_item.data.f (see newfunc()).
 */
typedef struct func_s {
        unsigned nargs;         /* number of arguments */
        struct chain_s body;    /* function body */
} func;
77
/* I/O stream. Stored as hash_item.data.rs (see newfile()). */
typedef struct rstream_s {
        FILE *F;                /* underlying stdio stream */
        char *buffer;           /* presumably the read buffer - TODO confirm */
        int adv;                /* NOTE(review): buffer bookkeeping, exact meaning not visible here */
        int size;               /* NOTE(review): buffer bookkeeping, exact meaning not visible here */
        int pos;                /* NOTE(review): buffer bookkeeping, exact meaning not visible here */
        smallint is_pipe;       /* presumably nonzero when F was opened as a pipe - TODO confirm */
} rstream;
87
/* One entry of an xhash bucket chain.
 * 'data' must stay the FIRST member: hash_search() returns &data and
 * hash_find() stores that result in a hash_item pointer, which is
 * only valid because both addresses coincide.
 * Items are allocated with room for the full name after the struct.
 */
typedef struct hash_item_s {
        union {
                struct var_s v;         /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;        /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        char name[1];                   /* really it's longer */
} hash_item;
97
/* Chained hash table keyed by NUL-terminated names.
 * Starts with FIRST_PRIME buckets and grows through PRIMES[]
 * (see hash_init() and hash_rebuild()).
 */
typedef struct xhash_s {
        unsigned nel;           /* number of elements */
        unsigned csize;         /* current number of buckets in items[] */
        unsigned nprime;        /* index into PRIMES[] of the next hash size */
        unsigned glen;          /* total length of item names, including their NULs */
        struct hash_item_s **items;     /* bucket array: heads of the item chains */
} xhash;
105
/* Tree node.
 * 'l', 'r' and 'a' are three operand slots; each is a union, so the
 * member to use depends on the kind of node.
 * NOTE(review): 'info' presumably carries the same encoding as the
 * tokeninfo[] values (operation class, flags, priority) - confirm
 * against the parser.
 */
typedef struct node_s {
        uint32_t info;
        unsigned lineno;        /* presumably the source line of the node - TODO confirm */
        union {
                struct node_s *n;
                var *v;
                int i;
                char *s;
                regex_t *re;
        } l;
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
                int argno;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
127
/* Block of temporary variables.
 * Blocks form a doubly linked list; nvalloc() hands out variables
 * starting at 'pos' and appends a new block when none has enough room,
 * nvfree() rewinds 'pos' and steps back to earlier blocks.
 */
typedef struct nvblock_s {
        int size;                       /* capacity of nv[], in variables */
        var *pos;                       /* first unused variable in nv[] */
        struct nvblock_s *prev;         /* previous block, NULL for the first one */
        struct nvblock_s *next;         /* next block, NULL for the last one */
        var nv[];                       /* the variables themselves */
} nvblock;
136
/* A node bundled with storage for two compiled regular expressions.
 * Used for the field and record splitters (fsplitter, rsplitter).
 * NOTE(review): presumably re[0]/re[1] back n.l.re and n.r.ire - confirm.
 */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
141
142 /* simple token classes */
143 /* Order and hex values are very important!!!  See next_token() */
144 #define TC_SEQSTART      1                              /* ( */
145 #define TC_SEQTERM      (1 << 1)                /* ) */
146 #define TC_REGEXP       (1 << 2)                /* /.../ */
147 #define TC_OUTRDR       (1 << 3)                /* | > >> */
148 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
149 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
150 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
151 #define TC_IN           (1 << 7)
152 #define TC_COMMA        (1 << 8)
153 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
154 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
155 #define TC_ARRTERM      (1 << 11)               /* ] */
156 #define TC_GRPSTART     (1 << 12)               /* { */
157 #define TC_GRPTERM      (1 << 13)               /* } */
158 #define TC_SEMICOL      (1 << 14)
159 #define TC_NEWLINE      (1 << 15)
160 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
161 #define TC_WHILE        (1 << 17)
162 #define TC_ELSE         (1 << 18)
163 #define TC_BUILTIN      (1 << 19)
164 #define TC_GETLINE      (1 << 20)
165 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
166 #define TC_BEGIN        (1 << 22)
167 #define TC_END          (1 << 23)
168 #define TC_EOF          (1 << 24)
169 #define TC_VARIABLE     (1 << 25)
170 #define TC_ARRAY        (1 << 26)
171 #define TC_FUNCTION     (1 << 27)
172 #define TC_STRING       (1 << 28)
173 #define TC_NUMBER       (1 << 29)
174
175 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
176
177 /* combined token classes */
178 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
179 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
180 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
181                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
182
183 #define TC_STATEMNT (TC_STATX | TC_WHILE)
184 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
185
186 /* word tokens, cannot mean something else if not expected */
187 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
188                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
189
190 /* discard newlines after these */
191 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
192                    | TC_BINOP | TC_OPTERM)
193
194 /* what can expression begin with */
195 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
196 /* what can group begin with */
197 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
198
199 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
200 /* operator is inserted between them */
201 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
202                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
203 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
204
205 #define OF_RES1    0x010000
206 #define OF_RES2    0x020000
207 #define OF_STR1    0x040000
208 #define OF_STR2    0x080000
209 #define OF_NUM1    0x100000
210 #define OF_CHECKED 0x200000
211
212 /* combined operator flags */
213 #define xx      0
214 #define xV      OF_RES2
215 #define xS      (OF_RES2 | OF_STR2)
216 #define Vx      OF_RES1
217 #define VV      (OF_RES1 | OF_RES2)
218 #define Nx      (OF_RES1 | OF_NUM1)
219 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
220 #define Sx      (OF_RES1 | OF_STR1)
221 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
222 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
223
224 #define OPCLSMASK 0xFF00
225 #define OPNMASK   0x007F
226
/* Operator priority is stored in the highest byte of the info word
 * (even: r->l, odd: l->r grouping).
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() parenthesizes its argument and shifts it as uint32_t: values with
 * the top bit set, such as P(0x83), do not fit a signed int after the
 * shift, and overflowing a signed left shift is undefined behaviour.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
234
235 /* Operation classes */
236
237 #define SHIFT_TIL_THIS  0x0600
238 #define RECUR_FROM_THIS 0x1000
239
240 enum {
241         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
242         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
243
244         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
245         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
246         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
247
248         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
249         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
250         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
251         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
252         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
253         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
254         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
255         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
256         OC_DONE = 0x2800,
257
258         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
259         ST_WHILE = 0x3300
260 };
261
262 /* simple builtins */
263 enum {
264         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
265         F_ti,   F_le,   F_sy,   F_ff,   F_cl
266 };
267
268 /* builtins */
269 enum {
270         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
271         B_ge,   B_gs,   B_su,
272         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
273 };
274
275 /* tokens and their corresponding info values */
276
277 #define NTC     "\377"  /* switch to next token class (tc<<1) */
278 #define NTCC    '\377'
279
280 #define OC_B    OC_BUILTIN
281
/* All tokens known to the lexer, packed into one string.
 * Each token is preceded by one char holding its length (written as an
 * octal escape: "\2>>" is the 2-char token ">>", "\10continue" has 8).
 * NTC ("\377") ends the current token class and switches to the next
 * one; the classes appear in the order of the TC_xxx bits. The final
 * "\0" (a zero length) ends the list.
 * tokeninfo[] holds the info value for each token, in the same order.
 */
static const char tokenlist[] ALIGN1 =
        "\1("       NTC
        "\1)"       NTC
        "\1/"       NTC                                 /* REGEXP */
        "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
        "\2++"      "\2--"      NTC                     /* UOPPOST */
        "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
        "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
        "\2*="      "\2/="      "\2%="      "\2^="
        "\1+"       "\1-"       "\3**="     "\2**"
        "\1/"       "\1%"       "\1^"       "\1*"
        "\2!="      "\2>="      "\2<="      "\1>"
        "\1<"       "\2!~"      "\1~"       "\2&&"
        "\2||"      "\1?"       "\1:"       NTC
        "\2in"      NTC
        "\1,"       NTC
        "\1|"       NTC
        "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
        "\1]"       NTC
        "\1{"       NTC
        "\1}"       NTC
        "\1;"       NTC
        "\1\n"      NTC
        "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
        "\10continue"           "\6delete"  "\5print"
        "\6printf"  "\4next"    "\10nextfile"
        "\6return"  "\4exit"    NTC
        "\5while"   NTC
        "\4else"    NTC

        /* the bitwise functions below belong to the BUILTIN class too */
        "\3and"     "\5compl"   "\6lshift"  "\2or"
        "\6rshift"  "\3xor"
        "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
        "\3cos"     "\3exp"     "\3int"     "\3log"
        "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
        "\6gensub"  "\4gsub"    "\5index"   "\6length"
        "\5match"   "\5split"   "\7sprintf" "\3sub"
        "\6substr"  "\7systime" "\10strftime" "\6mktime"
        "\7tolower" "\7toupper" NTC
        "\7getline" NTC
        "\4func"    "\10function"   NTC
        "\5BEGIN"   NTC
        "\3END"     "\0"
        ;
326
327 static const uint32_t tokeninfo[] = {
328         0,
329         0,
330         OC_REGEXP,
331         xS|'a',     xS|'w',     xS|'|',
332         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
333         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
334             OC_FIELD|xV|P(5),
335         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
336             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
337         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
338             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
339         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
340             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
341         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
342             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
343         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
344             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
345         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
346             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
347         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
348             OC_COLON|xx|P(67)|':',
349         OC_IN|SV|P(49),
350         OC_COMMA|SS|P(80),
351         OC_PGETLINE|SV|P(37),
352         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
353             OC_UNARY|xV|P(19)|'!',
354         0,
355         0,
356         0,
357         0,
358         0,
359         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
360         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
361         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
362         OC_RETURN|Vx,   OC_EXIT|Nx,
363         ST_WHILE,
364         0,
365
366         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
367         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
368         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
369         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
370         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
371         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
372         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
373         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
374         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
375         OC_GETLINE|SV|P(0),
376         0,      0,
377         0,
378         0
379 };
380
381 /* internal variable names and their initial values       */
382 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
383 enum {
384         CONVFMT,    OFMT,       FS,         OFS,
385         ORS,        RS,         RT,         FILENAME,
386         SUBSEP,     F0,         ARGIND,     ARGC,
387         ARGV,       ERRNO,      FNR,        NR,
388         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
389 };
390
391 static const char vNames[] ALIGN1 =
392         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
393         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
394         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
395         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
396         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
397
398 static const char vValues[] ALIGN1 =
399         "%.6g\0"    "%.6g\0"    " \0"       " \0"
400         "\n\0"      "\n\0"      "\0"        "\0"
401         "\034\0"    "\0"        "\377";
402
403 /* hash size may grow to these values */
404 #define FIRST_PRIME 61
405 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
406
407
408 /* Globals. Split in two parts so that first one is addressed
409  * with (mostly short) negative offsets.
410  * NB: it's unsafe to put members of type "double"
411  * into globals2 (gcc may fail to align them).
412  */
413 struct globals {
414         double t_double;
415         chain beginseq, mainseq, endseq;
416         chain *seq;
417         node *break_ptr, *continue_ptr;
418         rstream *iF;
419         xhash *vhash, *ahash, *fdhash, *fnhash;
420         const char *g_progname;
421         int g_lineno;
422         int nfields;
423         int maxfields; /* used in fsrealloc() only */
424         var *Fields;
425         nvblock *g_cb;
426         char *g_pos;
427         char *g_buf;
428         smallint icase;
429         smallint exiting;
430         smallint nextrec;
431         smallint nextfile;
432         smallint is_f0_split;
433 };
434 struct globals2 {
435         uint32_t t_info; /* often used */
436         uint32_t t_tclass;
437         char *t_string;
438         int t_lineno;
439         int t_rollback;
440
441         var *intvar[NUM_INTERNAL_VARS]; /* often used */
442
443         /* former statics from various functions */
444         char *split_f0__fstrings;
445
446         uint32_t next_token__save_tclass;
447         uint32_t next_token__save_info;
448         uint32_t next_token__ltclass;
449         smallint next_token__concat_inserted;
450
451         smallint next_input_file__files_happen;
452         rstream next_input_file__rsm;
453
454         var *evaluate__fnargs;
455         unsigned evaluate__seed;
456         regex_t evaluate__sreg;
457
458         var ptest__v;
459
460         tsplitter exec_builtin__tspl;
461
462         /* biggest and least used members go last */
463         tsplitter fsplitter, rsplitter;
464 };
465 #define G1 (ptr_to_globals[-1])
466 #define G (*(struct globals2 *)ptr_to_globals)
467 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
468 /*char G1size[sizeof(G1)]; - 0x74 */
469 /*char Gsize[sizeof(G)]; - 0x1c4 */
470 /* Trying to keep most of members accessible with short offsets: */
471 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
472 #define t_double     (G1.t_double    )
473 #define beginseq     (G1.beginseq    )
474 #define mainseq      (G1.mainseq     )
475 #define endseq       (G1.endseq      )
476 #define seq          (G1.seq         )
477 #define break_ptr    (G1.break_ptr   )
478 #define continue_ptr (G1.continue_ptr)
479 #define iF           (G1.iF          )
480 #define vhash        (G1.vhash       )
481 #define ahash        (G1.ahash       )
482 #define fdhash       (G1.fdhash      )
483 #define fnhash       (G1.fnhash      )
484 #define g_progname   (G1.g_progname  )
485 #define g_lineno     (G1.g_lineno    )
486 #define nfields      (G1.nfields     )
487 #define maxfields    (G1.maxfields   )
488 #define Fields       (G1.Fields      )
489 #define g_cb         (G1.g_cb        )
490 #define g_pos        (G1.g_pos       )
491 #define g_buf        (G1.g_buf       )
492 #define icase        (G1.icase       )
493 #define exiting      (G1.exiting     )
494 #define nextrec      (G1.nextrec     )
495 #define nextfile     (G1.nextfile    )
496 #define is_f0_split  (G1.is_f0_split )
497 #define t_info       (G.t_info      )
498 #define t_tclass     (G.t_tclass    )
499 #define t_string     (G.t_string    )
500 #define t_lineno     (G.t_lineno    )
501 #define t_rollback   (G.t_rollback  )
502 #define intvar       (G.intvar      )
503 #define fsplitter    (G.fsplitter   )
504 #define rsplitter    (G.rsplitter   )
505 #define INIT_G() do { \
506         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
507         G.next_token__ltclass = TC_OPTERM; \
508         G.evaluate__seed = 1; \
509 } while (0)
510
511
512 /* function prototypes */
513 static void handle_special(var *);
514 static node *parse_expr(uint32_t);
515 static void chain_group(void);
516 static var *evaluate(node *, var *);
517 static rstream *next_input_file(void);
518 static int fmt_num(char *, int, const char *, double, int);
519 static int awk_exit(int) NORETURN;
520
521 /* ---- error handling ---- */
522
523 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
524 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
525 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
526 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
527 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
528 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
529 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
530 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
531 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
532 #if !ENABLE_FEATURE_AWK_LIBM
533 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
534 #endif
535
536 static void zero_out_var(var *vp)
537 {
538         memset(vp, 0, sizeof(*vp));
539 }
540
541 static void syntax_error(const char *message) NORETURN;
542 static void syntax_error(const char *message)
543 {
544         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
545 }
546
547 /* ---- hash stuff ---- */
548
/* Hash a NUL-terminated name: h = h * 63 + c for every char c,
 * with unsigned wrap-around. The caller reduces it modulo the table size.
 */
static unsigned hashidx(const char *name)
{
        unsigned h = 0;
        const char *p;

        for (p = name; *p != '\0'; p++)
                h = h * 63u + (unsigned)*p;
        return h;
}
557
558 /* create new hash */
559 static xhash *hash_init(void)
560 {
561         xhash *newhash;
562
563         newhash = xzalloc(sizeof(*newhash));
564         newhash->csize = FIRST_PRIME;
565         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
566
567         return newhash;
568 }
569
570 /* find item in hash, return ptr to data, NULL if not found */
571 static void *hash_search(xhash *hash, const char *name)
572 {
573         hash_item *hi;
574
575         hi = hash->items[hashidx(name) % hash->csize];
576         while (hi) {
577                 if (strcmp(hi->name, name) == 0)
578                         return &(hi->data);
579                 hi = hi->next;
580         }
581         return NULL;
582 }
583
584 /* grow hash if it becomes too big */
585 static void hash_rebuild(xhash *hash)
586 {
587         unsigned newsize, i, idx;
588         hash_item **newitems, *hi, *thi;
589
590         if (hash->nprime == ARRAY_SIZE(PRIMES))
591                 return;
592
593         newsize = PRIMES[hash->nprime++];
594         newitems = xzalloc(newsize * sizeof(newitems[0]));
595
596         for (i = 0; i < hash->csize; i++) {
597                 hi = hash->items[i];
598                 while (hi) {
599                         thi = hi;
600                         hi = thi->next;
601                         idx = hashidx(thi->name) % newsize;
602                         thi->next = newitems[idx];
603                         newitems[idx] = thi;
604                 }
605         }
606
607         free(hash->items);
608         hash->csize = newsize;
609         hash->items = newitems;
610 }
611
612 /* find item in hash, add it if necessary. Return ptr to data */
613 static void *hash_find(xhash *hash, const char *name)
614 {
615         hash_item *hi;
616         unsigned idx;
617         int l;
618
619         hi = hash_search(hash, name);
620         if (!hi) {
621                 if (++hash->nel / hash->csize > 10)
622                         hash_rebuild(hash);
623
624                 l = strlen(name) + 1;
625                 hi = xzalloc(sizeof(*hi) + l);
626                 strcpy(hi->name, name);
627
628                 idx = hashidx(name) % hash->csize;
629                 hi->next = hash->items[idx];
630                 hash->items[idx] = hi;
631                 hash->glen += l;
632         }
633         return &(hi->data);
634 }
635
636 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
637 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
638 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
639 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
640
641 static void hash_remove(xhash *hash, const char *name)
642 {
643         hash_item *hi, **phi;
644
645         phi = &(hash->items[hashidx(name) % hash->csize]);
646         while (*phi) {
647                 hi = *phi;
648                 if (strcmp(hi->name, name) == 0) {
649                         hash->glen -= (strlen(name) + 1);
650                         hash->nel--;
651                         *phi = hi->next;
652                         free(hi);
653                         break;
654                 }
655                 phi = &(hi->next);
656         }
657 }
658
659 /* ------ some useful functions ------ */
660
661 static void skip_spaces(char **s)
662 {
663         char *p = *s;
664
665         while (1) {
666                 if (*p == '\\' && p[1] == '\n') {
667                         p++;
668                         t_lineno++;
669                 } else if (*p != ' ' && *p != '\t') {
670                         break;
671                 }
672                 p++;
673         }
674         *s = p;
675 }
676
/* Return the string at *s and move *s just past its terminating NUL,
 * i.e. to the start of the next string in a "\0"-separated list.
 */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
684
/* Fetch one char from *s, decoding a backslash escape if there is one.
 * When bb_process_escape_sequence() hands the backslash back without
 * consuming anything, the char after the backslash is returned literally.
 */
static char nextchar(char **s)
{
        char ch = *(*s)++;
        char *after = *s;

        if (ch == '\\') {
                ch = bb_process_escape_sequence((const char**)s);
                if (ch == '\\' && *s == after)
                        ch = *(*s)++;
        }
        return ch;
}
697
698 static ALWAYS_INLINE int isalnum_(int c)
699 {
700         return (isalnum(c) || c == '_');
701 }
702
/* Convert the number at *pp to double and advance *pp past it.
 *
 * With ENABLE_DESKTOP, a leading "0x" is parsed as a hex integer and a
 * leading "0<digit>" as an octal integer. If the octal parse stops at a
 * digit or a '.', the text was really a decimal such as "009.123",
 * "000.5" or "08"; it is then handed to strtod() from the start instead
 * of being truncated to the octal prefix.
 */
static double my_strtod(char **pp)
{
        char *cp = *pp;

#if ENABLE_DESKTOP
        if (cp[0] == '0') {
                char c = (cp[1] | 0x20);

                if (c == 'x' || isdigit(cp[1])) {
                        unsigned long long ull = strtoull(cp, pp, 0);

                        if (c == 'x')
                                return ull;
                        c = **pp;
                        if (!isdigit(c) && c != '.')
                                return ull;
                        /* else: not a pure octal integer, e.g.
                         * 009.123 (*pp points to '9') or
                         * 000.123 (*pp points to '.'):
                         * fall through to strtod.
                         */
                }
        }
#endif
        return strtod(cp, pp);
}
714
715 /* -------- working with variables (set/get/copy/etc) -------- */
716
717 static xhash *iamarray(var *v)
718 {
719         var *a = v;
720
721         while (a->type & VF_CHILD)
722                 a = a->x.parent;
723
724         if (!(a->type & VF_ARRAY)) {
725                 a->type |= VF_ARRAY;
726                 a->x.array = hash_init();
727         }
728         return a->x.array;
729 }
730
731 static void clear_array(xhash *array)
732 {
733         unsigned i;
734         hash_item *hi, *thi;
735
736         for (i = 0; i < array->csize; i++) {
737                 hi = array->items[i];
738                 while (hi) {
739                         thi = hi;
740                         hi = hi->next;
741                         free(thi->data.v.string);
742                         free(thi);
743                 }
744                 array->items[i] = NULL;
745         }
746         array->glen = array->nel = 0;
747 }
748
749 /* clear a variable */
750 static var *clrvar(var *v)
751 {
752         if (!(v->type & VF_FSTR))
753                 free(v->string);
754
755         v->type &= VF_DONTTOUCH;
756         v->type |= VF_DIRTY;
757         v->string = NULL;
758         return v;
759 }
760
761 /* assign string value to variable */
762 static var *setvar_p(var *v, char *value)
763 {
764         clrvar(v);
765         v->string = value;
766         handle_special(v);
767         return v;
768 }
769
770 /* same as setvar_p but make a copy of string */
771 static var *setvar_s(var *v, const char *value)
772 {
773         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
774 }
775
776 /* same as setvar_s but sets USER flag */
777 static var *setvar_u(var *v, const char *value)
778 {
779         v = setvar_s(v, value);
780         v->type |= VF_USER;
781         return v;
782 }
783
784 /* set array element to user string */
785 static void setari_u(var *a, int idx, const char *s)
786 {
787         var *v;
788
789         v = findvar(iamarray(a), itoa(idx));
790         setvar_u(v, s);
791 }
792
793 /* assign numeric value to variable */
794 static var *setvar_i(var *v, double value)
795 {
796         clrvar(v);
797         v->type |= VF_NUMBER;
798         v->number = value;
799         handle_special(v);
800         return v;
801 }
802
803 static const char *getvar_s(var *v)
804 {
805         /* if v is numeric and has no cached string, convert it to string */
806         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
807                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
808                 v->string = xstrdup(g_buf);
809                 v->type |= VF_CACHED;
810         }
811         return (v->string == NULL) ? "" : v->string;
812 }
813
814 static double getvar_i(var *v)
815 {
816         char *s;
817
818         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
819                 v->number = 0;
820                 s = v->string;
821                 if (s && *s) {
822                         v->number = my_strtod(&s);
823                         if (v->type & VF_USER) {
824                                 skip_spaces(&s);
825                                 if (*s != '\0')
826                                         v->type &= ~VF_USER;
827                         }
828                 } else {
829                         v->type &= ~VF_USER;
830                 }
831                 v->type |= VF_CACHED;
832         }
833         return v->number;
834 }
835
836 /* Used for operands of bitwise ops */
837 static unsigned long getvar_i_int(var *v)
838 {
839         double d = getvar_i(v);
840
841         /* Casting doubles to longs is undefined for values outside
842          * of target type range. Try to widen it as much as possible */
843         if (d >= 0)
844                 return (unsigned long)d;
845         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
846         return - (long) (unsigned long) (-d);
847 }
848
849 static var *copyvar(var *dest, const var *src)
850 {
851         if (dest != src) {
852                 clrvar(dest);
853                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
854                 dest->number = src->number;
855                 if (src->string)
856                         dest->string = xstrdup(src->string);
857         }
858         handle_special(dest);
859         return dest;
860 }
861
862 static var *incvar(var *v)
863 {
864         return setvar_i(v, getvar_i(v) + 1.0);
865 }
866
867 /* return true if v is number or numeric string */
868 static int is_numeric(var *v)
869 {
870         getvar_i(v);
871         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
872 }
873
874 /* return 1 when value of v corresponds to true, 0 otherwise */
875 static int istrue(var *v)
876 {
877         if (is_numeric(v))
878                 return (v->number != 0);
879         return (v->string && v->string[0]);
880 }
881
882 /* temporary variables allocator. Last allocated should be first freed */
883 static var *nvalloc(int n)
884 {
885         nvblock *pb = NULL;
886         var *v, *r;
887         int size;
888
889         while (g_cb) {
890                 pb = g_cb;
891                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
892                         break;
893                 g_cb = g_cb->next;
894         }
895
896         if (!g_cb) {
897                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
898                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
899                 g_cb->size = size;
900                 g_cb->pos = g_cb->nv;
901                 g_cb->prev = pb;
902                 /*g_cb->next = NULL; - xzalloc did it */
903                 if (pb)
904                         pb->next = g_cb;
905         }
906
907         v = r = g_cb->pos;
908         g_cb->pos += n;
909
910         while (v < g_cb->pos) {
911                 v->type = 0;
912                 v->string = NULL;
913                 v++;
914         }
915
916         return r;
917 }
918
919 static void nvfree(var *v)
920 {
921         var *p;
922
923         if (v < g_cb->nv || v >= g_cb->pos)
924                 syntax_error(EMSG_INTERNAL_ERROR);
925
926         for (p = v; p < g_cb->pos; p++) {
927                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
928                         clear_array(iamarray(p));
929                         free(p->x.array->items);
930                         free(p->x.array);
931                 }
932                 if (p->type & VF_WALK) {
933                         walker_list *n;
934                         walker_list *w = p->x.walker;
935                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
936                         p->x.walker = NULL;
937                         while (w) {
938                                 n = w->prev;
939                                 debug_printf_walker(" free(%p)\n", w);
940                                 free(w);
941                                 w = n;
942                         }
943                 }
944                 clrvar(p);
945         }
946
947         g_cb->pos = v;
948         while (g_cb->prev && g_cb->pos == g_cb->nv) {
949                 g_cb = g_cb->prev;
950         }
951 }
952
953 /* ------- awk program text parsing ------- */
954
955 /* Parse next token pointed by global g_pos, place results into the global
956  * t_* variables: t_tclass, and depending on the token t_info, t_string
 * (strings, regexps, names) or t_double (numbers).
 * 'expected' is a bit mask of acceptable token classes; if the class of
 * the token is not in the mask, syntax_error() is called.
 * Return token class.
957  */
958 static uint32_t next_token(uint32_t expected)
959 {
960 #define concat_inserted (G.next_token__concat_inserted)
961 #define save_tclass     (G.next_token__save_tclass)
962 #define save_info       (G.next_token__save_info)
963 /* Initialized to TC_OPTERM: */
964 #define ltclass         (G.next_token__ltclass)
965
966         char *p, *pp, *s;
967         const char *tl;
968         uint32_t tc;
969         const uint32_t *ti;
970         int l;
971
            /* a rolled back token is delivered once more: t_tclass and the
             * other t_* variables are left exactly as they are */
972         if (t_rollback) {
973                 t_rollback = FALSE;
974
975         } else if (concat_inserted) {
                    /* the previous call returned a synthesized concatenation
                     * operator; now deliver the real token saved at that time */
976                 concat_inserted = FALSE;
977                 t_tclass = save_tclass;
978                 t_info = save_info;
979
980         } else {
981                 p = g_pos;
982  readnext:
983                 skip_spaces(&p);
984                 g_lineno = t_lineno;
                    /* '#' comment: skip up to (not including) end of line */
985                 if (*p == '#')
986                         while (*p != '\n' && *p != '\0')
987                                 p++;
988
989                 if (*p == '\n')
990                         t_lineno++;
991
992                 if (*p == '\0') {
993                         tc = TC_EOF;
994
995                 } else if (*p == '\"') {
996                         /* it's a string */
                            /* the string is rewritten in place: s is the write
                             * position, p the read position; nextchar() presumably
                             * decodes escape sequences - confirm in its definition */
997                         t_string = s = ++p;
998                         while (*p != '\"') {
999                                 if (*p == '\0' || *p == '\n')
1000                                         syntax_error(EMSG_UNEXP_EOS);
1001                                 *(s++) = nextchar(&p);
1002                         }
1003                         p++;
1004                         *s = '\0';
1005                         tc = TC_STRING;
1006
1007                 } else if ((expected & TC_REGEXP) && *p == '/') {
1008                         /* it's regexp */
                             /* '/' starts a regexp only when the caller expects one;
                              * the text is copied in place up to the closing '/' */
1009                         t_string = s = ++p;
1010                         while (*p != '/') {
1011                                 if (*p == '\0' || *p == '\n')
1012                                         syntax_error(EMSG_UNEXP_EOS);
1013                                 *s = *p++;
1014                                 if (*s++ == '\\') {
                                             /* backslash: the copied '\\' is replaced by
                                              * the result of bb_process_escape_sequence() */
1015                                         pp = p;
1016                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
                                             /* "\\\\" in the source: store a second backslash */
1017                                         if (*pp == '\\')
1018                                                 *s++ = '\\';
                                             /* nothing was consumed by the escape processing:
                                              * copy the char after the backslash verbatim */
1019                                         if (p == pp)
1020                                                 *s++ = *p++;
1021                                 }
1022                         }
1023                         p++;
1024                         *s = '\0';
1025                         tc = TC_REGEXP;
1026
1027                 } else if (*p == '.' || isdigit(*p)) {
1028                         /* it's a number */
1029                         t_double = my_strtod(&p);
                             /* a '.' directly after the parsed number is rejected */
1030                         if (*p == '.')
1031                                 syntax_error(EMSG_UNEXP_TOKEN);
1032                         tc = TC_NUMBER;
1033
1034                 } else {
1035                         /* search for something known */
                             /* tokenlist is walked as a sequence of entries, each a
                              * length byte followed by that many chars; a length byte
                              * equal to NTCC instead switches to the next token class
                              * (tc is shifted left by one bit). ti steps through
                              * tokeninfo in parallel with the entries. */
1036                         tl = tokenlist;
1037                         tc = 0x00000001;
1038                         ti = tokeninfo;
1039                         while (*tl) {
1040                                 l = *(tl++);
1041                                 if (l == NTCC) {
1042                                         tc <<= 1;
1043                                         continue;
1044                                 }
1045                                 /* if token class is expected, token
1046                                  * matches and it's not a longer word,
1047                                  * then this is what we are looking for
1048                                  */
1049                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1050                                  && *tl == *p && strncmp(p, tl, l) == 0
1051                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1052                                 ) {
1053                                         t_info = *ti;
1054                                         p += l;
1055                                         break;
1056                                 }
1057                                 ti++;
1058                                 tl += l;
1059                         }
1060
                             /* the loop above ran off the end of the table: no match */
1061                         if (!*tl) {
1062                                 /* it's a name (var/array/function),
1063                                  * otherwise it's something wrong
1064                                  */
1065                                 if (!isalnum_(*p))
1066                                         syntax_error(EMSG_UNEXP_TOKEN);
1067
                                     /* move the name one byte towards the start of the
                                      * buffer (overwriting the byte in front of it), so
                                      * that a terminating NUL can be stored without
                                      * destroying the character that follows the name */
1068                                 t_string = --p;
1069                                 while (isalnum_(*(++p))) {
1070                                         *(p-1) = *p;
1071                                 }
1072                                 *(p-1) = '\0';
1073                                 tc = TC_VARIABLE;
1074                                 /* also consume whitespace between functionname and bracket */
1075                                 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1076                                         skip_spaces(&p);
                                     /* name followed by '(' is a function ('(' is not consumed),
                                      * name followed by '[' is an array ('[' is consumed) */
1077                                 if (*p == '(') {
1078                                         tc = TC_FUNCTION;
1079                                 } else {
1080                                         if (*p == '[') {
1081                                                 p++;
1082                                                 tc = TC_ARRAY;
1083                                         }
1084                                 }
1085                         }
1086                 }
1087                 g_pos = p;
1088
1089                 /* skipping newlines in some cases */
                     /* i.e. when the class of the previously returned token has TC_NOTERM */
1090                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1091                         goto readnext;
1092
1093                 /* insert concatenation operator when needed */
                     /* the token just read is saved and a TC_BINOP concatenation is
                      * returned instead; the saved token is delivered by the next call */
1094                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1095                         concat_inserted = TRUE;
1096                         save_tclass = tc;
1097                         save_info = t_info;
1098                         tc = TC_BINOP;
1099                         t_info = OC_CONCAT | SS | P(35);
1100                 }
1101
1102                 t_tclass = tc;
1103         }
             /* remember the class of the token being returned for the next call */
1104         ltclass = t_tclass;
1105
1106         /* Are we ready for this? */
1107         if (!(ltclass & expected))
1108                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1109                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1110
1111         return ltclass;
1112 #undef concat_inserted
1113 #undef save_tclass
1114 #undef save_info
1115 #undef ltclass
1116 }
1117
1118 static void rollback_token(void)
1119 {
1120         t_rollback = TRUE;
1121 }
1122
1123 static node *new_node(uint32_t info)
1124 {
1125         node *n;
1126
1127         n = xzalloc(sizeof(node));
1128         n->info = info;
1129         n->lineno = g_lineno;
1130         return n;
1131 }
1132
1133 static node *mk_re_node(const char *s, node *n, regex_t *re)
1134 {
1135         n->info = OC_REGEXP;
1136         n->l.re = re;
1137         n->r.ire = re + 1;
1138         xregcomp(re, s, REG_EXTENDED);
1139         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1140
1141         return n;
1142 }
1143
1144 static node *condition(void)
1145 {
1146         next_token(TC_SEQSTART);
1147         return parse_expr(TC_SEQTERM);
1148 }
1149
1150 /* parse expression terminated by given argument, return ptr
1151  * to built subtree. Terminator is eaten by parse_expr */
1152 static node *parse_expr(uint32_t iexp)
1153 {
1154         node sn;
1155         node *cn = &sn;
1156         node *vn, *glptr;
1157         uint32_t tc, xtc;
1158         var *v;
1159
1160         sn.info = PRIMASK;
1161         sn.r.n = glptr = NULL;
1162         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1163
1164         while (!((tc = next_token(xtc)) & iexp)) {
1165                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1166                         /* input redirection (<) attached to glptr node */
1167                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1168                         cn->a.n = glptr;
1169                         xtc = TC_OPERAND | TC_UOPPRE;
1170                         glptr = NULL;
1171
1172                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1173                         /* for binary and postfix-unary operators, jump back over
1174                          * previous operators with higher priority */
1175                         vn = cn;
1176                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1177                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1178                         ) {
1179                                 vn = vn->a.n;
1180                         }
1181                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1182                                 t_info += P(6);
1183                         cn = vn->a.n->r.n = new_node(t_info);
1184                         cn->a.n = vn->a.n;
1185                         if (tc & TC_BINOP) {
1186                                 cn->l.n = vn;
1187                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1188                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1189                                         /* it's a pipe */
1190                                         next_token(TC_GETLINE);
1191                                         /* give maximum priority to this pipe */
1192                                         cn->info &= ~PRIMASK;
1193                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1194                                 }
1195                         } else {
1196                                 cn->r.n = vn;
1197                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1198                         }
1199                         vn->a.n = cn;
1200
1201                 } else {
1202                         /* for operands and prefix-unary operators, attach them
1203                          * to last node */
1204                         vn = cn;
1205                         cn = vn->r.n = new_node(t_info);
1206                         cn->a.n = vn;
1207                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1208                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1209                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1210                                 /* one should be very careful with switch on tclass -
1211                                  * only simple tclasses should be used! */
1212                                 switch (tc) {
1213                                 case TC_VARIABLE:
1214                                 case TC_ARRAY:
1215                                         cn->info = OC_VAR;
1216                                         v = hash_search(ahash, t_string);
1217                                         if (v != NULL) {
1218                                                 cn->info = OC_FNARG;
1219                                                 cn->l.i = v->x.aidx;
1220                                         } else {
1221                                                 cn->l.v = newvar(t_string);
1222                                         }
1223                                         if (tc & TC_ARRAY) {
1224                                                 cn->info |= xS;
1225                                                 cn->r.n = parse_expr(TC_ARRTERM);
1226                                         }
1227                                         break;
1228
1229                                 case TC_NUMBER:
1230                                 case TC_STRING:
1231                                         cn->info = OC_VAR;
1232                                         v = cn->l.v = xzalloc(sizeof(var));
1233                                         if (tc & TC_NUMBER)
1234                                                 setvar_i(v, t_double);
1235                                         else
1236                                                 setvar_s(v, t_string);
1237                                         break;
1238
1239                                 case TC_REGEXP:
1240                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1241                                         break;
1242
1243                                 case TC_FUNCTION:
1244                                         cn->info = OC_FUNC;
1245                                         cn->r.f = newfunc(t_string);
1246                                         cn->l.n = condition();
1247                                         break;
1248
1249                                 case TC_SEQSTART:
1250                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1251                                         cn->a.n = vn;
1252                                         break;
1253
1254                                 case TC_GETLINE:
1255                                         glptr = cn;
1256                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1257                                         break;
1258
1259                                 case TC_BUILTIN:
1260                                         cn->l.n = condition();
1261                                         break;
1262                                 }
1263                         }
1264                 }
1265         }
1266         return sn.r.n;
1267 }
1268
1269 /* add node to chain. Return ptr to alloc'd node */
1270 static node *chain_node(uint32_t info)
1271 {
1272         node *n;
1273
1274         if (!seq->first)
1275                 seq->first = seq->last = new_node(0);
1276
1277         if (seq->programname != g_progname) {
1278                 seq->programname = g_progname;
1279                 n = chain_node(OC_NEWSOURCE);
1280                 n->l.s = xstrdup(g_progname);
1281         }
1282
1283         n = seq->last;
1284         n->info = info;
1285         seq->last = n->a.n = new_node(OC_DONE);
1286
1287         return n;
1288 }
1289
1290 static void chain_expr(uint32_t info)
1291 {
1292         node *n;
1293
1294         n = chain_node(info);
1295         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1296         if (t_tclass & TC_GRPTERM)
1297                 rollback_token();
1298 }
1299
1300 static node *chain_loop(node *nn)
1301 {
1302         node *n, *n2, *save_brk, *save_cont;
1303
1304         save_brk = break_ptr;
1305         save_cont = continue_ptr;
1306
1307         n = chain_node(OC_BR | Vx);
1308         continue_ptr = new_node(OC_EXEC);
1309         break_ptr = new_node(OC_EXEC);
1310         chain_group();
1311         n2 = chain_node(OC_EXEC | Vx);
1312         n2->l.n = nn;
1313         n2->a.n = n;
1314         continue_ptr->a.n = n2;
1315         break_ptr->a.n = n->r.n = seq->last;
1316
1317         continue_ptr = save_cont;
1318         break_ptr = save_brk;
1319
1320         return n;
1321 }
1322
1323 /* parse group and attach it to chain */
1324 static void chain_group(void)
1325 {
1326         uint32_t c;
1327         node *n, *n2, *n3;
1328
1329         do {
1330                 c = next_token(TC_GRPSEQ);
1331         } while (c & TC_NEWLINE);
1332
1333         if (c & TC_GRPSTART) {
1334                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1335                         if (t_tclass & TC_NEWLINE) continue;
1336                         rollback_token();
1337                         chain_group();
1338                 }
1339         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1340                 rollback_token();
1341                 chain_expr(OC_EXEC | Vx);
1342         } else {                                                /* TC_STATEMNT */
1343                 switch (t_info & OPCLSMASK) {
1344                 case ST_IF:
1345                         n = chain_node(OC_BR | Vx);
1346                         n->l.n = condition();
1347                         chain_group();
1348                         n2 = chain_node(OC_EXEC);
1349                         n->r.n = seq->last;
1350                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1351                                 chain_group();
1352                                 n2->a.n = seq->last;
1353                         } else {
1354                                 rollback_token();
1355                         }
1356                         break;
1357
1358                 case ST_WHILE:
1359                         n2 = condition();
1360                         n = chain_loop(NULL);
1361                         n->l.n = n2;
1362                         break;
1363
1364                 case ST_DO:
1365                         n2 = chain_node(OC_EXEC);
1366                         n = chain_loop(NULL);
1367                         n2->a.n = n->a.n;
1368                         next_token(TC_WHILE);
1369                         n->l.n = condition();
1370                         break;
1371
1372                 case ST_FOR:
1373                         next_token(TC_SEQSTART);
1374                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1375                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1376                                 if ((n2->info & OPCLSMASK) != OC_IN)
1377                                         syntax_error(EMSG_UNEXP_TOKEN);
1378                                 n = chain_node(OC_WALKINIT | VV);
1379                                 n->l.n = n2->l.n;
1380                                 n->r.n = n2->r.n;
1381                                 n = chain_loop(NULL);
1382                                 n->info = OC_WALKNEXT | Vx;
1383                                 n->l.n = n2->l.n;
1384                         } else {                        /* for (;;) */
1385                                 n = chain_node(OC_EXEC | Vx);
1386                                 n->l.n = n2;
1387                                 n2 = parse_expr(TC_SEMICOL);
1388                                 n3 = parse_expr(TC_SEQTERM);
1389                                 n = chain_loop(n3);
1390                                 n->l.n = n2;
1391                                 if (!n2)
1392                                         n->info = OC_EXEC;
1393                         }
1394                         break;
1395
1396                 case OC_PRINT:
1397                 case OC_PRINTF:
1398                         n = chain_node(t_info);
1399                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1400                         if (t_tclass & TC_OUTRDR) {
1401                                 n->info |= t_info;
1402                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1403                         }
1404                         if (t_tclass & TC_GRPTERM)
1405                                 rollback_token();
1406                         break;
1407
1408                 case OC_BREAK:
1409                         n = chain_node(OC_EXEC);
1410                         n->a.n = break_ptr;
1411                         break;
1412
1413                 case OC_CONTINUE:
1414                         n = chain_node(OC_EXEC);
1415                         n->a.n = continue_ptr;
1416                         break;
1417
1418                 /* delete, next, nextfile, return, exit */
1419                 default:
1420                         chain_expr(t_info);
1421                 }
1422         }
1423 }
1424
1425 static void parse_program(char *p)
1426 {
1427         uint32_t tclass;
1428         node *cn;
1429         func *f;
1430         var *v;
1431
1432         g_pos = p;
1433         t_lineno = 1;
1434         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1435                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1436
1437                 if (tclass & TC_OPTERM)
1438                         continue;
1439
1440                 seq = &mainseq;
1441                 if (tclass & TC_BEGIN) {
1442                         seq = &beginseq;
1443                         chain_group();
1444
1445                 } else if (tclass & TC_END) {
1446                         seq = &endseq;
1447                         chain_group();
1448
1449                 } else if (tclass & TC_FUNCDECL) {
1450                         next_token(TC_FUNCTION);
1451                         g_pos++;
1452                         f = newfunc(t_string);
1453                         f->body.first = NULL;
1454                         f->nargs = 0;
1455                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1456                                 v = findvar(ahash, t_string);
1457                                 v->x.aidx = (f->nargs)++;
1458
1459                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1460                                         break;
1461                         }
1462                         seq = &(f->body);
1463                         chain_group();
1464                         clear_array(ahash);
1465
1466                 } else if (tclass & TC_OPSEQ) {
1467                         rollback_token();
1468                         cn = chain_node(OC_TEST);
1469                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1470                         if (t_tclass & TC_GRPSTART) {
1471                                 rollback_token();
1472                                 chain_group();
1473                         } else {
1474                                 chain_node(OC_PRINT);
1475                         }
1476                         cn->r.n = mainseq.last;
1477
1478                 } else /* if (tclass & TC_GRPSTART) */ {
1479                         rollback_token();
1480                         chain_group();
1481                 }
1482         }
1483 }
1484
1485
1486 /* -------- program execution part -------- */
1487
1488 static node *mk_splitter(const char *s, tsplitter *spl)
1489 {
1490         regex_t *re, *ire;
1491         node *n;
1492
1493         re = &spl->re[0];
1494         ire = &spl->re[1];
1495         n = &spl->n;
1496         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1497                 regfree(re);
1498                 regfree(ire); // TODO: nuke ire, use re+1?
1499         }
1500         if (strlen(s) > 1) {
1501                 mk_re_node(s, n, re);
1502         } else {
1503                 n->info = (uint32_t) *s;
1504         }
1505
1506         return n;
1507 }
1508
1509 /* use node as a regular expression. Supplied with node ptr and regex_t
1510  * storage space. Return ptr to regex (if result points to preg, it should
1511  * be later regfree'd manually
1512  */
1513 static regex_t *as_regex(node *op, regex_t *preg)
1514 {
1515         int cflags;
1516         var *v;
1517         const char *s;
1518
1519         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1520                 return icase ? op->r.ire : op->l.re;
1521         }
1522         v = nvalloc(1);
1523         s = getvar_s(evaluate(op, v));
1524
1525         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1526         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1527          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1528          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1529          * (maybe gsub is not supposed to use REG_EXTENDED?).
1530          */
1531         if (regcomp(preg, s, cflags)) {
1532                 cflags &= ~REG_EXTENDED;
1533                 xregcomp(preg, s, cflags);
1534         }
1535         nvfree(v);
1536         return preg;
1537 }
1538
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * If b is NULL or n >= *size, the buffer is (re)allocated with
 * n + n/2 + 80 bytes and *size updated; otherwise b is returned as is.
 * *size is not read when b is NULL.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b != NULL && n < *size)
                return b;

        *size = n + (n>>1) + 80;
        return xrealloc(b, *size);
}
1548
1549 /* resize field storage space */
1550 static void fsrealloc(int size)
1551 {
1552         int i;
1553
1554         if (size >= maxfields) {
1555                 i = maxfields;
1556                 maxfields = size + 16;
1557                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1558                 for (; i < maxfields; i++) {
1559                         Fields[i].type = VF_SPECIAL;
1560                         Fields[i].string = NULL;
1561                 }
1562         }
1563
1564         if (size < nfields) {
1565                 for (i = size; i < nfields; i++) {
1566                         clrvar(Fields + i);
1567                 }
1568         }
1569         nfields = size;
1570 }
1571
/* Split string s into fields according to splitter node spl.
 * A zero-filled buffer is allocated and stored in *slist; the fields are
 * written to it one after another, each terminated by NUL. Returns the
 * number of fields. Freeing *slist is left to the caller.
 * The kind of split is chosen from spl->info: a regexp (OC_REGEXP), no
 * separator at all (char 0), a single separator char, or ' ' meaning
 * "runs of whitespace".
 */
1572 static int awk_split(const char *s, node *spl, char **slist)
1573 {
1574         int l, n = 0;
1575         char c[4];
1576         char *s1;
1577         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1578
1579         /* in worst case, each char would be a separate field */
1580         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1581         strcpy(s1, s);
1582
             /* c[0], c[1]: separator char (low byte of spl->info);
              * c[2]: '\n' if the string value of RS is empty, else NUL.
              * So c is the set of separator chars, and c+2 is "\n" or "" */
1583         c[0] = c[1] = (char)spl->info;
1584         c[2] = c[3] = '\0';
1585         if (*getvar_s(intvar[RS]) == '\0')
1586                 c[2] = '\n';
1587
1588         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1589                 if (!*s)
1590                         return n; /* "": zero fields */
1591                 n++; /* at least one field will be there */
1592                 do {
1593                         l = strcspn(s, c+2); /* len till next NUL or \n */
1594                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1595                          && pmatch[0].rm_so <= l
1596                         ) {
                                     /* separator found: the field ends where the match starts */
1597                                 l = pmatch[0].rm_so;
                                     /* empty match at the very start: take one char
                                      * anyway so that s keeps advancing */
1598                                 if (pmatch[0].rm_eo == 0) {
1599                                         l++;
1600                                         pmatch[0].rm_eo++;
1601                                 }
1602                                 n++; /* we saw yet another delimiter */
1603                         } else {
                                     /* no separator before NUL/newline: the field is the
                                      * rest; rm_eo is reused as "number of chars consumed",
                                      * including the newline if there is one */
1604                                 pmatch[0].rm_eo = l;
1605                                 if (s[l])
1606                                         pmatch[0].rm_eo++;
1607                         }
1608                         memcpy(s1, s, l);
1609                         /* make sure we remove *all* of the separator chars */
1610                         do {
1611                                 s1[l] = '\0';
1612                         } while (++l < pmatch[0].rm_eo);
                             /* presumably moves s1 past the word just stored -
                              * TODO confirm against nextword() */
1613                         nextword(&s1);
1614                         s += pmatch[0].rm_eo;
1615                 } while (*s);
1616                 return n;
1617         }
1618         if (c[0] == '\0') {  /* null split */
                     /* every char becomes a field of its own */
1619                 while (*s) {
1620                         *s1++ = *s++;
1621                         *s1++ = '\0';
1622                         n++;
1623                 }
1624                 return n;
1625         }
1626         if (c[0] != ' ') {  /* single-character split */
                     /* works in place on the copy of s made above */
1627                 if (icase) {
                             /* match the separator in both cases */
1628                         c[0] = toupper(c[0]);
1629                         c[1] = tolower(c[1]);
1630                 }
                     /* non-empty input has one field more than separators */
1631                 if (*s1) n++;
1632                 while ((s1 = strpbrk(s1, c))) {
1633                         *s1++ = '\0';
1634                         n++;
1635                 }
1636                 return n;
1637         }
1638         /* space split */
             /* fields are separated by runs of whitespace; leading and
              * trailing whitespace produces no fields */
1639         while (*s) {
1640                 s = skip_whitespace(s);
1641                 if (!*s) break;
1642                 n++;
1643                 while (*s && !isspace(*s))
1644                         *s1++ = *s++;
1645                 *s1++ = '\0';
1646         }
1647         return n;
1648 }
1649
1650 static void split_f0(void)
1651 {
1652 /* static char *fstrings; */
1653 #define fstrings (G.split_f0__fstrings)
1654
1655         int i, n;
1656         char *s;
1657
1658         if (is_f0_split)
1659                 return;
1660
1661         is_f0_split = TRUE;
1662         free(fstrings);
1663         fsrealloc(0);
1664         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1665         fsrealloc(n);
1666         s = fstrings;
1667         for (i = 0; i < n; i++) {
1668                 Fields[i].string = nextword(&s);
1669                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1670         }
1671
1672         /* set NF manually to avoid side effects */
1673         clrvar(intvar[NF]);
1674         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1675         intvar[NF]->number = nfields;
1676 #undef fstrings
1677 }
1678
1679 /* perform additional actions when some internal variables changed */
1680 static void handle_special(var *v)
1681 {
1682         int n;
1683         char *b;
1684         const char *sep, *s;
1685         int sl, l, len, i, bsize;
1686
1687         if (!(v->type & VF_SPECIAL))
1688                 return;
1689
1690         if (v == intvar[NF]) {
1691                 n = (int)getvar_i(v);
1692                 fsrealloc(n);
1693
1694                 /* recalculate $0 */
1695                 sep = getvar_s(intvar[OFS]);
1696                 sl = strlen(sep);
1697                 b = NULL;
1698                 len = 0;
1699                 for (i = 0; i < n; i++) {
1700                         s = getvar_s(&Fields[i]);
1701                         l = strlen(s);
1702                         if (b) {
1703                                 memcpy(b+len, sep, sl);
1704                                 len += sl;
1705                         }
1706                         b = qrealloc(b, len+l+sl, &bsize);
1707                         memcpy(b+len, s, l);
1708                         len += l;
1709                 }
1710                 if (b)
1711                         b[len] = '\0';
1712                 setvar_p(intvar[F0], b);
1713                 is_f0_split = TRUE;
1714
1715         } else if (v == intvar[F0]) {
1716                 is_f0_split = FALSE;
1717
1718         } else if (v == intvar[FS]) {
1719                 mk_splitter(getvar_s(v), &fsplitter);
1720
1721         } else if (v == intvar[RS]) {
1722                 mk_splitter(getvar_s(v), &rsplitter);
1723
1724         } else if (v == intvar[IGNORECASE]) {
1725                 icase = istrue(v);
1726
1727         } else {                                /* $n */
1728                 n = getvar_i(intvar[NF]);
1729                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1730                 /* right here v is invalid. Just to note... */
1731         }
1732 }
1733
1734 /* step through func/builtin/etc arguments */
1735 static node *nextarg(node **pn)
1736 {
1737         node *n;
1738
1739         n = *pn;
1740         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1741                 *pn = n->r.n;
1742                 n = n->l.n;
1743         } else {
1744                 *pn = NULL;
1745         }
1746         return n;
1747 }
1748
1749 static void hashwalk_init(var *v, xhash *array)
1750 {
1751         hash_item *hi;
1752         unsigned i;
1753         walker_list *w;
1754         walker_list *prev_walker;
1755
1756         if (v->type & VF_WALK) {
1757                 prev_walker = v->x.walker;
1758         } else {
1759                 v->type |= VF_WALK;
1760                 prev_walker = NULL;
1761         }
1762         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1763
1764         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1765         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1766         w->cur = w->end = w->wbuf;
1767         w->prev = prev_walker;
1768         for (i = 0; i < array->csize; i++) {
1769                 hi = array->items[i];
1770                 while (hi) {
1771                         strcpy(w->end, hi->name);
1772                         nextword(&w->end);
1773                         hi = hi->next;
1774                 }
1775         }
1776 }
1777
1778 static int hashwalk_next(var *v)
1779 {
1780         walker_list *w = v->x.walker;
1781
1782         if (w->cur >= w->end) {
1783                 walker_list *prev_walker = w->prev;
1784
1785                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1786                 free(w);
1787                 v->x.walker = prev_walker;
1788                 return FALSE;
1789         }
1790
1791         setvar_s(v, nextword(&w->cur));
1792         return TRUE;
1793 }
1794
1795 /* evaluate node, return 1 when result is true, 0 otherwise */
1796 static int ptest(node *pattern)
1797 {
1798         /* ptest__v is "static": to save stack space? */
1799         return istrue(evaluate(pattern, &G.ptest__v));
1800 }
1801
1802 /* read next record from stream rsm into a variable v
      *
      * The record separator is taken from rsplitter: a regexp when its node
      * class is OC_REGEXP, otherwise the single char (char)rsplitter.n.info.
      * When that char is NUL, a record ends at "\n\n" (any further newlines
      * belong to the separator) and leading newlines are skipped.
      *
      * If data is available, v receives the record text (and is flagged
      * VF_USER) and RT receives the separator text that ended it. The
      * buffer state (buffer, adv, pos, size) is stored back into rsm.
      *
      * Returns 1 if a record was stored, 0 if no data was left, and -1 if
      * reading failed (ERRNO is then set from errno).
      */
1803 static int awk_getline(rstream *rsm, var *v)
1804 {
1805         char *b;
1806         regmatch_t pmatch[2];
1807         int a, p, pp=0, size;
1808         int fd, so, eo, r, rp;
1809         char c, *m, *s;
1810
1811         /* we're using our own buffer since we need access to accumulating
1812          * characters
1813          */
1814         fd = fileno(rsm->F);
1815         m = rsm->buffer;      /* start of the allocated buffer */
1816         a = rsm->adv;         /* offset in m where unconsumed data begins */
1817         p = rsm->pos;         /* number of unconsumed bytes at m+a */
1818         size = rsm->size;
1819         c = (char) rsplitter.n.info; /* separator char for the non-regexp case */
1820         rp = 0;               /* offset in b where the record text starts */
1821
1822         if (!m)
1823                 m = qrealloc(m, 256, &size);
1824         do {
1825                 b = m + a;
                     /* default: empty separator located at the end of the data */
1826                 so = eo = p;
1827                 r = 1;
1828                 if (p > 0) {
1829                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1830                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1831                                                         b, 1, pmatch, 0) == 0) {
1832                                         so = pmatch[0].rm_so;
1833                                         eo = pmatch[0].rm_eo;
                                             /* a match ending exactly at the end of the data
                                              * is not accepted yet (presumably because it could
                                              * still grow once more input arrives) */
1834                                         if (b[eo] != '\0')
1835                                                 break;
1836                                 }
1837                         } else if (c != '\0') {
                                     /* only the part added by the last read (from pp) is searched */
1838                                 s = strchr(b+pp, c);
                                     /* no separator char: a NUL byte inside the new data ends the record too */
1839                                 if (!s) s = memchr(b+pp, '\0', p - pp);
1840                                 if (s) {
1841                                         so = eo = s-b;
1842                                         eo++;
1843                                         break;
1844                                 }
1845                         } else {
                                     /* NUL separator char: skip leading newlines, then look for "\n\n" */
1846                                 while (b[rp] == '\n')
1847                                         rp++;
1848                                 s = strstr(b+rp, "\n\n");
1849                                 if (s) {
1850                                         so = eo = s-b;
1851                                         while (b[eo] == '\n') eo++;
1852                                         if (b[eo] != '\0')
1853                                                 break;
1854                                 }
1855                         }
1856                 }
1857
                     /* no complete separator yet: slide the unconsumed data (with its
                      * terminating NUL) to the start of the buffer and read more */
1858                 if (a > 0) {
1859                         memmove(m, (const void *)(m+a), p+1);
1860                         b = m;
1861                         a = 0;
1862                 }
1863
1864                 m = qrealloc(m, a+p+128, &size);
1865                 b = m + a;
1866                 pp = p;
1867                 p += safe_read(fd, b+p, size-p-1);
                     /* p shrinks only when safe_read() returned a negative value */
1868                 if (p < pp) {
1869                         p = 0;
1870                         r = 0;
1871                         setvar_i(intvar[ERRNO], errno);
1872                 }
1873                 b[p] = '\0';
1874
1875         } while (p > pp); /* keep going while the last read added data */
1876
1877         if (p == 0) {
                     /* nothing to return: r becomes 0 (no data) or -1 (read error) */
1878                 r--;
1879         } else {
                     /* temporarily NUL-terminate at so / eo to copy out the record
                      * text and the separator text, restoring the bytes afterwards */
1880                 c = b[so]; b[so] = '\0';
1881                 setvar_s(v, b+rp);
1882                 v->type |= VF_USER;
1883                 b[so] = c;
1884                 c = b[eo]; b[eo] = '\0';
1885                 setvar_s(intvar[RT], b+so);
1886                 b[eo] = c;
1887         }
1888
             /* remember where the data after the separator starts */
1889         rsm->buffer = m;
1890         rsm->adv = a + eo;
1891         rsm->pos = p - eo;
1892         rsm->size = size;
1893
1894         return r;
1895 }
1896
1897 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1898 {
1899         int r = 0;
1900         char c;
1901         const char *s = format;
1902
1903         if (int_as_int && n == (int)n) {
1904                 r = snprintf(b, size, "%d", (int)n);
1905         } else {
1906                 do { c = *s; } while (c && *++s);
1907                 if (strchr("diouxX", c)) {
1908                         r = snprintf(b, size, format, (int)n);
1909                 } else if (strchr("eEfgG", c)) {
1910                         r = snprintf(b, size, format, n);
1911                 } else {
1912                         syntax_error(EMSG_INV_FMT);
1913                 }
1914         }
1915         return r;
1916 }
1917
1918 /* formatted output into an allocated buffer, return ptr to buffer */
1919 static char *awk_printf(node *n)
1920 {
1921         char *b = NULL;
1922         char *fmt, *s, *f;
1923         const char *s1;
1924         int i, j, incr, bsize;
1925         char c, c1;
1926         var *v, *arg;
1927
1928         v = nvalloc(1);
1929         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1930
1931         i = 0;
1932         while (*f) {
1933                 s = f;
1934                 while (*f && (*f != '%' || *(++f) == '%'))
1935                         f++;
1936                 while (*f && !isalpha(*f)) {
1937                         if (*f == '*')
1938                                 syntax_error("%*x formats are not supported");
1939                         f++;
1940                 }
1941
1942                 incr = (f - s) + MAXVARFMT;
1943                 b = qrealloc(b, incr + i, &bsize);
1944                 c = *f;
1945                 if (c != '\0') f++;
1946                 c1 = *f;
1947                 *f = '\0';
1948                 arg = evaluate(nextarg(&n), v);
1949
1950                 j = i;
1951                 if (c == 'c' || !c) {
1952                         i += sprintf(b+i, s, is_numeric(arg) ?
1953                                         (char)getvar_i(arg) : *getvar_s(arg));
1954                 } else if (c == 's') {
1955                         s1 = getvar_s(arg);
1956                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1957                         i += sprintf(b+i, s, s1);
1958                 } else {
1959                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1960                 }
1961                 *f = c1;
1962
1963                 /* if there was an error while sprintf, return value is negative */
1964                 if (i < j) i = j;
1965         }
1966
1967         b = xrealloc(b, i + 1);
1968         free(fmt);
1969         nvfree(v);
1970         b[i] = '\0';
1971         return b;
1972 }
1973
1974 /* common substitution routine
1975  * replace (nm) substring of (src) that match (n) with (repl), store
1976  * result into (dest), return number of substitutions. If nm=0, replace
1977  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1978  * subexpression matching (\1-\9)
1979  */
1980 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1981 {
1982         char *ds = NULL;
1983         const char *s;
1984         const char *sp;
1985         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1986         regmatch_t pmatch[10];
1987         regex_t sreg, *re;
1988
1989         re = as_regex(rn, &sreg);
1990         if (!src) src = intvar[F0];
1991         if (!dest) dest = intvar[F0];
1992
1993         i = di = 0;
1994         sp = getvar_s(src);
1995         rl = strlen(repl);
1996         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1997                 so = pmatch[0].rm_so;
1998                 eo = pmatch[0].rm_eo;
1999
2000                 ds = qrealloc(ds, di + eo + rl, &dssize);
2001                 memcpy(ds + di, sp, eo);
2002                 di += eo;
2003                 if (++i >= nm) {
2004                         /* replace */
2005                         di -= (eo - so);
2006                         nbs = 0;
2007                         for (s = repl; *s; s++) {
2008                                 ds[di++] = c = *s;
2009                                 if (c == '\\') {
2010                                         nbs++;
2011                                         continue;
2012                                 }
2013                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
2014                                         di -= ((nbs + 3) >> 1);
2015                                         j = 0;
2016                                         if (c != '&') {
2017                                                 j = c - '0';
2018                                                 nbs++;
2019                                         }
2020                                         if (nbs % 2) {
2021                                                 ds[di++] = c;
2022                                         } else {
2023                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
2024                                                 ds = qrealloc(ds, di + rl + n, &dssize);
2025                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
2026                                                 di += n;
2027                                         }
2028                                 }
2029                                 nbs = 0;
2030                         }
2031                 }
2032
2033                 sp += eo;
2034                 if (i == nm)
2035                         break;
2036                 if (eo == so) {
2037                         ds[di] = *sp++;
2038                         if (!ds[di++])
2039                                 break;
2040                 }
2041         }
2042
2043         ds = qrealloc(ds, di + strlen(sp), &dssize);
2044         strcpy(ds + di, sp);
2045         setvar_p(dest, ds);
2046         if (re == &sreg)
2047                 regfree(re);
2048         return i;
2049 }
2050
2051 static NOINLINE int do_mktime(const char *ds)
2052 {
2053         struct tm then;
2054         int count;
2055
2056         /*memset(&then, 0, sizeof(then)); - not needed */
2057         then.tm_isdst = -1; /* default is unknown */
2058
2059         /* manpage of mktime says these fields are ints,
2060          * so we can sscanf stuff directly into them */
2061         count = sscanf(ds, "%u %u %u %u %u %u %d",
2062                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2063                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2064                 &then.tm_isdst);
2065
2066         if (count < 6
2067          || (unsigned)then.tm_mon < 1
2068          || (unsigned)then.tm_year < 1900
2069         ) {
2070                 return -1;
2071         }
2072
2073         then.tm_mon -= 1;
2074         then.tm_year -= 1900;
2075
2076         return mktime(&then);
2077 }
2078
/*
 * Execute the builtin function call described by node op and store its
 * value in res. Returns res.
 *
 * Up to four arguments are collected from op->l.n. Flag bits in op->info,
 * shifted right by one position per argument, select whether an argument
 * is evaluated (mask 0x09000000) and whether its string value is fetched
 * as well (mask 0x08000000). The top two bits of op->info hold the minimum
 * number of arguments; fewer is a syntax error. The builtin itself is
 * selected by op->info & OPNMASK.
 */
2079 static NOINLINE var *exec_builtin(node *op, var *res)
2080 {
2081 #define tspl (G.exec_builtin__tspl)
2082
2083         var *tv;
2084         node *an[4];          /* argument nodes */
2085         var *av[4];           /* evaluated arguments */
2086         const char *as[4];    /* string values of the arguments */
2087         regmatch_t pmatch[2];
2088         regex_t sreg, *re;
2089         node *spl;
2090         uint32_t isr, info;
2091         int nargs;
2092         time_t tt;
2093         char *s, *s1;
2094         int i, l, ll, n;
2095
2096         tv = nvalloc(4);      /* temporaries the arguments are evaluated into */
2097         isr = info = op->info;
2098         op = op->l.n;
2099
2100         av[2] = av[3] = NULL;
2101         for (i = 0; i < 4 && op; i++) {
2102                 an[i] = nextarg(&op);
2103                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2104                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2105                 isr >>= 1;
2106         }
2107
2108         nargs = i;
2109         if ((uint32_t)nargs < (info >> 30))
2110                 syntax_error(EMSG_TOO_FEW_ARGS);
2111
2112         info &= OPNMASK;
2113         switch (info) {
2114
2115         case B_a2: /* atan2 of arguments 0 and 1 */
2116 #if ENABLE_FEATURE_AWK_LIBM
2117                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2118 #else
2119                 syntax_error(EMSG_NO_MATH);
2120 #endif
2121                 break;
2122
2123         case B_sp: /* split string 0 into array 1; result is the piece count */
                     /* splitter: argument 2 (a regexp node, or a string turned into
                      * a splitter) if given, the field splitter otherwise */
2124                 if (nargs > 2) {
2125                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2126                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2127                 } else {
2128                         spl = &fsplitter.n;
2129                 }
2130
2131                 n = awk_split(as[0], spl, &s);
2132                 s1 = s;
2133                 clear_array(iamarray(av[1]));
                     /* array elements are numbered from 1 */
2134                 for (i = 1; i <= n; i++)
2135                         setari_u(av[1], i, nextword(&s1));
2136                 free(s);
2137                 setvar_i(res, n);
2138                 break;
2139
2140         case B_ss: /* substring of string 0: start (1-based) in arg 1, optional length in arg 2 */
2141                 l = strlen(as[0]);
2142                 i = getvar_i(av[1]) - 1;
                     /* clamp the start offset to 0..l */
2143                 if (i > l) i = l;
2144                 if (i < 0) i = 0;
2145                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2146                 if (n < 0) n = 0;
2147                 s = xstrndup(as[0]+i, n);
2148                 setvar_p(res, s);
2149                 break;
2150
2151         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2152          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2153         case B_an: /* bitwise AND */
2154                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2155                 break;
2156
2157         case B_co: /* bitwise complement */
2158                 setvar_i(res, ~getvar_i_int(av[0]));
2159                 break;
2160
2161         case B_ls: /* left shift */
2162                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2163                 break;
2164
2165         case B_or: /* bitwise OR */
2166                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2167                 break;
2168
2169         case B_rs: /* right shift */
2170                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2171                 break;
2172
2173         case B_xo: /* bitwise XOR */
2174                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2175                 break;
2176
2177         case B_lo: /* copy of string 0 with ASCII letters lowercased ... */
2178         case B_up: /* ... or uppercased */
2179                 s1 = s = xstrdup(as[0]);
2180                 while (*s1) {
2181                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
                             /* only bytes that are ASCII letters are touched;
                              * bit 0x20 distinguishes the two cases */
2182                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2183                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2184                         s1++;
2185                 }
2186                 setvar_p(res, s);
2187                 break;
2188
2189         case B_ix: /* 1-based position of string 1 within string 0, or 0 */
2190                 n = 0;
2191                 ll = strlen(as[1]);
2192                 l = strlen(as[0]) - ll;
2193                 if (ll > 0 && l >= 0) {
2194                         if (!icase) {
2195                                 s = strstr(as[0], as[1]);
2196                                 if (s) n = (s - as[0]) + 1;
2197                         } else {
2198                                 /* this piece of code is terribly slow and
2199                                  * really should be rewritten
2200                                  */
2201                                 for (i=0; i<=l; i++) {
2202                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2203                                                 n = i+1;
2204                                                 break;
2205                                         }
2206                                 }
2207                         }
2208                 }
2209                 setvar_i(res, n);
2210                 break;
2211
2212         case B_ti: /* format a time with strftime(): format in arg 0, time in arg 1 */
                     /* without arg 1 the current time is used;
                      * without arg 0 the default format below is used */
2213                 if (nargs > 1)
2214                         tt = getvar_i(av[1]);
2215                 else
2216                         time(&tt);
2217                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2218                 i = strftime(g_buf, MAXVARFMT,
2219                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2220                         localtime(&tt));
2221                 g_buf[i] = '\0';
2222                 setvar_s(res, g_buf);
2223                 break;
2224
2225         case B_mt: /* date string to time value, see do_mktime() */
2226                 setvar_i(res, do_mktime(as[0]));
2227                 break;
2228
2229         case B_ma: /* match string 0 against regexp 1; sets RSTART and RLENGTH */
2230                 re = as_regex(an[1], &sreg);
2231                 n = regexec(re, as[0], 1, pmatch, 0);
2232                 if (n == 0) {
                             /* make both offsets 1-based */
2233                         pmatch[0].rm_so++;
2234                         pmatch[0].rm_eo++;
2235                 } else {
                             /* no match: RSTART becomes 0 and RLENGTH -1 */
2236                         pmatch[0].rm_so = 0;
2237                         pmatch[0].rm_eo = -1;
2238                 }
2239                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2240                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2241                 setvar_i(res, pmatch[0].rm_so);
2242                 if (re == &sreg) regfree(re);
2243                 break;
2244
2245         case B_ge: /* substitution with subexpression support; the new string goes to res */
2246                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2247                 break;
2248
2249         case B_gs: /* replace all matches in arg 2 in place; result is awk_sub()'s count */
2250                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2251                 break;
2252
2253         case B_su: /* replace the first match in arg 2 in place; result is awk_sub()'s count */
2254                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2255                 break;
2256         }
2257
2258         nvfree(tv);
2259         return res;
2260 #undef tspl
2261 }
2262
2263 /*
2264  * Evaluate node - the heart of the program. Supplied with subtree
2265  * and place where to store result. returns ptr to result.
2266  */
/* XC() drops the low 8 bits of a node class value. evaluate() switches on
 * XC(opinfo & OPCLSMASK) and labels its cases with XC(OC_xxx). */
2267 #define XC(n) ((n) >> 8)
2268
2269 static var *evaluate(node *op, var *res)
2270 {
2271 /* This procedure is recursive so we should count every byte */
2272 #define fnargs (G.evaluate__fnargs)
2273 /* seed is initialized to 1 */
2274 #define seed   (G.evaluate__seed)
2275 #define sreg   (G.evaluate__sreg)
2276
2277         node *op1;
2278         var *v1;
2279         union {
2280                 var *v;
2281                 const char *s;
2282                 double d;
2283                 int i;
2284         } L, R;
2285         uint32_t opinfo;
2286         int opn;
2287         union {
2288                 char *s;
2289                 rstream *rsm;
2290                 FILE *F;
2291                 var *v;
2292                 regex_t *re;
2293                 uint32_t info;
2294         } X;
2295
2296         if (!op)
2297                 return setvar_s(res, NULL);
2298
2299         v1 = nvalloc(2);
2300
2301         while (op) {
2302                 opinfo = op->info;
2303                 opn = (opinfo & OPNMASK);
2304                 g_lineno = op->lineno;
2305
2306                 /* execute inevitable things */
2307                 op1 = op->l.n;
2308                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2309                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2310                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2311                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2312                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2313
2314                 switch (XC(opinfo & OPCLSMASK)) {
2315
2316                 /* -- iterative node type -- */
2317
2318                 /* test pattern */
2319                 case XC( OC_TEST ):
2320                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2321                                 /* it's range pattern */
2322                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2323                                         op->info |= OF_CHECKED;
2324                                         if (ptest(op1->r.n))
2325                                                 op->info &= ~OF_CHECKED;
2326
2327                                         op = op->a.n;
2328                                 } else {
2329                                         op = op->r.n;
2330                                 }
2331                         } else {
2332                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2333                         }
2334                         break;
2335
2336                 /* just evaluate an expression, also used as unconditional jump */
2337                 case XC( OC_EXEC ):
2338                         break;
2339
2340                 /* branch, used in if-else and various loops */
2341                 case XC( OC_BR ):
2342                         op = istrue(L.v) ? op->a.n : op->r.n;
2343                         break;
2344
2345                 /* initialize for-in loop */
2346                 case XC( OC_WALKINIT ):
2347                         hashwalk_init(L.v, iamarray(R.v));
2348                         break;
2349
2350                 /* get next array item */
2351                 case XC( OC_WALKNEXT ):
2352                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2353                         break;
2354
2355                 case XC( OC_PRINT ):
2356                 case XC( OC_PRINTF ):
2357                         X.F = stdout;
2358                         if (op->r.n) {
2359                                 X.rsm = newfile(R.s);
2360                                 if (!X.rsm->F) {
2361                                         if (opn == '|') {
2362                                                 X.rsm->F = popen(R.s, "w");
2363                                                 if (X.rsm->F == NULL)
2364                                                         bb_perror_msg_and_die("popen");
2365                                                 X.rsm->is_pipe = 1;
2366                                         } else {
2367                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2368                                         }
2369                                 }
2370                                 X.F = X.rsm->F;
2371                         }
2372
2373                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2374                                 if (!op1) {
2375                                         fputs(getvar_s(intvar[F0]), X.F);
2376                                 } else {
2377                                         while (op1) {
2378                                                 L.v = evaluate(nextarg(&op1), v1);
2379                                                 if (L.v->type & VF_NUMBER) {
2380                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2381                                                                         getvar_i(L.v), TRUE);
2382                                                         fputs(g_buf, X.F);
2383                                                 } else {
2384                                                         fputs(getvar_s(L.v), X.F);
2385                                                 }
2386
2387                                                 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2388                                         }
2389                                 }
2390                                 fputs(getvar_s(intvar[ORS]), X.F);
2391
2392                         } else {        /* OC_PRINTF */
2393                                 L.s = awk_printf(op1);
2394                                 fputs(L.s, X.F);
2395                                 free((char*)L.s);
2396                         }
2397                         fflush(X.F);
2398                         break;
2399
2400                 case XC( OC_DELETE ):
2401                         X.info = op1->info & OPCLSMASK;
2402                         if (X.info == OC_VAR) {
2403                                 R.v = op1->l.v;
2404                         } else if (X.info == OC_FNARG) {
2405                                 R.v = &fnargs[op1->l.i];
2406                         } else {
2407                                 syntax_error(EMSG_NOT_ARRAY);
2408                         }
2409
2410                         if (op1->r.n) {
2411                                 clrvar(L.v);
2412                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2413                                 hash_remove(iamarray(R.v), L.s);
2414                         } else {
2415                                 clear_array(iamarray(R.v));
2416                         }
2417                         break;
2418
2419                 case XC( OC_NEWSOURCE ):
2420                         g_progname = op->l.s;
2421                         break;
2422
2423                 case XC( OC_RETURN ):
2424                         copyvar(res, L.v);
2425                         break;
2426
2427                 case XC( OC_NEXTFILE ):
2428                         nextfile = TRUE;
2429                 case XC( OC_NEXT ):
2430                         nextrec = TRUE;
2431                 case XC( OC_DONE ):
2432                         clrvar(res);
2433                         break;
2434
2435                 case XC( OC_EXIT ):
2436                         awk_exit(L.d);
2437
2438                 /* -- recursive node type -- */
2439
2440                 case XC( OC_VAR ):
2441                         L.v = op->l.v;
2442                         if (L.v == intvar[NF])
2443                                 split_f0();
2444                         goto v_cont;
2445
2446                 case XC( OC_FNARG ):
2447                         L.v = &fnargs[op->l.i];
2448  v_cont:
2449                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2450                         break;
2451
2452                 case XC( OC_IN ):
2453                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2454                         break;
2455
2456                 case XC( OC_REGEXP ):
2457                         op1 = op;
2458                         L.s = getvar_s(intvar[F0]);
2459                         goto re_cont;
2460
2461                 case XC( OC_MATCH ):
2462                         op1 = op->r.n;
2463  re_cont:
2464                         X.re = as_regex(op1, &sreg);
2465                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2466                         if (X.re == &sreg) regfree(X.re);
2467                         setvar_i(res, (R.i == 0) ^ (opn == '!'));
2468                         break;
2469
2470                 case XC( OC_MOVE ):
2471                         /* if source is a temporary string, jusk relink it to dest */
2472 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2473 //then L.v ends up being a string, which is wrong
2474 //                      if (R.v == v1+1 && R.v->string) {
2475 //                              res = setvar_p(L.v, R.v->string);
2476 //                              R.v->string = NULL;
2477 //                      } else {
2478                                 res = copyvar(L.v, R.v);
2479 //                      }
2480                         break;
2481
2482                 case XC( OC_TERNARY ):
2483                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2484                                 syntax_error(EMSG_POSSIBLE_ERROR);
2485                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2486                         break;
2487
2488                 case XC( OC_FUNC ):
2489                         if (!op->r.f->body.first)
2490                                 syntax_error(EMSG_UNDEF_FUNC);
2491
2492                         X.v = R.v = nvalloc(op->r.f->nargs + 1);
2493                         while (op1) {
2494                                 L.v = evaluate(nextarg(&op1), v1);
2495                                 copyvar(R.v, L.v);
2496                                 R.v->type |= VF_CHILD;
2497                                 R.v->x.parent = L.v;
2498                                 if (++R.v - X.v >= op->r.f->nargs)
2499                                         break;
2500                         }
2501
2502                         R.v = fnargs;
2503                         fnargs = X.v;
2504
2505                         L.s = g_progname;
2506                         res = evaluate(op->r.f->body.first, res);
2507                         g_progname = L.s;
2508
2509                         nvfree(fnargs);
2510                         fnargs = R.v;
2511                         break;
2512
2513                 case XC( OC_GETLINE ):
2514                 case XC( OC_PGETLINE ):
2515                         if (op1) {
2516                                 X.rsm = newfile(L.s);
2517                                 if (!X.rsm->F) {
2518                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2519                                                 X.rsm->F = popen(L.s, "r");
2520                                                 X.rsm->is_pipe = TRUE;
2521                                         } else {
2522                                                 X.rsm->F = fopen_for_read(L.s);         /* not xfopen! */
2523                                         }
2524                                 }
2525                         } else {
2526                                 if (!iF) iF = next_input_file();
2527                                 X.rsm = iF;
2528                         }
2529
2530                         if (!X.rsm->F) {
2531                                 setvar_i(intvar[ERRNO], errno);
2532                                 setvar_i(res, -1);
2533                                 break;
2534                         }
2535
2536                         if (!op->r.n)
2537                                 R.v = intvar[F0];
2538
2539                         L.i = awk_getline(X.rsm, R.v);
2540                         if (L.i > 0) {
2541                                 if (!op1) {
2542                                         incvar(intvar[FNR]);
2543                                         incvar(intvar[NR]);
2544                                 }
2545                         }
2546                         setvar_i(res, L.i);
2547                         break;
2548
2549                 /* simple builtins */
2550                 case XC( OC_FBLTIN ):
2551                         switch (opn) {
2552
2553                         case F_in:
2554                                 R.d = (int)L.d;
2555                                 break;
2556
2557                         case F_rn:
2558                                 R.d = (double)rand() / (double)RAND_MAX;
2559                                 break;
2560 #if ENABLE_FEATURE_AWK_LIBM
2561                         case F_co:
2562                                 R.d = cos(L.d);
2563                                 break;
2564
2565                         case F_ex:
2566                                 R.d = exp(L.d);
2567                                 break;
2568
2569                         case F_lg:
2570                                 R.d = log(L.d);
2571                                 break;
2572
2573                         case F_si:
2574                                 R.d = sin(L.d);
2575                                 break;
2576
2577                         case F_sq:
2578                                 R.d = sqrt(L.d);
2579                                 break;
2580 #else
2581                         case F_co:
2582                         case F_ex:
2583                         case F_lg:
2584                         case F_si:
2585                         case F_sq:
2586                                 syntax_error(EMSG_NO_MATH);
2587                                 break;
2588 #endif
2589                         case F_sr:
2590                                 R.d = (double)seed;
2591                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2592                                 srand(seed);
2593                                 break;
2594
2595                         case F_ti:
2596                                 R.d = time(NULL);
2597                                 break;
2598
2599                         case F_le:
2600                                 if (!op1)
2601                                         L.s = getvar_s(intvar[F0]);
2602                                 R.d = strlen(L.s);
2603                                 break;
2604
2605                         case F_sy:
2606                                 fflush_all();
2607                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2608                                                 ? (system(L.s) >> 8) : 0;
2609                                 break;
2610
2611                         case F_ff:
2612                                 if (!op1)
2613                                         fflush(stdout);
2614                                 else {
2615                                         if (L.s && *L.s) {
2616                                                 X.rsm = newfile(L.s);
2617                                                 fflush(X.rsm->F);
2618                                         } else {
2619                                                 fflush_all();
2620                                         }
2621                                 }
2622                                 break;
2623
2624                         case F_cl:
2625                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2626                                 if (X.rsm) {
2627                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2628                                         free(X.rsm->buffer);
2629                                         hash_remove(fdhash, L.s);
2630                                 }
2631                                 if (R.i != 0)
2632                                         setvar_i(intvar[ERRNO], errno);
2633                                 R.d = (double)R.i;
2634                                 break;
2635                         }
2636                         setvar_i(res, R.d);
2637                         break;
2638
2639                 case XC( OC_BUILTIN ):
2640                         res = exec_builtin(op, res);
2641                         break;
2642
2643                 case XC( OC_SPRINTF ):
2644                         setvar_p(res, awk_printf(op1));
2645                         break;
2646
2647                 case XC( OC_UNARY ):
2648                         X.v = R.v;
2649                         L.d = R.d = getvar_i(R.v);
2650                         switch (opn) {
2651                         case 'P':
2652                                 L.d = ++R.d;
2653                                 goto r_op_change;
2654                         case 'p':
2655                                 R.d++;
2656                                 goto r_op_change;
2657                         case 'M':
2658                                 L.d = --R.d;
2659                                 goto r_op_change;
2660                         case 'm':
2661                                 R.d--;
2662                                 goto r_op_change;
2663                         case '!':
2664                                 L.d = !istrue(X.v);
2665                                 break;
2666                         case '-':
2667                                 L.d = -R.d;
2668                                 break;
2669  r_op_change:
2670                                 setvar_i(X.v, R.d);
2671                         }
2672                         setvar_i(res, L.d);
2673                         break;
2674
2675                 case XC( OC_FIELD ):
2676                         R.i = (int)getvar_i(R.v);
2677                         if (R.i == 0) {
2678                                 res = intvar[F0];
2679                         } else {
2680                                 split_f0();
2681                                 if (R.i > nfields)
2682                                         fsrealloc(R.i);
2683                                 res = &Fields[R.i - 1];
2684                         }
2685                         break;
2686
2687                 /* concatenation (" ") and index joining (",") */
2688                 case XC( OC_CONCAT ):
2689                 case XC( OC_COMMA ):
2690                         opn = strlen(L.s) + strlen(R.s) + 2;
2691                         X.s = xmalloc(opn);
2692                         strcpy(X.s, L.s);
2693                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2694                                 L.s = getvar_s(intvar[SUBSEP]);
2695                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2696                                 strcat(X.s, L.s);
2697                         }
2698                         strcat(X.s, R.s);
2699                         setvar_p(res, X.s);
2700                         break;
2701
2702                 case XC( OC_LAND ):
2703                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2704                         break;
2705
2706                 case XC( OC_LOR ):
2707                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2708                         break;
2709
2710                 case XC( OC_BINARY ):
2711                 case XC( OC_REPLACE ):
2712                         R.d = getvar_i(R.v);
2713                         switch (opn) {
2714                         case '+':
2715                                 L.d += R.d;
2716                                 break;
2717                         case '-':
2718                                 L.d -= R.d;
2719                                 break;
2720                         case '*':
2721                                 L.d *= R.d;
2722                                 break;
2723                         case '/':
2724                                 if (R.d == 0)
2725                                         syntax_error(EMSG_DIV_BY_ZERO);
2726                                 L.d /= R.d;
2727                                 break;
2728                         case '&':
2729 #if ENABLE_FEATURE_AWK_LIBM
2730                                 L.d = pow(L.d, R.d);
2731 #else
2732                                 syntax_error(EMSG_NO_MATH);
2733 #endif
2734                                 break;
2735                         case '%':
2736                                 if (R.d == 0)
2737                                         syntax_error(EMSG_DIV_BY_ZERO);
2738                                 L.d -= (int)(L.d / R.d) * R.d;
2739                                 break;
2740                         }
2741                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2742                         break;
2743
2744                 case XC( OC_COMPARE ):
2745                         if (is_numeric(L.v) && is_numeric(R.v)) {
2746                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2747                         } else {
2748                                 L.s = getvar_s(L.v);
2749                                 R.s = getvar_s(R.v);
2750                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2751                         }
2752                         switch (opn & 0xfe) {
2753                         case 0:
2754                                 R.i = (L.d > 0);
2755                                 break;
2756                         case 2:
2757                                 R.i = (L.d >= 0);
2758                                 break;
2759                         case 4:
2760                                 R.i = (L.d == 0);
2761                                 break;
2762                         }
2763                         setvar_i(res, (opn & 1 ? R.i : !R.i) ? 1 : 0);
2764                         break;
2765
2766                 default:
2767                         syntax_error(EMSG_POSSIBLE_ERROR);
2768                 }
2769                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2770                         op = op->a.n;
2771                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2772                         break;
2773                 if (nextrec)
2774                         break;
2775         }
2776         nvfree(v1);
2777         return res;
2778 #undef fnargs
2779 #undef seed
2780 #undef sreg
2781 }
2782
2783
2784 /* -------- main & co. -------- */
2785
2786 static int awk_exit(int r)
2787 {
2788         var tv;
2789         unsigned i;
2790         hash_item *hi;
2791
2792         zero_out_var(&tv);
2793
2794         if (!exiting) {
2795                 exiting = TRUE;
2796                 nextrec = FALSE;
2797                 evaluate(endseq.first, &tv);
2798         }
2799
2800         /* waiting for children */
2801         for (i = 0; i < fdhash->csize; i++) {
2802                 hi = fdhash->items[i];
2803                 while (hi) {
2804                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2805                                 pclose(hi->data.rs.F);
2806                         hi = hi->next;
2807                 }
2808         }
2809
2810         exit(r);
2811 }
2812
2813 /* if expr looks like "var=value", perform assignment and return 1,
2814  * otherwise return 0 */
2815 static int is_assignment(const char *expr)
2816 {
2817         char *exprc, *s, *s0, *s1;
2818
2819         exprc = xstrdup(expr);
2820         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2821                 free(exprc);
2822                 return FALSE;
2823         }
2824
2825         *(s++) = '\0';
2826         s0 = s1 = s;
2827         while (*s)
2828                 *(s1++) = nextchar(&s);
2829
2830         *s1 = '\0';
2831         setvar_u(newvar(exprc), s0);
2832         free(exprc);
2833         return TRUE;
2834 }
2835
2836 /* switch to next input file */
2837 static rstream *next_input_file(void)
2838 {
2839 #define rsm          (G.next_input_file__rsm)
2840 #define files_happen (G.next_input_file__files_happen)
2841
2842         FILE *F = NULL;
2843         const char *fname, *ind;
2844
2845         if (rsm.F)
2846                 fclose(rsm.F);
2847         rsm.F = NULL;
2848         rsm.pos = rsm.adv = 0;
2849
2850         do {
2851                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2852                         if (files_happen)
2853                                 return NULL;
2854                         fname = "-";
2855                         F = stdin;
2856                 } else {
2857                         ind = getvar_s(incvar(intvar[ARGIND]));
2858                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2859                         if (fname && *fname && !is_assignment(fname))
2860                                 F = xfopen_stdin(fname);
2861                 }
2862         } while (!F);
2863
2864         files_happen = TRUE;
2865         setvar_s(intvar[FILENAME], fname);
2866         rsm.F = F;
2867         return &rsm;
2868 #undef rsm
2869 #undef files_happen
2870 }
2871
2872 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2873 int awk_main(int argc, char **argv)
2874 {
2875         unsigned opt;
2876         char *opt_F, *opt_W;
2877         llist_t *list_v = NULL;
2878         llist_t *list_f = NULL;
2879         int i, j;
2880         var *v;
2881         var tv;
2882         char **envp;
2883         char *vnames = (char *)vNames; /* cheat */
2884         char *vvalues = (char *)vValues;
2885
2886         INIT_G();
2887
2888         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2889          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2890         if (ENABLE_LOCALE_SUPPORT)
2891                 setlocale(LC_NUMERIC, "C");
2892
2893         zero_out_var(&tv);
2894
2895         /* allocate global buffer */
2896         g_buf = xmalloc(MAXVARFMT + 1);
2897
2898         vhash = hash_init();
2899         ahash = hash_init();
2900         fdhash = hash_init();
2901         fnhash = hash_init();
2902
2903         /* initialize variables */
2904         for (i = 0; *vnames; i++) {
2905                 intvar[i] = v = newvar(nextword(&vnames));
2906                 if (*vvalues != '\377')
2907                         setvar_s(v, nextword(&vvalues));
2908                 else
2909                         setvar_i(v, 0);
2910
2911                 if (*vnames == '*') {
2912                         v->type |= VF_SPECIAL;
2913                         vnames++;
2914                 }
2915         }
2916
2917         handle_special(intvar[FS]);
2918         handle_special(intvar[RS]);
2919
2920         newfile("/dev/stdin")->F = stdin;
2921         newfile("/dev/stdout")->F = stdout;
2922         newfile("/dev/stderr")->F = stderr;
2923
2924         /* Huh, people report that sometimes environ is NULL. Oh well. */
2925         if (environ) for (envp = environ; *envp; envp++) {
2926                 /* environ is writable, thus we don't strdup it needlessly */
2927                 char *s = *envp;
2928                 char *s1 = strchr(s, '=');
2929                 if (s1) {
2930                         *s1 = '\0';
2931                         /* Both findvar and setvar_u take const char*
2932                          * as 2nd arg -> environment is not trashed */
2933                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2934                         *s1 = '=';
2935                 }
2936         }
2937         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
2938         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2939         argv += optind;
2940         argc -= optind;
2941         if (opt & 0x1)
2942                 setvar_s(intvar[FS], opt_F); // -F
2943         while (list_v) { /* -v */
2944                 if (!is_assignment(llist_pop(&list_v)))
2945                         bb_show_usage();
2946         }
2947         if (list_f) { /* -f */
2948                 do {
2949                         char *s = NULL;
2950                         FILE *from_file;
2951
2952                         g_progname = llist_pop(&list_f);
2953                         from_file = xfopen_stdin(g_progname);
2954                         /* one byte is reserved for some trick in next_token */
2955                         for (i = j = 1; j > 0; i += j) {
2956                                 s = xrealloc(s, i + 4096);
2957                                 j = fread(s + i, 1, 4094, from_file);
2958                         }
2959                         s[i] = '\0';
2960                         fclose(from_file);
2961                         parse_program(s + 1);
2962                         free(s);
2963                 } while (list_f);
2964                 argc++;
2965         } else { // no -f: take program from 1st parameter
2966                 if (!argc)
2967                         bb_show_usage();
2968                 g_progname = "cmd. line";
2969                 parse_program(*argv++);
2970         }
2971         if (opt & 0x8) // -W
2972                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2973
2974         /* fill in ARGV array */
2975         setvar_i(intvar[ARGC], argc);
2976         setari_u(intvar[ARGV], 0, "awk");
2977         i = 0;
2978         while (*argv)
2979                 setari_u(intvar[ARGV], ++i, *argv++);
2980
2981         evaluate(beginseq.first, &tv);
2982         if (!mainseq.first && !endseq.first)
2983                 awk_exit(EXIT_SUCCESS);
2984
2985         /* input file could already be opened in BEGIN block */
2986         if (!iF)
2987                 iF = next_input_file();
2988
2989         /* passing through input files */
2990         while (iF) {
2991                 nextfile = FALSE;
2992                 setvar_i(intvar[FNR], 0);
2993
2994                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2995                         nextrec = FALSE;
2996                         incvar(intvar[NR]);
2997                         incvar(intvar[FNR]);
2998                         evaluate(mainseq.first, &tv);
2999
3000                         if (nextfile)
3001                                 break;
3002                 }
3003
3004                 if (i < 0)
3005                         syntax_error(strerror(errno));
3006
3007                 iF = next_input_file();
3008         }
3009
3010         awk_exit(EXIT_SUCCESS);
3011         /*return 0;*/
3012 }