*: make GNU licensing statement forms more regular
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
17 /* If you comment out one of these below, it will be #defined later
18  * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...)  do {} while (0)
20
21 #ifndef debug_printf_walker
22 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
23 #endif
24
25
26
27 #define MAXVARFMT       240
28 #define MINNVBLOCK      64
29
30 /* variable flags */
31 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
32 #define VF_ARRAY        0x0002  /* 1 = it's an array */
33
34 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
35 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
36 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
37 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
38 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
39 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
40 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
41
42 /* these flags are static, don't change them when value is changed */
43 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
44
/* One element of the list hanging off var::x.walker (present when VF_WALK
 * is set). nvfree() releases the whole list by following ->prev.
 * NOTE(review): end/cur/wbuf look like a variable-length buffer with a
 * cursor and an end pointer - confirm against the for..in walker code. */
45 typedef struct walker_list {
46         char *end;
47         char *cur;
48         struct walker_list *prev;
49         char wbuf[1];
50 } walker_list;
51
52 /* Variable */
/* 'type' holds VF_* flag bits. Which member of union x is meaningful
 * follows from those flags: VF_ARRAY -> x.array, VF_CHILD -> x.parent,
 * VF_WALK -> x.walker (see the VF_* definitions). */
53 typedef struct var_s {
54         unsigned type;            /* flags */
55         double number;
56         char *string;
57         union {
58                 int aidx;               /* func arg idx (for compilation stage) */
59                 struct xhash_s *array;  /* array ptr */
60                 struct var_s *parent;   /* for func args, ptr to actual parameter */
61                 walker_list *walker;    /* list of array elements (for..in) */
62         } x;
63 } var;
64
65 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
66 typedef struct chain_s {
67         struct node_s *first;
68         struct node_s *last;
69         const char *programname;
70 } chain;
71
72 /* Function */
73 typedef struct func_s {
74         unsigned nargs;
75         struct chain_s body;
76 } func;
77
78 /* I/O stream */
79 typedef struct rstream_s {
80         FILE *F;
81         char *buffer;
82         int adv;
83         int size;
84         int pos;
85         smallint is_pipe;
86 } rstream;
87
/* One entry of a hash chain. 'data' is the first member, and it is
 * &item->data that hash_search()/hash_find() hand out to callers.
 * hash_find() allocates sizeof(hash_item) + strlen(name) + 1 bytes so
 * that 'name' can hold the complete key. */
88 typedef struct hash_item_s {
89         union {
90                 struct var_s v;         /* variable/array hash */
91                 struct rstream_s rs;    /* redirect streams hash */
92                 struct func_s f;        /* functions hash */
93         } data;
94         struct hash_item_s *next;       /* next in chain */
95         char name[1];                   /* really it's longer */
96 } hash_item;
97
/* Chained hash table. hash_find() asks for a rebuild once
 * nel / csize exceeds 10. */
98 typedef struct xhash_s {
99         unsigned nel;           /* number of elements */
100         unsigned csize;         /* current number of buckets in items[] */
101         unsigned nprime;        /* index of the next hash size in PRIMES[] */
102         unsigned glen;          /* total length of item names, each counted with its NUL */
103         struct hash_item_s **items;
104 } xhash;
105
106 /* Tree node */
107 typedef struct node_s {
108         uint32_t info;
109         unsigned lineno;
110         union {
111                 struct node_s *n;
112                 var *v;
113                 int aidx;
114                 char *new_progname;
115                 regex_t *re;
116         } l;
117         union {
118                 struct node_s *n;
119                 regex_t *ire;
120                 func *f;
121         } r;
122         union {
123                 struct node_s *n;
124         } a;
125 } node;
126
127 /* Block of temporary variables */
/* nvalloc() hands out vars from nv[] starting at 'pos'. Blocks are kept
 * in a doubly linked list and g_cb points at the one currently in use. */
128 typedef struct nvblock_s {
129         int size;                       /* capacity of nv[], in vars */
130         var *pos;                       /* first unused element of nv[] */
131         struct nvblock_s *prev;
132         struct nvblock_s *next;
133         var nv[];
134 } nvblock;
135
136 typedef struct tsplitter_s {
137         node n;
138         regex_t re[2];
139 } tsplitter;
140
141 /* simple token classes */
142 /* Order and hex values are very important!!!  See next_token() */
143 #define TC_SEQSTART      1                              /* ( */
144 #define TC_SEQTERM      (1 << 1)                /* ) */
145 #define TC_REGEXP       (1 << 2)                /* /.../ */
146 #define TC_OUTRDR       (1 << 3)                /* | > >> */
147 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
148 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
149 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
150 #define TC_IN           (1 << 7)
151 #define TC_COMMA        (1 << 8)
152 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
153 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
154 #define TC_ARRTERM      (1 << 11)               /* ] */
155 #define TC_GRPSTART     (1 << 12)               /* { */
156 #define TC_GRPTERM      (1 << 13)               /* } */
157 #define TC_SEMICOL      (1 << 14)
158 #define TC_NEWLINE      (1 << 15)
159 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
160 #define TC_WHILE        (1 << 17)
161 #define TC_ELSE         (1 << 18)
162 #define TC_BUILTIN      (1 << 19)
163 #define TC_GETLINE      (1 << 20)
164 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
165 #define TC_BEGIN        (1 << 22)
166 #define TC_END          (1 << 23)
167 #define TC_EOF          (1 << 24)
168 #define TC_VARIABLE     (1 << 25)
169 #define TC_ARRAY        (1 << 26)
170 #define TC_FUNCTION     (1 << 27)
171 #define TC_STRING       (1 << 28)
172 #define TC_NUMBER       (1 << 29)
173
174 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
175
176 /* combined token classes */
177 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
178 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
179 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
180                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
181
182 #define TC_STATEMNT (TC_STATX | TC_WHILE)
183 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
184
185 /* word tokens, cannot mean something else if not expected */
186 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
187                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
188
189 /* discard newlines after these */
190 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
191                    | TC_BINOP | TC_OPTERM)
192
193 /* what can expression begin with */
194 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
195 /* what can group begin with */
196 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
197
198 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
199 /* operator is inserted between them */
200 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
201                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
202 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
203
204 #define OF_RES1    0x010000
205 #define OF_RES2    0x020000
206 #define OF_STR1    0x040000
207 #define OF_STR2    0x080000
208 #define OF_NUM1    0x100000
209 #define OF_CHECKED 0x200000
210
211 /* combined operator flags */
212 #define xx      0
213 #define xV      OF_RES2
214 #define xS      (OF_RES2 | OF_STR2)
215 #define Vx      OF_RES1
216 #define VV      (OF_RES1 | OF_RES2)
217 #define Nx      (OF_RES1 | OF_NUM1)
218 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
219 #define Sx      (OF_RES1 | OF_STR1)
220 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
221 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
222
223 #define OPCLSMASK 0xFF00
224 #define OPNMASK   0x007F
225
/* Operator priority is stored in the highest byte of the info word
 * (even: r->l, odd: l->r grouping).
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * The argument is parenthesized so that an expression is shifted as a
 * whole, and it is converted to uint32_t first: values such as P(0x83)
 * do not fit into a signed int once shifted left by 24 bits.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
233
234 /* Operation classes */
235
236 #define SHIFT_TIL_THIS  0x0600
237 #define RECUR_FROM_THIS 0x1000
238
239 enum {
240         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
241         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
242
243         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
244         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
245         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
246
247         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
248         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
249         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
250         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
251         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
252         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
253         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
254         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
255         OC_DONE = 0x2800,
256
257         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
258         ST_WHILE = 0x3300
259 };
260
261 /* simple builtins */
262 enum {
263         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
264         F_ti,   F_le,   F_sy,   F_ff,   F_cl
265 };
266
267 /* builtins */
268 enum {
269         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
270         B_ge,   B_gs,   B_su,
271         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
272 };
273
274 /* tokens and their corresponding info values */
275
276 #define NTC     "\377"  /* switch to next token class (tc<<1) */
277 #define NTCC    '\377'
278
279 #define OC_B    OC_BUILTIN
280
/* Token spellings, grouped by token class. Each entry is a one-byte
 * length (written as an octal escape, e.g. "\10" for the 8 characters of
 * "continue") followed by the token text. NTC ("\377") ends one class and
 * switches to the next one (tc<<1), so the groups appear in the same order
 * as the TC_* bits are defined. The list ends with a "\0" entry. */
281 static const char tokenlist[] ALIGN1 =
282         "\1("       NTC
283         "\1)"       NTC
284         "\1/"       NTC                                 /* REGEXP */
285         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
286         "\2++"      "\2--"      NTC                     /* UOPPOST */
287         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
288         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
289         "\2*="      "\2/="      "\2%="      "\2^="
290         "\1+"       "\1-"       "\3**="     "\2**"
291         "\1/"       "\1%"       "\1^"       "\1*"
292         "\2!="      "\2>="      "\2<="      "\1>"
293         "\1<"       "\2!~"      "\1~"       "\2&&"
294         "\2||"      "\1?"       "\1:"       NTC
295         "\2in"      NTC
296         "\1,"       NTC
297         "\1|"       NTC
298         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
299         "\1]"       NTC
300         "\1{"       NTC
301         "\1}"       NTC
302         "\1;"       NTC
303         "\1\n"      NTC
304         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
305         "\10continue"           "\6delete"  "\5print"
306         "\6printf"  "\4next"    "\10nextfile"
307         "\6return"  "\4exit"    NTC
308         "\5while"   NTC
309         "\4else"    NTC
310
311         "\3and"     "\5compl"   "\6lshift"  "\2or"
312         "\6rshift"  "\3xor"
313         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
314         "\3cos"     "\3exp"     "\3int"     "\3log"
315         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
316         "\6gensub"  "\4gsub"    "\5index"   "\6length"
317         "\5match"   "\5split"   "\7sprintf" "\3sub"
318         "\6substr"  "\7systime" "\10strftime" "\6mktime"
319         "\7tolower" "\7toupper" NTC
320         "\7getline" NTC
321         "\4func"    "\10function"   NTC
322         "\5BEGIN"   NTC
323         "\3END"     "\0"
324         ;
325
326 static const uint32_t tokeninfo[] = {
327         0,
328         0,
329         OC_REGEXP,
330         xS|'a',     xS|'w',     xS|'|',
331         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
332         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
333             OC_FIELD|xV|P(5),
334         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
335             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
336         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
337             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
338         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
339             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
340         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
341             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
342         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
343             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
344         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
345             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
346         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
347             OC_COLON|xx|P(67)|':',
348         OC_IN|SV|P(49),
349         OC_COMMA|SS|P(80),
350         OC_PGETLINE|SV|P(37),
351         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
352             OC_UNARY|xV|P(19)|'!',
353         0,
354         0,
355         0,
356         0,
357         0,
358         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
359         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
360         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
361         OC_RETURN|Vx,   OC_EXIT|Nx,
362         ST_WHILE,
363         0,
364
365         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
366         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
367         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
372         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
373         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374         OC_GETLINE|SV|P(0),
375         0,      0,
376         0,
377         0
378 };
379
380 /* internal variable names and their initial values       */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
382 enum {
383         CONVFMT,    OFMT,       FS,         OFS,
384         ORS,        RS,         RT,         FILENAME,
385         SUBSEP,     F0,         ARGIND,     ARGC,
386         ARGV,       ERRNO,      FNR,        NR,
387         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
388 };
389
390 static const char vNames[] ALIGN1 =
391         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
392         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
393         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
394         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
395         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
396
397 static const char vValues[] ALIGN1 =
398         "%.6g\0"    "%.6g\0"    " \0"       " \0"
399         "\n\0"      "\n\0"      "\0"        "\0"
400         "\034\0"    "\0"        "\377";
401
402 /* hash size may grow to these values */
403 #define FIRST_PRIME 61
404 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
405
406
407 /* Globals. Split in two parts so that first one is addressed
408  * with (mostly short) negative offsets.
409  * NB: it's unsafe to put members of type "double"
410  * into globals2 (gcc may fail to align them).
411  */
412 struct globals {
413         double t_double;
414         chain beginseq, mainseq, endseq;
415         chain *seq;
416         node *break_ptr, *continue_ptr;
417         rstream *iF;
418         xhash *vhash, *ahash, *fdhash, *fnhash;
419         const char *g_progname;
420         int g_lineno;
421         int nfields;
422         int maxfields; /* used in fsrealloc() only */
423         var *Fields;
424         nvblock *g_cb;
425         char *g_pos;
426         char *g_buf;
427         smallint icase;
428         smallint exiting;
429         smallint nextrec;
430         smallint nextfile;
431         smallint is_f0_split;
432 };
433 struct globals2 {
434         uint32_t t_info; /* often used */
435         uint32_t t_tclass;
436         char *t_string;
437         int t_lineno;
438         int t_rollback;
439
440         var *intvar[NUM_INTERNAL_VARS]; /* often used */
441
442         /* former statics from various functions */
443         char *split_f0__fstrings;
444
445         uint32_t next_token__save_tclass;
446         uint32_t next_token__save_info;
447         uint32_t next_token__ltclass;
448         smallint next_token__concat_inserted;
449
450         smallint next_input_file__files_happen;
451         rstream next_input_file__rsm;
452
453         var *evaluate__fnargs;
454         unsigned evaluate__seed;
455         regex_t evaluate__sreg;
456
457         var ptest__v;
458
459         tsplitter exec_builtin__tspl;
460
461         /* biggest and least used members go last */
462         tsplitter fsplitter, rsplitter;
463 };
464 #define G1 (ptr_to_globals[-1])
465 #define G (*(struct globals2 *)ptr_to_globals)
466 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
467 /*char G1size[sizeof(G1)]; - 0x74 */
468 /*char Gsize[sizeof(G)]; - 0x1c4 */
469 /* Trying to keep most of members accessible with short offsets: */
470 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
471 #define t_double     (G1.t_double    )
472 #define beginseq     (G1.beginseq    )
473 #define mainseq      (G1.mainseq     )
474 #define endseq       (G1.endseq      )
475 #define seq          (G1.seq         )
476 #define break_ptr    (G1.break_ptr   )
477 #define continue_ptr (G1.continue_ptr)
478 #define iF           (G1.iF          )
479 #define vhash        (G1.vhash       )
480 #define ahash        (G1.ahash       )
481 #define fdhash       (G1.fdhash      )
482 #define fnhash       (G1.fnhash      )
483 #define g_progname   (G1.g_progname  )
484 #define g_lineno     (G1.g_lineno    )
485 #define nfields      (G1.nfields     )
486 #define maxfields    (G1.maxfields   )
487 #define Fields       (G1.Fields      )
488 #define g_cb         (G1.g_cb        )
489 #define g_pos        (G1.g_pos       )
490 #define g_buf        (G1.g_buf       )
491 #define icase        (G1.icase       )
492 #define exiting      (G1.exiting     )
493 #define nextrec      (G1.nextrec     )
494 #define nextfile     (G1.nextfile    )
495 #define is_f0_split  (G1.is_f0_split )
496 #define t_info       (G.t_info      )
497 #define t_tclass     (G.t_tclass    )
498 #define t_string     (G.t_string    )
499 #define t_lineno     (G.t_lineno    )
500 #define t_rollback   (G.t_rollback  )
501 #define intvar       (G.intvar      )
502 #define fsplitter    (G.fsplitter   )
503 #define rsplitter    (G.rsplitter   )
504 #define INIT_G() do { \
505         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
506         G.next_token__ltclass = TC_OPTERM; \
507         G.evaluate__seed = 1; \
508 } while (0)
509
510
511 /* function prototypes */
512 static void handle_special(var *);
513 static node *parse_expr(uint32_t);
514 static void chain_group(void);
515 static var *evaluate(node *, var *);
516 static rstream *next_input_file(void);
517 static int fmt_num(char *, int, const char *, double, int);
518 static int awk_exit(int) NORETURN;
519
520 /* ---- error handling ---- */
521
522 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
523 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
524 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
525 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
526 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
527 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
528 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
529 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
530 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
531 #if !ENABLE_FEATURE_AWK_LIBM
532 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
533 #endif
534
535 static void zero_out_var(var *vp)
536 {
537         memset(vp, 0, sizeof(*vp));
538 }
539
/* Report 'message' prefixed with the current program name and line
 * number (g_progname, g_lineno) via bb_error_msg_and_die().
 * Declared NORETURN. */
540 static void syntax_error(const char *message) NORETURN;
541 static void syntax_error(const char *message)
542 {
543         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
544 }
545
546 /* ---- hash stuff ---- */
547
/* Hash a NUL-terminated name: for every byte, idx = idx * 63 + byte,
 * in unsigned arithmetic. Callers reduce the result modulo the
 * number of buckets. */
static unsigned hashidx(const char *name)
{
        unsigned idx;

        for (idx = 0; *name != '\0'; name++)
                idx = idx * 63 + *name;
        return idx;
}
556
557 /* create new hash */
558 static xhash *hash_init(void)
559 {
560         xhash *newhash;
561
562         newhash = xzalloc(sizeof(*newhash));
563         newhash->csize = FIRST_PRIME;
564         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
565
566         return newhash;
567 }
568
569 /* find item in hash, return ptr to data, NULL if not found */
570 static void *hash_search(xhash *hash, const char *name)
571 {
572         hash_item *hi;
573
574         hi = hash->items[hashidx(name) % hash->csize];
575         while (hi) {
576                 if (strcmp(hi->name, name) == 0)
577                         return &hi->data;
578                 hi = hi->next;
579         }
580         return NULL;
581 }
582
583 /* grow hash if it becomes too big */
584 static void hash_rebuild(xhash *hash)
585 {
586         unsigned newsize, i, idx;
587         hash_item **newitems, *hi, *thi;
588
589         if (hash->nprime == ARRAY_SIZE(PRIMES))
590                 return;
591
592         newsize = PRIMES[hash->nprime++];
593         newitems = xzalloc(newsize * sizeof(newitems[0]));
594
595         for (i = 0; i < hash->csize; i++) {
596                 hi = hash->items[i];
597                 while (hi) {
598                         thi = hi;
599                         hi = thi->next;
600                         idx = hashidx(thi->name) % newsize;
601                         thi->next = newitems[idx];
602                         newitems[idx] = thi;
603                 }
604         }
605
606         free(hash->items);
607         hash->csize = newsize;
608         hash->items = newitems;
609 }
610
611 /* find item in hash, add it if necessary. Return ptr to data */
612 static void *hash_find(xhash *hash, const char *name)
613 {
614         hash_item *hi;
615         unsigned idx;
616         int l;
617
618         hi = hash_search(hash, name);
619         if (!hi) {
620                 if (++hash->nel / hash->csize > 10)
621                         hash_rebuild(hash);
622
623                 l = strlen(name) + 1;
624                 hi = xzalloc(sizeof(*hi) + l);
625                 strcpy(hi->name, name);
626
627                 idx = hashidx(name) % hash->csize;
628                 hi->next = hash->items[idx];
629                 hash->items[idx] = hi;
630                 hash->glen += l;
631         }
632         return &hi->data;
633 }
634
/* Typed wrappers around hash_find(): find-or-create an entry in the given
 * hash (findvar) or in vhash / fdhash / fnhash (newvar, newfile, newfunc). */
635 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
636 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
637 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
638 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
639
640 static void hash_remove(xhash *hash, const char *name)
641 {
642         hash_item *hi, **phi;
643
644         phi = &hash->items[hashidx(name) % hash->csize];
645         while (*phi) {
646                 hi = *phi;
647                 if (strcmp(hi->name, name) == 0) {
648                         hash->glen -= (strlen(name) + 1);
649                         hash->nel--;
650                         *phi = hi->next;
651                         free(hi);
652                         break;
653                 }
654                 phi = &hi->next;
655         }
656 }
657
658 /* ------ some useful functions ------ */
659
660 static char *skip_spaces(char *p)
661 {
662         while (1) {
663                 if (*p == '\\' && p[1] == '\n') {
664                         p++;
665                         t_lineno++;
666                 } else if (*p != ' ' && *p != '\t') {
667                         break;
668                 }
669                 p++;
670         }
671         return p;
672 }
673
/* Return the word *s currently points at and move *s to the byte
 * that follows the word's terminating NUL. */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
682
/* Fetch one character from *s, decoding a backslash escape through
 * bb_process_escape_sequence(), and advance *s past what was consumed.
 *
 * When the result is still a backslash and the pointer was not moved
 * (presumably an escape the helper does not recognize - confirm against
 * libbb), the character following the backslash is returned as is.
 * If that character is the terminating NUL, *s is left pointing at it
 * so that callers cannot run past the end of the string.
 */
static char nextchar(char **s)
{
        char c, *pps;

        c = *(*s)++;
        pps = *s;
        if (c == '\\')
                c = bb_process_escape_sequence((const char**)s);
        if (c == '\\' && *s == pps) {
                c = **s;                /* take the char after the backslash */
                if (c != '\0')
                        (*s)++;         /* never step over the terminating NUL */
        }
        return c;
}
695
/* Nonzero if c is alphanumeric (per isalnum) or an underscore */
696 static ALWAYS_INLINE int isalnum_(int c)
697 {
698         return (isalnum(c) || c == '_');
699 }
700
/* Convert the number at *pp to double and advance *pp past it.
 *
 * With ENABLE_DESKTOP, text starting with "0x"/"0X" or with '0' followed
 * by a digit is first tried as a hex/octal integer via strtoull().
 * For the leading-zero case that result is used only when the integer
 * parse did not stop at a digit or a '.': inputs such as "009.123"
 * (stops at '9') or "000.123" (stops at '.') are decimal floating-point
 * numbers and are handed to strtod() instead of being cut short.
 */
static double my_strtod(char **pp)
{
        char *cp = *pp;

#if ENABLE_DESKTOP
        if (cp[0] == '0') {
                char c = (cp[1] | 0x20);

                if (c == 'x' || isdigit((unsigned char)cp[1])) {
                        unsigned long long ull = strtoull(cp, pp, 0);

                        if (c == 'x')
                                return ull;
                        c = **pp;
                        if (!isdigit((unsigned char)c) && c != '.')
                                return ull;
                        /* else: not an octal integer after all, let
                         * strtod() re-parse from the start */
                }
        }
#endif
        return strtod(cp, pp);
}
712
713 /* -------- working with variables (set/get/copy/etc) -------- */
714
715 static xhash *iamarray(var *v)
716 {
717         var *a = v;
718
719         while (a->type & VF_CHILD)
720                 a = a->x.parent;
721
722         if (!(a->type & VF_ARRAY)) {
723                 a->type |= VF_ARRAY;
724                 a->x.array = hash_init();
725         }
726         return a->x.array;
727 }
728
729 static void clear_array(xhash *array)
730 {
731         unsigned i;
732         hash_item *hi, *thi;
733
734         for (i = 0; i < array->csize; i++) {
735                 hi = array->items[i];
736                 while (hi) {
737                         thi = hi;
738                         hi = hi->next;
739                         free(thi->data.v.string);
740                         free(thi);
741                 }
742                 array->items[i] = NULL;
743         }
744         array->glen = array->nel = 0;
745 }
746
747 /* clear a variable */
748 static var *clrvar(var *v)
749 {
750         if (!(v->type & VF_FSTR))
751                 free(v->string);
752
753         v->type &= VF_DONTTOUCH;
754         v->type |= VF_DIRTY;
755         v->string = NULL;
756         return v;
757 }
758
759 /* assign string value to variable */
760 static var *setvar_p(var *v, char *value)
761 {
762         clrvar(v);
763         v->string = value;
764         handle_special(v);
765         return v;
766 }
767
768 /* same as setvar_p but make a copy of string */
769 static var *setvar_s(var *v, const char *value)
770 {
771         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
772 }
773
774 /* same as setvar_s but sets USER flag */
775 static var *setvar_u(var *v, const char *value)
776 {
777         v = setvar_s(v, value);
778         v->type |= VF_USER;
779         return v;
780 }
781
782 /* set array element to user string */
783 static void setari_u(var *a, int idx, const char *s)
784 {
785         var *v;
786
787         v = findvar(iamarray(a), itoa(idx));
788         setvar_u(v, s);
789 }
790
791 /* assign numeric value to variable */
792 static var *setvar_i(var *v, double value)
793 {
794         clrvar(v);
795         v->type |= VF_NUMBER;
796         v->number = value;
797         handle_special(v);
798         return v;
799 }
800
801 static const char *getvar_s(var *v)
802 {
803         /* if v is numeric and has no cached string, convert it to string */
804         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
805                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
806                 v->string = xstrdup(g_buf);
807                 v->type |= VF_CACHED;
808         }
809         return (v->string == NULL) ? "" : v->string;
810 }
811
812 static double getvar_i(var *v)
813 {
814         char *s;
815
816         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
817                 v->number = 0;
818                 s = v->string;
819                 if (s && *s) {
820                         v->number = my_strtod(&s);
821                         if (v->type & VF_USER) {
822                                 s = skip_spaces(s);
823                                 if (*s != '\0')
824                                         v->type &= ~VF_USER;
825                         }
826                 } else {
827                         v->type &= ~VF_USER;
828                 }
829                 v->type |= VF_CACHED;
830         }
831         return v->number;
832 }
833
834 /* Used for operands of bitwise ops */
835 static unsigned long getvar_i_int(var *v)
836 {
837         double d = getvar_i(v);
838
839         /* Casting doubles to longs is undefined for values outside
840          * of target type range. Try to widen it as much as possible */
841         if (d >= 0)
842                 return (unsigned long)d;
843         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
844         return - (long) (unsigned long) (-d);
845 }
846
847 static var *copyvar(var *dest, const var *src)
848 {
849         if (dest != src) {
850                 clrvar(dest);
851                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
852                 dest->number = src->number;
853                 if (src->string)
854                         dest->string = xstrdup(src->string);
855         }
856         handle_special(dest);
857         return dest;
858 }
859
860 static var *incvar(var *v)
861 {
862         return setvar_i(v, getvar_i(v) + 1.0);
863 }
864
865 /* return true if v is number or numeric string */
866 static int is_numeric(var *v)
867 {
868         getvar_i(v);
869         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
870 }
871
872 /* return 1 when value of v corresponds to true, 0 otherwise */
873 static int istrue(var *v)
874 {
875         if (is_numeric(v))
876                 return (v->number != 0);
877         return (v->string && v->string[0]);
878 }
879
880 /* temporary variables allocator. Last allocated should be first freed */
881 static var *nvalloc(int n)
882 {
883         nvblock *pb = NULL;
884         var *v, *r;
885         int size;
886
887         while (g_cb) {
888                 pb = g_cb;
889                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
890                         break;
891                 g_cb = g_cb->next;
892         }
893
894         if (!g_cb) {
895                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
896                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
897                 g_cb->size = size;
898                 g_cb->pos = g_cb->nv;
899                 g_cb->prev = pb;
900                 /*g_cb->next = NULL; - xzalloc did it */
901                 if (pb)
902                         pb->next = g_cb;
903         }
904
905         v = r = g_cb->pos;
906         g_cb->pos += n;
907
908         while (v < g_cb->pos) {
909                 v->type = 0;
910                 v->string = NULL;
911                 v++;
912         }
913
914         return r;
915 }
916
917 static void nvfree(var *v)
918 {
919         var *p;
920
921         if (v < g_cb->nv || v >= g_cb->pos)
922                 syntax_error(EMSG_INTERNAL_ERROR);
923
924         for (p = v; p < g_cb->pos; p++) {
925                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
926                         clear_array(iamarray(p));
927                         free(p->x.array->items);
928                         free(p->x.array);
929                 }
930                 if (p->type & VF_WALK) {
931                         walker_list *n;
932                         walker_list *w = p->x.walker;
933                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
934                         p->x.walker = NULL;
935                         while (w) {
936                                 n = w->prev;
937                                 debug_printf_walker(" free(%p)\n", w);
938                                 free(w);
939                                 w = n;
940                         }
941                 }
942                 clrvar(p);
943         }
944
945         g_cb->pos = v;
946         while (g_cb->prev && g_cb->pos == g_cb->nv) {
947                 g_cb = g_cb->prev;
948         }
949 }
950
951 /* ------- awk program text parsing ------- */
952
953 /* Parse the next token pointed to by global g_pos, place results into global ttt.
954  * If the token isn't expected, give up (raise a syntax error). Return token class
955  */
956 static uint32_t next_token(uint32_t expected)
957 {
/* State kept in G between calls:
 *  concat_inserted       - an implicit concatenation operator was returned
 *                          last time; the real token is still pending
 *  save_tclass/save_info - class/info of that pending token
 *  ltclass               - class of the token returned by the previous call
 */
958 #define concat_inserted (G.next_token__concat_inserted)
959 #define save_tclass     (G.next_token__save_tclass)
960 #define save_info       (G.next_token__save_info)
961 /* Initialized to TC_OPTERM: */
962 #define ltclass         (G.next_token__ltclass)
963
964         char *p, *s;
965         const char *tl;
966         uint32_t tc;
967         const uint32_t *ti;
968         int l;
969
970         if (t_rollback) {
            /* rollback_token() was called: hand out the current token
             * again, t_tclass/t_info are left untouched */
971                 t_rollback = FALSE;
972
973         } else if (concat_inserted) {
            /* deliver the token that was held back when the concat
             * operator was synthesized */
974                 concat_inserted = FALSE;
975                 t_tclass = save_tclass;
976                 t_info = save_info;
977
978         } else {
            /* scan a new token from the program text */
979                 p = g_pos;
980  readnext:
981                 p = skip_spaces(p);
982                 g_lineno = t_lineno;
            /* skip a '#' comment; the terminating newline/NUL is not consumed */
983                 if (*p == '#')
984                         while (*p != '\n' && *p != '\0')
985                                 p++;
986
987                 if (*p == '\n')
988                         t_lineno++;
989
990                 if (*p == '\0') {
991                         tc = TC_EOF;
992
993                 } else if (*p == '\"') {
994                         /* it's a string */
                    /* decoded in place: s is the write cursor, p the read
                     * cursor; nextchar() presumably consumes one (possibly
                     * escaped) character - confirm against its definition */
995                         t_string = s = ++p;
996                         while (*p != '\"') {
997                                 char *pp = p;
998                                 if (*p == '\0' || *p == '\n')
999                                         syntax_error(EMSG_UNEXP_EOS);
1000                                 *s++ = nextchar(&pp);
1001                                 p = pp;
1002                         }
1003                         p++;
1004                         *s = '\0';
1005                         tc = TC_STRING;
1006
1007                 } else if ((expected & TC_REGEXP) && *p == '/') {
1008                         /* it's regexp */
                    /* '/' starts a regexp only when the caller allows
                     * TC_REGEXP. The text is copied in place up to the
                     * closing '/'; backslash sequences get special handling */
1009                         t_string = s = ++p;
1010                         while (*p != '/') {
1011                                 if (*p == '\0' || *p == '\n')
1012                                         syntax_error(EMSG_UNEXP_EOS);
1013                                 *s = *p++;
1014                                 if (*s++ == '\\') {
                                    /* overwrite the copied backslash with
                                     * the decoded escape; pp == p afterwards
                                     * means nothing was consumed, so the
                                     * following char is copied literally */
1015                                         char *pp = p;
1016                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1017                                         if (*p == '\\')
1018                                                 *s++ = '\\';
1019                                         if (pp == p)
1020                                                 *s++ = *p++;
1021                                         else
1022                                                 p = pp;
1023                                 }
1024                         }
1025                         p++;
1026                         *s = '\0';
1027                         tc = TC_REGEXP;
1028
1029                 } else if (*p == '.' || isdigit(*p)) {
1030                         /* it's a number */
1031                         char *pp = p;
1032                         t_double = my_strtod(&pp);
1033                         p = pp;
                    /* a '.' immediately after the parsed number is an error */
1034                         if (*pp == '.')
1035                                 syntax_error(EMSG_UNEXP_TOKEN);
1036                         tc = TC_NUMBER;
1037
1038                 } else {
1039                         /* search for something known */
                    /* tokenlist is walked as a sequence of entries, each a
                     * length byte followed by that many chars; an NTCC byte
                     * instead of a length moves on to the next token class
                     * bit. ti advances through tokeninfo in step with the
                     * entries. */
1040                         tl = tokenlist;
1041                         tc = 0x00000001;
1042                         ti = tokeninfo;
1043                         while (*tl) {
1044                                 l = *tl++;
1045                                 if (l == NTCC) {
1046                                         tc <<= 1;
1047                                         continue;
1048                                 }
1049                                 /* if token class is expected, token
1050                                  * matches and it's not a longer word,
1051                                  * then this is what we are looking for
1052                                  */
1053                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1054                                  && *tl == *p && strncmp(p, tl, l) == 0
1055                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1056                                 ) {
1057                                         t_info = *ti;
1058                                         p += l;
1059                                         break;
1060                                 }
1061                                 ti++;
1062                                 tl += l;
1063                         }
1064
                    /* *tl == 0 here means the whole list was scanned
                     * without a match */
1065                         if (!*tl) {
1066                                 /* it's a name (var/array/function),
1067                                  * otherwise it's something wrong
1068                                  */
1069                                 if (!isalnum_(*p))
1070                                         syntax_error(EMSG_UNEXP_TOKEN);
1071
                            /* the identifier is shifted one byte to the
                             * left inside the program buffer so that a NUL
                             * can be stored after it without clobbering the
                             * character that follows; note this overwrites
                             * the byte just before the identifier */
1072                                 t_string = --p;
1073                                 while (isalnum_(*++p)) {
1074                                         p[-1] = *p;
1075                                 }
1076                                 p[-1] = '\0';
1077                                 tc = TC_VARIABLE;
1078                                 /* also consume whitespace between functionname and bracket */
1079                                 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1080                                         p = skip_spaces(p);
                            /* '(' is left unconsumed for a function,
                             * '[' is consumed for an array */
1081                                 if (*p == '(') {
1082                                         tc = TC_FUNCTION;
1083                                 } else {
1084                                         if (*p == '[') {
1085                                                 p++;
1086                                                 tc = TC_ARRAY;
1087                                         }
1088                                 }
1089                         }
1090                 }
1091                 g_pos = p;
1092
1093                 /* skipping newlines in some cases */
            /* i.e. when the previously returned token has a TC_NOTERM class */
1094                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1095                         goto readnext;
1096
1097                 /* insert concatenation operator when needed */
            /* the scanned token is parked in save_tclass/save_info and
             * delivered by the next call; this call returns the operator */
1098                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1099                         concat_inserted = TRUE;
1100                         save_tclass = tc;
1101                         save_info = t_info;
1102                         tc = TC_BINOP;
1103                         t_info = OC_CONCAT | SS | P(35);
1104                 }
1105
1106                 t_tclass = tc;
1107         }
1108         ltclass = t_tclass;
1109
1110         /* Are we ready for this? */
        /* an unexpected newline/EOF is reported as "unexpected end of
         * string", anything else as "unexpected token" */
1111         if (!(ltclass & expected))
1112                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1113                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1114
1115         return ltclass;
1116 #undef concat_inserted
1117 #undef save_tclass
1118 #undef save_info
1119 #undef ltclass
1120 }
1121
/* Make the next call to next_token() return the current token again
 * instead of scanning a new one (next_token() clears the flag). */
1122 static void rollback_token(void)
1123 {
1124         t_rollback = TRUE;
1125 }
1126
1127 static node *new_node(uint32_t info)
1128 {
1129         node *n;
1130
1131         n = xzalloc(sizeof(node));
1132         n->info = info;
1133         n->lineno = g_lineno;
1134         return n;
1135 }
1136
1137 static void mk_re_node(const char *s, node *n, regex_t *re)
1138 {
1139         n->info = OC_REGEXP;
1140         n->l.re = re;
1141         n->r.ire = re + 1;
1142         xregcomp(re, s, REG_EXTENDED);
1143         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1144 }
1145
1146 static node *condition(void)
1147 {
1148         next_token(TC_SEQSTART);
1149         return parse_expr(TC_SEQTERM);
1150 }
1151
1152 /* parse expression terminated by given argument, return ptr
1153  * to built subtree. Terminator is eaten by parse_expr */
/* Working variables:
 *  sn    - dummy root node living on the stack; the finished tree hangs
 *          off sn.r.n
 *  cn    - node most recently attached
 *  vn    - scratch pointer used while splicing a new node in
 *  glptr - last getline node seen that may still receive a '<' redirection
 *  xtc   - set of token classes acceptable as the next token
 * The a.n member of each node is used as a link back towards the root.
 */
1154 static node *parse_expr(uint32_t iexp)
1155 {
1156         node sn;
1157         node *cn = &sn;
1158         node *vn, *glptr;
1159         uint32_t tc, xtc;
1160         var *v;
1161
1162         sn.info = PRIMASK;
1163         sn.r.n = glptr = NULL;
1164         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1165
        /* loop until one of the terminator classes in iexp is read */
1166         while (!((tc = next_token(xtc)) & iexp)) {
1167                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1168                         /* input redirection (<) attached to glptr node */
1169                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1170                         cn->a.n = glptr;
1171                         xtc = TC_OPERAND | TC_UOPPRE;
1172                         glptr = NULL;
1173
1174                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1175                         /* for binary and postfix-unary operators, jump back over
1176                          * previous operators with higher priority */
                    /* the second condition keeps climbing over a node with
                     * identical info when the operator class is OC_COLON */
1177                         vn = cn;
1178                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1179                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1180                         ) {
1181                                 vn = vn->a.n;
1182                         }
                    /* NOTE(review): the P(6) adjustment for OC_TERNARY looks
                     * like a priority tweak for the '?' operator - confirm */
1183                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1184                                 t_info += P(6);
                    /* splice the new operator node in between vn and vn's
                     * parent */
1185                         cn = vn->a.n->r.n = new_node(t_info);
1186                         cn->a.n = vn->a.n;
1187                         if (tc & TC_BINOP) {
                            /* binary: vn becomes the left operand, the right
                             * operand is still to come */
1188                                 cn->l.n = vn;
1189                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1190                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1191                                         /* it's a pipe */
1192                                         next_token(TC_GETLINE);
1193                                         /* give maximum priority to this pipe */
1194                                         cn->info &= ~PRIMASK;
1195                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1196                                 }
1197                         } else {
                            /* postfix unary: vn becomes the (right) operand */
1198                                 cn->r.n = vn;
1199                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1200                         }
1201                         vn->a.n = cn;
1202
1203                 } else {
1204                         /* for operands and prefix-unary operators, attach them
1205                          * to last node */
1206                         vn = cn;
1207                         cn = vn->r.n = new_node(t_info);
1208                         cn->a.n = vn;
1209                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1210                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1211                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1212                                 /* one should be very careful with switch on tclass -
1213                                  * only simple tclasses should be used! */
1214                                 switch (tc) {
1215                                 case TC_VARIABLE:
1216                                 case TC_ARRAY:
                                    /* a name found in ahash is a function
                                     * argument and is referenced by index;
                                     * any other name goes through newvar() */
1217                                         cn->info = OC_VAR;
1218                                         v = hash_search(ahash, t_string);
1219                                         if (v != NULL) {
1220                                                 cn->info = OC_FNARG;
1221                                                 cn->l.aidx = v->x.aidx;
1222                                         } else {
1223                                                 cn->l.v = newvar(t_string);
1224                                         }
1225                                         if (tc & TC_ARRAY) {
                                            /* subscript expression runs up
                                             * to the TC_ARRTERM token */
1226                                                 cn->info |= xS;
1227                                                 cn->r.n = parse_expr(TC_ARRTERM);
1228                                         }
1229                                         break;
1230
1231                                 case TC_NUMBER:
1232                                 case TC_STRING:
                                    /* literal: stored in a freshly allocated
                                     * var attached to the node */
1233                                         cn->info = OC_VAR;
1234                                         v = cn->l.v = xzalloc(sizeof(var));
1235                                         if (tc & TC_NUMBER)
1236                                                 setvar_i(v, t_double);
1237                                         else
1238                                                 setvar_s(v, t_string);
1239                                         break;
1240
1241                                 case TC_REGEXP:
                                    /* space for two regex_t: see mk_re_node() */
1242                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1243                                         break;
1244
1245                                 case TC_FUNCTION:
                                    /* user function call: argument list is
                                     * parsed as a bracketed expression */
1246                                         cn->info = OC_FUNC;
1247                                         cn->r.f = newfunc(t_string);
1248                                         cn->l.n = condition();
1249                                         break;
1250
1251                                 case TC_SEQSTART:
                                    /* parenthesized sub-expression replaces
                                     * the node that was just attached */
1252                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1253                                         cn->a.n = vn;
1254                                         break;
1255
1256                                 case TC_GETLINE:
                                    /* remember it: a following '<' is then
                                     * treated as input redirection */
1257                                         glptr = cn;
1258                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1259                                         break;
1260
1261                                 case TC_BUILTIN:
1262                                         cn->l.n = condition();
1263                                         break;
1264                                 }
1265                         }
1266                 }
1267         }
1268         return sn.r.n;
1269 }
1270
1271 /* add node to chain. Return ptr to alloc'd node */
1272 static node *chain_node(uint32_t info)
1273 {
1274         node *n;
1275
1276         if (!seq->first)
1277                 seq->first = seq->last = new_node(0);
1278
1279         if (seq->programname != g_progname) {
1280                 seq->programname = g_progname;
1281                 n = chain_node(OC_NEWSOURCE);
1282                 n->l.new_progname = xstrdup(g_progname);
1283         }
1284
1285         n = seq->last;
1286         n->info = info;
1287         seq->last = n->a.n = new_node(OC_DONE);
1288
1289         return n;
1290 }
1291
1292 static void chain_expr(uint32_t info)
1293 {
1294         node *n;
1295
1296         n = chain_node(info);
1297         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1298         if (t_tclass & TC_GRPTERM)
1299                 rollback_token();
1300 }
1301
1302 static node *chain_loop(node *nn)
1303 {
1304         node *n, *n2, *save_brk, *save_cont;
1305
1306         save_brk = break_ptr;
1307         save_cont = continue_ptr;
1308
1309         n = chain_node(OC_BR | Vx);
1310         continue_ptr = new_node(OC_EXEC);
1311         break_ptr = new_node(OC_EXEC);
1312         chain_group();
1313         n2 = chain_node(OC_EXEC | Vx);
1314         n2->l.n = nn;
1315         n2->a.n = n;
1316         continue_ptr->a.n = n2;
1317         break_ptr->a.n = n->r.n = seq->last;
1318
1319         continue_ptr = save_cont;
1320         break_ptr = save_brk;
1321
1322         return n;
1323 }
1324
1325 /* parse group and attach it to chain */
/* Handles one of: a group-start token followed by nested groups up to the
 * group terminator, a plain expression statement, or a statement keyword
 * (if/while/do/for/print/printf/break/continue/others). Nodes are
 * appended to the current sequence via chain_node(). */
1326 static void chain_group(void)
1327 {
1328         uint32_t c;
1329         node *n, *n2, *n3;
1330
        /* ignore leading newlines */
1331         do {
1332                 c = next_token(TC_GRPSEQ);
1333         } while (c & TC_NEWLINE);
1334
1335         if (c & TC_GRPSTART) {
                /* block: chain each contained statement until the
                 * group terminator is read */
1336                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1337                         if (t_tclass & TC_NEWLINE)
1338                                 continue;
1339                         rollback_token();
1340                         chain_group();
1341                 }
1342         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                /* expression statement: re-read the token as part of it */
1343                 rollback_token();
1344                 chain_expr(OC_EXEC | Vx);
1345         } else {                                                /* TC_STATEMNT */
1346                 switch (t_info & OPCLSMASK) {
1347                 case ST_IF:
                        /* n: branch on the condition; n2: node chained
                         * after the "then" part whose a.n link is later
                         * pointed past the "else" part; n->r.n: first node
                         * after the "then" part */
1348                         n = chain_node(OC_BR | Vx);
1349                         n->l.n = condition();
1350                         chain_group();
1351                         n2 = chain_node(OC_EXEC);
1352                         n->r.n = seq->last;
1353                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1354                                 chain_group();
1355                                 n2->a.n = seq->last;
1356                         } else {
1357                                 rollback_token();
1358                         }
1359                         break;
1360
1361                 case ST_WHILE:
                        /* condition is parsed first and attached to the
                         * loop head afterwards */
1362                         n2 = condition();
1363                         n = chain_loop(NULL);
1364                         n->l.n = n2;
1365                         break;
1366
1367                 case ST_DO:
                        /* n2 is linked to the node following the loop
                         * head, so the first pass skips the condition
                         * test; the condition itself is read after the
                         * body and the "while" keyword */
1368                         n2 = chain_node(OC_EXEC);
1369                         n = chain_loop(NULL);
1370                         n2->a.n = n->a.n;
1371                         next_token(TC_WHILE);
1372                         n->l.n = condition();
1373                         break;
1374
1375                 case ST_FOR:
1376                         next_token(TC_SEQSTART);
1377                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
                        /* if the first expression already ended at the
                         * closing bracket this must be "for (x in arr)" */
1378                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1379                                 if ((n2->info & OPCLSMASK) != OC_IN)
1380                                         syntax_error(EMSG_UNEXP_TOKEN);
1381                                 n = chain_node(OC_WALKINIT | VV);
1382                                 n->l.n = n2->l.n;
1383                                 n->r.n = n2->r.n;
1384                                 n = chain_loop(NULL);
1385                                 n->info = OC_WALKNEXT | Vx;
1386                                 n->l.n = n2->l.n;
1387                         } else {                        /* for (;;) */
                                /* n2 = init (chained as a statement), then
                                 * n2 = condition, n3 = step expression
                                 * passed to chain_loop() */
1388                                 n = chain_node(OC_EXEC | Vx);
1389                                 n->l.n = n2;
1390                                 n2 = parse_expr(TC_SEMICOL);
1391                                 n3 = parse_expr(TC_SEQTERM);
1392                                 n = chain_loop(n3);
1393                                 n->l.n = n2;
                                /* empty condition: the loop head no longer
                                 * branches */
1394                                 if (!n2)
1395                                         n->info = OC_EXEC;
1396                         }
1397                         break;
1398
1399                 case OC_PRINT:
1400                 case OC_PRINTF:
                        /* left operand: argument list; on an output
                         * redirection token its info bits are merged into
                         * the node and the target expression becomes the
                         * right operand */
1401                         n = chain_node(t_info);
1402                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1403                         if (t_tclass & TC_OUTRDR) {
1404                                 n->info |= t_info;
1405                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1406                         }
1407                         if (t_tclass & TC_GRPTERM)
1408                                 rollback_token();
1409                         break;
1410
1411                 case OC_BREAK:
                        /* jump via the break node of the enclosing loop
                         * (set up by chain_loop()) */
1412                         n = chain_node(OC_EXEC);
1413                         n->a.n = break_ptr;
1414                         break;
1415
1416                 case OC_CONTINUE:
                        /* likewise via the enclosing loop's continue node */
1417                         n = chain_node(OC_EXEC);
1418                         n->a.n = continue_ptr;
1419                         break;
1420
1421                 /* delete, next, nextfile, return, exit */
1422                 default:
1423                         chain_expr(t_info);
1424                 }
1425         }
1426 }
1427
1428 static void parse_program(char *p)
1429 {
1430         uint32_t tclass;
1431         node *cn;
1432         func *f;
1433         var *v;
1434
1435         g_pos = p;
1436         t_lineno = 1;
1437         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1438                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1439
1440                 if (tclass & TC_OPTERM)
1441                         continue;
1442
1443                 seq = &mainseq;
1444                 if (tclass & TC_BEGIN) {
1445                         seq = &beginseq;
1446                         chain_group();
1447
1448                 } else if (tclass & TC_END) {
1449                         seq = &endseq;
1450                         chain_group();
1451
1452                 } else if (tclass & TC_FUNCDECL) {
1453                         next_token(TC_FUNCTION);
1454                         g_pos++;
1455                         f = newfunc(t_string);
1456                         f->body.first = NULL;
1457                         f->nargs = 0;
1458                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1459                                 v = findvar(ahash, t_string);
1460                                 v->x.aidx = f->nargs++;
1461
1462                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1463                                         break;
1464                         }
1465                         seq = &f->body;
1466                         chain_group();
1467                         clear_array(ahash);
1468
1469                 } else if (tclass & TC_OPSEQ) {
1470                         rollback_token();
1471                         cn = chain_node(OC_TEST);
1472                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1473                         if (t_tclass & TC_GRPSTART) {
1474                                 rollback_token();
1475                                 chain_group();
1476                         } else {
1477                                 chain_node(OC_PRINT);
1478                         }
1479                         cn->r.n = mainseq.last;
1480
1481                 } else /* if (tclass & TC_GRPSTART) */ {
1482                         rollback_token();
1483                         chain_group();
1484                 }
1485         }
1486 }
1487
1488
1489 /* -------- program execution part -------- */
1490
1491 static node *mk_splitter(const char *s, tsplitter *spl)
1492 {
1493         regex_t *re, *ire;
1494         node *n;
1495
1496         re = &spl->re[0];
1497         ire = &spl->re[1];
1498         n = &spl->n;
1499         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1500                 regfree(re);
1501                 regfree(ire); // TODO: nuke ire, use re+1?
1502         }
1503         if (strlen(s) > 1) {
1504                 mk_re_node(s, n, re);
1505         } else {
1506                 n->info = (uint32_t) *s;
1507         }
1508
1509         return n;
1510 }
1511
1512 /* use node as a regular expression. Supplied with node ptr and regex_t
1513  * storage space. Return ptr to regex (if result points to preg, it should
1514  * be later regfree'd manually
1515  */
1516 static regex_t *as_regex(node *op, regex_t *preg)
1517 {
1518         int cflags;
1519         var *v;
1520         const char *s;
1521
1522         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1523                 return icase ? op->r.ire : op->l.re;
1524         }
1525         v = nvalloc(1);
1526         s = getvar_s(evaluate(op, v));
1527
1528         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1529         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1530          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1531          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1532          * (maybe gsub is not supposed to use REG_EXTENDED?).
1533          */
1534         if (regcomp(preg, s, cflags)) {
1535                 cflags &= ~REG_EXTENDED;
1536                 xregcomp(preg, s, cflags);
1537         }
1538         nvfree(v);
1539         return preg;
1540 }
1541
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * b is reallocated when it is NULL or when n >= *size (so also when
 * n == old size, which guarantees at least one extra byte); the new
 * capacity, n * 1.5 + 80, is stored in *size. Otherwise b is returned
 * unchanged. *size is not read when b is NULL.
 */
static char* qrealloc(char *b, int n, int *size)
{
        if (b != NULL && n < *size)
                return b;

        *size = n + (n>>1) + 80;
        return xrealloc(b, *size);
}
1554
1555 /* resize field storage space */
1556 static void fsrealloc(int size)
1557 {
1558         int i;
1559
1560         if (size >= maxfields) {
1561                 i = maxfields;
1562                 maxfields = size + 16;
1563                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1564                 for (; i < maxfields; i++) {
1565                         Fields[i].type = VF_SPECIAL;
1566                         Fields[i].string = NULL;
1567                 }
1568         }
1569
1570         if (size < nfields) {
1571                 for (i = size; i < nfields; i++) {
1572                         clrvar(Fields + i);
1573                 }
1574         }
1575         nfields = size;
1576 }
1577
/* Split string s into fields according to splitter node spl.
 * A buffer is allocated and stored in *slist; the fields are written into
 * it one after another, each terminated by NUL. Returns the number of
 * fields. Four modes, selected by spl->info:
 *  - OC_REGEXP node: split on regex matches
 *  - character 0:    every character is its own field
 *  - character ' ':  split on runs of whitespace
 *  - other character: split on that single character
 */
1578 static int awk_split(const char *s, node *spl, char **slist)
1579 {
1580         int l, n = 0;
1581         char c[4];
1582         char *s1;
1583         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1584
1585         /* in worst case, each char would be a separate field */
1586         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1587         strcpy(s1, s);
1588
        /* c[] is a small NUL-terminated separator set: c[0]/c[1] hold the
         * separator character, c[2] is '\n' only when RS is the empty
         * string. c+2 is therefore either "\n" or "". */
1589         c[0] = c[1] = (char)spl->info;
1590         c[2] = c[3] = '\0';
1591         if (*getvar_s(intvar[RS]) == '\0')
1592                 c[2] = '\n';
1593
1594         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1595                 if (!*s)
1596                         return n; /* "": zero fields */
1597                 n++; /* at least one field will be there */
1598                 do {
1599                         l = strcspn(s, c+2); /* len till next NUL or \n */
                        /* a match counts only if it starts at or before
                         * that position */
1600                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1601                          && pmatch[0].rm_so <= l
1602                         ) {
1603                                 l = pmatch[0].rm_so;
                                /* match ending at offset 0 (empty match at
                                 * the start): take one character so the
                                 * loop still advances */
1604                                 if (pmatch[0].rm_eo == 0) {
1605                                         l++;
1606                                         pmatch[0].rm_eo++;
1607                                 }
1608                                 n++; /* we saw yet another delimiter */
1609                         } else {
                                /* no usable match: the field runs up to
                                 * the NUL/newline; step over a newline */
1610                                 pmatch[0].rm_eo = l;
1611                                 if (s[l])
1612                                         pmatch[0].rm_eo++;
1613                         }
                        /* l = field length, rm_eo = bytes of s consumed */
1614                         memcpy(s1, s, l);
1615                         /* make sure we remove *all* of the separator chars */
1616                         do {
1617                                 s1[l] = '\0';
1618                         } while (++l < pmatch[0].rm_eo);
                        /* advance the output cursor; presumably nextword()
                         * moves s1 past the field just stored - confirm */
1619                         nextword(&s1);
1620                         s += pmatch[0].rm_eo;
1621                 } while (*s);
1622                 return n;
1623         }
1624         if (c[0] == '\0') {  /* null split */
                /* emit every character followed by a NUL */
1625                 while (*s) {
1626                         *s1++ = *s++;
1627                         *s1++ = '\0';
1628                         n++;
1629                 }
1630                 return n;
1631         }
1632         if (c[0] != ' ') {  /* single-character split */
                /* works on the copy made by strcpy() above: separators in
                 * it are overwritten with NUL. With icase both letter
                 * cases of the separator are in the set. */
1633                 if (icase) {
1634                         c[0] = toupper(c[0]);
1635                         c[1] = tolower(c[1]);
1636                 }
1637                 if (*s1)
1638                         n++;
1639                 while ((s1 = strpbrk(s1, c))) {
1640                         *s1++ = '\0';
1641                         n++;
1642                 }
1643                 return n;
1644         }
1645         /* space split */
        /* leading, trailing and repeated whitespace produce no fields */
1646         while (*s) {
1647                 s = skip_whitespace(s);
1648                 if (!*s)
1649                         break;
1650                 n++;
1651                 while (*s && !isspace(*s))
1652                         *s1++ = *s++;
1653                 *s1++ = '\0';
1654         }
1655         return n;
1656 }
1657
1658 static void split_f0(void)
1659 {
1660 /* static char *fstrings; */
1661 #define fstrings (G.split_f0__fstrings)
1662
1663         int i, n;
1664         char *s;
1665
1666         if (is_f0_split)
1667                 return;
1668
1669         is_f0_split = TRUE;
1670         free(fstrings);
1671         fsrealloc(0);
1672         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1673         fsrealloc(n);
1674         s = fstrings;
1675         for (i = 0; i < n; i++) {
1676                 Fields[i].string = nextword(&s);
1677                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1678         }
1679
1680         /* set NF manually to avoid side effects */
1681         clrvar(intvar[NF]);
1682         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1683         intvar[NF]->number = nfields;
1684 #undef fstrings
1685 }
1686
1687 /* perform additional actions when some internal variables changed */
1688 static void handle_special(var *v)
1689 {
1690         int n;
1691         char *b;
1692         const char *sep, *s;
1693         int sl, l, len, i, bsize;
1694
1695         if (!(v->type & VF_SPECIAL))
1696                 return;
1697
1698         if (v == intvar[NF]) {
1699                 n = (int)getvar_i(v);
1700                 fsrealloc(n);
1701
1702                 /* recalculate $0 */
1703                 sep = getvar_s(intvar[OFS]);
1704                 sl = strlen(sep);
1705                 b = NULL;
1706                 len = 0;
1707                 for (i = 0; i < n; i++) {
1708                         s = getvar_s(&Fields[i]);
1709                         l = strlen(s);
1710                         if (b) {
1711                                 memcpy(b+len, sep, sl);
1712                                 len += sl;
1713                         }
1714                         b = qrealloc(b, len+l+sl, &bsize);
1715                         memcpy(b+len, s, l);
1716                         len += l;
1717                 }
1718                 if (b)
1719                         b[len] = '\0';
1720                 setvar_p(intvar[F0], b);
1721                 is_f0_split = TRUE;
1722
1723         } else if (v == intvar[F0]) {
1724                 is_f0_split = FALSE;
1725
1726         } else if (v == intvar[FS]) {
1727                 mk_splitter(getvar_s(v), &fsplitter);
1728
1729         } else if (v == intvar[RS]) {
1730                 mk_splitter(getvar_s(v), &rsplitter);
1731
1732         } else if (v == intvar[IGNORECASE]) {
1733                 icase = istrue(v);
1734
1735         } else {                                /* $n */
1736                 n = getvar_i(intvar[NF]);
1737                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1738                 /* right here v is invalid. Just to note... */
1739         }
1740 }
1741
1742 /* step through func/builtin/etc arguments */
1743 static node *nextarg(node **pn)
1744 {
1745         node *n;
1746
1747         n = *pn;
1748         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1749                 *pn = n->r.n;
1750                 n = n->l.n;
1751         } else {
1752                 *pn = NULL;
1753         }
1754         return n;
1755 }
1756
1757 static void hashwalk_init(var *v, xhash *array)
1758 {
1759         hash_item *hi;
1760         unsigned i;
1761         walker_list *w;
1762         walker_list *prev_walker;
1763
1764         if (v->type & VF_WALK) {
1765                 prev_walker = v->x.walker;
1766         } else {
1767                 v->type |= VF_WALK;
1768                 prev_walker = NULL;
1769         }
1770         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1771
1772         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1773         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1774         w->cur = w->end = w->wbuf;
1775         w->prev = prev_walker;
1776         for (i = 0; i < array->csize; i++) {
1777                 hi = array->items[i];
1778                 while (hi) {
1779                         strcpy(w->end, hi->name);
1780                         nextword(&w->end);
1781                         hi = hi->next;
1782                 }
1783         }
1784 }
1785
1786 static int hashwalk_next(var *v)
1787 {
1788         walker_list *w = v->x.walker;
1789
1790         if (w->cur >= w->end) {
1791                 walker_list *prev_walker = w->prev;
1792
1793                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1794                 free(w);
1795                 v->x.walker = prev_walker;
1796                 return FALSE;
1797         }
1798
1799         setvar_s(v, nextword(&w->cur));
1800         return TRUE;
1801 }
1802
1803 /* evaluate node, return 1 when result is true, 0 otherwise */
1804 static int ptest(node *pattern)
1805 {
1806         /* ptest__v is "static": to save stack space? */
1807         return istrue(evaluate(pattern, &G.ptest__v));
1808 }
1809
1810 /* read next record from stream rsm into a variable v
 *
 * The record terminator comes from rsplitter: a regexp, a single
 * character, or - when that character is NUL - a run of two or more
 * newlines. On success v receives the record text (flagged VF_USER)
 * and RT receives the terminator text. Data read but not consumed
 * stays in rsm's buffer for the next call.
 *
 * Returns 1 when a record was stored, 0 when no data was left,
 * -1 when safe_read() failed (ERRNO is set in that case).
 */
1811 static int awk_getline(rstream *rsm, var *v)
1812 {
1813         char *b;
1814         regmatch_t pmatch[2];
1815         int size, a, p, pp = 0;
1816         int fd, so, eo, r, rp;
1817         char c, *m, *s;
1818
1819         /* we're using our own buffer since we need access to accumulating
1820          * characters
1821          */
1822         fd = fileno(rsm->F);
             /* m: buffer, a: offset of the pending data in it,
              * p: number of pending bytes, size: size recorded for m */
1823         m = rsm->buffer;
1824         a = rsm->adv;
1825         p = rsm->pos;
1826         size = rsm->size;
             /* separator character for the non-regexp branches below */
1827         c = (char) rsplitter.n.info;
             /* rp: offset of the record text within the pending data */
1828         rp = 0;
1829
1830         if (!m)
1831                 m = qrealloc(m, 256, &size);
1832
             /* Look for a terminator in the pending data; if none is found,
              * read more input and try again until no new data arrives. */
1833         do {
1834                 b = m + a;
                     /* default: the record is all pending data, empty terminator */
1835                 so = eo = p;
1836                 r = 1;
1837                 if (p > 0) {
1838                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1839                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1840                                                         b, 1, pmatch, 0) == 0) {
1841                                         so = pmatch[0].rm_so;
1842                                         eo = pmatch[0].rm_eo;
                                             /* a match ending exactly at the end of the
                                              * data is not accepted yet - presumably more
                                              * input could extend it */
1843                                         if (b[eo] != '\0')
1844                                                 break;
1845                                 }
1846                         } else if (c != '\0') {
                                     /* single character: only the bytes added by the
                                      * last read (from pp on) are searched; a NUL byte
                                      * inside the data ends the record as well */
1847                                 s = strchr(b+pp, c);
1848                                 if (!s)
1849                                         s = memchr(b+pp, '\0', p - pp);
1850                                 if (s) {
1851                                         so = eo = s-b;
1852                                         eo++;
1853                                         break;
1854                                 }
1855                         } else {
                                     /* separator char is NUL: skip leading newlines, then
                                      * the record ends at the first "\n\n"; all newlines
                                      * following it belong to the terminator */
1856                                 while (b[rp] == '\n')
1857                                         rp++;
1858                                 s = strstr(b+rp, "\n\n");
1859                                 if (s) {
1860                                         so = eo = s-b;
1861                                         while (b[eo] == '\n')
1862                                                 eo++;
                                             /* as above: wait for more data if the newline
                                              * run reaches the end of what was read */
1863                                         if (b[eo] != '\0')
1864                                                 break;
1865                                 }
1866                         }
1867                 }
1868
                     /* no terminator yet: move the pending data (and its NUL)
                      * to the start of the buffer before reading more */
1869                 if (a > 0) {
1870                         memmove(m, m+a, p+1);
1871                         b = m;
1872                         a = 0;
1873                 }
1874
1875                 m = qrealloc(m, a+p+128, &size);
1876                 b = m + a;
1877                 pp = p;
1878                 p += safe_read(fd, b+p, size-p-1);
                     /* p shrank: safe_read() returned a negative value.
                      * Drop the pending data and report the error. */
1879                 if (p < pp) {
1880                         p = 0;
1881                         r = 0;
1882                         setvar_i(intvar[ERRNO], errno);
1883                 }
1884                 b[p] = '\0';
1885
1886         } while (p > pp);
1887
1888         if (p == 0) {
                     /* nothing to return: r becomes 0 (no data) or -1 (read error) */
1889                 r--;
1890         } else {
                     /* temporarily NUL-terminate the record, then the terminator,
                      * to copy each of them out of the buffer */
1891                 c = b[so]; b[so] = '\0';
1892                 setvar_s(v, b+rp);
1893                 v->type |= VF_USER;
1894                 b[so] = c;
1895                 c = b[eo]; b[eo] = '\0';
1896                 setvar_s(intvar[RT], b+so);
1897                 b[eo] = c;
1898         }
1899
             /* save the buffer state; the next call starts after the terminator */
1900         rsm->buffer = m;
1901         rsm->adv = a + eo;
1902         rsm->pos = p - eo;
1903         rsm->size = size;
1904
1905         return r;
1906 }
1907
1908 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1909 {
1910         int r = 0;
1911         char c;
1912         const char *s = format;
1913
1914         if (int_as_int && n == (int)n) {
1915                 r = snprintf(b, size, "%d", (int)n);
1916         } else {
1917                 do { c = *s; } while (c && *++s);
1918                 if (strchr("diouxX", c)) {
1919                         r = snprintf(b, size, format, (int)n);
1920                 } else if (strchr("eEfgG", c)) {
1921                         r = snprintf(b, size, format, n);
1922                 } else {
1923                         syntax_error(EMSG_INV_FMT);
1924                 }
1925         }
1926         return r;
1927 }
1928
1929 /* formatted output into an allocated buffer, return ptr to buffer */
1930 static char *awk_printf(node *n)
1931 {
1932         char *b = NULL;
1933         char *fmt, *s, *f;
1934         const char *s1;
1935         int i, j, incr, bsize;
1936         char c, c1;
1937         var *v, *arg;
1938
1939         v = nvalloc(1);
1940         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1941
1942         i = 0;
1943         while (*f) {
1944                 s = f;
1945                 while (*f && (*f != '%' || *++f == '%'))
1946                         f++;
1947                 while (*f && !isalpha(*f)) {
1948                         if (*f == '*')
1949                                 syntax_error("%*x formats are not supported");
1950                         f++;
1951                 }
1952
1953                 incr = (f - s) + MAXVARFMT;
1954                 b = qrealloc(b, incr + i, &bsize);
1955                 c = *f;
1956                 if (c != '\0')
1957                         f++;
1958                 c1 = *f;
1959                 *f = '\0';
1960                 arg = evaluate(nextarg(&n), v);
1961
1962                 j = i;
1963                 if (c == 'c' || !c) {
1964                         i += sprintf(b+i, s, is_numeric(arg) ?
1965                                         (char)getvar_i(arg) : *getvar_s(arg));
1966                 } else if (c == 's') {
1967                         s1 = getvar_s(arg);
1968                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1969                         i += sprintf(b+i, s, s1);
1970                 } else {
1971                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1972                 }
1973                 *f = c1;
1974
1975                 /* if there was an error while sprintf, return value is negative */
1976                 if (i < j)
1977                         i = j;
1978         }
1979
1980         free(fmt);
1981         nvfree(v);
1982         b = xrealloc(b, i + 1);
1983         b[i] = '\0';
1984         return b;
1985 }
1986
1987 /* Common substitution routine.
1988  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
1989  * store result into (dest), return number of substitutions.
1990  * If nm = 0, replace all matches.
1991  * If src or dest is NULL, use $0.
1992  * If subexp != 0, enable subexpression matching (\1-\9).
1993  */
1994 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
1995 {
1996         char *resbuf;
1997         const char *sp;
1998         int match_no, residx, replen, resbufsize;
1999         int regexec_flags;
2000         regmatch_t pmatch[10];
2001         regex_t sreg, *regex;
2002
             /* resbuf/residx: result buffer and current write position in it;
              * sp: not yet processed part of the source string */
2003         resbuf = NULL;
2004         residx = 0;
2005         match_no = 0;
2006         regexec_flags = 0;
2007         regex = as_regex(rn, &sreg);
2008         sp = getvar_s(src ? src : intvar[F0]);
2009         replen = strlen(repl);
2010         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2011                 int so = pmatch[0].rm_so;
2012                 int eo = pmatch[0].rm_eo;
2013
2014                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                     /* copy everything up to the end of the match verbatim */
2015                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2016                 memcpy(resbuf + residx, sp, eo);
2017                 residx += eo;
                     /* with nm == 0 this is true for every match */
2018                 if (++match_no >= nm) {
2019                         const char *s;
2020                         int nbs;
2021
2022                         /* replace */
                             /* step back over the matched text just copied */
2023                         residx -= (eo - so);
                             /* nbs: number of backslashes directly before *s */
2024                         nbs = 0;
2025                         for (s = repl; *s; s++) {
2026                                 char c = resbuf[residx++] = *s;
2027                                 if (c == '\\') {
2028                                         nbs++;
2029                                         continue;
2030                                 }
2031                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2032                                         int j;
                                             /* un-copy c and half (rounded up) of the
                                              * backslashes in front of it */
2033                                         residx -= ((nbs + 3) >> 1);
2034                                         j = 0;
2035                                         if (c != '&') {
2036                                                 j = c - '0';
                                                     /* flips the parity test below: a digit
                                                      * is special only after an odd number
                                                      * of backslashes */
2037                                                 nbs++;
2038                                         }
2039                                         if (nbs % 2) {
                                                     /* literal '&' or digit */
2040                                                 resbuf[residx++] = c;
2041                                         } else {
                                                     /* insert whole match (j == 0) or
                                                      * subexpression j */
2042                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2043                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2044                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2045                                                 residx += n;
2046                                         }
2047                                 }
2048                                 nbs = 0;
2049                         }
2050                 }
2051
                     /* later matches do not start at the beginning of the line */
2052                 regexec_flags = REG_NOTBOL;
2053                 sp += eo;
                     /* the nm'th match has been replaced: done */
2054                 if (match_no == nm)
2055                         break;
2056                 if (eo == so) {
2057                         /* Empty match (e.g. "b*" will match anywhere).
2058                          * Advance by one char. */
2059 //BUG (bug 1333):
2060 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2061 //... and will erroneously match "b" even though it is NOT at the word start.
2062 //we need REG_NOTBOW but it does not exist...
2063 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2064 //it should be able to do it correctly.
2065                         /* Subtle: this is safe only because
2066                          * qrealloc allocated at least one extra byte */
2067                         resbuf[residx] = *sp;
                             /* end of source reached: the NUL just stored
                              * terminates the result */
2068                         if (*sp == '\0')
2069                                 goto ret;
2070                         sp++;
2071                         residx++;
2072                 }
2073         }
2074
             /* append the rest of the source, including its NUL */
2075         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2076         strcpy(resbuf + residx, sp);
2077  ret:
2078         //bb_error_msg("end sp:'%s'%p", sp,sp);
2079         setvar_p(dest ? dest : intvar[F0], resbuf);
             /* free the regex only if as_regex() compiled it into sreg */
2080         if (regex == &sreg)
2081                 regfree(regex);
2082         return match_no;
2083 }
2084
2085 static NOINLINE int do_mktime(const char *ds)
2086 {
2087         struct tm then;
2088         int count;
2089
2090         /*memset(&then, 0, sizeof(then)); - not needed */
2091         then.tm_isdst = -1; /* default is unknown */
2092
2093         /* manpage of mktime says these fields are ints,
2094          * so we can sscanf stuff directly into them */
2095         count = sscanf(ds, "%u %u %u %u %u %u %d",
2096                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2097                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2098                 &then.tm_isdst);
2099
2100         if (count < 6
2101          || (unsigned)then.tm_mon < 1
2102          || (unsigned)then.tm_year < 1900
2103         ) {
2104                 return -1;
2105         }
2106
2107         then.tm_mon -= 1;
2108         then.tm_year -= 1900;
2109
2110         return mktime(&then);
2111 }
2112
/*
 * Execute one builtin function call.
 *
 * op:  the call node. op->info selects the builtin (OPNMASK bits),
 *      carries per-argument handling flags and, in its top two bits,
 *      the minimum number of arguments; op->l.n is the argument list.
 * res: variable receiving the result; it is also the return value.
 *
 * Up to four arguments are fetched: an[] holds their nodes, av[] their
 * evaluated values and as[] their string values - the latter two only
 * where the flags in op->info ask for it. Fewer arguments than the
 * required minimum raise EMSG_TOO_FEW_ARGS.
 */
2113 static NOINLINE var *exec_builtin(node *op, var *res)
2114 {
2115 #define tspl (G.exec_builtin__tspl)
2116
2117         var *tv;
2118         node *an[4];
2119         var *av[4];
2120         const char *as[4];
2121         regmatch_t pmatch[2];
2122         regex_t sreg, *re;
2123         node *spl;
2124         uint32_t isr, info;
2125         int nargs;
2126         time_t tt;
2127         int i, l, ll, n;
2128
             /* temporaries holding the evaluated arguments */
2129         tv = nvalloc(4);
2130         isr = info = op->info;
2131         op = op->l.n;
2132
             /* av[2]/av[3] are tested or passed on below even when absent */
2133         av[2] = av[3] = NULL;
2134         for (i = 0; i < 4 && op; i++) {
2135                 an[i] = nextarg(&op);
                     /* isr is shifted once per argument, so these masks test
                      * the flag bits belonging to argument i */
2136                 if (isr & 0x09000000)
2137                         av[i] = evaluate(an[i], &tv[i]);
2138                 if (isr & 0x08000000)
2139                         as[i] = getvar_s(av[i]);
2140                 isr >>= 1;
2141         }
2142
2143         nargs = i;
2144         if ((uint32_t)nargs < (info >> 30))
2145                 syntax_error(EMSG_TOO_FEW_ARGS);
2146
2147         info &= OPNMASK;
2148         switch (info) {
2149
             /* atan2 of the first two arguments (needs libm support) */
2150         case B_a2:
2151 #if ENABLE_FEATURE_AWK_LIBM
2152                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2153 #else
2154                 syntax_error(EMSG_NO_MATH);
2155 #endif
2156                 break;
2157
             /* split as[0] into array av[1]; result is the number of pieces.
              * The splitter is the 3rd argument (a regexp node, or a string
              * turned into a splitter) or, by default, the field splitter. */
2158         case B_sp: {
2159                 char *s, *s1;
2160
2161                 if (nargs > 2) {
2162                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2163                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2164                 } else {
2165                         spl = &fsplitter.n;
2166                 }
2167
2168                 n = awk_split(as[0], spl, &s);
                     /* keep the start of the buffer: nextword() advances s */
2169                 s1 = s;
2170                 clear_array(iamarray(av[1]));
2171                 for (i = 1; i <= n; i++)
2172                         setari_u(av[1], i, nextword(&s));
2173                 free(s1);
2174                 setvar_i(res, n);
2175                 break;
2176         }
2177
             /* substring of as[0]: 1-based start av[1], length av[2]
              * (default: up to the end). The start is clamped to [0, l]
              * and a negative length to 0. */
2178         case B_ss: {
2179                 char *s;
2180
2181                 l = strlen(as[0]);
2182                 i = getvar_i(av[1]) - 1;
2183                 if (i > l)
2184                         i = l;
2185                 if (i < 0)
2186                         i = 0;
2187                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2188                 if (n < 0)
2189                         n = 0;
2190                 s = xstrndup(as[0]+i, n);
2191                 setvar_p(res, s);
2192                 break;
2193         }
2194
2195         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2196          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2197         case B_an:
2198                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2199                 break;
2200
2201         case B_co:
2202                 setvar_i(res, ~getvar_i_int(av[0]));
2203                 break;
2204
2205         case B_ls:
2206                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2207                 break;
2208
2209         case B_or:
2210                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2211                 break;
2212
2213         case B_rs:
2214                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2215                 break;
2216
2217         case B_xo:
2218                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2219                 break;
2220
             /* copy of as[0] converted to lower (B_lo) or upper (B_up) case;
              * only the ASCII letters a-z/A-Z are changed */
2221         case B_lo:
2222         case B_up: {
2223                 char *s, *s1;
2224                 s1 = s = xstrdup(as[0]);
2225                 while (*s1) {
2226                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2227                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2228                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2229                         s1++;
2230                 }
2231                 setvar_p(res, s);
2232                 break;
2233         }
2234
             /* 1-based position of the first occurrence of as[1] in as[0],
              * 0 if there is none (or as[1] is empty); the comparison
              * ignores case when icase is set */
2235         case B_ix:
2236                 n = 0;
2237                 ll = strlen(as[1]);
2238                 l = strlen(as[0]) - ll;
2239                 if (ll > 0 && l >= 0) {
2240                         if (!icase) {
2241                                 char *s = strstr(as[0], as[1]);
2242                                 if (s)
2243                                         n = (s - as[0]) + 1;
2244                         } else {
2245                                 /* this piece of code is terribly slow and
2246                                  * really should be rewritten
2247                                  */
2248                                 for (i = 0; i <= l; i++) {
2249                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2250                                                 n = i+1;
2251                                                 break;
2252                                         }
2253                                 }
2254                         }
2255                 }
2256                 setvar_i(res, n);
2257                 break;
2258
             /* format a time with strftime(): the time is av[1] if given,
              * else the current time; the format is as[0] if given, else
              * a default. The result is at most MAXVARFMT bytes. */
2259         case B_ti:
2260                 if (nargs > 1)
2261                         tt = getvar_i(av[1]);
2262                 else
2263                         time(&tt);
2264                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2265                 i = strftime(g_buf, MAXVARFMT,
2266                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2267                         localtime(&tt));
2268                 g_buf[i] = '\0';
2269                 setvar_s(res, g_buf);
2270                 break;
2271
             /* parse the date string as[0] via do_mktime() */
2272         case B_mt:
2273                 setvar_i(res, do_mktime(as[0]));
2274                 break;
2275
             /* match as[0] against regexp an[1]. On a match RSTART and the
              * result are the 1-based match start and RLENGTH the match
              * length; otherwise RSTART is 0 and RLENGTH is -1. */
2276         case B_ma:
2277                 re = as_regex(an[1], &sreg);
2278                 n = regexec(re, as[0], 1, pmatch, 0);
2279                 if (n == 0) {
2280                         pmatch[0].rm_so++;
2281                         pmatch[0].rm_eo++;
2282                 } else {
2283                         pmatch[0].rm_so = 0;
2284                         pmatch[0].rm_eo = -1;
2285                 }
2286                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2287                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2288                 setvar_i(res, pmatch[0].rm_so);
2289                 if (re == &sreg)
2290                         regfree(re);
2291                 break;
2292
             /* substitution with subexpression support: the av[2]'th match
              * in av[3] (or $0) is replaced, the new string goes to res */
2293         case B_ge:
2294                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2295                 break;
2296
             /* replace all matches in av[2] (or $0) in place;
              * result is the number of substitutions */
2297         case B_gs:
2298                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2299                 break;
2300
             /* same, but only the first match is replaced */
2301         case B_su:
2302                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2303                 break;
2304         }
2305
2306         nvfree(tv);
2307         return res;
2308 #undef tspl
2309 }
2310
2311 /*
2312  * Evaluate node - the heart of the program. Supplied with subtree
2313  * and place where to store result. returns ptr to result.
2314  */
2315 #define XC(n) ((n) >> 8)
2316
2317 static var *evaluate(node *op, var *res)
2318 {
2319 /* This procedure is recursive so we should count every byte */
2320 #define fnargs (G.evaluate__fnargs)
2321 /* seed is initialized to 1 */
2322 #define seed   (G.evaluate__seed)
2323 #define sreg   (G.evaluate__sreg)
2324
2325         var *v1;
2326
2327         if (!op)
2328                 return setvar_s(res, NULL);
2329
2330         v1 = nvalloc(2);
2331
2332         while (op) {
2333                 struct {
2334                         var *v;
2335                         const char *s;
2336                 } L = L; /* for compiler */
2337                 struct {
2338                         var *v;
2339                         const char *s;
2340                 } R = R;
2341                 double L_d = L_d;
2342                 uint32_t opinfo;
2343                 int opn;
2344                 node *op1;
2345
2346                 opinfo = op->info;
2347                 opn = (opinfo & OPNMASK);
2348                 g_lineno = op->lineno;
2349                 op1 = op->l.n;
2350
2351                 /* execute inevitable things */
2352                 if (opinfo & OF_RES1)
2353                         L.v = evaluate(op1, v1);
2354                 if (opinfo & OF_RES2)
2355                         R.v = evaluate(op->r.n, v1+1);
2356                 if (opinfo & OF_STR1)
2357                         L.s = getvar_s(L.v);
2358                 if (opinfo & OF_STR2)
2359                         R.s = getvar_s(R.v);
2360                 if (opinfo & OF_NUM1)
2361                         L_d = getvar_i(L.v);
2362
2363                 switch (XC(opinfo & OPCLSMASK)) {
2364
2365                 /* -- iterative node type -- */
2366
2367                 /* test pattern */
2368                 case XC( OC_TEST ):
2369                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2370                                 /* it's range pattern */
2371                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2372                                         op->info |= OF_CHECKED;
2373                                         if (ptest(op1->r.n))
2374                                                 op->info &= ~OF_CHECKED;
2375                                         op = op->a.n;
2376                                 } else {
2377                                         op = op->r.n;
2378                                 }
2379                         } else {
2380                                 op = ptest(op1) ? op->a.n : op->r.n;
2381                         }
2382                         break;
2383
2384                 /* just evaluate an expression, also used as unconditional jump */
2385                 case XC( OC_EXEC ):
2386                         break;
2387
2388                 /* branch, used in if-else and various loops */
2389                 case XC( OC_BR ):
2390                         op = istrue(L.v) ? op->a.n : op->r.n;
2391                         break;
2392
2393                 /* initialize for-in loop */
2394                 case XC( OC_WALKINIT ):
2395                         hashwalk_init(L.v, iamarray(R.v));
2396                         break;
2397
2398                 /* get next array item */
2399                 case XC( OC_WALKNEXT ):
2400                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2401                         break;
2402
2403                 case XC( OC_PRINT ):
2404                 case XC( OC_PRINTF ): {
2405                         FILE *F = stdout;
2406
2407                         if (op->r.n) {
2408                                 rstream *rsm = newfile(R.s);
2409                                 if (!rsm->F) {
2410                                         if (opn == '|') {
2411                                                 rsm->F = popen(R.s, "w");
2412                                                 if (rsm->F == NULL)
2413                                                         bb_perror_msg_and_die("popen");
2414                                                 rsm->is_pipe = 1;
2415                                         } else {
2416                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2417                                         }
2418                                 }
2419                                 F = rsm->F;
2420                         }
2421
2422                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2423                                 if (!op1) {
2424                                         fputs(getvar_s(intvar[F0]), F);
2425                                 } else {
2426                                         while (op1) {
2427                                                 var *v = evaluate(nextarg(&op1), v1);
2428                                                 if (v->type & VF_NUMBER) {
2429                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2430                                                                         getvar_i(v), TRUE);
2431                                                         fputs(g_buf, F);
2432                                                 } else {
2433                                                         fputs(getvar_s(v), F);
2434                                                 }
2435
2436                                                 if (op1)
2437                                                         fputs(getvar_s(intvar[OFS]), F);
2438                                         }
2439                                 }
2440                                 fputs(getvar_s(intvar[ORS]), F);
2441
2442                         } else {        /* OC_PRINTF */
2443                                 char *s = awk_printf(op1);
2444                                 fputs(s, F);
2445                                 free(s);
2446                         }
2447                         fflush(F);
2448                         break;
2449                 }
2450
2451                 case XC( OC_DELETE ): {
2452                         uint32_t info = op1->info & OPCLSMASK;
2453                         var *v;
2454
2455                         if (info == OC_VAR) {
2456                                 v = op1->l.v;
2457                         } else if (info == OC_FNARG) {
2458                                 v = &fnargs[op1->l.aidx];
2459                         } else {
2460                                 syntax_error(EMSG_NOT_ARRAY);
2461                         }
2462
2463                         if (op1->r.n) {
2464                                 const char *s;
2465                                 clrvar(L.v);
2466                                 s = getvar_s(evaluate(op1->r.n, v1));
2467                                 hash_remove(iamarray(v), s);
2468                         } else {
2469                                 clear_array(iamarray(v));
2470                         }
2471                         break;
2472                 }
2473
2474                 case XC( OC_NEWSOURCE ):
2475                         g_progname = op->l.new_progname;
2476                         break;
2477
2478                 case XC( OC_RETURN ):
2479                         copyvar(res, L.v);
2480                         break;
2481
2482                 case XC( OC_NEXTFILE ):
2483                         nextfile = TRUE;
2484                 case XC( OC_NEXT ):
2485                         nextrec = TRUE;
2486                 case XC( OC_DONE ):
2487                         clrvar(res);
2488                         break;
2489
2490                 case XC( OC_EXIT ):
2491                         awk_exit(L_d);
2492
2493                 /* -- recursive node type -- */
2494
2495                 case XC( OC_VAR ):
2496                         L.v = op->l.v;
2497                         if (L.v == intvar[NF])
2498                                 split_f0();
2499                         goto v_cont;
2500
2501                 case XC( OC_FNARG ):
2502                         L.v = &fnargs[op->l.aidx];
2503  v_cont:
2504                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2505                         break;
2506
2507                 case XC( OC_IN ):
2508                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2509                         break;
2510
2511                 case XC( OC_REGEXP ):
2512                         op1 = op;
2513                         L.s = getvar_s(intvar[F0]);
2514                         goto re_cont;
2515
2516                 case XC( OC_MATCH ):
2517                         op1 = op->r.n;
2518  re_cont:
2519                         {
2520                                 regex_t *re = as_regex(op1, &sreg);
2521                                 int i = regexec(re, L.s, 0, NULL, 0);
2522                                 if (re == &sreg)
2523                                         regfree(re);
2524                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2525                         }
2526                         break;
2527
2528                 case XC( OC_MOVE ):
2529                         /* if source is a temporary string, jusk relink it to dest */
2530 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2531 //then L.v ends up being a string, which is wrong
2532 //                      if (R.v == v1+1 && R.v->string) {
2533 //                              res = setvar_p(L.v, R.v->string);
2534 //                              R.v->string = NULL;
2535 //                      } else {
2536                                 res = copyvar(L.v, R.v);
2537 //                      }
2538                         break;
2539
2540                 case XC( OC_TERNARY ):
2541                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2542                                 syntax_error(EMSG_POSSIBLE_ERROR);
2543                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2544                         break;
2545
2546                 case XC( OC_FUNC ): {
2547                         var *vbeg, *v;
2548                         const char *sv_progname;
2549
2550                         if (!op->r.f->body.first)
2551                                 syntax_error(EMSG_UNDEF_FUNC);
2552
2553                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2554                         while (op1) {
2555                                 var *arg = evaluate(nextarg(&op1), v1);
2556                                 copyvar(v, arg);
2557                                 v->type |= VF_CHILD;
2558                                 v->x.parent = arg;
2559                                 if (++v - vbeg >= op->r.f->nargs)
2560                                         break;
2561                         }
2562
2563                         v = fnargs;
2564                         fnargs = vbeg;
2565                         sv_progname = g_progname;
2566
2567                         res = evaluate(op->r.f->body.first, res);
2568
2569                         g_progname = sv_progname;
2570                         nvfree(fnargs);
2571                         fnargs = v;
2572
2573                         break;
2574                 }
2575
2576                 case XC( OC_GETLINE ):
2577                 case XC( OC_PGETLINE ): {
2578                         rstream *rsm;
2579                         int i;
2580
2581                         if (op1) {
2582                                 rsm = newfile(L.s);
2583                                 if (!rsm->F) {
2584                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2585                                                 rsm->F = popen(L.s, "r");
2586                                                 rsm->is_pipe = TRUE;
2587                                         } else {
2588                                                 rsm->F = fopen_for_read(L.s);           /* not xfopen! */
2589                                         }
2590                                 }
2591                         } else {
2592                                 if (!iF)
2593                                         iF = next_input_file();
2594                                 rsm = iF;
2595                         }
2596
2597                         if (!rsm->F) {
2598                                 setvar_i(intvar[ERRNO], errno);
2599                                 setvar_i(res, -1);
2600                                 break;
2601                         }
2602
2603                         if (!op->r.n)
2604                                 R.v = intvar[F0];
2605
2606                         i = awk_getline(rsm, R.v);
2607                         if (i > 0 && !op1) {
2608                                 incvar(intvar[FNR]);
2609                                 incvar(intvar[NR]);
2610                         }
2611                         setvar_i(res, i);
2612                         break;
2613                 }
2614
2615                 /* simple builtins */
2616                 case XC( OC_FBLTIN ): {
2617                         int i;
2618                         rstream *rsm;
2619                         double R_d = R_d; /* for compiler */
2620
2621                         switch (opn) {
2622                         case F_in:
2623                                 R_d = (int)L_d;
2624                                 break;
2625
2626                         case F_rn:
2627                                 R_d = (double)rand() / (double)RAND_MAX;
2628                                 break;
2629 #if ENABLE_FEATURE_AWK_LIBM
2630                         case F_co:
2631                                 R_d = cos(L_d);
2632                                 break;
2633
2634                         case F_ex:
2635                                 R_d = exp(L_d);
2636                                 break;
2637
2638                         case F_lg:
2639                                 R_d = log(L_d);
2640                                 break;
2641
2642                         case F_si:
2643                                 R_d = sin(L_d);
2644                                 break;
2645
2646                         case F_sq:
2647                                 R_d = sqrt(L_d);
2648                                 break;
2649 #else
2650                         case F_co:
2651                         case F_ex:
2652                         case F_lg:
2653                         case F_si:
2654                         case F_sq:
2655                                 syntax_error(EMSG_NO_MATH);
2656                                 break;
2657 #endif
2658                         case F_sr:
2659                                 R_d = (double)seed;
2660                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2661                                 srand(seed);
2662                                 break;
2663
2664                         case F_ti:
2665                                 R_d = time(NULL);
2666                                 break;
2667
2668                         case F_le:
2669                                 if (!op1)
2670                                         L.s = getvar_s(intvar[F0]);
2671                                 R_d = strlen(L.s);
2672                                 break;
2673
2674                         case F_sy:
2675                                 fflush_all();
2676                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2677                                                 ? (system(L.s) >> 8) : 0;
2678                                 break;
2679
2680                         case F_ff:
2681                                 if (!op1) {
2682                                         fflush(stdout);
2683                                 } else if (L.s && *L.s) {
2684                                         rsm = newfile(L.s);
2685                                         fflush(rsm->F);
2686                                 } else {
2687                                         fflush_all();
2688                                 }
2689                                 break;
2690
2691                         case F_cl:
2692                                 i = 0;
2693                                 rsm = (rstream *)hash_search(fdhash, L.s);
2694                                 if (rsm) {
2695                                         i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2696                                         free(rsm->buffer);
2697                                         hash_remove(fdhash, L.s);
2698                                 }
2699                                 if (i != 0)
2700                                         setvar_i(intvar[ERRNO], errno);
2701                                 R_d = (double)i;
2702                                 break;
2703                         }
2704                         setvar_i(res, R_d);
2705                         break;
2706                 }
2707
2708                 case XC( OC_BUILTIN ):
2709                         res = exec_builtin(op, res);
2710                         break;
2711
2712                 case XC( OC_SPRINTF ):
2713                         setvar_p(res, awk_printf(op1));
2714                         break;
2715
2716                 case XC( OC_UNARY ): {
2717                         double Ld, R_d;
2718
2719                         Ld = R_d = getvar_i(R.v);
2720                         switch (opn) {
2721                         case 'P':
2722                                 Ld = ++R_d;
2723                                 goto r_op_change;
2724                         case 'p':
2725                                 R_d++;
2726                                 goto r_op_change;
2727                         case 'M':
2728                                 Ld = --R_d;
2729                                 goto r_op_change;
2730                         case 'm':
2731                                 R_d--;
2732  r_op_change:
2733                                 setvar_i(R.v, R_d);
2734                                 break;
2735                         case '!':
2736                                 Ld = !istrue(R.v);
2737                                 break;
2738                         case '-':
2739                                 Ld = -R_d;
2740                                 break;
2741                         }
2742                         setvar_i(res, Ld);
2743                         break;
2744                 }
2745
2746                 case XC( OC_FIELD ): {
2747                         int i = (int)getvar_i(R.v);
2748                         if (i == 0) {
2749                                 res = intvar[F0];
2750                         } else {
2751                                 split_f0();
2752                                 if (i > nfields)
2753                                         fsrealloc(i);
2754                                 res = &Fields[i - 1];
2755                         }
2756                         break;
2757                 }
2758
2759                 /* concatenation (" ") and index joining (",") */
2760                 case XC( OC_CONCAT ):
2761                 case XC( OC_COMMA ): {
2762                         const char *sep = "";
2763                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2764                                 sep = getvar_s(intvar[SUBSEP]);
2765                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2766                         break;
2767                 }
2768
2769                 case XC( OC_LAND ):
2770                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2771                         break;
2772
2773                 case XC( OC_LOR ):
2774                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2775                         break;
2776
2777                 case XC( OC_BINARY ):
2778                 case XC( OC_REPLACE ): {
2779                         double R_d = getvar_i(R.v);
2780                         switch (opn) {
2781                         case '+':
2782                                 L_d += R_d;
2783                                 break;
2784                         case '-':
2785                                 L_d -= R_d;
2786                                 break;
2787                         case '*':
2788                                 L_d *= R_d;
2789                                 break;
2790                         case '/':
2791                                 if (R_d == 0)
2792                                         syntax_error(EMSG_DIV_BY_ZERO);
2793                                 L_d /= R_d;
2794                                 break;
2795                         case '&':
2796 #if ENABLE_FEATURE_AWK_LIBM
2797                                 L_d = pow(L_d, R_d);
2798 #else
2799                                 syntax_error(EMSG_NO_MATH);
2800 #endif
2801                                 break;
2802                         case '%':
2803                                 if (R_d == 0)
2804                                         syntax_error(EMSG_DIV_BY_ZERO);
2805                                 L_d -= (int)(L_d / R_d) * R_d;
2806                                 break;
2807                         }
2808                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2809                         break;
2810                 }
2811
2812                 case XC( OC_COMPARE ): {
2813                         int i = i; /* for compiler */
2814                         double Ld;
2815
2816                         if (is_numeric(L.v) && is_numeric(R.v)) {
2817                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2818                         } else {
2819                                 const char *l = getvar_s(L.v);
2820                                 const char *r = getvar_s(R.v);
2821                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2822                         }
2823                         switch (opn & 0xfe) {
2824                         case 0:
2825                                 i = (Ld > 0);
2826                                 break;
2827                         case 2:
2828                                 i = (Ld >= 0);
2829                                 break;
2830                         case 4:
2831                                 i = (Ld == 0);
2832                                 break;
2833                         }
2834                         setvar_i(res, (i == 0) ^ (opn & 1));
2835                         break;
2836                 }
2837
2838                 default:
2839                         syntax_error(EMSG_POSSIBLE_ERROR);
2840                 }
2841                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2842                         op = op->a.n;
2843                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2844                         break;
2845                 if (nextrec)
2846                         break;
2847         } /* while (op) */
2848
2849         nvfree(v1);
2850         return res;
2851 #undef fnargs
2852 #undef seed
2853 #undef sreg
2854 }
2855
2856
2857 /* -------- main & co. -------- */
2858
2859 static int awk_exit(int r)
2860 {
2861         var tv;
2862         unsigned i;
2863         hash_item *hi;
2864
2865         zero_out_var(&tv);
2866
2867         if (!exiting) {
2868                 exiting = TRUE;
2869                 nextrec = FALSE;
2870                 evaluate(endseq.first, &tv);
2871         }
2872
2873         /* waiting for children */
2874         for (i = 0; i < fdhash->csize; i++) {
2875                 hi = fdhash->items[i];
2876                 while (hi) {
2877                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2878                                 pclose(hi->data.rs.F);
2879                         hi = hi->next;
2880                 }
2881         }
2882
2883         exit(r);
2884 }
2885
2886 /* if expr looks like "var=value", perform assignment and return 1,
2887  * otherwise return 0 */
2888 static int is_assignment(const char *expr)
2889 {
2890         char *exprc, *s, *s0, *s1;
2891
2892         exprc = xstrdup(expr);
2893         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2894                 free(exprc);
2895                 return FALSE;
2896         }
2897
2898         *s++ = '\0';
2899         s0 = s1 = s;
2900         while (*s)
2901                 *s1++ = nextchar(&s);
2902
2903         *s1 = '\0';
2904         setvar_u(newvar(exprc), s0);
2905         free(exprc);
2906         return TRUE;
2907 }
2908
2909 /* switch to next input file */
2910 static rstream *next_input_file(void)
2911 {
2912 #define rsm          (G.next_input_file__rsm)
2913 #define files_happen (G.next_input_file__files_happen)
2914
2915         FILE *F = NULL;
2916         const char *fname, *ind;
2917
2918         if (rsm.F)
2919                 fclose(rsm.F);
2920         rsm.F = NULL;
2921         rsm.pos = rsm.adv = 0;
2922
2923         do {
2924                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2925                         if (files_happen)
2926                                 return NULL;
2927                         fname = "-";
2928                         F = stdin;
2929                 } else {
2930                         ind = getvar_s(incvar(intvar[ARGIND]));
2931                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2932                         if (fname && *fname && !is_assignment(fname))
2933                                 F = xfopen_stdin(fname);
2934                 }
2935         } while (!F);
2936
2937         files_happen = TRUE;
2938         setvar_s(intvar[FILENAME], fname);
2939         rsm.F = F;
2940         return &rsm;
2941 #undef rsm
2942 #undef files_happen
2943 }
2944
2945 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2946 int awk_main(int argc, char **argv)
2947 {
2948         unsigned opt;
2949         char *opt_F, *opt_W;
2950         llist_t *list_v = NULL;
2951         llist_t *list_f = NULL;
2952         int i, j;
2953         var *v;
2954         var tv;
2955         char **envp;
2956         char *vnames = (char *)vNames; /* cheat */
2957         char *vvalues = (char *)vValues;
2958
2959         INIT_G();
2960
2961         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2962          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2963         if (ENABLE_LOCALE_SUPPORT)
2964                 setlocale(LC_NUMERIC, "C");
2965
2966         zero_out_var(&tv);
2967
2968         /* allocate global buffer */
2969         g_buf = xmalloc(MAXVARFMT + 1);
2970
2971         vhash = hash_init();
2972         ahash = hash_init();
2973         fdhash = hash_init();
2974         fnhash = hash_init();
2975
2976         /* initialize variables */
2977         for (i = 0; *vnames; i++) {
2978                 intvar[i] = v = newvar(nextword(&vnames));
2979                 if (*vvalues != '\377')
2980                         setvar_s(v, nextword(&vvalues));
2981                 else
2982                         setvar_i(v, 0);
2983
2984                 if (*vnames == '*') {
2985                         v->type |= VF_SPECIAL;
2986                         vnames++;
2987                 }
2988         }
2989
2990         handle_special(intvar[FS]);
2991         handle_special(intvar[RS]);
2992
2993         newfile("/dev/stdin")->F = stdin;
2994         newfile("/dev/stdout")->F = stdout;
2995         newfile("/dev/stderr")->F = stderr;
2996
2997         /* Huh, people report that sometimes environ is NULL. Oh well. */
2998         if (environ) for (envp = environ; *envp; envp++) {
2999                 /* environ is writable, thus we don't strdup it needlessly */
3000                 char *s = *envp;
3001                 char *s1 = strchr(s, '=');
3002                 if (s1) {
3003                         *s1 = '\0';
3004                         /* Both findvar and setvar_u take const char*
3005                          * as 2nd arg -> environment is not trashed */
3006                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3007                         *s1 = '=';
3008                 }
3009         }
3010         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3011         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3012         argv += optind;
3013         argc -= optind;
3014         if (opt & 0x1)
3015                 setvar_s(intvar[FS], opt_F); // -F
3016         while (list_v) { /* -v */
3017                 if (!is_assignment(llist_pop(&list_v)))
3018                         bb_show_usage();
3019         }
3020         if (list_f) { /* -f */
3021                 do {
3022                         char *s = NULL;
3023                         FILE *from_file;
3024
3025                         g_progname = llist_pop(&list_f);
3026                         from_file = xfopen_stdin(g_progname);
3027                         /* one byte is reserved for some trick in next_token */
3028                         for (i = j = 1; j > 0; i += j) {
3029                                 s = xrealloc(s, i + 4096);
3030                                 j = fread(s + i, 1, 4094, from_file);
3031                         }
3032                         s[i] = '\0';
3033                         fclose(from_file);
3034                         parse_program(s + 1);
3035                         free(s);
3036                 } while (list_f);
3037                 argc++;
3038         } else { // no -f: take program from 1st parameter
3039                 if (!argc)
3040                         bb_show_usage();
3041                 g_progname = "cmd. line";
3042                 parse_program(*argv++);
3043         }
3044         if (opt & 0x8) // -W
3045                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3046
3047         /* fill in ARGV array */
3048         setvar_i(intvar[ARGC], argc);
3049         setari_u(intvar[ARGV], 0, "awk");
3050         i = 0;
3051         while (*argv)
3052                 setari_u(intvar[ARGV], ++i, *argv++);
3053
3054         evaluate(beginseq.first, &tv);
3055         if (!mainseq.first && !endseq.first)
3056                 awk_exit(EXIT_SUCCESS);
3057
3058         /* input file could already be opened in BEGIN block */
3059         if (!iF)
3060                 iF = next_input_file();
3061
3062         /* passing through input files */
3063         while (iF) {
3064                 nextfile = FALSE;
3065                 setvar_i(intvar[FNR], 0);
3066
3067                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3068                         nextrec = FALSE;
3069                         incvar(intvar[NR]);
3070                         incvar(intvar[FNR]);
3071                         evaluate(mainseq.first, &tv);
3072
3073                         if (nextfile)
3074                                 break;
3075                 }
3076
3077                 if (i < 0)
3078                         syntax_error(strerror(errno));
3079
3080                 iF = next_input_file();
3081         }
3082
3083         awk_exit(EXIT_SUCCESS);
3084         /*return 0;*/
3085 }