awk: reduce ifdef forest
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
/* Debug tracing hooks. As written, both macros expand to an empty
 * statement, so tracing calls cost nothing.
 * If you comment out one of these below, it will be #defined later
 * (by the matching #ifndef fallback) to perform debug printfs to stderr: */
#define debug_printf_walker(...)  do {} while (0)
#define debug_printf_eval(...)  do {} while (0)

/* Fallbacks: only reached when the no-op definition above was removed */
#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
28
29
30
/* Buffer size handed to fmt_num() when a number is converted to a string
 * (see getvar_s) */
#define MAXVARFMT       240
/* Smallest number of vars a temporary-variable block is created with
 * (see nvalloc) */
#define MINNVBLOCK      64

/* variable flags (bits of var::type) */
#define VF_NUMBER       0x0001  /* 1 = primary type is number */
#define VF_ARRAY        0x0002  /* 1 = it's an array */

#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed
 * (clrvar() keeps exactly these bits and drops all others) */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
48
/* One entry of a variable's x.walker list (list of array elements used
 * by for..in, per the var::x.walker comment).
 * NOTE(review): end/cur look like bounds/cursor into wbuf, and wbuf[1]
 * like the start of a longer trailing buffer - confirm at the
 * allocation site, which is outside this view. */
typedef struct walker_list {
        char *end;
        char *cur;
        struct walker_list *prev;   /* link to another walker_list */
        char wbuf[1];
} walker_list;
55
/* Variable: flag bits plus a numeric and a string representation.
 * Which representation is primary/cached is tracked in 'type'
 * (VF_NUMBER, VF_CACHED). */
typedef struct var_s {
        unsigned type;            /* flags (VF_*) */
        double number;            /* numeric value (see getvar_i) */
        char *string;             /* string value; NULL reads as "" (see getvar_s) */
        union {
                int aidx;               /* func arg idx (for compilation stage) */
                struct xhash_s *array;  /* array ptr */
                struct var_s *parent;   /* for func args, ptr to actual parameter */
                walker_list *walker;    /* list of array elements (for..in) */
        } x;
} var;
68
/* Node chain (pattern-action chain, BEGIN, END, function bodies).
 * Holds pointers to the first and last node of the chain. */
typedef struct chain_s {
        struct node_s *first;
        struct node_s *last;
        const char *programname;   /* NOTE(review): presumably the source name used in diagnostics - confirm */
} chain;
75
/* Function: argument count plus the node chain forming its body */
typedef struct func_s {
        unsigned nargs;         /* number of arguments */
        struct chain_s body;    /* function body (a node chain) */
} func;
81
/* I/O stream (stored in the redirect streams hash, see hash_item).
 * NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping;
 * their exact meaning is defined by code outside this view. */
typedef struct rstream_s {
        FILE *F;                /* underlying stdio stream */
        char *buffer;
        int adv;
        int size;
        int pos;
        smallint is_pipe;       /* flag; presumably set when F is a pipe - confirm */
} rstream;
91
/* One hash table entry. 'data' must stay the FIRST member: hash_find()
 * stores the data pointer returned by hash_search() in a hash_item
 * pointer, which only works while both addresses coincide.
 * Entries are allocated with room for the full name (see hash_find). */
typedef struct hash_item_s {
        union {
                struct var_s v;         /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;        /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        char name[1];                   /* really it's longer */
} hash_item;
101
/* Chained hash table keyed by NUL-terminated names */
typedef struct xhash_s {
        unsigned nel;           /* num of elements */
        unsigned csize;         /* current hash size (number of buckets) */
        unsigned nprime;        /* index of next hash size in PRIMES[] */
        unsigned glen;          /* total length of item names, each counted with its NUL */
        struct hash_item_s **items;     /* bucket array, csize entries */
} xhash;
109
/* Tree node.
 * 'info' carries the values built from tokeninfo[] entries (operation
 * class, operator flags, priority). The l/r/a unions are three operand
 * slots; which member is valid depends on the node's operation and is
 * decided by code outside this view. */
typedef struct node_s {
        uint32_t info;
        unsigned lineno;        /* NOTE(review): presumably source line of the node - confirm */
        union {
                struct node_s *n;
                var *v;
                int aidx;
                char *new_progname;
                regex_t *re;
        } l;
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
130
/* Block of temporary variables.
 * Blocks form a doubly linked list; nvalloc() hands out vars from nv[]
 * by advancing 'pos'. */
typedef struct nvblock_s {
        int size;               /* capacity of nv[], in vars */
        var *pos;               /* first unused var in nv[] */
        struct nvblock_s *prev;
        struct nvblock_s *next;
        var nv[];               /* storage, allocated together with the block */
} nvblock;
139
/* A node bundled with storage for two compiled regexes.
 * Used for the field/record splitters in struct globals2.
 * NOTE(review): why two regex_t slots (possibly to pair with node's
 * l.re and r.ire members) is not visible here - confirm. */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
144
/* simple token classes */
/* Order and hex values are very important!!!  See next_token() */
/* Each class is a single bit. The groups in tokenlist[] are listed in
 * this same order, with NTC stepping to the next class (tc<<1). */
#define TC_SEQSTART      1                              /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)
177
/* either kind of unary prefix operator */
#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

/* statement keywords and statement/line terminators */
#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
/* (TC_COMMA is listed explicitly although TC_BINOP already contains it) */
#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
207
/* Operator flags, stored in bits 16..21 of an info word.
 * NOTE(review): judging by the names and the builtin-priority comment
 * ("resolve Nth arg to var/string"), RESn/STRn/NUMn select how operand n
 * is resolved before the operation runs - confirm in evaluate(). */
#define OF_RES1    0x010000
#define OF_RES2    0x020000
#define OF_STR1    0x040000
#define OF_STR2    0x080000
#define OF_NUM1    0x100000
#define OF_CHECKED 0x200000

/* combined operator flags */
/* Two-letter codes: first letter describes operand 1, second operand 2;
 * x = no flag, V = RES, S = RES|STR, N = RES|NUM */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class (OC_xxx / ST_xxx) and the low 7 bits of an info word */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F
229
/* Operator priority is stored in the highest byte of an info word
 * (even: r->l, odd: l->r grouping).
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() parenthesizes its argument so that expressions such as P(a + b)
 * expand correctly, and performs the shift in uint32_t: builtin
 * descriptors like P(0x83) set bit 31, and shifting a signed int into
 * the sign bit is undefined behavior.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
237
/* Operation classes */
/* Class codes occupy the OPCLSMASK byte (bits 8..15) of an info word.
 * OC_xxx are operation classes; ST_xxx are the statement keywords
 * if/do/for/while as used in tokeninfo[]. */

/* NOTE(review): these two thresholds equal OC_WALKINIT and OC_BINARY;
 * the names suggest the evaluator treats classes up to / from these
 * values differently - confirm in evaluate(). */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
        OC_DONE = 0x2800,

        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
        ST_WHILE = 0x3300
};
264
/* simple builtins */
/* Sub-codes combined with OC_FBLTIN in tokeninfo[]: int, rand, cos, exp,
 * log, sin, sqrt, srand, systime, length, system, fflush, close */
enum {
        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
        F_ti,   F_le,   F_sy,   F_ff,   F_cl
};

/* builtins */
/* Sub-codes combined with OC_BUILTIN in tokeninfo[]: atan2, index, match,
 * split, substr, strftime, mktime, tolower, toupper, gensub, gsub, sub,
 * and, compl, lshift, or, rshift, xor */
enum {
        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
        B_ge,   B_gs,   B_su,
        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
};
277
/* tokens and their corresponding info values */

/* NTC is the separator byte used inside tokenlist[]; NTCC is the same
 * byte as a character constant */
#define NTC     "\377"  /* switch to next token class (tc<<1) */
#define NTCC    '\377'

/* shorthand to keep the tokeninfo[] rows short */
#define OC_B    OC_BUILTIN
284
/* Token spellings, packed into a single string.
 * Each token is one length byte (written as an octal escape, e.g. \10
 * for the 8 characters of "continue") followed by the token text.
 * NTC ends a group of tokens and switches to the next token class; the
 * groups appear in the order of the TC_xxx bits.
 * The order of entries must stay in sync with tokeninfo[]. */
static const char tokenlist[] ALIGN1 =
        "\1("         NTC
        "\1)"         NTC
        "\1/"         NTC                                   /* REGEXP */
        "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
        "\2++"        "\2--"        NTC                     /* UOPPOST */
        "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
        "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
        "\2*="        "\2/="        "\2%="      "\2^="
        "\1+"         "\1-"         "\3**="     "\2**"
        "\1/"         "\1%"         "\1^"       "\1*"
        "\2!="        "\2>="        "\2<="      "\1>"
        "\1<"         "\2!~"        "\1~"       "\2&&"
        "\2||"        "\1?"         "\1:"       NTC
        "\2in"        NTC
        "\1,"         NTC
        "\1|"         NTC
        "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
        "\1]"         NTC
        "\1{"         NTC
        "\1}"         NTC
        "\1;"         NTC
        "\1\n"        NTC
        "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
        "\10continue" "\6delete"    "\5print"
        "\6printf"    "\4next"      "\10nextfile"
        "\6return"    "\4exit"      NTC
        "\5while"     NTC
        "\4else"      NTC

        "\3and"       "\5compl"     "\6lshift"  "\2or"
        "\6rshift"    "\3xor"
        "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
        "\3cos"       "\3exp"       "\3int"     "\3log"
        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
        "\6gensub"    "\4gsub"      "\5index"   "\6length"
        "\5match"     "\5split"     "\7sprintf" "\3sub"
        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
        "\7tolower"   "\7toupper"   NTC
        "\7getline"   NTC
        "\4func"      "\10function" NTC
        "\5BEGIN"     NTC
        "\3END"
        /* compiler adds trailing "\0" */
        ;
330
/* Info value for every token of tokenlist[], in the same order.
 * A value combines an operation class (OC_xxx / ST_xxx), operator flags
 * (xV, NV, ...), a priority P(n) and, for some entries, a low sub-code
 * (a character or an F_xx / B_xx constant). For OC_B (builtin) entries
 * the P() byte instead encodes argument handling, as described at the
 * definition of P(). Entries that are 0 carry no extra info. */
static const uint32_t tokeninfo[] = {
        0,
        0,
        OC_REGEXP,
        xS|'a',                  xS|'w',                  xS|'|',
        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
        OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
        OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
        OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
        OC_IN|SV|P(49), /* in */
        OC_COMMA|SS|P(80),
        OC_PGETLINE|SV|P(37),
        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
        0, /* ] */
        0,
        0,
        0,
        0, /* \n */
        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
        OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
        OC_RETURN|Vx, OC_EXIT|Nx,
        ST_WHILE,
        0, /* else */

        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
        OC_GETLINE|SV|P(0),
        0,                 0,
        0,
        0 /* END */
};
375
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum gives each internal variable its index into intvar[];
 * vNames lists the names in exactly that order, so the two must be
 * edited together. */
enum {
        CONVFMT,    OFMT,       FS,         OFS,
        ORS,        RS,         RT,         FILENAME,
        SUBSEP,     F0,         ARGIND,     ARGC,
        ARGV,       ERRNO,      FNR,        NR,
        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
};

/* NUL-separated names; the '*' marker sits right after the terminating
 * NUL of the name it applies to. An empty name ends the list. */
static const char vNames[] ALIGN1 =
        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";

/* NUL-separated initial string values for the leading variables of
 * vNames (CONVFMT .. F0), closed by a \377 byte.
 * NOTE(review): presumably \377 tells the initializer that the remaining
 * variables have no string default - confirm at the use site. */
static const char vValues[] ALIGN1 =
        "%.6g\0"    "%.6g\0"    " \0"       " \0"
        "\n\0"      "\n\0"      "\0"        "\0"
        "\034\0"    "\0"        "\377";
397
/* hash size may grow to these values */
/* A new hash starts with FIRST_PRIME buckets (hash_init); hash_rebuild()
 * then steps through PRIMES[] and stops growing after the last entry. */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
401
402
/* Globals. Split in two parts so that first one is addressed
 * with (mostly short) negative offsets.
 * NB: it's unsafe to put members of type "double"
 * into globals2 (gcc may fail to align them).
 *
 * This first part sits immediately before the address held in
 * ptr_to_globals and is reached through the G1 macro.
 */
struct globals {
        double t_double;
        chain beginseq, mainseq, endseq;
        chain *seq;
        node *break_ptr, *continue_ptr;
        rstream *iF;
        xhash *vhash, *ahash, *fdhash, *fnhash; /* vhash: variables (newvar), fdhash: streams (newfile), fnhash: functions (newfunc) */
        const char *g_progname; /* printed by syntax_error() */
        int g_lineno;           /* printed by syntax_error() */
        int nfields;
        int maxfields; /* used in fsrealloc() only */
        var *Fields;
        nvblock *g_cb;          /* current temporary-variable block (nvalloc/nvfree) */
        char *g_pos;
        char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
        smallint icase;
        smallint exiting;
        smallint nextrec;
        smallint nextfile;
        smallint is_f0_split;
};
/* Second part of the globals: starts exactly at ptr_to_globals and is
 * reached through the G macro. Must not contain "double" members (see
 * the note on struct globals). */
struct globals2 {
        uint32_t t_info; /* often used */
        uint32_t t_tclass;
        char *t_string;
        int t_lineno;           /* incremented by skip_spaces() on backslash-newline */
        int t_rollback;

        var *intvar[NUM_INTERNAL_VARS]; /* often used */

        /* former statics from various functions */
        /* (named <function>__<variable>) */
        char *split_f0__fstrings;

        uint32_t next_token__save_tclass;
        uint32_t next_token__save_info;
        uint32_t next_token__ltclass;   /* INIT_G() sets this to TC_OPTERM */
        smallint next_token__concat_inserted;

        smallint next_input_file__files_happen;
        rstream next_input_file__rsm;

        var *evaluate__fnargs;
        unsigned evaluate__seed;        /* INIT_G() sets this to 1 */
        regex_t evaluate__sreg;

        var ptest__v;

        tsplitter exec_builtin__tspl;

        /* biggest and least used members go last */
        tsplitter fsplitter, rsplitter;
};
/* Accessors for the two globals structs. INIT_G() allocates both in one
 * zeroed chunk and points ptr_to_globals just past the first struct, so
 * G1 is the element at index -1 and G starts at the pointer itself. */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Plain-name aliases, so that code can use the globals as if they were
 * ordinary variables */
#define t_double     (G1.t_double    )
#define beginseq     (G1.beginseq    )
#define mainseq      (G1.mainseq     )
#define endseq       (G1.endseq      )
#define seq          (G1.seq         )
#define break_ptr    (G1.break_ptr   )
#define continue_ptr (G1.continue_ptr)
#define iF           (G1.iF          )
#define vhash        (G1.vhash       )
#define ahash        (G1.ahash       )
#define fdhash       (G1.fdhash      )
#define fnhash       (G1.fnhash      )
#define g_progname   (G1.g_progname  )
#define g_lineno     (G1.g_lineno    )
#define nfields      (G1.nfields     )
#define maxfields    (G1.maxfields   )
#define Fields       (G1.Fields      )
#define g_cb         (G1.g_cb        )
#define g_pos        (G1.g_pos       )
#define g_buf        (G1.g_buf       )
#define icase        (G1.icase       )
#define exiting      (G1.exiting     )
#define nextrec      (G1.nextrec     )
#define nextfile     (G1.nextfile    )
#define is_f0_split  (G1.is_f0_split )
#define t_info       (G.t_info      )
#define t_tclass     (G.t_tclass    )
#define t_string     (G.t_string    )
#define t_lineno     (G.t_lineno    )
#define t_rollback   (G.t_rollback  )
#define intvar       (G.intvar      )
#define fsplitter    (G.fsplitter   )
#define rsplitter    (G.rsplitter   )
/* Allocate and zero both structs, then set the two members whose
 * initial value is not zero */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
        G.next_token__ltclass = TC_OPTERM; \
        G.evaluate__seed = 1; \
} while (0)
505
506
/* function prototypes */
/* Forward declarations for functions used before their definition */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;
515
516 /* ---- error handling ---- */
517
/* Diagnostic texts; syntax_error() prints one of these after the
 * "progname:line:" prefix */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
528
529 static void zero_out_var(var *vp)
530 {
531         memset(vp, 0, sizeof(*vp));
532 }
533
534 static void syntax_error(const char *message) NORETURN;
535 static void syntax_error(const char *message)
536 {
537         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
538 }
539
540 /* ---- hash stuff ---- */
541
/* Compute the (unreduced) hash of a NUL-terminated name: every character
 * is added to 63 times the hash so far. The caller reduces the result
 * modulo the table size. */
static unsigned hashidx(const char *name)
{
        unsigned h;

        for (h = 0; *name != '\0'; name++)
                h = *name + (h << 6) - h;
        return h;
}
550
551 /* create new hash */
552 static xhash *hash_init(void)
553 {
554         xhash *newhash;
555
556         newhash = xzalloc(sizeof(*newhash));
557         newhash->csize = FIRST_PRIME;
558         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
559
560         return newhash;
561 }
562
563 /* find item in hash, return ptr to data, NULL if not found */
564 static void *hash_search(xhash *hash, const char *name)
565 {
566         hash_item *hi;
567
568         hi = hash->items[hashidx(name) % hash->csize];
569         while (hi) {
570                 if (strcmp(hi->name, name) == 0)
571                         return &hi->data;
572                 hi = hi->next;
573         }
574         return NULL;
575 }
576
577 /* grow hash if it becomes too big */
578 static void hash_rebuild(xhash *hash)
579 {
580         unsigned newsize, i, idx;
581         hash_item **newitems, *hi, *thi;
582
583         if (hash->nprime == ARRAY_SIZE(PRIMES))
584                 return;
585
586         newsize = PRIMES[hash->nprime++];
587         newitems = xzalloc(newsize * sizeof(newitems[0]));
588
589         for (i = 0; i < hash->csize; i++) {
590                 hi = hash->items[i];
591                 while (hi) {
592                         thi = hi;
593                         hi = thi->next;
594                         idx = hashidx(thi->name) % newsize;
595                         thi->next = newitems[idx];
596                         newitems[idx] = thi;
597                 }
598         }
599
600         free(hash->items);
601         hash->csize = newsize;
602         hash->items = newitems;
603 }
604
605 /* find item in hash, add it if necessary. Return ptr to data */
606 static void *hash_find(xhash *hash, const char *name)
607 {
608         hash_item *hi;
609         unsigned idx;
610         int l;
611
612         hi = hash_search(hash, name);
613         if (!hi) {
614                 if (++hash->nel / hash->csize > 10)
615                         hash_rebuild(hash);
616
617                 l = strlen(name) + 1;
618                 hi = xzalloc(sizeof(*hi) + l);
619                 strcpy(hi->name, name);
620
621                 idx = hashidx(name) % hash->csize;
622                 hi->next = hash->items[idx];
623                 hash->items[idx] = hi;
624                 hash->glen += l;
625         }
626         return &hi->data;
627 }
628
/* Typed wrappers around hash_find(): look an item up, creating it when
 * missing. findvar() takes the hash explicitly; the others use the
 * global variable, stream and function hashes respectively. */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
633
634 static void hash_remove(xhash *hash, const char *name)
635 {
636         hash_item *hi, **phi;
637
638         phi = &hash->items[hashidx(name) % hash->csize];
639         while (*phi) {
640                 hi = *phi;
641                 if (strcmp(hi->name, name) == 0) {
642                         hash->glen -= (strlen(name) + 1);
643                         hash->nel--;
644                         *phi = hi->next;
645                         free(hi);
646                         break;
647                 }
648                 phi = &hi->next;
649         }
650 }
651
652 /* ------ some useful functions ------ */
653
654 static char *skip_spaces(char *p)
655 {
656         while (1) {
657                 if (*p == '\\' && p[1] == '\n') {
658                         p++;
659                         t_lineno++;
660                 } else if (*p != ' ' && *p != '\t') {
661                         break;
662                 }
663                 p++;
664         }
665         return p;
666 }
667
/* Return the word *s currently points at and move *s just beyond that
 * word's terminating NUL */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
676
/* Fetch one character from *s and advance *s past it. A backslash is
 * handed to bb_process_escape_sequence(); if that yields a backslash
 * without consuming anything (unrecognized \z), z itself is returned
 * and skipped, unless z is the terminating NUL. */
static char nextchar(char **s)
{
        char *after_first;
        char ch = **s;

        ++*s;
        if (ch != '\\')
                return ch;

        after_first = *s;
        ch = bb_process_escape_sequence((const char**)s);
        if (ch != '\\' || *s != after_first)
                return ch;

        /* unrecognized escape: take the character after the backslash */
        ch = **s;
        if (ch != '\0')
                ++*s;
        return ch;
}
692
693 static ALWAYS_INLINE int isalnum_(int c)
694 {
695         return (isalnum(c) || c == '_');
696 }
697
698 static double my_strtod(char **pp)
699 {
700         char *cp = *pp;
701         if (ENABLE_DESKTOP && cp[0] == '0') {
702                 /* Might be hex or octal integer: 0x123abc or 07777 */
703                 char c = (cp[1] | 0x20);
704                 if (c == 'x' || isdigit(cp[1])) {
705                         unsigned long long ull = strtoull(cp, pp, 0);
706                         if (c == 'x')
707                                 return ull;
708                         c = **pp;
709                         if (!isdigit(c) && c != '.')
710                                 return ull;
711                         /* else: it may be a floating number. Examples:
712                          * 009.123 (*pp points to '9')
713                          * 000.123 (*pp points to '.')
714                          * fall through to strtod.
715                          */
716                 }
717         }
718         return strtod(cp, pp);
719 }
720
721 /* -------- working with variables (set/get/copy/etc) -------- */
722
723 static xhash *iamarray(var *v)
724 {
725         var *a = v;
726
727         while (a->type & VF_CHILD)
728                 a = a->x.parent;
729
730         if (!(a->type & VF_ARRAY)) {
731                 a->type |= VF_ARRAY;
732                 a->x.array = hash_init();
733         }
734         return a->x.array;
735 }
736
737 static void clear_array(xhash *array)
738 {
739         unsigned i;
740         hash_item *hi, *thi;
741
742         for (i = 0; i < array->csize; i++) {
743                 hi = array->items[i];
744                 while (hi) {
745                         thi = hi;
746                         hi = hi->next;
747                         free(thi->data.v.string);
748                         free(thi);
749                 }
750                 array->items[i] = NULL;
751         }
752         array->glen = array->nel = 0;
753 }
754
755 /* clear a variable */
756 static var *clrvar(var *v)
757 {
758         if (!(v->type & VF_FSTR))
759                 free(v->string);
760
761         v->type &= VF_DONTTOUCH;
762         v->type |= VF_DIRTY;
763         v->string = NULL;
764         return v;
765 }
766
767 /* assign string value to variable */
768 static var *setvar_p(var *v, char *value)
769 {
770         clrvar(v);
771         v->string = value;
772         handle_special(v);
773         return v;
774 }
775
776 /* same as setvar_p but make a copy of string */
777 static var *setvar_s(var *v, const char *value)
778 {
779         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
780 }
781
782 /* same as setvar_s but sets USER flag */
783 static var *setvar_u(var *v, const char *value)
784 {
785         v = setvar_s(v, value);
786         v->type |= VF_USER;
787         return v;
788 }
789
790 /* set array element to user string */
791 static void setari_u(var *a, int idx, const char *s)
792 {
793         var *v;
794
795         v = findvar(iamarray(a), itoa(idx));
796         setvar_u(v, s);
797 }
798
799 /* assign numeric value to variable */
800 static var *setvar_i(var *v, double value)
801 {
802         clrvar(v);
803         v->type |= VF_NUMBER;
804         v->number = value;
805         handle_special(v);
806         return v;
807 }
808
809 static const char *getvar_s(var *v)
810 {
811         /* if v is numeric and has no cached string, convert it to string */
812         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
813                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
814                 v->string = xstrdup(g_buf);
815                 v->type |= VF_CACHED;
816         }
817         return (v->string == NULL) ? "" : v->string;
818 }
819
820 static double getvar_i(var *v)
821 {
822         char *s;
823
824         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
825                 v->number = 0;
826                 s = v->string;
827                 if (s && *s) {
828                         debug_printf_eval("getvar_i: '%s'->", s);
829                         v->number = my_strtod(&s);
830                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
831                         if (v->type & VF_USER) {
832                                 s = skip_spaces(s);
833                                 if (*s != '\0')
834                                         v->type &= ~VF_USER;
835                         }
836                 } else {
837                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
838                         v->type &= ~VF_USER;
839                 }
840                 v->type |= VF_CACHED;
841         }
842         debug_printf_eval("getvar_i: %f\n", v->number);
843         return v->number;
844 }
845
846 /* Used for operands of bitwise ops */
847 static unsigned long getvar_i_int(var *v)
848 {
849         double d = getvar_i(v);
850
851         /* Casting doubles to longs is undefined for values outside
852          * of target type range. Try to widen it as much as possible */
853         if (d >= 0)
854                 return (unsigned long)d;
855         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
856         return - (long) (unsigned long) (-d);
857 }
858
859 static var *copyvar(var *dest, const var *src)
860 {
861         if (dest != src) {
862                 clrvar(dest);
863                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
864                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
865                 dest->number = src->number;
866                 if (src->string)
867                         dest->string = xstrdup(src->string);
868         }
869         handle_special(dest);
870         return dest;
871 }
872
873 static var *incvar(var *v)
874 {
875         return setvar_i(v, getvar_i(v) + 1.0);
876 }
877
878 /* return true if v is number or numeric string */
879 static int is_numeric(var *v)
880 {
881         getvar_i(v);
882         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
883 }
884
885 /* return 1 when value of v corresponds to true, 0 otherwise */
886 static int istrue(var *v)
887 {
888         if (is_numeric(v))
889                 return (v->number != 0);
890         return (v->string && v->string[0]);
891 }
892
893 /* temporary variables allocator. Last allocated should be first freed */
894 static var *nvalloc(int n)
895 {
896         nvblock *pb = NULL;
897         var *v, *r;
898         int size;
899
900         while (g_cb) {
901                 pb = g_cb;
902                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
903                         break;
904                 g_cb = g_cb->next;
905         }
906
907         if (!g_cb) {
908                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
909                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
910                 g_cb->size = size;
911                 g_cb->pos = g_cb->nv;
912                 g_cb->prev = pb;
913                 /*g_cb->next = NULL; - xzalloc did it */
914                 if (pb)
915                         pb->next = g_cb;
916         }
917
918         v = r = g_cb->pos;
919         g_cb->pos += n;
920
921         while (v < g_cb->pos) {
922                 v->type = 0;
923                 v->string = NULL;
924                 v++;
925         }
926
927         return r;
928 }
929
930 static void nvfree(var *v)
931 {
932         var *p;
933
934         if (v < g_cb->nv || v >= g_cb->pos)
935                 syntax_error(EMSG_INTERNAL_ERROR);
936
937         for (p = v; p < g_cb->pos; p++) {
938                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
939                         clear_array(iamarray(p));
940                         free(p->x.array->items);
941                         free(p->x.array);
942                 }
943                 if (p->type & VF_WALK) {
944                         walker_list *n;
945                         walker_list *w = p->x.walker;
946                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
947                         p->x.walker = NULL;
948                         while (w) {
949                                 n = w->prev;
950                                 debug_printf_walker(" free(%p)\n", w);
951                                 free(w);
952                                 w = n;
953                         }
954                 }
955                 clrvar(p);
956         }
957
958         g_cb->pos = v;
959         while (g_cb->prev && g_cb->pos == g_cb->nv) {
960                 g_cb = g_cb->prev;
961         }
962 }
963
964 /* ------- awk program text parsing ------- */
965
/* Lexer: fetch the next token of the awk program text.
 *
 * expected - bit mask of TC_xxx token classes acceptable at this point.
 *
 * Scanning starts at global g_pos; results are placed into the t_xxx
 * globals (t_tclass, t_info, t_string, t_double) and g_pos is advanced.
 * If the token's class is not in "expected", syntax_error() is called.
 * Returns the token class.
 *
 * Nothing is scanned when a token was pushed back (t_rollback), or when
 * the previous call returned a synthesized concatenation operator: the
 * pending token is delivered instead.
 *
 * Note: string, regexp and name tokens are terminated/compacted in place,
 * i.e. the program text buffer itself is modified.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass     (G.next_token__save_tclass)
#define save_info       (G.next_token__save_info)
/* Initialized to TC_OPTERM: */
#define ltclass         (G.next_token__ltclass)

        char *p, *s;
        const char *tl;
        uint32_t tc;
        const uint32_t *ti;

        if (t_rollback) {
                /* token was pushed back: t_tclass/t_info are still valid */
                t_rollback = FALSE;

        } else if (concat_inserted) {
                /* last call returned a fake concat operator;
                 * now hand out the real token which was stashed then */
                concat_inserted = FALSE;
                t_tclass = save_tclass;
                t_info = save_info;

        } else {
                p = g_pos;
 readnext:
                p = skip_spaces(p);
                /* line number is taken before a newline token bumps t_lineno */
                g_lineno = t_lineno;
                /* comment: skip up to (not including) end of line */
                if (*p == '#')
                        while (*p != '\n' && *p != '\0')
                                p++;

                if (*p == '\n')
                        t_lineno++;

                if (*p == '\0') {
                        tc = TC_EOF;

                } else if (*p == '\"') {
                        /* it's a string */
                        /* chars are fetched via nextchar() and stored back at s,
                         * which never runs ahead of p: string is rewritten in place */
                        t_string = s = ++p;
                        while (*p != '\"') {
                                char *pp;
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                pp = p;
                                *s++ = nextchar(&pp);
                                p = pp;
                        }
                        p++;
                        *s = '\0';
                        tc = TC_STRING;

                } else if ((expected & TC_REGEXP) && *p == '/') {
                        /* it's regexp */
                        /* '/' is a regexp delimiter only where a regexp is expected */
                        t_string = s = ++p;
                        while (*p != '/') {
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                *s = *p++;
                                if (*s++ == '\\') {
                                        /* backslash: try to decode an escape sequence;
                                         * pp == p afterwards means nothing was consumed,
                                         * then the following char is copied as is */
                                        char *pp = p;
                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
                                        if (*p == '\\')
                                                *s++ = '\\';
                                        if (pp == p)
                                                *s++ = *p++;
                                        else
                                                p = pp;
                                }
                        }
                        p++;
                        *s = '\0';
                        tc = TC_REGEXP;

                } else if (*p == '.' || isdigit(*p)) {
                        /* it's a number */
                        char *pp = p;
                        t_double = my_strtod(&pp);
                        p = pp;
                        /* a '.' right after the number is an error */
                        if (*p == '.')
                                syntax_error(EMSG_UNEXP_TOKEN);
                        tc = TC_NUMBER;

                } else {
                        /* search for something known */
                        /* tokenlist is walked as a sequence of entries: a length
                         * byte followed by that many chars. A length byte equal
                         * to NTCC is a marker instead: it switches to the next
                         * token class (tc is shifted left). tokeninfo advances
                         * in step with the real entries. */
                        tl = tokenlist;
                        tc = 0x00000001;
                        ti = tokeninfo;
                        while (*tl) {
                                int l = (unsigned char) *tl++;
                                if (l == (unsigned char) NTCC) {
                                        tc <<= 1;
                                        continue;
                                }
                                /* if token class is expected,
                                 * token matches,
                                 * and it's not a longer word,
                                 */
                                /* (TC_WORD and TC_NEWLINE classes are matched even
                                 * when not expected; the check at the end of this
                                 * function reports them if they are unwelcome) */
                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
                                 && strncmp(p, tl, l) == 0
                                 && !((tc & TC_WORD) && isalnum_(p[l]))
                                ) {
                                        /* then this is what we are looking for */
                                        t_info = *ti;
                                        p += l;
                                        goto token_found;
                                }
                                ti++;
                                tl += l;
                        }
                        /* not a known token */

                        /* is it a name? (var/array/function) */
                        if (!isalnum_(*p))
                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
                        /* yes */
                        /* the name is moved one byte to the left (over the char
                         * preceding it), which frees a byte for the terminating
                         * NUL without touching the char which follows the name */
                        t_string = --p;
                        while (isalnum_(*++p)) {
                                p[-1] = *p;
                        }
                        p[-1] = '\0';
                        tc = TC_VARIABLE;
                        /* also consume whitespace between functionname and bracket */
                        if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
                                p = skip_spaces(p);
                        /* '(' is left unconsumed for function names; '[' is eaten */
                        if (*p == '(') {
                                tc = TC_FUNCTION;
                        } else {
                                if (*p == '[') {
                                        p++;
                                        tc = TC_ARRAY;
                                }
                        }
 token_found: ;
                }
                g_pos = p;

                /* skipping newlines in some cases */
                /* i.e. when the class of the previously returned token is in TC_NOTERM */
                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
                        goto readnext;

                /* insert concatenation operator when needed */
                /* previous token class is in TC_CONCAT1, this one is in TC_CONCAT2,
                 * and a binary operator is acceptable here: return a synthetic
                 * OC_CONCAT operator now, keep the real token for the next call */
                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
                        concat_inserted = TRUE;
                        save_tclass = tc;
                        save_info = t_info;
                        tc = TC_BINOP;
                        t_info = OC_CONCAT | SS | P(35);
                }

                t_tclass = tc;
        }
        /* remember the class of the token being returned for the next call */
        ltclass = t_tclass;

        /* Are we ready for this? */
        if (!(ltclass & expected))
                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);

        return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1133
/* Push the current token back: the next next_token() call will not scan
 * the program text, it will deliver the same token again (and still
 * check it against the "expected" mask it is given). */
static void rollback_token(void)
{
        t_rollback = TRUE;
}
1138
1139 static node *new_node(uint32_t info)
1140 {
1141         node *n;
1142
1143         n = xzalloc(sizeof(node));
1144         n->info = info;
1145         n->lineno = g_lineno;
1146         return n;
1147 }
1148
1149 static void mk_re_node(const char *s, node *n, regex_t *re)
1150 {
1151         n->info = OC_REGEXP;
1152         n->l.re = re;
1153         n->r.ire = re + 1;
1154         xregcomp(re, s, REG_EXTENDED);
1155         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1156 }
1157
1158 static node *condition(void)
1159 {
1160         next_token(TC_SEQSTART);
1161         return parse_expr(TC_SEQTERM);
1162 }
1163
/* parse expression terminated by given argument, return ptr
 * to built subtree. Terminator is eaten by parse_expr.
 *
 * iexp - mask of token classes which terminate the expression.
 *
 * The tree is grown under a dummy root node (sn) living on the stack;
 * the result is whatever ends up as its right child (NULL for an empty
 * expression). While parsing, a node's a.n link points to the node it
 * hangs off, which is what the priority-climbing loop below walks.
 * xtc is the mask of token classes acceptable as the next token.
 */
static node *parse_expr(uint32_t iexp)
{
        node sn;
        node *cn = &sn;
        node *vn, *glptr;
        uint32_t tc, xtc;
        var *v;

        /* NOTE(review): PRIMASK as the root's info presumably makes the dummy
         * root compare as lowest-priority so climbing stops there - confirm */
        sn.info = PRIMASK;
        sn.r.n = glptr = NULL;
        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

        while (!((tc = next_token(xtc)) & iexp)) {

                /* glptr is non-NULL only right after a getline operand */
                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
                        /* input redirection (<) attached to glptr node */
                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
                        cn->a.n = glptr;
                        xtc = TC_OPERAND | TC_UOPPRE;
                        glptr = NULL;

                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
                        /* for binary and postfix-unary operators, jump back over
                         * previous operators with higher priority */
                        vn = cn;
                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
                            || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
                        ) {
                                vn = vn->a.n;
                        }
                        /* NOTE(review): the priority field of '?' is bumped after the
                         * climb, before the node is created - reason not visible here */
                        if ((t_info & OPCLSMASK) == OC_TERNARY)
                                t_info += P(6);
                        /* new operator node takes vn's place under vn's parent... */
                        cn = vn->a.n->r.n = new_node(t_info);
                        cn->a.n = vn->a.n;
                        if (tc & TC_BINOP) {
                                /* ...and vn becomes its left operand */
                                cn->l.n = vn;
                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
                                        /* it's a pipe */
                                        next_token(TC_GETLINE);
                                        /* give maximum priority to this pipe */
                                        cn->info &= ~PRIMASK;
                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                                }
                        } else {
                                /* postfix operator: vn is its (right) operand */
                                cn->r.n = vn;
                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                        }
                        vn->a.n = cn;

                } else {
                        /* for operands and prefix-unary operators, attach them
                         * to last node */
                        vn = cn;
                        cn = vn->r.n = new_node(t_info);
                        cn->a.n = vn;
                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
                        if (tc & (TC_OPERAND | TC_REGEXP)) {
                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
                                /* one should be very careful with switch on tclass -
                                 * only simple tclasses should be used! */
                                switch (tc) {
                                case TC_VARIABLE:
                                case TC_ARRAY:
                                        /* a name found in ahash is a function argument
                                         * (referenced by index), otherwise a variable */
                                        cn->info = OC_VAR;
                                        v = hash_search(ahash, t_string);
                                        if (v != NULL) {
                                                cn->info = OC_FNARG;
                                                cn->l.aidx = v->x.aidx;
                                        } else {
                                                cn->l.v = newvar(t_string);
                                        }
                                        if (tc & TC_ARRAY) {
                                                /* subscript expression, up to TC_ARRTERM */
                                                cn->info |= xS;
                                                cn->r.n = parse_expr(TC_ARRTERM);
                                        }
                                        break;

                                case TC_NUMBER:
                                case TC_STRING:
                                        /* constant: stored in a freshly allocated var */
                                        cn->info = OC_VAR;
                                        v = cn->l.v = xzalloc(sizeof(var));
                                        if (tc & TC_NUMBER)
                                                setvar_i(v, t_double);
                                        else
                                                setvar_s(v, t_string);
                                        break;

                                case TC_REGEXP:
                                        /* room for two regex_t: plain and icase variants */
                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
                                        break;

                                case TC_FUNCTION:
                                        /* call: argument list is parsed by condition() */
                                        cn->info = OC_FUNC;
                                        cn->r.f = newfunc(t_string);
                                        cn->l.n = condition();
                                        break;

                                case TC_SEQSTART:
                                        /* parenthesized subexpression replaces the node
                                         * which was just created for the '(' token */
                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
                                        cn->a.n = vn;
                                        break;

                                case TC_GETLINE:
                                        /* remember it: a following '<' is a redirection */
                                        glptr = cn;
                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                                        break;

                                case TC_BUILTIN:
                                        cn->l.n = condition();
                                        break;
                                }
                        }
                }
        }
        return sn.r.n;
}
1283
1284 /* add node to chain. Return ptr to alloc'd node */
1285 static node *chain_node(uint32_t info)
1286 {
1287         node *n;
1288
1289         if (!seq->first)
1290                 seq->first = seq->last = new_node(0);
1291
1292         if (seq->programname != g_progname) {
1293                 seq->programname = g_progname;
1294                 n = chain_node(OC_NEWSOURCE);
1295                 n->l.new_progname = xstrdup(g_progname);
1296         }
1297
1298         n = seq->last;
1299         n->info = info;
1300         seq->last = n->a.n = new_node(OC_DONE);
1301
1302         return n;
1303 }
1304
1305 static void chain_expr(uint32_t info)
1306 {
1307         node *n;
1308
1309         n = chain_node(info);
1310         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1311         if (t_tclass & TC_GRPTERM)
1312                 rollback_token();
1313 }
1314
1315 static node *chain_loop(node *nn)
1316 {
1317         node *n, *n2, *save_brk, *save_cont;
1318
1319         save_brk = break_ptr;
1320         save_cont = continue_ptr;
1321
1322         n = chain_node(OC_BR | Vx);
1323         continue_ptr = new_node(OC_EXEC);
1324         break_ptr = new_node(OC_EXEC);
1325         chain_group();
1326         n2 = chain_node(OC_EXEC | Vx);
1327         n2->l.n = nn;
1328         n2->a.n = n;
1329         continue_ptr->a.n = n2;
1330         break_ptr->a.n = n->r.n = seq->last;
1331
1332         continue_ptr = save_cont;
1333         break_ptr = save_brk;
1334
1335         return n;
1336 }
1337
/* parse group and attach it to chain.
 *
 * A "group" here is one of: a block in group brackets (TC_GRPSTART ...
 * TC_GRPTERM) holding any number of groups, a plain expression
 * statement, or a keyword statement (if/while/do/for/print/printf/
 * break/continue and the ones handled by the default case).
 * Nodes are appended to the current sequence (seq) via chain_node().
 */
static void chain_group(void)
{
        uint32_t c;
        node *n, *n2, *n3;

        /* skip leading newlines */
        do {
                c = next_token(TC_GRPSEQ);
        } while (c & TC_NEWLINE);

        if (c & TC_GRPSTART) {
                /* block: chain nested groups until the closing bracket */
                while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
                        if (t_tclass & TC_NEWLINE)
                                continue;
                        rollback_token();
                        chain_group();
                }
        } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                /* expression statement (possibly empty) */
                rollback_token();
                chain_expr(OC_EXEC | Vx);
        } else {                                                /* TC_STATEMNT */
                switch (t_info & OPCLSMASK) {
                case ST_IF:
                        /* n: branch on condition; n2: node chained after the
                         * "then" part, used to step over the "else" part */
                        n = chain_node(OC_BR | Vx);
                        n->l.n = condition();
                        chain_group();
                        n2 = chain_node(OC_EXEC);
                        /* NOTE(review): r.n presumably is where execution continues
                         * when the condition is false - confirm in evaluate() */
                        n->r.n = seq->last;
                        if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
                                chain_group();
                                n2->a.n = seq->last;
                        } else {
                                rollback_token();
                        }
                        break;

                case ST_WHILE:
                        /* condition is parsed first, then attached to the loop's
                         * branch node */
                        n2 = condition();
                        n = chain_loop(NULL);
                        n->l.n = n2;
                        break;

                case ST_DO:
                        /* n2 is chained before the loop and re-linked to the node
                         * following the loop's branch node; the condition is
                         * parsed after the body and the "while" keyword */
                        n2 = chain_node(OC_EXEC);
                        n = chain_loop(NULL);
                        n2->a.n = n->a.n;
                        next_token(TC_WHILE);
                        n->l.n = condition();
                        break;

                case ST_FOR:
                        next_token(TC_SEQSTART);
                        n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
                        /* the terminator of the first expression tells the two forms apart */
                        if (t_tclass & TC_SEQTERM) {    /* for-in */
                                if ((n2->info & OPCLSMASK) != OC_IN)
                                        syntax_error(EMSG_UNEXP_TOKEN);
                                /* both operands of "in" go to the WALKINIT node,
                                 * the left one also to the WALKNEXT node */
                                n = chain_node(OC_WALKINIT | VV);
                                n->l.n = n2->l.n;
                                n->r.n = n2->r.n;
                                n = chain_loop(NULL);
                                n->info = OC_WALKNEXT | Vx;
                                n->l.n = n2->l.n;
                        } else {                        /* for (;;) */
                                /* n2: init expression, then condition; n3: step */
                                n = chain_node(OC_EXEC | Vx);
                                n->l.n = n2;
                                n2 = parse_expr(TC_SEMICOL);
                                n3 = parse_expr(TC_SEQTERM);
                                n = chain_loop(n3);
                                n->l.n = n2;
                                /* no condition: loop node becomes a plain OC_EXEC */
                                if (!n2)
                                        n->info = OC_EXEC;
                        }
                        break;

                case OC_PRINT:
                case OC_PRINTF:
                        n = chain_node(t_info);
                        n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
                        /* output redirection: its info bits are merged into the
                         * node, the target expression becomes the right operand */
                        if (t_tclass & TC_OUTRDR) {
                                n->info |= t_info;
                                n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
                        }
                        /* leave the closing bracket for the caller */
                        if (t_tclass & TC_GRPTERM)
                                rollback_token();
                        break;

                case OC_BREAK:
                        /* link to the break target of the loop being parsed */
                        n = chain_node(OC_EXEC);
                        n->a.n = break_ptr;
                        break;

                case OC_CONTINUE:
                        /* link to the continue target of the loop being parsed */
                        n = chain_node(OC_EXEC);
                        n->a.n = continue_ptr;
                        break;

                /* delete, next, nextfile, return, exit */
                default:
                        chain_expr(t_info);
                }
        }
}
1440
1441 static void parse_program(char *p)
1442 {
1443         uint32_t tclass;
1444         node *cn;
1445         func *f;
1446         var *v;
1447
1448         g_pos = p;
1449         t_lineno = 1;
1450         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1451                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1452
1453                 if (tclass & TC_OPTERM)
1454                         continue;
1455
1456                 seq = &mainseq;
1457                 if (tclass & TC_BEGIN) {
1458                         seq = &beginseq;
1459                         chain_group();
1460
1461                 } else if (tclass & TC_END) {
1462                         seq = &endseq;
1463                         chain_group();
1464
1465                 } else if (tclass & TC_FUNCDECL) {
1466                         next_token(TC_FUNCTION);
1467                         g_pos++;
1468                         f = newfunc(t_string);
1469                         f->body.first = NULL;
1470                         f->nargs = 0;
1471                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1472                                 v = findvar(ahash, t_string);
1473                                 v->x.aidx = f->nargs++;
1474
1475                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1476                                         break;
1477                         }
1478                         seq = &f->body;
1479                         chain_group();
1480                         clear_array(ahash);
1481
1482                 } else if (tclass & TC_OPSEQ) {
1483                         rollback_token();
1484                         cn = chain_node(OC_TEST);
1485                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1486                         if (t_tclass & TC_GRPSTART) {
1487                                 rollback_token();
1488                                 chain_group();
1489                         } else {
1490                                 chain_node(OC_PRINT);
1491                         }
1492                         cn->r.n = mainseq.last;
1493
1494                 } else /* if (tclass & TC_GRPSTART) */ {
1495                         rollback_token();
1496                         chain_group();
1497                 }
1498         }
1499 }
1500
1501
1502 /* -------- program execution part -------- */
1503
1504 static node *mk_splitter(const char *s, tsplitter *spl)
1505 {
1506         regex_t *re, *ire;
1507         node *n;
1508
1509         re = &spl->re[0];
1510         ire = &spl->re[1];
1511         n = &spl->n;
1512         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1513                 regfree(re);
1514                 regfree(ire); // TODO: nuke ire, use re+1?
1515         }
1516         if (s[0] && s[1]) { /* strlen(s) > 1 */
1517                 mk_re_node(s, n, re);
1518         } else {
1519                 n->info = (uint32_t) s[0];
1520         }
1521
1522         return n;
1523 }
1524
1525 /* use node as a regular expression. Supplied with node ptr and regex_t
1526  * storage space. Return ptr to regex (if result points to preg, it should
1527  * be later regfree'd manually
1528  */
1529 static regex_t *as_regex(node *op, regex_t *preg)
1530 {
1531         int cflags;
1532         var *v;
1533         const char *s;
1534
1535         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1536                 return icase ? op->r.ire : op->l.re;
1537         }
1538         v = nvalloc(1);
1539         s = getvar_s(evaluate(op, v));
1540
1541         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1542         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1543          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1544          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1545          * (maybe gsub is not supposed to use REG_EXTENDED?).
1546          */
1547         if (regcomp(preg, s, cflags)) {
1548                 cflags &= ~REG_EXTENDED;
1549                 xregcomp(preg, s, cflags);
1550         }
1551         nvfree(v);
1552         return preg;
1553 }
1554
1555 /* gradually increasing buffer.
1556  * note that we reallocate even if n == old_size,
1557  * and thus there is at least one extra allocated byte.
1558  */
1559 static char* qrealloc(char *b, int n, int *size)
1560 {
1561         if (!b || n >= *size) {
1562                 *size = n + (n>>1) + 80;
1563                 b = xrealloc(b, *size);
1564         }
1565         return b;
1566 }
1567
1568 /* resize field storage space */
1569 static void fsrealloc(int size)
1570 {
1571         int i;
1572
1573         if (size >= maxfields) {
1574                 i = maxfields;
1575                 maxfields = size + 16;
1576                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1577                 for (; i < maxfields; i++) {
1578                         Fields[i].type = VF_SPECIAL;
1579                         Fields[i].string = NULL;
1580                 }
1581         }
1582         /* if size < nfields, clear extra field variables */
1583         for (i = size; i < nfields; i++) {
1584                 clrvar(Fields + i);
1585         }
1586         nfields = size;
1587 }
1588
/* Split string s into fields.
 *
 * s     - text to split (it is only read)
 * spl   - splitter node: either an OC_REGEXP node, or a node whose info
 *         holds a single separator char, where NUL means "every char is
 *         a field" and ' ' means "split on runs of whitespace"
 * slist - receives a newly allocated buffer which holds the fields as
 *         consecutive NUL-terminated strings; it is not freed here
 *
 * Returns the number of fields.
 */
static int awk_split(const char *s, node *spl, char **slist)
{
        int l, n;
        char c[4];
        char *s1;
        regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...

        /* in worst case, each char would be a separate field */
        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
        strcpy(s1, s);

        /* c[] is a NUL-terminated set of separator chars:
         * c[0], c[1] - the separator (upper/lower case variants if icase),
         * c[2] - '\n' when RS is an empty string, else NUL.
         * c+2 alone is used below as "newline, if it separates" */
        c[0] = c[1] = (char)spl->info;
        c[2] = c[3] = '\0';
        if (*getvar_s(intvar[RS]) == '\0')
                c[2] = '\n';

        n = 0;
        if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
                if (!*s)
                        return n; /* "": zero fields */
                n++; /* at least one field will be there */
                do {
                        l = strcspn(s, c+2); /* len till next NUL or \n */
                        /* a match is used only if it starts no later than that */
                        if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
                         && pmatch[0].rm_so <= l
                        ) {
                                l = pmatch[0].rm_so;
                                /* empty match at the very start: take one char as
                                 * the field, so that the loop makes progress */
                                if (pmatch[0].rm_eo == 0) {
                                        l++;
                                        pmatch[0].rm_eo++;
                                }
                                n++; /* we saw yet another delimiter */
                        } else {
                                /* no usable match: field runs up to NUL or newline,
                                 * and the newline (if any) is consumed as well */
                                pmatch[0].rm_eo = l;
                                if (s[l])
                                        pmatch[0].rm_eo++;
                        }
                        memcpy(s1, s, l);
                        /* make sure we remove *all* of the separator chars */
                        do {
                                s1[l] = '\0';
                        } while (++l < pmatch[0].rm_eo);
                        /* NOTE(review): nextword() presumably moves s1 past the
                         * word just terminated - confirm against its definition */
                        nextword(&s1);
                        s += pmatch[0].rm_eo;
                } while (*s);
                return n;
        }
        if (c[0] == '\0') {  /* null split */
                /* every char becomes a one-char field */
                while (*s) {
                        *s1++ = *s++;
                        *s1++ = '\0';
                        n++;
                }
                return n;
        }
        if (c[0] != ' ') {  /* single-character split */
                if (icase) {
                        c[0] = toupper(c[0]);
                        c[1] = tolower(c[1]);
                }
                /* works on the copy of s made above: each separator found
                 * (any char of c[]) is overwritten with NUL */
                if (*s1)
                        n++;
                while ((s1 = strpbrk(s1, c)) != NULL) {
                        *s1++ = '\0';
                        n++;
                }
                return n;
        }
        /* space split */
        /* leading/trailing whitespace produces no fields */
        while (*s) {
                s = skip_whitespace(s);
                if (!*s)
                        break;
                n++;
                while (*s && !isspace(*s))
                        *s1++ = *s++;
                *s1++ = '\0';
        }
        return n;
}
1669
1670 static void split_f0(void)
1671 {
1672 /* static char *fstrings; */
1673 #define fstrings (G.split_f0__fstrings)
1674
1675         int i, n;
1676         char *s;
1677
1678         if (is_f0_split)
1679                 return;
1680
1681         is_f0_split = TRUE;
1682         free(fstrings);
1683         fsrealloc(0);
1684         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1685         fsrealloc(n);
1686         s = fstrings;
1687         for (i = 0; i < n; i++) {
1688                 Fields[i].string = nextword(&s);
1689                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1690         }
1691
1692         /* set NF manually to avoid side effects */
1693         clrvar(intvar[NF]);
1694         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1695         intvar[NF]->number = nfields;
1696 #undef fstrings
1697 }
1698
1699 /* perform additional actions when some internal variables changed */
1700 static void handle_special(var *v)
1701 {
1702         int n;
1703         char *b;
1704         const char *sep, *s;
1705         int sl, l, len, i, bsize;
1706
1707         if (!(v->type & VF_SPECIAL))
1708                 return;
1709
1710         if (v == intvar[NF]) {
1711                 n = (int)getvar_i(v);
1712                 fsrealloc(n);
1713
1714                 /* recalculate $0 */
1715                 sep = getvar_s(intvar[OFS]);
1716                 sl = strlen(sep);
1717                 b = NULL;
1718                 len = 0;
1719                 for (i = 0; i < n; i++) {
1720                         s = getvar_s(&Fields[i]);
1721                         l = strlen(s);
1722                         if (b) {
1723                                 memcpy(b+len, sep, sl);
1724                                 len += sl;
1725                         }
1726                         b = qrealloc(b, len+l+sl, &bsize);
1727                         memcpy(b+len, s, l);
1728                         len += l;
1729                 }
1730                 if (b)
1731                         b[len] = '\0';
1732                 setvar_p(intvar[F0], b);
1733                 is_f0_split = TRUE;
1734
1735         } else if (v == intvar[F0]) {
1736                 is_f0_split = FALSE;
1737
1738         } else if (v == intvar[FS]) {
1739                 mk_splitter(getvar_s(v), &fsplitter);
1740
1741         } else if (v == intvar[RS]) {
1742                 mk_splitter(getvar_s(v), &rsplitter);
1743
1744         } else if (v == intvar[IGNORECASE]) {
1745                 icase = istrue(v);
1746
1747         } else {                                /* $n */
1748                 n = getvar_i(intvar[NF]);
1749                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1750                 /* right here v is invalid. Just to note... */
1751         }
1752 }
1753
1754 /* step through func/builtin/etc arguments */
1755 static node *nextarg(node **pn)
1756 {
1757         node *n;
1758
1759         n = *pn;
1760         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1761                 *pn = n->r.n;
1762                 n = n->l.n;
1763         } else {
1764                 *pn = NULL;
1765         }
1766         return n;
1767 }
1768
1769 static void hashwalk_init(var *v, xhash *array)
1770 {
1771         hash_item *hi;
1772         unsigned i;
1773         walker_list *w;
1774         walker_list *prev_walker;
1775
1776         if (v->type & VF_WALK) {
1777                 prev_walker = v->x.walker;
1778         } else {
1779                 v->type |= VF_WALK;
1780                 prev_walker = NULL;
1781         }
1782         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1783
1784         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1785         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1786         w->cur = w->end = w->wbuf;
1787         w->prev = prev_walker;
1788         for (i = 0; i < array->csize; i++) {
1789                 hi = array->items[i];
1790                 while (hi) {
1791                         strcpy(w->end, hi->name);
1792                         nextword(&w->end);
1793                         hi = hi->next;
1794                 }
1795         }
1796 }
1797
1798 static int hashwalk_next(var *v)
1799 {
1800         walker_list *w = v->x.walker;
1801
1802         if (w->cur >= w->end) {
1803                 walker_list *prev_walker = w->prev;
1804
1805                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1806                 free(w);
1807                 v->x.walker = prev_walker;
1808                 return FALSE;
1809         }
1810
1811         setvar_s(v, nextword(&w->cur));
1812         return TRUE;
1813 }
1814
1815 /* evaluate node, return 1 when result is true, 0 otherwise */
1816 static int ptest(node *pattern)
1817 {
1818         /* ptest__v is "static": to save stack space? */
1819         return istrue(evaluate(pattern, &G.ptest__v));
1820 }
1821
1822 /* read next record from stream rsm into a variable v */
1823 static int awk_getline(rstream *rsm, var *v)
1824 {
1825         char *b;
1826         regmatch_t pmatch[2];
1827         int size, a, p, pp = 0;
1828         int fd, so, eo, r, rp;
1829         char c, *m, *s;
1830
1831         debug_printf_eval("entered %s()\n", __func__);
1832
1833         /* we're using our own buffer since we need access to accumulating
1834          * characters
1835          */
1836         fd = fileno(rsm->F);
1837         m = rsm->buffer;
1838         a = rsm->adv;
1839         p = rsm->pos;
1840         size = rsm->size;
1841         c = (char) rsplitter.n.info;
1842         rp = 0;
1843
1844         if (!m)
1845                 m = qrealloc(m, 256, &size);
1846
1847         do {
1848                 b = m + a;
1849                 so = eo = p;
1850                 r = 1;
1851                 if (p > 0) {
1852                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1853                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1854                                                         b, 1, pmatch, 0) == 0) {
1855                                         so = pmatch[0].rm_so;
1856                                         eo = pmatch[0].rm_eo;
1857                                         if (b[eo] != '\0')
1858                                                 break;
1859                                 }
1860                         } else if (c != '\0') {
1861                                 s = strchr(b+pp, c);
1862                                 if (!s)
1863                                         s = memchr(b+pp, '\0', p - pp);
1864                                 if (s) {
1865                                         so = eo = s-b;
1866                                         eo++;
1867                                         break;
1868                                 }
1869                         } else {
1870                                 while (b[rp] == '\n')
1871                                         rp++;
1872                                 s = strstr(b+rp, "\n\n");
1873                                 if (s) {
1874                                         so = eo = s-b;
1875                                         while (b[eo] == '\n')
1876                                                 eo++;
1877                                         if (b[eo] != '\0')
1878                                                 break;
1879                                 }
1880                         }
1881                 }
1882
1883                 if (a > 0) {
1884                         memmove(m, m+a, p+1);
1885                         b = m;
1886                         a = 0;
1887                 }
1888
1889                 m = qrealloc(m, a+p+128, &size);
1890                 b = m + a;
1891                 pp = p;
1892                 p += safe_read(fd, b+p, size-p-1);
1893                 if (p < pp) {
1894                         p = 0;
1895                         r = 0;
1896                         setvar_i(intvar[ERRNO], errno);
1897                 }
1898                 b[p] = '\0';
1899
1900         } while (p > pp);
1901
1902         if (p == 0) {
1903                 r--;
1904         } else {
1905                 c = b[so]; b[so] = '\0';
1906                 setvar_s(v, b+rp);
1907                 v->type |= VF_USER;
1908                 b[so] = c;
1909                 c = b[eo]; b[eo] = '\0';
1910                 setvar_s(intvar[RT], b+so);
1911                 b[eo] = c;
1912         }
1913
1914         rsm->buffer = m;
1915         rsm->adv = a + eo;
1916         rsm->pos = p - eo;
1917         rsm->size = size;
1918
1919         debug_printf_eval("returning from %s(): %d\n", __func__, r);
1920
1921         return r;
1922 }
1923
1924 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1925 {
1926         int r = 0;
1927         char c;
1928         const char *s = format;
1929
1930         if (int_as_int && n == (int)n) {
1931                 r = snprintf(b, size, "%d", (int)n);
1932         } else {
1933                 do { c = *s; } while (c && *++s);
1934                 if (strchr("diouxX", c)) {
1935                         r = snprintf(b, size, format, (int)n);
1936                 } else if (strchr("eEfgG", c)) {
1937                         r = snprintf(b, size, format, n);
1938                 } else {
1939                         syntax_error(EMSG_INV_FMT);
1940                 }
1941         }
1942         return r;
1943 }
1944
1945 /* formatted output into an allocated buffer, return ptr to buffer */
1946 static char *awk_printf(node *n)
1947 {
1948         char *b = NULL;
1949         char *fmt, *s, *f;
1950         const char *s1;
1951         int i, j, incr, bsize;
1952         char c, c1;
1953         var *v, *arg;
1954
1955         v = nvalloc(1);
1956         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1957
1958         i = 0;
1959         while (*f) {
1960                 s = f;
1961                 while (*f && (*f != '%' || *++f == '%'))
1962                         f++;
1963                 while (*f && !isalpha(*f)) {
1964                         if (*f == '*')
1965                                 syntax_error("%*x formats are not supported");
1966                         f++;
1967                 }
1968
1969                 incr = (f - s) + MAXVARFMT;
1970                 b = qrealloc(b, incr + i, &bsize);
1971                 c = *f;
1972                 if (c != '\0')
1973                         f++;
1974                 c1 = *f;
1975                 *f = '\0';
1976                 arg = evaluate(nextarg(&n), v);
1977
1978                 j = i;
1979                 if (c == 'c' || !c) {
1980                         i += sprintf(b+i, s, is_numeric(arg) ?
1981                                         (char)getvar_i(arg) : *getvar_s(arg));
1982                 } else if (c == 's') {
1983                         s1 = getvar_s(arg);
1984                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1985                         i += sprintf(b+i, s, s1);
1986                 } else {
1987                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1988                 }
1989                 *f = c1;
1990
1991                 /* if there was an error while sprintf, return value is negative */
1992                 if (i < j)
1993                         i = j;
1994         }
1995
1996         free(fmt);
1997         nvfree(v);
1998         b = xrealloc(b, i + 1);
1999         b[i] = '\0';
2000         return b;
2001 }
2002
2003 /* Common substitution routine.
2004  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2005  * store result into (dest), return number of substitutions.
2006  * If nm = 0, replace all matches.
2007  * If src or dst is NULL, use $0.
2008  * If subexp != 0, enable subexpression matching (\1-\9).
2009  */
2010 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2011 {
2012         char *resbuf;
2013         const char *sp;
2014         int match_no, residx, replen, resbufsize;
2015         int regexec_flags;
2016         regmatch_t pmatch[10];
2017         regex_t sreg, *regex;
2018
2019         resbuf = NULL;
2020         residx = 0;
2021         match_no = 0;
2022         regexec_flags = 0;
2023         regex = as_regex(rn, &sreg);
2024         sp = getvar_s(src ? src : intvar[F0]);
2025         replen = strlen(repl);
2026         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2027                 int so = pmatch[0].rm_so;
2028                 int eo = pmatch[0].rm_eo;
2029
2030                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2031                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2032                 memcpy(resbuf + residx, sp, eo);
2033                 residx += eo;
2034                 if (++match_no >= nm) {
2035                         const char *s;
2036                         int nbs;
2037
2038                         /* replace */
2039                         residx -= (eo - so);
2040                         nbs = 0;
2041                         for (s = repl; *s; s++) {
2042                                 char c = resbuf[residx++] = *s;
2043                                 if (c == '\\') {
2044                                         nbs++;
2045                                         continue;
2046                                 }
2047                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2048                                         int j;
2049                                         residx -= ((nbs + 3) >> 1);
2050                                         j = 0;
2051                                         if (c != '&') {
2052                                                 j = c - '0';
2053                                                 nbs++;
2054                                         }
2055                                         if (nbs % 2) {
2056                                                 resbuf[residx++] = c;
2057                                         } else {
2058                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2059                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2060                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2061                                                 residx += n;
2062                                         }
2063                                 }
2064                                 nbs = 0;
2065                         }
2066                 }
2067
2068                 regexec_flags = REG_NOTBOL;
2069                 sp += eo;
2070                 if (match_no == nm)
2071                         break;
2072                 if (eo == so) {
2073                         /* Empty match (e.g. "b*" will match anywhere).
2074                          * Advance by one char. */
2075 //BUG (bug 1333):
2076 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2077 //... and will erroneously match "b" even though it is NOT at the word start.
2078 //we need REG_NOTBOW but it does not exist...
2079 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2080 //it should be able to do it correctly.
2081                         /* Subtle: this is safe only because
2082                          * qrealloc allocated at least one extra byte */
2083                         resbuf[residx] = *sp;
2084                         if (*sp == '\0')
2085                                 goto ret;
2086                         sp++;
2087                         residx++;
2088                 }
2089         }
2090
2091         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2092         strcpy(resbuf + residx, sp);
2093  ret:
2094         //bb_error_msg("end sp:'%s'%p", sp,sp);
2095         setvar_p(dest ? dest : intvar[F0], resbuf);
2096         if (regex == &sreg)
2097                 regfree(regex);
2098         return match_no;
2099 }
2100
2101 static NOINLINE int do_mktime(const char *ds)
2102 {
2103         struct tm then;
2104         int count;
2105
2106         /*memset(&then, 0, sizeof(then)); - not needed */
2107         then.tm_isdst = -1; /* default is unknown */
2108
2109         /* manpage of mktime says these fields are ints,
2110          * so we can sscanf stuff directly into them */
2111         count = sscanf(ds, "%u %u %u %u %u %u %d",
2112                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2113                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2114                 &then.tm_isdst);
2115
2116         if (count < 6
2117          || (unsigned)then.tm_mon < 1
2118          || (unsigned)then.tm_year < 1900
2119         ) {
2120                 return -1;
2121         }
2122
2123         then.tm_mon -= 1;
2124         then.tm_year -= 1900;
2125
2126         return mktime(&then);
2127 }
2128
2129 static NOINLINE var *exec_builtin(node *op, var *res)
2130 {
2131 #define tspl (G.exec_builtin__tspl)
2132
2133         var *tv;
2134         node *an[4];
2135         var *av[4];
2136         const char *as[4];
2137         regmatch_t pmatch[2];
2138         regex_t sreg, *re;
2139         node *spl;
2140         uint32_t isr, info;
2141         int nargs;
2142         time_t tt;
2143         int i, l, ll, n;
2144
2145         tv = nvalloc(4);
2146         isr = info = op->info;
2147         op = op->l.n;
2148
2149         av[2] = av[3] = NULL;
2150         for (i = 0; i < 4 && op; i++) {
2151                 an[i] = nextarg(&op);
2152                 if (isr & 0x09000000)
2153                         av[i] = evaluate(an[i], &tv[i]);
2154                 if (isr & 0x08000000)
2155                         as[i] = getvar_s(av[i]);
2156                 isr >>= 1;
2157         }
2158
2159         nargs = i;
2160         if ((uint32_t)nargs < (info >> 30))
2161                 syntax_error(EMSG_TOO_FEW_ARGS);
2162
2163         info &= OPNMASK;
2164         switch (info) {
2165
2166         case B_a2:
2167                 if (ENABLE_FEATURE_AWK_LIBM)
2168                         setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2169                 else
2170                         syntax_error(EMSG_NO_MATH);
2171                 break;
2172
2173         case B_sp: {
2174                 char *s, *s1;
2175
2176                 if (nargs > 2) {
2177                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2178                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2179                 } else {
2180                         spl = &fsplitter.n;
2181                 }
2182
2183                 n = awk_split(as[0], spl, &s);
2184                 s1 = s;
2185                 clear_array(iamarray(av[1]));
2186                 for (i = 1; i <= n; i++)
2187                         setari_u(av[1], i, nextword(&s));
2188                 free(s1);
2189                 setvar_i(res, n);
2190                 break;
2191         }
2192
2193         case B_ss: {
2194                 char *s;
2195
2196                 l = strlen(as[0]);
2197                 i = getvar_i(av[1]) - 1;
2198                 if (i > l)
2199                         i = l;
2200                 if (i < 0)
2201                         i = 0;
2202                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2203                 if (n < 0)
2204                         n = 0;
2205                 s = xstrndup(as[0]+i, n);
2206                 setvar_p(res, s);
2207                 break;
2208         }
2209
2210         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2211          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2212         case B_an:
2213                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2214                 break;
2215
2216         case B_co:
2217                 setvar_i(res, ~getvar_i_int(av[0]));
2218                 break;
2219
2220         case B_ls:
2221                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2222                 break;
2223
2224         case B_or:
2225                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2226                 break;
2227
2228         case B_rs:
2229                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2230                 break;
2231
2232         case B_xo:
2233                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2234                 break;
2235
2236         case B_lo:
2237         case B_up: {
2238                 char *s, *s1;
2239                 s1 = s = xstrdup(as[0]);
2240                 while (*s1) {
2241                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2242                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2243                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2244                         s1++;
2245                 }
2246                 setvar_p(res, s);
2247                 break;
2248         }
2249
2250         case B_ix:
2251                 n = 0;
2252                 ll = strlen(as[1]);
2253                 l = strlen(as[0]) - ll;
2254                 if (ll > 0 && l >= 0) {
2255                         if (!icase) {
2256                                 char *s = strstr(as[0], as[1]);
2257                                 if (s)
2258                                         n = (s - as[0]) + 1;
2259                         } else {
2260                                 /* this piece of code is terribly slow and
2261                                  * really should be rewritten
2262                                  */
2263                                 for (i = 0; i <= l; i++) {
2264                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2265                                                 n = i+1;
2266                                                 break;
2267                                         }
2268                                 }
2269                         }
2270                 }
2271                 setvar_i(res, n);
2272                 break;
2273
2274         case B_ti:
2275                 if (nargs > 1)
2276                         tt = getvar_i(av[1]);
2277                 else
2278                         time(&tt);
2279                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2280                 i = strftime(g_buf, MAXVARFMT,
2281                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2282                         localtime(&tt));
2283                 g_buf[i] = '\0';
2284                 setvar_s(res, g_buf);
2285                 break;
2286
2287         case B_mt:
2288                 setvar_i(res, do_mktime(as[0]));
2289                 break;
2290
2291         case B_ma:
2292                 re = as_regex(an[1], &sreg);
2293                 n = regexec(re, as[0], 1, pmatch, 0);
2294                 if (n == 0) {
2295                         pmatch[0].rm_so++;
2296                         pmatch[0].rm_eo++;
2297                 } else {
2298                         pmatch[0].rm_so = 0;
2299                         pmatch[0].rm_eo = -1;
2300                 }
2301                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2302                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2303                 setvar_i(res, pmatch[0].rm_so);
2304                 if (re == &sreg)
2305                         regfree(re);
2306                 break;
2307
2308         case B_ge:
2309                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2310                 break;
2311
2312         case B_gs:
2313                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2314                 break;
2315
2316         case B_su:
2317                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2318                 break;
2319         }
2320
2321         nvfree(tv);
2322         return res;
2323 #undef tspl
2324 }
2325
2326 /*
2327  * Evaluate node - the heart of the program. Supplied with subtree
2328  * and place where to store result. returns ptr to result.
2329  */
2330 #define XC(n) ((n) >> 8)
2331
2332 static var *evaluate(node *op, var *res)
2333 {
2334 /* This procedure is recursive so we should count every byte */
2335 #define fnargs (G.evaluate__fnargs)
2336 /* seed is initialized to 1 */
2337 #define seed   (G.evaluate__seed)
2338 #define sreg   (G.evaluate__sreg)
2339
2340         var *v1;
2341
2342         if (!op)
2343                 return setvar_s(res, NULL);
2344
2345         debug_printf_eval("entered %s()\n", __func__);
2346
2347         v1 = nvalloc(2);
2348
2349         while (op) {
2350                 struct {
2351                         var *v;
2352                         const char *s;
2353                 } L = L; /* for compiler */
2354                 struct {
2355                         var *v;
2356                         const char *s;
2357                 } R = R;
2358                 double L_d = L_d;
2359                 uint32_t opinfo;
2360                 int opn;
2361                 node *op1;
2362
2363                 opinfo = op->info;
2364                 opn = (opinfo & OPNMASK);
2365                 g_lineno = op->lineno;
2366                 op1 = op->l.n;
2367                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2368
2369                 /* execute inevitable things */
2370                 if (opinfo & OF_RES1)
2371                         L.v = evaluate(op1, v1);
2372                 if (opinfo & OF_RES2)
2373                         R.v = evaluate(op->r.n, v1+1);
2374                 if (opinfo & OF_STR1) {
2375                         L.s = getvar_s(L.v);
2376                         debug_printf_eval("L.s:'%s'\n", L.s);
2377                 }
2378                 if (opinfo & OF_STR2) {
2379                         R.s = getvar_s(R.v);
2380                         debug_printf_eval("R.s:'%s'\n", R.s);
2381                 }
2382                 if (opinfo & OF_NUM1) {
2383                         L_d = getvar_i(L.v);
2384                         debug_printf_eval("L_d:%f\n", L_d);
2385                 }
2386
2387                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2388                 switch (XC(opinfo & OPCLSMASK)) {
2389
2390                 /* -- iterative node type -- */
2391
2392                 /* test pattern */
2393                 case XC( OC_TEST ):
2394                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2395                                 /* it's range pattern */
2396                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2397                                         op->info |= OF_CHECKED;
2398                                         if (ptest(op1->r.n))
2399                                                 op->info &= ~OF_CHECKED;
2400                                         op = op->a.n;
2401                                 } else {
2402                                         op = op->r.n;
2403                                 }
2404                         } else {
2405                                 op = ptest(op1) ? op->a.n : op->r.n;
2406                         }
2407                         break;
2408
2409                 /* just evaluate an expression, also used as unconditional jump */
2410                 case XC( OC_EXEC ):
2411                         break;
2412
2413                 /* branch, used in if-else and various loops */
2414                 case XC( OC_BR ):
2415                         op = istrue(L.v) ? op->a.n : op->r.n;
2416                         break;
2417
2418                 /* initialize for-in loop */
2419                 case XC( OC_WALKINIT ):
2420                         hashwalk_init(L.v, iamarray(R.v));
2421                         break;
2422
2423                 /* get next array item */
2424                 case XC( OC_WALKNEXT ):
2425                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2426                         break;
2427
2428                 case XC( OC_PRINT ):
2429                 case XC( OC_PRINTF ): {
2430                         FILE *F = stdout;
2431
2432                         if (op->r.n) {
2433                                 rstream *rsm = newfile(R.s);
2434                                 if (!rsm->F) {
2435                                         if (opn == '|') {
2436                                                 rsm->F = popen(R.s, "w");
2437                                                 if (rsm->F == NULL)
2438                                                         bb_perror_msg_and_die("popen");
2439                                                 rsm->is_pipe = 1;
2440                                         } else {
2441                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2442                                         }
2443                                 }
2444                                 F = rsm->F;
2445                         }
2446
2447                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2448                                 if (!op1) {
2449                                         fputs(getvar_s(intvar[F0]), F);
2450                                 } else {
2451                                         while (op1) {
2452                                                 var *v = evaluate(nextarg(&op1), v1);
2453                                                 if (v->type & VF_NUMBER) {
2454                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2455                                                                         getvar_i(v), TRUE);
2456                                                         fputs(g_buf, F);
2457                                                 } else {
2458                                                         fputs(getvar_s(v), F);
2459                                                 }
2460
2461                                                 if (op1)
2462                                                         fputs(getvar_s(intvar[OFS]), F);
2463                                         }
2464                                 }
2465                                 fputs(getvar_s(intvar[ORS]), F);
2466
2467                         } else {        /* OC_PRINTF */
2468                                 char *s = awk_printf(op1);
2469                                 fputs(s, F);
2470                                 free(s);
2471                         }
2472                         fflush(F);
2473                         break;
2474                 }
2475
2476                 case XC( OC_DELETE ): {
2477                         uint32_t info = op1->info & OPCLSMASK;
2478                         var *v;
2479
2480                         if (info == OC_VAR) {
2481                                 v = op1->l.v;
2482                         } else if (info == OC_FNARG) {
2483                                 v = &fnargs[op1->l.aidx];
2484                         } else {
2485                                 syntax_error(EMSG_NOT_ARRAY);
2486                         }
2487
2488                         if (op1->r.n) {
2489                                 const char *s;
2490                                 clrvar(L.v);
2491                                 s = getvar_s(evaluate(op1->r.n, v1));
2492                                 hash_remove(iamarray(v), s);
2493                         } else {
2494                                 clear_array(iamarray(v));
2495                         }
2496                         break;
2497                 }
2498
2499                 case XC( OC_NEWSOURCE ):
2500                         g_progname = op->l.new_progname;
2501                         break;
2502
2503                 case XC( OC_RETURN ):
2504                         copyvar(res, L.v);
2505                         break;
2506
2507                 case XC( OC_NEXTFILE ):
2508                         nextfile = TRUE;
2509                 case XC( OC_NEXT ):
2510                         nextrec = TRUE;
2511                 case XC( OC_DONE ):
2512                         clrvar(res);
2513                         break;
2514
2515                 case XC( OC_EXIT ):
2516                         awk_exit(L_d);
2517
2518                 /* -- recursive node type -- */
2519
2520                 case XC( OC_VAR ):
2521                         L.v = op->l.v;
2522                         if (L.v == intvar[NF])
2523                                 split_f0();
2524                         goto v_cont;
2525
2526                 case XC( OC_FNARG ):
2527                         L.v = &fnargs[op->l.aidx];
2528  v_cont:
2529                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2530                         break;
2531
2532                 case XC( OC_IN ):
2533                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2534                         break;
2535
2536                 case XC( OC_REGEXP ):
2537                         op1 = op;
2538                         L.s = getvar_s(intvar[F0]);
2539                         goto re_cont;
2540
2541                 case XC( OC_MATCH ):
2542                         op1 = op->r.n;
2543  re_cont:
2544                         {
2545                                 regex_t *re = as_regex(op1, &sreg);
2546                                 int i = regexec(re, L.s, 0, NULL, 0);
2547                                 if (re == &sreg)
2548                                         regfree(re);
2549                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2550                         }
2551                         break;
2552
2553                 case XC( OC_MOVE ):
2554                         debug_printf_eval("MOVE\n");
2555                         /* if source is a temporary string, just relink it to dest */
2556 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2557 //then L.v ends up being a string, which is wrong
2558 //                      if (R.v == v1+1 && R.v->string) {
2559 //                              res = setvar_p(L.v, R.v->string);
2560 //                              R.v->string = NULL;
2561 //                      } else {
2562                                 res = copyvar(L.v, R.v);
2563 //                      }
2564                         break;
2565
2566                 case XC( OC_TERNARY ):
2567                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2568                                 syntax_error(EMSG_POSSIBLE_ERROR);
2569                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2570                         break;
2571
2572                 case XC( OC_FUNC ): {
2573                         var *vbeg, *v;
2574                         const char *sv_progname;
2575
2576                         if (!op->r.f->body.first)
2577                                 syntax_error(EMSG_UNDEF_FUNC);
2578
2579                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2580                         while (op1) {
2581                                 var *arg = evaluate(nextarg(&op1), v1);
2582                                 copyvar(v, arg);
2583                                 v->type |= VF_CHILD;
2584                                 v->x.parent = arg;
2585                                 if (++v - vbeg >= op->r.f->nargs)
2586                                         break;
2587                         }
2588
2589                         v = fnargs;
2590                         fnargs = vbeg;
2591                         sv_progname = g_progname;
2592
2593                         res = evaluate(op->r.f->body.first, res);
2594
2595                         g_progname = sv_progname;
2596                         nvfree(fnargs);
2597                         fnargs = v;
2598
2599                         break;
2600                 }
2601
2602                 case XC( OC_GETLINE ):
2603                 case XC( OC_PGETLINE ): {
2604                         rstream *rsm;
2605                         int i;
2606
2607                         if (op1) {
2608                                 rsm = newfile(L.s);
2609                                 if (!rsm->F) {
2610                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2611                                                 rsm->F = popen(L.s, "r");
2612                                                 rsm->is_pipe = TRUE;
2613                                         } else {
2614                                                 rsm->F = fopen_for_read(L.s);           /* not xfopen! */
2615                                         }
2616                                 }
2617                         } else {
2618                                 if (!iF)
2619                                         iF = next_input_file();
2620                                 rsm = iF;
2621                         }
2622
2623                         if (!rsm->F) {
2624                                 setvar_i(intvar[ERRNO], errno);
2625                                 setvar_i(res, -1);
2626                                 break;
2627                         }
2628
2629                         if (!op->r.n)
2630                                 R.v = intvar[F0];
2631
2632                         i = awk_getline(rsm, R.v);
2633                         if (i > 0 && !op1) {
2634                                 incvar(intvar[FNR]);
2635                                 incvar(intvar[NR]);
2636                         }
2637                         setvar_i(res, i);
2638                         break;
2639                 }
2640
2641                 /* simple builtins */
2642                 case XC( OC_FBLTIN ): {
2643                         double R_d = R_d; /* for compiler */
2644
2645                         switch (opn) {
2646                         case F_in:
2647                                 R_d = (int)L_d;
2648                                 break;
2649
2650                         case F_rn:
2651                                 R_d = (double)rand() / (double)RAND_MAX;
2652                                 break;
2653
2654                         case F_co:
2655                                 if (ENABLE_FEATURE_AWK_LIBM) {
2656                                         R_d = cos(L_d);
2657                                         break;
2658                                 }
2659
2660                         case F_ex:
2661                                 if (ENABLE_FEATURE_AWK_LIBM) {
2662                                         R_d = exp(L_d);
2663                                         break;
2664                                 }
2665
2666                         case F_lg:
2667                                 if (ENABLE_FEATURE_AWK_LIBM) {
2668                                         R_d = log(L_d);
2669                                         break;
2670                                 }
2671
2672                         case F_si:
2673                                 if (ENABLE_FEATURE_AWK_LIBM) {
2674                                         R_d = sin(L_d);
2675                                         break;
2676                                 }
2677
2678                         case F_sq:
2679                                 if (ENABLE_FEATURE_AWK_LIBM) {
2680                                         R_d = sqrt(L_d);
2681                                         break;
2682                                 }
2683
2684                                 syntax_error(EMSG_NO_MATH);
2685                                 break;
2686
2687                         case F_sr:
2688                                 R_d = (double)seed;
2689                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2690                                 srand(seed);
2691                                 break;
2692
2693                         case F_ti:
2694                                 R_d = time(NULL);
2695                                 break;
2696
2697                         case F_le:
2698                                 if (!op1)
2699                                         L.s = getvar_s(intvar[F0]);
2700                                 R_d = strlen(L.s);
2701                                 break;
2702
2703                         case F_sy:
2704                                 fflush_all();
2705                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2706                                                 ? (system(L.s) >> 8) : 0;
2707                                 break;
2708
2709                         case F_ff:
2710                                 if (!op1) {
2711                                         fflush(stdout);
2712                                 } else if (L.s && *L.s) {
2713                                         rstream *rsm = newfile(L.s);
2714                                         fflush(rsm->F);
2715                                 } else {
2716                                         fflush_all();
2717                                 }
2718                                 break;
2719
2720                         case F_cl: {
2721                                 rstream *rsm;
2722                                 int err = 0;
2723                                 rsm = (rstream *)hash_search(fdhash, L.s);
2724                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2725                                 if (rsm) {
2726                                         debug_printf_eval("OC_FBLTIN F_cl "
2727                                                 "rsm->is_pipe:%d, ->F:%p\n",
2728                                                 rsm->is_pipe, rsm->F);
2729                                         /* Can be NULL if open failed. Example:
2730                                          * getline line <"doesnt_exist";
2731                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2732                                          */
2733                                         if (rsm->F)
2734                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2735                                         free(rsm->buffer);
2736                                         hash_remove(fdhash, L.s);
2737                                 }
2738                                 if (err)
2739                                         setvar_i(intvar[ERRNO], errno);
2740                                 R_d = (double)err;
2741                                 break;
2742                         }
2743                         } /* switch */
2744                         setvar_i(res, R_d);
2745                         break;
2746                 }
2747
2748                 case XC( OC_BUILTIN ):
2749                         res = exec_builtin(op, res);
2750                         break;
2751
2752                 case XC( OC_SPRINTF ):
2753                         setvar_p(res, awk_printf(op1));
2754                         break;
2755
2756                 case XC( OC_UNARY ): {
2757                         double Ld, R_d;
2758
2759                         Ld = R_d = getvar_i(R.v);
2760                         switch (opn) {
2761                         case 'P':
2762                                 Ld = ++R_d;
2763                                 goto r_op_change;
2764                         case 'p':
2765                                 R_d++;
2766                                 goto r_op_change;
2767                         case 'M':
2768                                 Ld = --R_d;
2769                                 goto r_op_change;
2770                         case 'm':
2771                                 R_d--;
2772  r_op_change:
2773                                 setvar_i(R.v, R_d);
2774                                 break;
2775                         case '!':
2776                                 Ld = !istrue(R.v);
2777                                 break;
2778                         case '-':
2779                                 Ld = -R_d;
2780                                 break;
2781                         }
2782                         setvar_i(res, Ld);
2783                         break;
2784                 }
2785
2786                 case XC( OC_FIELD ): {
2787                         int i = (int)getvar_i(R.v);
2788                         if (i == 0) {
2789                                 res = intvar[F0];
2790                         } else {
2791                                 split_f0();
2792                                 if (i > nfields)
2793                                         fsrealloc(i);
2794                                 res = &Fields[i - 1];
2795                         }
2796                         break;
2797                 }
2798
2799                 /* concatenation (" ") and index joining (",") */
2800                 case XC( OC_CONCAT ):
2801                 case XC( OC_COMMA ): {
2802                         const char *sep = "";
2803                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2804                                 sep = getvar_s(intvar[SUBSEP]);
2805                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2806                         break;
2807                 }
2808
2809                 case XC( OC_LAND ):
2810                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2811                         break;
2812
2813                 case XC( OC_LOR ):
2814                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2815                         break;
2816
2817                 case XC( OC_BINARY ):
2818                 case XC( OC_REPLACE ): {
2819                         double R_d = getvar_i(R.v);
2820                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2821                         switch (opn) {
2822                         case '+':
2823                                 L_d += R_d;
2824                                 break;
2825                         case '-':
2826                                 L_d -= R_d;
2827                                 break;
2828                         case '*':
2829                                 L_d *= R_d;
2830                                 break;
2831                         case '/':
2832                                 if (R_d == 0)
2833                                         syntax_error(EMSG_DIV_BY_ZERO);
2834                                 L_d /= R_d;
2835                                 break;
2836                         case '&':
2837                                 if (ENABLE_FEATURE_AWK_LIBM)
2838                                         L_d = pow(L_d, R_d);
2839                                 else
2840                                         syntax_error(EMSG_NO_MATH);
2841                                 break;
2842                         case '%':
2843                                 if (R_d == 0)
2844                                         syntax_error(EMSG_DIV_BY_ZERO);
2845                                 L_d -= (int)(L_d / R_d) * R_d;
2846                                 break;
2847                         }
2848                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2849                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2850                         break;
2851                 }
2852
2853                 case XC( OC_COMPARE ): {
2854                         int i = i; /* for compiler */
2855                         double Ld;
2856
2857                         if (is_numeric(L.v) && is_numeric(R.v)) {
2858                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2859                         } else {
2860                                 const char *l = getvar_s(L.v);
2861                                 const char *r = getvar_s(R.v);
2862                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2863                         }
2864                         switch (opn & 0xfe) {
2865                         case 0:
2866                                 i = (Ld > 0);
2867                                 break;
2868                         case 2:
2869                                 i = (Ld >= 0);
2870                                 break;
2871                         case 4:
2872                                 i = (Ld == 0);
2873                                 break;
2874                         }
2875                         setvar_i(res, (i == 0) ^ (opn & 1));
2876                         break;
2877                 }
2878
2879                 default:
2880                         syntax_error(EMSG_POSSIBLE_ERROR);
2881                 }
2882                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2883                         op = op->a.n;
2884                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2885                         break;
2886                 if (nextrec)
2887                         break;
2888         } /* while (op) */
2889
2890         nvfree(v1);
2891         debug_printf_eval("returning from %s(): %p\n", __func__, res);
2892         return res;
2893 #undef fnargs
2894 #undef seed
2895 #undef sreg
2896 }
2897
2898
2899 /* -------- main & co. -------- */
2900
2901 static int awk_exit(int r)
2902 {
2903         var tv;
2904         unsigned i;
2905         hash_item *hi;
2906
2907         zero_out_var(&tv);
2908
2909         if (!exiting) {
2910                 exiting = TRUE;
2911                 nextrec = FALSE;
2912                 evaluate(endseq.first, &tv);
2913         }
2914
2915         /* waiting for children */
2916         for (i = 0; i < fdhash->csize; i++) {
2917                 hi = fdhash->items[i];
2918                 while (hi) {
2919                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2920                                 pclose(hi->data.rs.F);
2921                         hi = hi->next;
2922                 }
2923         }
2924
2925         exit(r);
2926 }
2927
2928 /* if expr looks like "var=value", perform assignment and return 1,
2929  * otherwise return 0 */
2930 static int is_assignment(const char *expr)
2931 {
2932         char *exprc, *val, *s, *s1;
2933
2934         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2935                 return FALSE;
2936         }
2937
2938         exprc = xstrdup(expr);
2939         val = exprc + (val - expr);
2940         *val++ = '\0';
2941
2942         s = s1 = val;
2943         while ((*s1 = nextchar(&s)) != '\0')
2944                 s1++;
2945
2946         setvar_u(newvar(exprc), val);
2947         free(exprc);
2948         return TRUE;
2949 }
2950
2951 /* switch to next input file */
2952 static rstream *next_input_file(void)
2953 {
2954 #define rsm          (G.next_input_file__rsm)
2955 #define files_happen (G.next_input_file__files_happen)
2956
2957         FILE *F = NULL;
2958         const char *fname, *ind;
2959
2960         if (rsm.F)
2961                 fclose(rsm.F);
2962         rsm.F = NULL;
2963         rsm.pos = rsm.adv = 0;
2964
2965         do {
2966                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2967                         if (files_happen)
2968                                 return NULL;
2969                         fname = "-";
2970                         F = stdin;
2971                 } else {
2972                         ind = getvar_s(incvar(intvar[ARGIND]));
2973                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2974                         if (fname && *fname && !is_assignment(fname))
2975                                 F = xfopen_stdin(fname);
2976                 }
2977         } while (!F);
2978
2979         files_happen = TRUE;
2980         setvar_s(intvar[FILENAME], fname);
2981         rsm.F = F;
2982         return &rsm;
2983 #undef rsm
2984 #undef files_happen
2985 }
2986
2987 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2988 int awk_main(int argc, char **argv)
2989 {
2990         unsigned opt;
2991         char *opt_F, *opt_W;
2992         llist_t *list_v = NULL;
2993         llist_t *list_f = NULL;
2994         int i, j;
2995         var *v;
2996         var tv;
2997         char **envp;
2998         char *vnames = (char *)vNames; /* cheat */
2999         char *vvalues = (char *)vValues;
3000
3001         INIT_G();
3002
3003         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3004          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3005         if (ENABLE_LOCALE_SUPPORT)
3006                 setlocale(LC_NUMERIC, "C");
3007
3008         zero_out_var(&tv);
3009
3010         /* allocate global buffer */
3011         g_buf = xmalloc(MAXVARFMT + 1);
3012
3013         vhash = hash_init();
3014         ahash = hash_init();
3015         fdhash = hash_init();
3016         fnhash = hash_init();
3017
3018         /* initialize variables */
3019         for (i = 0; *vnames; i++) {
3020                 intvar[i] = v = newvar(nextword(&vnames));
3021                 if (*vvalues != '\377')
3022                         setvar_s(v, nextword(&vvalues));
3023                 else
3024                         setvar_i(v, 0);
3025
3026                 if (*vnames == '*') {
3027                         v->type |= VF_SPECIAL;
3028                         vnames++;
3029                 }
3030         }
3031
3032         handle_special(intvar[FS]);
3033         handle_special(intvar[RS]);
3034
3035         newfile("/dev/stdin")->F = stdin;
3036         newfile("/dev/stdout")->F = stdout;
3037         newfile("/dev/stderr")->F = stderr;
3038
3039         /* Huh, people report that sometimes environ is NULL. Oh well. */
3040         if (environ) for (envp = environ; *envp; envp++) {
3041                 /* environ is writable, thus we don't strdup it needlessly */
3042                 char *s = *envp;
3043                 char *s1 = strchr(s, '=');
3044                 if (s1) {
3045                         *s1 = '\0';
3046                         /* Both findvar and setvar_u take const char*
3047                          * as 2nd arg -> environment is not trashed */
3048                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3049                         *s1 = '=';
3050                 }
3051         }
3052         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3053         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3054         argv += optind;
3055         argc -= optind;
3056         if (opt & 0x1)
3057                 setvar_s(intvar[FS], opt_F); // -F
3058         while (list_v) { /* -v */
3059                 if (!is_assignment(llist_pop(&list_v)))
3060                         bb_show_usage();
3061         }
3062         if (list_f) { /* -f */
3063                 do {
3064                         char *s = NULL;
3065                         FILE *from_file;
3066
3067                         g_progname = llist_pop(&list_f);
3068                         from_file = xfopen_stdin(g_progname);
3069                         /* one byte is reserved for some trick in next_token */
3070                         for (i = j = 1; j > 0; i += j) {
3071                                 s = xrealloc(s, i + 4096);
3072                                 j = fread(s + i, 1, 4094, from_file);
3073                         }
3074                         s[i] = '\0';
3075                         fclose(from_file);
3076                         parse_program(s + 1);
3077                         free(s);
3078                 } while (list_f);
3079                 argc++;
3080         } else { // no -f: take program from 1st parameter
3081                 if (!argc)
3082                         bb_show_usage();
3083                 g_progname = "cmd. line";
3084                 parse_program(*argv++);
3085         }
3086         if (opt & 0x8) // -W
3087                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3088
3089         /* fill in ARGV array */
3090         setvar_i(intvar[ARGC], argc);
3091         setari_u(intvar[ARGV], 0, "awk");
3092         i = 0;
3093         while (*argv)
3094                 setari_u(intvar[ARGV], ++i, *argv++);
3095
3096         evaluate(beginseq.first, &tv);
3097         if (!mainseq.first && !endseq.first)
3098                 awk_exit(EXIT_SUCCESS);
3099
3100         /* input file could already be opened in BEGIN block */
3101         if (!iF)
3102                 iF = next_input_file();
3103
3104         /* passing through input files */
3105         while (iF) {
3106                 nextfile = FALSE;
3107                 setvar_i(intvar[FNR], 0);
3108
3109                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3110                         nextrec = FALSE;
3111                         incvar(intvar[NR]);
3112                         incvar(intvar[FNR]);
3113                         evaluate(mainseq.first, &tv);
3114
3115                         if (nextfile)
3116                                 break;
3117                 }
3118
3119                 if (i < 0)
3120                         syntax_error(strerror(errno));
3121
3122                 iF = next_input_file();
3123         }
3124
3125         awk_exit(EXIT_SUCCESS);
3126         /*return 0;*/
3127 }