awk: 00.123 is not an octal constant; neither is 009.123. Fixing this.
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
17 /* If you comment out one of these below, it will be #defined later
18  * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...)  do {} while (0)
20 #define debug_printf_eval(...)  do {} while (0)
21
22 #ifndef debug_printf_walker
23 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
24 #endif
25 #ifndef debug_printf_eval
26 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
27 #endif
28
29
30
31 #define MAXVARFMT       240
32 #define MINNVBLOCK      64
33
34 /* variable flags */
35 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
36 #define VF_ARRAY        0x0002  /* 1 = it's an array */
37
38 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
39 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
40 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
41 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
42 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
43 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
44 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
45
46 /* these flags are static, don't change them when value is changed */
47 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
48
49 typedef struct walker_list {
50         char *end;
51         char *cur;
52         struct walker_list *prev;
53         char wbuf[1];
54 } walker_list;
55
56 /* Variable */
57 typedef struct var_s {
58         unsigned type;            /* flags */
59         double number;
60         char *string;
61         union {
62                 int aidx;               /* func arg idx (for compilation stage) */
63                 struct xhash_s *array;  /* array ptr */
64                 struct var_s *parent;   /* for func args, ptr to actual parameter */
65                 walker_list *walker;    /* list of array elements (for..in) */
66         } x;
67 } var;
68
69 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
70 typedef struct chain_s {
71         struct node_s *first;
72         struct node_s *last;
73         const char *programname;
74 } chain;
75
76 /* Function */
77 typedef struct func_s {
78         unsigned nargs;
79         struct chain_s body;
80 } func;
81
82 /* I/O stream */
83 typedef struct rstream_s {
84         FILE *F;
85         char *buffer;
86         int adv;
87         int size;
88         int pos;
89         smallint is_pipe;
90 } rstream;
91
/*
 * One entry of an xhash bucket chain.
 * 'data' is the first member, so the address of an item's payload is also
 * the address of the item: hash_search() returns &hi->data and hash_find()
 * stores that result back into a hash_item pointer, relying on this layout.
 */
92 typedef struct hash_item_s {
93         union {
94                 struct var_s v;         /* variable/array hash */
95                 struct rstream_s rs;    /* redirect streams hash */
96                 struct func_s f;        /* functions hash */
97         } data;
98         struct hash_item_s *next;       /* next item in the same bucket chain */
99         char name[1];                   /* NUL-terminated key; hash_find() allocates sizeof(item) + strlen(name) + 1 bytes */
100 } hash_item;
101
/*
 * Chained hash table keyed by NUL-terminated names.
 * hash_init() creates it with FIRST_PRIME buckets; hash_find() calls
 * hash_rebuild() once nel / csize exceeds 10.
 */
102 typedef struct xhash_s {
103         unsigned nel;           /* number of stored items */
104         unsigned csize;         /* current number of buckets in items[] */
105         unsigned nprime;        /* index into PRIMES[] of the next bucket count to grow to */
106         unsigned glen;          /* sum of strlen(name) + 1 over all stored items */
107         struct hash_item_s **items;     /* bucket array: csize chain heads */
108 } xhash;
109
110 /* Tree node */
111 typedef struct node_s {
112         uint32_t info;
113         unsigned lineno;
114         union {
115                 struct node_s *n;
116                 var *v;
117                 int aidx;
118                 char *new_progname;
119                 regex_t *re;
120         } l;
121         union {
122                 struct node_s *n;
123                 regex_t *ire;
124                 func *f;
125         } r;
126         union {
127                 struct node_s *n;
128         } a;
129 } node;
130
131 /* Block of temporary variables */
/*
 * nvalloc() carves runs of temporaries out of these blocks and links a
 * freshly allocated block after the last one when no block has room.
 */
132 typedef struct nvblock_s {
133         int size;                       /* capacity of nv[], counted in vars */
134         var *pos;                       /* first unused slot in nv[] */
135         struct nvblock_s *prev;         /* block that precedes this one, NULL for the first */
136         struct nvblock_s *next;         /* block that follows this one, NULL for the last */
137         var nv[];                       /* storage for the temporaries */
138 } nvblock;
139
140 typedef struct tsplitter_s {
141         node n;
142         regex_t re[2];
143 } tsplitter;
144
145 /* simple token classes */
146 /* Order and hex values are very important!!!  See next_token() */
147 #define TC_SEQSTART      1                              /* ( */
148 #define TC_SEQTERM      (1 << 1)                /* ) */
149 #define TC_REGEXP       (1 << 2)                /* /.../ */
150 #define TC_OUTRDR       (1 << 3)                /* | > >> */
151 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
152 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
153 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
154 #define TC_IN           (1 << 7)
155 #define TC_COMMA        (1 << 8)
156 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
157 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
158 #define TC_ARRTERM      (1 << 11)               /* ] */
159 #define TC_GRPSTART     (1 << 12)               /* { */
160 #define TC_GRPTERM      (1 << 13)               /* } */
161 #define TC_SEMICOL      (1 << 14)
162 #define TC_NEWLINE      (1 << 15)
163 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
164 #define TC_WHILE        (1 << 17)
165 #define TC_ELSE         (1 << 18)
166 #define TC_BUILTIN      (1 << 19)
167 #define TC_GETLINE      (1 << 20)
168 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
169 #define TC_BEGIN        (1 << 22)
170 #define TC_END          (1 << 23)
171 #define TC_EOF          (1 << 24)
172 #define TC_VARIABLE     (1 << 25)
173 #define TC_ARRAY        (1 << 26)
174 #define TC_FUNCTION     (1 << 27)
175 #define TC_STRING       (1 << 28)
176 #define TC_NUMBER       (1 << 29)
177
178 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
179
180 /* combined token classes */
181 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
184                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186 #define TC_STATEMNT (TC_STATX | TC_WHILE)
187 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
188
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
191                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193 /* discard newlines after these */
194 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
195                    | TC_BINOP | TC_OPTERM)
196
197 /* what can expression begin with */
198 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
205                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
207
208 #define OF_RES1    0x010000
209 #define OF_RES2    0x020000
210 #define OF_STR1    0x040000
211 #define OF_STR2    0x080000
212 #define OF_NUM1    0x100000
213 #define OF_CHECKED 0x200000
214
215 /* combined operator flags */
216 #define xx      0
217 #define xV      OF_RES2
218 #define xS      (OF_RES2 | OF_STR2)
219 #define Vx      OF_RES1
220 #define VV      (OF_RES1 | OF_RES2)
221 #define Nx      (OF_RES1 | OF_NUM1)
222 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx      (OF_RES1 | OF_STR1)
224 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
227 #define OPCLSMASK 0xFF00
228 #define OPNMASK   0x007F
229
230 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
231  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
233  */
234 #define P(x)      (x << 24)
235 #define PRIMASK   0x7F000000
236 #define PRIMASK2  0x7E000000
237
238 /* Operation classes */
239
240 #define SHIFT_TIL_THIS  0x0600
241 #define RECUR_FROM_THIS 0x1000
242
243 enum {
244         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
245         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
246
247         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
248         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
249         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
250
251         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
252         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
253         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
254         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
255         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
256         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
257         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
258         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
259         OC_DONE = 0x2800,
260
261         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
262         ST_WHILE = 0x3300
263 };
264
265 /* simple builtins */
266 enum {
267         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
268         F_ti,   F_le,   F_sy,   F_ff,   F_cl
269 };
270
271 /* builtins */
272 enum {
273         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
274         B_ge,   B_gs,   B_su,
275         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
276 };
277
278 /* tokens and their corresponding info values */
279
280 #define NTC     "\377"  /* switch to next token class (tc<<1) */
281 #define NTCC    '\377'
282
283 #define OC_B    OC_BUILTIN
284
285 static const char tokenlist[] ALIGN1 =
286         "\1("       NTC
287         "\1)"       NTC
288         "\1/"       NTC                                 /* REGEXP */
289         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
290         "\2++"      "\2--"      NTC                     /* UOPPOST */
291         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
292         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
293         "\2*="      "\2/="      "\2%="      "\2^="
294         "\1+"       "\1-"       "\3**="     "\2**"
295         "\1/"       "\1%"       "\1^"       "\1*"
296         "\2!="      "\2>="      "\2<="      "\1>"
297         "\1<"       "\2!~"      "\1~"       "\2&&"
298         "\2||"      "\1?"       "\1:"       NTC
299         "\2in"      NTC
300         "\1,"       NTC
301         "\1|"       NTC
302         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
303         "\1]"       NTC
304         "\1{"       NTC
305         "\1}"       NTC
306         "\1;"       NTC
307         "\1\n"      NTC
308         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
309         "\10continue"           "\6delete"  "\5print"
310         "\6printf"  "\4next"    "\10nextfile"
311         "\6return"  "\4exit"    NTC
312         "\5while"   NTC
313         "\4else"    NTC
314
315         "\3and"     "\5compl"   "\6lshift"  "\2or"
316         "\6rshift"  "\3xor"
317         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
318         "\3cos"     "\3exp"     "\3int"     "\3log"
319         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
320         "\6gensub"  "\4gsub"    "\5index"   "\6length"
321         "\5match"   "\5split"   "\7sprintf" "\3sub"
322         "\6substr"  "\7systime" "\10strftime" "\6mktime"
323         "\7tolower" "\7toupper" NTC
324         "\7getline" NTC
325         "\4func"    "\10function"   NTC
326         "\5BEGIN"   NTC
327         "\3END"     "\0"
328         ;
329
330 static const uint32_t tokeninfo[] = {
331         0,
332         0,
333         OC_REGEXP,
334         xS|'a',     xS|'w',     xS|'|',
335         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
336         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
337             OC_FIELD|xV|P(5),
338         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
339             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
340         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
341             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
342         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
343             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
344         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
345             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
346         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
347             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
348         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
349             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
350         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
351             OC_COLON|xx|P(67)|':',
352         OC_IN|SV|P(49),
353         OC_COMMA|SS|P(80),
354         OC_PGETLINE|SV|P(37),
355         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
356             OC_UNARY|xV|P(19)|'!',
357         0,
358         0,
359         0,
360         0,
361         0,
362         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
363         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
364         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
365         OC_RETURN|Vx,   OC_EXIT|Nx,
366         ST_WHILE,
367         0,
368
369         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
370         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
371         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
372         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
373         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
374         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
375         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
376         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
377         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
378         OC_GETLINE|SV|P(0),
379         0,      0,
380         0,
381         0
382 };
383
384 /* internal variable names and their initial values       */
385 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
386 enum {
387         CONVFMT,    OFMT,       FS,         OFS,
388         ORS,        RS,         RT,         FILENAME,
389         SUBSEP,     F0,         ARGIND,     ARGC,
390         ARGV,       ERRNO,      FNR,        NR,
391         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
392 };
393
394 static const char vNames[] ALIGN1 =
395         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
396         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
397         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
398         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
399         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
400
401 static const char vValues[] ALIGN1 =
402         "%.6g\0"    "%.6g\0"    " \0"       " \0"
403         "\n\0"      "\n\0"      "\0"        "\0"
404         "\034\0"    "\0"        "\377";
405
406 /* hash size may grow to these values */
407 #define FIRST_PRIME 61
408 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
409
410
411 /* Globals. Split in two parts so that first one is addressed
412  * with (mostly short) negative offsets.
413  * NB: it's unsafe to put members of type "double"
414  * into globals2 (gcc may fail to align them).
415  */
416 struct globals {
417         double t_double;
418         chain beginseq, mainseq, endseq;
419         chain *seq;
420         node *break_ptr, *continue_ptr;
421         rstream *iF;
422         xhash *vhash, *ahash, *fdhash, *fnhash;
423         const char *g_progname;
424         int g_lineno;
425         int nfields;
426         int maxfields; /* used in fsrealloc() only */
427         var *Fields;
428         nvblock *g_cb;
429         char *g_pos;
430         char *g_buf;
431         smallint icase;
432         smallint exiting;
433         smallint nextrec;
434         smallint nextfile;
435         smallint is_f0_split;
436 };
437 struct globals2 {
438         uint32_t t_info; /* often used */
439         uint32_t t_tclass;
440         char *t_string;
441         int t_lineno;
442         int t_rollback;
443
444         var *intvar[NUM_INTERNAL_VARS]; /* often used */
445
446         /* former statics from various functions */
447         char *split_f0__fstrings;
448
449         uint32_t next_token__save_tclass;
450         uint32_t next_token__save_info;
451         uint32_t next_token__ltclass;
452         smallint next_token__concat_inserted;
453
454         smallint next_input_file__files_happen;
455         rstream next_input_file__rsm;
456
457         var *evaluate__fnargs;
458         unsigned evaluate__seed;
459         regex_t evaluate__sreg;
460
461         var ptest__v;
462
463         tsplitter exec_builtin__tspl;
464
465         /* biggest and least used members go last */
466         tsplitter fsplitter, rsplitter;
467 };
468 #define G1 (ptr_to_globals[-1])
469 #define G (*(struct globals2 *)ptr_to_globals)
470 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
471 /*char G1size[sizeof(G1)]; - 0x74 */
472 /*char Gsize[sizeof(G)]; - 0x1c4 */
473 /* Trying to keep most of members accessible with short offsets: */
474 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
475 #define t_double     (G1.t_double    )
476 #define beginseq     (G1.beginseq    )
477 #define mainseq      (G1.mainseq     )
478 #define endseq       (G1.endseq      )
479 #define seq          (G1.seq         )
480 #define break_ptr    (G1.break_ptr   )
481 #define continue_ptr (G1.continue_ptr)
482 #define iF           (G1.iF          )
483 #define vhash        (G1.vhash       )
484 #define ahash        (G1.ahash       )
485 #define fdhash       (G1.fdhash      )
486 #define fnhash       (G1.fnhash      )
487 #define g_progname   (G1.g_progname  )
488 #define g_lineno     (G1.g_lineno    )
489 #define nfields      (G1.nfields     )
490 #define maxfields    (G1.maxfields   )
491 #define Fields       (G1.Fields      )
492 #define g_cb         (G1.g_cb        )
493 #define g_pos        (G1.g_pos       )
494 #define g_buf        (G1.g_buf       )
495 #define icase        (G1.icase       )
496 #define exiting      (G1.exiting     )
497 #define nextrec      (G1.nextrec     )
498 #define nextfile     (G1.nextfile    )
499 #define is_f0_split  (G1.is_f0_split )
500 #define t_info       (G.t_info      )
501 #define t_tclass     (G.t_tclass    )
502 #define t_string     (G.t_string    )
503 #define t_lineno     (G.t_lineno    )
504 #define t_rollback   (G.t_rollback  )
505 #define intvar       (G.intvar      )
506 #define fsplitter    (G.fsplitter   )
507 #define rsplitter    (G.rsplitter   )
508 #define INIT_G() do { \
509         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
510         G.next_token__ltclass = TC_OPTERM; \
511         G.evaluate__seed = 1; \
512 } while (0)
513
514
515 /* function prototypes */
516 static void handle_special(var *);
517 static node *parse_expr(uint32_t);
518 static void chain_group(void);
519 static var *evaluate(node *, var *);
520 static rstream *next_input_file(void);
521 static int fmt_num(char *, int, const char *, double, int);
522 static int awk_exit(int) NORETURN;
523
524 /* ---- error handling ---- */
525
526 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
527 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
528 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
529 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
530 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
531 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
532 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
533 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
534 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
535 #if !ENABLE_FEATURE_AWK_LIBM
536 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
537 #endif
538
539 static void zero_out_var(var *vp)
540 {
541         memset(vp, 0, sizeof(*vp));
542 }
543
544 static void syntax_error(const char *message) NORETURN;
545 static void syntax_error(const char *message)
546 {
547         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
548 }
549
550 /* ---- hash stuff ---- */
551
/*
 * Hash a NUL-terminated name: for every byte, idx = idx * 63 + byte,
 * with unsigned wrap-around. Callers reduce the result modulo the
 * bucket count themselves.
 */
static unsigned hashidx(const char *name)
{
	unsigned idx;

	for (idx = 0; *name != '\0'; name++)
		idx = idx * 63 + *name;
	return idx;
}
560
561 /* create new hash */
562 static xhash *hash_init(void)
563 {
564         xhash *newhash;
565
566         newhash = xzalloc(sizeof(*newhash));
567         newhash->csize = FIRST_PRIME;
568         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
569
570         return newhash;
571 }
572
573 /* find item in hash, return ptr to data, NULL if not found */
574 static void *hash_search(xhash *hash, const char *name)
575 {
576         hash_item *hi;
577
578         hi = hash->items[hashidx(name) % hash->csize];
579         while (hi) {
580                 if (strcmp(hi->name, name) == 0)
581                         return &hi->data;
582                 hi = hi->next;
583         }
584         return NULL;
585 }
586
587 /* grow hash if it becomes too big */
588 static void hash_rebuild(xhash *hash)
589 {
590         unsigned newsize, i, idx;
591         hash_item **newitems, *hi, *thi;
592
593         if (hash->nprime == ARRAY_SIZE(PRIMES))
594                 return;
595
596         newsize = PRIMES[hash->nprime++];
597         newitems = xzalloc(newsize * sizeof(newitems[0]));
598
599         for (i = 0; i < hash->csize; i++) {
600                 hi = hash->items[i];
601                 while (hi) {
602                         thi = hi;
603                         hi = thi->next;
604                         idx = hashidx(thi->name) % newsize;
605                         thi->next = newitems[idx];
606                         newitems[idx] = thi;
607                 }
608         }
609
610         free(hash->items);
611         hash->csize = newsize;
612         hash->items = newitems;
613 }
614
615 /* find item in hash, add it if necessary. Return ptr to data */
616 static void *hash_find(xhash *hash, const char *name)
617 {
618         hash_item *hi;
619         unsigned idx;
620         int l;
621
622         hi = hash_search(hash, name);
623         if (!hi) {
624                 if (++hash->nel / hash->csize > 10)
625                         hash_rebuild(hash);
626
627                 l = strlen(name) + 1;
628                 hi = xzalloc(sizeof(*hi) + l);
629                 strcpy(hi->name, name);
630
631                 idx = hashidx(name) % hash->csize;
632                 hi->next = hash->items[idx];
633                 hash->items[idx] = hi;
634                 hash->glen += l;
635         }
636         return &hi->data;
637 }
638
639 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
640 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
641 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
642 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
643
644 static void hash_remove(xhash *hash, const char *name)
645 {
646         hash_item *hi, **phi;
647
648         phi = &hash->items[hashidx(name) % hash->csize];
649         while (*phi) {
650                 hi = *phi;
651                 if (strcmp(hi->name, name) == 0) {
652                         hash->glen -= (strlen(name) + 1);
653                         hash->nel--;
654                         *phi = hi->next;
655                         free(hi);
656                         break;
657                 }
658                 phi = &hi->next;
659         }
660 }
661
662 /* ------ some useful functions ------ */
663
664 static char *skip_spaces(char *p)
665 {
666         while (1) {
667                 if (*p == '\\' && p[1] == '\n') {
668                         p++;
669                         t_lineno++;
670                 } else if (*p != ' ' && *p != '\t') {
671                         break;
672                 }
673                 p++;
674         }
675         return p;
676 }
677
/*
 * Return the word *s currently points to and move *s to the byte that
 * follows the word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
686
/*
 * Fetch one character from *s, decoding a backslash escape if one starts
 * there, and advance *s past whatever was consumed.
 *
 * When bb_process_escape_sequence() does not recognize the escape
 * (signalled by a '\\' result with *s unmoved), the character after the
 * backslash is returned literally, so \" comes out as ".
 * A backslash that is the very last character of the string yields NUL
 * and leaves *s on the terminator instead of stepping past it, so callers
 * that scan until NUL cannot run off the end of the buffer.
 */
static char nextchar(char **s)
{
	char c, *pps;

	c = *(*s)++;
	pps = *s;
	if (c == '\\')
		c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == pps) {
		/* unrecognized escape: take the escaped character as-is... */
		c = **s;
		/* ...but never move beyond the terminating NUL */
		if (c != '\0')
			(*s)++;
	}
	return c;
}
699
700 static ALWAYS_INLINE int isalnum_(int c)
701 {
702         return (isalnum(c) || c == '_');
703 }
704
/*
 * Convert the number at *pp to double and move *pp past the parsed text.
 *
 * With ENABLE_DESKTOP, text starting with "0x"/"0X" or with '0' followed
 * by a digit is first handed to strtoull() with base 0 (hex or octal).
 * The integer result is used for the hex form, and for the octal form
 * when parsing did not stop on a digit or a '.'. Otherwise the text is
 * really a decimal such as 009.123 or 000.123 and is reparsed from the
 * start with strtod().
 */
static double my_strtod(char **pp)
{
	char *start = *pp;
#if ENABLE_DESKTOP
	if (start[0] == '0') {
		char lowered = (start[1] | 0x20);
		int is_hex = (lowered == 'x');

		if (is_hex || isdigit(start[1])) {
			unsigned long long ull = strtoull(start, pp, 0);
			char stop = **pp;

			if (is_hex || (!isdigit(stop) && stop != '.'))
				return ull;
			/* stopped on '8', '9' or '.': not octal after all */
		}
	}
#endif
	return strtod(start, pp);
}
729
730 /* -------- working with variables (set/get/copy/etc) -------- */
731
732 static xhash *iamarray(var *v)
733 {
734         var *a = v;
735
736         while (a->type & VF_CHILD)
737                 a = a->x.parent;
738
739         if (!(a->type & VF_ARRAY)) {
740                 a->type |= VF_ARRAY;
741                 a->x.array = hash_init();
742         }
743         return a->x.array;
744 }
745
746 static void clear_array(xhash *array)
747 {
748         unsigned i;
749         hash_item *hi, *thi;
750
751         for (i = 0; i < array->csize; i++) {
752                 hi = array->items[i];
753                 while (hi) {
754                         thi = hi;
755                         hi = hi->next;
756                         free(thi->data.v.string);
757                         free(thi);
758                 }
759                 array->items[i] = NULL;
760         }
761         array->glen = array->nel = 0;
762 }
763
764 /* clear a variable */
765 static var *clrvar(var *v)
766 {
767         if (!(v->type & VF_FSTR))
768                 free(v->string);
769
770         v->type &= VF_DONTTOUCH;
771         v->type |= VF_DIRTY;
772         v->string = NULL;
773         return v;
774 }
775
776 /* assign string value to variable */
777 static var *setvar_p(var *v, char *value)
778 {
779         clrvar(v);
780         v->string = value;
781         handle_special(v);
782         return v;
783 }
784
785 /* same as setvar_p but make a copy of string */
786 static var *setvar_s(var *v, const char *value)
787 {
788         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
789 }
790
791 /* same as setvar_s but sets USER flag */
792 static var *setvar_u(var *v, const char *value)
793 {
794         v = setvar_s(v, value);
795         v->type |= VF_USER;
796         return v;
797 }
798
799 /* set array element to user string */
800 static void setari_u(var *a, int idx, const char *s)
801 {
802         var *v;
803
804         v = findvar(iamarray(a), itoa(idx));
805         setvar_u(v, s);
806 }
807
808 /* assign numeric value to variable */
809 static var *setvar_i(var *v, double value)
810 {
811         clrvar(v);
812         v->type |= VF_NUMBER;
813         v->number = value;
814         handle_special(v);
815         return v;
816 }
817
818 static const char *getvar_s(var *v)
819 {
820         /* if v is numeric and has no cached string, convert it to string */
821         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
822                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
823                 v->string = xstrdup(g_buf);
824                 v->type |= VF_CACHED;
825         }
826         return (v->string == NULL) ? "" : v->string;
827 }
828
829 static double getvar_i(var *v)
830 {
831         char *s;
832
833         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
834                 v->number = 0;
835                 s = v->string;
836                 if (s && *s) {
837                         debug_printf_eval("getvar_i: '%s'->", s);
838                         v->number = my_strtod(&s);
839                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
840                         if (v->type & VF_USER) {
841                                 s = skip_spaces(s);
842                                 if (*s != '\0')
843                                         v->type &= ~VF_USER;
844                         }
845                 } else {
846                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
847                         v->type &= ~VF_USER;
848                 }
849                 v->type |= VF_CACHED;
850         }
851         debug_printf_eval("getvar_i: %f\n", v->number);
852         return v->number;
853 }
854
855 /* Used for operands of bitwise ops */
856 static unsigned long getvar_i_int(var *v)
857 {
858         double d = getvar_i(v);
859
860         /* Casting doubles to longs is undefined for values outside
861          * of target type range. Try to widen it as much as possible */
862         if (d >= 0)
863                 return (unsigned long)d;
864         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
865         return - (long) (unsigned long) (-d);
866 }
867
868 static var *copyvar(var *dest, const var *src)
869 {
870         if (dest != src) {
871                 clrvar(dest);
872                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
873                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
874                 dest->number = src->number;
875                 if (src->string)
876                         dest->string = xstrdup(src->string);
877         }
878         handle_special(dest);
879         return dest;
880 }
881
882 static var *incvar(var *v)
883 {
884         return setvar_i(v, getvar_i(v) + 1.0);
885 }
886
887 /* return true if v is number or numeric string */
888 static int is_numeric(var *v)
889 {
890         getvar_i(v);
891         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
892 }
893
894 /* return 1 when value of v corresponds to true, 0 otherwise */
895 static int istrue(var *v)
896 {
897         if (is_numeric(v))
898                 return (v->number != 0);
899         return (v->string && v->string[0]);
900 }
901
902 /* temporary variables allocator. Last allocated should be first freed */
903 static var *nvalloc(int n)
904 {
905         nvblock *pb = NULL;
906         var *v, *r;
907         int size;
908
909         while (g_cb) {
910                 pb = g_cb;
911                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
912                         break;
913                 g_cb = g_cb->next;
914         }
915
916         if (!g_cb) {
917                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
918                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
919                 g_cb->size = size;
920                 g_cb->pos = g_cb->nv;
921                 g_cb->prev = pb;
922                 /*g_cb->next = NULL; - xzalloc did it */
923                 if (pb)
924                         pb->next = g_cb;
925         }
926
927         v = r = g_cb->pos;
928         g_cb->pos += n;
929
930         while (v < g_cb->pos) {
931                 v->type = 0;
932                 v->string = NULL;
933                 v++;
934         }
935
936         return r;
937 }
938
939 static void nvfree(var *v)
940 {
941         var *p;
942
943         if (v < g_cb->nv || v >= g_cb->pos)
944                 syntax_error(EMSG_INTERNAL_ERROR);
945
946         for (p = v; p < g_cb->pos; p++) {
947                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
948                         clear_array(iamarray(p));
949                         free(p->x.array->items);
950                         free(p->x.array);
951                 }
952                 if (p->type & VF_WALK) {
953                         walker_list *n;
954                         walker_list *w = p->x.walker;
955                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
956                         p->x.walker = NULL;
957                         while (w) {
958                                 n = w->prev;
959                                 debug_printf_walker(" free(%p)\n", w);
960                                 free(w);
961                                 w = n;
962                         }
963                 }
964                 clrvar(p);
965         }
966
967         g_cb->pos = v;
968         while (g_cb->prev && g_cb->pos == g_cb->nv) {
969                 g_cb = g_cb->prev;
970         }
971 }
972
973 /* ------- awk program text parsing ------- */
974
975 /* Parse next token pointed by global pos, place results into global ttt.
976  * If token isn't expected, give away. Return token class
977  */
978 static uint32_t next_token(uint32_t expected)
979 {
980 #define concat_inserted (G.next_token__concat_inserted)
981 #define save_tclass     (G.next_token__save_tclass)
982 #define save_info       (G.next_token__save_info)
983 /* Initialized to TC_OPTERM: */
984 #define ltclass         (G.next_token__ltclass)
985
986         char *p, *s;
987         const char *tl;
988         uint32_t tc;
989         const uint32_t *ti;
990         int l;
991
992         if (t_rollback) {
993                 t_rollback = FALSE;
994
995         } else if (concat_inserted) {
996                 concat_inserted = FALSE;
997                 t_tclass = save_tclass;
998                 t_info = save_info;
999
1000         } else {
1001                 p = g_pos;
1002  readnext:
1003                 p = skip_spaces(p);
1004                 g_lineno = t_lineno;
1005                 if (*p == '#')
1006                         while (*p != '\n' && *p != '\0')
1007                                 p++;
1008
1009                 if (*p == '\n')
1010                         t_lineno++;
1011
1012                 if (*p == '\0') {
1013                         tc = TC_EOF;
1014
1015                 } else if (*p == '\"') {
1016                         /* it's a string */
1017                         t_string = s = ++p;
1018                         while (*p != '\"') {
1019                                 char *pp = p;
1020                                 if (*p == '\0' || *p == '\n')
1021                                         syntax_error(EMSG_UNEXP_EOS);
1022                                 *s++ = nextchar(&pp);
1023                                 p = pp;
1024                         }
1025                         p++;
1026                         *s = '\0';
1027                         tc = TC_STRING;
1028
1029                 } else if ((expected & TC_REGEXP) && *p == '/') {
1030                         /* it's regexp */
1031                         t_string = s = ++p;
1032                         while (*p != '/') {
1033                                 if (*p == '\0' || *p == '\n')
1034                                         syntax_error(EMSG_UNEXP_EOS);
1035                                 *s = *p++;
1036                                 if (*s++ == '\\') {
1037                                         char *pp = p;
1038                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1039                                         if (*p == '\\')
1040                                                 *s++ = '\\';
1041                                         if (pp == p)
1042                                                 *s++ = *p++;
1043                                         else
1044                                                 p = pp;
1045                                 }
1046                         }
1047                         p++;
1048                         *s = '\0';
1049                         tc = TC_REGEXP;
1050
1051                 } else if (*p == '.' || isdigit(*p)) {
1052                         /* it's a number */
1053                         char *pp = p;
1054                         t_double = my_strtod(&pp);
1055                         p = pp;
1056                         if (*pp == '.')
1057                                 syntax_error(EMSG_UNEXP_TOKEN);
1058                         tc = TC_NUMBER;
1059
1060                 } else {
1061                         /* search for something known */
1062                         tl = tokenlist;
1063                         tc = 0x00000001;
1064                         ti = tokeninfo;
1065                         while (*tl) {
1066                                 l = *tl++;
1067                                 if (l == NTCC) {
1068                                         tc <<= 1;
1069                                         continue;
1070                                 }
1071                                 /* if token class is expected, token
1072                                  * matches and it's not a longer word,
1073                                  * then this is what we are looking for
1074                                  */
1075                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1076                                  && *tl == *p && strncmp(p, tl, l) == 0
1077                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1078                                 ) {
1079                                         t_info = *ti;
1080                                         p += l;
1081                                         break;
1082                                 }
1083                                 ti++;
1084                                 tl += l;
1085                         }
1086
1087                         if (!*tl) {
1088                                 /* it's a name (var/array/function),
1089                                  * otherwise it's something wrong
1090                                  */
1091                                 if (!isalnum_(*p))
1092                                         syntax_error(EMSG_UNEXP_TOKEN);
1093
1094                                 t_string = --p;
1095                                 while (isalnum_(*++p)) {
1096                                         p[-1] = *p;
1097                                 }
1098                                 p[-1] = '\0';
1099                                 tc = TC_VARIABLE;
1100                                 /* also consume whitespace between functionname and bracket */
1101                                 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1102                                         p = skip_spaces(p);
1103                                 if (*p == '(') {
1104                                         tc = TC_FUNCTION;
1105                                 } else {
1106                                         if (*p == '[') {
1107                                                 p++;
1108                                                 tc = TC_ARRAY;
1109                                         }
1110                                 }
1111                         }
1112                 }
1113                 g_pos = p;
1114
1115                 /* skipping newlines in some cases */
1116                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1117                         goto readnext;
1118
1119                 /* insert concatenation operator when needed */
1120                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1121                         concat_inserted = TRUE;
1122                         save_tclass = tc;
1123                         save_info = t_info;
1124                         tc = TC_BINOP;
1125                         t_info = OC_CONCAT | SS | P(35);
1126                 }
1127
1128                 t_tclass = tc;
1129         }
1130         ltclass = t_tclass;
1131
1132         /* Are we ready for this? */
1133         if (!(ltclass & expected))
1134                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1135                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1136
1137         return ltclass;
1138 #undef concat_inserted
1139 #undef save_tclass
1140 #undef save_info
1141 #undef ltclass
1142 }
1143
1144 static void rollback_token(void)
1145 {
1146         t_rollback = TRUE;
1147 }
1148
1149 static node *new_node(uint32_t info)
1150 {
1151         node *n;
1152
1153         n = xzalloc(sizeof(node));
1154         n->info = info;
1155         n->lineno = g_lineno;
1156         return n;
1157 }
1158
1159 static void mk_re_node(const char *s, node *n, regex_t *re)
1160 {
1161         n->info = OC_REGEXP;
1162         n->l.re = re;
1163         n->r.ire = re + 1;
1164         xregcomp(re, s, REG_EXTENDED);
1165         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1166 }
1167
1168 static node *condition(void)
1169 {
1170         next_token(TC_SEQSTART);
1171         return parse_expr(TC_SEQTERM);
1172 }
1173
1174 /* parse expression terminated by given argument, return ptr
1175  * to built subtree. Terminator is eaten by parse_expr */
1176 static node *parse_expr(uint32_t iexp)
1177 {
1178         node sn;
1179         node *cn = &sn;
1180         node *vn, *glptr;
1181         uint32_t tc, xtc;
1182         var *v;
1183
1184         sn.info = PRIMASK;
1185         sn.r.n = glptr = NULL;
1186         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1187
1188         while (!((tc = next_token(xtc)) & iexp)) {
1189                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1190                         /* input redirection (<) attached to glptr node */
1191                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1192                         cn->a.n = glptr;
1193                         xtc = TC_OPERAND | TC_UOPPRE;
1194                         glptr = NULL;
1195
1196                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1197                         /* for binary and postfix-unary operators, jump back over
1198                          * previous operators with higher priority */
1199                         vn = cn;
1200                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1201                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1202                         ) {
1203                                 vn = vn->a.n;
1204                         }
1205                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1206                                 t_info += P(6);
1207                         cn = vn->a.n->r.n = new_node(t_info);
1208                         cn->a.n = vn->a.n;
1209                         if (tc & TC_BINOP) {
1210                                 cn->l.n = vn;
1211                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1212                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1213                                         /* it's a pipe */
1214                                         next_token(TC_GETLINE);
1215                                         /* give maximum priority to this pipe */
1216                                         cn->info &= ~PRIMASK;
1217                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1218                                 }
1219                         } else {
1220                                 cn->r.n = vn;
1221                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1222                         }
1223                         vn->a.n = cn;
1224
1225                 } else {
1226                         /* for operands and prefix-unary operators, attach them
1227                          * to last node */
1228                         vn = cn;
1229                         cn = vn->r.n = new_node(t_info);
1230                         cn->a.n = vn;
1231                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1232                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1233                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1234                                 /* one should be very careful with switch on tclass -
1235                                  * only simple tclasses should be used! */
1236                                 switch (tc) {
1237                                 case TC_VARIABLE:
1238                                 case TC_ARRAY:
1239                                         cn->info = OC_VAR;
1240                                         v = hash_search(ahash, t_string);
1241                                         if (v != NULL) {
1242                                                 cn->info = OC_FNARG;
1243                                                 cn->l.aidx = v->x.aidx;
1244                                         } else {
1245                                                 cn->l.v = newvar(t_string);
1246                                         }
1247                                         if (tc & TC_ARRAY) {
1248                                                 cn->info |= xS;
1249                                                 cn->r.n = parse_expr(TC_ARRTERM);
1250                                         }
1251                                         break;
1252
1253                                 case TC_NUMBER:
1254                                 case TC_STRING:
1255                                         cn->info = OC_VAR;
1256                                         v = cn->l.v = xzalloc(sizeof(var));
1257                                         if (tc & TC_NUMBER)
1258                                                 setvar_i(v, t_double);
1259                                         else
1260                                                 setvar_s(v, t_string);
1261                                         break;
1262
1263                                 case TC_REGEXP:
1264                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1265                                         break;
1266
1267                                 case TC_FUNCTION:
1268                                         cn->info = OC_FUNC;
1269                                         cn->r.f = newfunc(t_string);
1270                                         cn->l.n = condition();
1271                                         break;
1272
1273                                 case TC_SEQSTART:
1274                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1275                                         cn->a.n = vn;
1276                                         break;
1277
1278                                 case TC_GETLINE:
1279                                         glptr = cn;
1280                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1281                                         break;
1282
1283                                 case TC_BUILTIN:
1284                                         cn->l.n = condition();
1285                                         break;
1286                                 }
1287                         }
1288                 }
1289         }
1290         return sn.r.n;
1291 }
1292
1293 /* add node to chain. Return ptr to alloc'd node */
1294 static node *chain_node(uint32_t info)
1295 {
1296         node *n;
1297
1298         if (!seq->first)
1299                 seq->first = seq->last = new_node(0);
1300
1301         if (seq->programname != g_progname) {
1302                 seq->programname = g_progname;
1303                 n = chain_node(OC_NEWSOURCE);
1304                 n->l.new_progname = xstrdup(g_progname);
1305         }
1306
1307         n = seq->last;
1308         n->info = info;
1309         seq->last = n->a.n = new_node(OC_DONE);
1310
1311         return n;
1312 }
1313
1314 static void chain_expr(uint32_t info)
1315 {
1316         node *n;
1317
1318         n = chain_node(info);
1319         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1320         if (t_tclass & TC_GRPTERM)
1321                 rollback_token();
1322 }
1323
1324 static node *chain_loop(node *nn)
1325 {
1326         node *n, *n2, *save_brk, *save_cont;
1327
1328         save_brk = break_ptr;
1329         save_cont = continue_ptr;
1330
1331         n = chain_node(OC_BR | Vx);
1332         continue_ptr = new_node(OC_EXEC);
1333         break_ptr = new_node(OC_EXEC);
1334         chain_group();
1335         n2 = chain_node(OC_EXEC | Vx);
1336         n2->l.n = nn;
1337         n2->a.n = n;
1338         continue_ptr->a.n = n2;
1339         break_ptr->a.n = n->r.n = seq->last;
1340
1341         continue_ptr = save_cont;
1342         break_ptr = save_brk;
1343
1344         return n;
1345 }
1346
1347 /* parse group and attach it to chain */
1348 static void chain_group(void)
1349 {
1350         uint32_t c;
1351         node *n, *n2, *n3;
1352
1353         do {
1354                 c = next_token(TC_GRPSEQ);
1355         } while (c & TC_NEWLINE);
1356
1357         if (c & TC_GRPSTART) {
1358                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1359                         if (t_tclass & TC_NEWLINE)
1360                                 continue;
1361                         rollback_token();
1362                         chain_group();
1363                 }
1364         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1365                 rollback_token();
1366                 chain_expr(OC_EXEC | Vx);
1367         } else {                                                /* TC_STATEMNT */
1368                 switch (t_info & OPCLSMASK) {
1369                 case ST_IF:
1370                         n = chain_node(OC_BR | Vx);
1371                         n->l.n = condition();
1372                         chain_group();
1373                         n2 = chain_node(OC_EXEC);
1374                         n->r.n = seq->last;
1375                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1376                                 chain_group();
1377                                 n2->a.n = seq->last;
1378                         } else {
1379                                 rollback_token();
1380                         }
1381                         break;
1382
1383                 case ST_WHILE:
1384                         n2 = condition();
1385                         n = chain_loop(NULL);
1386                         n->l.n = n2;
1387                         break;
1388
1389                 case ST_DO:
1390                         n2 = chain_node(OC_EXEC);
1391                         n = chain_loop(NULL);
1392                         n2->a.n = n->a.n;
1393                         next_token(TC_WHILE);
1394                         n->l.n = condition();
1395                         break;
1396
1397                 case ST_FOR:
1398                         next_token(TC_SEQSTART);
1399                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1400                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1401                                 if ((n2->info & OPCLSMASK) != OC_IN)
1402                                         syntax_error(EMSG_UNEXP_TOKEN);
1403                                 n = chain_node(OC_WALKINIT | VV);
1404                                 n->l.n = n2->l.n;
1405                                 n->r.n = n2->r.n;
1406                                 n = chain_loop(NULL);
1407                                 n->info = OC_WALKNEXT | Vx;
1408                                 n->l.n = n2->l.n;
1409                         } else {                        /* for (;;) */
1410                                 n = chain_node(OC_EXEC | Vx);
1411                                 n->l.n = n2;
1412                                 n2 = parse_expr(TC_SEMICOL);
1413                                 n3 = parse_expr(TC_SEQTERM);
1414                                 n = chain_loop(n3);
1415                                 n->l.n = n2;
1416                                 if (!n2)
1417                                         n->info = OC_EXEC;
1418                         }
1419                         break;
1420
1421                 case OC_PRINT:
1422                 case OC_PRINTF:
1423                         n = chain_node(t_info);
1424                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1425                         if (t_tclass & TC_OUTRDR) {
1426                                 n->info |= t_info;
1427                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1428                         }
1429                         if (t_tclass & TC_GRPTERM)
1430                                 rollback_token();
1431                         break;
1432
1433                 case OC_BREAK:
1434                         n = chain_node(OC_EXEC);
1435                         n->a.n = break_ptr;
1436                         break;
1437
1438                 case OC_CONTINUE:
1439                         n = chain_node(OC_EXEC);
1440                         n->a.n = continue_ptr;
1441                         break;
1442
1443                 /* delete, next, nextfile, return, exit */
1444                 default:
1445                         chain_expr(t_info);
1446                 }
1447         }
1448 }
1449
1450 static void parse_program(char *p)
1451 {
1452         uint32_t tclass;
1453         node *cn;
1454         func *f;
1455         var *v;
1456
1457         g_pos = p;
1458         t_lineno = 1;
1459         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1460                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1461
1462                 if (tclass & TC_OPTERM)
1463                         continue;
1464
1465                 seq = &mainseq;
1466                 if (tclass & TC_BEGIN) {
1467                         seq = &beginseq;
1468                         chain_group();
1469
1470                 } else if (tclass & TC_END) {
1471                         seq = &endseq;
1472                         chain_group();
1473
1474                 } else if (tclass & TC_FUNCDECL) {
1475                         next_token(TC_FUNCTION);
1476                         g_pos++;
1477                         f = newfunc(t_string);
1478                         f->body.first = NULL;
1479                         f->nargs = 0;
1480                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1481                                 v = findvar(ahash, t_string);
1482                                 v->x.aidx = f->nargs++;
1483
1484                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1485                                         break;
1486                         }
1487                         seq = &f->body;
1488                         chain_group();
1489                         clear_array(ahash);
1490
1491                 } else if (tclass & TC_OPSEQ) {
1492                         rollback_token();
1493                         cn = chain_node(OC_TEST);
1494                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1495                         if (t_tclass & TC_GRPSTART) {
1496                                 rollback_token();
1497                                 chain_group();
1498                         } else {
1499                                 chain_node(OC_PRINT);
1500                         }
1501                         cn->r.n = mainseq.last;
1502
1503                 } else /* if (tclass & TC_GRPSTART) */ {
1504                         rollback_token();
1505                         chain_group();
1506                 }
1507         }
1508 }
1509
1510
1511 /* -------- program execution part -------- */
1512
1513 static node *mk_splitter(const char *s, tsplitter *spl)
1514 {
1515         regex_t *re, *ire;
1516         node *n;
1517
1518         re = &spl->re[0];
1519         ire = &spl->re[1];
1520         n = &spl->n;
1521         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1522                 regfree(re);
1523                 regfree(ire); // TODO: nuke ire, use re+1?
1524         }
1525         if (strlen(s) > 1) {
1526                 mk_re_node(s, n, re);
1527         } else {
1528                 n->info = (uint32_t) *s;
1529         }
1530
1531         return n;
1532 }
1533
1534 /* use node as a regular expression. Supplied with node ptr and regex_t
1535  * storage space. Return ptr to regex (if result points to preg, it should
1536  * be later regfree'd manually
1537  */
1538 static regex_t *as_regex(node *op, regex_t *preg)
1539 {
1540         int cflags;
1541         var *v;
1542         const char *s;
1543
1544         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1545                 return icase ? op->r.ire : op->l.re;
1546         }
1547         v = nvalloc(1);
1548         s = getvar_s(evaluate(op, v));
1549
1550         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1551         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1552          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1553          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1554          * (maybe gsub is not supposed to use REG_EXTENDED?).
1555          */
1556         if (regcomp(preg, s, cflags)) {
1557                 cflags &= ~REG_EXTENDED;
1558                 xregcomp(preg, s, cflags);
1559         }
1560         nvfree(v);
1561         return preg;
1562 }
1563
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * The buffer is reallocated even when n == *size, so there is always
 * at least one spare byte. *size is updated on reallocation.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1576
1577 /* resize field storage space */
1578 static void fsrealloc(int size)
1579 {
1580         int i;
1581
1582         if (size >= maxfields) {
1583                 i = maxfields;
1584                 maxfields = size + 16;
1585                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1586                 for (; i < maxfields; i++) {
1587                         Fields[i].type = VF_SPECIAL;
1588                         Fields[i].string = NULL;
1589                 }
1590         }
1591
1592         if (size < nfields) {
1593                 for (i = size; i < nfields; i++) {
1594                         clrvar(Fields + i);
1595                 }
1596         }
1597         nfields = size;
1598 }
1599
1600 static int awk_split(const char *s, node *spl, char **slist)
1601 {
1602         int l, n = 0;
1603         char c[4];
1604         char *s1;
1605         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1606
1607         /* in worst case, each char would be a separate field */
1608         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1609         strcpy(s1, s);
1610
1611         c[0] = c[1] = (char)spl->info;
1612         c[2] = c[3] = '\0';
1613         if (*getvar_s(intvar[RS]) == '\0')
1614                 c[2] = '\n';
1615
1616         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1617                 if (!*s)
1618                         return n; /* "": zero fields */
1619                 n++; /* at least one field will be there */
1620                 do {
1621                         l = strcspn(s, c+2); /* len till next NUL or \n */
1622                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1623                          && pmatch[0].rm_so <= l
1624                         ) {
1625                                 l = pmatch[0].rm_so;
1626                                 if (pmatch[0].rm_eo == 0) {
1627                                         l++;
1628                                         pmatch[0].rm_eo++;
1629                                 }
1630                                 n++; /* we saw yet another delimiter */
1631                         } else {
1632                                 pmatch[0].rm_eo = l;
1633                                 if (s[l])
1634                                         pmatch[0].rm_eo++;
1635                         }
1636                         memcpy(s1, s, l);
1637                         /* make sure we remove *all* of the separator chars */
1638                         do {
1639                                 s1[l] = '\0';
1640                         } while (++l < pmatch[0].rm_eo);
1641                         nextword(&s1);
1642                         s += pmatch[0].rm_eo;
1643                 } while (*s);
1644                 return n;
1645         }
1646         if (c[0] == '\0') {  /* null split */
1647                 while (*s) {
1648                         *s1++ = *s++;
1649                         *s1++ = '\0';
1650                         n++;
1651                 }
1652                 return n;
1653         }
1654         if (c[0] != ' ') {  /* single-character split */
1655                 if (icase) {
1656                         c[0] = toupper(c[0]);
1657                         c[1] = tolower(c[1]);
1658                 }
1659                 if (*s1)
1660                         n++;
1661                 while ((s1 = strpbrk(s1, c))) {
1662                         *s1++ = '\0';
1663                         n++;
1664                 }
1665                 return n;
1666         }
1667         /* space split */
1668         while (*s) {
1669                 s = skip_whitespace(s);
1670                 if (!*s)
1671                         break;
1672                 n++;
1673                 while (*s && !isspace(*s))
1674                         *s1++ = *s++;
1675                 *s1++ = '\0';
1676         }
1677         return n;
1678 }
1679
1680 static void split_f0(void)
1681 {
1682 /* static char *fstrings; */
1683 #define fstrings (G.split_f0__fstrings)
1684
1685         int i, n;
1686         char *s;
1687
1688         if (is_f0_split)
1689                 return;
1690
1691         is_f0_split = TRUE;
1692         free(fstrings);
1693         fsrealloc(0);
1694         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1695         fsrealloc(n);
1696         s = fstrings;
1697         for (i = 0; i < n; i++) {
1698                 Fields[i].string = nextword(&s);
1699                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1700         }
1701
1702         /* set NF manually to avoid side effects */
1703         clrvar(intvar[NF]);
1704         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1705         intvar[NF]->number = nfields;
1706 #undef fstrings
1707 }
1708
1709 /* perform additional actions when some internal variables changed */
1710 static void handle_special(var *v)
1711 {
1712         int n;
1713         char *b;
1714         const char *sep, *s;
1715         int sl, l, len, i, bsize;
1716
1717         if (!(v->type & VF_SPECIAL))
1718                 return;
1719
1720         if (v == intvar[NF]) {
1721                 n = (int)getvar_i(v);
1722                 fsrealloc(n);
1723
1724                 /* recalculate $0 */
1725                 sep = getvar_s(intvar[OFS]);
1726                 sl = strlen(sep);
1727                 b = NULL;
1728                 len = 0;
1729                 for (i = 0; i < n; i++) {
1730                         s = getvar_s(&Fields[i]);
1731                         l = strlen(s);
1732                         if (b) {
1733                                 memcpy(b+len, sep, sl);
1734                                 len += sl;
1735                         }
1736                         b = qrealloc(b, len+l+sl, &bsize);
1737                         memcpy(b+len, s, l);
1738                         len += l;
1739                 }
1740                 if (b)
1741                         b[len] = '\0';
1742                 setvar_p(intvar[F0], b);
1743                 is_f0_split = TRUE;
1744
1745         } else if (v == intvar[F0]) {
1746                 is_f0_split = FALSE;
1747
1748         } else if (v == intvar[FS]) {
1749                 mk_splitter(getvar_s(v), &fsplitter);
1750
1751         } else if (v == intvar[RS]) {
1752                 mk_splitter(getvar_s(v), &rsplitter);
1753
1754         } else if (v == intvar[IGNORECASE]) {
1755                 icase = istrue(v);
1756
1757         } else {                                /* $n */
1758                 n = getvar_i(intvar[NF]);
1759                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1760                 /* right here v is invalid. Just to note... */
1761         }
1762 }
1763
1764 /* step through func/builtin/etc arguments */
1765 static node *nextarg(node **pn)
1766 {
1767         node *n;
1768
1769         n = *pn;
1770         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1771                 *pn = n->r.n;
1772                 n = n->l.n;
1773         } else {
1774                 *pn = NULL;
1775         }
1776         return n;
1777 }
1778
1779 static void hashwalk_init(var *v, xhash *array)
1780 {
1781         hash_item *hi;
1782         unsigned i;
1783         walker_list *w;
1784         walker_list *prev_walker;
1785
1786         if (v->type & VF_WALK) {
1787                 prev_walker = v->x.walker;
1788         } else {
1789                 v->type |= VF_WALK;
1790                 prev_walker = NULL;
1791         }
1792         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1793
1794         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1795         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1796         w->cur = w->end = w->wbuf;
1797         w->prev = prev_walker;
1798         for (i = 0; i < array->csize; i++) {
1799                 hi = array->items[i];
1800                 while (hi) {
1801                         strcpy(w->end, hi->name);
1802                         nextword(&w->end);
1803                         hi = hi->next;
1804                 }
1805         }
1806 }
1807
1808 static int hashwalk_next(var *v)
1809 {
1810         walker_list *w = v->x.walker;
1811
1812         if (w->cur >= w->end) {
1813                 walker_list *prev_walker = w->prev;
1814
1815                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1816                 free(w);
1817                 v->x.walker = prev_walker;
1818                 return FALSE;
1819         }
1820
1821         setvar_s(v, nextword(&w->cur));
1822         return TRUE;
1823 }
1824
1825 /* evaluate node, return 1 when result is true, 0 otherwise */
1826 static int ptest(node *pattern)
1827 {
1828         /* ptest__v is "static": to save stack space? */
1829         return istrue(evaluate(pattern, &G.ptest__v));
1830 }
1831
1832 /* read next record from stream rsm into a variable v */
1833 static int awk_getline(rstream *rsm, var *v)
1834 {
1835         char *b;
1836         regmatch_t pmatch[2];
1837         int size, a, p, pp = 0;
1838         int fd, so, eo, r, rp;
1839         char c, *m, *s;
1840
1841         /* we're using our own buffer since we need access to accumulating
1842          * characters
1843          */
1844         fd = fileno(rsm->F);
1845         m = rsm->buffer;
1846         a = rsm->adv;
1847         p = rsm->pos;
1848         size = rsm->size;
1849         c = (char) rsplitter.n.info;
1850         rp = 0;
1851
1852         if (!m)
1853                 m = qrealloc(m, 256, &size);
1854
1855         do {
1856                 b = m + a;
1857                 so = eo = p;
1858                 r = 1;
1859                 if (p > 0) {
1860                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1861                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1862                                                         b, 1, pmatch, 0) == 0) {
1863                                         so = pmatch[0].rm_so;
1864                                         eo = pmatch[0].rm_eo;
1865                                         if (b[eo] != '\0')
1866                                                 break;
1867                                 }
1868                         } else if (c != '\0') {
1869                                 s = strchr(b+pp, c);
1870                                 if (!s)
1871                                         s = memchr(b+pp, '\0', p - pp);
1872                                 if (s) {
1873                                         so = eo = s-b;
1874                                         eo++;
1875                                         break;
1876                                 }
1877                         } else {
1878                                 while (b[rp] == '\n')
1879                                         rp++;
1880                                 s = strstr(b+rp, "\n\n");
1881                                 if (s) {
1882                                         so = eo = s-b;
1883                                         while (b[eo] == '\n')
1884                                                 eo++;
1885                                         if (b[eo] != '\0')
1886                                                 break;
1887                                 }
1888                         }
1889                 }
1890
1891                 if (a > 0) {
1892                         memmove(m, m+a, p+1);
1893                         b = m;
1894                         a = 0;
1895                 }
1896
1897                 m = qrealloc(m, a+p+128, &size);
1898                 b = m + a;
1899                 pp = p;
1900                 p += safe_read(fd, b+p, size-p-1);
1901                 if (p < pp) {
1902                         p = 0;
1903                         r = 0;
1904                         setvar_i(intvar[ERRNO], errno);
1905                 }
1906                 b[p] = '\0';
1907
1908         } while (p > pp);
1909
1910         if (p == 0) {
1911                 r--;
1912         } else {
1913                 c = b[so]; b[so] = '\0';
1914                 setvar_s(v, b+rp);
1915                 v->type |= VF_USER;
1916                 b[so] = c;
1917                 c = b[eo]; b[eo] = '\0';
1918                 setvar_s(intvar[RT], b+so);
1919                 b[eo] = c;
1920         }
1921
1922         rsm->buffer = m;
1923         rsm->adv = a + eo;
1924         rsm->pos = p - eo;
1925         rsm->size = size;
1926
1927         return r;
1928 }
1929
1930 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1931 {
1932         int r = 0;
1933         char c;
1934         const char *s = format;
1935
1936         if (int_as_int && n == (int)n) {
1937                 r = snprintf(b, size, "%d", (int)n);
1938         } else {
1939                 do { c = *s; } while (c && *++s);
1940                 if (strchr("diouxX", c)) {
1941                         r = snprintf(b, size, format, (int)n);
1942                 } else if (strchr("eEfgG", c)) {
1943                         r = snprintf(b, size, format, n);
1944                 } else {
1945                         syntax_error(EMSG_INV_FMT);
1946                 }
1947         }
1948         return r;
1949 }
1950
1951 /* formatted output into an allocated buffer, return ptr to buffer */
1952 static char *awk_printf(node *n)
1953 {
1954         char *b = NULL;
1955         char *fmt, *s, *f;
1956         const char *s1;
1957         int i, j, incr, bsize;
1958         char c, c1;
1959         var *v, *arg;
1960
1961         v = nvalloc(1);
1962         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1963
1964         i = 0;
1965         while (*f) {
1966                 s = f;
1967                 while (*f && (*f != '%' || *++f == '%'))
1968                         f++;
1969                 while (*f && !isalpha(*f)) {
1970                         if (*f == '*')
1971                                 syntax_error("%*x formats are not supported");
1972                         f++;
1973                 }
1974
1975                 incr = (f - s) + MAXVARFMT;
1976                 b = qrealloc(b, incr + i, &bsize);
1977                 c = *f;
1978                 if (c != '\0')
1979                         f++;
1980                 c1 = *f;
1981                 *f = '\0';
1982                 arg = evaluate(nextarg(&n), v);
1983
1984                 j = i;
1985                 if (c == 'c' || !c) {
1986                         i += sprintf(b+i, s, is_numeric(arg) ?
1987                                         (char)getvar_i(arg) : *getvar_s(arg));
1988                 } else if (c == 's') {
1989                         s1 = getvar_s(arg);
1990                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1991                         i += sprintf(b+i, s, s1);
1992                 } else {
1993                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1994                 }
1995                 *f = c1;
1996
1997                 /* if there was an error while sprintf, return value is negative */
1998                 if (i < j)
1999                         i = j;
2000         }
2001
2002         free(fmt);
2003         nvfree(v);
2004         b = xrealloc(b, i + 1);
2005         b[i] = '\0';
2006         return b;
2007 }
2008
2009 /* Common substitution routine.
2010  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2011  * store result into (dest), return number of substitutions.
2012  * If nm = 0, replace all matches.
2013  * If src or dst is NULL, use $0.
2014  * If subexp != 0, enable subexpression matching (\1-\9).
2015  */
2016 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2017 {
2018         char *resbuf;
2019         const char *sp;
2020         int match_no, residx, replen, resbufsize;
2021         int regexec_flags;
2022         regmatch_t pmatch[10];
2023         regex_t sreg, *regex;
2024
2025         resbuf = NULL;
2026         residx = 0;
2027         match_no = 0;
2028         regexec_flags = 0;
2029         regex = as_regex(rn, &sreg);
2030         sp = getvar_s(src ? src : intvar[F0]);
2031         replen = strlen(repl);
2032         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2033                 int so = pmatch[0].rm_so;
2034                 int eo = pmatch[0].rm_eo;
2035
2036                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2037                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2038                 memcpy(resbuf + residx, sp, eo);
2039                 residx += eo;
2040                 if (++match_no >= nm) {
2041                         const char *s;
2042                         int nbs;
2043
2044                         /* replace */
2045                         residx -= (eo - so);
2046                         nbs = 0;
2047                         for (s = repl; *s; s++) {
2048                                 char c = resbuf[residx++] = *s;
2049                                 if (c == '\\') {
2050                                         nbs++;
2051                                         continue;
2052                                 }
2053                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2054                                         int j;
2055                                         residx -= ((nbs + 3) >> 1);
2056                                         j = 0;
2057                                         if (c != '&') {
2058                                                 j = c - '0';
2059                                                 nbs++;
2060                                         }
2061                                         if (nbs % 2) {
2062                                                 resbuf[residx++] = c;
2063                                         } else {
2064                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2065                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2066                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2067                                                 residx += n;
2068                                         }
2069                                 }
2070                                 nbs = 0;
2071                         }
2072                 }
2073
2074                 regexec_flags = REG_NOTBOL;
2075                 sp += eo;
2076                 if (match_no == nm)
2077                         break;
2078                 if (eo == so) {
2079                         /* Empty match (e.g. "b*" will match anywhere).
2080                          * Advance by one char. */
2081 //BUG (bug 1333):
2082 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2083 //... and will erroneously match "b" even though it is NOT at the word start.
2084 //we need REG_NOTBOW but it does not exist...
2085 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2086 //it should be able to do it correctly.
2087                         /* Subtle: this is safe only because
2088                          * qrealloc allocated at least one extra byte */
2089                         resbuf[residx] = *sp;
2090                         if (*sp == '\0')
2091                                 goto ret;
2092                         sp++;
2093                         residx++;
2094                 }
2095         }
2096
2097         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2098         strcpy(resbuf + residx, sp);
2099  ret:
2100         //bb_error_msg("end sp:'%s'%p", sp,sp);
2101         setvar_p(dest ? dest : intvar[F0], resbuf);
2102         if (regex == &sreg)
2103                 regfree(regex);
2104         return match_no;
2105 }
2106
2107 static NOINLINE int do_mktime(const char *ds)
2108 {
2109         struct tm then;
2110         int count;
2111
2112         /*memset(&then, 0, sizeof(then)); - not needed */
2113         then.tm_isdst = -1; /* default is unknown */
2114
2115         /* manpage of mktime says these fields are ints,
2116          * so we can sscanf stuff directly into them */
2117         count = sscanf(ds, "%u %u %u %u %u %u %d",
2118                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2119                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2120                 &then.tm_isdst);
2121
2122         if (count < 6
2123          || (unsigned)then.tm_mon < 1
2124          || (unsigned)then.tm_year < 1900
2125         ) {
2126                 return -1;
2127         }
2128
2129         then.tm_mon -= 1;
2130         then.tm_year -= 1900;
2131
2132         return mktime(&then);
2133 }
2134
2135 static NOINLINE var *exec_builtin(node *op, var *res)
2136 {
2137 #define tspl (G.exec_builtin__tspl)
2138
2139         var *tv;
2140         node *an[4];
2141         var *av[4];
2142         const char *as[4];
2143         regmatch_t pmatch[2];
2144         regex_t sreg, *re;
2145         node *spl;
2146         uint32_t isr, info;
2147         int nargs;
2148         time_t tt;
2149         int i, l, ll, n;
2150
2151         tv = nvalloc(4);
2152         isr = info = op->info;
2153         op = op->l.n;
2154
2155         av[2] = av[3] = NULL;
2156         for (i = 0; i < 4 && op; i++) {
2157                 an[i] = nextarg(&op);
2158                 if (isr & 0x09000000)
2159                         av[i] = evaluate(an[i], &tv[i]);
2160                 if (isr & 0x08000000)
2161                         as[i] = getvar_s(av[i]);
2162                 isr >>= 1;
2163         }
2164
2165         nargs = i;
2166         if ((uint32_t)nargs < (info >> 30))
2167                 syntax_error(EMSG_TOO_FEW_ARGS);
2168
2169         info &= OPNMASK;
2170         switch (info) {
2171
2172         case B_a2:
2173 #if ENABLE_FEATURE_AWK_LIBM
2174                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2175 #else
2176                 syntax_error(EMSG_NO_MATH);
2177 #endif
2178                 break;
2179
2180         case B_sp: {
2181                 char *s, *s1;
2182
2183                 if (nargs > 2) {
2184                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2185                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2186                 } else {
2187                         spl = &fsplitter.n;
2188                 }
2189
2190                 n = awk_split(as[0], spl, &s);
2191                 s1 = s;
2192                 clear_array(iamarray(av[1]));
2193                 for (i = 1; i <= n; i++)
2194                         setari_u(av[1], i, nextword(&s));
2195                 free(s1);
2196                 setvar_i(res, n);
2197                 break;
2198         }
2199
2200         case B_ss: {
2201                 char *s;
2202
2203                 l = strlen(as[0]);
2204                 i = getvar_i(av[1]) - 1;
2205                 if (i > l)
2206                         i = l;
2207                 if (i < 0)
2208                         i = 0;
2209                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2210                 if (n < 0)
2211                         n = 0;
2212                 s = xstrndup(as[0]+i, n);
2213                 setvar_p(res, s);
2214                 break;
2215         }
2216
2217         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2218          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2219         case B_an:
2220                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2221                 break;
2222
2223         case B_co:
2224                 setvar_i(res, ~getvar_i_int(av[0]));
2225                 break;
2226
2227         case B_ls:
2228                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2229                 break;
2230
2231         case B_or:
2232                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2233                 break;
2234
2235         case B_rs:
2236                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2237                 break;
2238
2239         case B_xo:
2240                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2241                 break;
2242
2243         case B_lo:
2244         case B_up: {
2245                 char *s, *s1;
2246                 s1 = s = xstrdup(as[0]);
2247                 while (*s1) {
2248                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2249                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2250                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2251                         s1++;
2252                 }
2253                 setvar_p(res, s);
2254                 break;
2255         }
2256
2257         case B_ix:
2258                 n = 0;
2259                 ll = strlen(as[1]);
2260                 l = strlen(as[0]) - ll;
2261                 if (ll > 0 && l >= 0) {
2262                         if (!icase) {
2263                                 char *s = strstr(as[0], as[1]);
2264                                 if (s)
2265                                         n = (s - as[0]) + 1;
2266                         } else {
2267                                 /* this piece of code is terribly slow and
2268                                  * really should be rewritten
2269                                  */
2270                                 for (i = 0; i <= l; i++) {
2271                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2272                                                 n = i+1;
2273                                                 break;
2274                                         }
2275                                 }
2276                         }
2277                 }
2278                 setvar_i(res, n);
2279                 break;
2280
2281         case B_ti:
2282                 if (nargs > 1)
2283                         tt = getvar_i(av[1]);
2284                 else
2285                         time(&tt);
2286                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2287                 i = strftime(g_buf, MAXVARFMT,
2288                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2289                         localtime(&tt));
2290                 g_buf[i] = '\0';
2291                 setvar_s(res, g_buf);
2292                 break;
2293
2294         case B_mt:
2295                 setvar_i(res, do_mktime(as[0]));
2296                 break;
2297
2298         case B_ma:
2299                 re = as_regex(an[1], &sreg);
2300                 n = regexec(re, as[0], 1, pmatch, 0);
2301                 if (n == 0) {
2302                         pmatch[0].rm_so++;
2303                         pmatch[0].rm_eo++;
2304                 } else {
2305                         pmatch[0].rm_so = 0;
2306                         pmatch[0].rm_eo = -1;
2307                 }
2308                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2309                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2310                 setvar_i(res, pmatch[0].rm_so);
2311                 if (re == &sreg)
2312                         regfree(re);
2313                 break;
2314
2315         case B_ge:
2316                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2317                 break;
2318
2319         case B_gs:
2320                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2321                 break;
2322
2323         case B_su:
2324                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2325                 break;
2326         }
2327
2328         nvfree(tv);
2329         return res;
2330 #undef tspl
2331 }
2332
2333 /*
2334  * Evaluate node - the heart of the program. Supplied with subtree
2335  * and place where to store result. returns ptr to result.
2336  */
/* Shift a value right by 8 bits. evaluate() applies it both to
 * (opinfo & OPCLSMASK) in its switch and to the OC_xxx constants in
 * the case labels, so both sides are scaled the same way. */
#define XC(n) ((n) >> 8)
2338
2339 static var *evaluate(node *op, var *res)
2340 {
2341 /* This procedure is recursive so we should count every byte */
2342 #define fnargs (G.evaluate__fnargs)
2343 /* seed is initialized to 1 */
2344 #define seed   (G.evaluate__seed)
2345 #define sreg   (G.evaluate__sreg)
2346
2347         var *v1;
2348
2349         if (!op)
2350                 return setvar_s(res, NULL);
2351
2352         v1 = nvalloc(2);
2353
2354         while (op) {
2355                 struct {
2356                         var *v;
2357                         const char *s;
2358                 } L = L; /* for compiler */
2359                 struct {
2360                         var *v;
2361                         const char *s;
2362                 } R = R;
2363                 double L_d = L_d;
2364                 uint32_t opinfo;
2365                 int opn;
2366                 node *op1;
2367
2368                 opinfo = op->info;
2369                 opn = (opinfo & OPNMASK);
2370                 g_lineno = op->lineno;
2371                 op1 = op->l.n;
2372                 debug_printf_eval("opinfo:%08x opn:%08x XC:%x\n", opinfo, opn, XC(opinfo & OPCLSMASK));
2373
2374                 /* execute inevitable things */
2375                 if (opinfo & OF_RES1)
2376                         L.v = evaluate(op1, v1);
2377                 if (opinfo & OF_RES2)
2378                         R.v = evaluate(op->r.n, v1+1);
2379                 if (opinfo & OF_STR1) {
2380                         L.s = getvar_s(L.v);
2381                         debug_printf_eval("L.s:'%s'\n", L.s);
2382                 }
2383                 if (opinfo & OF_STR2) {
2384                         R.s = getvar_s(R.v);
2385                         debug_printf_eval("R.s:'%s'\n", R.s);
2386                 }
2387                 if (opinfo & OF_NUM1) {
2388                         L_d = getvar_i(L.v);
2389                         debug_printf_eval("L_d:%f\n", L_d);
2390                 }
2391
2392                 switch (XC(opinfo & OPCLSMASK)) {
2393
2394                 /* -- iterative node type -- */
2395
2396                 /* test pattern */
2397                 case XC( OC_TEST ):
2398                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2399                                 /* it's range pattern */
2400                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2401                                         op->info |= OF_CHECKED;
2402                                         if (ptest(op1->r.n))
2403                                                 op->info &= ~OF_CHECKED;
2404                                         op = op->a.n;
2405                                 } else {
2406                                         op = op->r.n;
2407                                 }
2408                         } else {
2409                                 op = ptest(op1) ? op->a.n : op->r.n;
2410                         }
2411                         break;
2412
2413                 /* just evaluate an expression, also used as unconditional jump */
2414                 case XC( OC_EXEC ):
2415                         break;
2416
2417                 /* branch, used in if-else and various loops */
2418                 case XC( OC_BR ):
2419                         op = istrue(L.v) ? op->a.n : op->r.n;
2420                         break;
2421
2422                 /* initialize for-in loop */
2423                 case XC( OC_WALKINIT ):
2424                         hashwalk_init(L.v, iamarray(R.v));
2425                         break;
2426
2427                 /* get next array item */
2428                 case XC( OC_WALKNEXT ):
2429                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2430                         break;
2431
2432                 case XC( OC_PRINT ):
2433                 case XC( OC_PRINTF ): {
2434                         FILE *F = stdout;
2435
2436                         if (op->r.n) {
2437                                 rstream *rsm = newfile(R.s);
2438                                 if (!rsm->F) {
2439                                         if (opn == '|') {
2440                                                 rsm->F = popen(R.s, "w");
2441                                                 if (rsm->F == NULL)
2442                                                         bb_perror_msg_and_die("popen");
2443                                                 rsm->is_pipe = 1;
2444                                         } else {
2445                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2446                                         }
2447                                 }
2448                                 F = rsm->F;
2449                         }
2450
2451                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2452                                 if (!op1) {
2453                                         fputs(getvar_s(intvar[F0]), F);
2454                                 } else {
2455                                         while (op1) {
2456                                                 var *v = evaluate(nextarg(&op1), v1);
2457                                                 if (v->type & VF_NUMBER) {
2458                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2459                                                                         getvar_i(v), TRUE);
2460                                                         fputs(g_buf, F);
2461                                                 } else {
2462                                                         fputs(getvar_s(v), F);
2463                                                 }
2464
2465                                                 if (op1)
2466                                                         fputs(getvar_s(intvar[OFS]), F);
2467                                         }
2468                                 }
2469                                 fputs(getvar_s(intvar[ORS]), F);
2470
2471                         } else {        /* OC_PRINTF */
2472                                 char *s = awk_printf(op1);
2473                                 fputs(s, F);
2474                                 free(s);
2475                         }
2476                         fflush(F);
2477                         break;
2478                 }
2479
2480                 case XC( OC_DELETE ): {
2481                         uint32_t info = op1->info & OPCLSMASK;
2482                         var *v;
2483
2484                         if (info == OC_VAR) {
2485                                 v = op1->l.v;
2486                         } else if (info == OC_FNARG) {
2487                                 v = &fnargs[op1->l.aidx];
2488                         } else {
2489                                 syntax_error(EMSG_NOT_ARRAY);
2490                         }
2491
2492                         if (op1->r.n) {
2493                                 const char *s;
2494                                 clrvar(L.v);
2495                                 s = getvar_s(evaluate(op1->r.n, v1));
2496                                 hash_remove(iamarray(v), s);
2497                         } else {
2498                                 clear_array(iamarray(v));
2499                         }
2500                         break;
2501                 }
2502
2503                 case XC( OC_NEWSOURCE ):
2504                         g_progname = op->l.new_progname;
2505                         break;
2506
2507                 case XC( OC_RETURN ):
2508                         copyvar(res, L.v);
2509                         break;
2510
2511                 case XC( OC_NEXTFILE ):
2512                         nextfile = TRUE;
2513                 case XC( OC_NEXT ):
2514                         nextrec = TRUE;
2515                 case XC( OC_DONE ):
2516                         clrvar(res);
2517                         break;
2518
2519                 case XC( OC_EXIT ):
2520                         awk_exit(L_d);
2521
2522                 /* -- recursive node type -- */
2523
2524                 case XC( OC_VAR ):
2525                         L.v = op->l.v;
2526                         if (L.v == intvar[NF])
2527                                 split_f0();
2528                         goto v_cont;
2529
2530                 case XC( OC_FNARG ):
2531                         L.v = &fnargs[op->l.aidx];
2532  v_cont:
2533                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2534                         break;
2535
2536                 case XC( OC_IN ):
2537                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2538                         break;
2539
2540                 case XC( OC_REGEXP ):
2541                         op1 = op;
2542                         L.s = getvar_s(intvar[F0]);
2543                         goto re_cont;
2544
2545                 case XC( OC_MATCH ):
2546                         op1 = op->r.n;
2547  re_cont:
2548                         {
2549                                 regex_t *re = as_regex(op1, &sreg);
2550                                 int i = regexec(re, L.s, 0, NULL, 0);
2551                                 if (re == &sreg)
2552                                         regfree(re);
2553                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2554                         }
2555                         break;
2556
2557                 case XC( OC_MOVE ):
2558                         debug_printf_eval("MOVE\n");
2559                         /* if source is a temporary string, just relink it to dest */
2560 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2561 //then L.v ends up being a string, which is wrong
2562 //                      if (R.v == v1+1 && R.v->string) {
2563 //                              res = setvar_p(L.v, R.v->string);
2564 //                              R.v->string = NULL;
2565 //                      } else {
2566                                 res = copyvar(L.v, R.v);
2567 //                      }
2568                         break;
2569
2570                 case XC( OC_TERNARY ):
2571                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2572                                 syntax_error(EMSG_POSSIBLE_ERROR);
2573                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2574                         break;
2575
2576                 case XC( OC_FUNC ): {
2577                         var *vbeg, *v;
2578                         const char *sv_progname;
2579
2580                         if (!op->r.f->body.first)
2581                                 syntax_error(EMSG_UNDEF_FUNC);
2582
2583                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2584                         while (op1) {
2585                                 var *arg = evaluate(nextarg(&op1), v1);
2586                                 copyvar(v, arg);
2587                                 v->type |= VF_CHILD;
2588                                 v->x.parent = arg;
2589                                 if (++v - vbeg >= op->r.f->nargs)
2590                                         break;
2591                         }
2592
2593                         v = fnargs;
2594                         fnargs = vbeg;
2595                         sv_progname = g_progname;
2596
2597                         res = evaluate(op->r.f->body.first, res);
2598
2599                         g_progname = sv_progname;
2600                         nvfree(fnargs);
2601                         fnargs = v;
2602
2603                         break;
2604                 }
2605
2606                 case XC( OC_GETLINE ):
2607                 case XC( OC_PGETLINE ): {
2608                         rstream *rsm;
2609                         int i;
2610
2611                         if (op1) {
2612                                 rsm = newfile(L.s);
2613                                 if (!rsm->F) {
2614                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2615                                                 rsm->F = popen(L.s, "r");
2616                                                 rsm->is_pipe = TRUE;
2617                                         } else {
2618                                                 rsm->F = fopen_for_read(L.s);           /* not xfopen! */
2619                                         }
2620                                 }
2621                         } else {
2622                                 if (!iF)
2623                                         iF = next_input_file();
2624                                 rsm = iF;
2625                         }
2626
2627                         if (!rsm->F) {
2628                                 setvar_i(intvar[ERRNO], errno);
2629                                 setvar_i(res, -1);
2630                                 break;
2631                         }
2632
2633                         if (!op->r.n)
2634                                 R.v = intvar[F0];
2635
2636                         i = awk_getline(rsm, R.v);
2637                         if (i > 0 && !op1) {
2638                                 incvar(intvar[FNR]);
2639                                 incvar(intvar[NR]);
2640                         }
2641                         setvar_i(res, i);
2642                         break;
2643                 }
2644
2645                 /* simple builtins */
2646                 case XC( OC_FBLTIN ): {
2647                         int i;
2648                         rstream *rsm;
2649                         double R_d = R_d; /* for compiler */
2650
2651                         switch (opn) {
2652                         case F_in:
2653                                 R_d = (int)L_d;
2654                                 break;
2655
2656                         case F_rn:
2657                                 R_d = (double)rand() / (double)RAND_MAX;
2658                                 break;
2659 #if ENABLE_FEATURE_AWK_LIBM
2660                         case F_co:
2661                                 R_d = cos(L_d);
2662                                 break;
2663
2664                         case F_ex:
2665                                 R_d = exp(L_d);
2666                                 break;
2667
2668                         case F_lg:
2669                                 R_d = log(L_d);
2670                                 break;
2671
2672                         case F_si:
2673                                 R_d = sin(L_d);
2674                                 break;
2675
2676                         case F_sq:
2677                                 R_d = sqrt(L_d);
2678                                 break;
2679 #else
2680                         case F_co:
2681                         case F_ex:
2682                         case F_lg:
2683                         case F_si:
2684                         case F_sq:
2685                                 syntax_error(EMSG_NO_MATH);
2686                                 break;
2687 #endif
2688                         case F_sr:
2689                                 R_d = (double)seed;
2690                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2691                                 srand(seed);
2692                                 break;
2693
2694                         case F_ti:
2695                                 R_d = time(NULL);
2696                                 break;
2697
2698                         case F_le:
2699                                 if (!op1)
2700                                         L.s = getvar_s(intvar[F0]);
2701                                 R_d = strlen(L.s);
2702                                 break;
2703
2704                         case F_sy:
2705                                 fflush_all();
2706                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2707                                                 ? (system(L.s) >> 8) : 0;
2708                                 break;
2709
2710                         case F_ff:
2711                                 if (!op1) {
2712                                         fflush(stdout);
2713                                 } else if (L.s && *L.s) {
2714                                         rsm = newfile(L.s);
2715                                         fflush(rsm->F);
2716                                 } else {
2717                                         fflush_all();
2718                                 }
2719                                 break;
2720
2721                         case F_cl:
2722                                 i = 0;
2723                                 rsm = (rstream *)hash_search(fdhash, L.s);
2724                                 if (rsm) {
2725                                         i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2726                                         free(rsm->buffer);
2727                                         hash_remove(fdhash, L.s);
2728                                 }
2729                                 if (i != 0)
2730                                         setvar_i(intvar[ERRNO], errno);
2731                                 R_d = (double)i;
2732                                 break;
2733                         }
2734                         setvar_i(res, R_d);
2735                         break;
2736                 }
2737
2738                 case XC( OC_BUILTIN ):
2739                         res = exec_builtin(op, res);
2740                         break;
2741
2742                 case XC( OC_SPRINTF ):
2743                         setvar_p(res, awk_printf(op1));
2744                         break;
2745
2746                 case XC( OC_UNARY ): {
2747                         double Ld, R_d;
2748
2749                         Ld = R_d = getvar_i(R.v);
2750                         switch (opn) {
2751                         case 'P':
2752                                 Ld = ++R_d;
2753                                 goto r_op_change;
2754                         case 'p':
2755                                 R_d++;
2756                                 goto r_op_change;
2757                         case 'M':
2758                                 Ld = --R_d;
2759                                 goto r_op_change;
2760                         case 'm':
2761                                 R_d--;
2762  r_op_change:
2763                                 setvar_i(R.v, R_d);
2764                                 break;
2765                         case '!':
2766                                 Ld = !istrue(R.v);
2767                                 break;
2768                         case '-':
2769                                 Ld = -R_d;
2770                                 break;
2771                         }
2772                         setvar_i(res, Ld);
2773                         break;
2774                 }
2775
2776                 case XC( OC_FIELD ): {
2777                         int i = (int)getvar_i(R.v);
2778                         if (i == 0) {
2779                                 res = intvar[F0];
2780                         } else {
2781                                 split_f0();
2782                                 if (i > nfields)
2783                                         fsrealloc(i);
2784                                 res = &Fields[i - 1];
2785                         }
2786                         break;
2787                 }
2788
2789                 /* concatenation (" ") and index joining (",") */
2790                 case XC( OC_CONCAT ):
2791                 case XC( OC_COMMA ): {
2792                         const char *sep = "";
2793                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2794                                 sep = getvar_s(intvar[SUBSEP]);
2795                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2796                         break;
2797                 }
2798
2799                 case XC( OC_LAND ):
2800                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2801                         break;
2802
2803                 case XC( OC_LOR ):
2804                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2805                         break;
2806
2807                 case XC( OC_BINARY ):
2808                 case XC( OC_REPLACE ): {
2809                         double R_d = getvar_i(R.v);
2810                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2811                         switch (opn) {
2812                         case '+':
2813                                 L_d += R_d;
2814                                 break;
2815                         case '-':
2816                                 L_d -= R_d;
2817                                 break;
2818                         case '*':
2819                                 L_d *= R_d;
2820                                 break;
2821                         case '/':
2822                                 if (R_d == 0)
2823                                         syntax_error(EMSG_DIV_BY_ZERO);
2824                                 L_d /= R_d;
2825                                 break;
2826                         case '&':
2827 #if ENABLE_FEATURE_AWK_LIBM
2828                                 L_d = pow(L_d, R_d);
2829 #else
2830                                 syntax_error(EMSG_NO_MATH);
2831 #endif
2832                                 break;
2833                         case '%':
2834                                 if (R_d == 0)
2835                                         syntax_error(EMSG_DIV_BY_ZERO);
2836                                 L_d -= (int)(L_d / R_d) * R_d;
2837                                 break;
2838                         }
2839                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2840                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2841                         break;
2842                 }
2843
2844                 case XC( OC_COMPARE ): {
2845                         int i = i; /* for compiler */
2846                         double Ld;
2847
2848                         if (is_numeric(L.v) && is_numeric(R.v)) {
2849                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2850                         } else {
2851                                 const char *l = getvar_s(L.v);
2852                                 const char *r = getvar_s(R.v);
2853                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2854                         }
2855                         switch (opn & 0xfe) {
2856                         case 0:
2857                                 i = (Ld > 0);
2858                                 break;
2859                         case 2:
2860                                 i = (Ld >= 0);
2861                                 break;
2862                         case 4:
2863                                 i = (Ld == 0);
2864                                 break;
2865                         }
2866                         setvar_i(res, (i == 0) ^ (opn & 1));
2867                         break;
2868                 }
2869
2870                 default:
2871                         syntax_error(EMSG_POSSIBLE_ERROR);
2872                 }
2873                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2874                         op = op->a.n;
2875                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2876                         break;
2877                 if (nextrec)
2878                         break;
2879         } /* while (op) */
2880
2881         nvfree(v1);
2882         return res;
2883 #undef fnargs
2884 #undef seed
2885 #undef sreg
2886 }
2887
2888
2889 /* -------- main & co. -------- */
2890
2891 static int awk_exit(int r)
2892 {
2893         var tv;
2894         unsigned i;
2895         hash_item *hi;
2896
2897         zero_out_var(&tv);
2898
2899         if (!exiting) {
2900                 exiting = TRUE;
2901                 nextrec = FALSE;
2902                 evaluate(endseq.first, &tv);
2903         }
2904
2905         /* waiting for children */
2906         for (i = 0; i < fdhash->csize; i++) {
2907                 hi = fdhash->items[i];
2908                 while (hi) {
2909                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2910                                 pclose(hi->data.rs.F);
2911                         hi = hi->next;
2912                 }
2913         }
2914
2915         exit(r);
2916 }
2917
2918 /* if expr looks like "var=value", perform assignment and return 1,
2919  * otherwise return 0 */
2920 static int is_assignment(const char *expr)
2921 {
2922         char *exprc, *s, *s0, *s1;
2923
2924         exprc = xstrdup(expr);
2925         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2926                 free(exprc);
2927                 return FALSE;
2928         }
2929
2930         *s++ = '\0';
2931         s0 = s1 = s;
2932         while (*s)
2933                 *s1++ = nextchar(&s);
2934
2935         *s1 = '\0';
2936         setvar_u(newvar(exprc), s0);
2937         free(exprc);
2938         return TRUE;
2939 }
2940
2941 /* switch to next input file */
2942 static rstream *next_input_file(void)
2943 {
2944 #define rsm          (G.next_input_file__rsm)
2945 #define files_happen (G.next_input_file__files_happen)
2946
2947         FILE *F = NULL;
2948         const char *fname, *ind;
2949
2950         if (rsm.F)
2951                 fclose(rsm.F);
2952         rsm.F = NULL;
2953         rsm.pos = rsm.adv = 0;
2954
2955         do {
2956                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2957                         if (files_happen)
2958                                 return NULL;
2959                         fname = "-";
2960                         F = stdin;
2961                 } else {
2962                         ind = getvar_s(incvar(intvar[ARGIND]));
2963                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2964                         if (fname && *fname && !is_assignment(fname))
2965                                 F = xfopen_stdin(fname);
2966                 }
2967         } while (!F);
2968
2969         files_happen = TRUE;
2970         setvar_s(intvar[FILENAME], fname);
2971         rsm.F = F;
2972         return &rsm;
2973 #undef rsm
2974 #undef files_happen
2975 }
2976
2977 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2978 int awk_main(int argc, char **argv)
2979 {
2980         unsigned opt;
2981         char *opt_F, *opt_W;
2982         llist_t *list_v = NULL;
2983         llist_t *list_f = NULL;
2984         int i, j;
2985         var *v;
2986         var tv;
2987         char **envp;
2988         char *vnames = (char *)vNames; /* cheat */
2989         char *vvalues = (char *)vValues;
2990
2991         INIT_G();
2992
2993         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
2994          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
2995         if (ENABLE_LOCALE_SUPPORT)
2996                 setlocale(LC_NUMERIC, "C");
2997
2998         zero_out_var(&tv);
2999
3000         /* allocate global buffer */
3001         g_buf = xmalloc(MAXVARFMT + 1);
3002
3003         vhash = hash_init();
3004         ahash = hash_init();
3005         fdhash = hash_init();
3006         fnhash = hash_init();
3007
3008         /* initialize variables */
3009         for (i = 0; *vnames; i++) {
3010                 intvar[i] = v = newvar(nextword(&vnames));
3011                 if (*vvalues != '\377')
3012                         setvar_s(v, nextword(&vvalues));
3013                 else
3014                         setvar_i(v, 0);
3015
3016                 if (*vnames == '*') {
3017                         v->type |= VF_SPECIAL;
3018                         vnames++;
3019                 }
3020         }
3021
3022         handle_special(intvar[FS]);
3023         handle_special(intvar[RS]);
3024
3025         newfile("/dev/stdin")->F = stdin;
3026         newfile("/dev/stdout")->F = stdout;
3027         newfile("/dev/stderr")->F = stderr;
3028
3029         /* Huh, people report that sometimes environ is NULL. Oh well. */
3030         if (environ) for (envp = environ; *envp; envp++) {
3031                 /* environ is writable, thus we don't strdup it needlessly */
3032                 char *s = *envp;
3033                 char *s1 = strchr(s, '=');
3034                 if (s1) {
3035                         *s1 = '\0';
3036                         /* Both findvar and setvar_u take const char*
3037                          * as 2nd arg -> environment is not trashed */
3038                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3039                         *s1 = '=';
3040                 }
3041         }
3042         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3043         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3044         argv += optind;
3045         argc -= optind;
3046         if (opt & 0x1)
3047                 setvar_s(intvar[FS], opt_F); // -F
3048         while (list_v) { /* -v */
3049                 if (!is_assignment(llist_pop(&list_v)))
3050                         bb_show_usage();
3051         }
3052         if (list_f) { /* -f */
3053                 do {
3054                         char *s = NULL;
3055                         FILE *from_file;
3056
3057                         g_progname = llist_pop(&list_f);
3058                         from_file = xfopen_stdin(g_progname);
3059                         /* one byte is reserved for some trick in next_token */
3060                         for (i = j = 1; j > 0; i += j) {
3061                                 s = xrealloc(s, i + 4096);
3062                                 j = fread(s + i, 1, 4094, from_file);
3063                         }
3064                         s[i] = '\0';
3065                         fclose(from_file);
3066                         parse_program(s + 1);
3067                         free(s);
3068                 } while (list_f);
3069                 argc++;
3070         } else { // no -f: take program from 1st parameter
3071                 if (!argc)
3072                         bb_show_usage();
3073                 g_progname = "cmd. line";
3074                 parse_program(*argv++);
3075         }
3076         if (opt & 0x8) // -W
3077                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3078
3079         /* fill in ARGV array */
3080         setvar_i(intvar[ARGC], argc);
3081         setari_u(intvar[ARGV], 0, "awk");
3082         i = 0;
3083         while (*argv)
3084                 setari_u(intvar[ARGV], ++i, *argv++);
3085
3086         evaluate(beginseq.first, &tv);
3087         if (!mainseq.first && !endseq.first)
3088                 awk_exit(EXIT_SUCCESS);
3089
3090         /* input file could already be opened in BEGIN block */
3091         if (!iF)
3092                 iF = next_input_file();
3093
3094         /* passing through input files */
3095         while (iF) {
3096                 nextfile = FALSE;
3097                 setvar_i(intvar[FNR], 0);
3098
3099                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3100                         nextrec = FALSE;
3101                         incvar(intvar[NR]);
3102                         incvar(intvar[FNR]);
3103                         evaluate(mainseq.first, &tv);
3104
3105                         if (nextfile)
3106                                 break;
3107                 }
3108
3109                 if (i < 0)
3110                         syntax_error(strerror(errno));
3111
3112                 iF = next_input_file();
3113         }
3114
3115         awk_exit(EXIT_SUCCESS);
3116         /*return 0;*/
3117 }