awk: fix define name collision with BSD headers
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 //usage:#define awk_trivial_usage
11 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage:       "        -v VAR=VAL      Set variable"
14 //usage:     "\n        -F SEP          Use SEP as field separator"
15 //usage:     "\n        -f FILE         Read program from FILE"
16
17 #include "libbb.h"
18 #include "xregex.h"
19 #include <math.h>
20
21 /* This is a NOEXEC applet. Be very careful! */
22
23
/* If you comment out one of these below, it will be #defined later
 * to perform debug printfs to stderr: */
/* With these definitions the debug hooks expand to an empty statement,
 * so their arguments are never evaluated. */
#define debug_printf_walker(...)  do {} while (0)
#define debug_printf_eval(...)  do {} while (0)

/* Fallbacks: only take effect when the matching no-op define above
 * has been commented out. */
#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
35
36
37
/* Limit passed to fmt_num() when getvar_s() formats a number into g_buf */
#define MAXVARFMT       240
/* Smallest number of vars allocated for one temporary-variable block (nvalloc) */
#define MINNVBLOCK      64
40
/* variable flags (bits kept in var::type) */
#define VF_NUMBER       0x0001  /* 1 = primary type is number */
#define VF_ARRAY        0x0002  /* 1 = it's an array */

#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
/* (clrvar() keeps exactly these bits and drops all the others) */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
55
/* One entry of the list reachable through var::x.walker (used for for..in).
 * NOTE(review): the code that fills and walks this list is not in this part
 * of the file; the field notes below are assumptions to confirm there. */
typedef struct walker_list {
        char *end;                      /* presumably end of the data held in wbuf */
        char *cur;                      /* presumably current position inside wbuf */
        struct walker_list *prev;       /* link to another walker_list entry */
        char wbuf[1];                   /* declared with one element; presumably allocated longer */
} walker_list;
62
/* Variable */
/* A value can be held as a number, as a string, or both: getvar_s() and
 * getvar_i() fill in the missing form on demand and set VF_CACHED. */
typedef struct var_s {
        unsigned type;            /* flags */
        double number;            /* numeric value (valid per VF_NUMBER/VF_CACHED) */
        char *string;             /* string value, NULL stands for "" (see getvar_s) */
        union {
                int aidx;               /* func arg idx (for compilation stage) */
                struct xhash_s *array;  /* array ptr */
                struct var_s *parent;   /* for func args, ptr to actual parameter */
                walker_list *walker;    /* list of array elements (for..in) */
        } x;
} var;
75
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
        struct node_s *first;           /* first node of the chain */
        struct node_s *last;            /* last node of the chain */
        const char *programname;        /* NOTE(review): presumably name of the program source - confirm */
} chain;
82
/* Function */
/* Stored in the fnhash table (see newfunc()). */
typedef struct func_s {
        unsigned nargs;         /* NOTE(review): presumably number of declared arguments - confirm */
        struct chain_s body;    /* node chain of the function body */
} func;
88
/* I/O stream */
/* Stored in the fdhash table (see newfile()).
 * NOTE(review): the reader code is not in this part of the file; the
 * buffer bookkeeping notes below are assumptions to confirm there. */
typedef struct rstream_s {
        FILE *F;                /* underlying stdio stream */
        char *buffer;           /* presumably the read buffer */
        int adv;                /* presumably offset of unconsumed data in buffer */
        int size;               /* presumably allocated size of buffer */
        int pos;                /* presumably amount of data currently in buffer */
        smallint is_pipe;       /* presumably nonzero when F is a pipe */
} rstream;
98
/* One entry of an xhash bucket chain.
 * 'data' must stay the first member: hash_search() returns &hi->data and
 * hash_find() stores that pointer in a hash_item pointer, which is only
 * correct while data sits at offset 0. */
typedef struct hash_item_s {
        union {
                struct var_s v;         /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;        /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        /* hash_find() allocates sizeof(hash_item) + strlen(name) + 1 bytes
         * and copies the key here */
        char name[1];                   /* really it's longer */
} hash_item;
108
/* Chained hash table keyed by NUL-terminated names */
typedef struct xhash_s {
        unsigned nel;           /* num of elements */
        unsigned csize;         /* current hash size */
        unsigned nprime;        /* next hash size in PRIMES[] */
        unsigned glen;          /* summary length of item names */
        struct hash_item_s **items;     /* csize bucket heads */
} xhash;
116
/* Tree node */
/* The l/r/a unions are three operand slots; which member of each union is
 * valid depends on the node kind.
 * NOTE(review): presumably selected by the operation class in 'info' - confirm. */
typedef struct node_s {
        uint32_t info;          /* NOTE(review): presumably an info word like those in tokeninfo[] */
        unsigned lineno;        /* NOTE(review): presumably source line of the node */
        union {
                struct node_s *n;
                var *v;
                int aidx;
                char *new_progname;
                regex_t *re;
        } l;
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
137
/* Block of temporary variables */
/* Blocks form a doubly linked list; nvalloc() hands out vars from nv[]. */
typedef struct nvblock_s {
        int size;                       /* capacity of nv[], in vars */
        var *pos;                       /* first unused var in nv[] */
        struct nvblock_s *prev;
        struct nvblock_s *next;
        var nv[];                       /* storage, allocated together with the header */
} nvblock;
146
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): presumably describes a field/record splitter (see the
 * fsplitter/rsplitter globals) - confirm against the code that uses it. */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
151
/* simple token classes */
/* Order and hex values are very important!!!  See next_token() */
/* Each class is a distinct single bit, so classes can be OR-ed into sets.
 * The sections of tokenlist[] appear in this same order. */
#define TC_SEQSTART      1                              /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* either kind of unary prefix operator */
#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
186
/* combined token classes */
/* any operator taking two operands */
#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
/* any unary operator, prefix or postfix */
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
/* statement terminators: ';' and newline */
#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
214
/* Operator flags; they live in bits 16..21 of an info word, above the
 * operation class (OPCLSMASK) and below the priority byte.
 * Going by the builtin-priority comment further down ("resolve Nth arg to
 * var", "resolve to string"), the digit names the operand: RESn = resolve
 * operand n, STRn = as string, NUM1 = operand 1 as number.
 * NOTE(review): confirm against evaluate(); OF_CHECKED is not used in this
 * part of the file. */
#define OF_RES1    0x010000
#define OF_RES2    0x020000
#define OF_STR1    0x040000
#define OF_STR2    0x080000
#define OF_NUM1    0x100000
#define OF_CHECKED 0x200000

/* combined operator flags */
/* Two-letter codes: first letter is operand 1, second is operand 2;
 * x = no flag, V = RES only, S = RES+STR, N = RES+NUM. */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* info-word masks: operation class (the OC_xxx and ST_xxx values) and the low 7 bits */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F
236
/* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
/* Drop any macros of the same names that system headers may have defined */
#undef P
#undef PRIMASK
#undef PRIMASK2
/* Place x into the highest byte of a 32-bit info word.
 * The argument is parenthesized so that expressions such as P(a | b) shift
 * the whole value, and it is converted to uint32_t before shifting because
 * values like P(0x83) do not fit in a signed int once shifted left by 24. */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
247
/* Operation classes */
/* Every class value is a multiple of 0x100 and fits inside OPCLSMASK. */

/* Range markers within the class values: 0x0600 equals OC_WALKINIT and
 * 0x1000 equals OC_BINARY.
 * NOTE(review): their exact role is decided by code outside this part of
 * the file - confirm there. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,

        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,

        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
        OC_DONE = 0x2800,

        /* statement kinds produced by the if/do/for/while keywords (see tokeninfo[]) */
        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
        ST_WHILE = 0x3300
};
274
/* simple builtins */
/* Sub-codes combined with OC_FBLTIN in tokeninfo[]. Per the parallel
 * tokenlist[] entries: in=int, rn=rand, co=cos, ex=exp, lg=log, si=sin,
 * sq=sqrt, sr=srand, ti=systime, le=length, sy=system, ff=fflush, cl=close. */
enum {
        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
        F_ti,   F_le,   F_sy,   F_ff,   F_cl
};

/* builtins */
/* Sub-codes combined with OC_BUILTIN in tokeninfo[]. Per the parallel
 * tokenlist[] entries: a2=atan2, ix=index, ma=match, sp=split, ss=substr,
 * ti=strftime, mt=mktime, lo=tolower, up=toupper, ge=gensub, gs=gsub,
 * su=sub, an=and, co=compl, ls=lshift, or=or, rs=rshift, xo=xor. */
enum {
        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
        B_ge,   B_gs,   B_su,
        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
};
287
/* tokens and their corresponding info values */

#define NTC     "\377"  /* switch to next token class (tc<<1) */
#define NTCC    '\377'

#define OC_B  OC_BUILTIN

/* Token table. Each entry is one length byte (written as an octal escape)
 * followed by that many characters of token text, e.g. "\10continue" is
 * length 8 + "continue". NTC ends the entries of one token class; the
 * classes appear in the order of the TC_xxx bits (TC_SEQSTART ... TC_END).
 * tokeninfo[] below holds one info value per entry, in the same order. */
static const char tokenlist[] ALIGN1 =
        "\1("         NTC
        "\1)"         NTC
        "\1/"         NTC                                   /* REGEXP */
        "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
        "\2++"        "\2--"        NTC                     /* UOPPOST */
        "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
        "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
        "\2*="        "\2/="        "\2%="      "\2^="
        "\1+"         "\1-"         "\3**="     "\2**"
        "\1/"         "\1%"         "\1^"       "\1*"
        "\2!="        "\2>="        "\2<="      "\1>"
        "\1<"         "\2!~"        "\1~"       "\2&&"
        "\2||"        "\1?"         "\1:"       NTC
        "\2in"        NTC
        "\1,"         NTC
        "\1|"         NTC
        "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
        "\1]"         NTC
        "\1{"         NTC
        "\1}"         NTC
        "\1;"         NTC
        "\1\n"        NTC
        "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
        "\10continue" "\6delete"    "\5print"
        "\6printf"    "\4next"      "\10nextfile"
        "\6return"    "\4exit"      NTC
        "\5while"     NTC
        "\4else"      NTC

        "\3and"       "\5compl"     "\6lshift"  "\2or"
        "\6rshift"    "\3xor"
        "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
        "\3cos"       "\3exp"       "\3int"     "\3log"
        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
        "\6gensub"    "\4gsub"      "\5index"   "\6length"
        "\5match"     "\5split"     "\7sprintf" "\3sub"
        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
        "\7tolower"   "\7toupper"   NTC
        "\7getline"   NTC
        "\4func"      "\10function" NTC
        "\5BEGIN"     NTC
        "\3END"
        /* compiler adds trailing "\0" */
        ;
340
/* Info values, one per tokenlist[] entry and in the same order (NTC
 * separators have no entry here). A value combines an operation class
 * (OC_xxx or ST_xxx), operand flags (xV, NV, ...), a priority P(n) and, for
 * many operators, a character or small number selecting the exact operation. */
static const uint32_t tokeninfo[] = {
        0,
        0,
        OC_REGEXP,
        xS|'a',                  xS|'w',                  xS|'|',
        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
        OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
        OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
        OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
        OC_IN|SV|P(49), /* in */
        OC_COMMA|SS|P(80),
        OC_PGETLINE|SV|P(37),
        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
        0, /* ] */
        0,
        0,
        0,
        0, /* \n */
        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
        OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
        OC_RETURN|Vx, OC_EXIT|Nx,
        ST_WHILE,
        0, /* else */

        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
        OC_GETLINE|SV|P(0),
        0,                 0,
        0,
        0 /* END */
};
385
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indexes into intvar[]; the order matches the names in vNames[]. */
enum {
        CONVFMT,    OFMT,       FS,         OFS,
        ORS,        RS,         RT,         FILENAME,
        SUBSEP,     F0,         ARGIND,     ARGC,
        ARGV,       ERRNO,      FNR,        NR,
        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
};

/* NUL-separated names, ended by an empty name. Each '*' is placed right
 * after the NUL of a name.
 * NOTE(review): presumably it marks that preceding name (FS, RS, $, NF,
 * IGNORECASE) as special - confirm against the initialization loop. */
static const char vNames[] ALIGN1 =
        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";

/* NUL-separated initial string values for the first ten names above.
 * NOTE(review): "\377" presumably tells the initialization loop that the
 * remaining variables get no string value - confirm there. */
static const char vValues[] ALIGN1 =
        "%.6g\0"    "%.6g\0"    " \0"       " \0"
        "\n\0"      "\n\0"      "\0"        "\0"
        "\034\0"    "\0"        "\377";
407
/* hash size may grow to these values */
/* hash_init() starts every table at FIRST_PRIME buckets; hash_rebuild()
 * then steps through PRIMES[] and stops growing after the last entry. */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
411
412
/* Globals. Split in two parts so that first one is addressed
 * with (mostly short) negative offsets.
 * NB: it's unsafe to put members of type "double"
 * into globals2 (gcc may fail to align them).
 */
/* This first part is reached through the G1 macro. */
struct globals {
        double t_double;
        chain beginseq, mainseq, endseq;
        chain *seq;
        node *break_ptr, *continue_ptr;
        rstream *iF;
        /* hash tables: vhash, fdhash and fnhash back newvar(), newfile() and newfunc() */
        xhash *vhash, *ahash, *fdhash, *fnhash;
        const char *g_progname; /* printed by syntax_error() */
        int g_lineno;           /* printed by syntax_error() */
        int nfields;
        int maxfields; /* used in fsrealloc() only */
        var *Fields;
        nvblock *g_cb;          /* current block of temporary vars (see nvalloc()) */
        char *g_pos;
        char *g_buf;            /* scratch buffer; getvar_s() formats numbers into it */
        smallint icase;
        smallint exiting;
        smallint nextrec;
        smallint nextfile;
        smallint is_f0_split;
};
/* Second part of the globals, reached through the G macro.
 * Members named func__name are, per the comment below, former statics
 * of the function named before the double underscore. */
struct globals2 {
        uint32_t t_info; /* often used */
        uint32_t t_tclass;
        char *t_string;
        int t_lineno;           /* incremented by skip_spaces() on backslash-newline */
        int t_rollback;

        var *intvar[NUM_INTERNAL_VARS]; /* often used */

        /* former statics from various functions */
        char *split_f0__fstrings;

        uint32_t next_token__save_tclass;
        uint32_t next_token__save_info;
        uint32_t next_token__ltclass;   /* INIT_G() sets it to TC_OPTERM */
        smallint next_token__concat_inserted;

        smallint next_input_file__files_happen;
        rstream next_input_file__rsm;

        var *evaluate__fnargs;
        unsigned evaluate__seed;        /* INIT_G() sets it to 1 */
        regex_t evaluate__sreg;

        var ptest__v;

        tsplitter exec_builtin__tspl;

        /* biggest and least used members go last */
        tsplitter fsplitter, rsplitter;
};
/* G1 is the element just before ptr_to_globals and G is the struct globals2
 * stored at ptr_to_globals; INIT_G() below allocates both as one chunk. */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Short names for the global members, so code can use them like plain variables */
#define t_double     (G1.t_double    )
#define beginseq     (G1.beginseq    )
#define mainseq      (G1.mainseq     )
#define endseq       (G1.endseq      )
#define seq          (G1.seq         )
#define break_ptr    (G1.break_ptr   )
#define continue_ptr (G1.continue_ptr)
#define iF           (G1.iF          )
#define vhash        (G1.vhash       )
#define ahash        (G1.ahash       )
#define fdhash       (G1.fdhash      )
#define fnhash       (G1.fnhash      )
#define g_progname   (G1.g_progname  )
#define g_lineno     (G1.g_lineno    )
#define nfields      (G1.nfields     )
#define maxfields    (G1.maxfields   )
#define Fields       (G1.Fields      )
#define g_cb         (G1.g_cb        )
#define g_pos        (G1.g_pos       )
#define g_buf        (G1.g_buf       )
#define icase        (G1.icase       )
#define exiting      (G1.exiting     )
#define nextrec      (G1.nextrec     )
#define nextfile     (G1.nextfile    )
#define is_f0_split  (G1.is_f0_split )
#define t_info       (G.t_info      )
#define t_tclass     (G.t_tclass    )
#define t_string     (G.t_string    )
#define t_lineno     (G.t_lineno    )
#define t_rollback   (G.t_rollback  )
#define intvar       (G.intvar      )
#define fsplitter    (G.fsplitter   )
#define rsplitter    (G.rsplitter   )
/* Allocate one zeroed chunk of sizeof(G1)+sizeof(G) bytes and point
 * ptr_to_globals sizeof(G1) bytes into it, then set the two members
 * whose initial value is not zero. */
#define INIT_G() do { \
        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
        G.next_token__ltclass = TC_OPTERM; \
        G.evaluate__seed = 1; \
} while (0)
515
516
/* function prototypes */
/* Forward declarations for functions that are called before their
 * definitions (e.g. handle_special() and fmt_num() are used by the
 * variable accessors below). awk_exit() is declared as never returning. */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;
525
/* ---- error handling ---- */

/* Error message texts.
 * NOTE(review): presumably passed to syntax_error() - confirm at the call sites. */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
538
539 static void zero_out_var(var *vp)
540 {
541         memset(vp, 0, sizeof(*vp));
542 }
543
/* Report a fatal error as "<g_progname>:<g_lineno>: <message>".
 * Declared NORETURN: control does not come back to the caller. */
static void syntax_error(const char *message) NORETURN;
static void syntax_error(const char *message)
{
        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
}
549
550 /* ---- hash stuff ---- */
551
/* Compute the raw hash of a NUL-terminated name. Each character is folded
 * in as h = ch + h*63 (written as a shift and a subtraction), with normal
 * unsigned wraparound. Callers reduce the result modulo the table size. */
static unsigned hashidx(const char *name)
{
        unsigned h = 0;
        const char *p;

        for (p = name; *p != '\0'; p++)
                h = *p + (h << 6) - h;
        return h;
}
560
561 /* create new hash */
562 static xhash *hash_init(void)
563 {
564         xhash *newhash;
565
566         newhash = xzalloc(sizeof(*newhash));
567         newhash->csize = FIRST_PRIME;
568         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
569
570         return newhash;
571 }
572
573 /* find item in hash, return ptr to data, NULL if not found */
574 static void *hash_search(xhash *hash, const char *name)
575 {
576         hash_item *hi;
577
578         hi = hash->items[hashidx(name) % hash->csize];
579         while (hi) {
580                 if (strcmp(hi->name, name) == 0)
581                         return &hi->data;
582                 hi = hi->next;
583         }
584         return NULL;
585 }
586
587 /* grow hash if it becomes too big */
588 static void hash_rebuild(xhash *hash)
589 {
590         unsigned newsize, i, idx;
591         hash_item **newitems, *hi, *thi;
592
593         if (hash->nprime == ARRAY_SIZE(PRIMES))
594                 return;
595
596         newsize = PRIMES[hash->nprime++];
597         newitems = xzalloc(newsize * sizeof(newitems[0]));
598
599         for (i = 0; i < hash->csize; i++) {
600                 hi = hash->items[i];
601                 while (hi) {
602                         thi = hi;
603                         hi = thi->next;
604                         idx = hashidx(thi->name) % newsize;
605                         thi->next = newitems[idx];
606                         newitems[idx] = thi;
607                 }
608         }
609
610         free(hash->items);
611         hash->csize = newsize;
612         hash->items = newitems;
613 }
614
615 /* find item in hash, add it if necessary. Return ptr to data */
616 static void *hash_find(xhash *hash, const char *name)
617 {
618         hash_item *hi;
619         unsigned idx;
620         int l;
621
622         hi = hash_search(hash, name);
623         if (!hi) {
624                 if (++hash->nel / hash->csize > 10)
625                         hash_rebuild(hash);
626
627                 l = strlen(name) + 1;
628                 hi = xzalloc(sizeof(*hi) + l);
629                 strcpy(hi->name, name);
630
631                 idx = hashidx(name) % hash->csize;
632                 hi->next = hash->items[idx];
633                 hash->items[idx] = hi;
634                 hash->glen += l;
635         }
636         return &hi->data;
637 }
638
/* Typed wrappers around hash_find(): each returns the existing entry for
 * name or a newly created zero-filled one. findvar() takes the table as
 * an argument; the others use the vhash, fdhash and fnhash globals. */
#define findvar(hash, name) ((var*)    hash_find((hash), (name)))
#define newvar(name)        ((var*)    hash_find(vhash, (name)))
#define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
643
644 static void hash_remove(xhash *hash, const char *name)
645 {
646         hash_item *hi, **phi;
647
648         phi = &hash->items[hashidx(name) % hash->csize];
649         while (*phi) {
650                 hi = *phi;
651                 if (strcmp(hi->name, name) == 0) {
652                         hash->glen -= (strlen(name) + 1);
653                         hash->nel--;
654                         *phi = hi->next;
655                         free(hi);
656                         break;
657                 }
658                 phi = &hi->next;
659         }
660 }
661
662 /* ------ some useful functions ------ */
663
664 static char *skip_spaces(char *p)
665 {
666         while (1) {
667                 if (*p == '\\' && p[1] == '\n') {
668                         p++;
669                         t_lineno++;
670                 } else if (*p != ' ' && *p != '\t') {
671                         break;
672                 }
673                 p++;
674         }
675         return p;
676 }
677
/* Return the word *s points at and move *s just past its terminating NUL */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
686
/* Fetch one character from *s and advance *s past it. A backslash is
 * handed to bb_process_escape_sequence(); if that call returns a
 * backslash without moving *s, the char after the backslash is returned
 * literally (and stepped over unless it is the terminating NUL). */
static char nextchar(char **s)
{
        char c = *(*s)++;
        char *after = *s;       /* position right behind the first char */

        if (c != '\\')
                return c;

        c = bb_process_escape_sequence((const char**)s);
        if (c == '\\' && *s == after) { /* unrecognized \z? */
                c = **s; /* yes, fetch z */
                if (c != '\0')
                        (*s)++; /* advance unless z = NUL */
        }
        return c;
}
702
703 static ALWAYS_INLINE int isalnum_(int c)
704 {
705         return (isalnum(c) || c == '_');
706 }
707
708 static double my_strtod(char **pp)
709 {
710         char *cp = *pp;
711         if (ENABLE_DESKTOP && cp[0] == '0') {
712                 /* Might be hex or octal integer: 0x123abc or 07777 */
713                 char c = (cp[1] | 0x20);
714                 if (c == 'x' || isdigit(cp[1])) {
715                         unsigned long long ull = strtoull(cp, pp, 0);
716                         if (c == 'x')
717                                 return ull;
718                         c = **pp;
719                         if (!isdigit(c) && c != '.')
720                                 return ull;
721                         /* else: it may be a floating number. Examples:
722                          * 009.123 (*pp points to '9')
723                          * 000.123 (*pp points to '.')
724                          * fall through to strtod.
725                          */
726                 }
727         }
728         return strtod(cp, pp);
729 }
730
731 /* -------- working with variables (set/get/copy/etc) -------- */
732
733 static xhash *iamarray(var *v)
734 {
735         var *a = v;
736
737         while (a->type & VF_CHILD)
738                 a = a->x.parent;
739
740         if (!(a->type & VF_ARRAY)) {
741                 a->type |= VF_ARRAY;
742                 a->x.array = hash_init();
743         }
744         return a->x.array;
745 }
746
747 static void clear_array(xhash *array)
748 {
749         unsigned i;
750         hash_item *hi, *thi;
751
752         for (i = 0; i < array->csize; i++) {
753                 hi = array->items[i];
754                 while (hi) {
755                         thi = hi;
756                         hi = hi->next;
757                         free(thi->data.v.string);
758                         free(thi);
759                 }
760                 array->items[i] = NULL;
761         }
762         array->glen = array->nel = 0;
763 }
764
765 /* clear a variable */
766 static var *clrvar(var *v)
767 {
768         if (!(v->type & VF_FSTR))
769                 free(v->string);
770
771         v->type &= VF_DONTTOUCH;
772         v->type |= VF_DIRTY;
773         v->string = NULL;
774         return v;
775 }
776
777 /* assign string value to variable */
778 static var *setvar_p(var *v, char *value)
779 {
780         clrvar(v);
781         v->string = value;
782         handle_special(v);
783         return v;
784 }
785
786 /* same as setvar_p but make a copy of string */
787 static var *setvar_s(var *v, const char *value)
788 {
789         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
790 }
791
792 /* same as setvar_s but sets USER flag */
793 static var *setvar_u(var *v, const char *value)
794 {
795         v = setvar_s(v, value);
796         v->type |= VF_USER;
797         return v;
798 }
799
800 /* set array element to user string */
801 static void setari_u(var *a, int idx, const char *s)
802 {
803         var *v;
804
805         v = findvar(iamarray(a), itoa(idx));
806         setvar_u(v, s);
807 }
808
809 /* assign numeric value to variable */
810 static var *setvar_i(var *v, double value)
811 {
812         clrvar(v);
813         v->type |= VF_NUMBER;
814         v->number = value;
815         handle_special(v);
816         return v;
817 }
818
819 static const char *getvar_s(var *v)
820 {
821         /* if v is numeric and has no cached string, convert it to string */
822         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
823                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
824                 v->string = xstrdup(g_buf);
825                 v->type |= VF_CACHED;
826         }
827         return (v->string == NULL) ? "" : v->string;
828 }
829
830 static double getvar_i(var *v)
831 {
832         char *s;
833
834         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
835                 v->number = 0;
836                 s = v->string;
837                 if (s && *s) {
838                         debug_printf_eval("getvar_i: '%s'->", s);
839                         v->number = my_strtod(&s);
840                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
841                         if (v->type & VF_USER) {
842                                 s = skip_spaces(s);
843                                 if (*s != '\0')
844                                         v->type &= ~VF_USER;
845                         }
846                 } else {
847                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
848                         v->type &= ~VF_USER;
849                 }
850                 v->type |= VF_CACHED;
851         }
852         debug_printf_eval("getvar_i: %f\n", v->number);
853         return v->number;
854 }
855
856 /* Used for operands of bitwise ops */
857 static unsigned long getvar_i_int(var *v)
858 {
859         double d = getvar_i(v);
860
861         /* Casting doubles to longs is undefined for values outside
862          * of target type range. Try to widen it as much as possible */
863         if (d >= 0)
864                 return (unsigned long)d;
865         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
866         return - (long) (unsigned long) (-d);
867 }
868
869 static var *copyvar(var *dest, const var *src)
870 {
871         if (dest != src) {
872                 clrvar(dest);
873                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
874                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
875                 dest->number = src->number;
876                 if (src->string)
877                         dest->string = xstrdup(src->string);
878         }
879         handle_special(dest);
880         return dest;
881 }
882
883 static var *incvar(var *v)
884 {
885         return setvar_i(v, getvar_i(v) + 1.0);
886 }
887
888 /* return true if v is number or numeric string */
889 static int is_numeric(var *v)
890 {
891         getvar_i(v);
892         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
893 }
894
895 /* return 1 when value of v corresponds to true, 0 otherwise */
896 static int istrue(var *v)
897 {
898         if (is_numeric(v))
899                 return (v->number != 0);
900         return (v->string && v->string[0]);
901 }
902
903 /* temporary variables allocator. Last allocated should be first freed */
904 static var *nvalloc(int n)
905 {
906         nvblock *pb = NULL;
907         var *v, *r;
908         int size;
909
910         while (g_cb) {
911                 pb = g_cb;
912                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
913                         break;
914                 g_cb = g_cb->next;
915         }
916
917         if (!g_cb) {
918                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
919                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
920                 g_cb->size = size;
921                 g_cb->pos = g_cb->nv;
922                 g_cb->prev = pb;
923                 /*g_cb->next = NULL; - xzalloc did it */
924                 if (pb)
925                         pb->next = g_cb;
926         }
927
928         v = r = g_cb->pos;
929         g_cb->pos += n;
930
931         while (v < g_cb->pos) {
932                 v->type = 0;
933                 v->string = NULL;
934                 v++;
935         }
936
937         return r;
938 }
939
940 static void nvfree(var *v)
941 {
942         var *p;
943
944         if (v < g_cb->nv || v >= g_cb->pos)
945                 syntax_error(EMSG_INTERNAL_ERROR);
946
947         for (p = v; p < g_cb->pos; p++) {
948                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
949                         clear_array(iamarray(p));
950                         free(p->x.array->items);
951                         free(p->x.array);
952                 }
953                 if (p->type & VF_WALK) {
954                         walker_list *n;
955                         walker_list *w = p->x.walker;
956                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
957                         p->x.walker = NULL;
958                         while (w) {
959                                 n = w->prev;
960                                 debug_printf_walker(" free(%p)\n", w);
961                                 free(w);
962                                 w = n;
963                         }
964                 }
965                 clrvar(p);
966         }
967
968         g_cb->pos = v;
969         while (g_cb->prev && g_cb->pos == g_cb->nv) {
970                 g_cb = g_cb->prev;
971         }
972 }
973
974 /* ------- awk program text parsing ------- */
975
976 /* Parse next token pointed by global pos, place results into global ttt.
977  * If token isn't expected, give away. Return token class
978  */
/* Details:
 * 'expected' is a bitmask of token classes (TC_*) the caller can accept.
 * The class of the token found is stored in t_tclass, remembered in
 * ltclass for the next call, and returned. If it has no bit in common
 * with 'expected', syntax_error() is called.
 * The token is taken from one of three places, checked in this order:
 *  - t_rollback is set: the previous token is delivered once more;
 *  - concat_inserted is set: the previous call synthesized a concatenation
 *    operator and saved the real token, which is delivered now;
 *  - otherwise program text is scanned starting at g_pos.
 * Besides the class, t_info, t_string (string/regexp/name text) and
 * t_double (number value) are filled in as appropriate.
 * String, regexp and name tokens are NUL-terminated in place,
 * i.e. the program text buffer gets modified.
 */
979 static uint32_t next_token(uint32_t expected)
980 {
981 #define concat_inserted (G.next_token__concat_inserted)
982 #define save_tclass     (G.next_token__save_tclass)
983 #define save_info       (G.next_token__save_info)
984 /* Initialized to TC_OPTERM: */
985 #define ltclass         (G.next_token__ltclass)
986
987         char *p, *s;
988         const char *tl;
989         uint32_t tc;
990         const uint32_t *ti;
991
992         if (t_rollback) {
993                 t_rollback = FALSE;
994
995         } else if (concat_inserted) {
996                 concat_inserted = FALSE;
997                 t_tclass = save_tclass;
998                 t_info = save_info;
999
1000         } else {
1001                 p = g_pos;
1002  readnext:
1003                 p = skip_spaces(p);
                     /* latch the line number before a newline seen below bumps t_lineno */
1004                 g_lineno = t_lineno;
                     /* a comment runs up to (not including) the newline or end of text */
1005                 if (*p == '#')
1006                         while (*p != '\n' && *p != '\0')
1007                                 p++;
1008
1009                 if (*p == '\n')
1010                         t_lineno++;
1011
1012                 if (*p == '\0') {
1013                         tc = TC_EOF;
1014
1015                 } else if (*p == '\"') {
1016                         /* it's a string */
                             /* chars returned by nextchar() are stored back into the
                              * same buffer: s is the write position, p the read position */
1017                         t_string = s = ++p;
1018                         while (*p != '\"') {
1019                                 char *pp;
1020                                 if (*p == '\0' || *p == '\n')
1021                                         syntax_error(EMSG_UNEXP_EOS);
1022                                 pp = p;
1023                                 *s++ = nextchar(&pp);
1024                                 p = pp;
1025                         }
1026                         p++;
1027                         *s = '\0';
1028                         tc = TC_STRING;
1029
1030                 } else if ((expected & TC_REGEXP) && *p == '/') {
1031                         /* it's regexp */
                             /* '/' starts a regexp only if the caller accepts one here */
1032                         t_string = s = ++p;
1033                         while (*p != '/') {
1034                                 if (*p == '\0' || *p == '\n')
1035                                         syntax_error(EMSG_UNEXP_EOS);
1036                                 *s = *p++;
1037                                 if (*s++ == '\\') {
                                             /* NOTE(review): this relies on bb_process_escape_sequence()
                                              * leaving pp == p when it recognizes no escape - confirm */
1038                                         char *pp = p;
1039                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1040                                         if (*p == '\\')
1041                                                 *s++ = '\\';
1042                                         if (pp == p)
1043                                                 *s++ = *p++;
1044                                         else
1045                                                 p = pp;
1046                                 }
1047                         }
1048                         p++;
1049                         *s = '\0';
1050                         tc = TC_REGEXP;
1051
1052                 } else if (*p == '.' || isdigit(*p)) {
1053                         /* it's a number */
1054                         char *pp = p;
1055                         t_double = my_strtod(&pp);
1056                         p = pp;
                             /* a '.' right after the parsed number is an error */
1057                         if (*p == '.')
1058                                 syntax_error(EMSG_UNEXP_TOKEN);
1059                         tc = TC_NUMBER;
1060
1061                 } else {
1062                         /* search for something known */
                             /* tokenlist is walked as a sequence of length-prefixed words;
                              * a length byte equal to NTCC carries no word and moves on to
                              * the next class bit in tc. ti walks tokeninfo in step with
                              * the words. */
1063                         tl = tokenlist;
1064                         tc = 0x00000001;
1065                         ti = tokeninfo;
1066                         while (*tl) {
1067                                 int l = (unsigned char) *tl++;
1068                                 if (l == (unsigned char) NTCC) {
1069                                         tc <<= 1;
1070                                         continue;
1071                                 }
1072                                 /* if token class is expected,
1073                                  * token matches,
1074                                  * and it's not a longer word,
1075                                  */
1076                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1077                                  && strncmp(p, tl, l) == 0
1078                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1079                                 ) {
1080                                         /* then this is what we are looking for */
1081                                         t_info = *ti;
1082                                         p += l;
1083                                         goto token_found;
1084                                 }
1085                                 ti++;
1086                                 tl += l;
1087                         }
1088                         /* not a known token */
1089
1090                         /* is it a name? (var/array/function) */
1091                         if (!isalnum_(*p))
1092                                 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1093                         /* yes */
                             /* The name is shifted one byte to the left, so that the
                              * terminating NUL lands on the name's last original byte
                              * and the char following the name stays intact.
                              * This overwrites the byte just before the name.
                              * NOTE(review): for a name at the very start of the text
                              * that byte precedes g_pos - verify callers provide it */
1094                         t_string = --p;
1095                         while (isalnum_(*++p)) {
1096                                 p[-1] = *p;
1097                         }
1098                         p[-1] = '\0';
1099                         tc = TC_VARIABLE;
1100                         /* also consume whitespace between functionname and bracket */
1101                         if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1102                                 p = skip_spaces(p);
                             /* note: '[' is consumed here, '(' is left in the input */
1103                         if (*p == '(') {
1104                                 tc = TC_FUNCTION;
1105                         } else {
1106                                 if (*p == '[') {
1107                                         p++;
1108                                         tc = TC_ARRAY;
1109                                 }
1110                         }
1111  token_found: ;
1112                 }
1113                 g_pos = p;
1114
1115                 /* skipping newlines in some cases */
                     /* (when the class of the previous token is in TC_NOTERM) */
1116                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1117                         goto readnext;
1118
1119                 /* insert concatenation operator when needed */
                     /* the real token is saved and gets delivered by the next call */
1120                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1121                         concat_inserted = TRUE;
1122                         save_tclass = tc;
1123                         save_info = t_info;
1124                         tc = TC_BINOP;
1125                         t_info = OC_CONCAT | SS | P(35);
1126                 }
1127
1128                 t_tclass = tc;
1129         }
1130         ltclass = t_tclass;
1131
1132         /* Are we ready for this? */
1133         if (!(ltclass & expected))
1134                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1135                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1136
1137         return ltclass;
1138 #undef concat_inserted
1139 #undef save_tclass
1140 #undef save_info
1141 #undef ltclass
1142 }
1143
1144 static void rollback_token(void)
1145 {
1146         t_rollback = TRUE;
1147 }
1148
1149 static node *new_node(uint32_t info)
1150 {
1151         node *n;
1152
1153         n = xzalloc(sizeof(node));
1154         n->info = info;
1155         n->lineno = g_lineno;
1156         return n;
1157 }
1158
1159 static void mk_re_node(const char *s, node *n, regex_t *re)
1160 {
1161         n->info = OC_REGEXP;
1162         n->l.re = re;
1163         n->r.ire = re + 1;
1164         xregcomp(re, s, REG_EXTENDED);
1165         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1166 }
1167
1168 static node *condition(void)
1169 {
1170         next_token(TC_SEQSTART);
1171         return parse_expr(TC_SEQTERM);
1172 }
1173
1174 /* parse expression terminated by given argument, return ptr
1175  * to built subtree. Terminator is eaten by parse_expr */
/* Implementation notes:
 * - sn is a dummy root on the stack; the finished tree hangs off sn.r.n,
 *   which is what gets returned (NULL when the terminator is the very
 *   first token).
 * - cn is the node added most recently. Every new node gets its a.n set
 *   to the node it was attached under; the operator branch follows these
 *   a.n links upward to find the attachment point.
 * - xtc is the set of token classes acceptable next; it is recomputed
 *   after each token.
 * - glptr remembers a getline node, so that a '<' seen afterwards is
 *   taken as input redirection rather than as comparison.
 * - Array subscripts, parenthesized subexpressions and function/builtin
 *   arguments are parsed by recursion (directly or via condition()).
 */
1176 static node *parse_expr(uint32_t iexp)
1177 {
1178         node sn;
1179         node *cn = &sn;
1180         node *vn, *glptr;
1181         uint32_t tc, xtc;
1182         var *v;
1183
             /* presumably PRIMASK in the dummy root guarantees that the
              * priority climb below stops there - TODO confirm */
1184         sn.info = PRIMASK;
1185         sn.r.n = glptr = NULL;
1186         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1187
1188         while (!((tc = next_token(xtc)) & iexp)) {
1189
1190                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1191                         /* input redirection (<) attached to glptr node */
1192                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1193                         cn->a.n = glptr;
1194                         xtc = TC_OPERAND | TC_UOPPRE;
1195                         glptr = NULL;
1196
1197                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1198                         /* for binary and postfix-unary operators, jump back over
1199                          * previous operators with higher priority */
1200                         vn = cn;
1201                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1202                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1203                         ) {
1204                                 vn = vn->a.n;
1205                         }
1206                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1207                                 t_info += P(6);
                             /* splice the new operator node in between vn and the node
                              * vn was attached under */
1208                         cn = vn->a.n->r.n = new_node(t_info);
1209                         cn->a.n = vn->a.n;
1210                         if (tc & TC_BINOP) {
1211                                 cn->l.n = vn;
1212                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1213                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1214                                         /* it's a pipe */
1215                                         next_token(TC_GETLINE);
1216                                         /* give maximum priority to this pipe */
1217                                         cn->info &= ~PRIMASK;
1218                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1219                                 }
1220                         } else {
                                     /* postfix operator: its operand goes to the right link */
1221                                 cn->r.n = vn;
1222                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1223                         }
1224                         vn->a.n = cn;
1225
1226                 } else {
1227                         /* for operands and prefix-unary operators, attach them
1228                          * to last node */
1229                         vn = cn;
1230                         cn = vn->r.n = new_node(t_info);
1231                         cn->a.n = vn;
1232                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1233                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1234                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1235                                 /* one should be very careful with switch on tclass -
1236                                  * only simple tclasses should be used! */
1237                                 switch (tc) {
1238                                 case TC_VARIABLE:
1239                                 case TC_ARRAY:
1240                                         cn->info = OC_VAR;
                                             /* a name found in ahash becomes OC_FNARG,
                                              * referenced by index instead of by var pointer */
1241                                         v = hash_search(ahash, t_string);
1242                                         if (v != NULL) {
1243                                                 cn->info = OC_FNARG;
1244                                                 cn->l.aidx = v->x.aidx;
1245                                         } else {
1246                                                 cn->l.v = newvar(t_string);
1247                                         }
1248                                         if (tc & TC_ARRAY) {
1249                                                 cn->info |= xS;
1250                                                 cn->r.n = parse_expr(TC_ARRTERM);
1251                                         }
1252                                         break;
1253
1254                                 case TC_NUMBER:
1255                                 case TC_STRING:
                                             /* constants get a var of their own */
1256                                         cn->info = OC_VAR;
1257                                         v = cn->l.v = xzalloc(sizeof(var));
1258                                         if (tc & TC_NUMBER)
1259                                                 setvar_i(v, t_double);
1260                                         else
1261                                                 setvar_s(v, t_string);
1262                                         break;
1263
1264                                 case TC_REGEXP:
1265                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1266                                         break;
1267
1268                                 case TC_FUNCTION:
1269                                         cn->info = OC_FUNC;
1270                                         cn->r.f = newfunc(t_string);
1271                                         cn->l.n = condition();
1272                                         break;
1273
1274                                 case TC_SEQSTART:
                                             /* the node created above is replaced by the
                                              * subtree of the parenthesized expression */
1275                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1276                                         cn->a.n = vn;
1277                                         break;
1278
1279                                 case TC_GETLINE:
1280                                         glptr = cn;
1281                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1282                                         break;
1283
1284                                 case TC_BUILTIN:
1285                                         cn->l.n = condition();
1286                                         break;
1287                                 }
1288                         }
1289                 }
1290         }
1291         return sn.r.n;
1292 }
1293
1294 /* add node to chain. Return ptr to alloc'd node */
1295 static node *chain_node(uint32_t info)
1296 {
1297         node *n;
1298
1299         if (!seq->first)
1300                 seq->first = seq->last = new_node(0);
1301
1302         if (seq->programname != g_progname) {
1303                 seq->programname = g_progname;
1304                 n = chain_node(OC_NEWSOURCE);
1305                 n->l.new_progname = xstrdup(g_progname);
1306         }
1307
1308         n = seq->last;
1309         n->info = info;
1310         seq->last = n->a.n = new_node(OC_DONE);
1311
1312         return n;
1313 }
1314
1315 static void chain_expr(uint32_t info)
1316 {
1317         node *n;
1318
1319         n = chain_node(info);
1320         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1321         if (t_tclass & TC_GRPTERM)
1322                 rollback_token();
1323 }
1324
1325 static node *chain_loop(node *nn)
1326 {
1327         node *n, *n2, *save_brk, *save_cont;
1328
1329         save_brk = break_ptr;
1330         save_cont = continue_ptr;
1331
1332         n = chain_node(OC_BR | Vx);
1333         continue_ptr = new_node(OC_EXEC);
1334         break_ptr = new_node(OC_EXEC);
1335         chain_group();
1336         n2 = chain_node(OC_EXEC | Vx);
1337         n2->l.n = nn;
1338         n2->a.n = n;
1339         continue_ptr->a.n = n2;
1340         break_ptr->a.n = n->r.n = seq->last;
1341
1342         continue_ptr = save_cont;
1343         break_ptr = save_brk;
1344
1345         return n;
1346 }
1347
1348 /* parse group and attach it to chain */
1349 static void chain_group(void)
1350 {
1351         uint32_t c;
1352         node *n, *n2, *n3;
1353
1354         do {
1355                 c = next_token(TC_GRPSEQ);
1356         } while (c & TC_NEWLINE);
1357
1358         if (c & TC_GRPSTART) {
1359                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1360                         if (t_tclass & TC_NEWLINE)
1361                                 continue;
1362                         rollback_token();
1363                         chain_group();
1364                 }
1365         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1366                 rollback_token();
1367                 chain_expr(OC_EXEC | Vx);
1368         } else {                                                /* TC_STATEMNT */
1369                 switch (t_info & OPCLSMASK) {
1370                 case ST_IF:
1371                         n = chain_node(OC_BR | Vx);
1372                         n->l.n = condition();
1373                         chain_group();
1374                         n2 = chain_node(OC_EXEC);
1375                         n->r.n = seq->last;
1376                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1377                                 chain_group();
1378                                 n2->a.n = seq->last;
1379                         } else {
1380                                 rollback_token();
1381                         }
1382                         break;
1383
1384                 case ST_WHILE:
1385                         n2 = condition();
1386                         n = chain_loop(NULL);
1387                         n->l.n = n2;
1388                         break;
1389
1390                 case ST_DO:
1391                         n2 = chain_node(OC_EXEC);
1392                         n = chain_loop(NULL);
1393                         n2->a.n = n->a.n;
1394                         next_token(TC_WHILE);
1395                         n->l.n = condition();
1396                         break;
1397
1398                 case ST_FOR:
1399                         next_token(TC_SEQSTART);
1400                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1401                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1402                                 if ((n2->info & OPCLSMASK) != OC_IN)
1403                                         syntax_error(EMSG_UNEXP_TOKEN);
1404                                 n = chain_node(OC_WALKINIT | VV);
1405                                 n->l.n = n2->l.n;
1406                                 n->r.n = n2->r.n;
1407                                 n = chain_loop(NULL);
1408                                 n->info = OC_WALKNEXT | Vx;
1409                                 n->l.n = n2->l.n;
1410                         } else {                        /* for (;;) */
1411                                 n = chain_node(OC_EXEC | Vx);
1412                                 n->l.n = n2;
1413                                 n2 = parse_expr(TC_SEMICOL);
1414                                 n3 = parse_expr(TC_SEQTERM);
1415                                 n = chain_loop(n3);
1416                                 n->l.n = n2;
1417                                 if (!n2)
1418                                         n->info = OC_EXEC;
1419                         }
1420                         break;
1421
1422                 case OC_PRINT:
1423                 case OC_PRINTF:
1424                         n = chain_node(t_info);
1425                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1426                         if (t_tclass & TC_OUTRDR) {
1427                                 n->info |= t_info;
1428                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1429                         }
1430                         if (t_tclass & TC_GRPTERM)
1431                                 rollback_token();
1432                         break;
1433
1434                 case OC_BREAK:
1435                         n = chain_node(OC_EXEC);
1436                         n->a.n = break_ptr;
1437                         break;
1438
1439                 case OC_CONTINUE:
1440                         n = chain_node(OC_EXEC);
1441                         n->a.n = continue_ptr;
1442                         break;
1443
1444                 /* delete, next, nextfile, return, exit */
1445                 default:
1446                         chain_expr(t_info);
1447                 }
1448         }
1449 }
1450
1451 static void parse_program(char *p)
1452 {
1453         uint32_t tclass;
1454         node *cn;
1455         func *f;
1456         var *v;
1457
1458         g_pos = p;
1459         t_lineno = 1;
1460         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1461                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1462
1463                 if (tclass & TC_OPTERM)
1464                         continue;
1465
1466                 seq = &mainseq;
1467                 if (tclass & TC_BEGIN) {
1468                         seq = &beginseq;
1469                         chain_group();
1470
1471                 } else if (tclass & TC_END) {
1472                         seq = &endseq;
1473                         chain_group();
1474
1475                 } else if (tclass & TC_FUNCDECL) {
1476                         next_token(TC_FUNCTION);
1477                         g_pos++;
1478                         f = newfunc(t_string);
1479                         f->body.first = NULL;
1480                         f->nargs = 0;
1481                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1482                                 v = findvar(ahash, t_string);
1483                                 v->x.aidx = f->nargs++;
1484
1485                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1486                                         break;
1487                         }
1488                         seq = &f->body;
1489                         chain_group();
1490                         clear_array(ahash);
1491
1492                 } else if (tclass & TC_OPSEQ) {
1493                         rollback_token();
1494                         cn = chain_node(OC_TEST);
1495                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1496                         if (t_tclass & TC_GRPSTART) {
1497                                 rollback_token();
1498                                 chain_group();
1499                         } else {
1500                                 chain_node(OC_PRINT);
1501                         }
1502                         cn->r.n = mainseq.last;
1503
1504                 } else /* if (tclass & TC_GRPSTART) */ {
1505                         rollback_token();
1506                         chain_group();
1507                 }
1508         }
1509 }
1510
1511
1512 /* -------- program execution part -------- */
1513
1514 static node *mk_splitter(const char *s, tsplitter *spl)
1515 {
1516         regex_t *re, *ire;
1517         node *n;
1518
1519         re = &spl->re[0];
1520         ire = &spl->re[1];
1521         n = &spl->n;
1522         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1523                 regfree(re);
1524                 regfree(ire); // TODO: nuke ire, use re+1?
1525         }
1526         if (s[0] && s[1]) { /* strlen(s) > 1 */
1527                 mk_re_node(s, n, re);
1528         } else {
1529                 n->info = (uint32_t) s[0];
1530         }
1531
1532         return n;
1533 }
1534
1535 /* use node as a regular expression. Supplied with node ptr and regex_t
1536  * storage space. Return ptr to regex (if result points to preg, it should
1537  * be later regfree'd manually
1538  */
1539 static regex_t *as_regex(node *op, regex_t *preg)
1540 {
1541         int cflags;
1542         var *v;
1543         const char *s;
1544
1545         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1546                 return icase ? op->r.ire : op->l.re;
1547         }
1548         v = nvalloc(1);
1549         s = getvar_s(evaluate(op, v));
1550
1551         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1552         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1553          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1554          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1555          * (maybe gsub is not supposed to use REG_EXTENDED?).
1556          */
1557         if (regcomp(preg, s, cflags)) {
1558                 cflags &= ~REG_EXTENDED;
1559                 xregcomp(preg, s, cflags);
1560         }
1561         nvfree(v);
1562         return preg;
1563 }
1564
/* Gradually growing buffer: make sure b can hold more than n bytes.
 * b is reallocated when it is NULL or when n >= *size; the new size,
 * n + n/2 + 80, is stored in *size. Since reallocation happens even for
 * n == old size, there is always at least one extra allocated byte.
 * Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1577
1578 /* resize field storage space */
1579 static void fsrealloc(int size)
1580 {
1581         int i;
1582
1583         if (size >= maxfields) {
1584                 i = maxfields;
1585                 maxfields = size + 16;
1586                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1587                 for (; i < maxfields; i++) {
1588                         Fields[i].type = VF_SPECIAL;
1589                         Fields[i].string = NULL;
1590                 }
1591         }
1592         /* if size < nfields, clear extra field variables */
1593         for (i = size; i < nfields; i++) {
1594                 clrvar(Fields + i);
1595         }
1596         nfields = size;
1597 }
1598
/* Split string s into fields as directed by splitter node spl.
 * *slist receives a buffer allocated here with xzalloc(), in which the
 * fields are stored as consecutive NUL-terminated strings; this function
 * does not free it. The return value is the number of fields.
 * The way of splitting is selected by spl->info:
 *  - OC_REGEXP: separators are matches of the node's regexp
 *    (the case-insensitive one if icase is set);
 *  - char code 0: every char of s is a field of its own;
 *  - ' ': fields are runs of non-whitespace chars;
 *  - any other char: that char separates fields (in both upper and lower
 *    case if icase is set).
 * When RS is an empty string, '\n' acts as a separator too
 * in regexp and single-char modes.
 */
1599 static int awk_split(const char *s, node *spl, char **slist)
1600 {
1601         int l, n;
             /* c[0], c[1]: separator char (two slots for the two cases);
              * c[2]: '\n' if RS is empty, else NUL; c[3]: terminator */
1602         char c[4];
1603         char *s1;
1604         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1605
1606         /* in worst case, each char would be a separate field */
1607         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1608         strcpy(s1, s);
1609
1610         c[0] = c[1] = (char)spl->info;
1611         c[2] = c[3] = '\0';
1612         if (*getvar_s(intvar[RS]) == '\0')
1613                 c[2] = '\n';
1614
1615         n = 0;
1616         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1617                 if (!*s)
1618                         return n; /* "": zero fields */
1619                 n++; /* at least one field will be there */
1620                 do {
1621                         l = strcspn(s, c+2); /* len till next NUL or \n */
                             /* a match counts only if it starts no later than l */
1622                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1623                          && pmatch[0].rm_so <= l
1624                         ) {
1625                                 l = pmatch[0].rm_so;
                                     /* a match ending at offset 0 is empty: take one
                                      * char as a field so that s keeps advancing */
1626                                 if (pmatch[0].rm_eo == 0) {
1627                                         l++;
1628                                         pmatch[0].rm_eo++;
1629                                 }
1630                                 n++; /* we saw yet another delimiter */
1631                         } else {
                                     /* no usable match: the field extends up to l;
                                      * if a char stands there, it is skipped as well */
1632                                 pmatch[0].rm_eo = l;
1633                                 if (s[l])
1634                                         pmatch[0].rm_eo++;
1635                         }
1636                         memcpy(s1, s, l);
1637                         /* make sure we remove *all* of the separator chars */
1638                         do {
1639                                 s1[l] = '\0';
1640                         } while (++l < pmatch[0].rm_eo);
1641                         nextword(&s1);
1642                         s += pmatch[0].rm_eo;
1643                 } while (*s);
1644                 return n;
1645         }
1646         if (c[0] == '\0') {  /* null split */
1647                 while (*s) {
1648                         *s1++ = *s++;
1649                         *s1++ = '\0';
1650                         n++;
1651                 }
1652                 return n;
1653         }
1654         if (c[0] != ' ') {  /* single-character split */
1655                 if (icase) {
1656                         c[0] = toupper(c[0]);
1657                         c[1] = tolower(c[1]);
1658                 }
                     /* s1 still holds the copy of s made above;
                      * separators are replaced with NULs in place */
1659                 if (*s1)
1660                         n++;
1661                 while ((s1 = strpbrk(s1, c)) != NULL) {
1662                         *s1++ = '\0';
1663                         n++;
1664                 }
1665                 return n;
1666         }
1667         /* space split */
1668         while (*s) {
1669                 s = skip_whitespace(s);
1670                 if (!*s)
1671                         break;
1672                 n++;
1673                 while (*s && !isspace(*s))
1674                         *s1++ = *s++;
1675                 *s1++ = '\0';
1676         }
1677         return n;
1678 }
1679
1680 static void split_f0(void)
1681 {
1682 /* static char *fstrings; */
1683 #define fstrings (G.split_f0__fstrings)
1684
1685         int i, n;
1686         char *s;
1687
1688         if (is_f0_split)
1689                 return;
1690
1691         is_f0_split = TRUE;
1692         free(fstrings);
1693         fsrealloc(0);
1694         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1695         fsrealloc(n);
1696         s = fstrings;
1697         for (i = 0; i < n; i++) {
1698                 Fields[i].string = nextword(&s);
1699                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1700         }
1701
1702         /* set NF manually to avoid side effects */
1703         clrvar(intvar[NF]);
1704         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1705         intvar[NF]->number = nfields;
1706 #undef fstrings
1707 }
1708
1709 /* perform additional actions when some internal variables changed */
1710 static void handle_special(var *v)
1711 {
1712         int n;
1713         char *b;
1714         const char *sep, *s;
1715         int sl, l, len, i, bsize;
1716
1717         if (!(v->type & VF_SPECIAL))
1718                 return;
1719
1720         if (v == intvar[NF]) {
1721                 n = (int)getvar_i(v);
1722                 fsrealloc(n);
1723
1724                 /* recalculate $0 */
1725                 sep = getvar_s(intvar[OFS]);
1726                 sl = strlen(sep);
1727                 b = NULL;
1728                 len = 0;
1729                 for (i = 0; i < n; i++) {
1730                         s = getvar_s(&Fields[i]);
1731                         l = strlen(s);
1732                         if (b) {
1733                                 memcpy(b+len, sep, sl);
1734                                 len += sl;
1735                         }
1736                         b = qrealloc(b, len+l+sl, &bsize);
1737                         memcpy(b+len, s, l);
1738                         len += l;
1739                 }
1740                 if (b)
1741                         b[len] = '\0';
1742                 setvar_p(intvar[F0], b);
1743                 is_f0_split = TRUE;
1744
1745         } else if (v == intvar[F0]) {
1746                 is_f0_split = FALSE;
1747
1748         } else if (v == intvar[FS]) {
1749                 mk_splitter(getvar_s(v), &fsplitter);
1750
1751         } else if (v == intvar[RS]) {
1752                 mk_splitter(getvar_s(v), &rsplitter);
1753
1754         } else if (v == intvar[IGNORECASE]) {
1755                 icase = istrue(v);
1756
1757         } else {                                /* $n */
1758                 n = getvar_i(intvar[NF]);
1759                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1760                 /* right here v is invalid. Just to note... */
1761         }
1762 }
1763
1764 /* step through func/builtin/etc arguments */
1765 static node *nextarg(node **pn)
1766 {
1767         node *n;
1768
1769         n = *pn;
1770         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1771                 *pn = n->r.n;
1772                 n = n->l.n;
1773         } else {
1774                 *pn = NULL;
1775         }
1776         return n;
1777 }
1778
1779 static void hashwalk_init(var *v, xhash *array)
1780 {
1781         hash_item *hi;
1782         unsigned i;
1783         walker_list *w;
1784         walker_list *prev_walker;
1785
1786         if (v->type & VF_WALK) {
1787                 prev_walker = v->x.walker;
1788         } else {
1789                 v->type |= VF_WALK;
1790                 prev_walker = NULL;
1791         }
1792         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1793
1794         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1795         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1796         w->cur = w->end = w->wbuf;
1797         w->prev = prev_walker;
1798         for (i = 0; i < array->csize; i++) {
1799                 hi = array->items[i];
1800                 while (hi) {
1801                         strcpy(w->end, hi->name);
1802                         nextword(&w->end);
1803                         hi = hi->next;
1804                 }
1805         }
1806 }
1807
1808 static int hashwalk_next(var *v)
1809 {
1810         walker_list *w = v->x.walker;
1811
1812         if (w->cur >= w->end) {
1813                 walker_list *prev_walker = w->prev;
1814
1815                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1816                 free(w);
1817                 v->x.walker = prev_walker;
1818                 return FALSE;
1819         }
1820
1821         setvar_s(v, nextword(&w->cur));
1822         return TRUE;
1823 }
1824
1825 /* evaluate node, return 1 when result is true, 0 otherwise */
1826 static int ptest(node *pattern)
1827 {
1828         /* ptest__v is "static": to save stack space? */
1829         return istrue(evaluate(pattern, &G.ptest__v));
1830 }
1831
1832 /* Read the next record from stream rsm into variable v.
      *
      * Input is accumulated in rsm->buffer; rsm->adv, rsm->pos and rsm->size
      * are loaded on entry and written back on exit, so unconsumed data is
      * carried over to the next call. The record separator comes from the
      * global rsplitter and is handled in one of three ways: a regexp, a single
      * byte, or (when that byte is NUL) runs of two or more newlines.
      *
      * On success v receives the record text (flagged VF_USER) and the RT
      * variable receives the separator text that ended it.
      *
      * Returns 1 when a record was stored, 0 when no input was left,
      * -1 when safe_read() failed (the ERRNO variable is then set from errno).
      */
1833 static int awk_getline(rstream *rsm, var *v)
1834 {
1835         char *b;
1836         regmatch_t pmatch[2];
1837         int size, a, p, pp = 0;
1838         int fd, so, eo, r, rp;
1839         char c, *m, *s;
1840
1841         debug_printf_eval("entered %s()\n", __func__);
1842
1843         /* we're using our own buffer since we need access to accumulating
1844          * characters
1845          */
1846         fd = fileno(rsm->F);
1847         m = rsm->buffer;
1848         a = rsm->adv;
1849         p = rsm->pos;
1850         size = rsm->size;
              /* c: the splitter's info word narrowed to a char; it is the byte
               * searched for below when the splitter is not a regexp */
1851         c = (char) rsplitter.n.info;
              /* rp: offset in b where the returned record text begins */
1852         rp = 0;
1853
1854         if (!m)
1855                 m = qrealloc(m, 256, &size);
1856
1857         do {
                      /* b: start of unconsumed data; so/eo: offsets of the
                       * separator's start/end within b, defaulting to the end
                       * of the buffered data */
1858                 b = m + a;
1859                 so = eo = p;
1860                 r = 1;
1861                 if (p > 0) {
1862                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1863                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1864                                                         b, 1, pmatch, 0) == 0) {
1865                                         so = pmatch[0].rm_so;
1866                                         eo = pmatch[0].rm_eo;
                                              /* a match is accepted only when data
                                               * follows it; presumably a match that
                                               * reaches the end of the buffer could
                                               * still grow with more input */
1867                                         if (b[eo] != '\0')
1868                                                 break;
1869                                 }
1870                         } else if (c != '\0') {
                                      /* search only the bytes added by the latest
                                       * read (from pp on); a NUL byte within them
                                       * also terminates the record */
1871                                 s = strchr(b+pp, c);
1872                                 if (!s)
1873                                         s = memchr(b+pp, '\0', p - pp);
1874                                 if (s) {
1875                                         so = eo = s-b;
1876                                         eo++;
1877                                         break;
1878                                 }
1879                         } else {
                                      /* skip leading newlines, then split at the
                                       * first "\n\n" and swallow every newline
                                       * that follows it */
1880                                 while (b[rp] == '\n')
1881                                         rp++;
1882                                 s = strstr(b+rp, "\n\n");
1883                                 if (s) {
1884                                         so = eo = s-b;
1885                                         while (b[eo] == '\n')
1886                                                 eo++;
1887                                         if (b[eo] != '\0')
1888                                                 break;
1889                                 }
1890                         }
1891                 }
1892
                      /* no separator found yet: move the unconsumed data (and
                       * its terminating NUL) to the front of the buffer */
1893                 if (a > 0) {
1894                         memmove(m, m+a, p+1);
1895                         b = m;
1896                         a = 0;
1897                 }
1898
                      /* ask for room for more input, then append what
                       * safe_read() delivers after the p bytes already held */
1899                 m = qrealloc(m, a+p+128, &size);
1900                 b = m + a;
1901                 pp = p;
1902                 p += safe_read(fd, b+p, size-p-1);
                      /* p shrank: safe_read() returned a negative value */
1903                 if (p < pp) {
1904                         p = 0;
1905                         r = 0;
1906                         setvar_i(intvar[ERRNO], errno);
1907                 }
1908                 b[p] = '\0';
1909
              /* keep looping only while the read added new bytes */
1910         } while (p > pp);
1911
1912         if (p == 0) {
                      /* nothing buffered: r becomes 0 (no input) or -1 (read error) */
1913                 r--;
1914         } else {
                      /* temporarily NUL-terminate at so, then at eo, to copy out
                       * the record and the separator; the bytes are restored */
1915                 c = b[so]; b[so] = '\0';
1916                 setvar_s(v, b+rp);
1917                 v->type |= VF_USER;
1918                 b[so] = c;
1919                 c = b[eo]; b[eo] = '\0';
1920                 setvar_s(intvar[RT], b+so);
1921                 b[eo] = c;
1922         }
1923
              /* save the buffer state, with the record and separator consumed.
               * NOTE(review): after a read failure p is 0 while eo still holds the
               * pre-read length, so rsm->pos can become negative - confirm that
               * callers do not read from the stream again in that case. */
1924         rsm->buffer = m;
1925         rsm->adv = a + eo;
1926         rsm->pos = p - eo;
1927         rsm->size = size;
1928
1929         debug_printf_eval("returning from %s(): %d\n", __func__, r);
1930
1931         return r;
1932 }
1933
1934 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1935 {
1936         int r = 0;
1937         char c;
1938         const char *s = format;
1939
1940         if (int_as_int && n == (int)n) {
1941                 r = snprintf(b, size, "%d", (int)n);
1942         } else {
1943                 do { c = *s; } while (c && *++s);
1944                 if (strchr("diouxX", c)) {
1945                         r = snprintf(b, size, format, (int)n);
1946                 } else if (strchr("eEfgG", c)) {
1947                         r = snprintf(b, size, format, n);
1948                 } else {
1949                         syntax_error(EMSG_INV_FMT);
1950                 }
1951         }
1952         return r;
1953 }
1954
1955 /* formatted output into an allocated buffer, return ptr to buffer */
1956 static char *awk_printf(node *n)
1957 {
1958         char *b = NULL;
1959         char *fmt, *s, *f;
1960         const char *s1;
1961         int i, j, incr, bsize;
1962         char c, c1;
1963         var *v, *arg;
1964
1965         v = nvalloc(1);
1966         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1967
1968         i = 0;
1969         while (*f) {
1970                 s = f;
1971                 while (*f && (*f != '%' || *++f == '%'))
1972                         f++;
1973                 while (*f && !isalpha(*f)) {
1974                         if (*f == '*')
1975                                 syntax_error("%*x formats are not supported");
1976                         f++;
1977                 }
1978
1979                 incr = (f - s) + MAXVARFMT;
1980                 b = qrealloc(b, incr + i, &bsize);
1981                 c = *f;
1982                 if (c != '\0')
1983                         f++;
1984                 c1 = *f;
1985                 *f = '\0';
1986                 arg = evaluate(nextarg(&n), v);
1987
1988                 j = i;
1989                 if (c == 'c' || !c) {
1990                         i += sprintf(b+i, s, is_numeric(arg) ?
1991                                         (char)getvar_i(arg) : *getvar_s(arg));
1992                 } else if (c == 's') {
1993                         s1 = getvar_s(arg);
1994                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1995                         i += sprintf(b+i, s, s1);
1996                 } else {
1997                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1998                 }
1999                 *f = c1;
2000
2001                 /* if there was an error while sprintf, return value is negative */
2002                 if (i < j)
2003                         i = j;
2004         }
2005
2006         free(fmt);
2007         nvfree(v);
2008         b = xrealloc(b, i + 1);
2009         b[i] = '\0';
2010         return b;
2011 }
2012
2013 /* Common substitution routine.
      * Replace (nm)'th substring of (src) that matches (rn) with (repl),
      * store result into (dest), return number of matches processed.
      * If nm = 0, replace all matches.
      * If src or dst is NULL, use $0.
      * If subexp != 0, enable subexpression matching (\1-\9).
      *
      * Inside (repl), '&' stands for the matched text. A backslash run before
      * '&' or before a digit (when subexp is set) is halved in the output, and
      * its parity decides whether the character is copied literally or
      * expanded - see the comments in the loop below.
      *
      * The result buffer is handed to (dest) with setvar_p().
      */
2020 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2021 {
2022         char *resbuf;
2023         const char *sp;
2024         int match_no, residx, replen, resbufsize;
2025         int regexec_flags;
2026         regmatch_t pmatch[10];
2027         regex_t sreg, *regex;
2028
2029         resbuf = NULL;
2030         residx = 0;
2031         match_no = 0;
2032         regexec_flags = 0;
              /* as_regex() may compile into sreg; in that case regex == &sreg
               * and it is released with regfree() before returning */
2033         regex = as_regex(rn, &sreg);
2034         sp = getvar_s(src ? src : intvar[F0]);
2035         replen = strlen(repl);
              /* sp walks through the source; each pass handles one match */
2036         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2037                 int so = pmatch[0].rm_so;
2038                 int eo = pmatch[0].rm_eo;
2039
2040                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                      /* copy everything up to the end of the match; if this match
                       * is to be replaced, the matched part is backed out below */
2041                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2042                 memcpy(resbuf + residx, sp, eo);
2043                 residx += eo;
                      /* true for every match when nm == 0, otherwise from the
                       * nm'th match on (the loop breaks right after that one) */
2044                 if (++match_no >= nm) {
2045                         const char *s;
2046                         int nbs;
2047
2048                         /* replace */
2049                         residx -= (eo - so);
                              /* nbs: number of consecutive backslashes just copied */
2050                         nbs = 0;
2051                         for (s = repl; *s; s++) {
2052                                 char c = resbuf[residx++] = *s;
2053                                 if (c == '\\') {
2054                                         nbs++;
2055                                         continue;
2056                                 }
2057                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2058                                         int j;
                                              /* un-copy c and half (rounded up) of
                                               * the backslashes in front of it */
2059                                         residx -= ((nbs + 3) >> 1);
2060                                         j = 0;
                                              /* a digit selects subexpression j and
                                               * counts as one more backslash, so it
                                               * expands after an odd run while '&'
                                               * expands after an even run */
2061                                         if (c != '&') {
2062                                                 j = c - '0';
2063                                                 nbs++;
2064                                         }
2065                                         if (nbs % 2) {
                                                      /* escaped: emit c literally */
2066                                                 resbuf[residx++] = c;
2067                                         } else {
                                                      /* insert the text of match j */
2068                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2069                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2070                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2071                                                 residx += n;
2072                                         }
2073                                 }
2074                                 nbs = 0;
2075                         }
2076                 }
2077
                      /* later searches start mid-string: '^' must not match there */
2078                 regexec_flags = REG_NOTBOL;
2079                 sp += eo;
2080                 if (match_no == nm)
2081                         break;
2082                 if (eo == so) {
2083                         /* Empty match (e.g. "b*" will match anywhere).
2084                          * Advance by one char. */
2085 //BUG (bug 1333):
2086 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2087 //... and will erroneously match "b" even though it is NOT at the word start.
2088 //we need REG_NOTBOW but it does not exist...
2089 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2090 //it should be able to do it correctly.
2091                         /* Subtle: this is safe only because
2092                          * qrealloc allocated at least one extra byte */
2093                         resbuf[residx] = *sp;
                              /* end of source reached: the NUL just stored
                               * terminates the result */
2094                         if (*sp == '\0')
2095                                 goto ret;
2096                         sp++;
2097                         residx++;
2098                 }
2099         }
2100
              /* append the unmatched remainder of the source */
2101         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2102         strcpy(resbuf + residx, sp);
2103  ret:
2104         //bb_error_msg("end sp:'%s'%p", sp,sp);
2105         setvar_p(dest ? dest : intvar[F0], resbuf);
2106         if (regex == &sreg)
2107                 regfree(regex);
2108         return match_no;
2109 }
2110
2111 static NOINLINE int do_mktime(const char *ds)
2112 {
2113         struct tm then;
2114         int count;
2115
2116         /*memset(&then, 0, sizeof(then)); - not needed */
2117         then.tm_isdst = -1; /* default is unknown */
2118
2119         /* manpage of mktime says these fields are ints,
2120          * so we can sscanf stuff directly into them */
2121         count = sscanf(ds, "%u %u %u %u %u %u %d",
2122                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2123                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2124                 &then.tm_isdst);
2125
2126         if (count < 6
2127          || (unsigned)then.tm_mon < 1
2128          || (unsigned)then.tm_year < 1900
2129         ) {
2130                 return -1;
2131         }
2132
2133         then.tm_mon -= 1;
2134         then.tm_year -= 1900;
2135
2136         return mktime(&then);
2137 }
2138
2139 static NOINLINE var *exec_builtin(node *op, var *res)
2140 {
2141 #define tspl (G.exec_builtin__tspl)
2142
2143         var *tv;
2144         node *an[4];
2145         var *av[4];
2146         const char *as[4];
2147         regmatch_t pmatch[2];
2148         regex_t sreg, *re;
2149         node *spl;
2150         uint32_t isr, info;
2151         int nargs;
2152         time_t tt;
2153         int i, l, ll, n;
2154
2155         tv = nvalloc(4);
2156         isr = info = op->info;
2157         op = op->l.n;
2158
2159         av[2] = av[3] = NULL;
2160         for (i = 0; i < 4 && op; i++) {
2161                 an[i] = nextarg(&op);
2162                 if (isr & 0x09000000)
2163                         av[i] = evaluate(an[i], &tv[i]);
2164                 if (isr & 0x08000000)
2165                         as[i] = getvar_s(av[i]);
2166                 isr >>= 1;
2167         }
2168
2169         nargs = i;
2170         if ((uint32_t)nargs < (info >> 30))
2171                 syntax_error(EMSG_TOO_FEW_ARGS);
2172
2173         info &= OPNMASK;
2174         switch (info) {
2175
2176         case B_a2:
2177                 if (ENABLE_FEATURE_AWK_LIBM)
2178                         setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2179                 else
2180                         syntax_error(EMSG_NO_MATH);
2181                 break;
2182
2183         case B_sp: {
2184                 char *s, *s1;
2185
2186                 if (nargs > 2) {
2187                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2188                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2189                 } else {
2190                         spl = &fsplitter.n;
2191                 }
2192
2193                 n = awk_split(as[0], spl, &s);
2194                 s1 = s;
2195                 clear_array(iamarray(av[1]));
2196                 for (i = 1; i <= n; i++)
2197                         setari_u(av[1], i, nextword(&s));
2198                 free(s1);
2199                 setvar_i(res, n);
2200                 break;
2201         }
2202
2203         case B_ss: {
2204                 char *s;
2205
2206                 l = strlen(as[0]);
2207                 i = getvar_i(av[1]) - 1;
2208                 if (i > l)
2209                         i = l;
2210                 if (i < 0)
2211                         i = 0;
2212                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2213                 if (n < 0)
2214                         n = 0;
2215                 s = xstrndup(as[0]+i, n);
2216                 setvar_p(res, s);
2217                 break;
2218         }
2219
2220         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2221          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2222         case B_an:
2223                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2224                 break;
2225
2226         case B_co:
2227                 setvar_i(res, ~getvar_i_int(av[0]));
2228                 break;
2229
2230         case B_ls:
2231                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2232                 break;
2233
2234         case B_or:
2235                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2236                 break;
2237
2238         case B_rs:
2239                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2240                 break;
2241
2242         case B_xo:
2243                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2244                 break;
2245
2246         case B_lo:
2247         case B_up: {
2248                 char *s, *s1;
2249                 s1 = s = xstrdup(as[0]);
2250                 while (*s1) {
2251                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2252                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2253                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2254                         s1++;
2255                 }
2256                 setvar_p(res, s);
2257                 break;
2258         }
2259
2260         case B_ix:
2261                 n = 0;
2262                 ll = strlen(as[1]);
2263                 l = strlen(as[0]) - ll;
2264                 if (ll > 0 && l >= 0) {
2265                         if (!icase) {
2266                                 char *s = strstr(as[0], as[1]);
2267                                 if (s)
2268                                         n = (s - as[0]) + 1;
2269                         } else {
2270                                 /* this piece of code is terribly slow and
2271                                  * really should be rewritten
2272                                  */
2273                                 for (i = 0; i <= l; i++) {
2274                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2275                                                 n = i+1;
2276                                                 break;
2277                                         }
2278                                 }
2279                         }
2280                 }
2281                 setvar_i(res, n);
2282                 break;
2283
2284         case B_ti:
2285                 if (nargs > 1)
2286                         tt = getvar_i(av[1]);
2287                 else
2288                         time(&tt);
2289                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2290                 i = strftime(g_buf, MAXVARFMT,
2291                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2292                         localtime(&tt));
2293                 g_buf[i] = '\0';
2294                 setvar_s(res, g_buf);
2295                 break;
2296
2297         case B_mt:
2298                 setvar_i(res, do_mktime(as[0]));
2299                 break;
2300
2301         case B_ma:
2302                 re = as_regex(an[1], &sreg);
2303                 n = regexec(re, as[0], 1, pmatch, 0);
2304                 if (n == 0) {
2305                         pmatch[0].rm_so++;
2306                         pmatch[0].rm_eo++;
2307                 } else {
2308                         pmatch[0].rm_so = 0;
2309                         pmatch[0].rm_eo = -1;
2310                 }
2311                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2312                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2313                 setvar_i(res, pmatch[0].rm_so);
2314                 if (re == &sreg)
2315                         regfree(re);
2316                 break;
2317
2318         case B_ge:
2319                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2320                 break;
2321
2322         case B_gs:
2323                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2324                 break;
2325
2326         case B_su:
2327                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2328                 break;
2329         }
2330
2331         nvfree(tv);
2332         return res;
2333 #undef tspl
2334 }
2335
2336 /*
2337  * Evaluate node - the heart of the program. Supplied with subtree
2338  * and place where to store result. returns ptr to result.
2339  */
/* XC(n) discards the low 8 bits of n. evaluate() switches on
 * XC(opinfo & OPCLSMASK) and spells its case labels as XC( OC_xxx ). */
2340 #define XC(n) ((n) >> 8)
2341
2342 static var *evaluate(node *op, var *res)
2343 {
2344 /* This procedure is recursive so we should count every byte */
2345 #define fnargs (G.evaluate__fnargs)
2346 /* seed is initialized to 1 */
2347 #define seed   (G.evaluate__seed)
2348 #define sreg   (G.evaluate__sreg)
2349
2350         var *v1;
2351
2352         if (!op)
2353                 return setvar_s(res, NULL);
2354
2355         debug_printf_eval("entered %s()\n", __func__);
2356
2357         v1 = nvalloc(2);
2358
2359         while (op) {
2360                 struct {
2361                         var *v;
2362                         const char *s;
2363                 } L = L; /* for compiler */
2364                 struct {
2365                         var *v;
2366                         const char *s;
2367                 } R = R;
2368                 double L_d = L_d;
2369                 uint32_t opinfo;
2370                 int opn;
2371                 node *op1;
2372
2373                 opinfo = op->info;
2374                 opn = (opinfo & OPNMASK);
2375                 g_lineno = op->lineno;
2376                 op1 = op->l.n;
2377                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2378
2379                 /* execute inevitable things */
2380                 if (opinfo & OF_RES1)
2381                         L.v = evaluate(op1, v1);
2382                 if (opinfo & OF_RES2)
2383                         R.v = evaluate(op->r.n, v1+1);
2384                 if (opinfo & OF_STR1) {
2385                         L.s = getvar_s(L.v);
2386                         debug_printf_eval("L.s:'%s'\n", L.s);
2387                 }
2388                 if (opinfo & OF_STR2) {
2389                         R.s = getvar_s(R.v);
2390                         debug_printf_eval("R.s:'%s'\n", R.s);
2391                 }
2392                 if (opinfo & OF_NUM1) {
2393                         L_d = getvar_i(L.v);
2394                         debug_printf_eval("L_d:%f\n", L_d);
2395                 }
2396
2397                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2398                 switch (XC(opinfo & OPCLSMASK)) {
2399
2400                 /* -- iterative node type -- */
2401
2402                 /* test pattern */
2403                 case XC( OC_TEST ):
2404                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2405                                 /* it's range pattern */
2406                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2407                                         op->info |= OF_CHECKED;
2408                                         if (ptest(op1->r.n))
2409                                                 op->info &= ~OF_CHECKED;
2410                                         op = op->a.n;
2411                                 } else {
2412                                         op = op->r.n;
2413                                 }
2414                         } else {
2415                                 op = ptest(op1) ? op->a.n : op->r.n;
2416                         }
2417                         break;
2418
2419                 /* just evaluate an expression, also used as unconditional jump */
2420                 case XC( OC_EXEC ):
2421                         break;
2422
2423                 /* branch, used in if-else and various loops */
2424                 case XC( OC_BR ):
2425                         op = istrue(L.v) ? op->a.n : op->r.n;
2426                         break;
2427
2428                 /* initialize for-in loop */
2429                 case XC( OC_WALKINIT ):
2430                         hashwalk_init(L.v, iamarray(R.v));
2431                         break;
2432
2433                 /* get next array item */
2434                 case XC( OC_WALKNEXT ):
2435                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2436                         break;
2437
2438                 case XC( OC_PRINT ):
2439                 case XC( OC_PRINTF ): {
2440                         FILE *F = stdout;
2441
2442                         if (op->r.n) {
2443                                 rstream *rsm = newfile(R.s);
2444                                 if (!rsm->F) {
2445                                         if (opn == '|') {
2446                                                 rsm->F = popen(R.s, "w");
2447                                                 if (rsm->F == NULL)
2448                                                         bb_perror_msg_and_die("popen");
2449                                                 rsm->is_pipe = 1;
2450                                         } else {
2451                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2452                                         }
2453                                 }
2454                                 F = rsm->F;
2455                         }
2456
2457                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2458                                 if (!op1) {
2459                                         fputs(getvar_s(intvar[F0]), F);
2460                                 } else {
2461                                         while (op1) {
2462                                                 var *v = evaluate(nextarg(&op1), v1);
2463                                                 if (v->type & VF_NUMBER) {
2464                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2465                                                                         getvar_i(v), TRUE);
2466                                                         fputs(g_buf, F);
2467                                                 } else {
2468                                                         fputs(getvar_s(v), F);
2469                                                 }
2470
2471                                                 if (op1)
2472                                                         fputs(getvar_s(intvar[OFS]), F);
2473                                         }
2474                                 }
2475                                 fputs(getvar_s(intvar[ORS]), F);
2476
2477                         } else {        /* OC_PRINTF */
2478                                 char *s = awk_printf(op1);
2479                                 fputs(s, F);
2480                                 free(s);
2481                         }
2482                         fflush(F);
2483                         break;
2484                 }
2485
2486                 case XC( OC_DELETE ): {
2487                         uint32_t info = op1->info & OPCLSMASK;
2488                         var *v;
2489
2490                         if (info == OC_VAR) {
2491                                 v = op1->l.v;
2492                         } else if (info == OC_FNARG) {
2493                                 v = &fnargs[op1->l.aidx];
2494                         } else {
2495                                 syntax_error(EMSG_NOT_ARRAY);
2496                         }
2497
2498                         if (op1->r.n) {
2499                                 const char *s;
2500                                 clrvar(L.v);
2501                                 s = getvar_s(evaluate(op1->r.n, v1));
2502                                 hash_remove(iamarray(v), s);
2503                         } else {
2504                                 clear_array(iamarray(v));
2505                         }
2506                         break;
2507                 }
2508
2509                 case XC( OC_NEWSOURCE ):
2510                         g_progname = op->l.new_progname;
2511                         break;
2512
2513                 case XC( OC_RETURN ):
2514                         copyvar(res, L.v);
2515                         break;
2516
2517                 case XC( OC_NEXTFILE ):
2518                         nextfile = TRUE;
2519                 case XC( OC_NEXT ):
2520                         nextrec = TRUE;
2521                 case XC( OC_DONE ):
2522                         clrvar(res);
2523                         break;
2524
2525                 case XC( OC_EXIT ):
2526                         awk_exit(L_d);
2527
2528                 /* -- recursive node type -- */
2529
2530                 case XC( OC_VAR ):
2531                         L.v = op->l.v;
2532                         if (L.v == intvar[NF])
2533                                 split_f0();
2534                         goto v_cont;
2535
2536                 case XC( OC_FNARG ):
2537                         L.v = &fnargs[op->l.aidx];
2538  v_cont:
2539                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2540                         break;
2541
2542                 case XC( OC_IN ):
2543                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2544                         break;
2545
2546                 case XC( OC_REGEXP ):
2547                         op1 = op;
2548                         L.s = getvar_s(intvar[F0]);
2549                         goto re_cont;
2550
2551                 case XC( OC_MATCH ):
2552                         op1 = op->r.n;
2553  re_cont:
2554                         {
2555                                 regex_t *re = as_regex(op1, &sreg);
2556                                 int i = regexec(re, L.s, 0, NULL, 0);
2557                                 if (re == &sreg)
2558                                         regfree(re);
2559                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2560                         }
2561                         break;
2562
2563                 case XC( OC_MOVE ):
2564                         debug_printf_eval("MOVE\n");
2565                         /* if source is a temporary string, just relink it to dest */
2566 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2567 //then L.v ends up being a string, which is wrong
2568 //                      if (R.v == v1+1 && R.v->string) {
2569 //                              res = setvar_p(L.v, R.v->string);
2570 //                              R.v->string = NULL;
2571 //                      } else {
2572                                 res = copyvar(L.v, R.v);
2573 //                      }
2574                         break;
2575
2576                 case XC( OC_TERNARY ):
2577                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2578                                 syntax_error(EMSG_POSSIBLE_ERROR);
2579                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2580                         break;
2581
2582                 case XC( OC_FUNC ): {
2583                         var *vbeg, *v;
2584                         const char *sv_progname;
2585
2586                         if (!op->r.f->body.first)
2587                                 syntax_error(EMSG_UNDEF_FUNC);
2588
2589                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2590                         while (op1) {
2591                                 var *arg = evaluate(nextarg(&op1), v1);
2592                                 copyvar(v, arg);
2593                                 v->type |= VF_CHILD;
2594                                 v->x.parent = arg;
2595                                 if (++v - vbeg >= op->r.f->nargs)
2596                                         break;
2597                         }
2598
2599                         v = fnargs;
2600                         fnargs = vbeg;
2601                         sv_progname = g_progname;
2602
2603                         res = evaluate(op->r.f->body.first, res);
2604
2605                         g_progname = sv_progname;
2606                         nvfree(fnargs);
2607                         fnargs = v;
2608
2609                         break;
2610                 }
2611
2612                 case XC( OC_GETLINE ):
2613                 case XC( OC_PGETLINE ): {
2614                         rstream *rsm;
2615                         int i;
2616
2617                         if (op1) {
2618                                 rsm = newfile(L.s);
2619                                 if (!rsm->F) {
2620                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2621                                                 rsm->F = popen(L.s, "r");
2622                                                 rsm->is_pipe = TRUE;
2623                                         } else {
2624                                                 rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2625                                         }
2626                                 }
2627                         } else {
2628                                 if (!iF)
2629                                         iF = next_input_file();
2630                                 rsm = iF;
2631                         }
2632
2633                         if (!rsm || !rsm->F) {
2634                                 setvar_i(intvar[ERRNO], errno);
2635                                 setvar_i(res, -1);
2636                                 break;
2637                         }
2638
2639                         if (!op->r.n)
2640                                 R.v = intvar[F0];
2641
2642                         i = awk_getline(rsm, R.v);
2643                         if (i > 0 && !op1) {
2644                                 incvar(intvar[FNR]);
2645                                 incvar(intvar[NR]);
2646                         }
2647                         setvar_i(res, i);
2648                         break;
2649                 }
2650
2651                 /* simple builtins */
2652                 case XC( OC_FBLTIN ): {
2653                         double R_d = R_d; /* for compiler */
2654
2655                         switch (opn) {
2656                         case F_in:
2657                                 R_d = (int)L_d;
2658                                 break;
2659
2660                         case F_rn:
2661                                 R_d = (double)rand() / (double)RAND_MAX;
2662                                 break;
2663
2664                         case F_co:
2665                                 if (ENABLE_FEATURE_AWK_LIBM) {
2666                                         R_d = cos(L_d);
2667                                         break;
2668                                 }
2669
2670                         case F_ex:
2671                                 if (ENABLE_FEATURE_AWK_LIBM) {
2672                                         R_d = exp(L_d);
2673                                         break;
2674                                 }
2675
2676                         case F_lg:
2677                                 if (ENABLE_FEATURE_AWK_LIBM) {
2678                                         R_d = log(L_d);
2679                                         break;
2680                                 }
2681
2682                         case F_si:
2683                                 if (ENABLE_FEATURE_AWK_LIBM) {
2684                                         R_d = sin(L_d);
2685                                         break;
2686                                 }
2687
2688                         case F_sq:
2689                                 if (ENABLE_FEATURE_AWK_LIBM) {
2690                                         R_d = sqrt(L_d);
2691                                         break;
2692                                 }
2693
2694                                 syntax_error(EMSG_NO_MATH);
2695                                 break;
2696
2697                         case F_sr:
2698                                 R_d = (double)seed;
2699                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2700                                 srand(seed);
2701                                 break;
2702
2703                         case F_ti:
2704                                 R_d = time(NULL);
2705                                 break;
2706
2707                         case F_le:
2708                                 if (!op1)
2709                                         L.s = getvar_s(intvar[F0]);
2710                                 R_d = strlen(L.s);
2711                                 break;
2712
2713                         case F_sy:
2714                                 fflush_all();
2715                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2716                                                 ? (system(L.s) >> 8) : 0;
2717                                 break;
2718
2719                         case F_ff:
2720                                 if (!op1) {
2721                                         fflush(stdout);
2722                                 } else if (L.s && *L.s) {
2723                                         rstream *rsm = newfile(L.s);
2724                                         fflush(rsm->F);
2725                                 } else {
2726                                         fflush_all();
2727                                 }
2728                                 break;
2729
2730                         case F_cl: {
2731                                 rstream *rsm;
2732                                 int err = 0;
2733                                 rsm = (rstream *)hash_search(fdhash, L.s);
2734                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2735                                 if (rsm) {
2736                                         debug_printf_eval("OC_FBLTIN F_cl "
2737                                                 "rsm->is_pipe:%d, ->F:%p\n",
2738                                                 rsm->is_pipe, rsm->F);
2739                                         /* Can be NULL if open failed. Example:
2740                                          * getline line <"doesnt_exist";
2741                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2742                                          */
2743                                         if (rsm->F)
2744                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2745                                         free(rsm->buffer);
2746                                         hash_remove(fdhash, L.s);
2747                                 }
2748                                 if (err)
2749                                         setvar_i(intvar[ERRNO], errno);
2750                                 R_d = (double)err;
2751                                 break;
2752                         }
2753                         } /* switch */
2754                         setvar_i(res, R_d);
2755                         break;
2756                 }
2757
2758                 case XC( OC_BUILTIN ):
2759                         res = exec_builtin(op, res);
2760                         break;
2761
2762                 case XC( OC_SPRINTF ):
2763                         setvar_p(res, awk_printf(op1));
2764                         break;
2765
2766                 case XC( OC_UNARY ): {
2767                         double Ld, R_d;
2768
2769                         Ld = R_d = getvar_i(R.v);
2770                         switch (opn) {
2771                         case 'P':
2772                                 Ld = ++R_d;
2773                                 goto r_op_change;
2774                         case 'p':
2775                                 R_d++;
2776                                 goto r_op_change;
2777                         case 'M':
2778                                 Ld = --R_d;
2779                                 goto r_op_change;
2780                         case 'm':
2781                                 R_d--;
2782  r_op_change:
2783                                 setvar_i(R.v, R_d);
2784                                 break;
2785                         case '!':
2786                                 Ld = !istrue(R.v);
2787                                 break;
2788                         case '-':
2789                                 Ld = -R_d;
2790                                 break;
2791                         }
2792                         setvar_i(res, Ld);
2793                         break;
2794                 }
2795
2796                 case XC( OC_FIELD ): {
2797                         int i = (int)getvar_i(R.v);
2798                         if (i == 0) {
2799                                 res = intvar[F0];
2800                         } else {
2801                                 split_f0();
2802                                 if (i > nfields)
2803                                         fsrealloc(i);
2804                                 res = &Fields[i - 1];
2805                         }
2806                         break;
2807                 }
2808
2809                 /* concatenation (" ") and index joining (",") */
2810                 case XC( OC_CONCAT ):
2811                 case XC( OC_COMMA ): {
2812                         const char *sep = "";
2813                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2814                                 sep = getvar_s(intvar[SUBSEP]);
2815                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2816                         break;
2817                 }
2818
2819                 case XC( OC_LAND ):
2820                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2821                         break;
2822
2823                 case XC( OC_LOR ):
2824                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2825                         break;
2826
2827                 case XC( OC_BINARY ):
2828                 case XC( OC_REPLACE ): {
2829                         double R_d = getvar_i(R.v);
2830                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2831                         switch (opn) {
2832                         case '+':
2833                                 L_d += R_d;
2834                                 break;
2835                         case '-':
2836                                 L_d -= R_d;
2837                                 break;
2838                         case '*':
2839                                 L_d *= R_d;
2840                                 break;
2841                         case '/':
2842                                 if (R_d == 0)
2843                                         syntax_error(EMSG_DIV_BY_ZERO);
2844                                 L_d /= R_d;
2845                                 break;
2846                         case '&':
2847                                 if (ENABLE_FEATURE_AWK_LIBM)
2848                                         L_d = pow(L_d, R_d);
2849                                 else
2850                                         syntax_error(EMSG_NO_MATH);
2851                                 break;
2852                         case '%':
2853                                 if (R_d == 0)
2854                                         syntax_error(EMSG_DIV_BY_ZERO);
2855                                 L_d -= (int)(L_d / R_d) * R_d;
2856                                 break;
2857                         }
2858                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2859                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2860                         break;
2861                 }
2862
2863                 case XC( OC_COMPARE ): {
2864                         int i = i; /* for compiler */
2865                         double Ld;
2866
2867                         if (is_numeric(L.v) && is_numeric(R.v)) {
2868                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2869                         } else {
2870                                 const char *l = getvar_s(L.v);
2871                                 const char *r = getvar_s(R.v);
2872                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2873                         }
2874                         switch (opn & 0xfe) {
2875                         case 0:
2876                                 i = (Ld > 0);
2877                                 break;
2878                         case 2:
2879                                 i = (Ld >= 0);
2880                                 break;
2881                         case 4:
2882                                 i = (Ld == 0);
2883                                 break;
2884                         }
2885                         setvar_i(res, (i == 0) ^ (opn & 1));
2886                         break;
2887                 }
2888
2889                 default:
2890                         syntax_error(EMSG_POSSIBLE_ERROR);
2891                 }
2892                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2893                         op = op->a.n;
2894                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2895                         break;
2896                 if (nextrec)
2897                         break;
2898         } /* while (op) */
2899
2900         nvfree(v1);
2901         debug_printf_eval("returning from %s(): %p\n", __func__, res);
2902         return res;
2903 #undef fnargs
2904 #undef seed
2905 #undef sreg
2906 }
2907
2908
2909 /* -------- main & co. -------- */
2910
2911 static int awk_exit(int r)
2912 {
2913         var tv;
2914         unsigned i;
2915         hash_item *hi;
2916
2917         zero_out_var(&tv);
2918
2919         if (!exiting) {
2920                 exiting = TRUE;
2921                 nextrec = FALSE;
2922                 evaluate(endseq.first, &tv);
2923         }
2924
2925         /* waiting for children */
2926         for (i = 0; i < fdhash->csize; i++) {
2927                 hi = fdhash->items[i];
2928                 while (hi) {
2929                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2930                                 pclose(hi->data.rs.F);
2931                         hi = hi->next;
2932                 }
2933         }
2934
2935         exit(r);
2936 }
2937
2938 /* if expr looks like "var=value", perform assignment and return 1,
2939  * otherwise return 0 */
2940 static int is_assignment(const char *expr)
2941 {
2942         char *exprc, *val, *s, *s1;
2943
2944         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2945                 return FALSE;
2946         }
2947
2948         exprc = xstrdup(expr);
2949         val = exprc + (val - expr);
2950         *val++ = '\0';
2951
2952         s = s1 = val;
2953         while ((*s1 = nextchar(&s)) != '\0')
2954                 s1++;
2955
2956         setvar_u(newvar(exprc), val);
2957         free(exprc);
2958         return TRUE;
2959 }
2960
2961 /* switch to next input file */
2962 static rstream *next_input_file(void)
2963 {
2964 #define rsm          (G.next_input_file__rsm)
2965 #define files_happen (G.next_input_file__files_happen)
2966
2967         FILE *F;
2968         const char *fname, *ind;
2969
2970         if (rsm.F)
2971                 fclose(rsm.F);
2972         rsm.F = NULL;
2973         rsm.pos = rsm.adv = 0;
2974
2975         for (;;) {
2976                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2977                         if (files_happen)
2978                                 return NULL;
2979                         fname = "-";
2980                         F = stdin;
2981                         break;
2982                 }
2983                 ind = getvar_s(incvar(intvar[ARGIND]));
2984                 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2985                 if (fname && *fname && !is_assignment(fname)) {
2986                         F = xfopen_stdin(fname);
2987                         break;
2988                 }
2989         }
2990
2991         files_happen = TRUE;
2992         setvar_s(intvar[FILENAME], fname);
2993         rsm.F = F;
2994         return &rsm;
2995 #undef rsm
2996 #undef files_happen
2997 }
2998
2999 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3000 int awk_main(int argc, char **argv)
3001 {
3002         unsigned opt;
3003         char *opt_F, *opt_W;
3004         llist_t *list_v = NULL;
3005         llist_t *list_f = NULL;
3006         int i, j;
3007         var *v;
3008         var tv;
3009         char **envp;
3010         char *vnames = (char *)vNames; /* cheat */
3011         char *vvalues = (char *)vValues;
3012
3013         INIT_G();
3014
3015         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3016          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3017         if (ENABLE_LOCALE_SUPPORT)
3018                 setlocale(LC_NUMERIC, "C");
3019
3020         zero_out_var(&tv);
3021
3022         /* allocate global buffer */
3023         g_buf = xmalloc(MAXVARFMT + 1);
3024
3025         vhash = hash_init();
3026         ahash = hash_init();
3027         fdhash = hash_init();
3028         fnhash = hash_init();
3029
3030         /* initialize variables */
3031         for (i = 0; *vnames; i++) {
3032                 intvar[i] = v = newvar(nextword(&vnames));
3033                 if (*vvalues != '\377')
3034                         setvar_s(v, nextword(&vvalues));
3035                 else
3036                         setvar_i(v, 0);
3037
3038                 if (*vnames == '*') {
3039                         v->type |= VF_SPECIAL;
3040                         vnames++;
3041                 }
3042         }
3043
3044         handle_special(intvar[FS]);
3045         handle_special(intvar[RS]);
3046
3047         newfile("/dev/stdin")->F = stdin;
3048         newfile("/dev/stdout")->F = stdout;
3049         newfile("/dev/stderr")->F = stderr;
3050
3051         /* Huh, people report that sometimes environ is NULL. Oh well. */
3052         if (environ) for (envp = environ; *envp; envp++) {
3053                 /* environ is writable, thus we don't strdup it needlessly */
3054                 char *s = *envp;
3055                 char *s1 = strchr(s, '=');
3056                 if (s1) {
3057                         *s1 = '\0';
3058                         /* Both findvar and setvar_u take const char*
3059                          * as 2nd arg -> environment is not trashed */
3060                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3061                         *s1 = '=';
3062                 }
3063         }
3064         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3065         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3066         argv += optind;
3067         argc -= optind;
3068         if (opt & 0x1)
3069                 setvar_s(intvar[FS], opt_F); // -F
3070         while (list_v) { /* -v */
3071                 if (!is_assignment(llist_pop(&list_v)))
3072                         bb_show_usage();
3073         }
3074         if (list_f) { /* -f */
3075                 do {
3076                         char *s = NULL;
3077                         FILE *from_file;
3078
3079                         g_progname = llist_pop(&list_f);
3080                         from_file = xfopen_stdin(g_progname);
3081                         /* one byte is reserved for some trick in next_token */
3082                         for (i = j = 1; j > 0; i += j) {
3083                                 s = xrealloc(s, i + 4096);
3084                                 j = fread(s + i, 1, 4094, from_file);
3085                         }
3086                         s[i] = '\0';
3087                         fclose(from_file);
3088                         parse_program(s + 1);
3089                         free(s);
3090                 } while (list_f);
3091                 argc++;
3092         } else { // no -f: take program from 1st parameter
3093                 if (!argc)
3094                         bb_show_usage();
3095                 g_progname = "cmd. line";
3096                 parse_program(*argv++);
3097         }
3098         if (opt & 0x8) // -W
3099                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3100
3101         /* fill in ARGV array */
3102         setvar_i(intvar[ARGC], argc);
3103         setari_u(intvar[ARGV], 0, "awk");
3104         i = 0;
3105         while (*argv)
3106                 setari_u(intvar[ARGV], ++i, *argv++);
3107
3108         evaluate(beginseq.first, &tv);
3109         if (!mainseq.first && !endseq.first)
3110                 awk_exit(EXIT_SUCCESS);
3111
3112         /* input file could already be opened in BEGIN block */
3113         if (!iF)
3114                 iF = next_input_file();
3115
3116         /* passing through input files */
3117         while (iF) {
3118                 nextfile = FALSE;
3119                 setvar_i(intvar[FNR], 0);
3120
3121                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3122                         nextrec = FALSE;
3123                         incvar(intvar[NR]);
3124                         incvar(intvar[FNR]);
3125                         evaluate(mainseq.first, &tv);
3126
3127                         if (nextfile)
3128                                 break;
3129                 }
3130
3131                 if (i < 0)
3132                         syntax_error(strerror(errno));
3133
3134                 iF = next_input_file();
3135         }
3136
3137         awk_exit(EXIT_SUCCESS);
3138         /*return 0;*/
3139 }