awk: fix a SEGV
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 //usage:#define awk_trivial_usage
11 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage:       "        -v VAR=VAL      Set variable"
14 //usage:     "\n        -F SEP          Use SEP as field separator"
15 //usage:     "\n        -f FILE         Read program from FILE"
16
17 #include "libbb.h"
18 #include "xregex.h"
19 #include <math.h>
20
21 /* This is a NOEXEC applet. Be very careful! */
22
23
24 /* If you comment out one of these below, it will be #defined later
25  * to perform debug printfs to stderr: */
/* The three tracing macros start out as no-op statements. Each #ifndef
 * fallback below only takes effect when the matching no-op #define has
 * been removed; it then expands to an fprintf(stderr, ...) call. */
26 #define debug_printf_walker(...)  do {} while (0)
27 #define debug_printf_eval(...)  do {} while (0)
28 #define debug_printf_parse(...)  do {} while (0)
29
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
32 #endif
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #endif
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
38 #endif
39
40
41
/* MAXVARFMT: buffer size handed to fmt_num() together with g_buf in getvar_s().
 * MINNVBLOCK: smallest number of var slots nvalloc() puts into a new block. */
42 #define MAXVARFMT       240
43 #define MINNVBLOCK      64
44
45 /* variable flags */
46 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
47 #define VF_ARRAY        0x0002  /* 1 = it's an array */
48
49 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
50 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
51 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
52 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
53 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
54 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
55 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
56
57 /* these flags are static, don't change them when value is changed */
/* (clrvar() masks the type with VF_DONTTOUCH, so only these bits survive a reset) */
58 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
59
/* One node of the per-variable walker chain (var::x.walker, used by for..in).
 * NOTE(review): end/cur presumably delimit the data kept in wbuf - confirm
 * against the code that fills it. */
typedef struct walker_list {
        char *end, *cur;
        struct walker_list *prev;       /* link to the previous walker */
        char wbuf[1];                   /* declared with a single element */
} walker_list;
66
67 /* Variable */
68 typedef struct var_s {
69         unsigned type;            /* flags */
70         double number;
71         char *string;
72         union {
73                 int aidx;               /* func arg idx (for compilation stage) */
74                 struct xhash_s *array;  /* array ptr */
75                 struct var_s *parent;   /* for func args, ptr to actual parameter */
76                 walker_list *walker;    /* list of array elements (for..in) */
77         } x;
78 } var;
79
80 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* Head and tail of a node list, plus the program name it belongs to
 * (pattern-action chain, BEGIN, END, function bodies). */
typedef struct chain_s {
        struct node_s *first, *last;
        const char *programname;
} chain;
86
87 /* Function */
88 typedef struct func_s {
89         unsigned nargs;
90         struct chain_s body;
91 } func;
92
93 /* I/O stream */
94 typedef struct rstream_s {
95         FILE *F;
96         char *buffer;
97         int adv;
98         int size;
99         int pos;
100         smallint is_pipe;
101 } rstream;
102
103 typedef struct hash_item_s {
104         union {
105                 struct var_s v;         /* variable/array hash */
106                 struct rstream_s rs;    /* redirect streams hash */
107                 struct func_s f;        /* functions hash */
108         } data;
109         struct hash_item_s *next;       /* next in chain */
110         char name[1];                   /* really it's longer */
111 } hash_item;
112
/* Chained hash table header */
typedef struct xhash_s {
        unsigned nel;                   /* number of stored elements */
        unsigned csize;                 /* current number of buckets */
        unsigned nprime;                /* index of the next size in PRIMES[] */
        unsigned glen;                  /* summed length of all item names */
        struct hash_item_s **items;     /* bucket array */
} xhash;
120
121 /* Tree node */
122 typedef struct node_s {
123         uint32_t info;
124         unsigned lineno;
125         union {
126                 struct node_s *n;
127                 var *v;
128                 int aidx;
129                 char *new_progname;
130                 regex_t *re;
131         } l;
132         union {
133                 struct node_s *n;
134                 regex_t *ire;
135                 func *f;
136         } r;
137         union {
138                 struct node_s *n;
139         } a;
140 } node;
141
142 /* Block of temporary variables */
143 typedef struct nvblock_s {
144         int size;
145         var *pos;
146         struct nvblock_s *prev;
147         struct nvblock_s *next;
148         var nv[];
149 } nvblock;
150
151 typedef struct tsplitter_s {
152         node n;
153         regex_t re[2];
154 } tsplitter;
155
/* simple token classes: one bit each */
/* Order and hex values are very important!!!  See next_token() */
#define TC_SEQSTART     (1 << 0)        /* ( */
#define TC_SEQTERM      (1 << 1)        /* ) */
#define TC_REGEXP       (1 << 2)        /* /.../ */
#define TC_OUTRDR       (1 << 3)        /* | > >> */
#define TC_UOPPOST      (1 << 4)        /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)        /* unary prefix operator */
#define TC_BINOPX       (1 << 6)        /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)        /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)       /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)       /* ] */
#define TC_GRPSTART     (1 << 12)       /* { */
#define TC_GRPTERM      (1 << 13)       /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)       /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* either kind of unary prefix operator */
#define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                        | TC_BUILTIN | TC_GETLINE | TC_SEQSTART \
                        | TC_STRING | TC_NUMBER)

#define TC_STATEMNT     (TC_STATX | TC_WHILE)
#define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                        | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                        | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                        | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
218
/* operator flags, stored above the opclass/opcode bits of an info word */
#define OF_RES1         (1 << 16)
#define OF_RES2         (1 << 17)
#define OF_STR1         (1 << 18)
#define OF_STR2         (1 << 19)
#define OF_NUM1         (1 << 20)
#define OF_CHECKED      (1 << 21)

/* combined operator flags: first letter describes operand 1, second
 * letter operand 2 (x = none of the flags set for that operand) */
#define xx              0
#define xV              OF_RES2
#define xS              (OF_RES2 | OF_STR2)
#define Vx              OF_RES1
#define VV              (OF_RES1 | OF_RES2)
#define Nx              (OF_RES1 | OF_NUM1)
#define NV              (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx              (OF_RES1 | OF_STR1)
#define SV              (OF_RES1 | OF_STR1 | OF_RES2)
#define SS              (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class and the opcode within an info word */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F
240
/* Operator priority is stored in the highest byte of the info word
 * (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
/* The argument is parenthesized and widened to uint32_t before the shift:
 * builtin descriptors such as P(0x83) do not fit in a signed int once
 * moved up by 24 bits, and shifting a signed int that far is undefined. */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
251
/* Operation classes (kept in the OPCLSMASK byte of an info word) */

/* Boundary values inside the enum below: SHIFT_TIL_THIS has the value of
 * OC_WALKINIT and RECUR_FROM_THIS the value of OC_BINARY. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
        OC_DELETE    = 0x0100,
        OC_EXEC      = 0x0200,
        OC_NEWSOURCE = 0x0300,
        OC_PRINT     = 0x0400,
        OC_PRINTF    = 0x0500,
        OC_WALKINIT  = 0x0600,

        OC_BR        = 0x0700,
        OC_BREAK     = 0x0800,
        OC_CONTINUE  = 0x0900,
        OC_EXIT      = 0x0a00,
        OC_NEXT      = 0x0b00,
        OC_NEXTFILE  = 0x0c00,
        OC_TEST      = 0x0d00,
        OC_WALKNEXT  = 0x0e00,

        OC_BINARY    = 0x1000,
        OC_BUILTIN   = 0x1100,
        OC_COLON     = 0x1200,
        OC_COMMA     = 0x1300,
        OC_COMPARE   = 0x1400,
        OC_CONCAT    = 0x1500,
        OC_FBLTIN    = 0x1600,
        OC_FIELD     = 0x1700,
        OC_FNARG     = 0x1800,
        OC_FUNC      = 0x1900,
        OC_GETLINE   = 0x1a00,
        OC_IN        = 0x1b00,
        OC_LAND      = 0x1c00,
        OC_LOR       = 0x1d00,
        OC_MATCH     = 0x1e00,
        OC_MOVE      = 0x1f00,
        OC_PGETLINE  = 0x2000,
        OC_REGEXP    = 0x2100,
        OC_REPLACE   = 0x2200,
        OC_RETURN    = 0x2300,
        OC_SPRINTF   = 0x2400,
        OC_TERNARY   = 0x2500,
        OC_UNARY     = 0x2600,
        OC_VAR       = 0x2700,
        OC_DONE      = 0x2800,

        ST_IF        = 0x3000,
        ST_DO        = 0x3100,
        ST_FOR       = 0x3200,
        ST_WHILE     = 0x3300
};
278
/* simple builtins (opcodes used together with OC_FBLTIN in tokeninfo[]) */
enum {
        F_in,   /* int */
        F_rn,   /* rand */
        F_co,   /* cos */
        F_ex,   /* exp */
        F_lg,   /* log */
        F_si,   /* sin */
        F_sq,   /* sqrt */
        F_sr,   /* srand */
        F_ti,   /* systime */
        F_le,   /* length */
        F_sy,   /* system */
        F_ff,   /* fflush */
        F_cl    /* close */
};
284
/* builtins (opcodes used together with OC_BUILTIN in tokeninfo[]) */
enum {
        B_a2,   /* atan2 */
        B_ix,   /* index */
        B_ma,   /* match */
        B_sp,   /* split */
        B_ss,   /* substr */
        B_ti,   /* strftime */
        B_mt,   /* mktime */
        B_lo,   /* tolower */
        B_up,   /* toupper */
        B_ge,   /* gensub */
        B_gs,   /* gsub */
        B_su,   /* sub */
        B_an,   /* and */
        B_co,   /* compl */
        B_ls,   /* lshift */
        B_or,   /* or */
        B_rs,   /* rshift */
        B_xo,   /* xor */
};
291
292 /* tokens and their corresponding info values */
293
/* tokenlist encoding: every token is written as one length byte (octal
 * escape, e.g. "\10" = 8) followed by the token text. NTC ("\377") ends
 * the tokens of one class and moves on to the next TC_* class; the
 * classes appear in the order of the TC_* bit definitions. The entries
 * of tokeninfo[] follow the same token order. */
294 #define NTC     "\377"  /* switch to next token class (tc<<1) */
295 #define NTCC    '\377'
296
297 #define OC_B  OC_BUILTIN
298
299 static const char tokenlist[] ALIGN1 =
300         "\1("         NTC
301         "\1)"         NTC
302         "\1/"         NTC                                   /* REGEXP */
303         "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
304         "\2++"        "\2--"        NTC                     /* UOPPOST */
305         "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
306         "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
307         "\2*="        "\2/="        "\2%="      "\2^="
308         "\1+"         "\1-"         "\3**="     "\2**"
309         "\1/"         "\1%"         "\1^"       "\1*"
310         "\2!="        "\2>="        "\2<="      "\1>"
311         "\1<"         "\2!~"        "\1~"       "\2&&"
312         "\2||"        "\1?"         "\1:"       NTC
313         "\2in"        NTC
314         "\1,"         NTC
315         "\1|"         NTC
316         "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
317         "\1]"         NTC
318         "\1{"         NTC
319         "\1}"         NTC
320         "\1;"         NTC
321         "\1\n"        NTC
322         "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
323         "\10continue" "\6delete"    "\5print"
324         "\6printf"    "\4next"      "\10nextfile"
325         "\6return"    "\4exit"      NTC
326         "\5while"     NTC
327         "\4else"      NTC
328
329         "\3and"       "\5compl"     "\6lshift"  "\2or"
330         "\6rshift"    "\3xor"
331         "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
332         "\3cos"       "\3exp"       "\3int"     "\3log"
333         "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
334         "\6gensub"    "\4gsub"      "\5index"   "\6length"
335         "\5match"     "\5split"     "\7sprintf" "\3sub"
336         "\6substr"    "\7systime"   "\10strftime" "\6mktime"
337         "\7tolower"   "\7toupper"   NTC
338         "\7getline"   NTC
339         "\4func"      "\10function" NTC
340         "\5BEGIN"     NTC
341         "\3END"
342         /* compiler adds trailing "\0" */
343         ;
344
/* Info word for each token of tokenlist[], in the same order (the
 * positional comments below mark a few of the correspondences). A word
 * combines an OC_xxx/ST_xxx class, operand flags (xV, NV, Sx, ...), a
 * P(n) byte and a low opcode such as a character or a B_xx/F_xx id;
 * tokens that need no extra information have 0. */
345 static const uint32_t tokeninfo[] = {
346         0,
347         0,
348         OC_REGEXP,
349         xS|'a',                  xS|'w',                  xS|'|',
350         OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
351         OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
352         OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
353         OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
354         OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
355         OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
356         OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
357         OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
358         OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
359         OC_IN|SV|P(49), /* in */
360         OC_COMMA|SS|P(80),
361         OC_PGETLINE|SV|P(37),
362         OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
363         0, /* ] */
364         0,
365         0,
366         0,
367         0, /* \n */
368         ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
369         OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
370         OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
371         OC_RETURN|Vx, OC_EXIT|Nx,
372         ST_WHILE,
373         0, /* else */
374
375         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
376         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
377         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
378         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
379         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
380         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
381         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
382         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
383         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
384         OC_GETLINE|SV|P(0),
385         0,                 0,
386         0,
387         0 /* END */
388 };
389
390 /* internal variable names and their initial values       */
391 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices into intvar[]; the order matches the names listed in vNames[].
 * NUM_INTERNAL_VARS is the number of entries. */
enum {
        CONVFMT,
        OFMT,
        FS,
        OFS,
        ORS,
        RS,
        RT,
        FILENAME,
        SUBSEP,
        F0,
        ARGIND,
        ARGC,
        ARGV,
        ERRNO,
        FNR,
        NR,
        NF,
        IGNORECASE,
        ENVIRON,
        NUM_INTERNAL_VARS
};
399
/* vNames: NUL-separated names in the order of the enum above; a '*'
 * directly after a name's NUL prefixes the following name (FS, RS, $,
 * NF, IGNORECASE), and an empty string closes the list.
 * vValues: NUL-separated initial strings for the leading variables.
 * NOTE(review): the final "\377" is presumably an end marker for the
 * code that walks this table - confirm against the initialization loop. */
400 static const char vNames[] ALIGN1 =
401         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
402         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
403         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
404         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
405         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
406
407 static const char vValues[] ALIGN1 =
408         "%.6g\0"    "%.6g\0"    " \0"       " \0"
409         "\n\0"      "\n\0"      "\0"        "\0"
410         "\034\0"    "\0"        "\377";
411
412 /* hash size may grow to these values */
/* FIRST_PRIME is the bucket count hash_init() starts with; hash_rebuild()
 * steps through PRIMES[] (indexed by xhash::nprime) and stops growing
 * once the last entry has been used. */
413 #define FIRST_PRIME 61
414 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
415
416
417 /* Globals. Split in two parts so that first one is addressed
418  * with (mostly short) negative offsets.
419  * NB: it's unsafe to put members of type "double"
420  * into globals2 (gcc may fail to align them).
421  */
422 struct globals {
423         double t_double;
424         chain beginseq, mainseq, endseq;
425         chain *seq;
426         node *break_ptr, *continue_ptr;
427         rstream *iF;
428         xhash *vhash, *ahash, *fdhash, *fnhash;
429         const char *g_progname;
430         int g_lineno;
431         int nfields;
432         int maxfields; /* used in fsrealloc() only */
433         var *Fields;
434         nvblock *g_cb;
435         char *g_pos;
436         char *g_buf;
437         smallint icase;
438         smallint exiting;
439         smallint nextrec;
440         smallint nextfile;
441         smallint is_f0_split;
442         smallint t_rollback;
443 };
444 struct globals2 {
445         uint32_t t_info; /* often used */
446         uint32_t t_tclass;
447         char *t_string;
448         int t_lineno;
449
450         var *intvar[NUM_INTERNAL_VARS]; /* often used */
451
452         /* former statics from various functions */
453         char *split_f0__fstrings;
454
455         uint32_t next_token__save_tclass;
456         uint32_t next_token__save_info;
457         uint32_t next_token__ltclass;
458         smallint next_token__concat_inserted;
459
460         smallint next_input_file__files_happen;
461         rstream next_input_file__rsm;
462
463         var *evaluate__fnargs;
464         unsigned evaluate__seed;
465         regex_t evaluate__sreg;
466
467         var ptest__v;
468
469         tsplitter exec_builtin__tspl;
470
471         /* biggest and least used members go last */
472         tsplitter fsplitter, rsplitter;
473 };
/* INIT_G() allocates room for G1 followed by G in one zeroed block and
 * points ptr_to_globals at the boundary between them, so G1 is the
 * element just below the pointer and G starts exactly at it. */
474 #define G1 (ptr_to_globals[-1])
475 #define G (*(struct globals2 *)ptr_to_globals)
476 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477 /*char G1size[sizeof(G1)]; - 0x74 */
478 /*char Gsize[sizeof(G)]; - 0x1c4 */
479 /* Trying to keep most of members accessible with short offsets: */
480 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthand names: the rest of the file uses these as if they were
 * plain global variables. */
481 #define t_double     (G1.t_double    )
482 #define beginseq     (G1.beginseq    )
483 #define mainseq      (G1.mainseq     )
484 #define endseq       (G1.endseq      )
485 #define seq          (G1.seq         )
486 #define break_ptr    (G1.break_ptr   )
487 #define continue_ptr (G1.continue_ptr)
488 #define iF           (G1.iF          )
489 #define vhash        (G1.vhash       )
490 #define ahash        (G1.ahash       )
491 #define fdhash       (G1.fdhash      )
492 #define fnhash       (G1.fnhash      )
493 #define g_progname   (G1.g_progname  )
494 #define g_lineno     (G1.g_lineno    )
495 #define nfields      (G1.nfields     )
496 #define maxfields    (G1.maxfields   )
497 #define Fields       (G1.Fields      )
498 #define g_cb         (G1.g_cb        )
499 #define g_pos        (G1.g_pos       )
500 #define g_buf        (G1.g_buf       )
501 #define icase        (G1.icase       )
502 #define exiting      (G1.exiting     )
503 #define nextrec      (G1.nextrec     )
504 #define nextfile     (G1.nextfile    )
505 #define is_f0_split  (G1.is_f0_split )
506 #define t_rollback   (G1.t_rollback  )
507 #define t_info       (G.t_info      )
508 #define t_tclass     (G.t_tclass    )
509 #define t_string     (G.t_string    )
510 #define t_lineno     (G.t_lineno    )
511 #define intvar       (G.intvar      )
512 #define fsplitter    (G.fsplitter   )
513 #define rsplitter    (G.rsplitter   )
/* Allocate both structs zero-filled, then set the two members whose
 * initial value is not zero. */
514 #define INIT_G() do { \
515         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516         G.next_token__ltclass = TC_OPTERM; \
517         G.evaluate__seed = 1; \
518 } while (0)
519
520
521 /* function prototypes */
522 static void handle_special(var *);
523 static node *parse_expr(uint32_t);
524 static void chain_group(void);
525 static var *evaluate(node *, var *);
526 static rstream *next_input_file(void);
527 static int fmt_num(char *, int, const char *, double, int);
528 static int awk_exit(int) NORETURN;
529
530 /* ---- error handling ---- */
531
/* Diagnostic message texts. NOTE(review): presumably passed to
 * syntax_error() as its message argument - the call sites are outside
 * this part of the file. */
532 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
533 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
534 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
535 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
536 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
537 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
538 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
539 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
540 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
541 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
542
543 static void zero_out_var(var *vp)
544 {
545         memset(vp, 0, sizeof(*vp));
546 }
547
548 static void syntax_error(const char *message) NORETURN;
549 static void syntax_error(const char *message)
550 {
551         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
552 }
553
554 /* ---- hash stuff ---- */
555
/* Hash a NUL-terminated name: for every character the running value is
 * multiplied by 63 (written as shift-and-subtract) and the character is
 * added. The caller reduces the result modulo the table size. */
static unsigned hashidx(const char *name)
{
        unsigned h = 0;
        const char *p;

        for (p = name; *p != '\0'; p++)
                h = *p + (h << 6) - h;
        return h;
}
564
565 /* create new hash */
566 static xhash *hash_init(void)
567 {
568         xhash *newhash;
569
570         newhash = xzalloc(sizeof(*newhash));
571         newhash->csize = FIRST_PRIME;
572         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
573
574         return newhash;
575 }
576
577 /* find item in hash, return ptr to data, NULL if not found */
578 static void *hash_search(xhash *hash, const char *name)
579 {
580         hash_item *hi;
581
582         hi = hash->items[hashidx(name) % hash->csize];
583         while (hi) {
584                 if (strcmp(hi->name, name) == 0)
585                         return &hi->data;
586                 hi = hi->next;
587         }
588         return NULL;
589 }
590
591 /* grow hash if it becomes too big */
592 static void hash_rebuild(xhash *hash)
593 {
594         unsigned newsize, i, idx;
595         hash_item **newitems, *hi, *thi;
596
597         if (hash->nprime == ARRAY_SIZE(PRIMES))
598                 return;
599
600         newsize = PRIMES[hash->nprime++];
601         newitems = xzalloc(newsize * sizeof(newitems[0]));
602
603         for (i = 0; i < hash->csize; i++) {
604                 hi = hash->items[i];
605                 while (hi) {
606                         thi = hi;
607                         hi = thi->next;
608                         idx = hashidx(thi->name) % newsize;
609                         thi->next = newitems[idx];
610                         newitems[idx] = thi;
611                 }
612         }
613
614         free(hash->items);
615         hash->csize = newsize;
616         hash->items = newitems;
617 }
618
619 /* find item in hash, add it if necessary. Return ptr to data */
620 static void *hash_find(xhash *hash, const char *name)
621 {
622         hash_item *hi;
623         unsigned idx;
624         int l;
625
626         hi = hash_search(hash, name);
627         if (!hi) {
628                 if (++hash->nel / hash->csize > 10)
629                         hash_rebuild(hash);
630
631                 l = strlen(name) + 1;
632                 hi = xzalloc(sizeof(*hi) + l);
633                 strcpy(hi->name, name);
634
635                 idx = hashidx(name) % hash->csize;
636                 hi->next = hash->items[idx];
637                 hash->items[idx] = hi;
638                 hash->glen += l;
639         }
640         return &hi->data;
641 }
642
/* Typed wrappers around hash_find(), which creates the entry when the
 * name is not present yet. findvar() takes the table explicitly;
 * newvar(), newfile() and newfunc() use vhash, fdhash and fnhash. */
643 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
644 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
645 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
646 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
647
648 static void hash_remove(xhash *hash, const char *name)
649 {
650         hash_item *hi, **phi;
651
652         phi = &hash->items[hashidx(name) % hash->csize];
653         while (*phi) {
654                 hi = *phi;
655                 if (strcmp(hi->name, name) == 0) {
656                         hash->glen -= (strlen(name) + 1);
657                         hash->nel--;
658                         *phi = hi->next;
659                         free(hi);
660                         break;
661                 }
662                 phi = &hi->next;
663         }
664 }
665
666 /* ------ some useful functions ------ */
667
668 static char *skip_spaces(char *p)
669 {
670         while (1) {
671                 if (*p == '\\' && p[1] == '\n') {
672                         p++;
673                         t_lineno++;
674                 } else if (*p != ' ' && *p != '\t') {
675                         break;
676                 }
677                 p++;
678         }
679         return p;
680 }
681
/* Return the word *s currently points at and move *s to the byte that
 * follows the word's terminating NUL. */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
690
/* Fetch the next character from *s and advance *s past it. A backslash
 * is handed to bb_process_escape_sequence(); when that call returns a
 * backslash without moving *s (unrecognized escape "\z"), the character
 * after the backslash is returned instead and skipped unless it is NUL. */
static char nextchar(char **s)
{
        char *after_first;
        char ch = **s;

        (*s)++;
        if (ch != '\\')
                return ch;

        after_first = *s;
        ch = bb_process_escape_sequence((const char**)s);
        if (ch != '\\' || *s != after_first)
                return ch;

        /* unrecognized \z: fetch z, advance unless z = NUL */
        ch = **s;
        if (ch != '\0')
                (*s)++;
        return ch;
}
706
707 static ALWAYS_INLINE int isalnum_(int c)
708 {
709         return (isalnum(c) || c == '_');
710 }
711
712 static double my_strtod(char **pp)
713 {
714         char *cp = *pp;
715         if (ENABLE_DESKTOP && cp[0] == '0') {
716                 /* Might be hex or octal integer: 0x123abc or 07777 */
717                 char c = (cp[1] | 0x20);
718                 if (c == 'x' || isdigit(cp[1])) {
719                         unsigned long long ull = strtoull(cp, pp, 0);
720                         if (c == 'x')
721                                 return ull;
722                         c = **pp;
723                         if (!isdigit(c) && c != '.')
724                                 return ull;
725                         /* else: it may be a floating number. Examples:
726                          * 009.123 (*pp points to '9')
727                          * 000.123 (*pp points to '.')
728                          * fall through to strtod.
729                          */
730                 }
731         }
732         return strtod(cp, pp);
733 }
734
735 /* -------- working with variables (set/get/copy/etc) -------- */
736
737 static xhash *iamarray(var *v)
738 {
739         var *a = v;
740
741         while (a->type & VF_CHILD)
742                 a = a->x.parent;
743
744         if (!(a->type & VF_ARRAY)) {
745                 a->type |= VF_ARRAY;
746                 a->x.array = hash_init();
747         }
748         return a->x.array;
749 }
750
751 static void clear_array(xhash *array)
752 {
753         unsigned i;
754         hash_item *hi, *thi;
755
756         for (i = 0; i < array->csize; i++) {
757                 hi = array->items[i];
758                 while (hi) {
759                         thi = hi;
760                         hi = hi->next;
761                         free(thi->data.v.string);
762                         free(thi);
763                 }
764                 array->items[i] = NULL;
765         }
766         array->glen = array->nel = 0;
767 }
768
769 /* clear a variable */
770 static var *clrvar(var *v)
771 {
772         if (!(v->type & VF_FSTR))
773                 free(v->string);
774
775         v->type &= VF_DONTTOUCH;
776         v->type |= VF_DIRTY;
777         v->string = NULL;
778         return v;
779 }
780
781 /* assign string value to variable */
782 static var *setvar_p(var *v, char *value)
783 {
784         clrvar(v);
785         v->string = value;
786         handle_special(v);
787         return v;
788 }
789
790 /* same as setvar_p but make a copy of string */
791 static var *setvar_s(var *v, const char *value)
792 {
793         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
794 }
795
796 /* same as setvar_s but sets USER flag */
797 static var *setvar_u(var *v, const char *value)
798 {
799         v = setvar_s(v, value);
800         v->type |= VF_USER;
801         return v;
802 }
803
804 /* set array element to user string */
805 static void setari_u(var *a, int idx, const char *s)
806 {
807         var *v;
808
809         v = findvar(iamarray(a), itoa(idx));
810         setvar_u(v, s);
811 }
812
813 /* assign numeric value to variable */
814 static var *setvar_i(var *v, double value)
815 {
816         clrvar(v);
817         v->type |= VF_NUMBER;
818         v->number = value;
819         handle_special(v);
820         return v;
821 }
822
823 static const char *getvar_s(var *v)
824 {
825         /* if v is numeric and has no cached string, convert it to string */
826         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
827                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
828                 v->string = xstrdup(g_buf);
829                 v->type |= VF_CACHED;
830         }
831         return (v->string == NULL) ? "" : v->string;
832 }
833
834 static double getvar_i(var *v)
835 {
836         char *s;
837
838         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
839                 v->number = 0;
840                 s = v->string;
841                 if (s && *s) {
842                         debug_printf_eval("getvar_i: '%s'->", s);
843                         v->number = my_strtod(&s);
844                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
845                         if (v->type & VF_USER) {
846                                 s = skip_spaces(s);
847                                 if (*s != '\0')
848                                         v->type &= ~VF_USER;
849                         }
850                 } else {
851                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
852                         v->type &= ~VF_USER;
853                 }
854                 v->type |= VF_CACHED;
855         }
856         debug_printf_eval("getvar_i: %f\n", v->number);
857         return v->number;
858 }
859
860 /* Used for operands of bitwise ops */
861 static unsigned long getvar_i_int(var *v)
862 {
863         double d = getvar_i(v);
864
865         /* Casting doubles to longs is undefined for values outside
866          * of target type range. Try to widen it as much as possible */
867         if (d >= 0)
868                 return (unsigned long)d;
869         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
870         return - (long) (unsigned long) (-d);
871 }
872
873 static var *copyvar(var *dest, const var *src)
874 {
875         if (dest != src) {
876                 clrvar(dest);
877                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
878                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
879                 dest->number = src->number;
880                 if (src->string)
881                         dest->string = xstrdup(src->string);
882         }
883         handle_special(dest);
884         return dest;
885 }
886
887 static var *incvar(var *v)
888 {
889         return setvar_i(v, getvar_i(v) + 1.0);
890 }
891
892 /* return true if v is number or numeric string */
893 static int is_numeric(var *v)
894 {
895         getvar_i(v);
896         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
897 }
898
899 /* return 1 when value of v corresponds to true, 0 otherwise */
900 static int istrue(var *v)
901 {
902         if (is_numeric(v))
903                 return (v->number != 0);
904         return (v->string && v->string[0]);
905 }
906
907 /* temporary variables allocator. Last allocated should be first freed */
908 static var *nvalloc(int n)
909 {
910         nvblock *pb = NULL;
911         var *v, *r;
912         int size;
913
914         while (g_cb) {
915                 pb = g_cb;
916                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
917                         break;
918                 g_cb = g_cb->next;
919         }
920
921         if (!g_cb) {
922                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
923                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
924                 g_cb->size = size;
925                 g_cb->pos = g_cb->nv;
926                 g_cb->prev = pb;
927                 /*g_cb->next = NULL; - xzalloc did it */
928                 if (pb)
929                         pb->next = g_cb;
930         }
931
932         v = r = g_cb->pos;
933         g_cb->pos += n;
934
935         while (v < g_cb->pos) {
936                 v->type = 0;
937                 v->string = NULL;
938                 v++;
939         }
940
941         return r;
942 }
943
944 static void nvfree(var *v)
945 {
946         var *p;
947
948         if (v < g_cb->nv || v >= g_cb->pos)
949                 syntax_error(EMSG_INTERNAL_ERROR);
950
951         for (p = v; p < g_cb->pos; p++) {
952                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
953                         clear_array(iamarray(p));
954                         free(p->x.array->items);
955                         free(p->x.array);
956                 }
957                 if (p->type & VF_WALK) {
958                         walker_list *n;
959                         walker_list *w = p->x.walker;
960                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
961                         p->x.walker = NULL;
962                         while (w) {
963                                 n = w->prev;
964                                 debug_printf_walker(" free(%p)\n", w);
965                                 free(w);
966                                 w = n;
967                         }
968                 }
969                 clrvar(p);
970         }
971
972         g_cb->pos = v;
973         while (g_cb->prev && g_cb->pos == g_cb->nv) {
974                 g_cb = g_cb->prev;
975         }
976 }
977
978 /* ------- awk program text parsing ------- */
979
980 /* Parse next token pointed by global pos, place results into global ttt.
981  * If token isn't expected, give away. Return token class
     *
     * In detail: scanning starts at g_pos; the results are stored in t_tclass
     * (token class, also the return value), t_info, t_string and t_double.
     * 'expected' is a bitmask of acceptable token classes. If the class that
     * was found is not part of it, syntax_error() is called. 'expected' also
     * steers scanning: '/' starts a regexp only when TC_REGEXP is expected,
     * and a concatenation operator is inserted only when TC_BINOP is expected.
     *
     * Nothing is scanned when a token was pushed back (t_rollback set): the
     * class still held in t_tclass is reported again. Likewise, if the
     * previous call inserted a concatenation operator, the token saved at
     * that time is reported now.
     *
     * String, regexp and name tokens are NUL-terminated in place, i.e. the
     * program text behind g_pos is modified.
982  */
983 static uint32_t next_token(uint32_t expected)
984 {
985 #define concat_inserted (G.next_token__concat_inserted)
986 #define save_tclass     (G.next_token__save_tclass)
987 #define save_info       (G.next_token__save_info)
988 /* Initialized to TC_OPTERM: */
989 #define ltclass         (G.next_token__ltclass)
990
991         char *p, *s;
992         const char *tl;
993         uint32_t tc;
994         const uint32_t *ti;
995
            /* pushed-back token: t_tclass/t_info are left as they are */
996         if (t_rollback) {
997                 t_rollback = FALSE;
998
            /* deliver the token that was held back when OC_CONCAT was inserted */
999         } else if (concat_inserted) {
1000                 concat_inserted = FALSE;
1001                 t_tclass = save_tclass;
1002                 t_info = save_info;
1003
1004         } else {
1005                 p = g_pos;
1006  readnext:
1007                 p = skip_spaces(p);
1008                 g_lineno = t_lineno;
                     /* skip a '#' comment up to, but not including, the end of line */
1009                 if (*p == '#')
1010                         while (*p != '\n' && *p != '\0')
1011                                 p++;
1012
                     /* only the line counter is updated here; p still points at
                      * the newline, which is tokenized by the code below */
1013                 if (*p == '\n')
1014                         t_lineno++;
1015
1016                 if (*p == '\0') {
1017                         tc = TC_EOF;
1018                         debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1019
1020                 } else if (*p == '\"') {
1021                         /* it's a string */
                             /* characters are decoded by nextchar() and written back
                              * over the source text, starting right after the quote */
1022                         t_string = s = ++p;
1023                         while (*p != '\"') {
1024                                 char *pp;
1025                                 if (*p == '\0' || *p == '\n')
1026                                         syntax_error(EMSG_UNEXP_EOS);
1027                                 pp = p;
1028                                 *s++ = nextchar(&pp);
1029                                 p = pp;
1030                         }
1031                         p++;
1032                         *s = '\0';
1033                         tc = TC_STRING;
1034                         debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1035
1036                 } else if ((expected & TC_REGEXP) && *p == '/') {
1037                         /* it's regexp */
1038                         t_string = s = ++p;
1039                         while (*p != '/') {
1040                                 if (*p == '\0' || *p == '\n')
1041                                         syntax_error(EMSG_UNEXP_EOS);
1042                                 *s = *p++;
                                     /* backslash: let bb_process_escape_sequence() try to
                                      * decode what follows. If it consumed nothing (pp == p)
                                      * the following char is copied as is after the stored
                                      * result; a doubled backslash is stored as two.
                                      * NOTE(review): presumably this keeps escapes that are
                                      * meant for regcomp() intact - confirm */
1043                                 if (*s++ == '\\') {
1044                                         char *pp = p;
1045                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1046                                         if (*p == '\\')
1047                                                 *s++ = '\\';
1048                                         if (pp == p)
1049                                                 *s++ = *p++;
1050                                         else
1051                                                 p = pp;
1052                                 }
1053                         }
1054                         p++;
1055                         *s = '\0';
1056                         tc = TC_REGEXP;
1057                         debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1058
1059                 } else if (*p == '.' || isdigit(*p)) {
1060                         /* it's a number */
1061                         char *pp = p;
1062                         t_double = my_strtod(&pp);
1063                         p = pp;
                             /* a '.' directly after the parsed number is rejected */
1064                         if (*p == '.')
1065                                 syntax_error(EMSG_UNEXP_TOKEN);
1066                         tc = TC_NUMBER;
1067                         debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1068
1069                 } else {
1070                         /* search for something known */
                             /* tokenlist is walked entry by entry: each entry is one
                              * length byte followed by that many chars of token text.
                              * An NTCC byte in place of a length moves tc on to the next
                              * token class bit. ti walks tokeninfo in step with the
                              * text entries. */
1071                         tl = tokenlist;
1072                         tc = 0x00000001;
1073                         ti = tokeninfo;
1074                         while (*tl) {
1075                                 int l = (unsigned char) *tl++;
1076                                 if (l == (unsigned char) NTCC) {
1077                                         tc <<= 1;
1078                                         continue;
1079                                 }
1080                                 /* if token class is expected,
1081                                  * token matches,
1082                                  * and it's not a longer word,
1083                                  */
1084                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1085                                  && strncmp(p, tl, l) == 0
1086                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1087                                 ) {
1088                                         /* then this is what we are looking for */
1089                                         t_info = *ti;
1090                                         debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1091                                         p += l;
1092                                         goto token_found;
1093                                 }
1094                                 ti++;
1095                                 tl += l;
1096                         }
1097                         /* not a known token */
1098
1099                         /* is it a name? (var/array/function) */
1100                         if (!isalnum_(*p))
1101                                 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1102                         /* yes */
                             /* the name is moved one char to the left (overwriting the
                              * char in front of it) so that the terminating NUL can be
                              * stored without touching the char that follows the name */
1103                         t_string = --p;
1104                         while (isalnum_(*++p)) {
1105                                 p[-1] = *p;
1106                         }
1107                         p[-1] = '\0';
1108                         tc = TC_VARIABLE;
1109                         /* also consume whitespace between functionname and bracket */
1110                         if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1111                                 p = skip_spaces(p);
                             /* note: '(' is left in the input, '[' is consumed */
1112                         if (*p == '(') {
1113                                 tc = TC_FUNCTION;
1114                                 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1115                         } else {
1116                                 if (*p == '[') {
1117                                         p++;
1118                                         tc = TC_ARRAY;
1119                                         debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1120                                 } else
1121                                         debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1122                         }
1123                 }
1124  token_found:
1125                 g_pos = p;
1126
1127                 /* skipping newlines in some cases */
                     /* (when the previous token's class has TC_NOTERM set) */
1128                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1129                         goto readnext;
1130
1131                 /* insert concatenation operator when needed */
                     /* the token just scanned is parked in save_tclass/save_info
                      * and returned by the next call */
1132                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1133                         concat_inserted = TRUE;
1134                         save_tclass = tc;
1135                         save_info = t_info;
1136                         tc = TC_BINOP;
1137                         t_info = OC_CONCAT | SS | P(35);
1138                 }
1139
1140                 t_tclass = tc;
1141         }
             /* remember the class for the newline/concatenation decisions of the next call */
1142         ltclass = t_tclass;
1143
1144         /* Are we ready for this? */
1145         if (!(ltclass & expected))
1146                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1147                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1148
1149         return ltclass;
1150 #undef concat_inserted
1151 #undef save_tclass
1152 #undef save_info
1153 #undef ltclass
1154 }
1155
1156 static void rollback_token(void)
1157 {
1158         t_rollback = TRUE;
1159 }
1160
1161 static node *new_node(uint32_t info)
1162 {
1163         node *n;
1164
1165         n = xzalloc(sizeof(node));
1166         n->info = info;
1167         n->lineno = g_lineno;
1168         return n;
1169 }
1170
1171 static void mk_re_node(const char *s, node *n, regex_t *re)
1172 {
1173         n->info = OC_REGEXP;
1174         n->l.re = re;
1175         n->r.ire = re + 1;
1176         xregcomp(re, s, REG_EXTENDED);
1177         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1178 }
1179
1180 static node *condition(void)
1181 {
1182         next_token(TC_SEQSTART);
1183         return parse_expr(TC_SEQTERM);
1184 }
1185
1186 /* parse expression terminated by given argument, return ptr
1187  * to built subtree. Terminator is eaten by parse_expr
      *
      * iexp is a bitmask of token classes which end the expression.
      * The result is NULL when the terminator is the very first token
      * (nothing gets attached to the local root in that case).
      *
      * While the tree is being built, a.n of every node points to the node
      * it is attached to, which is what allows climbing back towards the root
      * when an operator has to be inserted above already parsed operands.
      * Token errors are reported by next_token() via syntax_error().
      */
1188 static node *parse_expr(uint32_t iexp)
1189 {
         /* sn: local root of the tree under construction, only sn.r.n is returned
          * cn: node the next operand gets attached to
          * glptr: last getline node, candidate for a following '<' redirection
          * xtc: token classes acceptable as the next token */
1190         node sn;
1191         node *cn = &sn;
1192         node *vn, *glptr;
1193         uint32_t tc, xtc;
1194         var *v;
1195
1196         debug_printf_parse("%s(%x)\n", __func__, iexp);
1197
             /* NOTE(review): PRIMASK as the root's info presumably makes the
              * priority climb below always stop at the root - confirm against
              * the PRIMASK2 definition */
1198         sn.info = PRIMASK;
1199         sn.r.n = glptr = NULL;
1200         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1201
1202         while (!((tc = next_token(xtc)) & iexp)) {
1203
1204                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1205                         /* input redirection (<) attached to glptr node */
1206                         debug_printf_parse("%s: input redir\n", __func__);
1207                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1208                         cn->a.n = glptr;
1209                         xtc = TC_OPERAND | TC_UOPPRE;
1210                         glptr = NULL;
1211
1212                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1213                         debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1214                         /* for binary and postfix-unary operators, jump back over
1215                          * previous operators with higher priority */
1216                         vn = cn;
1217                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1218                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1219                         ) {
1220                                 vn = vn->a.n;
1221                         }
                             /* NOTE(review): P(6) is added to the priority field of
                              * the ternary operator after the climb - purpose not
                              * visible here, confirm */
1222                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1223                                 t_info += P(6);
                             /* splice the new operator node between vn and the node
                              * vn was attached to */
1224                         cn = vn->a.n->r.n = new_node(t_info);
1225                         cn->a.n = vn->a.n;
1226                         if (tc & TC_BINOP) {
                                     /* binary: vn becomes the left operand, the right
                                      * one is expected next */
1227                                 cn->l.n = vn;
1228                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1229                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1230                                         /* it's a pipe */
1231                                         next_token(TC_GETLINE);
1232                                         /* give maximum priority to this pipe */
1233                                         cn->info &= ~PRIMASK;
1234                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1235                                 }
1236                         } else {
                                     /* postfix: vn is the (only) operand */
1237                                 cn->r.n = vn;
1238                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1239                         }
1240                         vn->a.n = cn;
1241
1242                 } else {
1243                         debug_printf_parse("%s: other\n", __func__);
1244                         /* for operands and prefix-unary operators, attach them
1245                          * to last node */
1246                         vn = cn;
1247                         cn = vn->r.n = new_node(t_info);
1248                         cn->a.n = vn;
1249                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1250                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1251                                 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1252                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1253                                 /* one should be very careful with switch on tclass -
1254                                  * only simple tclasses should be used! */
1255                                 switch (tc) {
1256                                 case TC_VARIABLE:
1257                                 case TC_ARRAY:
1258                                         debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1259                                         cn->info = OC_VAR;
                                             /* a name found in ahash is referenced by its
                                              * index (OC_FNARG), any other name through
                                              * newvar() */
1260                                         v = hash_search(ahash, t_string);
1261                                         if (v != NULL) {
1262                                                 cn->info = OC_FNARG;
1263                                                 cn->l.aidx = v->x.aidx;
1264                                         } else {
1265                                                 cn->l.v = newvar(t_string);
1266                                         }
1267                                         if (tc & TC_ARRAY) {
                                                     /* subscript expression, parsed up to TC_ARRTERM */
1268                                                 cn->info |= xS;
1269                                                 cn->r.n = parse_expr(TC_ARRTERM);
1270                                         }
1271                                         break;
1272
1273                                 case TC_NUMBER:
1274                                 case TC_STRING:
1275                                         debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
                                             /* constants get an anonymous var of their own */
1276                                         cn->info = OC_VAR;
1277                                         v = cn->l.v = xzalloc(sizeof(var));
1278                                         if (tc & TC_NUMBER)
1279                                                 setvar_i(v, t_double);
1280                                         else
1281                                                 setvar_s(v, t_string);
1282                                         break;
1283
1284                                 case TC_REGEXP:
1285                                         debug_printf_parse("%s: TC_REGEXP\n", __func__);
1286                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1287                                         break;
1288
1289                                 case TC_FUNCTION:
1290                                         debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1291                                         cn->info = OC_FUNC;
1292                                         cn->r.f = newfunc(t_string);
1293                                         cn->l.n = condition();
1294                                         break;
1295
1296                                 case TC_SEQSTART:
1297                                         debug_printf_parse("%s: TC_SEQSTART\n", __func__);
                                             /* the node created above is replaced by the
                                              * parenthesized subtree; "()" yields NULL and
                                              * is rejected */
1298                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1299                                         if (!cn)
1300                                                 syntax_error("Empty sequence");
1301                                         cn->a.n = vn;
1302                                         break;
1303
1304                                 case TC_GETLINE:
1305                                         debug_printf_parse("%s: TC_GETLINE\n", __func__);
1306                                         glptr = cn;
1307                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1308                                         break;
1309
1310                                 case TC_BUILTIN:
1311                                         debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1312                                         cn->l.n = condition();
1313                                         break;
1314                                 }
1315                         }
1316                 }
1317         }
1318
1319         debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1320         return sn.r.n;
1321 }
1322
1323 /* add node to chain. Return ptr to alloc'd node */
1324 static node *chain_node(uint32_t info)
1325 {
1326         node *n;
1327
1328         if (!seq->first)
1329                 seq->first = seq->last = new_node(0);
1330
1331         if (seq->programname != g_progname) {
1332                 seq->programname = g_progname;
1333                 n = chain_node(OC_NEWSOURCE);
1334                 n->l.new_progname = xstrdup(g_progname);
1335         }
1336
1337         n = seq->last;
1338         n->info = info;
1339         seq->last = n->a.n = new_node(OC_DONE);
1340
1341         return n;
1342 }
1343
1344 static void chain_expr(uint32_t info)
1345 {
1346         node *n;
1347
1348         n = chain_node(info);
1349         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1350         if (t_tclass & TC_GRPTERM)
1351                 rollback_token();
1352 }
1353
1354 static node *chain_loop(node *nn)
1355 {
1356         node *n, *n2, *save_brk, *save_cont;
1357
1358         save_brk = break_ptr;
1359         save_cont = continue_ptr;
1360
1361         n = chain_node(OC_BR | Vx);
1362         continue_ptr = new_node(OC_EXEC);
1363         break_ptr = new_node(OC_EXEC);
1364         chain_group();
1365         n2 = chain_node(OC_EXEC | Vx);
1366         n2->l.n = nn;
1367         n2->a.n = n;
1368         continue_ptr->a.n = n2;
1369         break_ptr->a.n = n->r.n = seq->last;
1370
1371         continue_ptr = save_cont;
1372         break_ptr = save_brk;
1373
1374         return n;
1375 }
1376
1377 /* parse group and attach it to chain */
1378 static void chain_group(void)
1379 {
1380         uint32_t c;
1381         node *n, *n2, *n3;
1382
1383         do {
1384                 c = next_token(TC_GRPSEQ);
1385         } while (c & TC_NEWLINE);
1386
1387         if (c & TC_GRPSTART) {
1388                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1389                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1390                         debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1391                         if (t_tclass & TC_NEWLINE)
1392                                 continue;
1393                         rollback_token();
1394                         chain_group();
1395                 }
1396                 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1397         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1398                 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1399                 rollback_token();
1400                 chain_expr(OC_EXEC | Vx);
1401         } else {
1402                 /* TC_STATEMNT */
1403                 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1404                 switch (t_info & OPCLSMASK) {
1405                 case ST_IF:
1406                         debug_printf_parse("%s: ST_IF\n", __func__);
1407                         n = chain_node(OC_BR | Vx);
1408                         n->l.n = condition();
1409                         chain_group();
1410                         n2 = chain_node(OC_EXEC);
1411                         n->r.n = seq->last;
1412                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1413                                 chain_group();
1414                                 n2->a.n = seq->last;
1415                         } else {
1416                                 rollback_token();
1417                         }
1418                         break;
1419
1420                 case ST_WHILE:
1421                         debug_printf_parse("%s: ST_WHILE\n", __func__);
1422                         n2 = condition();
1423                         n = chain_loop(NULL);
1424                         n->l.n = n2;
1425                         break;
1426
1427                 case ST_DO:
1428                         debug_printf_parse("%s: ST_DO\n", __func__);
1429                         n2 = chain_node(OC_EXEC);
1430                         n = chain_loop(NULL);
1431                         n2->a.n = n->a.n;
1432                         next_token(TC_WHILE);
1433                         n->l.n = condition();
1434                         break;
1435
1436                 case ST_FOR:
1437                         debug_printf_parse("%s: ST_FOR\n", __func__);
1438                         next_token(TC_SEQSTART);
1439                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1440                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1441                                 if ((n2->info & OPCLSMASK) != OC_IN)
1442                                         syntax_error(EMSG_UNEXP_TOKEN);
1443                                 n = chain_node(OC_WALKINIT | VV);
1444                                 n->l.n = n2->l.n;
1445                                 n->r.n = n2->r.n;
1446                                 n = chain_loop(NULL);
1447                                 n->info = OC_WALKNEXT | Vx;
1448                                 n->l.n = n2->l.n;
1449                         } else {                        /* for (;;) */
1450                                 n = chain_node(OC_EXEC | Vx);
1451                                 n->l.n = n2;
1452                                 n2 = parse_expr(TC_SEMICOL);
1453                                 n3 = parse_expr(TC_SEQTERM);
1454                                 n = chain_loop(n3);
1455                                 n->l.n = n2;
1456                                 if (!n2)
1457                                         n->info = OC_EXEC;
1458                         }
1459                         break;
1460
1461                 case OC_PRINT:
1462                 case OC_PRINTF:
1463                         debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1464                         n = chain_node(t_info);
1465                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1466                         if (t_tclass & TC_OUTRDR) {
1467                                 n->info |= t_info;
1468                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1469                         }
1470                         if (t_tclass & TC_GRPTERM)
1471                                 rollback_token();
1472                         break;
1473
1474                 case OC_BREAK:
1475                         debug_printf_parse("%s: OC_BREAK\n", __func__);
1476                         n = chain_node(OC_EXEC);
1477                         n->a.n = break_ptr;
1478                         break;
1479
1480                 case OC_CONTINUE:
1481                         debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1482                         n = chain_node(OC_EXEC);
1483                         n->a.n = continue_ptr;
1484                         break;
1485
1486                 /* delete, next, nextfile, return, exit */
1487                 default:
1488                         debug_printf_parse("%s: default\n", __func__);
1489                         chain_expr(t_info);
1490                 }
1491         }
1492 }
1493
1494 static void parse_program(char *p)
1495 {
1496         uint32_t tclass;
1497         node *cn;
1498         func *f;
1499         var *v;
1500
1501         g_pos = p;
1502         t_lineno = 1;
1503         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1504                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1505
1506                 if (tclass & TC_OPTERM) {
1507                         debug_printf_parse("%s: TC_OPTERM\n", __func__);
1508                         continue;
1509                 }
1510
1511                 seq = &mainseq;
1512                 if (tclass & TC_BEGIN) {
1513                         debug_printf_parse("%s: TC_BEGIN\n", __func__);
1514                         seq = &beginseq;
1515                         chain_group();
1516
1517                 } else if (tclass & TC_END) {
1518                         debug_printf_parse("%s: TC_END\n", __func__);
1519                         seq = &endseq;
1520                         chain_group();
1521
1522                 } else if (tclass & TC_FUNCDECL) {
1523                         debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1524                         next_token(TC_FUNCTION);
1525                         g_pos++;
1526                         f = newfunc(t_string);
1527                         f->body.first = NULL;
1528                         f->nargs = 0;
1529                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1530                                 v = findvar(ahash, t_string);
1531                                 v->x.aidx = f->nargs++;
1532
1533                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1534                                         break;
1535                         }
1536                         seq = &f->body;
1537                         chain_group();
1538                         clear_array(ahash);
1539
1540                 } else if (tclass & TC_OPSEQ) {
1541                         debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1542                         rollback_token();
1543                         cn = chain_node(OC_TEST);
1544                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1545                         if (t_tclass & TC_GRPSTART) {
1546                                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1547                                 rollback_token();
1548                                 chain_group();
1549                         } else {
1550                                 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1551                                 chain_node(OC_PRINT);
1552                         }
1553                         cn->r.n = mainseq.last;
1554
1555                 } else /* if (tclass & TC_GRPSTART) */ {
1556                         debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1557                         rollback_token();
1558                         chain_group();
1559                 }
1560         }
1561         debug_printf_parse("%s: TC_EOF\n", __func__);
1562 }
1563
1564
1565 /* -------- program execution part -------- */
1566
1567 static node *mk_splitter(const char *s, tsplitter *spl)
1568 {
1569         regex_t *re, *ire;
1570         node *n;
1571
1572         re = &spl->re[0];
1573         ire = &spl->re[1];
1574         n = &spl->n;
1575         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1576                 regfree(re);
1577                 regfree(ire); // TODO: nuke ire, use re+1?
1578         }
1579         if (s[0] && s[1]) { /* strlen(s) > 1 */
1580                 mk_re_node(s, n, re);
1581         } else {
1582                 n->info = (uint32_t) s[0];
1583         }
1584
1585         return n;
1586 }
1587
1588 /* use node as a regular expression. Supplied with node ptr and regex_t
1589  * storage space. Return ptr to regex (if result points to preg, it should
1590  * be later regfree'd manually
1591  */
1592 static regex_t *as_regex(node *op, regex_t *preg)
1593 {
1594         int cflags;
1595         var *v;
1596         const char *s;
1597
1598         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1599                 return icase ? op->r.ire : op->l.re;
1600         }
1601         v = nvalloc(1);
1602         s = getvar_s(evaluate(op, v));
1603
1604         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1605         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1606          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1607          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1608          * (maybe gsub is not supposed to use REG_EXTENDED?).
1609          */
1610         if (regcomp(preg, s, cflags)) {
1611                 cflags &= ~REG_EXTENDED;
1612                 xregcomp(preg, s, cflags);
1613         }
1614         nvfree(v);
1615         return preg;
1616 }
1617
/*
 * Gradually growing buffer.
 *
 * Returns 'b' unchanged while it is non-NULL and n < *size. Otherwise the
 * buffer is reallocated to n + n/2 + 80 bytes and *size is updated.
 * Since the buffer is regrown even when n == *size, the caller always has
 * at least one allocated byte beyond the first n.
 * *size is not read when 'b' is NULL.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n >> 1) + 80;
	return xrealloc(b, *size);
}
1630
1631 /* resize field storage space */
1632 static void fsrealloc(int size)
1633 {
1634         int i;
1635
1636         if (size >= maxfields) {
1637                 i = maxfields;
1638                 maxfields = size + 16;
1639                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1640                 for (; i < maxfields; i++) {
1641                         Fields[i].type = VF_SPECIAL;
1642                         Fields[i].string = NULL;
1643                 }
1644         }
1645         /* if size < nfields, clear extra field variables */
1646         for (i = size; i < nfields; i++) {
1647                 clrvar(Fields + i);
1648         }
1649         nfields = size;
1650 }
1651
1652 static int awk_split(const char *s, node *spl, char **slist)
1653 {
1654         int l, n;
1655         char c[4];
1656         char *s1;
1657         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1658
1659         /* in worst case, each char would be a separate field */
1660         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1661         strcpy(s1, s);
1662
1663         c[0] = c[1] = (char)spl->info;
1664         c[2] = c[3] = '\0';
1665         if (*getvar_s(intvar[RS]) == '\0')
1666                 c[2] = '\n';
1667
1668         n = 0;
1669         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1670                 if (!*s)
1671                         return n; /* "": zero fields */
1672                 n++; /* at least one field will be there */
1673                 do {
1674                         l = strcspn(s, c+2); /* len till next NUL or \n */
1675                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1676                          && pmatch[0].rm_so <= l
1677                         ) {
1678                                 l = pmatch[0].rm_so;
1679                                 if (pmatch[0].rm_eo == 0) {
1680                                         l++;
1681                                         pmatch[0].rm_eo++;
1682                                 }
1683                                 n++; /* we saw yet another delimiter */
1684                         } else {
1685                                 pmatch[0].rm_eo = l;
1686                                 if (s[l])
1687                                         pmatch[0].rm_eo++;
1688                         }
1689                         memcpy(s1, s, l);
1690                         /* make sure we remove *all* of the separator chars */
1691                         do {
1692                                 s1[l] = '\0';
1693                         } while (++l < pmatch[0].rm_eo);
1694                         nextword(&s1);
1695                         s += pmatch[0].rm_eo;
1696                 } while (*s);
1697                 return n;
1698         }
1699         if (c[0] == '\0') {  /* null split */
1700                 while (*s) {
1701                         *s1++ = *s++;
1702                         *s1++ = '\0';
1703                         n++;
1704                 }
1705                 return n;
1706         }
1707         if (c[0] != ' ') {  /* single-character split */
1708                 if (icase) {
1709                         c[0] = toupper(c[0]);
1710                         c[1] = tolower(c[1]);
1711                 }
1712                 if (*s1)
1713                         n++;
1714                 while ((s1 = strpbrk(s1, c)) != NULL) {
1715                         *s1++ = '\0';
1716                         n++;
1717                 }
1718                 return n;
1719         }
1720         /* space split */
1721         while (*s) {
1722                 s = skip_whitespace(s);
1723                 if (!*s)
1724                         break;
1725                 n++;
1726                 while (*s && !isspace(*s))
1727                         *s1++ = *s++;
1728                 *s1++ = '\0';
1729         }
1730         return n;
1731 }
1732
1733 static void split_f0(void)
1734 {
1735 /* static char *fstrings; */
1736 #define fstrings (G.split_f0__fstrings)
1737
1738         int i, n;
1739         char *s;
1740
1741         if (is_f0_split)
1742                 return;
1743
1744         is_f0_split = TRUE;
1745         free(fstrings);
1746         fsrealloc(0);
1747         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1748         fsrealloc(n);
1749         s = fstrings;
1750         for (i = 0; i < n; i++) {
1751                 Fields[i].string = nextword(&s);
1752                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1753         }
1754
1755         /* set NF manually to avoid side effects */
1756         clrvar(intvar[NF]);
1757         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1758         intvar[NF]->number = nfields;
1759 #undef fstrings
1760 }
1761
1762 /* perform additional actions when some internal variables changed */
1763 static void handle_special(var *v)
1764 {
1765         int n;
1766         char *b;
1767         const char *sep, *s;
1768         int sl, l, len, i, bsize;
1769
1770         if (!(v->type & VF_SPECIAL))
1771                 return;
1772
1773         if (v == intvar[NF]) {
1774                 n = (int)getvar_i(v);
1775                 fsrealloc(n);
1776
1777                 /* recalculate $0 */
1778                 sep = getvar_s(intvar[OFS]);
1779                 sl = strlen(sep);
1780                 b = NULL;
1781                 len = 0;
1782                 for (i = 0; i < n; i++) {
1783                         s = getvar_s(&Fields[i]);
1784                         l = strlen(s);
1785                         if (b) {
1786                                 memcpy(b+len, sep, sl);
1787                                 len += sl;
1788                         }
1789                         b = qrealloc(b, len+l+sl, &bsize);
1790                         memcpy(b+len, s, l);
1791                         len += l;
1792                 }
1793                 if (b)
1794                         b[len] = '\0';
1795                 setvar_p(intvar[F0], b);
1796                 is_f0_split = TRUE;
1797
1798         } else if (v == intvar[F0]) {
1799                 is_f0_split = FALSE;
1800
1801         } else if (v == intvar[FS]) {
1802                 mk_splitter(getvar_s(v), &fsplitter);
1803
1804         } else if (v == intvar[RS]) {
1805                 mk_splitter(getvar_s(v), &rsplitter);
1806
1807         } else if (v == intvar[IGNORECASE]) {
1808                 icase = istrue(v);
1809
1810         } else {                                /* $n */
1811                 n = getvar_i(intvar[NF]);
1812                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1813                 /* right here v is invalid. Just to note... */
1814         }
1815 }
1816
1817 /* step through func/builtin/etc arguments */
1818 static node *nextarg(node **pn)
1819 {
1820         node *n;
1821
1822         n = *pn;
1823         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1824                 *pn = n->r.n;
1825                 n = n->l.n;
1826         } else {
1827                 *pn = NULL;
1828         }
1829         return n;
1830 }
1831
1832 static void hashwalk_init(var *v, xhash *array)
1833 {
1834         hash_item *hi;
1835         unsigned i;
1836         walker_list *w;
1837         walker_list *prev_walker;
1838
1839         if (v->type & VF_WALK) {
1840                 prev_walker = v->x.walker;
1841         } else {
1842                 v->type |= VF_WALK;
1843                 prev_walker = NULL;
1844         }
1845         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1846
1847         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1848         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1849         w->cur = w->end = w->wbuf;
1850         w->prev = prev_walker;
1851         for (i = 0; i < array->csize; i++) {
1852                 hi = array->items[i];
1853                 while (hi) {
1854                         strcpy(w->end, hi->name);
1855                         nextword(&w->end);
1856                         hi = hi->next;
1857                 }
1858         }
1859 }
1860
1861 static int hashwalk_next(var *v)
1862 {
1863         walker_list *w = v->x.walker;
1864
1865         if (w->cur >= w->end) {
1866                 walker_list *prev_walker = w->prev;
1867
1868                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1869                 free(w);
1870                 v->x.walker = prev_walker;
1871                 return FALSE;
1872         }
1873
1874         setvar_s(v, nextword(&w->cur));
1875         return TRUE;
1876 }
1877
1878 /* evaluate node, return 1 when result is true, 0 otherwise */
1879 static int ptest(node *pattern)
1880 {
1881         /* ptest__v is "static": to save stack space? */
1882         return istrue(evaluate(pattern, &G.ptest__v));
1883 }
1884
1885 /* Read the next record from stream rsm into variable v.
 *
 * The stream's own buffer (rsm->buffer, with rsm->adv = offset of the
 * unread data, rsm->pos = number of unread bytes, rsm->size = allocated
 * size) is searched for a record separator as described by rsplitter;
 * more data is read from the stream's file descriptor until a separator
 * is found or a read adds nothing. On success v receives the record text
 * (flagged VF_USER) and RT receives the separator text.
 *
 * Returns 1 if a record was read, 0 if no data was left, and -1 if the
 * read failed (ERRNO is set from errno in that case).
 */
1886 static int awk_getline(rstream *rsm, var *v)
1887 {
1888         char *b;
1889         regmatch_t pmatch[2];
1890         int size, a, p, pp = 0;
1891         int fd, so, eo, r, rp;
1892         char c, *m, *s;
1893
1894         debug_printf_eval("entered %s()\n", __func__);
1895
1896         /* we're using our own buffer since we need access to accumulating
1897          * characters
1898          */
1899         fd = fileno(rsm->F);
1900         m = rsm->buffer;
1901         a = rsm->adv;
1902         p = rsm->pos;
1903         size = rsm->size;
             /* c: separator character when rsplitter is not a regexp node */
1904         c = (char) rsplitter.n.info;
             /* rp: number of leading newlines skipped (only used when c is NUL) */
1905         rp = 0;
1906
1907         if (!m)
1908                 m = qrealloc(m, 256, &size);
1909
1910         do {
                     /* b: start of unread data; so/eo: start/end of the
                      * separator, defaulting to "end of buffered data" */
1911                 b = m + a;
1912                 so = eo = p;
1913                 r = 1;
1914                 if (p > 0) {
1915                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1916                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1917                                                         b, 1, pmatch, 0) == 0) {
1918                                         so = pmatch[0].rm_so;
1919                                         eo = pmatch[0].rm_eo;
                                             /* a match ending exactly at the end of the
                                              * buffered data is not accepted yet:
                                              * more input is read first */
1920                                         if (b[eo] != '\0')
1921                                                 break;
1922                                 }
1923                         } else if (c != '\0') {
                                     /* single-character separator: search only the
                                      * part added by the last read (from pp on);
                                      * an embedded NUL byte also ends the record */
1924                                 s = strchr(b+pp, c);
1925                                 if (!s)
1926                                         s = memchr(b+pp, '\0', p - pp);
1927                                 if (s) {
1928                                         so = eo = s-b;
1929                                         eo++;
1930                                         break;
1931                                 }
1932                         } else {
                                     /* separator char is NUL: skip leading newlines,
                                      * then the record ends at a run of two or more
                                      * newlines */
1933                                 while (b[rp] == '\n')
1934                                         rp++;
1935                                 s = strstr(b+rp, "\n\n");
1936                                 if (s) {
1937                                         so = eo = s-b;
1938                                         while (b[eo] == '\n')
1939                                                 eo++;
1940                                         if (b[eo] != '\0')
1941                                                 break;
1942                                 }
1943                         }
1944                 }
1945
                     /* no separator yet: move unread data (and its NUL) to the
                      * start of the buffer before reading more */
1946                 if (a > 0) {
1947                         memmove(m, m+a, p+1);
1948                         b = m;
1949                         a = 0;
1950                 }
1951
1952                 m = qrealloc(m, a+p+128, &size);
1953                 b = m + a;
1954                 pp = p;
1955                 p += safe_read(fd, b+p, size-p-1);
                     /* p < pp means safe_read() returned a negative value */
1956                 if (p < pp) {
1957                         p = 0;
1958                         r = 0;
1959                         setvar_i(intvar[ERRNO], errno);
1960                 }
1961                 b[p] = '\0';
1962
1963         } while (p > pp);
1964
1965         if (p == 0) {
                     /* nothing buffered: r becomes 0 (no data) or -1 (read error) */
1966                 r--;
1967         } else {
                     /* temporarily NUL-terminate the record, then the separator,
                      * to copy them out; the overwritten bytes are restored */
1968                 c = b[so]; b[so] = '\0';
1969                 setvar_s(v, b+rp);
1970                 v->type |= VF_USER;
1971                 b[so] = c;
1972                 c = b[eo]; b[eo] = '\0';
1973                 setvar_s(intvar[RT], b+so);
1974                 b[eo] = c;
1975         }
1976
             /* save buffer state: everything up to eo has been consumed */
1977         rsm->buffer = m;
1978         rsm->adv = a + eo;
1979         rsm->pos = p - eo;
1980         rsm->size = size;
1981
1982         debug_printf_eval("returning from %s(): %d\n", __func__, r);
1983
1984         return r;
1985 }
1986
1987 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1988 {
1989         int r = 0;
1990         char c;
1991         const char *s = format;
1992
1993         if (int_as_int && n == (int)n) {
1994                 r = snprintf(b, size, "%d", (int)n);
1995         } else {
1996                 do { c = *s; } while (c && *++s);
1997                 if (strchr("diouxX", c)) {
1998                         r = snprintf(b, size, format, (int)n);
1999                 } else if (strchr("eEfgG", c)) {
2000                         r = snprintf(b, size, format, n);
2001                 } else {
2002                         syntax_error(EMSG_INV_FMT);
2003                 }
2004         }
2005         return r;
2006 }
2007
2008 /* formatted output into an allocated buffer, return ptr to buffer */
2009 static char *awk_printf(node *n)
2010 {
2011         char *b = NULL;
2012         char *fmt, *s, *f;
2013         const char *s1;
2014         int i, j, incr, bsize;
2015         char c, c1;
2016         var *v, *arg;
2017
2018         v = nvalloc(1);
2019         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2020
2021         i = 0;
2022         while (*f) {
2023                 s = f;
2024                 while (*f && (*f != '%' || *++f == '%'))
2025                         f++;
2026                 while (*f && !isalpha(*f)) {
2027                         if (*f == '*')
2028                                 syntax_error("%*x formats are not supported");
2029                         f++;
2030                 }
2031
2032                 incr = (f - s) + MAXVARFMT;
2033                 b = qrealloc(b, incr + i, &bsize);
2034                 c = *f;
2035                 if (c != '\0')
2036                         f++;
2037                 c1 = *f;
2038                 *f = '\0';
2039                 arg = evaluate(nextarg(&n), v);
2040
2041                 j = i;
2042                 if (c == 'c' || !c) {
2043                         i += sprintf(b+i, s, is_numeric(arg) ?
2044                                         (char)getvar_i(arg) : *getvar_s(arg));
2045                 } else if (c == 's') {
2046                         s1 = getvar_s(arg);
2047                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
2048                         i += sprintf(b+i, s, s1);
2049                 } else {
2050                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2051                 }
2052                 *f = c1;
2053
2054                 /* if there was an error while sprintf, return value is negative */
2055                 if (i < j)
2056                         i = j;
2057         }
2058
2059         free(fmt);
2060         nvfree(v);
2061         b = xrealloc(b, i + 1);
2062         b[i] = '\0';
2063         return b;
2064 }
2065
2066 /* Common substitution routine.
2067  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2068  * store result into (dest), return number of substitutions.
2069  * If nm = 0, replace all matches.
2070  * If src or dst is NULL, use $0.
2071  * If subexp != 0, enable subexpression matching (\1-\9).
 *
 * In (repl), '&' inserts the matched text; a '&' (or, with subexp, a
 * digit) is taken literally or as a reference depending on the number of
 * backslashes directly preceding it (see the loop below).
 * The value returned is match_no, the count of matches that were found.
2072  */
2073 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2074 {
2075         char *resbuf;
2076         const char *sp;
2077         int match_no, residx, replen, resbufsize;
2078         int regexec_flags;
2079         regmatch_t pmatch[10];
2080         regex_t sreg, *regex;
2081
2082         resbuf = NULL;
2083         residx = 0;
2084         match_no = 0;
2085         regexec_flags = 0;
             /* regex == &sreg means as_regex() compiled it into our storage */
2086         regex = as_regex(rn, &sreg);
2087         sp = getvar_s(src ? src : intvar[F0]);
2088         replen = strlen(repl);
2089         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2090                 int so = pmatch[0].rm_so;
2091                 int eo = pmatch[0].rm_eo;
2092
2093                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                     /* copy everything up to the end of the match verbatim... */
2094                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2095                 memcpy(resbuf + residx, sp, eo);
2096                 residx += eo;
2097                 if (++match_no >= nm) {
2098                         const char *s;
2099                         int nbs;
2100
2101                         /* replace */
                             /* ...then back up over the matched text and write
                              * the replacement in its place */
2102                         residx -= (eo - so);
                             /* nbs: number of consecutive backslashes just copied */
2103                         nbs = 0;
2104                         for (s = repl; *s; s++) {
2105                                 char c = resbuf[residx++] = *s;
2106                                 if (c == '\\') {
2107                                         nbs++;
2108                                         continue;
2109                                 }
2110                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2111                                         int j;
                                             /* back up over c and (nbs+1)/2 of the
                                              * backslashes that precede it */
2112                                         residx -= ((nbs + 3) >> 1);
                                             /* j: index into pmatch[] (0 = whole match) */
2113                                         j = 0;
2114                                         if (c != '&') {
2115                                                 j = c - '0';
2116                                                 nbs++;
2117                                         }
2118                                         if (nbs % 2) {
                                                     /* odd count: emit c itself */
2119                                                 resbuf[residx++] = c;
2120                                         } else {
                                                     /* even count: insert the text of
                                                      * match/subexpression j */
2121                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2122                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2123                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2124                                                 residx += n;
2125                                         }
2126                                 }
2127                                 nbs = 0;
2128                         }
2129                 }
2130
                     /* later searches start in the middle of the string */
2131                 regexec_flags = REG_NOTBOL;
2132                 sp += eo;
2133                 if (match_no == nm)
2134                         break;
2135                 if (eo == so) {
2136                         /* Empty match (e.g. "b*" will match anywhere).
2137                          * Advance by one char. */
2138 //BUG (bug 1333):
2139 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2140 //... and will erroneously match "b" even though it is NOT at the word start.
2141 //we need REG_NOTBOW but it does not exist...
2142 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2143 //it should be able to do it correctly.
2144                         /* Subtle: this is safe only because
2145                          * qrealloc allocated at least one extra byte */
2146                         resbuf[residx] = *sp;
2147                         if (*sp == '\0')
2148                                 goto ret;
2149                         sp++;
2150                         residx++;
2151                 }
2152         }
2153
             /* append the unmatched remainder of the source string */
2154         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2155         strcpy(resbuf + residx, sp);
2156  ret:
2157         //bb_error_msg("end sp:'%s'%p", sp,sp);
2158         setvar_p(dest ? dest : intvar[F0], resbuf);
2159         if (regex == &sreg)
2160                 regfree(regex);
2161         return match_no;
2162 }
2163
2164 static NOINLINE int do_mktime(const char *ds)
2165 {
2166         struct tm then;
2167         int count;
2168
2169         /*memset(&then, 0, sizeof(then)); - not needed */
2170         then.tm_isdst = -1; /* default is unknown */
2171
2172         /* manpage of mktime says these fields are ints,
2173          * so we can sscanf stuff directly into them */
2174         count = sscanf(ds, "%u %u %u %u %u %u %d",
2175                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2176                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2177                 &then.tm_isdst);
2178
2179         if (count < 6
2180          || (unsigned)then.tm_mon < 1
2181          || (unsigned)then.tm_year < 1900
2182         ) {
2183                 return -1;
2184         }
2185
2186         then.tm_mon -= 1;
2187         then.tm_year -= 1900;
2188
2189         return mktime(&then);
2190 }
2191
/*
 * Execute the builtin function described by node op and store its value
 * in res; returns res.
 *
 * op->l.n is the argument list (at most four arguments are taken).
 * op->info supplies, besides the builtin's number (info & OPNMASK):
 *   - per-argument flag bits that decide whether an argument is evaluated
 *     and whether its string value is fetched (see the loop below);
 *   - in its top two bits, the minimum argument count; fewer arguments
 *     are reported through syntax_error(EMSG_TOO_FEW_ARGS).
 */
2192 static NOINLINE var *exec_builtin(node *op, var *res)
2193 {
2194 #define tspl (G.exec_builtin__tspl)
2195
2196         var *tv;
2197         node *an[4];
2198         var *av[4];
2199         const char *as[4];
2200         regmatch_t pmatch[2];
2201         regex_t sreg, *re;
2202         node *spl;
2203         uint32_t isr, info;
2204         int nargs;
2205         time_t tt;
2206         int i, l, ll, n;
2207
             /* tv: temporaries that receive the evaluated arguments */
2208         tv = nvalloc(4);
2209         isr = info = op->info;
2210         op = op->l.n;
2211
             /* an[i]: argument node, av[i]: its value, as[i]: its string.
              * isr is shifted right once per argument, so each argument is
              * tested against its own pair of flag bits. */
2212         av[2] = av[3] = NULL;
2213         for (i = 0; i < 4 && op; i++) {
2214                 an[i] = nextarg(&op);
2215                 if (isr & 0x09000000)
2216                         av[i] = evaluate(an[i], &tv[i]);
2217                 if (isr & 0x08000000)
2218                         as[i] = getvar_s(av[i]);
2219                 isr >>= 1;
2220         }
2221
2222         nargs = i;
2223         if ((uint32_t)nargs < (info >> 30))
2224                 syntax_error(EMSG_TOO_FEW_ARGS);
2225
2226         info &= OPNMASK;
2227         switch (info) {
2228
             /* atan2 of the first two arguments (needs FEATURE_AWK_LIBM) */
2229         case B_a2:
2230                 if (ENABLE_FEATURE_AWK_LIBM)
2231                         setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2232                 else
2233                         syntax_error(EMSG_NO_MATH);
2234                 break;
2235
             /* split as[0] into array av[1]; the separator is the third
              * argument if given, else the field splitter.
              * Result: number of pieces. */
2236         case B_sp: {
2237                 char *s, *s1;
2238
2239                 if (nargs > 2) {
2240                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2241                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2242                 } else {
2243                         spl = &fsplitter.n;
2244                 }
2245
2246                 n = awk_split(as[0], spl, &s);
2247                 s1 = s;
2248                 clear_array(iamarray(av[1]));
2249                 for (i = 1; i <= n; i++)
2250                         setari_u(av[1], i, nextword(&s));
2251                 free(s1);
2252                 setvar_i(res, n);
2253                 break;
2254         }
2255
             /* substring of as[0]: the start (second argument, 1-based) is
              * clamped to [0, l] after conversion to a 0-based index; the
              * length (third argument, default: rest of string) is clamped
              * to be non-negative */
2256         case B_ss: {
2257                 char *s;
2258
2259                 l = strlen(as[0]);
2260                 i = getvar_i(av[1]) - 1;
2261                 if (i > l)
2262                         i = l;
2263                 if (i < 0)
2264                         i = 0;
2265                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2266                 if (n < 0)
2267                         n = 0;
2268                 s = xstrndup(as[0]+i, n);
2269                 setvar_p(res, s);
2270                 break;
2271         }
2272
2273         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2274          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2275         case B_an:
2276                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2277                 break;
2278
2279         case B_co:
2280                 setvar_i(res, ~getvar_i_int(av[0]));
2281                 break;
2282
2283         case B_ls:
2284                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2285                 break;
2286
2287         case B_or:
2288                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2289                 break;
2290
2291         case B_rs:
2292                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2293                 break;
2294
2295         case B_xo:
2296                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2297                 break;
2298
             /* lower-/uppercase a copy of as[0]; only ASCII letters change */
2299         case B_lo:
2300         case B_up: {
2301                 char *s, *s1;
2302                 s1 = s = xstrdup(as[0]);
2303                 while (*s1) {
2304                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2305                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2306                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2307                         s1++;
2308                 }
2309                 setvar_p(res, s);
2310                 break;
2311         }
2312
             /* 1-based position of as[1] within as[0], or 0 if it does not
              * occur (or as[1] is empty); case-insensitive when icase is set */
2313         case B_ix:
2314                 n = 0;
2315                 ll = strlen(as[1]);
2316                 l = strlen(as[0]) - ll;
2317                 if (ll > 0 && l >= 0) {
2318                         if (!icase) {
2319                                 char *s = strstr(as[0], as[1]);
2320                                 if (s)
2321                                         n = (s - as[0]) + 1;
2322                         } else {
2323                                 /* this piece of code is terribly slow and
2324                                  * really should be rewritten
2325                                  */
2326                                 for (i = 0; i <= l; i++) {
2327                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2328                                                 n = i+1;
2329                                                 break;
2330                                         }
2331                                 }
2332                         }
2333                 }
2334                 setvar_i(res, n);
2335                 break;
2336
             /* strftime() of the second argument (default: current time)
              * using the first argument as format (default shown below);
              * output is limited to MAXVARFMT bytes in g_buf */
2337         case B_ti:
2338                 if (nargs > 1)
2339                         tt = getvar_i(av[1]);
2340                 else
2341                         time(&tt);
2342                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2343                 i = strftime(g_buf, MAXVARFMT,
2344                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2345                         localtime(&tt));
2346                 g_buf[i] = '\0';
2347                 setvar_s(res, g_buf);
2348                 break;
2349
             /* date specification string to timestamp, see do_mktime() */
2350         case B_mt:
2351                 setvar_i(res, do_mktime(as[0]));
2352                 break;
2353
             /* match as[0] against the second argument used as a regex.
              * Sets RSTART (1-based start, 0 if no match) and RLENGTH
              * (match length, -1 if no match); result equals RSTART. */
2354         case B_ma:
2355                 re = as_regex(an[1], &sreg);
2356                 n = regexec(re, as[0], 1, pmatch, 0);
2357                 if (n == 0) {
2358                         pmatch[0].rm_so++;
2359                         pmatch[0].rm_eo++;
2360                 } else {
2361                         pmatch[0].rm_so = 0;
2362                         pmatch[0].rm_eo = -1;
2363                 }
2364                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2365                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2366                 setvar_i(res, pmatch[0].rm_so);
2367                 if (re == &sreg)
2368                         regfree(re);
2369                 break;
2370
             /* substitution with subexpression references enabled: the match
              * number comes from the third argument, the source from the
              * fourth (NULL selects $0 in awk_sub()); the new string is
              * stored in res */
2371         case B_ge:
2372                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2373                 break;
2374
             /* replace all matches in av[2] in place; result: match count */
2375         case B_gs:
2376                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2377                 break;
2378
             /* replace the first match in av[2] in place; result: match count */
2379         case B_su:
2380                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2381                 break;
2382         }
2383
2384         nvfree(tv);
2385         return res;
2386 #undef tspl
2387 }
2388
2389 /*
2390  * Evaluate node - the heart of the program. Supplied with subtree
2391  * and place where to store result. returns ptr to result.
2392  */
2393 #define XC(n) ((n) >> 8)
2394
2395 static var *evaluate(node *op, var *res)
2396 {
2397 /* This procedure is recursive so we should count every byte */
2398 #define fnargs (G.evaluate__fnargs)
2399 /* seed is initialized to 1 */
2400 #define seed   (G.evaluate__seed)
2401 #define sreg   (G.evaluate__sreg)
2402
2403         var *v1;
2404
2405         if (!op)
2406                 return setvar_s(res, NULL);
2407
2408         debug_printf_eval("entered %s()\n", __func__);
2409
2410         v1 = nvalloc(2);
2411
2412         while (op) {
2413                 struct {
2414                         var *v;
2415                         const char *s;
2416                 } L = L; /* for compiler */
2417                 struct {
2418                         var *v;
2419                         const char *s;
2420                 } R = R;
2421                 double L_d = L_d;
2422                 uint32_t opinfo;
2423                 int opn;
2424                 node *op1;
2425
2426                 opinfo = op->info;
2427                 opn = (opinfo & OPNMASK);
2428                 g_lineno = op->lineno;
2429                 op1 = op->l.n;
2430                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2431
2432                 /* execute inevitable things */
2433                 if (opinfo & OF_RES1)
2434                         L.v = evaluate(op1, v1);
2435                 if (opinfo & OF_RES2)
2436                         R.v = evaluate(op->r.n, v1+1);
2437                 if (opinfo & OF_STR1) {
2438                         L.s = getvar_s(L.v);
2439                         debug_printf_eval("L.s:'%s'\n", L.s);
2440                 }
2441                 if (opinfo & OF_STR2) {
2442                         R.s = getvar_s(R.v);
2443                         debug_printf_eval("R.s:'%s'\n", R.s);
2444                 }
2445                 if (opinfo & OF_NUM1) {
2446                         L_d = getvar_i(L.v);
2447                         debug_printf_eval("L_d:%f\n", L_d);
2448                 }
2449
2450                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2451                 switch (XC(opinfo & OPCLSMASK)) {
2452
2453                 /* -- iterative node type -- */
2454
2455                 /* test pattern */
2456                 case XC( OC_TEST ):
2457                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2458                                 /* it's range pattern */
2459                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2460                                         op->info |= OF_CHECKED;
2461                                         if (ptest(op1->r.n))
2462                                                 op->info &= ~OF_CHECKED;
2463                                         op = op->a.n;
2464                                 } else {
2465                                         op = op->r.n;
2466                                 }
2467                         } else {
2468                                 op = ptest(op1) ? op->a.n : op->r.n;
2469                         }
2470                         break;
2471
2472                 /* just evaluate an expression, also used as unconditional jump */
2473                 case XC( OC_EXEC ):
2474                         break;
2475
2476                 /* branch, used in if-else and various loops */
2477                 case XC( OC_BR ):
2478                         op = istrue(L.v) ? op->a.n : op->r.n;
2479                         break;
2480
2481                 /* initialize for-in loop */
2482                 case XC( OC_WALKINIT ):
2483                         hashwalk_init(L.v, iamarray(R.v));
2484                         break;
2485
2486                 /* get next array item */
2487                 case XC( OC_WALKNEXT ):
2488                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2489                         break;
2490
2491                 case XC( OC_PRINT ):
2492                 case XC( OC_PRINTF ): {
2493                         FILE *F = stdout;
2494
2495                         if (op->r.n) {
2496                                 rstream *rsm = newfile(R.s);
2497                                 if (!rsm->F) {
2498                                         if (opn == '|') {
2499                                                 rsm->F = popen(R.s, "w");
2500                                                 if (rsm->F == NULL)
2501                                                         bb_perror_msg_and_die("popen");
2502                                                 rsm->is_pipe = 1;
2503                                         } else {
2504                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2505                                         }
2506                                 }
2507                                 F = rsm->F;
2508                         }
2509
2510                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2511                                 if (!op1) {
2512                                         fputs(getvar_s(intvar[F0]), F);
2513                                 } else {
2514                                         while (op1) {
2515                                                 var *v = evaluate(nextarg(&op1), v1);
2516                                                 if (v->type & VF_NUMBER) {
2517                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2518                                                                         getvar_i(v), TRUE);
2519                                                         fputs(g_buf, F);
2520                                                 } else {
2521                                                         fputs(getvar_s(v), F);
2522                                                 }
2523
2524                                                 if (op1)
2525                                                         fputs(getvar_s(intvar[OFS]), F);
2526                                         }
2527                                 }
2528                                 fputs(getvar_s(intvar[ORS]), F);
2529
2530                         } else {        /* OC_PRINTF */
2531                                 char *s = awk_printf(op1);
2532                                 fputs(s, F);
2533                                 free(s);
2534                         }
2535                         fflush(F);
2536                         break;
2537                 }
2538
2539                 case XC( OC_DELETE ): {
2540                         uint32_t info = op1->info & OPCLSMASK;
2541                         var *v;
2542
2543                         if (info == OC_VAR) {
2544                                 v = op1->l.v;
2545                         } else if (info == OC_FNARG) {
2546                                 v = &fnargs[op1->l.aidx];
2547                         } else {
2548                                 syntax_error(EMSG_NOT_ARRAY);
2549                         }
2550
2551                         if (op1->r.n) {
2552                                 const char *s;
2553                                 clrvar(L.v);
2554                                 s = getvar_s(evaluate(op1->r.n, v1));
2555                                 hash_remove(iamarray(v), s);
2556                         } else {
2557                                 clear_array(iamarray(v));
2558                         }
2559                         break;
2560                 }
2561
2562                 case XC( OC_NEWSOURCE ):
2563                         g_progname = op->l.new_progname;
2564                         break;
2565
2566                 case XC( OC_RETURN ):
2567                         copyvar(res, L.v);
2568                         break;
2569
2570                 case XC( OC_NEXTFILE ):
2571                         nextfile = TRUE;
2572                 case XC( OC_NEXT ):
2573                         nextrec = TRUE;
2574                 case XC( OC_DONE ):
2575                         clrvar(res);
2576                         break;
2577
2578                 case XC( OC_EXIT ):
2579                         awk_exit(L_d);
2580
2581                 /* -- recursive node type -- */
2582
2583                 case XC( OC_VAR ):
2584                         L.v = op->l.v;
2585                         if (L.v == intvar[NF])
2586                                 split_f0();
2587                         goto v_cont;
2588
2589                 case XC( OC_FNARG ):
2590                         L.v = &fnargs[op->l.aidx];
2591  v_cont:
2592                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2593                         break;
2594
2595                 case XC( OC_IN ):
2596                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2597                         break;
2598
2599                 case XC( OC_REGEXP ):
2600                         op1 = op;
2601                         L.s = getvar_s(intvar[F0]);
2602                         goto re_cont;
2603
2604                 case XC( OC_MATCH ):
2605                         op1 = op->r.n;
2606  re_cont:
2607                         {
2608                                 regex_t *re = as_regex(op1, &sreg);
2609                                 int i = regexec(re, L.s, 0, NULL, 0);
2610                                 if (re == &sreg)
2611                                         regfree(re);
2612                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2613                         }
2614                         break;
2615
2616                 case XC( OC_MOVE ):
2617                         debug_printf_eval("MOVE\n");
2618                         /* if source is a temporary string, just relink it to dest */
2619 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2620 //then L.v ends up being a string, which is wrong
2621 //                      if (R.v == v1+1 && R.v->string) {
2622 //                              res = setvar_p(L.v, R.v->string);
2623 //                              R.v->string = NULL;
2624 //                      } else {
2625                                 res = copyvar(L.v, R.v);
2626 //                      }
2627                         break;
2628
2629                 case XC( OC_TERNARY ):
2630                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2631                                 syntax_error(EMSG_POSSIBLE_ERROR);
2632                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2633                         break;
2634
2635                 case XC( OC_FUNC ): {
2636                         var *vbeg, *v;
2637                         const char *sv_progname;
2638
2639                         if (!op->r.f->body.first)
2640                                 syntax_error(EMSG_UNDEF_FUNC);
2641
2642                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2643                         while (op1) {
2644                                 var *arg = evaluate(nextarg(&op1), v1);
2645                                 copyvar(v, arg);
2646                                 v->type |= VF_CHILD;
2647                                 v->x.parent = arg;
2648                                 if (++v - vbeg >= op->r.f->nargs)
2649                                         break;
2650                         }
2651
2652                         v = fnargs;
2653                         fnargs = vbeg;
2654                         sv_progname = g_progname;
2655
2656                         res = evaluate(op->r.f->body.first, res);
2657
2658                         g_progname = sv_progname;
2659                         nvfree(fnargs);
2660                         fnargs = v;
2661
2662                         break;
2663                 }
2664
2665                 case XC( OC_GETLINE ):
2666                 case XC( OC_PGETLINE ): {
2667                         rstream *rsm;
2668                         int i;
2669
2670                         if (op1) {
2671                                 rsm = newfile(L.s);
2672                                 if (!rsm->F) {
2673                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2674                                                 rsm->F = popen(L.s, "r");
2675                                                 rsm->is_pipe = TRUE;
2676                                         } else {
2677                                                 rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2678                                         }
2679                                 }
2680                         } else {
2681                                 if (!iF)
2682                                         iF = next_input_file();
2683                                 rsm = iF;
2684                         }
2685
2686                         if (!rsm || !rsm->F) {
2687                                 setvar_i(intvar[ERRNO], errno);
2688                                 setvar_i(res, -1);
2689                                 break;
2690                         }
2691
2692                         if (!op->r.n)
2693                                 R.v = intvar[F0];
2694
2695                         i = awk_getline(rsm, R.v);
2696                         if (i > 0 && !op1) {
2697                                 incvar(intvar[FNR]);
2698                                 incvar(intvar[NR]);
2699                         }
2700                         setvar_i(res, i);
2701                         break;
2702                 }
2703
2704                 /* simple builtins */
2705                 case XC( OC_FBLTIN ): {
2706                         double R_d = R_d; /* for compiler */
2707
2708                         switch (opn) {
2709                         case F_in:
2710                                 R_d = (int)L_d;
2711                                 break;
2712
2713                         case F_rn:
2714                                 R_d = (double)rand() / (double)RAND_MAX;
2715                                 break;
2716
2717                         case F_co:
2718                                 if (ENABLE_FEATURE_AWK_LIBM) {
2719                                         R_d = cos(L_d);
2720                                         break;
2721                                 }
2722
2723                         case F_ex:
2724                                 if (ENABLE_FEATURE_AWK_LIBM) {
2725                                         R_d = exp(L_d);
2726                                         break;
2727                                 }
2728
2729                         case F_lg:
2730                                 if (ENABLE_FEATURE_AWK_LIBM) {
2731                                         R_d = log(L_d);
2732                                         break;
2733                                 }
2734
2735                         case F_si:
2736                                 if (ENABLE_FEATURE_AWK_LIBM) {
2737                                         R_d = sin(L_d);
2738                                         break;
2739                                 }
2740
2741                         case F_sq:
2742                                 if (ENABLE_FEATURE_AWK_LIBM) {
2743                                         R_d = sqrt(L_d);
2744                                         break;
2745                                 }
2746
2747                                 syntax_error(EMSG_NO_MATH);
2748                                 break;
2749
2750                         case F_sr:
2751                                 R_d = (double)seed;
2752                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2753                                 srand(seed);
2754                                 break;
2755
2756                         case F_ti:
2757                                 R_d = time(NULL);
2758                                 break;
2759
2760                         case F_le:
2761                                 if (!op1)
2762                                         L.s = getvar_s(intvar[F0]);
2763                                 R_d = strlen(L.s);
2764                                 break;
2765
2766                         case F_sy:
2767                                 fflush_all();
2768                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2769                                                 ? (system(L.s) >> 8) : 0;
2770                                 break;
2771
2772                         case F_ff:
2773                                 if (!op1) {
2774                                         fflush(stdout);
2775                                 } else if (L.s && *L.s) {
2776                                         rstream *rsm = newfile(L.s);
2777                                         fflush(rsm->F);
2778                                 } else {
2779                                         fflush_all();
2780                                 }
2781                                 break;
2782
2783                         case F_cl: {
2784                                 rstream *rsm;
2785                                 int err = 0;
2786                                 rsm = (rstream *)hash_search(fdhash, L.s);
2787                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2788                                 if (rsm) {
2789                                         debug_printf_eval("OC_FBLTIN F_cl "
2790                                                 "rsm->is_pipe:%d, ->F:%p\n",
2791                                                 rsm->is_pipe, rsm->F);
2792                                         /* Can be NULL if open failed. Example:
2793                                          * getline line <"doesnt_exist";
2794                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2795                                          */
2796                                         if (rsm->F)
2797                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2798                                         free(rsm->buffer);
2799                                         hash_remove(fdhash, L.s);
2800                                 }
2801                                 if (err)
2802                                         setvar_i(intvar[ERRNO], errno);
2803                                 R_d = (double)err;
2804                                 break;
2805                         }
2806                         } /* switch */
2807                         setvar_i(res, R_d);
2808                         break;
2809                 }
2810
2811                 case XC( OC_BUILTIN ):
2812                         res = exec_builtin(op, res);
2813                         break;
2814
2815                 case XC( OC_SPRINTF ):
2816                         setvar_p(res, awk_printf(op1));
2817                         break;
2818
2819                 case XC( OC_UNARY ): {
2820                         double Ld, R_d;
2821
2822                         Ld = R_d = getvar_i(R.v);
2823                         switch (opn) {
2824                         case 'P':
2825                                 Ld = ++R_d;
2826                                 goto r_op_change;
2827                         case 'p':
2828                                 R_d++;
2829                                 goto r_op_change;
2830                         case 'M':
2831                                 Ld = --R_d;
2832                                 goto r_op_change;
2833                         case 'm':
2834                                 R_d--;
2835  r_op_change:
2836                                 setvar_i(R.v, R_d);
2837                                 break;
2838                         case '!':
2839                                 Ld = !istrue(R.v);
2840                                 break;
2841                         case '-':
2842                                 Ld = -R_d;
2843                                 break;
2844                         }
2845                         setvar_i(res, Ld);
2846                         break;
2847                 }
2848
2849                 case XC( OC_FIELD ): {
2850                         int i = (int)getvar_i(R.v);
2851                         if (i == 0) {
2852                                 res = intvar[F0];
2853                         } else {
2854                                 split_f0();
2855                                 if (i > nfields)
2856                                         fsrealloc(i);
2857                                 res = &Fields[i - 1];
2858                         }
2859                         break;
2860                 }
2861
2862                 /* concatenation (" ") and index joining (",") */
2863                 case XC( OC_CONCAT ):
2864                 case XC( OC_COMMA ): {
2865                         const char *sep = "";
2866                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2867                                 sep = getvar_s(intvar[SUBSEP]);
2868                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2869                         break;
2870                 }
2871
2872                 case XC( OC_LAND ):
2873                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2874                         break;
2875
2876                 case XC( OC_LOR ):
2877                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2878                         break;
2879
2880                 case XC( OC_BINARY ):
2881                 case XC( OC_REPLACE ): {
2882                         double R_d = getvar_i(R.v);
2883                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2884                         switch (opn) {
2885                         case '+':
2886                                 L_d += R_d;
2887                                 break;
2888                         case '-':
2889                                 L_d -= R_d;
2890                                 break;
2891                         case '*':
2892                                 L_d *= R_d;
2893                                 break;
2894                         case '/':
2895                                 if (R_d == 0)
2896                                         syntax_error(EMSG_DIV_BY_ZERO);
2897                                 L_d /= R_d;
2898                                 break;
2899                         case '&':
2900                                 if (ENABLE_FEATURE_AWK_LIBM)
2901                                         L_d = pow(L_d, R_d);
2902                                 else
2903                                         syntax_error(EMSG_NO_MATH);
2904                                 break;
2905                         case '%':
2906                                 if (R_d == 0)
2907                                         syntax_error(EMSG_DIV_BY_ZERO);
2908                                 L_d -= (int)(L_d / R_d) * R_d;
2909                                 break;
2910                         }
2911                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2912                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2913                         break;
2914                 }
2915
2916                 case XC( OC_COMPARE ): {
2917                         int i = i; /* for compiler */
2918                         double Ld;
2919
2920                         if (is_numeric(L.v) && is_numeric(R.v)) {
2921                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2922                         } else {
2923                                 const char *l = getvar_s(L.v);
2924                                 const char *r = getvar_s(R.v);
2925                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2926                         }
2927                         switch (opn & 0xfe) {
2928                         case 0:
2929                                 i = (Ld > 0);
2930                                 break;
2931                         case 2:
2932                                 i = (Ld >= 0);
2933                                 break;
2934                         case 4:
2935                                 i = (Ld == 0);
2936                                 break;
2937                         }
2938                         setvar_i(res, (i == 0) ^ (opn & 1));
2939                         break;
2940                 }
2941
2942                 default:
2943                         syntax_error(EMSG_POSSIBLE_ERROR);
2944                 }
2945                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2946                         op = op->a.n;
2947                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2948                         break;
2949                 if (nextrec)
2950                         break;
2951         } /* while (op) */
2952
2953         nvfree(v1);
2954         debug_printf_eval("returning from %s(): %p\n", __func__, res);
2955         return res;
2956 #undef fnargs
2957 #undef seed
2958 #undef sreg
2959 }
2960
2961
2962 /* -------- main & co. -------- */
2963
2964 static int awk_exit(int r)
2965 {
2966         var tv;
2967         unsigned i;
2968         hash_item *hi;
2969
2970         zero_out_var(&tv);
2971
2972         if (!exiting) {
2973                 exiting = TRUE;
2974                 nextrec = FALSE;
2975                 evaluate(endseq.first, &tv);
2976         }
2977
2978         /* waiting for children */
2979         for (i = 0; i < fdhash->csize; i++) {
2980                 hi = fdhash->items[i];
2981                 while (hi) {
2982                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2983                                 pclose(hi->data.rs.F);
2984                         hi = hi->next;
2985                 }
2986         }
2987
2988         exit(r);
2989 }
2990
2991 /* if expr looks like "var=value", perform assignment and return 1,
2992  * otherwise return 0 */
2993 static int is_assignment(const char *expr)
2994 {
2995         char *exprc, *val, *s, *s1;
2996
2997         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2998                 return FALSE;
2999         }
3000
3001         exprc = xstrdup(expr);
3002         val = exprc + (val - expr);
3003         *val++ = '\0';
3004
3005         s = s1 = val;
3006         while ((*s1 = nextchar(&s)) != '\0')
3007                 s1++;
3008
3009         setvar_u(newvar(exprc), val);
3010         free(exprc);
3011         return TRUE;
3012 }
3013
3014 /* switch to next input file */
3015 static rstream *next_input_file(void)
3016 {
3017 #define rsm          (G.next_input_file__rsm)
3018 #define files_happen (G.next_input_file__files_happen)
3019
3020         FILE *F;
3021         const char *fname, *ind;
3022
3023         if (rsm.F)
3024                 fclose(rsm.F);
3025         rsm.F = NULL;
3026         rsm.pos = rsm.adv = 0;
3027
3028         for (;;) {
3029                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3030                         if (files_happen)
3031                                 return NULL;
3032                         fname = "-";
3033                         F = stdin;
3034                         break;
3035                 }
3036                 ind = getvar_s(incvar(intvar[ARGIND]));
3037                 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3038                 if (fname && *fname && !is_assignment(fname)) {
3039                         F = xfopen_stdin(fname);
3040                         break;
3041                 }
3042         }
3043
3044         files_happen = TRUE;
3045         setvar_s(intvar[FILENAME], fname);
3046         rsm.F = F;
3047         return &rsm;
3048 #undef rsm
3049 #undef files_happen
3050 }
3051
3052 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3053 int awk_main(int argc, char **argv)
3054 {
3055         unsigned opt;
3056         char *opt_F;
3057         llist_t *list_v = NULL;
3058         llist_t *list_f = NULL;
3059         int i, j;
3060         var *v;
3061         var tv;
3062         char **envp;
3063         char *vnames = (char *)vNames; /* cheat */
3064         char *vvalues = (char *)vValues;
3065
3066         INIT_G();
3067
3068         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3069          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3070         if (ENABLE_LOCALE_SUPPORT)
3071                 setlocale(LC_NUMERIC, "C");
3072
3073         zero_out_var(&tv);
3074
3075         /* allocate global buffer */
3076         g_buf = xmalloc(MAXVARFMT + 1);
3077
3078         vhash = hash_init();
3079         ahash = hash_init();
3080         fdhash = hash_init();
3081         fnhash = hash_init();
3082
3083         /* initialize variables */
3084         for (i = 0; *vnames; i++) {
3085                 intvar[i] = v = newvar(nextword(&vnames));
3086                 if (*vvalues != '\377')
3087                         setvar_s(v, nextword(&vvalues));
3088                 else
3089                         setvar_i(v, 0);
3090
3091                 if (*vnames == '*') {
3092                         v->type |= VF_SPECIAL;
3093                         vnames++;
3094                 }
3095         }
3096
3097         handle_special(intvar[FS]);
3098         handle_special(intvar[RS]);
3099
3100         newfile("/dev/stdin")->F = stdin;
3101         newfile("/dev/stdout")->F = stdout;
3102         newfile("/dev/stderr")->F = stderr;
3103
3104         /* Huh, people report that sometimes environ is NULL. Oh well. */
3105         if (environ) for (envp = environ; *envp; envp++) {
3106                 /* environ is writable, thus we don't strdup it needlessly */
3107                 char *s = *envp;
3108                 char *s1 = strchr(s, '=');
3109                 if (s1) {
3110                         *s1 = '\0';
3111                         /* Both findvar and setvar_u take const char*
3112                          * as 2nd arg -> environment is not trashed */
3113                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3114                         *s1 = '=';
3115                 }
3116         }
3117         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3118         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
3119         argv += optind;
3120         argc -= optind;
3121         if (opt & 0x1)
3122                 setvar_s(intvar[FS], opt_F); // -F
3123         while (list_v) { /* -v */
3124                 if (!is_assignment(llist_pop(&list_v)))
3125                         bb_show_usage();
3126         }
3127         if (list_f) { /* -f */
3128                 do {
3129                         char *s = NULL;
3130                         FILE *from_file;
3131
3132                         g_progname = llist_pop(&list_f);
3133                         from_file = xfopen_stdin(g_progname);
3134                         /* one byte is reserved for some trick in next_token */
3135                         for (i = j = 1; j > 0; i += j) {
3136                                 s = xrealloc(s, i + 4096);
3137                                 j = fread(s + i, 1, 4094, from_file);
3138                         }
3139                         s[i] = '\0';
3140                         fclose(from_file);
3141                         parse_program(s + 1);
3142                         free(s);
3143                 } while (list_f);
3144                 argc++;
3145         } else { // no -f: take program from 1st parameter
3146                 if (!argc)
3147                         bb_show_usage();
3148                 g_progname = "cmd. line";
3149                 parse_program(*argv++);
3150         }
3151         if (opt & 0x8) // -W
3152                 bb_error_msg("warning: option -W is ignored");
3153
3154         /* fill in ARGV array */
3155         setvar_i(intvar[ARGC], argc);
3156         setari_u(intvar[ARGV], 0, "awk");
3157         i = 0;
3158         while (*argv)
3159                 setari_u(intvar[ARGV], ++i, *argv++);
3160
3161         evaluate(beginseq.first, &tv);
3162         if (!mainseq.first && !endseq.first)
3163                 awk_exit(EXIT_SUCCESS);
3164
3165         /* input file could already be opened in BEGIN block */
3166         if (!iF)
3167                 iF = next_input_file();
3168
3169         /* passing through input files */
3170         while (iF) {
3171                 nextfile = FALSE;
3172                 setvar_i(intvar[FNR], 0);
3173
3174                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3175                         nextrec = FALSE;
3176                         incvar(intvar[NR]);
3177                         incvar(intvar[FNR]);
3178                         evaluate(mainseq.first, &tv);
3179
3180                         if (nextfile)
3181                                 break;
3182                 }
3183
3184                 if (i < 0)
3185                         syntax_error(strerror(errno));
3186
3187                 iF = next_input_file();
3188         }
3189
3190         awk_exit(EXIT_SUCCESS);
3191         /*return 0;*/
3192 }