d69720d6473fea753db54ff3d37debee6b742449
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 //usage:#define awk_trivial_usage
11 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
12 //usage:#define awk_full_usage "\n\n"
13 //usage:       "        -v VAR=VAL      Set variable"
14 //usage:     "\n        -F SEP          Use SEP as field separator"
15 //usage:     "\n        -f FILE         Read program from FILE"
16
17 #include "libbb.h"
18 #include "xregex.h"
19 #include <math.h>
20
21 /* This is a NOEXEC applet. Be very careful! */
22
23
24 /* If you comment out one of these below, it will be #defined later
25  * to perform debug printfs to stderr: */
/* As shipped, all three tracers expand to an empty statement. */
26 #define debug_printf_walker(...)  do {} while (0)
27 #define debug_printf_eval(...)  do {} while (0)
28 #define debug_printf_parse(...)  do {} while (0)
29
/* Fallbacks: only take effect for a tracer that is not defined above. */
30 #ifndef debug_printf_walker
31 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
32 #endif
33 #ifndef debug_printf_eval
34 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
35 #endif
36 #ifndef debug_printf_parse
37 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
38 #endif
39
40
41
/* Buffer size handed to fmt_num() when getvar_s() formats a number into g_buf */
42 #define MAXVARFMT       240
/* Minimum number of vars in a block allocated by nvalloc() */
43 #define MINNVBLOCK      64
44
45 /* variable flags */
/* Bits stored in var.type */
46 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
47 #define VF_ARRAY        0x0002  /* 1 = it's an array */
48
49 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
50 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
51 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
52 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
53 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
54 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
55 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
56
57 /* these flags are static, don't change them when value is changed */
/* clrvar() keeps exactly these bits; copyvar() never copies them from the source */
58 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
59
/* One node of the list hung off var.x.walker (see VF_WALK): a cur/end
 * cursor pair, a link to the previous node and a trailing char buffer.
 * NOTE(review): wbuf is declared [1] but is presumably over-allocated by
 * the code that creates the node (not visible in this chunk) - confirm. */
60 typedef struct walker_list {
61         char *end;
62         char *cur;
63         struct walker_list *prev;
64         char wbuf[1];
65 } walker_list;
66
67 /* Variable */
/* A value can be held as a number, a string, or both (VF_CACHED means the
 * other representation has been computed and stored alongside). The union
 * member in use is selected by the VF_ARRAY / VF_CHILD / VF_WALK bits. */
68 typedef struct var_s {
69         unsigned type;            /* flags */
70         double number;
71         char *string;
72         union {
73                 int aidx;               /* func arg idx (for compilation stage) */
74                 struct xhash_s *array;  /* array ptr */
75                 struct var_s *parent;   /* for func args, ptr to actual parameter */
76                 walker_list *walker;    /* list of array elements (for..in) */
77         } x;
78 } var;
79
80 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* Holds the first and last node of a sequence plus the program name the
 * sequence is associated with. */
81 typedef struct chain_s {
82         struct node_s *first;
83         struct node_s *last;
84         const char *programname;
85 } chain;
86
87 /* Function */
/* Entry stored in the functions hash (see hash_item.data.f): argument
 * count and the node chain forming the body. */
88 typedef struct func_s {
89         unsigned nargs;
90         struct chain_s body;
91 } func;
92
93 /* I/O stream */
/* Entry stored in the redirect streams hash (see hash_item.data.rs).
 * NOTE(review): buffer/adv/size/pos presumably describe a read buffer and
 * the current position within it; their users are not visible in this
 * chunk - confirm before relying on that. */
94 typedef struct rstream_s {
95         FILE *F;
96         char *buffer;
97         int adv;
98         int size;
99         int pos;
100         smallint is_pipe;
101 } rstream;
102
/* One hash bucket entry: payload, chain link, then the key.
 * data must stay the FIRST member: hash_find() stores the pointer returned
 * by hash_search() (which is &item->data) in a hash_item pointer and
 * derives &data from it again, which is only the same address at offset 0.
 * The key is stored in name[]; hash_find() allocates sizeof(hash_item) plus
 * the key length (including NUL) so the whole key fits. */
103 typedef struct hash_item_s {
104         union {
105                 struct var_s v;         /* variable/array hash */
106                 struct rstream_s rs;    /* redirect streams hash */
107                 struct func_s f;        /* functions hash */
108         } data;
109         struct hash_item_s *next;       /* next in chain */
110         char name[1];                   /* really it's longer */
111 } hash_item;
112
/* Chained hash table keyed by string. hash_init() starts it with
 * FIRST_PRIME buckets; hash_rebuild() moves it to PRIMES[nprime]. */
113 typedef struct xhash_s {
114         unsigned nel;           /* num of elements */
115         unsigned csize;         /* current hash size */
116         unsigned nprime;        /* next hash size in PRIMES[] */
117         unsigned glen;          /* summary length of item names */
                                /* (sum of strlen(name)+1 over all items) */
118         struct hash_item_s **items;
119 } xhash;
120
121 /* Tree node */
/* info is a 32-bit word; NOTE(review): presumably it carries the same
 * opclass / operand-flag / priority encoding as the tokeninfo[] entries -
 * confirm against the parser. l, r and a are operand slots; which union
 * member is valid depends on the node kind. */
122 typedef struct node_s {
123         uint32_t info;
124         unsigned lineno;
125         union {
126                 struct node_s *n;
127                 var *v;
128                 int aidx;
129                 char *new_progname;
130                 regex_t *re;
131         } l;
132         union {
133                 struct node_s *n;
134                 regex_t *ire;
135                 func *f;
136         } r;
137         union {
138                 struct node_s *n;
139         } a;
140 } node;
141
142 /* Block of temporary variables */
/* Blocks form a doubly linked list. nvalloc() treats size as the capacity
 * of nv[] in vars and (pos - nv) as the number already handed out. */
143 typedef struct nvblock_s {
144         int size;
145         var *pos;
146         struct nvblock_s *prev;
147         struct nvblock_s *next;
148         var nv[];
149 } nvblock;
150
/* A node bundled with storage for two compiled regexes. Used for the
 * fsplitter / rsplitter globals and for exec_builtin__tspl.
 * NOTE(review): the two regex slots presumably pair with node.l.re and
 * node.r.ire - confirm at the initialization site. */
151 typedef struct tsplitter_s {
152         node n;
153         regex_t re[2];
154 } tsplitter;
155
156 /* simple token classes */
157 /* Order and hex values are very important!!!  See next_token() */
/* Each class is a single bit; the bit order matches the order of the
 * NTC-separated groups in tokenlist[] (NTC means "tc<<1"). */
158 #define TC_SEQSTART      1                              /* ( */
159 #define TC_SEQTERM      (1 << 1)                /* ) */
160 #define TC_REGEXP       (1 << 2)                /* /.../ */
161 #define TC_OUTRDR       (1 << 3)                /* | > >> */
162 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
163 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
164 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
165 #define TC_IN           (1 << 7)
166 #define TC_COMMA        (1 << 8)
167 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
168 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
169 #define TC_ARRTERM      (1 << 11)               /* ] */
170 #define TC_GRPSTART     (1 << 12)               /* { */
171 #define TC_GRPTERM      (1 << 13)               /* } */
172 #define TC_SEMICOL      (1 << 14)
173 #define TC_NEWLINE      (1 << 15)
174 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
175 #define TC_WHILE        (1 << 17)
176 #define TC_ELSE         (1 << 18)
177 #define TC_BUILTIN      (1 << 19)
178 #define TC_GETLINE      (1 << 20)
179 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
180 #define TC_BEGIN        (1 << 22)
181 #define TC_END          (1 << 23)
182 #define TC_EOF          (1 << 24)
/* The classes from TC_EOF on have no group in tokenlist[] */
183 #define TC_VARIABLE     (1 << 25)
184 #define TC_ARRAY        (1 << 26)
185 #define TC_FUNCTION     (1 << 27)
186 #define TC_STRING       (1 << 28)
187 #define TC_NUMBER       (1 << 29)
188
/* either kind of unary prefix operator (++ -- $ or + - !) */
189 #define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
190
191 /* combined token classes */
192 #define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
193 #define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
194 #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
195                    | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
196
197 #define TC_STATEMNT (TC_STATX | TC_WHILE)
198 #define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
199
200 /* word tokens, cannot mean something else if not expected */
201 #define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
202                    | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
203
204 /* discard newlines after these */
205 #define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
206                    | TC_BINOP | TC_OPTERM)
207
208 /* what can expression begin with */
209 #define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
210 /* what can group begin with */
211 #define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
212
213 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
214 /* operator is inserted between them */
215 #define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
216                    | TC_STRING | TC_NUMBER | TC_UOPPOST)
217 #define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
218
/* Operand-handling flags, OR-ed into the info word (bits 16..21).
 * NOTE(review): judging by the names and the xV/Sx/NV... shorthands below,
 * RESn presumably means "evaluate operand n" and STRn / NUM1 "fetch it as
 * string / number"; the consumer is not visible in this chunk - confirm. */
219 #define OF_RES1    0x010000
220 #define OF_RES2    0x020000
221 #define OF_STR1    0x040000
222 #define OF_STR2    0x080000
223 #define OF_NUM1    0x100000
224 #define OF_CHECKED 0x200000
225
226 /* combined operator flags */
/* Two letters: first describes operand 1, second operand 2
 * (x = none of the flags, V = RES, S = RES|STR, N = RES|NUM). */
227 #define xx      0
228 #define xV      OF_RES2
229 #define xS      (OF_RES2 | OF_STR2)
230 #define Vx      OF_RES1
231 #define VV      (OF_RES1 | OF_RES2)
232 #define Nx      (OF_RES1 | OF_NUM1)
233 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
234 #define Sx      (OF_RES1 | OF_STR1)
235 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
236 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
237
/* OPCLSMASK selects the OC_xxx / ST_xxx operation class (bits 8..15),
 * OPNMASK the low 7 bits that carry the per-class sub-code. */
238 #define OPCLSMASK 0xFF00
239 #define OPNMASK   0x007F
240
/* Operator priority is stored in the highest byte of the info word
 * (even: r->l, odd: l->r grouping).
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument in bits 24..31. The argument is converted to
 * uint32_t first: builtin codes such as 0x83 have bit 7 set, and shifting
 * a signed int so that a 1 lands in the sign bit is undefined behaviour.
 * The argument is also parenthesized so that an expression can be passed.
 * PRIMASK extracts the 7-bit priority; PRIMASK2 is the same mask with the
 * lowest priority bit (the grouping bit) cleared.
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
251
252 /* Operation classes */
253
/* The two thresholds below are compared against OC_xxx class values.
 * NOTE(review): their exact use is in the tree walker, outside this chunk. */
254 #define SHIFT_TIL_THIS  0x0600
255 #define RECUR_FROM_THIS 0x1000
256
/* Class codes occupy bits 8..15 of the info word (see OPCLSMASK).
 * Values are grouped: 0x01xx-0x06xx, 0x07xx-0x0exx, 0x10xx-0x28xx,
 * and the ST_xxx statement codes at 0x30xx-0x33xx. */
257 enum {
258         OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
259         OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
260
261         OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
262         OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
263         OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
264
265         OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
266         OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
267         OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
268         OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
269         OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
270         OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
271         OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
272         OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
273         OC_DONE = 0x2800,
274
275         ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
276         ST_WHILE = 0x3300
277 };
278
279 /* simple builtins */
/* Sub-codes OR-ed with OC_FBLTIN in tokeninfo[] (close, system, fflush,
 * cos, exp, int, log, rand, sin, sqrt, srand, length, systime). */
280 enum {
281         F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
282         F_ti,   F_le,   F_sy,   F_ff,   F_cl
283 };
284
285 /* builtins */
/* Sub-codes OR-ed with OC_BUILTIN (OC_B) in tokeninfo[]; the last row holds
 * the bitwise builtins (and, compl, lshift, or, rshift, xor). */
286 enum {
287         B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
288         B_ge,   B_gs,   B_su,
289         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
290 };
291
292 /* tokens and their corresponding info values */
293
294 #define NTC     "\377"  /* switch to next token class (tc<<1) */
295 #define NTCC    '\377'
296
297 #define OC_B  OC_BUILTIN
298
/* Packed token table. Every entry is one length byte (written as an octal
 * escape, e.g. "\10" = 8) followed by that many characters of token text.
 * NTC ends a group; groups appear in the same order as the TC_xxx bits,
 * starting with TC_SEQSTART. Entries line up one-to-one, in order, with
 * tokeninfo[] - keep both tables in sync when editing either. */
299 static const char tokenlist[] ALIGN1 =
300         "\1("         NTC
301         "\1)"         NTC
302         "\1/"         NTC                                   /* REGEXP */
303         "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
304         "\2++"        "\2--"        NTC                     /* UOPPOST */
305         "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
306         "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
307         "\2*="        "\2/="        "\2%="      "\2^="
308         "\1+"         "\1-"         "\3**="     "\2**"
309         "\1/"         "\1%"         "\1^"       "\1*"
310         "\2!="        "\2>="        "\2<="      "\1>"
311         "\1<"         "\2!~"        "\1~"       "\2&&"
312         "\2||"        "\1?"         "\1:"       NTC
313         "\2in"        NTC
314         "\1,"         NTC
315         "\1|"         NTC
316         "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
317         "\1]"         NTC
318         "\1{"         NTC
319         "\1}"         NTC
320         "\1;"         NTC
321         "\1\n"        NTC
322         "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
323         "\10continue" "\6delete"    "\5print"
324         "\6printf"    "\4next"      "\10nextfile"
325         "\6return"    "\4exit"      NTC
326         "\5while"     NTC
327         "\4else"      NTC
328
        /* the bitwise builtins below belong to the BUILTIN group too */
329         "\3and"       "\5compl"     "\6lshift"  "\2or"
330         "\6rshift"    "\3xor"
331         "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
332         "\3cos"       "\3exp"       "\3int"     "\3log"
333         "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
334         "\6gensub"    "\4gsub"      "\5index"   "\6length"
335         "\5match"     "\5split"     "\7sprintf" "\3sub"
336         "\6substr"    "\7systime"   "\10strftime" "\6mktime"
337         "\7tolower"   "\7toupper"   NTC
338         "\7getline"   NTC
339         "\4func"      "\10function" NTC
340         "\5BEGIN"     NTC
341         "\3END"
342         /* compiler adds trailing "\0" */
343         ;
344
/* Info word for each token of tokenlist[], in the same order (one source
 * line here per NTC-terminated group or per row of that table).
 * A word is built from an OC_xxx/ST_xxx class, operand flags (xV, NV, ...),
 * a P(priority) byte and a low-byte sub-code (a character or an F_xx/B_xx
 * value). For OC_B entries the P() byte is not a priority but the builtin
 * argument descriptor described next to the P() macro. */
345 static const uint32_t tokeninfo[] = {
346         0,
347         0,
348         OC_REGEXP,
349         xS|'a',                  xS|'w',                  xS|'|',
350         OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
351         OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
352         OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
353         OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
354         OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
355         OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
        /* note: sub-code '&' is what "^", "**", "^=" and "**=" all map to */
356         OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
357         OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
358         OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
359         OC_IN|SV|P(49), /* in */
360         OC_COMMA|SS|P(80),
361         OC_PGETLINE|SV|P(37),
362         OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
363         0, /* ] */
364         0,
365         0,
366         0,
367         0, /* \n */
368         ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
369         OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
370         OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
371         OC_RETURN|Vx, OC_EXIT|Nx,
372         ST_WHILE,
373         0, /* else */
374
375         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
376         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
377         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
378         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
379         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
380         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
381         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
382         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
383         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
384         OC_GETLINE|SV|P(0),
385         0,                 0,
386         0,
387         0 /* END */
388 };
389
390 /* internal variable names and their initial values       */
391 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indexes into intvar[]; the order must match the names in vNames[]. */
392 enum {
393         CONVFMT,    OFMT,       FS,         OFS,
394         ORS,        RS,         RT,         FILENAME,
395         SUBSEP,     F0,         ARGIND,     ARGC,
396         ARGV,       ERRNO,      FNR,        NR,
397         NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
398 };
399
/* NUL-separated names, ended by an empty string. As laid out here the '*'
 * marker sits right after the NUL of the name it applies to
 * (FS, RS, $, NF, IGNORECASE).
 * NOTE(review): the code consuming this table is outside this chunk. */
400 static const char vNames[] ALIGN1 =
401         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
402         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
403         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
404         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
405         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
406
/* NUL-separated initial string values for the first ten names above
 * (CONVFMT .. F0), followed by a "\377" end marker. */
407 static const char vValues[] ALIGN1 =
408         "%.6g\0"    "%.6g\0"    " \0"       " \0"
409         "\n\0"      "\n\0"      "\0"        "\0"
410         "\034\0"    "\0"        "\377";
411
412 /* hash size may grow to these values */
/* hash_init() starts every table at FIRST_PRIME buckets; hash_rebuild()
 * then steps through PRIMES[] and stops growing after the last entry. */
413 #define FIRST_PRIME 61
414 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
415
416
417 /* Globals. Split in two parts so that first one is addressed
418  * with (mostly short) negative offsets.
419  * NB: it's unsafe to put members of type "double"
420  * into globals2 (gcc may fail to align them).
421  */
/* First part: reached through G1, i.e. ptr_to_globals[-1]. INIT_G()
 * allocates both parts zeroed in one block, this struct first. */
422 struct globals {
423         double t_double;
424         chain beginseq, mainseq, endseq;
425         chain *seq;
426         node *break_ptr, *continue_ptr;
427         rstream *iF;
428         xhash *vhash, *ahash, *fdhash, *fnhash;
429         const char *g_progname;
430         int g_lineno;
431         int nfields;
432         int maxfields; /* used in fsrealloc() only */
433         var *Fields;
434         nvblock *g_cb;
435         char *g_pos;
436         char *g_buf;
437         smallint icase;
438         smallint exiting;
439         smallint nextrec;
440         smallint nextfile;
441         smallint is_f0_split;
442         smallint t_rollback;
443 };
/* Second part of the globals: reached through G, i.e. the memory that
 * ptr_to_globals itself points at (directly after struct globals).
 * NOTE(review): ptest__v is a var and therefore contains a double, which
 * the comment above struct globals warns about - confirm that the layout
 * keeps it suitably aligned. */
444 struct globals2 {
445         uint32_t t_info; /* often used */
446         uint32_t t_tclass;
447         char *t_string;
448         int t_lineno;
449
450         var *intvar[NUM_INTERNAL_VARS]; /* often used */
451
452         /* former statics from various functions */
        /* (members are named <function>__<former static name>) */
453         char *split_f0__fstrings;
454
455         uint32_t next_token__save_tclass;
456         uint32_t next_token__save_info;
457         uint32_t next_token__ltclass;
458         smallint next_token__concat_inserted;
459
460         smallint next_input_file__files_happen;
461         rstream next_input_file__rsm;
462
463         var *evaluate__fnargs;
464         unsigned evaluate__seed;
465         regex_t evaluate__sreg;
466
467         var ptest__v;
468
469         tsplitter exec_builtin__tspl;
470
471         /* biggest and least used members go last */
472         tsplitter fsplitter, rsplitter;
473 };
/* G1 is the struct globals located immediately before the address in
 * ptr_to_globals; G is the struct globals2 located at that address. */
474 #define G1 (ptr_to_globals[-1])
475 #define G (*(struct globals2 *)ptr_to_globals)
476 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
477 /*char G1size[sizeof(G1)]; - 0x74 */
478 /*char Gsize[sizeof(G)]; - 0x1c4 */
479 /* Trying to keep most of members accessible with short offsets: */
480 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthands so the rest of the file can use the bare member names.
 * Beware: these are macros, so the names cannot be reused as locals. */
481 #define t_double     (G1.t_double    )
482 #define beginseq     (G1.beginseq    )
483 #define mainseq      (G1.mainseq     )
484 #define endseq       (G1.endseq      )
485 #define seq          (G1.seq         )
486 #define break_ptr    (G1.break_ptr   )
487 #define continue_ptr (G1.continue_ptr)
488 #define iF           (G1.iF          )
489 #define vhash        (G1.vhash       )
490 #define ahash        (G1.ahash       )
491 #define fdhash       (G1.fdhash      )
492 #define fnhash       (G1.fnhash      )
493 #define g_progname   (G1.g_progname  )
494 #define g_lineno     (G1.g_lineno    )
495 #define nfields      (G1.nfields     )
496 #define maxfields    (G1.maxfields   )
497 #define Fields       (G1.Fields      )
498 #define g_cb         (G1.g_cb        )
499 #define g_pos        (G1.g_pos       )
500 #define g_buf        (G1.g_buf       )
501 #define icase        (G1.icase       )
502 #define exiting      (G1.exiting     )
503 #define nextrec      (G1.nextrec     )
504 #define nextfile     (G1.nextfile    )
505 #define is_f0_split  (G1.is_f0_split )
506 #define t_rollback   (G1.t_rollback  )
507 #define t_info       (G.t_info      )
508 #define t_tclass     (G.t_tclass    )
509 #define t_string     (G.t_string    )
510 #define t_lineno     (G.t_lineno    )
511 #define intvar       (G.intvar      )
512 #define fsplitter    (G.fsplitter   )
513 #define rsplitter    (G.rsplitter   )
/* Allocate both global structs as one zeroed block, point ptr_to_globals
 * at the second one, and set the two members whose initial value is not
 * zero (last token class = TC_OPTERM, random seed = 1). */
514 #define INIT_G() do { \
515         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
516         G.next_token__ltclass = TC_OPTERM; \
517         G.evaluate__seed = 1; \
518 } while (0)
519
520
521 /* function prototypes */
/* Forward declarations for functions that are used before their
 * definitions appear (e.g. setvar_p() calls handle_special(), getvar_s()
 * calls fmt_num()). */
522 static void handle_special(var *);
523 static node *parse_expr(uint32_t);
524 static void chain_group(void);
525 static var *evaluate(node *, var *);
526 static rstream *next_input_file(void);
527 static int fmt_num(char *, int, const char *, double, int);
528 static int awk_exit(int) NORETURN;
529
530 /* ---- error handling ---- */
531
/* Message texts shared by the error paths; syntax_error() prints one of
 * these after a "<program>:<line>: " prefix. */
532 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
533 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
534 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
535 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
536 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
537 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
538 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
539 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
540 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
541 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
542
543 static void zero_out_var(var *vp)
544 {
545         memset(vp, 0, sizeof(*vp));
546 }
547
548 static void syntax_error(const char *message) NORETURN;
549 static void syntax_error(const char *message)
550 {
551         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
552 }
553
554 /* ---- hash stuff ---- */
555
/* Hash a NUL-terminated name: h = h * 63 + c for every character, in
 * unsigned (wrapping) arithmetic. The empty string hashes to 0.
 * Callers reduce the result modulo the table size. */
static unsigned hashidx(const char *name)
{
        unsigned h;

        for (h = 0; *name != '\0'; name++)
                h = (h << 6) - h + *name;
        return h;
}
564
565 /* create new hash */
566 static xhash *hash_init(void)
567 {
568         xhash *newhash;
569
570         newhash = xzalloc(sizeof(*newhash));
571         newhash->csize = FIRST_PRIME;
572         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
573
574         return newhash;
575 }
576
577 /* find item in hash, return ptr to data, NULL if not found */
578 static void *hash_search(xhash *hash, const char *name)
579 {
580         hash_item *hi;
581
582         hi = hash->items[hashidx(name) % hash->csize];
583         while (hi) {
584                 if (strcmp(hi->name, name) == 0)
585                         return &hi->data;
586                 hi = hi->next;
587         }
588         return NULL;
589 }
590
591 /* grow hash if it becomes too big */
592 static void hash_rebuild(xhash *hash)
593 {
594         unsigned newsize, i, idx;
595         hash_item **newitems, *hi, *thi;
596
597         if (hash->nprime == ARRAY_SIZE(PRIMES))
598                 return;
599
600         newsize = PRIMES[hash->nprime++];
601         newitems = xzalloc(newsize * sizeof(newitems[0]));
602
603         for (i = 0; i < hash->csize; i++) {
604                 hi = hash->items[i];
605                 while (hi) {
606                         thi = hi;
607                         hi = thi->next;
608                         idx = hashidx(thi->name) % newsize;
609                         thi->next = newitems[idx];
610                         newitems[idx] = thi;
611                 }
612         }
613
614         free(hash->items);
615         hash->csize = newsize;
616         hash->items = newitems;
617 }
618
619 /* find item in hash, add it if necessary. Return ptr to data */
620 static void *hash_find(xhash *hash, const char *name)
621 {
622         hash_item *hi;
623         unsigned idx;
624         int l;
625
626         hi = hash_search(hash, name);
627         if (!hi) {
628                 if (++hash->nel / hash->csize > 10)
629                         hash_rebuild(hash);
630
631                 l = strlen(name) + 1;
632                 hi = xzalloc(sizeof(*hi) + l);
633                 strcpy(hi->name, name);
634
635                 idx = hashidx(name) % hash->csize;
636                 hi->next = hash->items[idx];
637                 hash->items[idx] = hi;
638                 hash->glen += l;
639         }
640         return &hi->data;
641 }
642
/* Typed wrappers around hash_find(): each returns the (possibly newly
 * created, zero-filled) entry cast to the payload type of the given
 * table - any hash for findvar, vhash / fdhash / fnhash for the others. */
643 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
644 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
645 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
646 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
647
648 static void hash_remove(xhash *hash, const char *name)
649 {
650         hash_item *hi, **phi;
651
652         phi = &hash->items[hashidx(name) % hash->csize];
653         while (*phi) {
654                 hi = *phi;
655                 if (strcmp(hi->name, name) == 0) {
656                         hash->glen -= (strlen(name) + 1);
657                         hash->nel--;
658                         *phi = hi->next;
659                         free(hi);
660                         break;
661                 }
662                 phi = &hi->next;
663         }
664 }
665
666 /* ------ some useful functions ------ */
667
668 static char *skip_spaces(char *p)
669 {
670         while (1) {
671                 if (*p == '\\' && p[1] == '\n') {
672                         p++;
673                         t_lineno++;
674                 } else if (*p != ' ' && *p != '\t') {
675                         break;
676                 }
677                 p++;
678         }
679         return p;
680 }
681
/* Return the word *s currently points at and move *s to the character
 * following that word's terminating NUL. */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
690
/* Fetch one (possibly escaped) character from *s and advance *s past it.
 * A backslash is handed to bb_process_escape_sequence(). If that returns
 * a backslash without consuming anything (unrecognized escape such as
 * \z, or \" as in  s = "abc\"def"), the character after the backslash is
 * returned instead; *s is only advanced past it when it is not NUL. */
static char nextchar(char **s)
{
        char *after_first;
        char c;

        c = **s;
        (*s)++;
        after_first = *s;

        if (c != '\\')
                return c;

        c = bb_process_escape_sequence((const char**)s);
        if (c == '\\' && *s == after_first) {
                c = **s;
                if (c != '\0')
                        (*s)++;
        }
        return c;
}
710
/* TODO: merge with strcpy_and_process_escape_sequences()?
 */
/* Rewrite s1 in place with its escape sequences decoded by nextchar().
 * The terminating NUL is copied too. */
static void unescape_string_in_place(char *s1)
{
        char *src = s1;
        char *dst = s1;
        char c;

        do {
                c = nextchar(&src);
                *dst++ = c;
        } while (c != '\0');
}
719
720 static ALWAYS_INLINE int isalnum_(int c)
721 {
722         return (isalnum(c) || c == '_');
723 }
724
725 static double my_strtod(char **pp)
726 {
727         char *cp = *pp;
728         if (ENABLE_DESKTOP && cp[0] == '0') {
729                 /* Might be hex or octal integer: 0x123abc or 07777 */
730                 char c = (cp[1] | 0x20);
731                 if (c == 'x' || isdigit(cp[1])) {
732                         unsigned long long ull = strtoull(cp, pp, 0);
733                         if (c == 'x')
734                                 return ull;
735                         c = **pp;
736                         if (!isdigit(c) && c != '.')
737                                 return ull;
738                         /* else: it may be a floating number. Examples:
739                          * 009.123 (*pp points to '9')
740                          * 000.123 (*pp points to '.')
741                          * fall through to strtod.
742                          */
743                 }
744         }
745         return strtod(cp, pp);
746 }
747
748 /* -------- working with variables (set/get/copy/etc) -------- */
749
750 static xhash *iamarray(var *v)
751 {
752         var *a = v;
753
754         while (a->type & VF_CHILD)
755                 a = a->x.parent;
756
757         if (!(a->type & VF_ARRAY)) {
758                 a->type |= VF_ARRAY;
759                 a->x.array = hash_init();
760         }
761         return a->x.array;
762 }
763
764 static void clear_array(xhash *array)
765 {
766         unsigned i;
767         hash_item *hi, *thi;
768
769         for (i = 0; i < array->csize; i++) {
770                 hi = array->items[i];
771                 while (hi) {
772                         thi = hi;
773                         hi = hi->next;
774                         free(thi->data.v.string);
775                         free(thi);
776                 }
777                 array->items[i] = NULL;
778         }
779         array->glen = array->nel = 0;
780 }
781
782 /* clear a variable */
783 static var *clrvar(var *v)
784 {
785         if (!(v->type & VF_FSTR))
786                 free(v->string);
787
788         v->type &= VF_DONTTOUCH;
789         v->type |= VF_DIRTY;
790         v->string = NULL;
791         return v;
792 }
793
794 /* assign string value to variable */
795 static var *setvar_p(var *v, char *value)
796 {
797         clrvar(v);
798         v->string = value;
799         handle_special(v);
800         return v;
801 }
802
803 /* same as setvar_p but make a copy of string */
804 static var *setvar_s(var *v, const char *value)
805 {
806         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
807 }
808
809 /* same as setvar_s but sets USER flag */
810 static var *setvar_u(var *v, const char *value)
811 {
812         v = setvar_s(v, value);
813         v->type |= VF_USER;
814         return v;
815 }
816
817 /* set array element to user string */
818 static void setari_u(var *a, int idx, const char *s)
819 {
820         var *v;
821
822         v = findvar(iamarray(a), itoa(idx));
823         setvar_u(v, s);
824 }
825
826 /* assign numeric value to variable */
827 static var *setvar_i(var *v, double value)
828 {
829         clrvar(v);
830         v->type |= VF_NUMBER;
831         v->number = value;
832         handle_special(v);
833         return v;
834 }
835
836 static const char *getvar_s(var *v)
837 {
838         /* if v is numeric and has no cached string, convert it to string */
839         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
840                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
841                 v->string = xstrdup(g_buf);
842                 v->type |= VF_CACHED;
843         }
844         return (v->string == NULL) ? "" : v->string;
845 }
846
847 static double getvar_i(var *v)
848 {
849         char *s;
850
851         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
852                 v->number = 0;
853                 s = v->string;
854                 if (s && *s) {
855                         debug_printf_eval("getvar_i: '%s'->", s);
856                         v->number = my_strtod(&s);
857                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
858                         if (v->type & VF_USER) {
859                                 s = skip_spaces(s);
860                                 if (*s != '\0')
861                                         v->type &= ~VF_USER;
862                         }
863                 } else {
864                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
865                         v->type &= ~VF_USER;
866                 }
867                 v->type |= VF_CACHED;
868         }
869         debug_printf_eval("getvar_i: %f\n", v->number);
870         return v->number;
871 }
872
873 /* Used for operands of bitwise ops */
874 static unsigned long getvar_i_int(var *v)
875 {
876         double d = getvar_i(v);
877
878         /* Casting doubles to longs is undefined for values outside
879          * of target type range. Try to widen it as much as possible */
880         if (d >= 0)
881                 return (unsigned long)d;
882         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
883         return - (long) (unsigned long) (-d);
884 }
885
886 static var *copyvar(var *dest, const var *src)
887 {
888         if (dest != src) {
889                 clrvar(dest);
890                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
891                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
892                 dest->number = src->number;
893                 if (src->string)
894                         dest->string = xstrdup(src->string);
895         }
896         handle_special(dest);
897         return dest;
898 }
899
900 static var *incvar(var *v)
901 {
902         return setvar_i(v, getvar_i(v) + 1.0);
903 }
904
905 /* return true if v is number or numeric string */
906 static int is_numeric(var *v)
907 {
908         getvar_i(v);
909         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
910 }
911
912 /* return 1 when value of v corresponds to true, 0 otherwise */
913 static int istrue(var *v)
914 {
915         if (is_numeric(v))
916                 return (v->number != 0);
917         return (v->string && v->string[0]);
918 }
919
920 /* temporary variables allocator. Last allocated should be first freed */
921 static var *nvalloc(int n)
922 {
923         nvblock *pb = NULL;
924         var *v, *r;
925         int size;
926
927         while (g_cb) {
928                 pb = g_cb;
929                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
930                         break;
931                 g_cb = g_cb->next;
932         }
933
934         if (!g_cb) {
935                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
936                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
937                 g_cb->size = size;
938                 g_cb->pos = g_cb->nv;
939                 g_cb->prev = pb;
940                 /*g_cb->next = NULL; - xzalloc did it */
941                 if (pb)
942                         pb->next = g_cb;
943         }
944
945         v = r = g_cb->pos;
946         g_cb->pos += n;
947
948         while (v < g_cb->pos) {
949                 v->type = 0;
950                 v->string = NULL;
951                 v++;
952         }
953
954         return r;
955 }
956
957 static void nvfree(var *v)
958 {
959         var *p;
960
961         if (v < g_cb->nv || v >= g_cb->pos)
962                 syntax_error(EMSG_INTERNAL_ERROR);
963
964         for (p = v; p < g_cb->pos; p++) {
965                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
966                         clear_array(iamarray(p));
967                         free(p->x.array->items);
968                         free(p->x.array);
969                 }
970                 if (p->type & VF_WALK) {
971                         walker_list *n;
972                         walker_list *w = p->x.walker;
973                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
974                         p->x.walker = NULL;
975                         while (w) {
976                                 n = w->prev;
977                                 debug_printf_walker(" free(%p)\n", w);
978                                 free(w);
979                                 w = n;
980                         }
981                 }
982                 clrvar(p);
983         }
984
985         g_cb->pos = v;
986         while (g_cb->prev && g_cb->pos == g_cb->nv) {
987                 g_cb = g_cb->prev;
988         }
989 }
990
991 /* ------- awk program text parsing ------- */
992
/* Fetch the next token of the program text, starting at g_pos.
 *
 * Results are left in the t_* globals: t_tclass (token class), t_info,
 * and, depending on the token, t_string or t_double. g_pos is advanced
 * past the token.
 *
 * 'expected' is a mask of acceptable token classes. If the class of the
 * token found is not in the mask, syntax_error() is called.
 *
 * Strings, regexps and names are rewritten in place inside the program
 * buffer, so the buffer is modified by this function.
 *
 * Returns the token class.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass     (G.next_token__save_tclass)
#define save_info       (G.next_token__save_info)
/* Initialized to TC_OPTERM: */
#define ltclass         (G.next_token__ltclass)

        char *p, *s;
        const char *tl;
        uint32_t tc;
        const uint32_t *ti;

        if (t_rollback) {
                /* rollback_token() was called: hand out the same token again */
                t_rollback = FALSE;

        } else if (concat_inserted) {
                /* previous call returned a synthesized concatenation operator;
                 * now deliver the real token that was saved at that time */
                concat_inserted = FALSE;
                t_tclass = save_tclass;
                t_info = save_info;

        } else {
                p = g_pos;
 readnext:
                p = skip_spaces(p);
                g_lineno = t_lineno;
                /* a comment runs up to (not including) end of line */
                if (*p == '#')
                        while (*p != '\n' && *p != '\0')
                                p++;

                if (*p == '\n')
                        t_lineno++;

                if (*p == '\0') {
                        tc = TC_EOF;
                        debug_printf_parse("%s: token found: TC_EOF\n", __func__);

                } else if (*p == '\"') {
                        /* it's a string */
                        /* decoded in place: s is the write position, p the read position */
                        t_string = s = ++p;
                        while (*p != '\"') {
                                char *pp;
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                pp = p;
                                *s++ = nextchar(&pp);
                                p = pp;
                        }
                        p++;
                        *s = '\0';
                        tc = TC_STRING;
                        debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);

                } else if ((expected & TC_REGEXP) && *p == '/') {
                        /* it's regexp */
                        /* '/' starts a regexp only where the caller allows one;
                         * the text is copied in place like a string */
                        t_string = s = ++p;
                        while (*p != '/') {
                                if (*p == '\0' || *p == '\n')
                                        syntax_error(EMSG_UNEXP_EOS);
                                *s = *p++;
                                if (*s++ == '\\') {
                                        /* backslash: try to decode an escape sequence */
                                        char *pp = p;
                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
                                        if (*p == '\\')
                                                *s++ = '\\';
                                        if (pp == p)
                                                /* nothing was consumed: copy next char as is */
                                                *s++ = *p++;
                                        else
                                                p = pp;
                                }
                        }
                        p++;
                        *s = '\0';
                        tc = TC_REGEXP;
                        debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);

                } else if (*p == '.' || isdigit(*p)) {
                        /* it's a number */
                        char *pp = p;
                        t_double = my_strtod(&pp);
                        p = pp;
                        /* a '.' directly after the number is rejected */
                        if (*p == '.')
                                syntax_error(EMSG_UNEXP_TOKEN);
                        tc = TC_NUMBER;
                        debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);

                } else {
                        /* search for something known */
                        /* tokenlist entries are a length byte followed by the token
                         * text; an NTCC byte switches to the next token class bit.
                         * tokeninfo is walked in parallel, one item per token. */
                        tl = tokenlist;
                        tc = 0x00000001;
                        ti = tokeninfo;
                        while (*tl) {
                                int l = (unsigned char) *tl++;
                                if (l == (unsigned char) NTCC) {
                                        tc <<= 1;
                                        continue;
                                }
                                /* if token class is expected,
                                 * token matches,
                                 * and it's not a longer word,
                                 */
                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
                                 && strncmp(p, tl, l) == 0
                                 && !((tc & TC_WORD) && isalnum_(p[l]))
                                ) {
                                        /* then this is what we are looking for */
                                        t_info = *ti;
                                        debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
                                        p += l;
                                        goto token_found;
                                }
                                ti++;
                                tl += l;
                        }
                        /* not a known token */

                        /* is it a name? (var/array/function) */
                        if (!isalnum_(*p))
                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
                        /* yes */
                        /* shift the name one byte to the left, so that the
                         * terminating NUL does not overwrite the character
                         * which follows the name. This writes to the byte
                         * preceding the name. */
                        t_string = --p;
                        while (isalnum_(*++p)) {
                                p[-1] = *p;
                        }
                        p[-1] = '\0';
                        tc = TC_VARIABLE;
                        /* also consume whitespace between functionname and bracket */
                        if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
                                p = skip_spaces(p);
                        if (*p == '(') {
                                /* '(' is not consumed here */
                                tc = TC_FUNCTION;
                                debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
                        } else {
                                if (*p == '[') {
                                        /* '[' is consumed as part of the token */
                                        p++;
                                        tc = TC_ARRAY;
                                        debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
                                } else
                                        debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
                        }
                }
 token_found:
                g_pos = p;

                /* skipping newlines in some cases */
                /* (when the class of the previous token is in TC_NOTERM) */
                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
                        goto readnext;

                /* insert concatenation operator when needed */
                /* the token just read is saved and returned by the next call */
                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
                        concat_inserted = TRUE;
                        save_tclass = tc;
                        save_info = t_info;
                        tc = TC_BINOP;
                        t_info = OC_CONCAT | SS | P(35);
                }

                t_tclass = tc;
        }
        /* remember the class of this token for the next call */
        ltclass = t_tclass;

        /* Are we ready for this? */
        if (!(ltclass & expected))
                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);

        return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1168
/* Push the current token back: the next call to next_token() sees
 * t_rollback set and hands out the same token again instead of
 * reading a new one. */
static void rollback_token(void)
{
        t_rollback = TRUE;
}
1173
1174 static node *new_node(uint32_t info)
1175 {
1176         node *n;
1177
1178         n = xzalloc(sizeof(node));
1179         n->info = info;
1180         n->lineno = g_lineno;
1181         return n;
1182 }
1183
1184 static void mk_re_node(const char *s, node *n, regex_t *re)
1185 {
1186         n->info = OC_REGEXP;
1187         n->l.re = re;
1188         n->r.ire = re + 1;
1189         xregcomp(re, s, REG_EXTENDED);
1190         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1191 }
1192
1193 static node *condition(void)
1194 {
1195         next_token(TC_SEQSTART);
1196         return parse_expr(TC_SEQTERM);
1197 }
1198
/* Parse an expression terminated by a token whose class is in 'iexp' and
 * return a pointer to the subtree built (NULL if the expression is empty).
 * The terminator is eaten by parse_expr.
 *
 * The tree hangs off 'sn', a dummy root node on the stack; the result is
 * sn.r.n. While building, the a.n member of each node points at the node
 * it is attached under, which lets operators climb back up the tree
 * according to their priority.
 */
static node *parse_expr(uint32_t iexp)
{
        node sn;
        node *cn = &sn;         /* node most recently added */
        node *vn, *glptr;       /* glptr: pending getline node, if any */
        uint32_t tc, xtc;       /* xtc: token classes acceptable next */
        var *v;

        debug_printf_parse("%s(%x)\n", __func__, iexp);

        sn.info = PRIMASK;
        sn.r.n = glptr = NULL;
        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

        while (!((tc = next_token(xtc)) & iexp)) {

                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
                        /* input redirection (<) attached to glptr node */
                        debug_printf_parse("%s: input redir\n", __func__);
                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
                        cn->a.n = glptr;
                        xtc = TC_OPERAND | TC_UOPPRE;
                        glptr = NULL;

                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
                        debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
                        /* for binary and postfix-unary operators, jump back over
                         * previous operators with higher priority */
                        vn = cn;
                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
                            || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
                        ) {
                                vn = vn->a.n;
                        }
                        if ((t_info & OPCLSMASK) == OC_TERNARY)
                                t_info += P(6);
                        /* the new operator node takes the place of vn under vn's
                         * former parent */
                        cn = vn->a.n->r.n = new_node(t_info);
                        cn->a.n = vn->a.n;
                        if (tc & TC_BINOP) {
                                /* vn becomes the left operand */
                                cn->l.n = vn;
                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
                                        /* it's a pipe */
                                        next_token(TC_GETLINE);
                                        /* give maximum priority to this pipe */
                                        cn->info &= ~PRIMASK;
                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                                }
                        } else {
                                /* postfix operator: vn is its only operand */
                                cn->r.n = vn;
                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                        }
                        vn->a.n = cn;

                } else {
                        debug_printf_parse("%s: other\n", __func__);
                        /* for operands and prefix-unary operators, attach them
                         * to last node */
                        vn = cn;
                        cn = vn->r.n = new_node(t_info);
                        cn->a.n = vn;
                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
                        if (tc & (TC_OPERAND | TC_REGEXP)) {
                                debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
                                /* one should be very careful with switch on tclass -
                                 * only simple tclasses should be used! */
                                switch (tc) {
                                case TC_VARIABLE:
                                case TC_ARRAY:
                                        debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
                                        cn->info = OC_VAR;
                                        /* a name found in ahash is a function argument
                                         * and is referenced by index, anything else is
                                         * obtained from newvar() */
                                        v = hash_search(ahash, t_string);
                                        if (v != NULL) {
                                                cn->info = OC_FNARG;
                                                cn->l.aidx = v->x.aidx;
                                        } else {
                                                cn->l.v = newvar(t_string);
                                        }
                                        if (tc & TC_ARRAY) {
                                                /* subscript expression runs up to TC_ARRTERM */
                                                cn->info |= xS;
                                                cn->r.n = parse_expr(TC_ARRTERM);
                                        }
                                        break;

                                case TC_NUMBER:
                                case TC_STRING:
                                        debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
                                        /* constants are stored in a freshly allocated var */
                                        cn->info = OC_VAR;
                                        v = cn->l.v = xzalloc(sizeof(var));
                                        if (tc & TC_NUMBER)
                                                setvar_i(v, t_double);
                                        else
                                                setvar_s(v, t_string);
                                        break;

                                case TC_REGEXP:
                                        debug_printf_parse("%s: TC_REGEXP\n", __func__);
                                        /* room for both the case-sensitive and the
                                         * case-insensitive compiled expression */
                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
                                        break;

                                case TC_FUNCTION:
                                        debug_printf_parse("%s: TC_FUNCTION\n", __func__);
                                        cn->info = OC_FUNC;
                                        cn->r.f = newfunc(t_string);
                                        /* parenthesized argument list */
                                        cn->l.n = condition();
                                        break;

                                case TC_SEQSTART:
                                        debug_printf_parse("%s: TC_SEQSTART\n", __func__);
                                        /* parenthesized subexpression replaces the node
                                         * allocated above */
                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
                                        if (!cn)
                                                syntax_error("Empty sequence");
                                        cn->a.n = vn;
                                        break;

                                case TC_GETLINE:
                                        debug_printf_parse("%s: TC_GETLINE\n", __func__);
                                        /* remember it: a following '<' is input redirection */
                                        glptr = cn;
                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
                                        break;

                                case TC_BUILTIN:
                                        debug_printf_parse("%s: TC_BUILTIN\n", __func__);
                                        cn->l.n = condition();
                                        break;
                                }
                        }
                }
        }

        debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
        return sn.r.n;
}
1335
1336 /* add node to chain. Return ptr to alloc'd node */
1337 static node *chain_node(uint32_t info)
1338 {
1339         node *n;
1340
1341         if (!seq->first)
1342                 seq->first = seq->last = new_node(0);
1343
1344         if (seq->programname != g_progname) {
1345                 seq->programname = g_progname;
1346                 n = chain_node(OC_NEWSOURCE);
1347                 n->l.new_progname = xstrdup(g_progname);
1348         }
1349
1350         n = seq->last;
1351         n->info = info;
1352         seq->last = n->a.n = new_node(OC_DONE);
1353
1354         return n;
1355 }
1356
1357 static void chain_expr(uint32_t info)
1358 {
1359         node *n;
1360
1361         n = chain_node(info);
1362         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1363         if (t_tclass & TC_GRPTERM)
1364                 rollback_token();
1365 }
1366
1367 static node *chain_loop(node *nn)
1368 {
1369         node *n, *n2, *save_brk, *save_cont;
1370
1371         save_brk = break_ptr;
1372         save_cont = continue_ptr;
1373
1374         n = chain_node(OC_BR | Vx);
1375         continue_ptr = new_node(OC_EXEC);
1376         break_ptr = new_node(OC_EXEC);
1377         chain_group();
1378         n2 = chain_node(OC_EXEC | Vx);
1379         n2->l.n = nn;
1380         n2->a.n = n;
1381         continue_ptr->a.n = n2;
1382         break_ptr->a.n = n->r.n = seq->last;
1383
1384         continue_ptr = save_cont;
1385         break_ptr = save_brk;
1386
1387         return n;
1388 }
1389
1390 /* parse group and attach it to chain */
1391 static void chain_group(void)
1392 {
1393         uint32_t c;
1394         node *n, *n2, *n3;
1395
1396         do {
1397                 c = next_token(TC_GRPSEQ);
1398         } while (c & TC_NEWLINE);
1399
1400         if (c & TC_GRPSTART) {
1401                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1402                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1403                         debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1404                         if (t_tclass & TC_NEWLINE)
1405                                 continue;
1406                         rollback_token();
1407                         chain_group();
1408                 }
1409                 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1410         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1411                 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1412                 rollback_token();
1413                 chain_expr(OC_EXEC | Vx);
1414         } else {
1415                 /* TC_STATEMNT */
1416                 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1417                 switch (t_info & OPCLSMASK) {
1418                 case ST_IF:
1419                         debug_printf_parse("%s: ST_IF\n", __func__);
1420                         n = chain_node(OC_BR | Vx);
1421                         n->l.n = condition();
1422                         chain_group();
1423                         n2 = chain_node(OC_EXEC);
1424                         n->r.n = seq->last;
1425                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1426                                 chain_group();
1427                                 n2->a.n = seq->last;
1428                         } else {
1429                                 rollback_token();
1430                         }
1431                         break;
1432
1433                 case ST_WHILE:
1434                         debug_printf_parse("%s: ST_WHILE\n", __func__);
1435                         n2 = condition();
1436                         n = chain_loop(NULL);
1437                         n->l.n = n2;
1438                         break;
1439
1440                 case ST_DO:
1441                         debug_printf_parse("%s: ST_DO\n", __func__);
1442                         n2 = chain_node(OC_EXEC);
1443                         n = chain_loop(NULL);
1444                         n2->a.n = n->a.n;
1445                         next_token(TC_WHILE);
1446                         n->l.n = condition();
1447                         break;
1448
1449                 case ST_FOR:
1450                         debug_printf_parse("%s: ST_FOR\n", __func__);
1451                         next_token(TC_SEQSTART);
1452                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1453                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1454                                 if ((n2->info & OPCLSMASK) != OC_IN)
1455                                         syntax_error(EMSG_UNEXP_TOKEN);
1456                                 n = chain_node(OC_WALKINIT | VV);
1457                                 n->l.n = n2->l.n;
1458                                 n->r.n = n2->r.n;
1459                                 n = chain_loop(NULL);
1460                                 n->info = OC_WALKNEXT | Vx;
1461                                 n->l.n = n2->l.n;
1462                         } else {                        /* for (;;) */
1463                                 n = chain_node(OC_EXEC | Vx);
1464                                 n->l.n = n2;
1465                                 n2 = parse_expr(TC_SEMICOL);
1466                                 n3 = parse_expr(TC_SEQTERM);
1467                                 n = chain_loop(n3);
1468                                 n->l.n = n2;
1469                                 if (!n2)
1470                                         n->info = OC_EXEC;
1471                         }
1472                         break;
1473
1474                 case OC_PRINT:
1475                 case OC_PRINTF:
1476                         debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1477                         n = chain_node(t_info);
1478                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1479                         if (t_tclass & TC_OUTRDR) {
1480                                 n->info |= t_info;
1481                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1482                         }
1483                         if (t_tclass & TC_GRPTERM)
1484                                 rollback_token();
1485                         break;
1486
1487                 case OC_BREAK:
1488                         debug_printf_parse("%s: OC_BREAK\n", __func__);
1489                         n = chain_node(OC_EXEC);
1490                         n->a.n = break_ptr;
1491                         break;
1492
1493                 case OC_CONTINUE:
1494                         debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1495                         n = chain_node(OC_EXEC);
1496                         n->a.n = continue_ptr;
1497                         break;
1498
1499                 /* delete, next, nextfile, return, exit */
1500                 default:
1501                         debug_printf_parse("%s: default\n", __func__);
1502                         chain_expr(t_info);
1503                 }
1504         }
1505 }
1506
1507 static void parse_program(char *p)
1508 {
1509         uint32_t tclass;
1510         node *cn;
1511         func *f;
1512         var *v;
1513
1514         g_pos = p;
1515         t_lineno = 1;
1516         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1517                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1518
1519                 if (tclass & TC_OPTERM) {
1520                         debug_printf_parse("%s: TC_OPTERM\n", __func__);
1521                         continue;
1522                 }
1523
1524                 seq = &mainseq;
1525                 if (tclass & TC_BEGIN) {
1526                         debug_printf_parse("%s: TC_BEGIN\n", __func__);
1527                         seq = &beginseq;
1528                         chain_group();
1529
1530                 } else if (tclass & TC_END) {
1531                         debug_printf_parse("%s: TC_END\n", __func__);
1532                         seq = &endseq;
1533                         chain_group();
1534
1535                 } else if (tclass & TC_FUNCDECL) {
1536                         debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1537                         next_token(TC_FUNCTION);
1538                         g_pos++;
1539                         f = newfunc(t_string);
1540                         f->body.first = NULL;
1541                         f->nargs = 0;
1542                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1543                                 v = findvar(ahash, t_string);
1544                                 v->x.aidx = f->nargs++;
1545
1546                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1547                                         break;
1548                         }
1549                         seq = &f->body;
1550                         chain_group();
1551                         clear_array(ahash);
1552
1553                 } else if (tclass & TC_OPSEQ) {
1554                         debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1555                         rollback_token();
1556                         cn = chain_node(OC_TEST);
1557                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1558                         if (t_tclass & TC_GRPSTART) {
1559                                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1560                                 rollback_token();
1561                                 chain_group();
1562                         } else {
1563                                 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1564                                 chain_node(OC_PRINT);
1565                         }
1566                         cn->r.n = mainseq.last;
1567
1568                 } else /* if (tclass & TC_GRPSTART) */ {
1569                         debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1570                         rollback_token();
1571                         chain_group();
1572                 }
1573         }
1574         debug_printf_parse("%s: TC_EOF\n", __func__);
1575 }
1576
1577
1578 /* -------- program execution part -------- */
1579
1580 static node *mk_splitter(const char *s, tsplitter *spl)
1581 {
1582         regex_t *re, *ire;
1583         node *n;
1584
1585         re = &spl->re[0];
1586         ire = &spl->re[1];
1587         n = &spl->n;
1588         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1589                 regfree(re);
1590                 regfree(ire); // TODO: nuke ire, use re+1?
1591         }
1592         if (s[0] && s[1]) { /* strlen(s) > 1 */
1593                 mk_re_node(s, n, re);
1594         } else {
1595                 n->info = (uint32_t) s[0];
1596         }
1597
1598         return n;
1599 }
1600
1601 /* use node as a regular expression. Supplied with node ptr and regex_t
1602  * storage space. Return ptr to regex (if result points to preg, it should
1603  * be later regfree'd manually
1604  */
1605 static regex_t *as_regex(node *op, regex_t *preg)
1606 {
1607         int cflags;
1608         var *v;
1609         const char *s;
1610
1611         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1612                 return icase ? op->r.ire : op->l.re;
1613         }
1614         v = nvalloc(1);
1615         s = getvar_s(evaluate(op, v));
1616
1617         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1618         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1619          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1620          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1621          * (maybe gsub is not supposed to use REG_EXTENDED?).
1622          */
1623         if (regcomp(preg, s, cflags)) {
1624                 cflags &= ~REG_EXTENDED;
1625                 xregcomp(preg, s, cflags);
1626         }
1627         nvfree(v);
1628         return preg;
1629 }
1630
/* Gradually growing buffer.
 * Returns B unchanged while N < *SIZE. Otherwise (or when B is NULL)
 * the buffer is reallocated to n + (n>>1) + 80 bytes and *SIZE is updated.
 * Since growth also happens when n == old size, there is always
 * at least one allocated byte beyond index N-1.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1643
1644 /* resize field storage space */
1645 static void fsrealloc(int size)
1646 {
1647         int i;
1648
1649         if (size >= maxfields) {
1650                 i = maxfields;
1651                 maxfields = size + 16;
1652                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1653                 for (; i < maxfields; i++) {
1654                         Fields[i].type = VF_SPECIAL;
1655                         Fields[i].string = NULL;
1656                 }
1657         }
1658         /* if size < nfields, clear extra field variables */
1659         for (i = size; i < nfields; i++) {
1660                 clrvar(Fields + i);
1661         }
1662         nfields = size;
1663 }
1664
1665 static int awk_split(const char *s, node *spl, char **slist)
1666 {
1667         int l, n;
1668         char c[4];
1669         char *s1;
1670         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1671
1672         /* in worst case, each char would be a separate field */
1673         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1674         strcpy(s1, s);
1675
1676         c[0] = c[1] = (char)spl->info;
1677         c[2] = c[3] = '\0';
1678         if (*getvar_s(intvar[RS]) == '\0')
1679                 c[2] = '\n';
1680
1681         n = 0;
1682         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1683                 if (!*s)
1684                         return n; /* "": zero fields */
1685                 n++; /* at least one field will be there */
1686                 do {
1687                         l = strcspn(s, c+2); /* len till next NUL or \n */
1688                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1689                          && pmatch[0].rm_so <= l
1690                         ) {
1691                                 l = pmatch[0].rm_so;
1692                                 if (pmatch[0].rm_eo == 0) {
1693                                         l++;
1694                                         pmatch[0].rm_eo++;
1695                                 }
1696                                 n++; /* we saw yet another delimiter */
1697                         } else {
1698                                 pmatch[0].rm_eo = l;
1699                                 if (s[l])
1700                                         pmatch[0].rm_eo++;
1701                         }
1702                         memcpy(s1, s, l);
1703                         /* make sure we remove *all* of the separator chars */
1704                         do {
1705                                 s1[l] = '\0';
1706                         } while (++l < pmatch[0].rm_eo);
1707                         nextword(&s1);
1708                         s += pmatch[0].rm_eo;
1709                 } while (*s);
1710                 return n;
1711         }
1712         if (c[0] == '\0') {  /* null split */
1713                 while (*s) {
1714                         *s1++ = *s++;
1715                         *s1++ = '\0';
1716                         n++;
1717                 }
1718                 return n;
1719         }
1720         if (c[0] != ' ') {  /* single-character split */
1721                 if (icase) {
1722                         c[0] = toupper(c[0]);
1723                         c[1] = tolower(c[1]);
1724                 }
1725                 if (*s1)
1726                         n++;
1727                 while ((s1 = strpbrk(s1, c)) != NULL) {
1728                         *s1++ = '\0';
1729                         n++;
1730                 }
1731                 return n;
1732         }
1733         /* space split */
1734         while (*s) {
1735                 s = skip_whitespace(s);
1736                 if (!*s)
1737                         break;
1738                 n++;
1739                 while (*s && !isspace(*s))
1740                         *s1++ = *s++;
1741                 *s1++ = '\0';
1742         }
1743         return n;
1744 }
1745
1746 static void split_f0(void)
1747 {
1748 /* static char *fstrings; */
1749 #define fstrings (G.split_f0__fstrings)
1750
1751         int i, n;
1752         char *s;
1753
1754         if (is_f0_split)
1755                 return;
1756
1757         is_f0_split = TRUE;
1758         free(fstrings);
1759         fsrealloc(0);
1760         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1761         fsrealloc(n);
1762         s = fstrings;
1763         for (i = 0; i < n; i++) {
1764                 Fields[i].string = nextword(&s);
1765                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1766         }
1767
1768         /* set NF manually to avoid side effects */
1769         clrvar(intvar[NF]);
1770         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1771         intvar[NF]->number = nfields;
1772 #undef fstrings
1773 }
1774
1775 /* perform additional actions when some internal variables changed */
1776 static void handle_special(var *v)
1777 {
1778         int n;
1779         char *b;
1780         const char *sep, *s;
1781         int sl, l, len, i, bsize;
1782
1783         if (!(v->type & VF_SPECIAL))
1784                 return;
1785
1786         if (v == intvar[NF]) {
1787                 n = (int)getvar_i(v);
1788                 fsrealloc(n);
1789
1790                 /* recalculate $0 */
1791                 sep = getvar_s(intvar[OFS]);
1792                 sl = strlen(sep);
1793                 b = NULL;
1794                 len = 0;
1795                 for (i = 0; i < n; i++) {
1796                         s = getvar_s(&Fields[i]);
1797                         l = strlen(s);
1798                         if (b) {
1799                                 memcpy(b+len, sep, sl);
1800                                 len += sl;
1801                         }
1802                         b = qrealloc(b, len+l+sl, &bsize);
1803                         memcpy(b+len, s, l);
1804                         len += l;
1805                 }
1806                 if (b)
1807                         b[len] = '\0';
1808                 setvar_p(intvar[F0], b);
1809                 is_f0_split = TRUE;
1810
1811         } else if (v == intvar[F0]) {
1812                 is_f0_split = FALSE;
1813
1814         } else if (v == intvar[FS]) {
1815                 mk_splitter(getvar_s(v), &fsplitter);
1816
1817         } else if (v == intvar[RS]) {
1818                 mk_splitter(getvar_s(v), &rsplitter);
1819
1820         } else if (v == intvar[IGNORECASE]) {
1821                 icase = istrue(v);
1822
1823         } else {                                /* $n */
1824                 n = getvar_i(intvar[NF]);
1825                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1826                 /* right here v is invalid. Just to note... */
1827         }
1828 }
1829
1830 /* step through func/builtin/etc arguments */
1831 static node *nextarg(node **pn)
1832 {
1833         node *n;
1834
1835         n = *pn;
1836         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1837                 *pn = n->r.n;
1838                 n = n->l.n;
1839         } else {
1840                 *pn = NULL;
1841         }
1842         return n;
1843 }
1844
1845 static void hashwalk_init(var *v, xhash *array)
1846 {
1847         hash_item *hi;
1848         unsigned i;
1849         walker_list *w;
1850         walker_list *prev_walker;
1851
1852         if (v->type & VF_WALK) {
1853                 prev_walker = v->x.walker;
1854         } else {
1855                 v->type |= VF_WALK;
1856                 prev_walker = NULL;
1857         }
1858         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1859
1860         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1861         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1862         w->cur = w->end = w->wbuf;
1863         w->prev = prev_walker;
1864         for (i = 0; i < array->csize; i++) {
1865                 hi = array->items[i];
1866                 while (hi) {
1867                         strcpy(w->end, hi->name);
1868                         nextword(&w->end);
1869                         hi = hi->next;
1870                 }
1871         }
1872 }
1873
1874 static int hashwalk_next(var *v)
1875 {
1876         walker_list *w = v->x.walker;
1877
1878         if (w->cur >= w->end) {
1879                 walker_list *prev_walker = w->prev;
1880
1881                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1882                 free(w);
1883                 v->x.walker = prev_walker;
1884                 return FALSE;
1885         }
1886
1887         setvar_s(v, nextword(&w->cur));
1888         return TRUE;
1889 }
1890
1891 /* evaluate node, return 1 when result is true, 0 otherwise */
1892 static int ptest(node *pattern)
1893 {
1894         /* ptest__v is "static": to save stack space? */
1895         return istrue(evaluate(pattern, &G.ptest__v));
1896 }
1897
1898 /* read next record from stream rsm into a variable v */
1899 static int awk_getline(rstream *rsm, var *v)
1900 {
1901         char *b;
1902         regmatch_t pmatch[2];
1903         int size, a, p, pp = 0;
1904         int fd, so, eo, r, rp;
1905         char c, *m, *s;
1906
1907         debug_printf_eval("entered %s()\n", __func__);
1908
1909         /* we're using our own buffer since we need access to accumulating
1910          * characters
1911          */
1912         fd = fileno(rsm->F);
1913         m = rsm->buffer;
1914         a = rsm->adv;
1915         p = rsm->pos;
1916         size = rsm->size;
1917         c = (char) rsplitter.n.info;
1918         rp = 0;
1919
1920         if (!m)
1921                 m = qrealloc(m, 256, &size);
1922
1923         do {
1924                 b = m + a;
1925                 so = eo = p;
1926                 r = 1;
1927                 if (p > 0) {
1928                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1929                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1930                                                         b, 1, pmatch, 0) == 0) {
1931                                         so = pmatch[0].rm_so;
1932                                         eo = pmatch[0].rm_eo;
1933                                         if (b[eo] != '\0')
1934                                                 break;
1935                                 }
1936                         } else if (c != '\0') {
1937                                 s = strchr(b+pp, c);
1938                                 if (!s)
1939                                         s = memchr(b+pp, '\0', p - pp);
1940                                 if (s) {
1941                                         so = eo = s-b;
1942                                         eo++;
1943                                         break;
1944                                 }
1945                         } else {
1946                                 while (b[rp] == '\n')
1947                                         rp++;
1948                                 s = strstr(b+rp, "\n\n");
1949                                 if (s) {
1950                                         so = eo = s-b;
1951                                         while (b[eo] == '\n')
1952                                                 eo++;
1953                                         if (b[eo] != '\0')
1954                                                 break;
1955                                 }
1956                         }
1957                 }
1958
1959                 if (a > 0) {
1960                         memmove(m, m+a, p+1);
1961                         b = m;
1962                         a = 0;
1963                 }
1964
1965                 m = qrealloc(m, a+p+128, &size);
1966                 b = m + a;
1967                 pp = p;
1968                 p += safe_read(fd, b+p, size-p-1);
1969                 if (p < pp) {
1970                         p = 0;
1971                         r = 0;
1972                         setvar_i(intvar[ERRNO], errno);
1973                 }
1974                 b[p] = '\0';
1975
1976         } while (p > pp);
1977
1978         if (p == 0) {
1979                 r--;
1980         } else {
1981                 c = b[so]; b[so] = '\0';
1982                 setvar_s(v, b+rp);
1983                 v->type |= VF_USER;
1984                 b[so] = c;
1985                 c = b[eo]; b[eo] = '\0';
1986                 setvar_s(intvar[RT], b+so);
1987                 b[eo] = c;
1988         }
1989
1990         rsm->buffer = m;
1991         rsm->adv = a + eo;
1992         rsm->pos = p - eo;
1993         rsm->size = size;
1994
1995         debug_printf_eval("returning from %s(): %d\n", __func__, r);
1996
1997         return r;
1998 }
1999
2000 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2001 {
2002         int r = 0;
2003         char c;
2004         const char *s = format;
2005
2006         if (int_as_int && n == (int)n) {
2007                 r = snprintf(b, size, "%d", (int)n);
2008         } else {
2009                 do { c = *s; } while (c && *++s);
2010                 if (strchr("diouxX", c)) {
2011                         r = snprintf(b, size, format, (int)n);
2012                 } else if (strchr("eEfgG", c)) {
2013                         r = snprintf(b, size, format, n);
2014                 } else {
2015                         syntax_error(EMSG_INV_FMT);
2016                 }
2017         }
2018         return r;
2019 }
2020
2021 /* formatted output into an allocated buffer, return ptr to buffer */
2022 static char *awk_printf(node *n)
2023 {
2024         char *b = NULL;
2025         char *fmt, *s, *f;
2026         const char *s1;
2027         int i, j, incr, bsize;
2028         char c, c1;
2029         var *v, *arg;
2030
2031         v = nvalloc(1);
2032         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2033
2034         i = 0;
2035         while (*f) {
2036                 s = f;
2037                 while (*f && (*f != '%' || *++f == '%'))
2038                         f++;
2039                 while (*f && !isalpha(*f)) {
2040                         if (*f == '*')
2041                                 syntax_error("%*x formats are not supported");
2042                         f++;
2043                 }
2044
2045                 incr = (f - s) + MAXVARFMT;
2046                 b = qrealloc(b, incr + i, &bsize);
2047                 c = *f;
2048                 if (c != '\0')
2049                         f++;
2050                 c1 = *f;
2051                 *f = '\0';
2052                 arg = evaluate(nextarg(&n), v);
2053
2054                 j = i;
2055                 if (c == 'c' || !c) {
2056                         i += sprintf(b+i, s, is_numeric(arg) ?
2057                                         (char)getvar_i(arg) : *getvar_s(arg));
2058                 } else if (c == 's') {
2059                         s1 = getvar_s(arg);
2060                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
2061                         i += sprintf(b+i, s, s1);
2062                 } else {
2063                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2064                 }
2065                 *f = c1;
2066
2067                 /* if there was an error while sprintf, return value is negative */
2068                 if (i < j)
2069                         i = j;
2070         }
2071
2072         free(fmt);
2073         nvfree(v);
2074         b = xrealloc(b, i + 1);
2075         b[i] = '\0';
2076         return b;
2077 }
2078
2079 /* Common substitution routine.
2080  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2081  * store result into (dest), return number of substitutions.
2082  * If nm = 0, replace all matches.
2083  * If src or dst is NULL, use $0.
2084  * If subexp != 0, enable subexpression matching (\1-\9).
2085  */
2086 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2087 {
2088         char *resbuf;
2089         const char *sp;
2090         int match_no, residx, replen, resbufsize;
2091         int regexec_flags;
2092         regmatch_t pmatch[10];
2093         regex_t sreg, *regex;
2094
2095         resbuf = NULL;
2096         residx = 0;
2097         match_no = 0;
2098         regexec_flags = 0;
2099         regex = as_regex(rn, &sreg);
2100         sp = getvar_s(src ? src : intvar[F0]);
2101         replen = strlen(repl);
2102         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2103                 int so = pmatch[0].rm_so;
2104                 int eo = pmatch[0].rm_eo;
2105
2106                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2107                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2108                 memcpy(resbuf + residx, sp, eo);
2109                 residx += eo;
2110                 if (++match_no >= nm) {
2111                         const char *s;
2112                         int nbs;
2113
2114                         /* replace */
2115                         residx -= (eo - so);
2116                         nbs = 0;
2117                         for (s = repl; *s; s++) {
2118                                 char c = resbuf[residx++] = *s;
2119                                 if (c == '\\') {
2120                                         nbs++;
2121                                         continue;
2122                                 }
2123                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2124                                         int j;
2125                                         residx -= ((nbs + 3) >> 1);
2126                                         j = 0;
2127                                         if (c != '&') {
2128                                                 j = c - '0';
2129                                                 nbs++;
2130                                         }
2131                                         if (nbs % 2) {
2132                                                 resbuf[residx++] = c;
2133                                         } else {
2134                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2135                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2136                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2137                                                 residx += n;
2138                                         }
2139                                 }
2140                                 nbs = 0;
2141                         }
2142                 }
2143
2144                 regexec_flags = REG_NOTBOL;
2145                 sp += eo;
2146                 if (match_no == nm)
2147                         break;
2148                 if (eo == so) {
2149                         /* Empty match (e.g. "b*" will match anywhere).
2150                          * Advance by one char. */
2151 //BUG (bug 1333):
2152 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2153 //... and will erroneously match "b" even though it is NOT at the word start.
2154 //we need REG_NOTBOW but it does not exist...
2155 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2156 //it should be able to do it correctly.
2157                         /* Subtle: this is safe only because
2158                          * qrealloc allocated at least one extra byte */
2159                         resbuf[residx] = *sp;
2160                         if (*sp == '\0')
2161                                 goto ret;
2162                         sp++;
2163                         residx++;
2164                 }
2165         }
2166
2167         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2168         strcpy(resbuf + residx, sp);
2169  ret:
2170         //bb_error_msg("end sp:'%s'%p", sp,sp);
2171         setvar_p(dest ? dest : intvar[F0], resbuf);
2172         if (regex == &sreg)
2173                 regfree(regex);
2174         return match_no;
2175 }
2176
2177 static NOINLINE int do_mktime(const char *ds)
2178 {
2179         struct tm then;
2180         int count;
2181
2182         /*memset(&then, 0, sizeof(then)); - not needed */
2183         then.tm_isdst = -1; /* default is unknown */
2184
2185         /* manpage of mktime says these fields are ints,
2186          * so we can sscanf stuff directly into them */
2187         count = sscanf(ds, "%u %u %u %u %u %u %d",
2188                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2189                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2190                 &then.tm_isdst);
2191
2192         if (count < 6
2193          || (unsigned)then.tm_mon < 1
2194          || (unsigned)then.tm_year < 1900
2195         ) {
2196                 return -1;
2197         }
2198
2199         then.tm_mon -= 1;
2200         then.tm_year -= 1900;
2201
2202         return mktime(&then);
2203 }
2204
2205 static NOINLINE var *exec_builtin(node *op, var *res)
2206 {
2207 #define tspl (G.exec_builtin__tspl)
2208
2209         var *tv;
2210         node *an[4];
2211         var *av[4];
2212         const char *as[4];
2213         regmatch_t pmatch[2];
2214         regex_t sreg, *re;
2215         node *spl;
2216         uint32_t isr, info;
2217         int nargs;
2218         time_t tt;
2219         int i, l, ll, n;
2220
2221         tv = nvalloc(4);
2222         isr = info = op->info;
2223         op = op->l.n;
2224
2225         av[2] = av[3] = NULL;
2226         for (i = 0; i < 4 && op; i++) {
2227                 an[i] = nextarg(&op);
2228                 if (isr & 0x09000000)
2229                         av[i] = evaluate(an[i], &tv[i]);
2230                 if (isr & 0x08000000)
2231                         as[i] = getvar_s(av[i]);
2232                 isr >>= 1;
2233         }
2234
2235         nargs = i;
2236         if ((uint32_t)nargs < (info >> 30))
2237                 syntax_error(EMSG_TOO_FEW_ARGS);
2238
2239         info &= OPNMASK;
2240         switch (info) {
2241
2242         case B_a2:
2243                 if (ENABLE_FEATURE_AWK_LIBM)
2244                         setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2245                 else
2246                         syntax_error(EMSG_NO_MATH);
2247                 break;
2248
2249         case B_sp: {
2250                 char *s, *s1;
2251
2252                 if (nargs > 2) {
2253                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2254                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2255                 } else {
2256                         spl = &fsplitter.n;
2257                 }
2258
2259                 n = awk_split(as[0], spl, &s);
2260                 s1 = s;
2261                 clear_array(iamarray(av[1]));
2262                 for (i = 1; i <= n; i++)
2263                         setari_u(av[1], i, nextword(&s));
2264                 free(s1);
2265                 setvar_i(res, n);
2266                 break;
2267         }
2268
2269         case B_ss: {
2270                 char *s;
2271
2272                 l = strlen(as[0]);
2273                 i = getvar_i(av[1]) - 1;
2274                 if (i > l)
2275                         i = l;
2276                 if (i < 0)
2277                         i = 0;
2278                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2279                 if (n < 0)
2280                         n = 0;
2281                 s = xstrndup(as[0]+i, n);
2282                 setvar_p(res, s);
2283                 break;
2284         }
2285
2286         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2287          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2288         case B_an:
2289                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2290                 break;
2291
2292         case B_co:
2293                 setvar_i(res, ~getvar_i_int(av[0]));
2294                 break;
2295
2296         case B_ls:
2297                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2298                 break;
2299
2300         case B_or:
2301                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2302                 break;
2303
2304         case B_rs:
2305                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2306                 break;
2307
2308         case B_xo:
2309                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2310                 break;
2311
2312         case B_lo:
2313         case B_up: {
2314                 char *s, *s1;
2315                 s1 = s = xstrdup(as[0]);
2316                 while (*s1) {
2317                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2318                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2319                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2320                         s1++;
2321                 }
2322                 setvar_p(res, s);
2323                 break;
2324         }
2325
2326         case B_ix:
2327                 n = 0;
2328                 ll = strlen(as[1]);
2329                 l = strlen(as[0]) - ll;
2330                 if (ll > 0 && l >= 0) {
2331                         if (!icase) {
2332                                 char *s = strstr(as[0], as[1]);
2333                                 if (s)
2334                                         n = (s - as[0]) + 1;
2335                         } else {
2336                                 /* this piece of code is terribly slow and
2337                                  * really should be rewritten
2338                                  */
2339                                 for (i = 0; i <= l; i++) {
2340                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2341                                                 n = i+1;
2342                                                 break;
2343                                         }
2344                                 }
2345                         }
2346                 }
2347                 setvar_i(res, n);
2348                 break;
2349
2350         case B_ti:
2351                 if (nargs > 1)
2352                         tt = getvar_i(av[1]);
2353                 else
2354                         time(&tt);
2355                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2356                 i = strftime(g_buf, MAXVARFMT,
2357                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2358                         localtime(&tt));
2359                 g_buf[i] = '\0';
2360                 setvar_s(res, g_buf);
2361                 break;
2362
2363         case B_mt:
2364                 setvar_i(res, do_mktime(as[0]));
2365                 break;
2366
2367         case B_ma:
2368                 re = as_regex(an[1], &sreg);
2369                 n = regexec(re, as[0], 1, pmatch, 0);
2370                 if (n == 0) {
2371                         pmatch[0].rm_so++;
2372                         pmatch[0].rm_eo++;
2373                 } else {
2374                         pmatch[0].rm_so = 0;
2375                         pmatch[0].rm_eo = -1;
2376                 }
2377                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2378                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2379                 setvar_i(res, pmatch[0].rm_so);
2380                 if (re == &sreg)
2381                         regfree(re);
2382                 break;
2383
2384         case B_ge:
2385                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2386                 break;
2387
2388         case B_gs:
2389                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2390                 break;
2391
2392         case B_su:
2393                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2394                 break;
2395         }
2396
2397         nvfree(tv);
2398         return res;
2399 #undef tspl
2400 }
2401
2402 /*
2403  * Evaluate node - the heart of the program. Supplied with subtree
2404  * and place where to store result. returns ptr to result.
2405  */
2406 #define XC(n) ((n) >> 8)
2407
2408 static var *evaluate(node *op, var *res)
2409 {
2410 /* This procedure is recursive so we should count every byte */
2411 #define fnargs (G.evaluate__fnargs)
2412 /* seed is initialized to 1 */
2413 #define seed   (G.evaluate__seed)
2414 #define sreg   (G.evaluate__sreg)
2415
2416         var *v1;
2417
2418         if (!op)
2419                 return setvar_s(res, NULL);
2420
2421         debug_printf_eval("entered %s()\n", __func__);
2422
2423         v1 = nvalloc(2);
2424
2425         while (op) {
2426                 struct {
2427                         var *v;
2428                         const char *s;
2429                 } L = L; /* for compiler */
2430                 struct {
2431                         var *v;
2432                         const char *s;
2433                 } R = R;
2434                 double L_d = L_d;
2435                 uint32_t opinfo;
2436                 int opn;
2437                 node *op1;
2438
2439                 opinfo = op->info;
2440                 opn = (opinfo & OPNMASK);
2441                 g_lineno = op->lineno;
2442                 op1 = op->l.n;
2443                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2444
2445                 /* execute inevitable things */
2446                 if (opinfo & OF_RES1)
2447                         L.v = evaluate(op1, v1);
2448                 if (opinfo & OF_RES2)
2449                         R.v = evaluate(op->r.n, v1+1);
2450                 if (opinfo & OF_STR1) {
2451                         L.s = getvar_s(L.v);
2452                         debug_printf_eval("L.s:'%s'\n", L.s);
2453                 }
2454                 if (opinfo & OF_STR2) {
2455                         R.s = getvar_s(R.v);
2456                         debug_printf_eval("R.s:'%s'\n", R.s);
2457                 }
2458                 if (opinfo & OF_NUM1) {
2459                         L_d = getvar_i(L.v);
2460                         debug_printf_eval("L_d:%f\n", L_d);
2461                 }
2462
2463                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2464                 switch (XC(opinfo & OPCLSMASK)) {
2465
2466                 /* -- iterative node type -- */
2467
2468                 /* test pattern */
2469                 case XC( OC_TEST ):
2470                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2471                                 /* it's range pattern */
2472                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2473                                         op->info |= OF_CHECKED;
2474                                         if (ptest(op1->r.n))
2475                                                 op->info &= ~OF_CHECKED;
2476                                         op = op->a.n;
2477                                 } else {
2478                                         op = op->r.n;
2479                                 }
2480                         } else {
2481                                 op = ptest(op1) ? op->a.n : op->r.n;
2482                         }
2483                         break;
2484
2485                 /* just evaluate an expression, also used as unconditional jump */
2486                 case XC( OC_EXEC ):
2487                         break;
2488
2489                 /* branch, used in if-else and various loops */
2490                 case XC( OC_BR ):
2491                         op = istrue(L.v) ? op->a.n : op->r.n;
2492                         break;
2493
2494                 /* initialize for-in loop */
2495                 case XC( OC_WALKINIT ):
2496                         hashwalk_init(L.v, iamarray(R.v));
2497                         break;
2498
2499                 /* get next array item */
2500                 case XC( OC_WALKNEXT ):
2501                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2502                         break;
2503
2504                 case XC( OC_PRINT ):
2505                 case XC( OC_PRINTF ): {
2506                         FILE *F = stdout;
2507
2508                         if (op->r.n) {
2509                                 rstream *rsm = newfile(R.s);
2510                                 if (!rsm->F) {
2511                                         if (opn == '|') {
2512                                                 rsm->F = popen(R.s, "w");
2513                                                 if (rsm->F == NULL)
2514                                                         bb_perror_msg_and_die("popen");
2515                                                 rsm->is_pipe = 1;
2516                                         } else {
2517                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2518                                         }
2519                                 }
2520                                 F = rsm->F;
2521                         }
2522
2523                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2524                                 if (!op1) {
2525                                         fputs(getvar_s(intvar[F0]), F);
2526                                 } else {
2527                                         while (op1) {
2528                                                 var *v = evaluate(nextarg(&op1), v1);
2529                                                 if (v->type & VF_NUMBER) {
2530                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2531                                                                         getvar_i(v), TRUE);
2532                                                         fputs(g_buf, F);
2533                                                 } else {
2534                                                         fputs(getvar_s(v), F);
2535                                                 }
2536
2537                                                 if (op1)
2538                                                         fputs(getvar_s(intvar[OFS]), F);
2539                                         }
2540                                 }
2541                                 fputs(getvar_s(intvar[ORS]), F);
2542
2543                         } else {        /* OC_PRINTF */
2544                                 char *s = awk_printf(op1);
2545                                 fputs(s, F);
2546                                 free(s);
2547                         }
2548                         fflush(F);
2549                         break;
2550                 }
2551
2552                 case XC( OC_DELETE ): {
2553                         uint32_t info = op1->info & OPCLSMASK;
2554                         var *v;
2555
2556                         if (info == OC_VAR) {
2557                                 v = op1->l.v;
2558                         } else if (info == OC_FNARG) {
2559                                 v = &fnargs[op1->l.aidx];
2560                         } else {
2561                                 syntax_error(EMSG_NOT_ARRAY);
2562                         }
2563
2564                         if (op1->r.n) {
2565                                 const char *s;
2566                                 clrvar(L.v);
2567                                 s = getvar_s(evaluate(op1->r.n, v1));
2568                                 hash_remove(iamarray(v), s);
2569                         } else {
2570                                 clear_array(iamarray(v));
2571                         }
2572                         break;
2573                 }
2574
2575                 case XC( OC_NEWSOURCE ):
2576                         g_progname = op->l.new_progname;
2577                         break;
2578
2579                 case XC( OC_RETURN ):
2580                         copyvar(res, L.v);
2581                         break;
2582
2583                 case XC( OC_NEXTFILE ):
2584                         nextfile = TRUE;
2585                 case XC( OC_NEXT ):
2586                         nextrec = TRUE;
2587                 case XC( OC_DONE ):
2588                         clrvar(res);
2589                         break;
2590
2591                 case XC( OC_EXIT ):
2592                         awk_exit(L_d);
2593
2594                 /* -- recursive node type -- */
2595
2596                 case XC( OC_VAR ):
2597                         L.v = op->l.v;
2598                         if (L.v == intvar[NF])
2599                                 split_f0();
2600                         goto v_cont;
2601
2602                 case XC( OC_FNARG ):
2603                         L.v = &fnargs[op->l.aidx];
2604  v_cont:
2605                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2606                         break;
2607
2608                 case XC( OC_IN ):
2609                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2610                         break;
2611
2612                 case XC( OC_REGEXP ):
2613                         op1 = op;
2614                         L.s = getvar_s(intvar[F0]);
2615                         goto re_cont;
2616
2617                 case XC( OC_MATCH ):
2618                         op1 = op->r.n;
2619  re_cont:
2620                         {
2621                                 regex_t *re = as_regex(op1, &sreg);
2622                                 int i = regexec(re, L.s, 0, NULL, 0);
2623                                 if (re == &sreg)
2624                                         regfree(re);
2625                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2626                         }
2627                         break;
2628
2629                 case XC( OC_MOVE ):
2630                         debug_printf_eval("MOVE\n");
2631                         /* if source is a temporary string, just relink it to dest */
2632 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2633 //then L.v ends up being a string, which is wrong
2634 //                      if (R.v == v1+1 && R.v->string) {
2635 //                              res = setvar_p(L.v, R.v->string);
2636 //                              R.v->string = NULL;
2637 //                      } else {
2638                                 res = copyvar(L.v, R.v);
2639 //                      }
2640                         break;
2641
2642                 case XC( OC_TERNARY ):
2643                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2644                                 syntax_error(EMSG_POSSIBLE_ERROR);
2645                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2646                         break;
2647
2648                 case XC( OC_FUNC ): {
2649                         var *vbeg, *v;
2650                         const char *sv_progname;
2651
2652                         if (!op->r.f->body.first)
2653                                 syntax_error(EMSG_UNDEF_FUNC);
2654
2655                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2656                         while (op1) {
2657                                 var *arg = evaluate(nextarg(&op1), v1);
2658                                 copyvar(v, arg);
2659                                 v->type |= VF_CHILD;
2660                                 v->x.parent = arg;
2661                                 if (++v - vbeg >= op->r.f->nargs)
2662                                         break;
2663                         }
2664
2665                         v = fnargs;
2666                         fnargs = vbeg;
2667                         sv_progname = g_progname;
2668
2669                         res = evaluate(op->r.f->body.first, res);
2670
2671                         g_progname = sv_progname;
2672                         nvfree(fnargs);
2673                         fnargs = v;
2674
2675                         break;
2676                 }
2677
2678                 case XC( OC_GETLINE ):
2679                 case XC( OC_PGETLINE ): {
2680                         rstream *rsm;
2681                         int i;
2682
2683                         if (op1) {
2684                                 rsm = newfile(L.s);
2685                                 if (!rsm->F) {
2686                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2687                                                 rsm->F = popen(L.s, "r");
2688                                                 rsm->is_pipe = TRUE;
2689                                         } else {
2690                                                 rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2691                                         }
2692                                 }
2693                         } else {
2694                                 if (!iF)
2695                                         iF = next_input_file();
2696                                 rsm = iF;
2697                         }
2698
2699                         if (!rsm || !rsm->F) {
2700                                 setvar_i(intvar[ERRNO], errno);
2701                                 setvar_i(res, -1);
2702                                 break;
2703                         }
2704
2705                         if (!op->r.n)
2706                                 R.v = intvar[F0];
2707
2708                         i = awk_getline(rsm, R.v);
2709                         if (i > 0 && !op1) {
2710                                 incvar(intvar[FNR]);
2711                                 incvar(intvar[NR]);
2712                         }
2713                         setvar_i(res, i);
2714                         break;
2715                 }
2716
2717                 /* simple builtins */
2718                 case XC( OC_FBLTIN ): {
2719                         double R_d = R_d; /* for compiler */
2720
2721                         switch (opn) {
2722                         case F_in:
2723                                 R_d = (int)L_d;
2724                                 break;
2725
2726                         case F_rn:
2727                                 R_d = (double)rand() / (double)RAND_MAX;
2728                                 break;
2729
2730                         case F_co:
2731                                 if (ENABLE_FEATURE_AWK_LIBM) {
2732                                         R_d = cos(L_d);
2733                                         break;
2734                                 }
2735
2736                         case F_ex:
2737                                 if (ENABLE_FEATURE_AWK_LIBM) {
2738                                         R_d = exp(L_d);
2739                                         break;
2740                                 }
2741
2742                         case F_lg:
2743                                 if (ENABLE_FEATURE_AWK_LIBM) {
2744                                         R_d = log(L_d);
2745                                         break;
2746                                 }
2747
2748                         case F_si:
2749                                 if (ENABLE_FEATURE_AWK_LIBM) {
2750                                         R_d = sin(L_d);
2751                                         break;
2752                                 }
2753
2754                         case F_sq:
2755                                 if (ENABLE_FEATURE_AWK_LIBM) {
2756                                         R_d = sqrt(L_d);
2757                                         break;
2758                                 }
2759
2760                                 syntax_error(EMSG_NO_MATH);
2761                                 break;
2762
2763                         case F_sr:
2764                                 R_d = (double)seed;
2765                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2766                                 srand(seed);
2767                                 break;
2768
2769                         case F_ti:
2770                                 R_d = time(NULL);
2771                                 break;
2772
2773                         case F_le:
2774                                 if (!op1)
2775                                         L.s = getvar_s(intvar[F0]);
2776                                 R_d = strlen(L.s);
2777                                 break;
2778
2779                         case F_sy:
2780                                 fflush_all();
2781                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2782                                                 ? (system(L.s) >> 8) : 0;
2783                                 break;
2784
2785                         case F_ff:
2786                                 if (!op1) {
2787                                         fflush(stdout);
2788                                 } else if (L.s && *L.s) {
2789                                         rstream *rsm = newfile(L.s);
2790                                         fflush(rsm->F);
2791                                 } else {
2792                                         fflush_all();
2793                                 }
2794                                 break;
2795
2796                         case F_cl: {
2797                                 rstream *rsm;
2798                                 int err = 0;
2799                                 rsm = (rstream *)hash_search(fdhash, L.s);
2800                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2801                                 if (rsm) {
2802                                         debug_printf_eval("OC_FBLTIN F_cl "
2803                                                 "rsm->is_pipe:%d, ->F:%p\n",
2804                                                 rsm->is_pipe, rsm->F);
2805                                         /* Can be NULL if open failed. Example:
2806                                          * getline line <"doesnt_exist";
2807                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2808                                          */
2809                                         if (rsm->F)
2810                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2811                                         free(rsm->buffer);
2812                                         hash_remove(fdhash, L.s);
2813                                 }
2814                                 if (err)
2815                                         setvar_i(intvar[ERRNO], errno);
2816                                 R_d = (double)err;
2817                                 break;
2818                         }
2819                         } /* switch */
2820                         setvar_i(res, R_d);
2821                         break;
2822                 }
2823
2824                 case XC( OC_BUILTIN ):
2825                         res = exec_builtin(op, res);
2826                         break;
2827
2828                 case XC( OC_SPRINTF ):
2829                         setvar_p(res, awk_printf(op1));
2830                         break;
2831
2832                 case XC( OC_UNARY ): {
2833                         double Ld, R_d;
2834
2835                         Ld = R_d = getvar_i(R.v);
2836                         switch (opn) {
2837                         case 'P':
2838                                 Ld = ++R_d;
2839                                 goto r_op_change;
2840                         case 'p':
2841                                 R_d++;
2842                                 goto r_op_change;
2843                         case 'M':
2844                                 Ld = --R_d;
2845                                 goto r_op_change;
2846                         case 'm':
2847                                 R_d--;
2848  r_op_change:
2849                                 setvar_i(R.v, R_d);
2850                                 break;
2851                         case '!':
2852                                 Ld = !istrue(R.v);
2853                                 break;
2854                         case '-':
2855                                 Ld = -R_d;
2856                                 break;
2857                         }
2858                         setvar_i(res, Ld);
2859                         break;
2860                 }
2861
2862                 case XC( OC_FIELD ): {
2863                         int i = (int)getvar_i(R.v);
2864                         if (i == 0) {
2865                                 res = intvar[F0];
2866                         } else {
2867                                 split_f0();
2868                                 if (i > nfields)
2869                                         fsrealloc(i);
2870                                 res = &Fields[i - 1];
2871                         }
2872                         break;
2873                 }
2874
2875                 /* concatenation (" ") and index joining (",") */
2876                 case XC( OC_CONCAT ):
2877                 case XC( OC_COMMA ): {
2878                         const char *sep = "";
2879                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2880                                 sep = getvar_s(intvar[SUBSEP]);
2881                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2882                         break;
2883                 }
2884
2885                 case XC( OC_LAND ):
2886                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2887                         break;
2888
2889                 case XC( OC_LOR ):
2890                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2891                         break;
2892
2893                 case XC( OC_BINARY ):
2894                 case XC( OC_REPLACE ): {
2895                         double R_d = getvar_i(R.v);
2896                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2897                         switch (opn) {
2898                         case '+':
2899                                 L_d += R_d;
2900                                 break;
2901                         case '-':
2902                                 L_d -= R_d;
2903                                 break;
2904                         case '*':
2905                                 L_d *= R_d;
2906                                 break;
2907                         case '/':
2908                                 if (R_d == 0)
2909                                         syntax_error(EMSG_DIV_BY_ZERO);
2910                                 L_d /= R_d;
2911                                 break;
2912                         case '&':
2913                                 if (ENABLE_FEATURE_AWK_LIBM)
2914                                         L_d = pow(L_d, R_d);
2915                                 else
2916                                         syntax_error(EMSG_NO_MATH);
2917                                 break;
2918                         case '%':
2919                                 if (R_d == 0)
2920                                         syntax_error(EMSG_DIV_BY_ZERO);
2921                                 L_d -= (int)(L_d / R_d) * R_d;
2922                                 break;
2923                         }
2924                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2925                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2926                         break;
2927                 }
2928
2929                 case XC( OC_COMPARE ): {
2930                         int i = i; /* for compiler */
2931                         double Ld;
2932
2933                         if (is_numeric(L.v) && is_numeric(R.v)) {
2934                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2935                         } else {
2936                                 const char *l = getvar_s(L.v);
2937                                 const char *r = getvar_s(R.v);
2938                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2939                         }
2940                         switch (opn & 0xfe) {
2941                         case 0:
2942                                 i = (Ld > 0);
2943                                 break;
2944                         case 2:
2945                                 i = (Ld >= 0);
2946                                 break;
2947                         case 4:
2948                                 i = (Ld == 0);
2949                                 break;
2950                         }
2951                         setvar_i(res, (i == 0) ^ (opn & 1));
2952                         break;
2953                 }
2954
2955                 default:
2956                         syntax_error(EMSG_POSSIBLE_ERROR);
2957                 }
2958                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2959                         op = op->a.n;
2960                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2961                         break;
2962                 if (nextrec)
2963                         break;
2964         } /* while (op) */
2965
2966         nvfree(v1);
2967         debug_printf_eval("returning from %s(): %p\n", __func__, res);
2968         return res;
2969 #undef fnargs
2970 #undef seed
2971 #undef sreg
2972 }
2973
2974
2975 /* -------- main & co. -------- */
2976
2977 static int awk_exit(int r)
2978 {
2979         var tv;
2980         unsigned i;
2981         hash_item *hi;
2982
2983         zero_out_var(&tv);
2984
2985         if (!exiting) {
2986                 exiting = TRUE;
2987                 nextrec = FALSE;
2988                 evaluate(endseq.first, &tv);
2989         }
2990
2991         /* waiting for children */
2992         for (i = 0; i < fdhash->csize; i++) {
2993                 hi = fdhash->items[i];
2994                 while (hi) {
2995                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2996                                 pclose(hi->data.rs.F);
2997                         hi = hi->next;
2998                 }
2999         }
3000
3001         exit(r);
3002 }
3003
3004 /* if expr looks like "var=value", perform assignment and return 1,
3005  * otherwise return 0 */
3006 static int is_assignment(const char *expr)
3007 {
3008         char *exprc, *val;
3009
3010         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3011                 return FALSE;
3012         }
3013
3014         exprc = xstrdup(expr);
3015         val = exprc + (val - expr);
3016         *val++ = '\0';
3017
3018         unescape_string_in_place(val);
3019         setvar_u(newvar(exprc), val);
3020         free(exprc);
3021         return TRUE;
3022 }
3023
3024 /* switch to next input file */
3025 static rstream *next_input_file(void)
3026 {
3027 #define rsm          (G.next_input_file__rsm)
3028 #define files_happen (G.next_input_file__files_happen)
3029
3030         FILE *F;
3031         const char *fname, *ind;
3032
3033         if (rsm.F)
3034                 fclose(rsm.F);
3035         rsm.F = NULL;
3036         rsm.pos = rsm.adv = 0;
3037
3038         for (;;) {
3039                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3040                         if (files_happen)
3041                                 return NULL;
3042                         fname = "-";
3043                         F = stdin;
3044                         break;
3045                 }
3046                 ind = getvar_s(incvar(intvar[ARGIND]));
3047                 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3048                 if (fname && *fname && !is_assignment(fname)) {
3049                         F = xfopen_stdin(fname);
3050                         break;
3051                 }
3052         }
3053
3054         files_happen = TRUE;
3055         setvar_s(intvar[FILENAME], fname);
3056         rsm.F = F;
3057         return &rsm;
3058 #undef rsm
3059 #undef files_happen
3060 }
3061
3062 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3063 int awk_main(int argc, char **argv)
3064 {
3065         unsigned opt;
3066         char *opt_F;
3067         llist_t *list_v = NULL;
3068         llist_t *list_f = NULL;
3069         int i, j;
3070         var *v;
3071         var tv;
3072         char **envp;
3073         char *vnames = (char *)vNames; /* cheat */
3074         char *vvalues = (char *)vValues;
3075
3076         INIT_G();
3077
3078         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3079          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3080         if (ENABLE_LOCALE_SUPPORT)
3081                 setlocale(LC_NUMERIC, "C");
3082
3083         zero_out_var(&tv);
3084
3085         /* allocate global buffer */
3086         g_buf = xmalloc(MAXVARFMT + 1);
3087
3088         vhash = hash_init();
3089         ahash = hash_init();
3090         fdhash = hash_init();
3091         fnhash = hash_init();
3092
3093         /* initialize variables */
3094         for (i = 0; *vnames; i++) {
3095                 intvar[i] = v = newvar(nextword(&vnames));
3096                 if (*vvalues != '\377')
3097                         setvar_s(v, nextword(&vvalues));
3098                 else
3099                         setvar_i(v, 0);
3100
3101                 if (*vnames == '*') {
3102                         v->type |= VF_SPECIAL;
3103                         vnames++;
3104                 }
3105         }
3106
3107         handle_special(intvar[FS]);
3108         handle_special(intvar[RS]);
3109
3110         newfile("/dev/stdin")->F = stdin;
3111         newfile("/dev/stdout")->F = stdout;
3112         newfile("/dev/stderr")->F = stderr;
3113
3114         /* Huh, people report that sometimes environ is NULL. Oh well. */
3115         if (environ) for (envp = environ; *envp; envp++) {
3116                 /* environ is writable, thus we don't strdup it needlessly */
3117                 char *s = *envp;
3118                 char *s1 = strchr(s, '=');
3119                 if (s1) {
3120                         *s1 = '\0';
3121                         /* Both findvar and setvar_u take const char*
3122                          * as 2nd arg -> environment is not trashed */
3123                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3124                         *s1 = '=';
3125                 }
3126         }
3127         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3128         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
3129         argv += optind;
3130         argc -= optind;
3131         if (opt & 0x1) { /* -F */
3132                 unescape_string_in_place(opt_F);
3133                 setvar_s(intvar[FS], opt_F);
3134         }
3135         while (list_v) { /* -v */
3136                 if (!is_assignment(llist_pop(&list_v)))
3137                         bb_show_usage();
3138         }
3139         if (list_f) { /* -f */
3140                 do {
3141                         char *s = NULL;
3142                         FILE *from_file;
3143
3144                         g_progname = llist_pop(&list_f);
3145                         from_file = xfopen_stdin(g_progname);
3146                         /* one byte is reserved for some trick in next_token */
3147                         for (i = j = 1; j > 0; i += j) {
3148                                 s = xrealloc(s, i + 4096);
3149                                 j = fread(s + i, 1, 4094, from_file);
3150                         }
3151                         s[i] = '\0';
3152                         fclose(from_file);
3153                         parse_program(s + 1);
3154                         free(s);
3155                 } while (list_f);
3156                 argc++;
3157         } else { // no -f: take program from 1st parameter
3158                 if (!argc)
3159                         bb_show_usage();
3160                 g_progname = "cmd. line";
3161                 parse_program(*argv++);
3162         }
3163         if (opt & 0x8) // -W
3164                 bb_error_msg("warning: option -W is ignored");
3165
3166         /* fill in ARGV array */
3167         setvar_i(intvar[ARGC], argc);
3168         setari_u(intvar[ARGV], 0, "awk");
3169         i = 0;
3170         while (*argv)
3171                 setari_u(intvar[ARGV], ++i, *argv++);
3172
3173         evaluate(beginseq.first, &tv);
3174         if (!mainseq.first && !endseq.first)
3175                 awk_exit(EXIT_SUCCESS);
3176
3177         /* input file could already be opened in BEGIN block */
3178         if (!iF)
3179                 iF = next_input_file();
3180
3181         /* passing through input files */
3182         while (iF) {
3183                 nextfile = FALSE;
3184                 setvar_i(intvar[FNR], 0);
3185
3186                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3187                         nextrec = FALSE;
3188                         incvar(intvar[NR]);
3189                         incvar(intvar[FNR]);
3190                         evaluate(mainseq.first, &tv);
3191
3192                         if (nextfile)
3193                                 break;
3194                 }
3195
3196                 if (i < 0)
3197                         syntax_error(strerror(errno));
3198
3199                 iF = next_input_file();
3200         }
3201
3202         awk_exit(EXIT_SUCCESS);
3203         /*return 0;*/
3204 }