style fixes
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "busybox.h"
11 #include "xregex.h"
12 #include <math.h>
13
14
/* size limits */
#define MAXVARFMT   240     /* buffer size handed to fmt_num() for number->string conversion */
#define MINNVBLOCK  64      /* smallest number of vars held by one temporary-variable block */

/* variable flags: value type */
#define VF_NUMBER   0x0001  /* primary type is number */
#define VF_ARRAY    0x0002  /* variable is an array */

/* variable flags: state */
#define VF_CACHED   0x0100  /* num/str value has a cached str/num equivalent */
#define VF_USER     0x0200  /* user input (may be a numeric string) */
#define VF_SPECIAL  0x0400  /* requires extra handling when changed */
#define VF_WALK     0x0800  /* variable has an allocated x.walker list */
#define VF_FSTR     0x1000  /* string points into the fstring buffer */
#define VF_CHILD    0x2000  /* function argument; x.parent points to the source */
#define VF_DIRTY    0x4000  /* variable was set explicitly */

/* flags that survive a change of the variable's value */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
32
/* A single awk variable: scalar value, array, or function-argument alias */
typedef struct var_s {
	unsigned short type;            /* VF_* flags */
	double number;                  /* numeric value */
	char *string;                   /* string value (may be NULL) */
	union {
		int aidx;               /* func arg idx (for compilation stage) */
		struct xhash_s *array;  /* array ptr */
		struct var_s *parent;   /* for func args, ptr to actual parameter */
		char **walker;          /* list of array elements (for..in) */
	} x;
} var;
45
/* Chain of nodes (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;   /* head of the chain */
	struct node_s *last;    /* tail of the chain */
	char *programname;
} chain;
52
53 /* Function */
54 typedef struct func_s {
55         unsigned short nargs;
56         struct chain_s body;
57 } func;
58
/* I/O stream with its read buffer */
typedef struct rstream_s {
	FILE *F;                        /* underlying stdio stream */
	char *buffer;
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;
} rstream;
68
69 typedef struct hash_item_s {
70         union {
71                 struct var_s v;                 /* variable/array hash */
72                 struct rstream_s rs;    /* redirect streams hash */
73                 struct func_s f;                /* functions hash */
74         } data;
75         struct hash_item_s *next;       /* next in chain */
76         char name[1];                           /* really it's longer */
77 } hash_item;
78
/* Chained hash table */
typedef struct xhash_s {
	unsigned int nel;               /* number of elements */
	unsigned int csize;             /* current number of buckets */
	unsigned int nprime;            /* index of the next size in PRIMES[] */
	unsigned int glen;              /* total length of item names, NULs included */
	struct hash_item_s **items;     /* bucket array */
} xhash;
86
87 /* Tree node */
88 typedef struct node_s {
89         uint32_t info;
90         unsigned short lineno;
91         union {
92                 struct node_s *n;
93                 var *v;
94                 int i;
95                 char *s;
96                 regex_t *re;
97         } l;
98         union {
99                 struct node_s *n;
100                 regex_t *ire;
101                 func *f;
102                 int argno;
103         } r;
104         union {
105                 struct node_s *n;
106         } a;
107 } node;
108
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
111         int size;
112         var *pos;
113         struct nvblock_s *prev;
114         struct nvblock_s *next;
115         var nv[0];
116 } nvblock;
117
118 typedef struct tsplitter_s {
119         node n;
120         regex_t re[2];
121 } tsplitter;
122
/* simple token classes */
/* Order and bit values are very important!!!  See next_token() */
#define TC_SEQSTART     (1 << 0)        /* ( */
#define TC_SEQTERM      (1 << 1)        /* ) */
#define TC_REGEXP       (1 << 2)        /* /.../ */
#define TC_OUTRDR       (1 << 3)        /* | > >> */
#define TC_UOPPOST      (1 << 4)        /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)        /* unary prefix operator */
#define TC_BINOPX       (1 << 6)        /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)        /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)       /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)       /* ] */
#define TC_GRPSTART     (1 << 12)       /* { */
#define TC_GRPTERM      (1 << 13)       /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)       /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* either kind of unary prefix operator */
#define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
	TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT     (TC_STATX | TC_WHILE)
#define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
	TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
	TC_BINOP | TC_OPTERM)

/* what an expression can begin with */
#define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what a group can begin with */
#define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if the previous token class is in CONCAT1 and the next one is in CONCAT2, */
/* a concatenation operator is inserted between them */
#define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
	TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
185
/* operator flags */
#define OF_RES1         (1 << 16)
#define OF_RES2         (1 << 17)
#define OF_STR1         (1 << 18)
#define OF_STR2         (1 << 19)
#define OF_NUM1         (1 << 20)
#define OF_CHECKED      (1 << 21)

/* combined operator flags; first letter describes operand 1, second operand 2 */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class byte and the low opcode bits of `info' */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F
207
/* operator priority is the highest byte (even: r->l, odd: l->r grouping).
 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * The argument is parenthesized and widened to uint32_t before shifting, so
 * values with bit 7 set (e.g. P(0x83)) do not shift into the sign bit of int.
 */
#define P(x)            ((uint32_t)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000
215
/* Operation classes (stored in the OPCLSMASK byte of node info) */

#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
	OC_DELETE    = 0x0100,
	OC_EXEC      = 0x0200,
	OC_NEWSOURCE = 0x0300,
	OC_PRINT     = 0x0400,
	OC_PRINTF    = 0x0500,
	OC_WALKINIT  = 0x0600,

	OC_BR        = 0x0700,
	OC_BREAK     = 0x0800,
	OC_CONTINUE  = 0x0900,
	OC_EXIT      = 0x0a00,
	OC_NEXT      = 0x0b00,
	OC_NEXTFILE  = 0x0c00,
	OC_TEST      = 0x0d00,
	OC_WALKNEXT  = 0x0e00,

	OC_BINARY    = 0x1000,
	OC_BUILTIN   = 0x1100,
	OC_COLON     = 0x1200,
	OC_COMMA     = 0x1300,
	OC_COMPARE   = 0x1400,
	OC_CONCAT    = 0x1500,
	OC_FBLTIN    = 0x1600,
	OC_FIELD     = 0x1700,
	OC_FNARG     = 0x1800,
	OC_FUNC      = 0x1900,
	OC_GETLINE   = 0x1a00,
	OC_IN        = 0x1b00,
	OC_LAND      = 0x1c00,
	OC_LOR       = 0x1d00,
	OC_MATCH     = 0x1e00,
	OC_MOVE      = 0x1f00,
	OC_PGETLINE  = 0x2000,
	OC_REGEXP    = 0x2100,
	OC_REPLACE   = 0x2200,
	OC_RETURN    = 0x2300,
	OC_SPRINTF   = 0x2400,
	OC_TERNARY   = 0x2500,
	OC_UNARY     = 0x2600,
	OC_VAR       = 0x2700,
	OC_DONE      = 0x2800,

	ST_IF        = 0x3000,
	ST_DO        = 0x3100,
	ST_FOR       = 0x3200,
	ST_WHILE     = 0x3300
};
242
/* simple builtins (used with OC_FBLTIN in tokeninfo[]) */
enum {
	F_in = 0,       /* int */
	F_rn,           /* rand */
	F_co,           /* cos */
	F_ex,           /* exp */
	F_lg,           /* log */
	F_si,           /* sin */
	F_sq,           /* sqrt */
	F_sr,           /* srand */
	F_ti,           /* systime */
	F_le,           /* length */
	F_sy,           /* system */
	F_ff,           /* fflush */
	F_cl            /* close */
};

/* builtins (used with OC_BUILTIN in tokeninfo[]) */
enum {
	B_a2 = 0,       /* atan2 */
	B_ix,           /* index */
	B_ma,           /* match */
	B_sp,           /* split */
	B_ss,           /* substr */
	B_ti,           /* strftime */
	B_lo,           /* tolower */
	B_up,           /* toupper */
	B_ge,           /* gensub */
	B_gs,           /* gsub */
	B_su,           /* sub */
	B_an,           /* and */
	B_co,           /* compl */
	B_ls,           /* lshift */
	B_or,           /* or */
	B_rs,           /* rshift */
	B_xo,           /* xor */
};
255
/* tokens and their corresponding info values */

#define NTC             "\377"          /* switch to next token class (tc<<1) */
#define NTCC    '\377'

#define OC_B    OC_BUILTIN

/* Packed token table scanned by next_token().  Every entry is one length
 * byte followed by that many characters of token text; NTC moves on to the
 * next token class and a zero length byte ends the list.  tokeninfo[] holds
 * one value per entry, in the same order.
 */
static char * const tokenlist =
	/* TC_SEQSTART */
	"\1("           NTC
	/* TC_SEQTERM */
	"\1)"           NTC
	/* TC_REGEXP */
	"\1/"           NTC
	/* TC_OUTRDR */
	"\2>>"          "\1>"           "\1|"           NTC
	/* TC_UOPPOST */
	"\2++"          "\2--"          NTC
	/* TC_UOPPRE1 */
	"\2++"          "\2--"          "\1$"           NTC
	/* TC_BINOPX */
	"\2=="          "\1="           "\2+="          "\2-="
	"\2*="          "\2/="          "\2%="          "\2^="
	"\1+"           "\1-"           "\3**="         "\2**"
	"\1/"           "\1%"           "\1^"           "\1*"
	"\2!="          "\2>="          "\2<="          "\1>"
	"\1<"           "\2!~"          "\1~"           "\2&&"
	"\2||"          "\1?"           "\1:"           NTC
	/* TC_IN */
	"\2in"          NTC
	/* TC_COMMA */
	"\1,"           NTC
	/* TC_PIPE */
	"\1|"           NTC
	/* TC_UOPPRE2 */
	"\1+"           "\1-"           "\1!"           NTC
	/* TC_ARRTERM */
	"\1]"           NTC
	/* TC_GRPSTART */
	"\1{"           NTC
	/* TC_GRPTERM */
	"\1}"           NTC
	/* TC_SEMICOL */
	"\1;"           NTC
	/* TC_NEWLINE */
	"\1\n"          NTC
	/* TC_STATX */
	"\2if"          "\2do"          "\3for"         "\5break"
	"\10continue"   "\6delete"      "\5print"
	"\6printf"      "\4next"        "\10nextfile"
	"\6return"      "\4exit"        NTC
	/* TC_WHILE */
	"\5while"       NTC
	/* TC_ELSE */
	"\4else"        NTC
	/* TC_BUILTIN */
	"\3and"         "\5compl"       "\6lshift"      "\2or"
	"\6rshift"      "\3xor"
	"\5close"       "\6system"      "\6fflush"      "\5atan2"
	"\3cos"         "\3exp"         "\3int"         "\3log"
	"\4rand"        "\3sin"         "\4sqrt"        "\5srand"
	"\6gensub"      "\4gsub"        "\5index"       "\6length"
	"\5match"       "\5split"       "\7sprintf"     "\3sub"
	"\6substr"      "\7systime"     "\10strftime"
	"\7tolower"     "\7toupper"     NTC
	/* TC_GETLINE */
	"\7getline"     NTC
	/* TC_FUNCDECL */
	"\4func"        "\10function"   NTC
	/* TC_BEGIN */
	"\5BEGIN"       NTC
	/* TC_END, then the list terminator */
	"\3END"         "\0"
	;
307
308 static const uint32_t tokeninfo[] = {
309
310         0,
311         0,
312         OC_REGEXP,
313         xS|'a',         xS|'w',         xS|'|',
314         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
315         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
316                 OC_FIELD|xV|P(5),
317         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
318                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
319         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
320                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
321         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
322                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
323         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
324                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
325         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
326                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
327         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
328                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
329         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
330                 OC_COLON|xx|P(67)|':',
331         OC_IN|SV|P(49),
332         OC_COMMA|SS|P(80),
333         OC_PGETLINE|SV|P(37),
334         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
335                 OC_UNARY|xV|P(19)|'!',
336         0,
337         0,
338         0,
339         0,
340         0,
341         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
342         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
343         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
344         OC_RETURN|Vx,   OC_EXIT|Nx,
345         ST_WHILE,
346         0,
347
348         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
349         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
350         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
351         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
352         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
353         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
354         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
355         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
356         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
357         OC_GETLINE|SV|P(0),
358         0,      0,
359         0,
360         0
361 };
362
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
enum {
	CONVFMT = 0,
	OFMT,
	FS,
	OFS,
	ORS,
	RS,
	RT,
	FILENAME,
	SUBSEP,
	ARGIND,
	ARGC,
	ARGV,
	ERRNO,
	FNR,
	NR,
	NF,
	IGNORECASE,
	ENVIRON,
	F0,
	_intvarcount_           /* number of internal variables; sizes V[] */
};

/* NUL-separated names, in the order of the enum above */
static char * vNames =
	"CONVFMT\0"
	"OFMT\0"
	"FS\0*"
	"OFS\0"
	"ORS\0"
	"RS\0*"
	"RT\0"
	"FILENAME\0"
	"SUBSEP\0"
	"ARGIND\0"
	"ARGC\0"
	"ARGV\0"
	"ERRNO\0"
	"FNR\0"
	"NR\0"
	"NF\0*"
	"IGNORECASE\0*"
	"ENVIRON\0"
	"$\0*"
	"\0";

/* NUL-separated initial string values for the leading names; \377 ends the list */
static char * vValues =
	"%.6g\0"
	"%.6g\0"
	" \0"
	" \0"
	"\n\0"
	"\n\0"
	"\0"
	"\0"
	"\034\0"
	"\377";
387
/* Hash tables start with FIRST_PRIME buckets and may grow, one step at a
 * time, through the sizes listed in PRIMES[].
 * (FIRST_PRIME must not carry a trailing semicolon, or it cannot be used
 * inside an expression.)
 */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
392
393 /* globals */
394
395 extern char **environ;
396
397 static var * V[_intvarcount_];
398 static chain beginseq, mainseq, endseq, *seq;
399 static int nextrec, nextfile;
400 static node *break_ptr, *continue_ptr;
401 static rstream *iF;
402 static xhash *vhash, *ahash, *fdhash, *fnhash;
403 static char *programname;
404 static short lineno;
405 static int is_f0_split;
406 static int nfields;
407 static var *Fields;
408 static tsplitter fsplitter, rsplitter;
409 static nvblock *cb;
410 static char *pos;
411 static char *buf;
412 static int icase;
413 static int exiting;
414
415 static struct {
416         uint32_t tclass;
417         uint32_t info;
418         char *string;
419         double number;
420         short lineno;
421         int rollback;
422 } t;
423
424 /* function prototypes */
425 static void handle_special(var *);
426 static node *parse_expr(uint32_t);
427 static void chain_group(void);
428 static var *evaluate(node *, var *);
429 static rstream *next_input_file(void);
430 static int fmt_num(char *, int, const char *, double, int);
431 static int awk_exit(int) ATTRIBUTE_NORETURN;
432
433 /* ---- error handling ---- */
434
/* messages handed to syntax_error() / runtime_error() */
static const char EMSG_INTERNAL_ERROR[]  = "Internal error";
static const char EMSG_UNEXP_EOS[]       = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[]     = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[]     = "Division by zero";
static const char EMSG_INV_FMT[]         = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[]    = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[]       = "Not an array";
static const char EMSG_POSSIBLE_ERROR[]  = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[]      = "Call to undefined function";
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[]         = "Math support is not compiled in";
#endif
447
448 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
449 static void syntax_error(const char * const message)
450 {
451         bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
452 }
453
454 #define runtime_error(x) syntax_error(x)
455
456
457 /* ---- hash stuff ---- */
458
/* Compute the hash of a NUL-terminated name: for every character,
 * hash = character + hash * 63 (written as (hash << 6) - hash),
 * wrapping in unsigned int.  The caller reduces it modulo the table size.
 */
static unsigned int hashidx(const char *name)
{
	unsigned int h = 0;
	const char *c;

	for (c = name; *c != '\0'; c++)
		h = *c + (h << 6) - h;
	return h;
}
466
467 /* create new hash */
468 static xhash *hash_init(void)
469 {
470         xhash *newhash;
471
472         newhash = xzalloc(sizeof(xhash));
473         newhash->csize = FIRST_PRIME;
474         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
475
476         return newhash;
477 }
478
479 /* find item in hash, return ptr to data, NULL if not found */
480 static void *hash_search(xhash *hash, const char *name)
481 {
482         hash_item *hi;
483
484         hi = hash->items [ hashidx(name) % hash->csize ];
485         while (hi) {
486                 if (strcmp(hi->name, name) == 0)
487                         return &(hi->data);
488                 hi = hi->next;
489         }
490         return NULL;
491 }
492
493 /* grow hash if it becomes too big */
494 static void hash_rebuild(xhash *hash)
495 {
496         unsigned int newsize, i, idx;
497         hash_item **newitems, *hi, *thi;
498
499         if (hash->nprime == NPRIMES)
500                 return;
501
502         newsize = PRIMES[hash->nprime++];
503         newitems = xzalloc(newsize * sizeof(hash_item *));
504
505         for (i=0; i<hash->csize; i++) {
506                 hi = hash->items[i];
507                 while (hi) {
508                         thi = hi;
509                         hi = thi->next;
510                         idx = hashidx(thi->name) % newsize;
511                         thi->next = newitems[idx];
512                         newitems[idx] = thi;
513                 }
514         }
515
516         free(hash->items);
517         hash->csize = newsize;
518         hash->items = newitems;
519 }
520
521 /* find item in hash, add it if necessary. Return ptr to data */
522 static void *hash_find(xhash *hash, const char *name)
523 {
524         hash_item *hi;
525         unsigned int idx;
526         int l;
527
528         hi = hash_search(hash, name);
529         if (! hi) {
530                 if (++hash->nel / hash->csize > 10)
531                         hash_rebuild(hash);
532
533                 l = strlen(name) + 1;
534                 hi = xzalloc(sizeof(hash_item) + l);
535                 memcpy(hi->name, name, l);
536
537                 idx = hashidx(name) % hash->csize;
538                 hi->next = hash->items[idx];
539                 hash->items[idx] = hi;
540                 hash->glen += l;
541         }
542         return &(hi->data);
543 }
544
/* Typed wrappers around hash_find(): look an item up, creating it if needed.
 * Each expansion is fully parenthesized so that the result can be used
 * directly in a larger expression, e.g. findvar(h, n)->type.
 */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name)        ((var *) hash_find(vhash, (name)))
#define newfile(name)       ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name)       ((func *) hash_find(fnhash, (name)))
549
550 static void hash_remove(xhash *hash, const char *name)
551 {
552         hash_item *hi, **phi;
553
554         phi = &(hash->items[ hashidx(name) % hash->csize ]);
555         while (*phi) {
556                 hi = *phi;
557                 if (strcmp(hi->name, name) == 0) {
558                         hash->glen -= (strlen(name) + 1);
559                         hash->nel--;
560                         *phi = hi->next;
561                         free(hi);
562                         break;
563                 }
564                 phi = &(hi->next);
565         }
566 }
567
568 /* ------ some useful functions ------ */
569
570 static void skip_spaces(char **s)
571 {
572         char *p = *s;
573
574         while (*p == ' ' || *p == '\t' ||
575                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
576                 p++;
577         }
578         *s = p;
579 }
580
/* Return the NUL-terminated word at *s and move *s to the first character
 * after that word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;
	return word;
}
589
/* Fetch the next character from *s and advance *s past it.
 * After a backslash, bb_process_escape_sequence() supplies the character.
 * If that call returns a backslash and leaves *s where it was, the
 * character after the backslash is returned as is.
 */
static char nextchar(char **s)
{
	char ch = **s;
	char *after;

	(*s)++;
	after = *s;
	if (ch != '\\')
		return ch;

	ch = bb_process_escape_sequence((const char **)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
600
601 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
602 {
603         return (isalnum(c) || c == '_');
604 }
605
/* Open `path' with xfopen(), except that the name "-" yields stdin. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;
	return xfopen(path, mode);
}
610
611 /* -------- working with variables (set/get/copy/etc) -------- */
612
613 static xhash *iamarray(var *v)
614 {
615         var *a = v;
616
617         while (a->type & VF_CHILD)
618                 a = a->x.parent;
619
620         if (! (a->type & VF_ARRAY)) {
621                 a->type |= VF_ARRAY;
622                 a->x.array = hash_init();
623         }
624         return a->x.array;
625 }
626
627 static void clear_array(xhash *array)
628 {
629         unsigned int i;
630         hash_item *hi, *thi;
631
632         for (i=0; i<array->csize; i++) {
633                 hi = array->items[i];
634                 while (hi) {
635                         thi = hi;
636                         hi = hi->next;
637                         free(thi->data.v.string);
638                         free(thi);
639                 }
640                 array->items[i] = NULL;
641         }
642         array->glen = array->nel = 0;
643 }
644
645 /* clear a variable */
646 static var *clrvar(var *v)
647 {
648         if (!(v->type & VF_FSTR))
649                 free(v->string);
650
651         v->type &= VF_DONTTOUCH;
652         v->type |= VF_DIRTY;
653         v->string = NULL;
654         return v;
655 }
656
657 /* assign string value to variable */
658 static var *setvar_p(var *v, char *value)
659 {
660         clrvar(v);
661         v->string = value;
662         handle_special(v);
663
664         return v;
665 }
666
667 /* same as setvar_p but make a copy of string */
668 static var *setvar_s(var *v, const char *value)
669 {
670         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
671 }
672
673 /* same as setvar_s but set USER flag */
674 static var *setvar_u(var *v, const char *value)
675 {
676         setvar_s(v, value);
677         v->type |= VF_USER;
678         return v;
679 }
680
681 /* set array element to user string */
682 static void setari_u(var *a, int idx, const char *s)
683 {
684         var *v;
685         static char sidx[12];
686
687         sprintf(sidx, "%d", idx);
688         v = findvar(iamarray(a), sidx);
689         setvar_u(v, s);
690 }
691
692 /* assign numeric value to variable */
693 static var *setvar_i(var *v, double value)
694 {
695         clrvar(v);
696         v->type |= VF_NUMBER;
697         v->number = value;
698         handle_special(v);
699         return v;
700 }
701
702 static char *getvar_s(var *v)
703 {
704         /* if v is numeric and has no cached string, convert it to string */
705         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
706                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
707                 v->string = xstrdup(buf);
708                 v->type |= VF_CACHED;
709         }
710         return (v->string == NULL) ? "" : v->string;
711 }
712
713 static double getvar_i(var *v)
714 {
715         char *s;
716
717         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
718                 v->number = 0;
719                 s = v->string;
720                 if (s && *s) {
721                         v->number = strtod(s, &s);
722                         if (v->type & VF_USER) {
723                                 skip_spaces(&s);
724                                 if (*s != '\0')
725                                         v->type &= ~VF_USER;
726                         }
727                 } else {
728                         v->type &= ~VF_USER;
729                 }
730                 v->type |= VF_CACHED;
731         }
732         return v->number;
733 }
734
735 static var *copyvar(var *dest, const var *src)
736 {
737         if (dest != src) {
738                 clrvar(dest);
739                 dest->type |= (src->type & ~VF_DONTTOUCH);
740                 dest->number = src->number;
741                 if (src->string)
742                         dest->string = xstrdup(src->string);
743         }
744         handle_special(dest);
745         return dest;
746 }
747
748 static var *incvar(var *v)
749 {
750         return setvar_i(v, getvar_i(v)+1.);
751 }
752
753 /* return true if v is number or numeric string */
754 static int is_numeric(var *v)
755 {
756         getvar_i(v);
757         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
758 }
759
760 /* return 1 when value of v corresponds to true, 0 otherwise */
761 static int istrue(var *v)
762 {
763         if (is_numeric(v))
764                 return (v->number == 0) ? 0 : 1;
765         else
766                 return (v->string && *(v->string)) ? 1 : 0;
767 }
768
769 /* temporary variables allocator. Last allocated should be first freed */
770 static var *nvalloc(int n)
771 {
772         nvblock *pb = NULL;
773         var *v, *r;
774         int size;
775
776         while (cb) {
777                 pb = cb;
778                 if ((cb->pos - cb->nv) + n <= cb->size) break;
779                 cb = cb->next;
780         }
781
782         if (! cb) {
783                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
784                 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
785                 cb->size = size;
786                 cb->pos = cb->nv;
787                 cb->prev = pb;
788                 cb->next = NULL;
789                 if (pb) pb->next = cb;
790         }
791
792         v = r = cb->pos;
793         cb->pos += n;
794
795         while (v < cb->pos) {
796                 v->type = 0;
797                 v->string = NULL;
798                 v++;
799         }
800
801         return r;
802 }
803
804 static void nvfree(var *v)
805 {
806         var *p;
807
808         if (v < cb->nv || v >= cb->pos)
809                 runtime_error(EMSG_INTERNAL_ERROR);
810
811         for (p=v; p<cb->pos; p++) {
812                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
813                         clear_array(iamarray(p));
814                         free(p->x.array->items);
815                         free(p->x.array);
816                 }
817                 if (p->type & VF_WALK)
818                         free(p->x.walker);
819
820                 clrvar(p);
821         }
822
823         cb->pos = v;
824         while (cb->prev && cb->pos == cb->nv) {
825                 cb = cb->prev;
826         }
827 }
828
829 /* ------- awk program text parsing ------- */
830
831 /* Parse the next token pointed to by global pos and place the results into
832  * global t. If the token isn't expected, give up. Return the token class.
833  */
834 static uint32_t next_token(uint32_t expected)
835 {
836         char *p, *pp, *s;
837         char *tl;
838         uint32_t tc;
839         const uint32_t *ti;
840         int l;
841         static int concat_inserted;
842         static uint32_t save_tclass, save_info;
843         static uint32_t ltclass = TC_OPTERM;
844
845         if (t.rollback) {
846
847                 t.rollback = FALSE;
848
849         } else if (concat_inserted) {
850
851                 concat_inserted = FALSE;
852                 t.tclass = save_tclass;
853                 t.info = save_info;
854
855         } else {
856
857                 p = pos;
858
859         readnext:
860                 skip_spaces(&p);
861                 lineno = t.lineno;
862                 if (*p == '#')
863                         while (*p != '\n' && *p != '\0') p++;
864
865                 if (*p == '\n')
866                         t.lineno++;
867
868                 if (*p == '\0') {
869                         tc = TC_EOF;
870
871                 } else if (*p == '\"') {
872                         /* it's a string */
873                         t.string = s = ++p;
874                         while (*p != '\"') {
875                                 if (*p == '\0' || *p == '\n')
876                                         syntax_error(EMSG_UNEXP_EOS);
877                                 *(s++) = nextchar(&p);
878                         }
879                         p++;
880                         *s = '\0';
881                         tc = TC_STRING;
882
883                 } else if ((expected & TC_REGEXP) && *p == '/') {
884                         /* it's regexp */
885                         t.string = s = ++p;
886                         while (*p != '/') {
887                                 if (*p == '\0' || *p == '\n')
888                                         syntax_error(EMSG_UNEXP_EOS);
889                                 if ((*s++ = *p++) == '\\') {
890                                         pp = p;
891                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
892                                         if (*pp == '\\') *s++ = '\\';
893                                         if (p == pp) *s++ = *p++;
894                                 }
895                         }
896                         p++;
897                         *s = '\0';
898                         tc = TC_REGEXP;
899
900                 } else if (*p == '.' || isdigit(*p)) {
901                         /* it's a number */
902                         t.number = strtod(p, &p);
903                         if (*p == '.')
904                                 syntax_error(EMSG_UNEXP_TOKEN);
905                         tc = TC_NUMBER;
906
907                 } else {
908                         /* search for something known */
909                         tl = tokenlist;
910                         tc = 0x00000001;
911                         ti = tokeninfo;
912                         while (*tl) {
913                                 l = *(tl++);
914                                 if (l == NTCC) {
915                                         tc <<= 1;
916                                         continue;
917                                 }
918                                 /* if token class is expected, token
919                                  * matches and it's not a longer word,
920                                  * then this is what we are looking for
921                                  */
922                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
923                                 *tl == *p && strncmp(p, tl, l) == 0 &&
924                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
925                                         t.info = *ti;
926                                         p += l;
927                                         break;
928                                 }
929                                 ti++;
930                                 tl += l;
931                         }
932
933                         if (! *tl) {
934                                 /* it's a name (var/array/function),
935                                  * otherwise it's something wrong
936                                  */
937                                 if (! isalnum_(*p))
938                                         syntax_error(EMSG_UNEXP_TOKEN);
939
940                                 t.string = --p;
941                                 while (isalnum_(*(++p))) {
942                                         *(p-1) = *p;
943                                 }
944                                 *(p-1) = '\0';
945                                 tc = TC_VARIABLE;
946                                 /* also consume whitespace between functionname and bracket */
947                                 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
948                                 if (*p == '(') {
949                                         tc = TC_FUNCTION;
950                                 } else {
951                                         if (*p == '[') {
952                                                 p++;
953                                                 tc = TC_ARRAY;
954                                         }
955                                 }
956                         }
957                 }
958                 pos = p;
959
960                 /* skipping newlines in some cases */
961                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
962                         goto readnext;
963
964                 /* insert concatenation operator when needed */
965                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
966                         concat_inserted = TRUE;
967                         save_tclass = tc;
968                         save_info = t.info;
969                         tc = TC_BINOP;
970                         t.info = OC_CONCAT | SS | P(35);
971                 }
972
973                 t.tclass = tc;
974         }
975         ltclass = t.tclass;
976
977         /* Are we ready for this? */
978         if (! (ltclass & expected))
979                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
980                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
981
982         return ltclass;
983 }
984
985 static void rollback_token(void) { t.rollback = TRUE; }
986
987 static node *new_node(uint32_t info)
988 {
989         node *n;
990
991         n = xzalloc(sizeof(node));
992         n->info = info;
993         n->lineno = lineno;
994         return n;
995 }
996
997 static node *mk_re_node(char *s, node *n, regex_t *re)
998 {
999         n->info = OC_REGEXP;
1000         n->l.re = re;
1001         n->r.ire = re + 1;
1002         xregcomp(re, s, REG_EXTENDED);
1003         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1004
1005         return n;
1006 }
1007
1008 static node *condition(void)
1009 {
1010         next_token(TC_SEQSTART);
1011         return parse_expr(TC_SEQTERM);
1012 }
1013
1014 /* parse expression terminated by given argument, return ptr
1015  * to built subtree. Terminator is eaten by parse_expr */
1016 static node *parse_expr(uint32_t iexp)
1017 {
1018         node sn;
1019         node *cn = &sn;
1020         node *vn, *glptr;
1021         uint32_t tc, xtc;
1022         var *v;
1023
1024         sn.info = PRIMASK;
1025         sn.r.n = glptr = NULL;
1026         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1027
1028         while (! ((tc = next_token(xtc)) & iexp)) {
1029                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1030                         /* input redirection (<) attached to glptr node */
1031                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1032                         cn->a.n = glptr;
1033                         xtc = TC_OPERAND | TC_UOPPRE;
1034                         glptr = NULL;
1035
1036                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1037                         /* for binary and postfix-unary operators, jump back over
1038                          * previous operators with higher priority */
1039                         vn = cn;
1040                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1041                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1042                                 vn = vn->a.n;
1043                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1044                                 t.info += P(6);
1045                         cn = vn->a.n->r.n = new_node(t.info);
1046                         cn->a.n = vn->a.n;
1047                         if (tc & TC_BINOP) {
1048                                 cn->l.n = vn;
1049                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1050                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1051                                         /* it's a pipe */
1052                                         next_token(TC_GETLINE);
1053                                         /* give maximum priority to this pipe */
1054                                         cn->info &= ~PRIMASK;
1055                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1056                                 }
1057                         } else {
1058                                 cn->r.n = vn;
1059                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1060                         }
1061                         vn->a.n = cn;
1062
1063                 } else {
1064                         /* for operands and prefix-unary operators, attach them
1065                          * to last node */
1066                         vn = cn;
1067                         cn = vn->r.n = new_node(t.info);
1068                         cn->a.n = vn;
1069                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1070                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1071                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1072                                 /* one should be very careful with switch on tclass -
1073                                  * only simple tclasses should be used! */
1074                                 switch (tc) {
1075                                   case TC_VARIABLE:
1076                                   case TC_ARRAY:
1077                                         cn->info = OC_VAR;
1078                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1079                                                 cn->info = OC_FNARG;
1080                                                 cn->l.i = v->x.aidx;
1081                                         } else {
1082                                                 cn->l.v = newvar(t.string);
1083                                         }
1084                                         if (tc & TC_ARRAY) {
1085                                                 cn->info |= xS;
1086                                                 cn->r.n = parse_expr(TC_ARRTERM);
1087                                         }
1088                                         break;
1089
1090                                   case TC_NUMBER:
1091                                   case TC_STRING:
1092                                         cn->info = OC_VAR;
1093                                         v = cn->l.v = xzalloc(sizeof(var));
1094                                         if (tc & TC_NUMBER)
1095                                                 setvar_i(v, t.number);
1096                                         else
1097                                                 setvar_s(v, t.string);
1098                                         break;
1099
1100                                   case TC_REGEXP:
1101                                         mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1102                                         break;
1103
1104                                   case TC_FUNCTION:
1105                                         cn->info = OC_FUNC;
1106                                         cn->r.f = newfunc(t.string);
1107                                         cn->l.n = condition();
1108                                         break;
1109
1110                                   case TC_SEQSTART:
1111                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1112                                         cn->a.n = vn;
1113                                         break;
1114
1115                                   case TC_GETLINE:
1116                                         glptr = cn;
1117                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1118                                         break;
1119
1120                                   case TC_BUILTIN:
1121                                         cn->l.n = condition();
1122                                         break;
1123                                 }
1124                         }
1125                 }
1126         }
1127         return sn.r.n;
1128 }
1129
1130 /* add node to chain. Return ptr to alloc'd node */
1131 static node *chain_node(uint32_t info)
1132 {
1133         node *n;
1134
1135         if (! seq->first)
1136                 seq->first = seq->last = new_node(0);
1137
1138         if (seq->programname != programname) {
1139                 seq->programname = programname;
1140                 n = chain_node(OC_NEWSOURCE);
1141                 n->l.s = xstrdup(programname);
1142         }
1143
1144         n = seq->last;
1145         n->info = info;
1146         seq->last = n->a.n = new_node(OC_DONE);
1147
1148         return n;
1149 }
1150
1151 static void chain_expr(uint32_t info)
1152 {
1153         node *n;
1154
1155         n = chain_node(info);
1156         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1157         if (t.tclass & TC_GRPTERM)
1158                 rollback_token();
1159 }
1160
1161 static node *chain_loop(node *nn)
1162 {
1163         node *n, *n2, *save_brk, *save_cont;
1164
1165         save_brk = break_ptr;
1166         save_cont = continue_ptr;
1167
1168         n = chain_node(OC_BR | Vx);
1169         continue_ptr = new_node(OC_EXEC);
1170         break_ptr = new_node(OC_EXEC);
1171         chain_group();
1172         n2 = chain_node(OC_EXEC | Vx);
1173         n2->l.n = nn;
1174         n2->a.n = n;
1175         continue_ptr->a.n = n2;
1176         break_ptr->a.n = n->r.n = seq->last;
1177
1178         continue_ptr = save_cont;
1179         break_ptr = save_brk;
1180
1181         return n;
1182 }
1183
1184 /* parse group and attach it to chain */
1185 static void chain_group(void)
1186 {
1187         uint32_t c;
1188         node *n, *n2, *n3;
1189
1190         do {
1191                 c = next_token(TC_GRPSEQ);
1192         } while (c & TC_NEWLINE);
1193
1194         if (c & TC_GRPSTART) {
1195                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1196                         if (t.tclass & TC_NEWLINE) continue;
1197                         rollback_token();
1198                         chain_group();
1199                 }
1200         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1201                 rollback_token();
1202                 chain_expr(OC_EXEC | Vx);
1203         } else {                                                /* TC_STATEMNT */
1204                 switch (t.info & OPCLSMASK) {
1205                         case ST_IF:
1206                                 n = chain_node(OC_BR | Vx);
1207                                 n->l.n = condition();
1208                                 chain_group();
1209                                 n2 = chain_node(OC_EXEC);
1210                                 n->r.n = seq->last;
1211                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1212                                         chain_group();
1213                                         n2->a.n = seq->last;
1214                                 } else {
1215                                         rollback_token();
1216                                 }
1217                                 break;
1218
1219                         case ST_WHILE:
1220                                 n2 = condition();
1221                                 n = chain_loop(NULL);
1222                                 n->l.n = n2;
1223                                 break;
1224
1225                         case ST_DO:
1226                                 n2 = chain_node(OC_EXEC);
1227                                 n = chain_loop(NULL);
1228                                 n2->a.n = n->a.n;
1229                                 next_token(TC_WHILE);
1230                                 n->l.n = condition();
1231                                 break;
1232
1233                         case ST_FOR:
1234                                 next_token(TC_SEQSTART);
1235                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1236                                 if (t.tclass & TC_SEQTERM) {    /* for-in */
1237                                         if ((n2->info & OPCLSMASK) != OC_IN)
1238                                                 syntax_error(EMSG_UNEXP_TOKEN);
1239                                         n = chain_node(OC_WALKINIT | VV);
1240                                         n->l.n = n2->l.n;
1241                                         n->r.n = n2->r.n;
1242                                         n = chain_loop(NULL);
1243                                         n->info = OC_WALKNEXT | Vx;
1244                                         n->l.n = n2->l.n;
1245                                 } else {                        /* for (;;) */
1246                                         n = chain_node(OC_EXEC | Vx);
1247                                         n->l.n = n2;
1248                                         n2 = parse_expr(TC_SEMICOL);
1249                                         n3 = parse_expr(TC_SEQTERM);
1250                                         n = chain_loop(n3);
1251                                         n->l.n = n2;
1252                                         if (! n2)
1253                                                 n->info = OC_EXEC;
1254                                 }
1255                                 break;
1256
1257                         case OC_PRINT:
1258                         case OC_PRINTF:
1259                                 n = chain_node(t.info);
1260                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1261                                 if (t.tclass & TC_OUTRDR) {
1262                                         n->info |= t.info;
1263                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1264                                 }
1265                                 if (t.tclass & TC_GRPTERM)
1266                                         rollback_token();
1267                                 break;
1268
1269                         case OC_BREAK:
1270                                 n = chain_node(OC_EXEC);
1271                                 n->a.n = break_ptr;
1272                                 break;
1273
1274                         case OC_CONTINUE:
1275                                 n = chain_node(OC_EXEC);
1276                                 n->a.n = continue_ptr;
1277                                 break;
1278
1279                         /* delete, next, nextfile, return, exit */
1280                         default:
1281                                 chain_expr(t.info);
1282                 }
1283         }
1284 }
1285
1286 static void parse_program(char *p)
1287 {
1288         uint32_t tclass;
1289         node *cn;
1290         func *f;
1291         var *v;
1292
1293         pos = p;
1294         t.lineno = 1;
1295         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1296                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1297
1298                 if (tclass & TC_OPTERM)
1299                         continue;
1300
1301                 seq = &mainseq;
1302                 if (tclass & TC_BEGIN) {
1303                         seq = &beginseq;
1304                         chain_group();
1305
1306                 } else if (tclass & TC_END) {
1307                         seq = &endseq;
1308                         chain_group();
1309
1310                 } else if (tclass & TC_FUNCDECL) {
1311                         next_token(TC_FUNCTION);
1312                         pos++;
1313                         f = newfunc(t.string);
1314                         f->body.first = NULL;
1315                         f->nargs = 0;
1316                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1317                                 v = findvar(ahash, t.string);
1318                                 v->x.aidx = (f->nargs)++;
1319
1320                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1321                                         break;
1322                         }
1323                         seq = &(f->body);
1324                         chain_group();
1325                         clear_array(ahash);
1326
1327                 } else if (tclass & TC_OPSEQ) {
1328                         rollback_token();
1329                         cn = chain_node(OC_TEST);
1330                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1331                         if (t.tclass & TC_GRPSTART) {
1332                                 rollback_token();
1333                                 chain_group();
1334                         } else {
1335                                 chain_node(OC_PRINT);
1336                         }
1337                         cn->r.n = mainseq.last;
1338
1339                 } else /* if (tclass & TC_GRPSTART) */ {
1340                         rollback_token();
1341                         chain_group();
1342                 }
1343         }
1344 }
1345
1346
1347 /* -------- program execution part -------- */
1348
1349 static node *mk_splitter(char *s, tsplitter *spl)
1350 {
1351         regex_t *re, *ire;
1352         node *n;
1353
1354         re = &spl->re[0];
1355         ire = &spl->re[1];
1356         n = &spl->n;
1357         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1358                 regfree(re);
1359                 regfree(ire);
1360         }
1361         if (strlen(s) > 1) {
1362                 mk_re_node(s, n, re);
1363         } else {
1364                 n->info = (uint32_t) *s;
1365         }
1366
1367         return n;
1368 }
1369
1370 /* use node as a regular expression. Supplied with node ptr and regex_t
1371  * storage space. Return ptr to regex (if result points to preg, it should
1372  * be later regfree'd manually
1373  */
1374 static regex_t *as_regex(node *op, regex_t *preg)
1375 {
1376         var *v;
1377         char *s;
1378
1379         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1380                 return icase ? op->r.ire : op->l.re;
1381         } else {
1382                 v = nvalloc(1);
1383                 s = getvar_s(evaluate(op, v));
1384                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1385                 nvfree(v);
1386                 return preg;
1387         }
1388 }
1389
/* Gradually growing buffer: make sure *b exists and holds more than n
 * bytes. When it has to grow, the new capacity n + n/2 + 80 is stored
 * in *size. An existing buffer that is already large enough is left
 * untouched. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1396
1397 /* resize field storage space */
1398 static void fsrealloc(int size)
1399 {
1400         static int maxfields = 0;
1401         int i;
1402
1403         if (size >= maxfields) {
1404                 i = maxfields;
1405                 maxfields = size + 16;
1406                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1407                 for (; i<maxfields; i++) {
1408                         Fields[i].type = VF_SPECIAL;
1409                         Fields[i].string = NULL;
1410                 }
1411         }
1412
1413         if (size < nfields) {
1414                 for (i=size; i<nfields; i++) {
1415                         clrvar(Fields+i);
1416                 }
1417         }
1418         nfields = size;
1419 }
1420
1421 static int awk_split(char *s, node *spl, char **slist)
1422 {
1423         int l, n=0;
1424         char c[4];
1425         char *s1;
1426         regmatch_t pmatch[2];
1427
1428         /* in worst case, each char would be a separate field */
1429         *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1430
1431         c[0] = c[1] = (char)spl->info;
1432         c[2] = c[3] = '\0';
1433         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1434
1435         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1436                 while (*s) {
1437                         l = strcspn(s, c+2);
1438                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1439                         pmatch[0].rm_so <= l) {
1440                                 l = pmatch[0].rm_so;
1441                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1442                         } else {
1443                                 pmatch[0].rm_eo = l;
1444                                 if (*(s+l)) pmatch[0].rm_eo++;
1445                         }
1446
1447                         memcpy(s1, s, l);
1448                         *(s1+l) = '\0';
1449                         nextword(&s1);
1450                         s += pmatch[0].rm_eo;
1451                         n++;
1452                 }
1453         } else if (c[0] == '\0') {              /* null split */
1454                 while (*s) {
1455                         *(s1++) = *(s++);
1456                         *(s1++) = '\0';
1457                         n++;
1458                 }
1459         } else if (c[0] != ' ') {               /* single-character split */
1460                 if (icase) {
1461                         c[0] = toupper(c[0]);
1462                         c[1] = tolower(c[1]);
1463                 }
1464                 if (*s1) n++;
1465                 while ((s1 = strpbrk(s1, c))) {
1466                         *(s1++) = '\0';
1467                         n++;
1468                 }
1469         } else {                                /* space split */
1470                 while (*s) {
1471                         s = skip_whitespace(s);
1472                         if (! *s) break;
1473                         n++;
1474                         while (*s && !isspace(*s))
1475                                 *(s1++) = *(s++);
1476                         *(s1++) = '\0';
1477                 }
1478         }
1479         return n;
1480 }
1481
1482 static void split_f0(void)
1483 {
1484         static char *fstrings = NULL;
1485         int i, n;
1486         char *s;
1487
1488         if (is_f0_split)
1489                 return;
1490
1491         is_f0_split = TRUE;
1492         free(fstrings);
1493         fsrealloc(0);
1494         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1495         fsrealloc(n);
1496         s = fstrings;
1497         for (i=0; i<n; i++) {
1498                 Fields[i].string = nextword(&s);
1499                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1500         }
1501
1502         /* set NF manually to avoid side effects */
1503         clrvar(V[NF]);
1504         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1505         V[NF]->number = nfields;
1506 }
1507
1508 /* perform additional actions when some internal variables changed */
1509 static void handle_special(var *v)
1510 {
1511         int n;
1512         char *b, *sep, *s;
1513         int sl, l, len, i, bsize;
1514
1515         if (! (v->type & VF_SPECIAL))
1516                 return;
1517
1518         if (v == V[NF]) {
1519                 n = (int)getvar_i(v);
1520                 fsrealloc(n);
1521
1522                 /* recalculate $0 */
1523                 sep = getvar_s(V[OFS]);
1524                 sl = strlen(sep);
1525                 b = NULL;
1526                 len = 0;
1527                 for (i=0; i<n; i++) {
1528                         s = getvar_s(&Fields[i]);
1529                         l = strlen(s);
1530                         if (b) {
1531                                 memcpy(b+len, sep, sl);
1532                                 len += sl;
1533                         }
1534                         qrealloc(&b, len+l+sl, &bsize);
1535                         memcpy(b+len, s, l);
1536                         len += l;
1537                 }
1538                 if (b) b[len] = '\0';
1539                 setvar_p(V[F0], b);
1540                 is_f0_split = TRUE;
1541
1542         } else if (v == V[F0]) {
1543                 is_f0_split = FALSE;
1544
1545         } else if (v == V[FS]) {
1546                 mk_splitter(getvar_s(v), &fsplitter);
1547
1548         } else if (v == V[RS]) {
1549                 mk_splitter(getvar_s(v), &rsplitter);
1550
1551         } else if (v == V[IGNORECASE]) {
1552                 icase = istrue(v);
1553
1554         } else {                                                /* $n */
1555                 n = getvar_i(V[NF]);
1556                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1557                 /* right here v is invalid. Just to note... */
1558         }
1559 }
1560
1561 /* step through func/builtin/etc arguments */
1562 static node *nextarg(node **pn)
1563 {
1564         node *n;
1565
1566         n = *pn;
1567         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1568                 *pn = n->r.n;
1569                 n = n->l.n;
1570         } else {
1571                 *pn = NULL;
1572         }
1573         return n;
1574 }
1575
1576 static void hashwalk_init(var *v, xhash *array)
1577 {
1578         char **w;
1579         hash_item *hi;
1580         int i;
1581
1582         if (v->type & VF_WALK)
1583                 free(v->x.walker);
1584
1585         v->type |= VF_WALK;
1586         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1587         *w = *(w+1) = (char *)(w + 2);
1588         for (i=0; i<array->csize; i++) {
1589                 hi = array->items[i];
1590                 while (hi) {
1591                         strcpy(*w, hi->name);
1592                         nextword(w);
1593                         hi = hi->next;
1594                 }
1595         }
1596 }
1597
1598 static int hashwalk_next(var *v)
1599 {
1600         char **w;
1601
1602         w = v->x.walker;
1603         if (*(w+1) == *w)
1604                 return FALSE;
1605
1606         setvar_s(v, nextword(w+1));
1607         return TRUE;
1608 }
1609
1610 /* evaluate node, return 1 when result is true, 0 otherwise */
1611 static int ptest(node *pattern)
1612 {
1613         static var v;
1614         return istrue(evaluate(pattern, &v));
1615 }
1616
1617 /* read next record from stream rsm into a variable v */
1618 static int awk_getline(rstream *rsm, var *v)
1619 {
1620         char *b;
1621         regmatch_t pmatch[2];
1622         int a, p, pp=0, size;
1623         int fd, so, eo, r, rp;
1624         char c, *m, *s;
1625
1626         /* we're using our own buffer since we need access to accumulating
1627          * characters
1628          */
1629         fd = fileno(rsm->F);
1630         m = rsm->buffer;
1631         a = rsm->adv;
1632         p = rsm->pos;
1633         size = rsm->size;
1634         c = (char) rsplitter.n.info;
1635         rp = 0;
1636
1637         if (! m) qrealloc(&m, 256, &size);
1638         do {
1639                 b = m + a;
1640                 so = eo = p;
1641                 r = 1;
1642                 if (p > 0) {
1643                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1644                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1645                                                                                                 b, 1, pmatch, 0) == 0) {
1646                                         so = pmatch[0].rm_so;
1647                                         eo = pmatch[0].rm_eo;
1648                                         if (b[eo] != '\0')
1649                                                 break;
1650                                 }
1651                         } else if (c != '\0') {
1652                                 s = strchr(b+pp, c);
1653                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1654                                 if (s) {
1655                                         so = eo = s-b;
1656                                         eo++;
1657                                         break;
1658                                 }
1659                         } else {
1660                                 while (b[rp] == '\n')
1661                                         rp++;
1662                                 s = strstr(b+rp, "\n\n");
1663                                 if (s) {
1664                                         so = eo = s-b;
1665                                         while (b[eo] == '\n') eo++;
1666                                         if (b[eo] != '\0')
1667                                                 break;
1668                                 }
1669                         }
1670                 }
1671
1672                 if (a > 0) {
1673                         memmove(m, (const void *)(m+a), p+1);
1674                         b = m;
1675                         a = 0;
1676                 }
1677
1678                 qrealloc(&m, a+p+128, &size);
1679                 b = m + a;
1680                 pp = p;
1681                 p += safe_read(fd, b+p, size-p-1);
1682                 if (p < pp) {
1683                         p = 0;
1684                         r = 0;
1685                         setvar_i(V[ERRNO], errno);
1686                 }
1687                 b[p] = '\0';
1688
1689         } while (p > pp);
1690
1691         if (p == 0) {
1692                 r--;
1693         } else {
1694                 c = b[so]; b[so] = '\0';
1695                 setvar_s(v, b+rp);
1696                 v->type |= VF_USER;
1697                 b[so] = c;
1698                 c = b[eo]; b[eo] = '\0';
1699                 setvar_s(V[RT], b+so);
1700                 b[eo] = c;
1701         }
1702
1703         rsm->buffer = m;
1704         rsm->adv = a + eo;
1705         rsm->pos = p - eo;
1706         rsm->size = size;
1707
1708         return r;
1709 }
1710
1711 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1712 {
1713         int r=0;
1714         char c;
1715         const char *s=format;
1716
1717         if (int_as_int && n == (int)n) {
1718                 r = snprintf(b, size, "%d", (int)n);
1719         } else {
1720                 do { c = *s; } while (*s && *++s);
1721                 if (strchr("diouxX", c)) {
1722                         r = snprintf(b, size, format, (int)n);
1723                 } else if (strchr("eEfgG", c)) {
1724                         r = snprintf(b, size, format, n);
1725                 } else {
1726                         runtime_error(EMSG_INV_FMT);
1727                 }
1728         }
1729         return r;
1730 }
1731
1732
1733 /* formatted output into an allocated buffer, return ptr to buffer */
1734 static char *awk_printf(node *n)
1735 {
1736         char *b = NULL;
1737         char *fmt, *s, *s1, *f;
1738         int i, j, incr, bsize;
1739         char c, c1;
1740         var *v, *arg;
1741
1742         v = nvalloc(1);
1743         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1744
1745         i = 0;
1746         while (*f) {
1747                 s = f;
1748                 while (*f && (*f != '%' || *(++f) == '%'))
1749                         f++;
1750                 while (*f && !isalpha(*f))
1751                         f++;
1752
1753                 incr = (f - s) + MAXVARFMT;
1754                 qrealloc(&b, incr+i, &bsize);
1755                 c = *f; if (c != '\0') f++;
1756                 c1 = *f ; *f = '\0';
1757                 arg = evaluate(nextarg(&n), v);
1758
1759                 j = i;
1760                 if (c == 'c' || !c) {
1761                         i += sprintf(b+i, s,
1762                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1763
1764                 } else if (c == 's') {
1765                         s1 = getvar_s(arg);
1766                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1767                         i += sprintf(b+i, s, s1);
1768
1769                 } else {
1770                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1771                 }
1772                 *f = c1;
1773
1774                 /* if there was an error while sprintf, return value is negative */
1775                 if (i < j) i = j;
1776
1777         }
1778
1779         b = xrealloc(b, i+1);
1780         free(fmt);
1781         nvfree(v);
1782         b[i] = '\0';
1783         return b;
1784 }
1785
1786 /* common substitution routine
1787  * replace (nm) substring of (src) that match (n) with (repl), store
1788  * result into (dest), return number of substitutions. If nm=0, replace
1789  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1790  * subexpression matching (\1-\9)
1791  */
1792 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1793 {
1794         char *ds = NULL;
1795         char *sp, *s;
1796         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1797         regmatch_t pmatch[10];
1798         regex_t sreg, *re;
1799
1800         re = as_regex(rn, &sreg);
1801         if (! src) src = V[F0];
1802         if (! dest) dest = V[F0];
1803
1804         i = di = 0;
1805         sp = getvar_s(src);
1806         rl = strlen(repl);
1807         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1808                 so = pmatch[0].rm_so;
1809                 eo = pmatch[0].rm_eo;
1810
1811                 qrealloc(&ds, di + eo + rl, &dssize);
1812                 memcpy(ds + di, sp, eo);
1813                 di += eo;
1814                 if (++i >= nm) {
1815                         /* replace */
1816                         di -= (eo - so);
1817                         nbs = 0;
1818                         for (s = repl; *s; s++) {
1819                                 ds[di++] = c = *s;
1820                                 if (c == '\\') {
1821                                         nbs++;
1822                                         continue;
1823                                 }
1824                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1825                                         di -= ((nbs + 3) >> 1);
1826                                         j = 0;
1827                                         if (c != '&') {
1828                                                 j = c - '0';
1829                                                 nbs++;
1830                                         }
1831                                         if (nbs % 2) {
1832                                                 ds[di++] = c;
1833                                         } else {
1834                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1835                                                 qrealloc(&ds, di + rl + n, &dssize);
1836                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1837                                                 di += n;
1838                                         }
1839                                 }
1840                                 nbs = 0;
1841                         }
1842                 }
1843
1844                 sp += eo;
1845                 if (i == nm) break;
1846                 if (eo == so) {
1847                         if (! (ds[di++] = *sp++)) break;
1848                 }
1849         }
1850
1851         qrealloc(&ds, di + strlen(sp), &dssize);
1852         strcpy(ds + di, sp);
1853         setvar_p(dest, ds);
1854         if (re == &sreg) regfree(re);
1855         return i;
1856 }
1857
1858 static var *exec_builtin(node *op, var *res)
1859 {
1860         int (*to_xxx)(int);
1861         var *tv;
1862         node *an[4];
1863         var  *av[4];
1864         char *as[4];
1865         regmatch_t pmatch[2];
1866         regex_t sreg, *re;
1867         static tsplitter tspl;
1868         node *spl;
1869         uint32_t isr, info;
1870         int nargs;
1871         time_t tt;
1872         char *s, *s1;
1873         int i, l, ll, n;
1874
1875         tv = nvalloc(4);
1876         isr = info = op->info;
1877         op = op->l.n;
1878
1879         av[2] = av[3] = NULL;
1880         for (i=0 ; i<4 && op ; i++) {
1881                 an[i] = nextarg(&op);
1882                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1883                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1884                 isr >>= 1;
1885         }
1886
1887         nargs = i;
1888         if (nargs < (info >> 30))
1889                 runtime_error(EMSG_TOO_FEW_ARGS);
1890
1891         switch (info & OPNMASK) {
1892
1893           case B_a2:
1894 #ifdef CONFIG_FEATURE_AWK_MATH
1895                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1896 #else
1897                 runtime_error(EMSG_NO_MATH);
1898 #endif
1899                 break;
1900
1901           case B_sp:
1902                 if (nargs > 2) {
1903                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1904                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1905                 } else {
1906                         spl = &fsplitter.n;
1907                 }
1908
1909                 n = awk_split(as[0], spl, &s);
1910                 s1 = s;
1911                 clear_array(iamarray(av[1]));
1912                 for (i=1; i<=n; i++)
1913                         setari_u(av[1], i, nextword(&s1));
1914                 free(s);
1915                 setvar_i(res, n);
1916                 break;
1917
1918           case B_ss:
1919                 l = strlen(as[0]);
1920                 i = getvar_i(av[1]) - 1;
1921                 if (i>l) i=l; if (i<0) i=0;
1922                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1923                 if (n<0) n=0;
1924                 s = xmalloc(n+1);
1925                 strncpy(s, as[0]+i, n);
1926                 s[n] = '\0';
1927                 setvar_p(res, s);
1928                 break;
1929                 
1930          case B_an:
1931                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1932                 break;
1933                 
1934          case B_co:
1935                 setvar_i(res, ~(long)getvar_i(av[0]));
1936                 break;
1937
1938          case B_ls:
1939                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1940                 break;
1941
1942          case B_or:
1943                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1944                 break;
1945
1946          case B_rs:
1947                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1948                 break;
1949
1950          case B_xo:
1951                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
1952                 break;
1953
1954           case B_lo:
1955                 to_xxx = tolower;
1956                 goto lo_cont;
1957
1958           case B_up:
1959                 to_xxx = toupper;
1960 lo_cont:
1961                 s1 = s = xstrdup(as[0]);
1962                 while (*s1) {
1963                         *s1 = (*to_xxx)(*s1);
1964                         s1++;
1965                 }
1966                 setvar_p(res, s);
1967                 break;
1968
1969           case B_ix:
1970                 n = 0;
1971                 ll = strlen(as[1]);
1972                 l = strlen(as[0]) - ll;
1973                 if (ll > 0 && l >= 0) {
1974                         if (! icase) {
1975                                 s = strstr(as[0], as[1]);
1976                                 if (s) n = (s - as[0]) + 1;
1977                         } else {
1978                                 /* this piece of code is terribly slow and
1979                                  * really should be rewritten
1980                                  */
1981                                 for (i=0; i<=l; i++) {
1982                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1983                                                 n = i+1;
1984                                                 break;
1985                                         }
1986                                 }
1987                         }
1988                 }
1989                 setvar_i(res, n);
1990                 break;
1991
1992           case B_ti:
1993                 if (nargs > 1)
1994                         tt = getvar_i(av[1]);
1995                 else
1996                         time(&tt);
1997                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1998                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1999                 buf[i] = '\0';
2000                 setvar_s(res, buf);
2001                 break;
2002
2003           case B_ma:
2004                 re = as_regex(an[1], &sreg);
2005                 n = regexec(re, as[0], 1, pmatch, 0);
2006                 if (n == 0) {
2007                         pmatch[0].rm_so++;
2008                         pmatch[0].rm_eo++;
2009                 } else {
2010                         pmatch[0].rm_so = 0;
2011                         pmatch[0].rm_eo = -1;
2012                 }
2013                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2014                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2015                 setvar_i(res, pmatch[0].rm_so);
2016                 if (re == &sreg) regfree(re);
2017                 break;
2018
2019           case B_ge:
2020                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2021                 break;
2022
2023           case B_gs:
2024                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2025                 break;
2026
2027           case B_su:
2028                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2029                 break;
2030         }
2031
2032         nvfree(tv);
2033         return res;
2034 }
2035
2036 /*
2037  * Evaluate node - the heart of the program. Supplied with subtree
2038  * and place where to store result. returns ptr to result.
2039  */
2040 #define XC(n) ((n) >> 8)
2041
2042 static var *evaluate(node *op, var *res)
2043 {
2044         /* This procedure is recursive so we should count every byte */
2045         static var *fnargs = NULL;
2046         static unsigned int seed = 1;
2047         static regex_t sreg;
2048         node *op1;
2049         var *v1;
2050         union {
2051                 var *v;
2052                 char *s;
2053                 double d;
2054                 int i;
2055         } L, R;
2056         uint32_t opinfo;
2057         short opn;
2058         union {
2059                 char *s;
2060                 rstream *rsm;
2061                 FILE *F;
2062                 var *v;
2063                 regex_t *re;
2064                 uint32_t info;
2065         } X;
2066
2067         if (! op)
2068                 return setvar_s(res, NULL);
2069
2070         v1 = nvalloc(2);
2071
2072         while (op) {
2073
2074                 opinfo = op->info;
2075                 opn = (short)(opinfo & OPNMASK);
2076                 lineno = op->lineno;
2077
2078                 /* execute inevitable things */
2079                 op1 = op->l.n;
2080                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2081                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2082                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2083                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2084                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2085
2086                 switch (XC(opinfo & OPCLSMASK)) {
2087
2088                   /* -- iterative node type -- */
2089
2090                   /* test pattern */
2091                   case XC( OC_TEST ):
2092                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2093                                 /* it's range pattern */
2094                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2095                                         op->info |= OF_CHECKED;
2096                                         if (ptest(op1->r.n))
2097                                                 op->info &= ~OF_CHECKED;
2098
2099                                         op = op->a.n;
2100                                 } else {
2101                                         op = op->r.n;
2102                                 }
2103                         } else {
2104                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2105                         }
2106                         break;
2107
2108                   /* just evaluate an expression, also used as unconditional jump */
2109                   case XC( OC_EXEC ):
2110                         break;
2111
2112                   /* branch, used in if-else and various loops */
2113                   case XC( OC_BR ):
2114                         op = istrue(L.v) ? op->a.n : op->r.n;
2115                         break;
2116
2117                   /* initialize for-in loop */
2118                   case XC( OC_WALKINIT ):
2119                         hashwalk_init(L.v, iamarray(R.v));
2120                         break;
2121
2122                   /* get next array item */
2123                   case XC( OC_WALKNEXT ):
2124                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2125                         break;
2126
2127                   case XC( OC_PRINT ):
2128                   case XC( OC_PRINTF ):
2129                         X.F = stdout;
2130                         if (op->r.n) {
2131                                 X.rsm = newfile(R.s);
2132                                 if (! X.rsm->F) {
2133                                         if (opn == '|') {
2134                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2135                                                         bb_perror_msg_and_die("popen");
2136                                                 X.rsm->is_pipe = 1;
2137                                         } else {
2138                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2139                                         }
2140                                 }
2141                                 X.F = X.rsm->F;
2142                         }
2143
2144                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2145                                 if (! op1) {
2146                                         fputs(getvar_s(V[F0]), X.F);
2147                                 } else {
2148                                         while (op1) {
2149                                                 L.v = evaluate(nextarg(&op1), v1);
2150                                                 if (L.v->type & VF_NUMBER) {
2151                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2152                                                                         getvar_i(L.v), TRUE);
2153                                                         fputs(buf, X.F);
2154                                                 } else {
2155                                                         fputs(getvar_s(L.v), X.F);
2156                                                 }
2157
2158                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2159                                         }
2160                                 }
2161                                 fputs(getvar_s(V[ORS]), X.F);
2162
2163                         } else {        /* OC_PRINTF */
2164                                 L.s = awk_printf(op1);
2165                                 fputs(L.s, X.F);
2166                                 free(L.s);
2167                         }
2168                         fflush(X.F);
2169                         break;
2170
2171                   case XC( OC_DELETE ):
2172                         X.info = op1->info & OPCLSMASK;
2173                         if (X.info == OC_VAR) {
2174                                 R.v = op1->l.v;
2175                         } else if (X.info == OC_FNARG) {
2176                                 R.v = &fnargs[op1->l.i];
2177                         } else {
2178                                 runtime_error(EMSG_NOT_ARRAY);
2179                         }
2180
2181                         if (op1->r.n) {
2182                                 clrvar(L.v);
2183                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2184                                 hash_remove(iamarray(R.v), L.s);
2185                         } else {
2186                                 clear_array(iamarray(R.v));
2187                         }
2188                         break;
2189
2190                   case XC( OC_NEWSOURCE ):
2191                         programname = op->l.s;
2192                         break;
2193
2194                   case XC( OC_RETURN ):
2195                         copyvar(res, L.v);
2196                         break;
2197
2198                   case XC( OC_NEXTFILE ):
2199                         nextfile = TRUE;
2200                   case XC( OC_NEXT ):
2201                         nextrec = TRUE;
2202                   case XC( OC_DONE ):
2203                         clrvar(res);
2204                         break;
2205
2206                   case XC( OC_EXIT ):
2207                         awk_exit(L.d);
2208
2209                   /* -- recursive node type -- */
2210
2211                   case XC( OC_VAR ):
2212                         L.v = op->l.v;
2213                         if (L.v == V[NF])
2214                                 split_f0();
2215                         goto v_cont;
2216
2217                   case XC( OC_FNARG ):
2218                         L.v = &fnargs[op->l.i];
2219
2220 v_cont:
2221                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2222                         break;
2223
2224                   case XC( OC_IN ):
2225                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2226                         break;
2227
2228                   case XC( OC_REGEXP ):
2229                         op1 = op;
2230                         L.s = getvar_s(V[F0]);
2231                         goto re_cont;
2232
2233                   case XC( OC_MATCH ):
2234                         op1 = op->r.n;
2235 re_cont:
2236                         X.re = as_regex(op1, &sreg);
2237                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2238                         if (X.re == &sreg) regfree(X.re);
2239                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2240                         break;
2241
2242                   case XC( OC_MOVE ):
2243                         /* if source is a temporary string, jusk relink it to dest */
2244                         if (R.v == v1+1 && R.v->string) {
2245                                 res = setvar_p(L.v, R.v->string);
2246                                 R.v->string = NULL;
2247                         } else {
2248                                 res = copyvar(L.v, R.v);
2249                         }
2250                         break;
2251
2252                   case XC( OC_TERNARY ):
2253                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2254                                 runtime_error(EMSG_POSSIBLE_ERROR);
2255                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2256                         break;
2257
2258                   case XC( OC_FUNC ):
2259                         if (! op->r.f->body.first)
2260                                 runtime_error(EMSG_UNDEF_FUNC);
2261
2262                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2263                         while (op1) {
2264                                 L.v = evaluate(nextarg(&op1), v1);
2265                                 copyvar(R.v, L.v);
2266                                 R.v->type |= VF_CHILD;
2267                                 R.v->x.parent = L.v;
2268                                 if (++R.v - X.v >= op->r.f->nargs)
2269                                         break;
2270                         }
2271
2272                         R.v = fnargs;
2273                         fnargs = X.v;
2274
2275                         L.s = programname;
2276                         res = evaluate(op->r.f->body.first, res);
2277                         programname = L.s;
2278
2279                         nvfree(fnargs);
2280                         fnargs = R.v;
2281                         break;
2282
2283                   case XC( OC_GETLINE ):
2284                   case XC( OC_PGETLINE ):
2285                         if (op1) {
2286                                 X.rsm = newfile(L.s);
2287                                 if (! X.rsm->F) {
2288                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2289                                                 X.rsm->F = popen(L.s, "r");
2290                                                 X.rsm->is_pipe = TRUE;
2291                                         } else {
2292                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2293                                         }
2294                                 }
2295                         } else {
2296                                 if (! iF) iF = next_input_file();
2297                                 X.rsm = iF;
2298                         }
2299
2300                         if (! X.rsm->F) {
2301                                 setvar_i(V[ERRNO], errno);
2302                                 setvar_i(res, -1);
2303                                 break;
2304                         }
2305
2306                         if (! op->r.n)
2307                                 R.v = V[F0];
2308
2309                         L.i = awk_getline(X.rsm, R.v);
2310                         if (L.i > 0) {
2311                                 if (! op1) {
2312                                         incvar(V[FNR]);
2313                                         incvar(V[NR]);
2314                                 }
2315                         }
2316                         setvar_i(res, L.i);
2317                         break;
2318
2319                   /* simple builtins */
2320                   case XC( OC_FBLTIN ):
2321                         switch (opn) {
2322
2323                           case F_in:
2324                                 R.d = (int)L.d;
2325                                 break;
2326
2327                           case F_rn:
2328                                 R.d =  (double)rand() / (double)RAND_MAX;
2329                                 break;
2330
2331 #ifdef CONFIG_FEATURE_AWK_MATH
2332                           case F_co:
2333                                 R.d = cos(L.d);
2334                                 break;
2335
2336                           case F_ex:
2337                                 R.d = exp(L.d);
2338                                 break;
2339
2340                           case F_lg:
2341                                 R.d = log(L.d);
2342                                 break;
2343
2344                           case F_si:
2345                                 R.d = sin(L.d);
2346                                 break;
2347
2348                           case F_sq:
2349                                 R.d = sqrt(L.d);
2350                                 break;
2351 #else
2352                           case F_co:
2353                           case F_ex:
2354                           case F_lg:
2355                           case F_si:
2356                           case F_sq:
2357                                 runtime_error(EMSG_NO_MATH);
2358                                 break;
2359 #endif
2360
2361                           case F_sr:
2362                                 R.d = (double)seed;
2363                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2364                                 srand(seed);
2365                                 break;
2366
2367                           case F_ti:
2368                                 R.d = time(NULL);
2369                                 break;
2370
2371                           case F_le:
2372                                 if (! op1)
2373                                         L.s = getvar_s(V[F0]);
2374                                 R.d = strlen(L.s);
2375                                 break;
2376
2377                           case F_sy:
2378                                 fflush(NULL);
2379                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2380                                                 ? (system(L.s) >> 8) : 0;
2381                                 break;
2382
2383                           case F_ff:
2384                                 if (! op1)
2385                                         fflush(stdout);
2386                                 else {
2387                                         if (L.s && *L.s) {
2388                                                 X.rsm = newfile(L.s);
2389                                                 fflush(X.rsm->F);
2390                                         } else {
2391                                                 fflush(NULL);
2392                                         }
2393                                 }
2394                                 break;
2395
2396                           case F_cl:
2397                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2398                                 if (X.rsm) {
2399                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2400                                         free(X.rsm->buffer);
2401                                         hash_remove(fdhash, L.s);
2402                                 }
2403                                 if (R.i != 0)
2404                                         setvar_i(V[ERRNO], errno);
2405                                 R.d = (double)R.i;
2406                                 break;
2407                         }
2408                         setvar_i(res, R.d);
2409                         break;
2410
2411                   case XC( OC_BUILTIN ):
2412                         res = exec_builtin(op, res);
2413                         break;
2414
2415                   case XC( OC_SPRINTF ):
2416                         setvar_p(res, awk_printf(op1));
2417                         break;
2418
2419                   case XC( OC_UNARY ):
2420                         X.v = R.v;
2421                         L.d = R.d = getvar_i(R.v);
2422                         switch (opn) {
2423                           case 'P':
2424                                 L.d = ++R.d;
2425                                 goto r_op_change;
2426                           case 'p':
2427                                 R.d++;
2428                                 goto r_op_change;
2429                           case 'M':
2430                                 L.d = --R.d;
2431                                 goto r_op_change;
2432                           case 'm':
2433                                 R.d--;
2434                                 goto r_op_change;
2435                           case '!':
2436                                 L.d = istrue(X.v) ? 0 : 1;
2437                                 break;
2438                           case '-':
2439                                 L.d = -R.d;
2440                                 break;
2441                         r_op_change:
2442                                 setvar_i(X.v, R.d);
2443                         }
2444                         setvar_i(res, L.d);
2445                         break;
2446
2447                   case XC( OC_FIELD ):
2448                         R.i = (int)getvar_i(R.v);
2449                         if (R.i == 0) {
2450                                 res = V[F0];
2451                         } else {
2452                                 split_f0();
2453                                 if (R.i > nfields)
2454                                         fsrealloc(R.i);
2455
2456                                 res = &Fields[R.i-1];
2457                         }
2458                         break;
2459
2460                   /* concatenation (" ") and index joining (",") */
2461                   case XC( OC_CONCAT ):
2462                   case XC( OC_COMMA ):
2463                         opn = strlen(L.s) + strlen(R.s) + 2;
2464                         X.s = xmalloc(opn);
2465                         strcpy(X.s, L.s);
2466                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2467                                 L.s = getvar_s(V[SUBSEP]);
2468                                 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
2469                                 strcat(X.s, L.s);
2470                         }
2471                         strcat(X.s, R.s);
2472                         setvar_p(res, X.s);
2473                         break;
2474
2475                   case XC( OC_LAND ):
2476                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2477                         break;
2478
2479                   case XC( OC_LOR ):
2480                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2481                         break;
2482
2483                   case XC( OC_BINARY ):
2484                   case XC( OC_REPLACE ):
2485                         R.d = getvar_i(R.v);
2486                         switch (opn) {
2487                           case '+':
2488                                 L.d += R.d;
2489                                 break;
2490                           case '-':
2491                                 L.d -= R.d;
2492                                 break;
2493                           case '*':
2494                                 L.d *= R.d;
2495                                 break;
2496                           case '/':
2497                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2498                                 L.d /= R.d;
2499                                 break;
2500                           case '&':
2501 #ifdef CONFIG_FEATURE_AWK_MATH
2502                                 L.d = pow(L.d, R.d);
2503 #else
2504                                 runtime_error(EMSG_NO_MATH);
2505 #endif
2506                                 break;
2507                           case '%':
2508                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2509                                 L.d -= (int)(L.d / R.d) * R.d;
2510                                 break;
2511                         }
2512                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2513                         break;
2514
2515                   case XC( OC_COMPARE ):
2516                         if (is_numeric(L.v) && is_numeric(R.v)) {
2517                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2518                         } else {
2519                                 L.s = getvar_s(L.v);
2520                                 R.s = getvar_s(R.v);
2521                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2522                         }
2523                         switch (opn & 0xfe) {
2524                           case 0:
2525                                 R.i = (L.d > 0);
2526                                 break;
2527                           case 2:
2528                                 R.i = (L.d >= 0);
2529                                 break;
2530                           case 4:
2531                                 R.i = (L.d == 0);
2532                                 break;
2533                         }
2534                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2535                         break;
2536
2537                   default:
2538                         runtime_error(EMSG_POSSIBLE_ERROR);
2539                 }
2540                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2541                         op = op->a.n;
2542                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2543                         break;
2544                 if (nextrec)
2545                         break;
2546         }
2547         nvfree(v1);
2548         return res;
2549 }
2550
2551
2552 /* -------- main & co. -------- */
2553
2554 static int awk_exit(int r)
2555 {
2556         unsigned int i;
2557         hash_item *hi;
2558         static var tv;
2559
2560         if (! exiting) {
2561                 exiting = TRUE;
2562                 nextrec = FALSE;
2563                 evaluate(endseq.first, &tv);
2564         }
2565
2566         /* waiting for children */
2567         for (i=0; i<fdhash->csize; i++) {
2568                 hi = fdhash->items[i];
2569                 while (hi) {
2570                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2571                                 pclose(hi->data.rs.F);
2572                         hi = hi->next;
2573                 }
2574         }
2575
2576         exit(r);
2577 }
2578
2579 /* if expr looks like "var=value", perform assignment and return 1,
2580  * otherwise return 0 */
2581 static int is_assignment(const char *expr)
2582 {
2583         char *exprc, *s, *s0, *s1;
2584
2585         exprc = xstrdup(expr);
2586         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2587                 free(exprc);
2588                 return FALSE;
2589         }
2590
2591         *(s++) = '\0';
2592         s0 = s1 = s;
2593         while (*s)
2594                 *(s1++) = nextchar(&s);
2595
2596         *s1 = '\0';
2597         setvar_u(newvar(exprc), s0);
2598         free(exprc);
2599         return TRUE;
2600 }
2601
2602 /* switch to next input file */
2603 static rstream *next_input_file(void)
2604 {
2605         static rstream rsm;
2606         FILE *F = NULL;
2607         char *fname, *ind;
2608         static int files_happen = FALSE;
2609
2610         if (rsm.F) fclose(rsm.F);
2611         rsm.F = NULL;
2612         rsm.pos = rsm.adv = 0;
2613
2614         do {
2615                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2616                         if (files_happen)
2617                                 return NULL;
2618                         fname = "-";
2619                         F = stdin;
2620                 } else {
2621                         ind = getvar_s(incvar(V[ARGIND]));
2622                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2623                         if (fname && *fname && !is_assignment(fname))
2624                                 F = afopen(fname, "r");
2625                 }
2626         } while (!F);
2627
2628         files_happen = TRUE;
2629         setvar_s(V[FILENAME], fname);
2630         rsm.F = F;
2631         return &rsm;
2632 }
2633
2634 int awk_main(int argc, char **argv)
2635 {
2636         unsigned opt;
2637         char *opt_F, *opt_v, *opt_W;
2638         char *s, *s1;
2639         int i, j, c, flen;
2640         var *v;
2641         static var tv;
2642         char **envp;
2643         static int from_file = FALSE;
2644         rstream *rsm;
2645         FILE *F, *stdfiles[3];
2646         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2647
2648         /* allocate global buffer */
2649         buf = xmalloc(MAXVARFMT+1);
2650
2651         vhash = hash_init();
2652         ahash = hash_init();
2653         fdhash = hash_init();
2654         fnhash = hash_init();
2655
2656         /* initialize variables */
2657         for (i=0;  *vNames;  i++) {
2658                 V[i] = v = newvar(nextword(&vNames));
2659                 if (*vValues != '\377')
2660                         setvar_s(v, nextword(&vValues));
2661                 else
2662                         setvar_i(v, 0);
2663
2664                 if (*vNames == '*') {
2665                         v->type |= VF_SPECIAL;
2666                         vNames++;
2667                 }
2668         }
2669
2670         handle_special(V[FS]);
2671         handle_special(V[RS]);
2672
2673         stdfiles[0] = stdin;
2674         stdfiles[1] = stdout;
2675         stdfiles[2] = stderr;
2676         for (i=0; i<3; i++) {
2677                 rsm = newfile(nextword(&stdnames));
2678                 rsm->F = stdfiles[i];
2679         }
2680
2681         for (envp=environ; *envp; envp++) {
2682                 s = xstrdup(*envp);
2683                 s1 = strchr(s, '=');
2684                 if (!s1) {
2685                         goto keep_going;
2686                 }
2687                 *(s1++) = '\0';
2688                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2689 keep_going:
2690                 free(s);
2691         }
2692
2693         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2694         if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2695         if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2696         if (opt & 0x4) { // -f
2697                 from_file = TRUE;
2698                 F = afopen(programname, "r");
2699                 s = NULL;
2700                 /* one byte is reserved for some trick in next_token */
2701                 if (fseek(F, 0, SEEK_END) == 0) {
2702                         flen = ftell(F);
2703                         s = xmalloc(flen+4);
2704                         fseek(F, 0, SEEK_SET);
2705                         i = 1 + fread(s+1, 1, flen, F);
2706                 } else {
2707                         for (i=j=1; j>0; i+=j) {
2708                                 s = (char *)xrealloc(s, i+4096);
2709                                 j = fread(s+i, 1, 4094, F);
2710                         }
2711                 }
2712                 s[i] = '\0';
2713                 fclose(F);
2714                 parse_program(s+1);
2715                 free(s);
2716         }
2717         if (opt & 0x8) // -W
2718                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2719
2720         if (!from_file) {
2721                 if (argc == optind)
2722                         bb_show_usage();
2723                 programname = "cmd. line";
2724                 parse_program(argv[optind++]);
2725
2726         }
2727
2728         /* fill in ARGV array */
2729         setvar_i(V[ARGC], argc - optind + 1);
2730         setari_u(V[ARGV], 0, "awk");
2731         for (i = optind; i < argc; i++)
2732                 setari_u(V[ARGV], i+1-optind, argv[i]);
2733
2734         evaluate(beginseq.first, &tv);
2735         if (! mainseq.first && ! endseq.first)
2736                 awk_exit(EXIT_SUCCESS);
2737
2738         /* input file could already be opened in BEGIN block */
2739         if (! iF) iF = next_input_file();
2740
2741         /* passing through input files */
2742         while (iF) {
2743
2744                 nextfile = FALSE;
2745                 setvar_i(V[FNR], 0);
2746
2747                 while ((c = awk_getline(iF, V[F0])) > 0) {
2748
2749                         nextrec = FALSE;
2750                         incvar(V[NR]);
2751                         incvar(V[FNR]);
2752                         evaluate(mainseq.first, &tv);
2753
2754                         if (nextfile)
2755                                 break;
2756                 }
2757
2758                 if (c < 0)
2759                         runtime_error(strerror(errno));
2760
2761                 iF = next_input_file();
2762
2763         }
2764
2765         awk_exit(EXIT_SUCCESS);
2766
2767         return 0;
2768 }