awk: style cleanup. A lot of rw data moved to ro
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "busybox.h"
11 #include "xregex.h"
12 #include <math.h>
13
14
/* Size limit handed to fmt_num() when getvar_s() formats a number into buf */
#define MAXVARFMT  240
/* Smallest number of var slots nvalloc() puts into a newly allocated block */
#define MINNVBLOCK 64
17
/* Variable flags, kept in var.type */
#define VF_NUMBER   (1 << 0)    /* primary type is number */
#define VF_ARRAY    (1 << 1)    /* variable is an array */

#define VF_CACHED   (1 << 8)    /* num/str value has a cached str/num equivalent */
#define VF_USER     (1 << 9)    /* user input (may be a numeric string) */
#define VF_SPECIAL  (1 << 10)   /* needs extra handling when changed */
#define VF_WALK     (1 << 11)   /* variable owns an alloc'd x.walker list */
#define VF_FSTR     (1 << 12)   /* string points into the fstring buffer */
#define VF_CHILD    (1 << 13)   /* function arg; x.parent points to the source */
#define VF_DIRTY    (1 << 14)   /* variable was set explicitly */

/* Static flags: these survive when the value of a variable is replaced */
#define VF_DONTTOUCH \
	(VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
32
/* A single awk variable (scalar, array handle or function argument) */
typedef struct var_s {
	unsigned short type;    /* VF_* flags */
	double number;          /* numeric value */
	char *string;           /* string value, NULL when empty */
	union {
		int aidx;               /* function argument index (compilation stage) */
		struct xhash_s *array;  /* element hash when this is an array */
		struct var_s *parent;   /* actual parameter behind a function argument */
		char **walker;          /* list of array elements (for..in) */
	} x;
} var;
45
/* Chain of nodes: pattern-action chain, BEGIN, END or a function body */
typedef struct chain_s {
	struct node_s *first;   /* head of the chain */
	struct node_s *last;    /* tail of the chain */
	char *programname;
} chain;
52
53 /* Function */
54 typedef struct func_s {
55         unsigned short nargs;
56         struct chain_s body;
57 } func;
58
/* Stream used for I/O, together with its buffering state */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;
} rstream;
68
69 typedef struct hash_item_s {
70         union {
71                 struct var_s v;                 /* variable/array hash */
72                 struct rstream_s rs;    /* redirect streams hash */
73                 struct func_s f;                /* functions hash */
74         } data;
75         struct hash_item_s *next;       /* next in chain */
76         char name[1];                           /* really it's longer */
77 } hash_item;
78
/* Chained hash table */
typedef struct xhash_s {
	unsigned nel;       /* number of elements */
	unsigned csize;     /* current number of buckets */
	unsigned nprime;    /* index in PRIMES[] of the next table size */
	unsigned glen;      /* total length of all item names */
	struct hash_item_s **items;     /* bucket array */
} xhash;
86
87 /* Tree node */
88 typedef struct node_s {
89         uint32_t info;
90         unsigned short lineno;
91         union {
92                 struct node_s *n;
93                 var *v;
94                 int i;
95                 char *s;
96                 regex_t *re;
97         } l;
98         union {
99                 struct node_s *n;
100                 regex_t *ire;
101                 func *f;
102                 int argno;
103         } r;
104         union {
105                 struct node_s *n;
106         } a;
107 } node;
108
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
111         int size;
112         var *pos;
113         struct nvblock_s *prev;
114         struct nvblock_s *next;
115         var nv[0];
116 } nvblock;
117
118 typedef struct tsplitter_s {
119         node n;
120         regex_t re[2];
121 } tsplitter;
122
/* Simple token classes, one bit each.
 * Bit order matters: next_token() starts at the first class and moves to
 * the next bit every time it meets a class switch marker in tokenlist.
 */
#define TC_SEQSTART     (1 << 0)        /* ( */
#define TC_SEQTERM      (1 << 1)        /* ) */
#define TC_REGEXP       (1 << 2)        /* /.../ */
#define TC_OUTRDR       (1 << 3)        /* | > >> */
#define TC_UOPPOST      (1 << 4)        /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)        /* unary prefix operator */
#define TC_BINOPX       (1 << 6)        /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)        /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)       /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)       /* ] */
#define TC_GRPSTART     (1 << 12)       /* { */
#define TC_GRPTERM      (1 << 13)       /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)       /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* either kind of unary prefix operator */
#define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

/* Combined token classes */
#define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND \
	(TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
	TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT     (TC_STATX | TC_WHILE)
#define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

/* word tokens: they cannot mean something else when not expected */
#define TC_WORD \
	(TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
	TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* newlines following these are discarded */
#define TC_NOTERM \
	(TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
	TC_BINOP | TC_OPTERM)

/* tokens an expression can begin with */
#define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* tokens a group can begin with */
#define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* When the previous token class is in CONCAT1 and the next one is in
 * CONCAT2, a concatenation operator is inserted between them.
 */
#define TC_CONCAT1 \
	(TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
	TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
185
/* Operator flags: single bits above the operation class byte */
#define OF_RES1         (1 << 16)
#define OF_RES2         (1 << 17)
#define OF_STR1         (1 << 18)
#define OF_STR2         (1 << 19)
#define OF_NUM1         (1 << 20)
#define OF_CHECKED      (1 << 21)

/* Combined operator flags. First letter describes operand 1, second letter
 * operand 2: x = no flag, V = RES, S = RES+STR, N = RES+NUM.
 */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
204
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument into that highest byte. The argument is
 * parenthesized so that expressions such as P(a | b) shift as a whole, and
 * the shift is done on uint32_t because values like 0xd6 do not fit into a
 * signed int once they are moved up by 24 bits.
 */
#define P(x)    ((uint32_t)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000
215
/* Operation classes (they live in the OPCLSMASK byte of an info value) */

#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
	OC_DELETE   = 0x0100,
	OC_EXEC     = 0x0200,
	OC_NEWSOURCE = 0x0300,
	OC_PRINT    = 0x0400,
	OC_PRINTF   = 0x0500,
	OC_WALKINIT = 0x0600,

	OC_BR       = 0x0700,
	OC_BREAK    = 0x0800,
	OC_CONTINUE = 0x0900,
	OC_EXIT     = 0x0a00,
	OC_NEXT     = 0x0b00,
	OC_NEXTFILE = 0x0c00,
	OC_TEST     = 0x0d00,
	OC_WALKNEXT = 0x0e00,

	OC_BINARY   = 0x1000,
	OC_BUILTIN  = 0x1100,
	OC_COLON    = 0x1200,
	OC_COMMA    = 0x1300,
	OC_COMPARE  = 0x1400,
	OC_CONCAT   = 0x1500,
	OC_FBLTIN   = 0x1600,
	OC_FIELD    = 0x1700,
	OC_FNARG    = 0x1800,
	OC_FUNC     = 0x1900,
	OC_GETLINE  = 0x1a00,
	OC_IN       = 0x1b00,
	OC_LAND     = 0x1c00,
	OC_LOR      = 0x1d00,
	OC_MATCH    = 0x1e00,
	OC_MOVE     = 0x1f00,
	OC_PGETLINE = 0x2000,
	OC_REGEXP   = 0x2100,
	OC_REPLACE  = 0x2200,
	OC_RETURN   = 0x2300,
	OC_SPRINTF  = 0x2400,
	OC_TERNARY  = 0x2500,
	OC_UNARY    = 0x2600,
	OC_VAR      = 0x2700,
	OC_DONE     = 0x2800,

	ST_IF       = 0x3000,
	ST_DO       = 0x3100,
	ST_FOR      = 0x3200,
	ST_WHILE    = 0x3300
};
242
/* Simple builtins (used together with OC_FBLTIN in tokeninfo) */
enum {
	F_in = 0,   /* int */
	F_rn,       /* rand */
	F_co,       /* cos */
	F_ex,       /* exp */
	F_lg,       /* log */
	F_si,       /* sin */
	F_sq,       /* sqrt */
	F_sr,       /* srand */
	F_ti,       /* systime */
	F_le,       /* length */
	F_sy,       /* system */
	F_ff,       /* fflush */
	F_cl        /* close */
};
248
/* Builtins (used together with OC_BUILTIN in tokeninfo) */
enum {
	B_a2 = 0,   /* atan2 */
	B_ix,       /* index */
	B_ma,       /* match */
	B_sp,       /* split */
	B_ss,       /* substr */
	B_ti,       /* strftime */
	B_lo,       /* tolower */
	B_up,       /* toupper */
	B_ge,       /* gensub */
	B_gs,       /* gsub */
	B_su,       /* sub */
	B_an,       /* and */
	B_co,       /* compl */
	B_ls,       /* lshift */
	B_or,       /* or */
	B_rs,       /* rshift */
	B_xo,       /* xor */
};
255
256 /* tokens and their corresponding info values */
257
258 #define NTC             "\377"          /* switch to next token class (tc<<1) */
259 #define NTCC    '\377'
260
261 #define OC_B    OC_BUILTIN
262
263 static const char tokenlist[] =
264         "\1("       NTC
265         "\1)"       NTC
266         "\1/"       NTC                                 /* REGEXP */
267         "\2>>"      "\1>"       "\1|"       NTC         /* OUTRDR */
268         "\2++"      "\2--"      NTC                     /* UOPPOST */
269         "\2++"      "\2--"      "\1$"       NTC         /* UOPPRE1 */
270         "\2=="      "\1="       "\2+="      "\2-="      /* BINOPX */
271         "\2*="      "\2/="      "\2%="      "\2^="
272         "\1+"       "\1-"       "\3**="     "\2**"
273         "\1/"       "\1%"       "\1^"       "\1*"
274         "\2!="      "\2>="      "\2<="      "\1>"
275         "\1<"       "\2!~"      "\1~"       "\2&&"
276         "\2||"      "\1?"       "\1:"       NTC
277         "\2in"      NTC
278         "\1,"       NTC
279         "\1|"       NTC
280         "\1+"       "\1-"       "\1!"       NTC         /* UOPPRE2 */
281         "\1]"       NTC
282         "\1{"       NTC
283         "\1}"       NTC
284         "\1;"       NTC
285         "\1\n"      NTC
286         "\2if"      "\2do"      "\3for"     "\5break"   /* STATX */
287         "\10continue"           "\6delete"  "\5print"
288         "\6printf"  "\4next"    "\10nextfile"
289         "\6return"  "\4exit"    NTC
290         "\5while"   NTC
291         "\4else"    NTC
292
293         "\3and"     "\5compl"   "\6lshift"  "\2or"
294         "\6rshift"  "\3xor"
295         "\5close"   "\6system"  "\6fflush"  "\5atan2"   /* BUILTIN */
296         "\3cos"     "\3exp"     "\3int"     "\3log"
297         "\4rand"    "\3sin"     "\4sqrt"    "\5srand"
298         "\6gensub"  "\4gsub"    "\5index"   "\6length"
299         "\5match"   "\5split"   "\7sprintf" "\3sub"
300         "\6substr"  "\7systime" "\10strftime"
301         "\7tolower" "\7toupper" NTC
302         "\7getline" NTC
303         "\4func"    "\10function"   NTC
304         "\5BEGIN"   NTC
305         "\3END"     "\0"
306         ;
307
308 static const uint32_t tokeninfo[] = {
309         0,
310         0,
311         OC_REGEXP,
312         xS|'a',     xS|'w',     xS|'|',
313         OC_UNARY|xV|P(9)|'p',       OC_UNARY|xV|P(9)|'m',
314         OC_UNARY|xV|P(9)|'P',       OC_UNARY|xV|P(9)|'M',
315             OC_FIELD|xV|P(5),
316         OC_COMPARE|VV|P(39)|5,      OC_MOVE|VV|P(74),
317             OC_REPLACE|NV|P(74)|'+',    OC_REPLACE|NV|P(74)|'-',
318         OC_REPLACE|NV|P(74)|'*',    OC_REPLACE|NV|P(74)|'/',
319             OC_REPLACE|NV|P(74)|'%',    OC_REPLACE|NV|P(74)|'&',
320         OC_BINARY|NV|P(29)|'+',     OC_BINARY|NV|P(29)|'-',
321             OC_REPLACE|NV|P(74)|'&',    OC_BINARY|NV|P(15)|'&',
322         OC_BINARY|NV|P(25)|'/',     OC_BINARY|NV|P(25)|'%',
323             OC_BINARY|NV|P(15)|'&',     OC_BINARY|NV|P(25)|'*',
324         OC_COMPARE|VV|P(39)|4,      OC_COMPARE|VV|P(39)|3,
325             OC_COMPARE|VV|P(39)|0,      OC_COMPARE|VV|P(39)|1,
326         OC_COMPARE|VV|P(39)|2,      OC_MATCH|Sx|P(45)|'!',
327             OC_MATCH|Sx|P(45)|'~',      OC_LAND|Vx|P(55),
328         OC_LOR|Vx|P(59),            OC_TERNARY|Vx|P(64)|'?',
329             OC_COLON|xx|P(67)|':',
330         OC_IN|SV|P(49),
331         OC_COMMA|SS|P(80),
332         OC_PGETLINE|SV|P(37),
333         OC_UNARY|xV|P(19)|'+',      OC_UNARY|xV|P(19)|'-',
334             OC_UNARY|xV|P(19)|'!',
335         0,
336         0,
337         0,
338         0,
339         0,
340         ST_IF,          ST_DO,          ST_FOR,         OC_BREAK,
341         OC_CONTINUE,                    OC_DELETE|Vx,   OC_PRINT,
342         OC_PRINTF,      OC_NEXT,        OC_NEXTFILE,
343         OC_RETURN|Vx,   OC_EXIT|Nx,
344         ST_WHILE,
345         0,
346
347         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
348         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
349         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
350         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
351         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
352         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
353         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
354         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
355         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
356         OC_GETLINE|SV|P(0),
357         0,      0,
358         0,
359         0
360 };
361
/* Indexes of the internal variables in V[]. The order has to match the
 * names listed in vNames; F0 is the unnamed field 0 ("$").
 */
enum {
	CONVFMT = 0,
	OFMT,
	FS,
	OFS,
	ORS,
	RS,
	RT,
	FILENAME,
	SUBSEP,
	ARGIND,
	ARGC,
	ARGV,
	ERRNO,
	FNR,
	NR,
	NF,
	IGNORECASE,
	ENVIRON,
	F0,
	_intvarcount_
};
372
/* Names of the internal variables, NUL-separated, in enum order.
 * An asterisk placed right after a name's NUL marks that variable as
 * SPECIAL; "$" is the unnamed field 0. An empty name ends the list.
 */
static const char vNames[] =
	"CONVFMT\0"
	"OFMT\0"
	"FS\0*"
	"OFS\0"
	"ORS\0"
	"RS\0*"
	"RT\0"
	"FILENAME\0"
	"SUBSEP\0"
	"ARGIND\0"
	"ARGC\0"
	"ARGV\0"
	"ERRNO\0"
	"FNR\0"
	"NR\0"
	"NF\0*"
	"IGNORECASE\0*"
	"ENVIRON\0"
	"$\0*"
	"\0";
380
/* Initial string values, NUL-separated, lined up with the first names of
 * vNames. The list stops at the \377 byte.
 * NOTE(review): the name-to-value pairing noted below is positional -
 * confirm against the code that consumes both tables.
 */
static const char vValues[] =
	"%.6g\0"        /* CONVFMT */
	"%.6g\0"        /* OFMT */
	" \0"           /* FS */
	" \0"           /* OFS */
	"\n\0"          /* ORS */
	"\n\0"          /* RS */
	"\0"            /* RT */
	"\0"            /* FILENAME */
	"\034\0"        /* SUBSEP */
	"\377";
386
/* Hash sizing: a table starts with FIRST_PRIME buckets (see hash_init) and
 * hash_rebuild() moves it through the sizes in PRIMES[], one step at a time.
 * FIRST_PRIME has no trailing semicolon so that it is usable inside any
 * expression, not only as a complete statement.
 */
#define FIRST_PRIME 61
static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
391
392 /* globals */
393
394 extern char **environ;
395
396 static var * V[_intvarcount_];
397 static chain beginseq, mainseq, endseq, *seq;
398 static int nextrec, nextfile;
399 static node *break_ptr, *continue_ptr;
400 static rstream *iF;
401 static xhash *vhash, *ahash, *fdhash, *fnhash;
402 static char *programname;
403 static short lineno;
404 static int is_f0_split;
405 static int nfields;
406 static var *Fields;
407 static tsplitter fsplitter, rsplitter;
408 static nvblock *cb;
409 static char *pos;
410 static char *buf;
411 static int icase;
412 static int exiting;
413
/* The current token, as left behind by next_token() */
static struct {
	uint32_t tclass;    /* token class */
	uint32_t info;      /* entry of tokeninfo[] for a matched token */
	char *string;       /* text of a string, regexp or name token */
	double number;      /* value of a number token */
	short lineno;       /* line counter maintained while scanning */
	int rollback;       /* when set, next_token() clears it and reads no new token */
} t;
422
423 /* function prototypes */
424 static void handle_special(var *);
425 static node *parse_expr(uint32_t);
426 static void chain_group(void);
427 static var *evaluate(node *, var *);
428 static rstream *next_input_file(void);
429 static int fmt_num(char *, int, const char *, double, int);
430 static int awk_exit(int) ATTRIBUTE_NORETURN;
431
/* ---- error handling ---- */

/* Message texts handed to syntax_error() / runtime_error() */
static const char EMSG_INTERNAL_ERROR[] = "Internal error";
static const char EMSG_UNEXP_EOS[]      = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[]    = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[]    = "Division by zero";
static const char EMSG_INV_FMT[]        = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[]   = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[]      = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[]     = "Call to undefined function";
#if !ENABLE_FEATURE_AWK_MATH
/* only present when math support is configured out */
static const char EMSG_NO_MATH[]        = "Math support is not compiled in";
#endif
446
447 static void zero_out_var(var * vp)
448 {
449         memset(vp, 0, sizeof(*vp));
450 }
451
452 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
453 static void syntax_error(const char * const message)
454 {
455         bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
456 }
457
458 #define runtime_error(x) syntax_error(x)
459
460
461 /* ---- hash stuff ---- */
462
/* Hash a NUL-terminated name: every step multiplies the running value by
 * 63 (written as shift minus self) and adds the next character.
 * The caller reduces the result modulo the table size.
 */
static unsigned hashidx(const char *name)
{
	unsigned h;

	for (h = 0; *name != '\0'; name++)
		h = (h << 6) - h + *name;
	return h;
}
470
471 /* create new hash */
472 static xhash *hash_init(void)
473 {
474         xhash *newhash;
475
476         newhash = xzalloc(sizeof(xhash));
477         newhash->csize = FIRST_PRIME;
478         newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
479
480         return newhash;
481 }
482
483 /* find item in hash, return ptr to data, NULL if not found */
484 static void *hash_search(xhash *hash, const char *name)
485 {
486         hash_item *hi;
487
488         hi = hash->items [ hashidx(name) % hash->csize ];
489         while (hi) {
490                 if (strcmp(hi->name, name) == 0)
491                         return &(hi->data);
492                 hi = hi->next;
493         }
494         return NULL;
495 }
496
497 /* grow hash if it becomes too big */
498 static void hash_rebuild(xhash *hash)
499 {
500         unsigned newsize, i, idx;
501         hash_item **newitems, *hi, *thi;
502
503         if (hash->nprime == NPRIMES)
504                 return;
505
506         newsize = PRIMES[hash->nprime++];
507         newitems = xzalloc(newsize * sizeof(hash_item *));
508
509         for (i=0; i<hash->csize; i++) {
510                 hi = hash->items[i];
511                 while (hi) {
512                         thi = hi;
513                         hi = thi->next;
514                         idx = hashidx(thi->name) % newsize;
515                         thi->next = newitems[idx];
516                         newitems[idx] = thi;
517                 }
518         }
519
520         free(hash->items);
521         hash->csize = newsize;
522         hash->items = newitems;
523 }
524
525 /* find item in hash, add it if necessary. Return ptr to data */
526 static void *hash_find(xhash *hash, const char *name)
527 {
528         hash_item *hi;
529         unsigned idx;
530         int l;
531
532         hi = hash_search(hash, name);
533         if (! hi) {
534                 if (++hash->nel / hash->csize > 10)
535                         hash_rebuild(hash);
536
537                 l = strlen(name) + 1;
538                 hi = xzalloc(sizeof(hash_item) + l);
539                 memcpy(hi->name, name, l);
540
541                 idx = hashidx(name) % hash->csize;
542                 hi->next = hash->items[idx];
543                 hash->items[idx] = hi;
544                 hash->glen += l;
545         }
546         return &(hi->data);
547 }
548
/* Typed wrappers around hash_find(): look the name up, creating it if needed */
#define findvar(hash, name)     ((var*)     hash_find((hash), (name)))  /* in a given hash */
#define newvar(name)            ((var*)     hash_find(vhash,  (name)))  /* in vhash */
#define newfile(name)           ((rstream*) hash_find(fdhash, (name)))  /* in fdhash */
#define newfunc(name)           ((func*)    hash_find(fnhash, (name)))  /* in fnhash */
553
554 static void hash_remove(xhash *hash, const char *name)
555 {
556         hash_item *hi, **phi;
557
558         phi = &(hash->items[ hashidx(name) % hash->csize ]);
559         while (*phi) {
560                 hi = *phi;
561                 if (strcmp(hi->name, name) == 0) {
562                         hash->glen -= (strlen(name) + 1);
563                         hash->nel--;
564                         *phi = hi->next;
565                         free(hi);
566                         break;
567                 }
568                 phi = &(hi->next);
569         }
570 }
571
572 /* ------ some useful functions ------ */
573
574 static void skip_spaces(char **s)
575 {
576         char *p = *s;
577
578         while (*p == ' ' || *p == '\t' ||
579                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
580                 p++;
581         }
582         *s = p;
583 }
584
/* Return the string *s points to and move *s just past its terminating NUL */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
593
/* Fetch the next character from *s and advance *s past it. A backslash
 * is handed to bb_process_escape_sequence(); when that returns a backslash
 * without consuming anything, the character after the backslash is taken
 * literally.
 */
static char nextchar(char **s)
{
	char *after;
	char c = **s;

	(*s)++;
	if (c != '\\')
		return c;

	after = *s;
	c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == after) {
		c = **s;
		(*s)++;
	}
	return c;
}
604
605 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
606 {
607         return (isalnum(c) || c == '_');
608 }
609
/* Open path with xfopen(), except that the name "-" stands for stdin */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;
	return xfopen(path, mode);
}
614
615 /* -------- working with variables (set/get/copy/etc) -------- */
616
617 static xhash *iamarray(var *v)
618 {
619         var *a = v;
620
621         while (a->type & VF_CHILD)
622                 a = a->x.parent;
623
624         if (! (a->type & VF_ARRAY)) {
625                 a->type |= VF_ARRAY;
626                 a->x.array = hash_init();
627         }
628         return a->x.array;
629 }
630
631 static void clear_array(xhash *array)
632 {
633         unsigned i;
634         hash_item *hi, *thi;
635
636         for (i=0; i<array->csize; i++) {
637                 hi = array->items[i];
638                 while (hi) {
639                         thi = hi;
640                         hi = hi->next;
641                         free(thi->data.v.string);
642                         free(thi);
643                 }
644                 array->items[i] = NULL;
645         }
646         array->glen = array->nel = 0;
647 }
648
649 /* clear a variable */
650 static var *clrvar(var *v)
651 {
652         if (!(v->type & VF_FSTR))
653                 free(v->string);
654
655         v->type &= VF_DONTTOUCH;
656         v->type |= VF_DIRTY;
657         v->string = NULL;
658         return v;
659 }
660
661 /* assign string value to variable */
662 static var *setvar_p(var *v, char *value)
663 {
664         clrvar(v);
665         v->string = value;
666         handle_special(v);
667
668         return v;
669 }
670
671 /* same as setvar_p but make a copy of string */
672 static var *setvar_s(var *v, const char *value)
673 {
674         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
675 }
676
677 /* same as setvar_s but set USER flag */
678 static var *setvar_u(var *v, const char *value)
679 {
680         setvar_s(v, value);
681         v->type |= VF_USER;
682         return v;
683 }
684
685 /* set array element to user string */
686 static void setari_u(var *a, int idx, const char *s)
687 {
688         var *v;
689         static char sidx[12];
690
691         sprintf(sidx, "%d", idx);
692         v = findvar(iamarray(a), sidx);
693         setvar_u(v, s);
694 }
695
696 /* assign numeric value to variable */
697 static var *setvar_i(var *v, double value)
698 {
699         clrvar(v);
700         v->type |= VF_NUMBER;
701         v->number = value;
702         handle_special(v);
703         return v;
704 }
705
706 static char *getvar_s(var *v)
707 {
708         /* if v is numeric and has no cached string, convert it to string */
709         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
710                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
711                 v->string = xstrdup(buf);
712                 v->type |= VF_CACHED;
713         }
714         return (v->string == NULL) ? "" : v->string;
715 }
716
717 static double getvar_i(var *v)
718 {
719         char *s;
720
721         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
722                 v->number = 0;
723                 s = v->string;
724                 if (s && *s) {
725                         v->number = strtod(s, &s);
726                         if (v->type & VF_USER) {
727                                 skip_spaces(&s);
728                                 if (*s != '\0')
729                                         v->type &= ~VF_USER;
730                         }
731                 } else {
732                         v->type &= ~VF_USER;
733                 }
734                 v->type |= VF_CACHED;
735         }
736         return v->number;
737 }
738
739 static var *copyvar(var *dest, const var *src)
740 {
741         if (dest != src) {
742                 clrvar(dest);
743                 dest->type |= (src->type & ~VF_DONTTOUCH);
744                 dest->number = src->number;
745                 if (src->string)
746                         dest->string = xstrdup(src->string);
747         }
748         handle_special(dest);
749         return dest;
750 }
751
752 static var *incvar(var *v)
753 {
754         return setvar_i(v, getvar_i(v)+1.);
755 }
756
757 /* return true if v is number or numeric string */
758 static int is_numeric(var *v)
759 {
760         getvar_i(v);
761         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
762 }
763
764 /* return 1 when value of v corresponds to true, 0 otherwise */
765 static int istrue(var *v)
766 {
767         if (is_numeric(v))
768                 return (v->number == 0) ? 0 : 1;
769         else
770                 return (v->string && *(v->string)) ? 1 : 0;
771 }
772
773 /* temporary variables allocator. Last allocated should be first freed */
774 static var *nvalloc(int n)
775 {
776         nvblock *pb = NULL;
777         var *v, *r;
778         int size;
779
780         while (cb) {
781                 pb = cb;
782                 if ((cb->pos - cb->nv) + n <= cb->size) break;
783                 cb = cb->next;
784         }
785
786         if (! cb) {
787                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788                 cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
789                 cb->size = size;
790                 cb->pos = cb->nv;
791                 cb->prev = pb;
792                 cb->next = NULL;
793                 if (pb) pb->next = cb;
794         }
795
796         v = r = cb->pos;
797         cb->pos += n;
798
799         while (v < cb->pos) {
800                 v->type = 0;
801                 v->string = NULL;
802                 v++;
803         }
804
805         return r;
806 }
807
808 static void nvfree(var *v)
809 {
810         var *p;
811
812         if (v < cb->nv || v >= cb->pos)
813                 runtime_error(EMSG_INTERNAL_ERROR);
814
815         for (p=v; p<cb->pos; p++) {
816                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817                         clear_array(iamarray(p));
818                         free(p->x.array->items);
819                         free(p->x.array);
820                 }
821                 if (p->type & VF_WALK)
822                         free(p->x.walker);
823
824                 clrvar(p);
825         }
826
827         cb->pos = v;
828         while (cb->prev && cb->pos == cb->nv) {
829                 cb = cb->prev;
830         }
831 }
832
833 /* ------- awk program text parsing ------- */
834
835 /* Parse next token pointed by global pos, place results into global t.
836  * If token isn't expected, give away. Return token class
837  */
838 static uint32_t next_token(uint32_t expected)
839 {
840         static int concat_inserted;
841         static uint32_t save_tclass, save_info;
842         static uint32_t ltclass = TC_OPTERM;
843
844         char *p, *pp, *s;
845         const char *tl;
846         uint32_t tc;
847         const uint32_t *ti;
848         int l;
849
850         if (t.rollback) {
851
852                 t.rollback = FALSE;
853
854         } else if (concat_inserted) {
855
856                 concat_inserted = FALSE;
857                 t.tclass = save_tclass;
858                 t.info = save_info;
859
860         } else {
861
862                 p = pos;
863
864         readnext:
865                 skip_spaces(&p);
866                 lineno = t.lineno;
867                 if (*p == '#')
868                         while (*p != '\n' && *p != '\0') p++;
869
870                 if (*p == '\n')
871                         t.lineno++;
872
873                 if (*p == '\0') {
874                         tc = TC_EOF;
875
876                 } else if (*p == '\"') {
877                         /* it's a string */
878                         t.string = s = ++p;
879                         while (*p != '\"') {
880                                 if (*p == '\0' || *p == '\n')
881                                         syntax_error(EMSG_UNEXP_EOS);
882                                 *(s++) = nextchar(&p);
883                         }
884                         p++;
885                         *s = '\0';
886                         tc = TC_STRING;
887
888                 } else if ((expected & TC_REGEXP) && *p == '/') {
889                         /* it's regexp */
890                         t.string = s = ++p;
891                         while (*p != '/') {
892                                 if (*p == '\0' || *p == '\n')
893                                         syntax_error(EMSG_UNEXP_EOS);
894                                 if ((*s++ = *p++) == '\\') {
895                                         pp = p;
896                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
897                                         if (*pp == '\\') *s++ = '\\';
898                                         if (p == pp) *s++ = *p++;
899                                 }
900                         }
901                         p++;
902                         *s = '\0';
903                         tc = TC_REGEXP;
904
905                 } else if (*p == '.' || isdigit(*p)) {
906                         /* it's a number */
907                         t.number = strtod(p, &p);
908                         if (*p == '.')
909                                 syntax_error(EMSG_UNEXP_TOKEN);
910                         tc = TC_NUMBER;
911
912                 } else {
913                         /* search for something known */
914                         tl = tokenlist;
915                         tc = 0x00000001;
916                         ti = tokeninfo;
917                         while (*tl) {
918                                 l = *(tl++);
919                                 if (l == NTCC) {
920                                         tc <<= 1;
921                                         continue;
922                                 }
923                                 /* if token class is expected, token
924                                  * matches and it's not a longer word,
925                                  * then this is what we are looking for
926                                  */
927                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
928                                 *tl == *p && strncmp(p, tl, l) == 0 &&
929                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
930                                         t.info = *ti;
931                                         p += l;
932                                         break;
933                                 }
934                                 ti++;
935                                 tl += l;
936                         }
937
938                         if (!*tl) {
939                                 /* it's a name (var/array/function),
940                                  * otherwise it's something wrong
941                                  */
942                                 if (! isalnum_(*p))
943                                         syntax_error(EMSG_UNEXP_TOKEN);
944
945                                 t.string = --p;
946                                 while (isalnum_(*(++p))) {
947                                         *(p-1) = *p;
948                                 }
949                                 *(p-1) = '\0';
950                                 tc = TC_VARIABLE;
951                                 /* also consume whitespace between functionname and bracket */
952                                 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
953                                 if (*p == '(') {
954                                         tc = TC_FUNCTION;
955                                 } else {
956                                         if (*p == '[') {
957                                                 p++;
958                                                 tc = TC_ARRAY;
959                                         }
960                                 }
961                         }
962                 }
963                 pos = p;
964
965                 /* skipping newlines in some cases */
966                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
967                         goto readnext;
968
969                 /* insert concatenation operator when needed */
970                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
971                         concat_inserted = TRUE;
972                         save_tclass = tc;
973                         save_info = t.info;
974                         tc = TC_BINOP;
975                         t.info = OC_CONCAT | SS | P(35);
976                 }
977
978                 t.tclass = tc;
979         }
980         ltclass = t.tclass;
981
982         /* Are we ready for this? */
983         if (! (ltclass & expected))
984                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
985                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
986
987         return ltclass;
988 }
989
990 static void rollback_token(void) { t.rollback = TRUE; }
991
992 static node *new_node(uint32_t info)
993 {
994         node *n;
995
996         n = xzalloc(sizeof(node));
997         n->info = info;
998         n->lineno = lineno;
999         return n;
1000 }
1001
1002 static node *mk_re_node(char *s, node *n, regex_t *re)
1003 {
1004         n->info = OC_REGEXP;
1005         n->l.re = re;
1006         n->r.ire = re + 1;
1007         xregcomp(re, s, REG_EXTENDED);
1008         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1009
1010         return n;
1011 }
1012
1013 static node *condition(void)
1014 {
1015         next_token(TC_SEQSTART);
1016         return parse_expr(TC_SEQTERM);
1017 }
1018
1019 /* parse expression terminated by given argument, return ptr
1020  * to built subtree. Terminator is eaten by parse_expr */
1021 static node *parse_expr(uint32_t iexp)
1022 {
1023         node sn;
1024         node *cn = &sn;
1025         node *vn, *glptr;
1026         uint32_t tc, xtc;
1027         var *v;
1028
1029         sn.info = PRIMASK;
1030         sn.r.n = glptr = NULL;
1031         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1032
1033         while (! ((tc = next_token(xtc)) & iexp)) {
1034                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1035                         /* input redirection (<) attached to glptr node */
1036                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1037                         cn->a.n = glptr;
1038                         xtc = TC_OPERAND | TC_UOPPRE;
1039                         glptr = NULL;
1040
1041                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1042                         /* for binary and postfix-unary operators, jump back over
1043                          * previous operators with higher priority */
1044                         vn = cn;
1045                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1046                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1047                                 vn = vn->a.n;
1048                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1049                                 t.info += P(6);
1050                         cn = vn->a.n->r.n = new_node(t.info);
1051                         cn->a.n = vn->a.n;
1052                         if (tc & TC_BINOP) {
1053                                 cn->l.n = vn;
1054                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1055                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1056                                         /* it's a pipe */
1057                                         next_token(TC_GETLINE);
1058                                         /* give maximum priority to this pipe */
1059                                         cn->info &= ~PRIMASK;
1060                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1061                                 }
1062                         } else {
1063                                 cn->r.n = vn;
1064                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1065                         }
1066                         vn->a.n = cn;
1067
1068                 } else {
1069                         /* for operands and prefix-unary operators, attach them
1070                          * to last node */
1071                         vn = cn;
1072                         cn = vn->r.n = new_node(t.info);
1073                         cn->a.n = vn;
1074                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1075                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1076                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1077                                 /* one should be very careful with switch on tclass -
1078                                  * only simple tclasses should be used! */
1079                                 switch (tc) {
1080                                 case TC_VARIABLE:
1081                                 case TC_ARRAY:
1082                                         cn->info = OC_VAR;
1083                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1084                                                 cn->info = OC_FNARG;
1085                                                 cn->l.i = v->x.aidx;
1086                                         } else {
1087                                                 cn->l.v = newvar(t.string);
1088                                         }
1089                                         if (tc & TC_ARRAY) {
1090                                                 cn->info |= xS;
1091                                                 cn->r.n = parse_expr(TC_ARRTERM);
1092                                         }
1093                                         break;
1094
1095                                 case TC_NUMBER:
1096                                 case TC_STRING:
1097                                         cn->info = OC_VAR;
1098                                         v = cn->l.v = xzalloc(sizeof(var));
1099                                         if (tc & TC_NUMBER)
1100                                                 setvar_i(v, t.number);
1101                                         else
1102                                                 setvar_s(v, t.string);
1103                                         break;
1104
1105                                 case TC_REGEXP:
1106                                         mk_re_node(t.string, cn, xzalloc(sizeof(regex_t)*2));
1107                                         break;
1108
1109                                 case TC_FUNCTION:
1110                                         cn->info = OC_FUNC;
1111                                         cn->r.f = newfunc(t.string);
1112                                         cn->l.n = condition();
1113                                         break;
1114
1115                                 case TC_SEQSTART:
1116                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1117                                         cn->a.n = vn;
1118                                         break;
1119
1120                                 case TC_GETLINE:
1121                                         glptr = cn;
1122                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1123                                         break;
1124
1125                                 case TC_BUILTIN:
1126                                         cn->l.n = condition();
1127                                         break;
1128                                 }
1129                         }
1130                 }
1131         }
1132         return sn.r.n;
1133 }
1134
1135 /* add node to chain. Return ptr to alloc'd node */
1136 static node *chain_node(uint32_t info)
1137 {
1138         node *n;
1139
1140         if (! seq->first)
1141                 seq->first = seq->last = new_node(0);
1142
1143         if (seq->programname != programname) {
1144                 seq->programname = programname;
1145                 n = chain_node(OC_NEWSOURCE);
1146                 n->l.s = xstrdup(programname);
1147         }
1148
1149         n = seq->last;
1150         n->info = info;
1151         seq->last = n->a.n = new_node(OC_DONE);
1152
1153         return n;
1154 }
1155
1156 static void chain_expr(uint32_t info)
1157 {
1158         node *n;
1159
1160         n = chain_node(info);
1161         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1162         if (t.tclass & TC_GRPTERM)
1163                 rollback_token();
1164 }
1165
1166 static node *chain_loop(node *nn)
1167 {
1168         node *n, *n2, *save_brk, *save_cont;
1169
1170         save_brk = break_ptr;
1171         save_cont = continue_ptr;
1172
1173         n = chain_node(OC_BR | Vx);
1174         continue_ptr = new_node(OC_EXEC);
1175         break_ptr = new_node(OC_EXEC);
1176         chain_group();
1177         n2 = chain_node(OC_EXEC | Vx);
1178         n2->l.n = nn;
1179         n2->a.n = n;
1180         continue_ptr->a.n = n2;
1181         break_ptr->a.n = n->r.n = seq->last;
1182
1183         continue_ptr = save_cont;
1184         break_ptr = save_brk;
1185
1186         return n;
1187 }
1188
1189 /* parse group and attach it to chain */
1190 static void chain_group(void)
1191 {
1192         uint32_t c;
1193         node *n, *n2, *n3;
1194
1195         do {
1196                 c = next_token(TC_GRPSEQ);
1197         } while (c & TC_NEWLINE);
1198
1199         if (c & TC_GRPSTART) {
1200                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1201                         if (t.tclass & TC_NEWLINE) continue;
1202                         rollback_token();
1203                         chain_group();
1204                 }
1205         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1206                 rollback_token();
1207                 chain_expr(OC_EXEC | Vx);
1208         } else {                                                /* TC_STATEMNT */
1209                 switch (t.info & OPCLSMASK) {
1210                         case ST_IF:
1211                                 n = chain_node(OC_BR | Vx);
1212                                 n->l.n = condition();
1213                                 chain_group();
1214                                 n2 = chain_node(OC_EXEC);
1215                                 n->r.n = seq->last;
1216                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1217                                         chain_group();
1218                                         n2->a.n = seq->last;
1219                                 } else {
1220                                         rollback_token();
1221                                 }
1222                                 break;
1223
1224                         case ST_WHILE:
1225                                 n2 = condition();
1226                                 n = chain_loop(NULL);
1227                                 n->l.n = n2;
1228                                 break;
1229
1230                         case ST_DO:
1231                                 n2 = chain_node(OC_EXEC);
1232                                 n = chain_loop(NULL);
1233                                 n2->a.n = n->a.n;
1234                                 next_token(TC_WHILE);
1235                                 n->l.n = condition();
1236                                 break;
1237
1238                         case ST_FOR:
1239                                 next_token(TC_SEQSTART);
1240                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1241                                 if (t.tclass & TC_SEQTERM) {    /* for-in */
1242                                         if ((n2->info & OPCLSMASK) != OC_IN)
1243                                                 syntax_error(EMSG_UNEXP_TOKEN);
1244                                         n = chain_node(OC_WALKINIT | VV);
1245                                         n->l.n = n2->l.n;
1246                                         n->r.n = n2->r.n;
1247                                         n = chain_loop(NULL);
1248                                         n->info = OC_WALKNEXT | Vx;
1249                                         n->l.n = n2->l.n;
1250                                 } else {                        /* for (;;) */
1251                                         n = chain_node(OC_EXEC | Vx);
1252                                         n->l.n = n2;
1253                                         n2 = parse_expr(TC_SEMICOL);
1254                                         n3 = parse_expr(TC_SEQTERM);
1255                                         n = chain_loop(n3);
1256                                         n->l.n = n2;
1257                                         if (! n2)
1258                                                 n->info = OC_EXEC;
1259                                 }
1260                                 break;
1261
1262                         case OC_PRINT:
1263                         case OC_PRINTF:
1264                                 n = chain_node(t.info);
1265                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1266                                 if (t.tclass & TC_OUTRDR) {
1267                                         n->info |= t.info;
1268                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1269                                 }
1270                                 if (t.tclass & TC_GRPTERM)
1271                                         rollback_token();
1272                                 break;
1273
1274                         case OC_BREAK:
1275                                 n = chain_node(OC_EXEC);
1276                                 n->a.n = break_ptr;
1277                                 break;
1278
1279                         case OC_CONTINUE:
1280                                 n = chain_node(OC_EXEC);
1281                                 n->a.n = continue_ptr;
1282                                 break;
1283
1284                         /* delete, next, nextfile, return, exit */
1285                         default:
1286                                 chain_expr(t.info);
1287                 }
1288         }
1289 }
1290
1291 static void parse_program(char *p)
1292 {
1293         uint32_t tclass;
1294         node *cn;
1295         func *f;
1296         var *v;
1297
1298         pos = p;
1299         t.lineno = 1;
1300         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1301                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1302
1303                 if (tclass & TC_OPTERM)
1304                         continue;
1305
1306                 seq = &mainseq;
1307                 if (tclass & TC_BEGIN) {
1308                         seq = &beginseq;
1309                         chain_group();
1310
1311                 } else if (tclass & TC_END) {
1312                         seq = &endseq;
1313                         chain_group();
1314
1315                 } else if (tclass & TC_FUNCDECL) {
1316                         next_token(TC_FUNCTION);
1317                         pos++;
1318                         f = newfunc(t.string);
1319                         f->body.first = NULL;
1320                         f->nargs = 0;
1321                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1322                                 v = findvar(ahash, t.string);
1323                                 v->x.aidx = (f->nargs)++;
1324
1325                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1326                                         break;
1327                         }
1328                         seq = &(f->body);
1329                         chain_group();
1330                         clear_array(ahash);
1331
1332                 } else if (tclass & TC_OPSEQ) {
1333                         rollback_token();
1334                         cn = chain_node(OC_TEST);
1335                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1336                         if (t.tclass & TC_GRPSTART) {
1337                                 rollback_token();
1338                                 chain_group();
1339                         } else {
1340                                 chain_node(OC_PRINT);
1341                         }
1342                         cn->r.n = mainseq.last;
1343
1344                 } else /* if (tclass & TC_GRPSTART) */ {
1345                         rollback_token();
1346                         chain_group();
1347                 }
1348         }
1349 }
1350
1351
1352 /* -------- program execution part -------- */
1353
1354 static node *mk_splitter(char *s, tsplitter *spl)
1355 {
1356         regex_t *re, *ire;
1357         node *n;
1358
1359         re = &spl->re[0];
1360         ire = &spl->re[1];
1361         n = &spl->n;
1362         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1363                 regfree(re);
1364                 regfree(ire);
1365         }
1366         if (strlen(s) > 1) {
1367                 mk_re_node(s, n, re);
1368         } else {
1369                 n->info = (uint32_t) *s;
1370         }
1371
1372         return n;
1373 }
1374
1375 /* use node as a regular expression. Supplied with node ptr and regex_t
1376  * storage space. Return ptr to regex (if result points to preg, it should
1377  * be later regfree'd manually
1378  */
1379 static regex_t *as_regex(node *op, regex_t *preg)
1380 {
1381         var *v;
1382         char *s;
1383
1384         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1385                 return icase ? op->r.ire : op->l.re;
1386         } else {
1387                 v = nvalloc(1);
1388                 s = getvar_s(evaluate(op, v));
1389                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1390                 nvfree(v);
1391                 return preg;
1392         }
1393 }
1394
/* Gradually growing buffer: make sure *b can be indexed up to n.
 * If *b is unallocated or n has reached *size, the buffer is reallocated
 * to n + n/2 + 80 bytes and *size is updated; otherwise nothing happens. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1401
1402 /* resize field storage space */
1403 static void fsrealloc(int size)
1404 {
1405         static int maxfields = 0;
1406         int i;
1407
1408         if (size >= maxfields) {
1409                 i = maxfields;
1410                 maxfields = size + 16;
1411                 Fields = xrealloc(Fields, maxfields * sizeof(var));
1412                 for (; i < maxfields; i++) {
1413                         Fields[i].type = VF_SPECIAL;
1414                         Fields[i].string = NULL;
1415                 }
1416         }
1417
1418         if (size < nfields) {
1419                 for (i=size; i<nfields; i++) {
1420                         clrvar(Fields+i);
1421                 }
1422         }
1423         nfields = size;
1424 }
1425
/* Split string s into fields using splitter node spl.
 *
 * A working buffer is allocated and handed back through *slist; the fields
 * are stored in it one after another, each NUL-terminated. The return
 * value is the number of fields found.
 *
 * The splitting mode is chosen from spl->info:
 *  - operation class OC_REGEXP: fields are separated by matches of the
 *    node's compiled regex (the case-insensitive one when icase is set);
 *  - separator char '\0': every character of s becomes its own field;
 *  - separator char other than ' ': that single character separates
 *    fields (its upper- and lower-case forms when icase is set);
 *  - separator char ' ': fields are runs of non-whitespace characters.
 * When RS is the empty string, '\n' additionally acts as a separator in
 * the regex and single-character modes.
 */
1426 static int awk_split(char *s, node *spl, char **slist)
1427 {
1428         int l, n = 0;
1429         char c[4];
1430         char *s1;
1431         regmatch_t pmatch[2];
1432
1433         /* in worst case, each char would be a separate field */
1434         *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1435
1436         c[0] = c[1] = (char)spl->info; /* separator char; two slots for the upper/lower pair */
1437         c[2] = c[3] = '\0';
1438         if (*getvar_s(V[RS]) == '\0') c[2] = '\n'; /* c+2 is now "\n", otherwise "" */
1439
1440         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1441                 while (*s) {
1442                         l = strcspn(s, c+2); /* length up to '\n' (RS empty) or to end of s */
1443                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1444                         pmatch[0].rm_so <= l) {
1445                                 l = pmatch[0].rm_so;
1446                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; } /* empty match at start: take one char so s advances */
1447                         } else {
1448                                 /* no usable match: field ends at l; step over the '\n' if there is one */
1449                                 pmatch[0].rm_eo = l;
1450                                 if (s[l]) pmatch[0].rm_eo++;
1451                         }
1452
1453                         memcpy(s1, s, l);
1454                         s1[l] = '\0';
1455                         nextword(&s1);
1456                         s += pmatch[0].rm_eo;
1457                         n++;
1458                 }
1459         } else if (c[0] == '\0') {              /* null split */
1460                 while (*s) {
1461                         *(s1++) = *(s++);
1462                         *(s1++) = '\0';
1463                         n++;
1464                 }
1465         } else if (c[0] != ' ') {               /* single-character split */
1466                 /* works in place on the copy of s made above */
1467                 if (icase) {
1468                         c[0] = toupper(c[0]);
1469                         c[1] = tolower(c[1]);
1470                 }
1471                 if (*s1) n++; /* non-empty input holds at least one field */
1472                 while ((s1 = strpbrk(s1, c))) {
1473                         *(s1++) = '\0';
1474                         n++;
1475                 }
1476         } else {                                /* space split */
1477                 while (*s) {
1478                         s = skip_whitespace(s);
1479                         if (! *s) break;
1480                         n++;
1481                         while (*s && !isspace(*s))
1482                                 *(s1++) = *(s++);
1483                         *(s1++) = '\0';
1484                 }
1485         }
1486         return n;
1487 }
1486
1487 static void split_f0(void)
1488 {
1489         static char *fstrings = NULL;
1490         int i, n;
1491         char *s;
1492
1493         if (is_f0_split)
1494                 return;
1495
1496         is_f0_split = TRUE;
1497         free(fstrings);
1498         fsrealloc(0);
1499         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1500         fsrealloc(n);
1501         s = fstrings;
1502         for (i = 0; i < n; i++) {
1503                 Fields[i].string = nextword(&s);
1504                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1505         }
1506
1507         /* set NF manually to avoid side effects */
1508         clrvar(V[NF]);
1509         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1510         V[NF]->number = nfields;
1511 }
1512
1513 /* perform additional actions when some internal variables changed */
1514 static void handle_special(var *v)
1515 {
1516         int n;
1517         char *b, *sep, *s;
1518         int sl, l, len, i, bsize;
1519
1520         if (! (v->type & VF_SPECIAL))
1521                 return;
1522
1523         if (v == V[NF]) {
1524                 n = (int)getvar_i(v);
1525                 fsrealloc(n);
1526
1527                 /* recalculate $0 */
1528                 sep = getvar_s(V[OFS]);
1529                 sl = strlen(sep);
1530                 b = NULL;
1531                 len = 0;
1532                 for (i=0; i<n; i++) {
1533                         s = getvar_s(&Fields[i]);
1534                         l = strlen(s);
1535                         if (b) {
1536                                 memcpy(b+len, sep, sl);
1537                                 len += sl;
1538                         }
1539                         qrealloc(&b, len+l+sl, &bsize);
1540                         memcpy(b+len, s, l);
1541                         len += l;
1542                 }
1543                 if (b) b[len] = '\0';
1544                 setvar_p(V[F0], b);
1545                 is_f0_split = TRUE;
1546
1547         } else if (v == V[F0]) {
1548                 is_f0_split = FALSE;
1549
1550         } else if (v == V[FS]) {
1551                 mk_splitter(getvar_s(v), &fsplitter);
1552
1553         } else if (v == V[RS]) {
1554                 mk_splitter(getvar_s(v), &rsplitter);
1555
1556         } else if (v == V[IGNORECASE]) {
1557                 icase = istrue(v);
1558
1559         } else {                                                /* $n */
1560                 n = getvar_i(V[NF]);
1561                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1562                 /* right here v is invalid. Just to note... */
1563         }
1564 }
1565
1566 /* step through func/builtin/etc arguments */
1567 static node *nextarg(node **pn)
1568 {
1569         node *n;
1570
1571         n = *pn;
1572         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1573                 *pn = n->r.n;
1574                 n = n->l.n;
1575         } else {
1576                 *pn = NULL;
1577         }
1578         return n;
1579 }
1580
1581 static void hashwalk_init(var *v, xhash *array)
1582 {
1583         char **w;
1584         hash_item *hi;
1585         int i;
1586
1587         if (v->type & VF_WALK)
1588                 free(v->x.walker);
1589
1590         v->type |= VF_WALK;
1591         w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1592         *w = *(w+1) = (char *)(w + 2);
1593         for (i=0; i<array->csize; i++) {
1594                 hi = array->items[i];
1595                 while (hi) {
1596                         strcpy(*w, hi->name);
1597                         nextword(w);
1598                         hi = hi->next;
1599                 }
1600         }
1601 }
1602
1603 static int hashwalk_next(var *v)
1604 {
1605         char **w;
1606
1607         w = v->x.walker;
1608         if (*(w+1) == *w)
1609                 return FALSE;
1610
1611         setvar_s(v, nextword(w+1));
1612         return TRUE;
1613 }
1614
1615 /* evaluate node, return 1 when result is true, 0 otherwise */
1616 static int ptest(node *pattern)
1617 {
1618         static var v; /* static: to save stack space? */
1619
1620         return istrue(evaluate(pattern, &v));
1621 }
1622
1623 /* read next record from stream rsm into a variable v
     *
     * Returns 1 when a record was stored in v, 0 when no data was left,
     * and -1 when the read failed (ERRNO is then set from errno).
     * On success RT is set to the text between the end of the record and
     * the start of the next one, and v is flagged VF_USER.
     *
     * Buffer state is kept in rsm between calls: buffer/size describe the
     * allocation, adv is the offset of the unconsumed data inside it and
     * pos the number of unconsumed bytes.
     */
1624 static int awk_getline(rstream *rsm, var *v)
1625 {
1626         char *b;
1627         regmatch_t pmatch[2];
1628         int a, p, pp=0, size;
1629         int fd, so, eo, r, rp;
1630         char c, *m, *s;
1631
1632         /* we're using our own buffer since we need access to accumulating
1633          * characters
1634          */
1635         fd = fileno(rsm->F);
1636         m = rsm->buffer;
1637         a = rsm->adv;
1638         p = rsm->pos;
1639         size = rsm->size;
1640         c = (char) rsplitter.n.info; /* separator char, used when RS is not a regex */
1641         rp = 0;
1642
1643         if (! m) qrealloc(&m, 256, &size);
1644         do {
1645                 b = m + a; /* start of unconsumed data */
1646                 so = eo = p; /* default: record is everything buffered, no terminator */
1647                 r = 1;
1648                 if (p > 0) {
1649                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1650                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1651                                                                                                 b, 1, pmatch, 0) == 0) {
1652                                         so = pmatch[0].rm_so;
1653                                         eo = pmatch[0].rm_eo;
1654                                         if (b[eo] != '\0') /* a match reaching the end of the data is not accepted yet */
1655                                                 break;
1656                                 }
1657                         } else if (c != '\0') {
1658                                 /* only the bytes added by the last read (from pp on) are searched */
1659                                 s = strchr(b+pp, c);
1660                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1661                                 if (s) {
1662                                         so = eo = s-b;
1663                                         eo++;
1664                                         break;
1665                                 }
1666                         } else {
1667                                 /* separator char is '\0': skip leading newlines, record ends at "\n\n" */
1668                                 while (b[rp] == '\n')
1669                                         rp++;
1670                                 s = strstr(b+rp, "\n\n");
1671                                 if (s) {
1672                                         so = eo = s-b;
1673                                         while (b[eo] == '\n') eo++;
1674                                         if (b[eo] != '\0')
1675                                                 break;
1676                                 }
1677                         }
1678                 }
1679
1680                 /* no complete record yet: move the data to the buffer start and read more */
1681                 if (a > 0) {
1682                         memmove(m, (const void *)(m+a), p+1);
1683                         b = m;
1684                         a = 0;
1685                 }
1686
1687                 qrealloc(&m, a+p+128, &size);
1688                 b = m + a;
1689                 pp = p;
1690                 p += safe_read(fd, b+p, size-p-1);
1691                 if (p < pp) { /* safe_read() returned a negative value */
1692                         p = 0;
1693                         r = 0;
1694                         setvar_i(V[ERRNO], errno);
1695                 }
1696                 b[p] = '\0';
1697
1698         } while (p > pp); /* stop once a read adds nothing */
1699
1700         if (p == 0) {
1701                 r--; /* 1 -> 0 (nothing left), 0 -> -1 (read error) */
1702         } else {
1703                 /* terminate the record and the terminator text in place just long enough to copy them */
1704                 c = b[so]; b[so] = '\0';
1705                 setvar_s(v, b+rp);
1706                 v->type |= VF_USER;
1707                 b[so] = c;
1708                 c = b[eo]; b[eo] = '\0';
1709                 setvar_s(V[RT], b+so);
1710                 b[eo] = c;
1711         }
1712
1713         rsm->buffer = m;
1714         rsm->adv = a + eo;
1715         rsm->pos = p - eo;
1716         rsm->size = size;
1717
1718         return r;
1719 }
1716
1717 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1718 {
1719         int r = 0;
1720         char c;
1721         const char *s = format;
1722
1723         if (int_as_int && n == (int)n) {
1724                 r = snprintf(b, size, "%d", (int)n);
1725         } else {
1726                 do { c = *s; } while (c && *++s);
1727                 if (strchr("diouxX", c)) {
1728                         r = snprintf(b, size, format, (int)n);
1729                 } else if (strchr("eEfgG", c)) {
1730                         r = snprintf(b, size, format, n);
1731                 } else {
1732                         runtime_error(EMSG_INV_FMT);
1733                 }
1734         }
1735         return r;
1736 }
1737
1738
1739 /* formatted output into an allocated buffer, return ptr to buffer */
1740 static char *awk_printf(node *n)
1741 {
1742         char *b = NULL;
1743         char *fmt, *s, *s1, *f;
1744         int i, j, incr, bsize;
1745         char c, c1;
1746         var *v, *arg;
1747
1748         v = nvalloc(1);
1749         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1750
1751         i = 0;
1752         while (*f) {
1753                 s = f;
1754                 while (*f && (*f != '%' || *(++f) == '%'))
1755                         f++;
1756                 while (*f && !isalpha(*f))
1757                         f++;
1758
1759                 incr = (f - s) + MAXVARFMT;
1760                 qrealloc(&b, incr + i, &bsize);
1761                 c = *f;
1762                 if (c != '\0') f++;
1763                 c1 = *f;
1764                 *f = '\0';
1765                 arg = evaluate(nextarg(&n), v);
1766
1767                 j = i;
1768                 if (c == 'c' || !c) {
1769                         i += sprintf(b+i, s, is_numeric(arg) ?
1770                                         (char)getvar_i(arg) : *getvar_s(arg));
1771
1772                 } else if (c == 's') {
1773                         s1 = getvar_s(arg);
1774                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1775                         i += sprintf(b+i, s, s1);
1776
1777                 } else {
1778                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1779                 }
1780                 *f = c1;
1781
1782                 /* if there was an error while sprintf, return value is negative */
1783                 if (i < j) i = j;
1784
1785         }
1786
1787         b = xrealloc(b, i + 1);
1788         free(fmt);
1789         nvfree(v);
1790         b[i] = '\0';
1791         return b;
1792 }
1793
1794 /* common substitution routine
1795  * replace (nm) substring of (src) that match (n) with (repl), store
1796  * result into (dest), return number of substitutions. If nm=0, replace
1797  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1798  * subexpression matching (\1-\9)
1799  */
1800 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1801 {
1802         char *ds = NULL;
1803         char *sp, *s;
1804         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1805         regmatch_t pmatch[10];
1806         regex_t sreg, *re;
1807
1808         re = as_regex(rn, &sreg);
1809         if (! src) src = V[F0];
1810         if (! dest) dest = V[F0];
1811
1812         i = di = 0;
1813         sp = getvar_s(src);
1814         rl = strlen(repl);
1815         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1816                 so = pmatch[0].rm_so;
1817                 eo = pmatch[0].rm_eo;
1818
1819                 qrealloc(&ds, di + eo + rl, &dssize);
1820                 memcpy(ds + di, sp, eo);
1821                 di += eo;
1822                 if (++i >= nm) {
1823                         /* replace */
1824                         di -= (eo - so);
1825                         nbs = 0;
1826                         for (s = repl; *s; s++) {
1827                                 ds[di++] = c = *s;
1828                                 if (c == '\\') {
1829                                         nbs++;
1830                                         continue;
1831                                 }
1832                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1833                                         di -= ((nbs + 3) >> 1);
1834                                         j = 0;
1835                                         if (c != '&') {
1836                                                 j = c - '0';
1837                                                 nbs++;
1838                                         }
1839                                         if (nbs % 2) {
1840                                                 ds[di++] = c;
1841                                         } else {
1842                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1843                                                 qrealloc(&ds, di + rl + n, &dssize);
1844                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1845                                                 di += n;
1846                                         }
1847                                 }
1848                                 nbs = 0;
1849                         }
1850                 }
1851
1852                 sp += eo;
1853                 if (i == nm) break;
1854                 if (eo == so) {
1855                         if (! (ds[di++] = *sp++)) break;
1856                 }
1857         }
1858
1859         qrealloc(&ds, di + strlen(sp), &dssize);
1860         strcpy(ds + di, sp);
1861         setvar_p(dest, ds);
1862         if (re == &sreg) regfree(re);
1863         return i;
1864 }
1865
1866 static var *exec_builtin(node *op, var *res)
1867 {
1868         int (*to_xxx)(int);
1869         var *tv;
1870         node *an[4];
1871         var  *av[4];
1872         char *as[4];
1873         regmatch_t pmatch[2];
1874         regex_t sreg, *re;
1875         static tsplitter tspl;
1876         node *spl;
1877         uint32_t isr, info;
1878         int nargs;
1879         time_t tt;
1880         char *s, *s1;
1881         int i, l, ll, n;
1882
1883         tv = nvalloc(4);
1884         isr = info = op->info;
1885         op = op->l.n;
1886
1887         av[2] = av[3] = NULL;
1888         for (i=0 ; i<4 && op ; i++) {
1889                 an[i] = nextarg(&op);
1890                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1891                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1892                 isr >>= 1;
1893         }
1894
1895         nargs = i;
1896         if (nargs < (info >> 30))
1897                 runtime_error(EMSG_TOO_FEW_ARGS);
1898
1899         switch (info & OPNMASK) {
1900
1901         case B_a2:
1902 #if ENABLE_FEATURE_AWK_MATH
1903                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1904 #else
1905                 runtime_error(EMSG_NO_MATH);
1906 #endif
1907                 break;
1908
1909         case B_sp:
1910                 if (nargs > 2) {
1911                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1912                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1913                 } else {
1914                         spl = &fsplitter.n;
1915                 }
1916
1917                 n = awk_split(as[0], spl, &s);
1918                 s1 = s;
1919                 clear_array(iamarray(av[1]));
1920                 for (i=1; i<=n; i++)
1921                         setari_u(av[1], i, nextword(&s1));
1922                 free(s);
1923                 setvar_i(res, n);
1924                 break;
1925
1926         case B_ss:
1927                 l = strlen(as[0]);
1928                 i = getvar_i(av[1]) - 1;
1929                 if (i>l) i=l; if (i<0) i=0;
1930                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1931                 if (n<0) n=0;
1932                 s = xmalloc(n+1);
1933                 strncpy(s, as[0]+i, n);
1934                 s[n] = '\0';
1935                 setvar_p(res, s);
1936                 break;
1937                 
1938         case B_an:
1939                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1940                 break;
1941                 
1942         case B_co:
1943                 setvar_i(res, ~(long)getvar_i(av[0]));
1944                 break;
1945
1946         case B_ls:
1947                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1948                 break;
1949
1950         case B_or:
1951                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1952                 break;
1953
1954         case B_rs:
1955                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1956                 break;
1957
1958         case B_xo:
1959                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
1960                 break;
1961
1962         case B_lo:
1963                 to_xxx = tolower;
1964                 goto lo_cont;
1965
1966         case B_up:
1967                 to_xxx = toupper;
1968 lo_cont:
1969                 s1 = s = xstrdup(as[0]);
1970                 while (*s1) {
1971                         *s1 = (*to_xxx)(*s1);
1972                         s1++;
1973                 }
1974                 setvar_p(res, s);
1975                 break;
1976
1977         case B_ix:
1978                 n = 0;
1979                 ll = strlen(as[1]);
1980                 l = strlen(as[0]) - ll;
1981                 if (ll > 0 && l >= 0) {
1982                         if (! icase) {
1983                                 s = strstr(as[0], as[1]);
1984                                 if (s) n = (s - as[0]) + 1;
1985                         } else {
1986                                 /* this piece of code is terribly slow and
1987                                  * really should be rewritten
1988                                  */
1989                                 for (i=0; i<=l; i++) {
1990                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1991                                                 n = i+1;
1992                                                 break;
1993                                         }
1994                                 }
1995                         }
1996                 }
1997                 setvar_i(res, n);
1998                 break;
1999
2000         case B_ti:
2001                 if (nargs > 1)
2002                         tt = getvar_i(av[1]);
2003                 else
2004                         time(&tt);
2005                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2006                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
2007                 buf[i] = '\0';
2008                 setvar_s(res, buf);
2009                 break;
2010
2011         case B_ma:
2012                 re = as_regex(an[1], &sreg);
2013                 n = regexec(re, as[0], 1, pmatch, 0);
2014                 if (n == 0) {
2015                         pmatch[0].rm_so++;
2016                         pmatch[0].rm_eo++;
2017                 } else {
2018                         pmatch[0].rm_so = 0;
2019                         pmatch[0].rm_eo = -1;
2020                 }
2021                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2022                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2023                 setvar_i(res, pmatch[0].rm_so);
2024                 if (re == &sreg) regfree(re);
2025                 break;
2026
2027         case B_ge:
2028                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2029                 break;
2030
2031         case B_gs:
2032                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2033                 break;
2034
2035         case B_su:
2036                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2037                 break;
2038         }
2039
2040         nvfree(tv);
2041         return res;
2042 }
2043
2044 /*
2045  * Evaluate node - the heart of the program. Supplied with subtree
2046  * and place where to store result. returns ptr to result.
2047  */
2048 #define XC(n) ((n) >> 8)
2049
2050 static var *evaluate(node *op, var *res)
2051 {
2052         /* This procedure is recursive so we should count every byte */
2053         static var *fnargs = NULL;
2054         static unsigned seed = 1;
2055         static regex_t sreg;
2056         node *op1;
2057         var *v1;
2058         union {
2059                 var *v;
2060                 char *s;
2061                 double d;
2062                 int i;
2063         } L, R;
2064         uint32_t opinfo;
2065         short opn;
2066         union {
2067                 char *s;
2068                 rstream *rsm;
2069                 FILE *F;
2070                 var *v;
2071                 regex_t *re;
2072                 uint32_t info;
2073         } X;
2074
2075         if (! op)
2076                 return setvar_s(res, NULL);
2077
2078         v1 = nvalloc(2);
2079
2080         while (op) {
2081
2082                 opinfo = op->info;
2083                 opn = (short)(opinfo & OPNMASK);
2084                 lineno = op->lineno;
2085
2086                 /* execute inevitable things */
2087                 op1 = op->l.n;
2088                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2089                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2090                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2091                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2092                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2093
2094                 switch (XC(opinfo & OPCLSMASK)) {
2095
2096                   /* -- iterative node type -- */
2097
2098                   /* test pattern */
2099                 case XC( OC_TEST ):
2100                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2101                                 /* it's range pattern */
2102                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2103                                         op->info |= OF_CHECKED;
2104                                         if (ptest(op1->r.n))
2105                                                 op->info &= ~OF_CHECKED;
2106
2107                                         op = op->a.n;
2108                                 } else {
2109                                         op = op->r.n;
2110                                 }
2111                         } else {
2112                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2113                         }
2114                         break;
2115
2116                   /* just evaluate an expression, also used as unconditional jump */
2117                 case XC( OC_EXEC ):
2118                         break;
2119
2120                   /* branch, used in if-else and various loops */
2121                 case XC( OC_BR ):
2122                         op = istrue(L.v) ? op->a.n : op->r.n;
2123                         break;
2124
2125                   /* initialize for-in loop */
2126                 case XC( OC_WALKINIT ):
2127                         hashwalk_init(L.v, iamarray(R.v));
2128                         break;
2129
2130                   /* get next array item */
2131                 case XC( OC_WALKNEXT ):
2132                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2133                         break;
2134
2135                 case XC( OC_PRINT ):
2136                 case XC( OC_PRINTF ):
2137                         X.F = stdout;
2138                         if (op->r.n) {
2139                                 X.rsm = newfile(R.s);
2140                                 if (! X.rsm->F) {
2141                                         if (opn == '|') {
2142                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2143                                                         bb_perror_msg_and_die("popen");
2144                                                 X.rsm->is_pipe = 1;
2145                                         } else {
2146                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2147                                         }
2148                                 }
2149                                 X.F = X.rsm->F;
2150                         }
2151
2152                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2153                                 if (! op1) {
2154                                         fputs(getvar_s(V[F0]), X.F);
2155                                 } else {
2156                                         while (op1) {
2157                                                 L.v = evaluate(nextarg(&op1), v1);
2158                                                 if (L.v->type & VF_NUMBER) {
2159                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2160                                                                         getvar_i(L.v), TRUE);
2161                                                         fputs(buf, X.F);
2162                                                 } else {
2163                                                         fputs(getvar_s(L.v), X.F);
2164                                                 }
2165
2166                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2167                                         }
2168                                 }
2169                                 fputs(getvar_s(V[ORS]), X.F);
2170
2171                         } else {        /* OC_PRINTF */
2172                                 L.s = awk_printf(op1);
2173                                 fputs(L.s, X.F);
2174                                 free(L.s);
2175                         }
2176                         fflush(X.F);
2177                         break;
2178
2179                 case XC( OC_DELETE ):
2180                         X.info = op1->info & OPCLSMASK;
2181                         if (X.info == OC_VAR) {
2182                                 R.v = op1->l.v;
2183                         } else if (X.info == OC_FNARG) {
2184                                 R.v = &fnargs[op1->l.i];
2185                         } else {
2186                                 runtime_error(EMSG_NOT_ARRAY);
2187                         }
2188
2189                         if (op1->r.n) {
2190                                 clrvar(L.v);
2191                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2192                                 hash_remove(iamarray(R.v), L.s);
2193                         } else {
2194                                 clear_array(iamarray(R.v));
2195                         }
2196                         break;
2197
2198                 case XC( OC_NEWSOURCE ):
2199                         programname = op->l.s;
2200                         break;
2201
2202                 case XC( OC_RETURN ):
2203                         copyvar(res, L.v);
2204                         break;
2205
2206                 case XC( OC_NEXTFILE ):
2207                         nextfile = TRUE;
2208                 case XC( OC_NEXT ):
2209                         nextrec = TRUE;
2210                 case XC( OC_DONE ):
2211                         clrvar(res);
2212                         break;
2213
2214                 case XC( OC_EXIT ):
2215                         awk_exit(L.d);
2216
2217                   /* -- recursive node type -- */
2218
2219                 case XC( OC_VAR ):
2220                         L.v = op->l.v;
2221                         if (L.v == V[NF])
2222                                 split_f0();
2223                         goto v_cont;
2224
2225                 case XC( OC_FNARG ):
2226                         L.v = &fnargs[op->l.i];
2227
2228 v_cont:
2229                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2230                         break;
2231
2232                 case XC( OC_IN ):
2233                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2234                         break;
2235
2236                 case XC( OC_REGEXP ):
2237                         op1 = op;
2238                         L.s = getvar_s(V[F0]);
2239                         goto re_cont;
2240
2241                 case XC( OC_MATCH ):
2242                         op1 = op->r.n;
2243 re_cont:
2244                         X.re = as_regex(op1, &sreg);
2245                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2246                         if (X.re == &sreg) regfree(X.re);
2247                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2248                         break;
2249
2250                 case XC( OC_MOVE ):
2251                         /* if source is a temporary string, jusk relink it to dest */
2252                         if (R.v == v1+1 && R.v->string) {
2253                                 res = setvar_p(L.v, R.v->string);
2254                                 R.v->string = NULL;
2255                         } else {
2256                                 res = copyvar(L.v, R.v);
2257                         }
2258                         break;
2259
2260                 case XC( OC_TERNARY ):
2261                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2262                                 runtime_error(EMSG_POSSIBLE_ERROR);
2263                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2264                         break;
2265
2266                 case XC( OC_FUNC ):
2267                         if (! op->r.f->body.first)
2268                                 runtime_error(EMSG_UNDEF_FUNC);
2269
2270                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2271                         while (op1) {
2272                                 L.v = evaluate(nextarg(&op1), v1);
2273                                 copyvar(R.v, L.v);
2274                                 R.v->type |= VF_CHILD;
2275                                 R.v->x.parent = L.v;
2276                                 if (++R.v - X.v >= op->r.f->nargs)
2277                                         break;
2278                         }
2279
2280                         R.v = fnargs;
2281                         fnargs = X.v;
2282
2283                         L.s = programname;
2284                         res = evaluate(op->r.f->body.first, res);
2285                         programname = L.s;
2286
2287                         nvfree(fnargs);
2288                         fnargs = R.v;
2289                         break;
2290
2291                 case XC( OC_GETLINE ):
2292                 case XC( OC_PGETLINE ):
2293                         if (op1) {
2294                                 X.rsm = newfile(L.s);
2295                                 if (! X.rsm->F) {
2296                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2297                                                 X.rsm->F = popen(L.s, "r");
2298                                                 X.rsm->is_pipe = TRUE;
2299                                         } else {
2300                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2301                                         }
2302                                 }
2303                         } else {
2304                                 if (! iF) iF = next_input_file();
2305                                 X.rsm = iF;
2306                         }
2307
2308                         if (! X.rsm->F) {
2309                                 setvar_i(V[ERRNO], errno);
2310                                 setvar_i(res, -1);
2311                                 break;
2312                         }
2313
2314                         if (! op->r.n)
2315                                 R.v = V[F0];
2316
2317                         L.i = awk_getline(X.rsm, R.v);
2318                         if (L.i > 0) {
2319                                 if (! op1) {
2320                                         incvar(V[FNR]);
2321                                         incvar(V[NR]);
2322                                 }
2323                         }
2324                         setvar_i(res, L.i);
2325                         break;
2326
2327                   /* simple builtins */
2328                 case XC( OC_FBLTIN ):
2329                         switch (opn) {
2330
2331                         case F_in:
2332                                 R.d = (int)L.d;
2333                                 break;
2334
2335                         case F_rn:
2336                                 R.d = (double)rand() / (double)RAND_MAX;
2337                                 break;
2338
2339 #if ENABLE_FEATURE_AWK_MATH
2340                         case F_co:
2341                                 R.d = cos(L.d);
2342                                 break;
2343
2344                         case F_ex:
2345                                 R.d = exp(L.d);
2346                                 break;
2347
2348                         case F_lg:
2349                                 R.d = log(L.d);
2350                                 break;
2351
2352                         case F_si:
2353                                 R.d = sin(L.d);
2354                                 break;
2355
2356                         case F_sq:
2357                                 R.d = sqrt(L.d);
2358                                 break;
2359 #else
2360                         case F_co:
2361                         case F_ex:
2362                         case F_lg:
2363                         case F_si:
2364                         case F_sq:
2365                                 runtime_error(EMSG_NO_MATH);
2366                                 break;
2367 #endif
2368
2369                         case F_sr:
2370                                 R.d = (double)seed;
2371                                 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2372                                 srand(seed);
2373                                 break;
2374
2375                         case F_ti:
2376                                 R.d = time(NULL);
2377                                 break;
2378
2379                         case F_le:
2380                                 if (! op1)
2381                                         L.s = getvar_s(V[F0]);
2382                                 R.d = strlen(L.s);
2383                                 break;
2384
2385                         case F_sy:
2386                                 fflush(NULL);
2387                                 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2388                                                 ? (system(L.s) >> 8) : 0;
2389                                 break;
2390
2391                         case F_ff:
2392                                 if (! op1)
2393                                         fflush(stdout);
2394                                 else {
2395                                         if (L.s && *L.s) {
2396                                                 X.rsm = newfile(L.s);
2397                                                 fflush(X.rsm->F);
2398                                         } else {
2399                                                 fflush(NULL);
2400                                         }
2401                                 }
2402                                 break;
2403
2404                         case F_cl:
2405                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2406                                 if (X.rsm) {
2407                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2408                                         free(X.rsm->buffer);
2409                                         hash_remove(fdhash, L.s);
2410                                 }
2411                                 if (R.i != 0)
2412                                         setvar_i(V[ERRNO], errno);
2413                                 R.d = (double)R.i;
2414                                 break;
2415                         }
2416                         setvar_i(res, R.d);
2417                         break;
2418
2419                 case XC( OC_BUILTIN ):
2420                         res = exec_builtin(op, res);
2421                         break;
2422
2423                 case XC( OC_SPRINTF ):
2424                         setvar_p(res, awk_printf(op1));
2425                         break;
2426
2427                 case XC( OC_UNARY ):
2428                         X.v = R.v;
2429                         L.d = R.d = getvar_i(R.v);
2430                         switch (opn) {
2431                         case 'P':
2432                                 L.d = ++R.d;
2433                                 goto r_op_change;
2434                         case 'p':
2435                                 R.d++;
2436                                 goto r_op_change;
2437                         case 'M':
2438                                 L.d = --R.d;
2439                                 goto r_op_change;
2440                         case 'm':
2441                                 R.d--;
2442                                 goto r_op_change;
2443                         case '!':
2444                                 L.d = istrue(X.v) ? 0 : 1;
2445                                 break;
2446                         case '-':
2447                                 L.d = -R.d;
2448                                 break;
2449  r_op_change:
2450                                 setvar_i(X.v, R.d);
2451                         }
2452                         setvar_i(res, L.d);
2453                         break;
2454
2455                 case XC( OC_FIELD ):
2456                         R.i = (int)getvar_i(R.v);
2457                         if (R.i == 0) {
2458                                 res = V[F0];
2459                         } else {
2460                                 split_f0();
2461                                 if (R.i > nfields)
2462                                         fsrealloc(R.i);
2463
2464                                 res = &Fields[R.i-1];
2465                         }
2466                         break;
2467
2468                   /* concatenation (" ") and index joining (",") */
2469                 case XC( OC_CONCAT ):
2470                 case XC( OC_COMMA ):
2471                         opn = strlen(L.s) + strlen(R.s) + 2;
2472                         X.s = xmalloc(opn);
2473                         strcpy(X.s, L.s);
2474                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2475                                 L.s = getvar_s(V[SUBSEP]);
2476                                 X.s = xrealloc(X.s, opn + strlen(L.s));
2477                                 strcat(X.s, L.s);
2478                         }
2479                         strcat(X.s, R.s);
2480                         setvar_p(res, X.s);
2481                         break;
2482
2483                 case XC( OC_LAND ):
2484                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2485                         break;
2486
2487                 case XC( OC_LOR ):
2488                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2489                         break;
2490
2491                 case XC( OC_BINARY ):
2492                 case XC( OC_REPLACE ):
2493                         R.d = getvar_i(R.v);
2494                         switch (opn) {
2495                         case '+':
2496                                 L.d += R.d;
2497                                 break;
2498                         case '-':
2499                                 L.d -= R.d;
2500                                 break;
2501                         case '*':
2502                                 L.d *= R.d;
2503                                 break;
2504                         case '/':
2505                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2506                                 L.d /= R.d;
2507                                 break;
2508                         case '&':
2509 #if ENABLE_FEATURE_AWK_MATH
2510                                 L.d = pow(L.d, R.d);
2511 #else
2512                                 runtime_error(EMSG_NO_MATH);
2513 #endif
2514                                 break;
2515                         case '%':
2516                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2517                                 L.d -= (int)(L.d / R.d) * R.d;
2518                                 break;
2519                         }
2520                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2521                         break;
2522
2523                 case XC( OC_COMPARE ):
2524                         if (is_numeric(L.v) && is_numeric(R.v)) {
2525                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2526                         } else {
2527                                 L.s = getvar_s(L.v);
2528                                 R.s = getvar_s(R.v);
2529                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2530                         }
2531                         switch (opn & 0xfe) {
2532                         case 0:
2533                                 R.i = (L.d > 0);
2534                                 break;
2535                         case 2:
2536                                 R.i = (L.d >= 0);
2537                                 break;
2538                         case 4:
2539                                 R.i = (L.d == 0);
2540                                 break;
2541                         }
2542                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2543                         break;
2544
2545                 default:
2546                         runtime_error(EMSG_POSSIBLE_ERROR);
2547                 }
2548                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2549                         op = op->a.n;
2550                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2551                         break;
2552                 if (nextrec)
2553                         break;
2554         }
2555         nvfree(v1);
2556         return res;
2557 }
2558
2559
2560 /* -------- main & co. -------- */
2561
2562 static int awk_exit(int r)
2563 {
2564         var tv;
2565         unsigned i;
2566         hash_item *hi;
2567
2568         zero_out_var(&tv);
2569
2570         if (!exiting) {
2571                 exiting = TRUE;
2572                 nextrec = FALSE;
2573                 evaluate(endseq.first, &tv);
2574         }
2575
2576         /* waiting for children */
2577         for (i = 0; i < fdhash->csize; i++) {
2578                 hi = fdhash->items[i];
2579                 while (hi) {
2580                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2581                                 pclose(hi->data.rs.F);
2582                         hi = hi->next;
2583                 }
2584         }
2585
2586         exit(r);
2587 }
2588
2589 /* if expr looks like "var=value", perform assignment and return 1,
2590  * otherwise return 0 */
2591 static int is_assignment(const char *expr)
2592 {
2593         char *exprc, *s, *s0, *s1;
2594
2595         exprc = xstrdup(expr);
2596         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2597                 free(exprc);
2598                 return FALSE;
2599         }
2600
2601         *(s++) = '\0';
2602         s0 = s1 = s;
2603         while (*s)
2604                 *(s1++) = nextchar(&s);
2605
2606         *s1 = '\0';
2607         setvar_u(newvar(exprc), s0);
2608         free(exprc);
2609         return TRUE;
2610 }
2611
2612 /* switch to next input file */
2613 static rstream *next_input_file(void)
2614 {
2615         static rstream rsm;
2616         FILE *F = NULL;
2617         char *fname, *ind;
2618         static int files_happen = FALSE;
2619
2620         if (rsm.F) fclose(rsm.F);
2621         rsm.F = NULL;
2622         rsm.pos = rsm.adv = 0;
2623
2624         do {
2625                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2626                         if (files_happen)
2627                                 return NULL;
2628                         fname = "-";
2629                         F = stdin;
2630                 } else {
2631                         ind = getvar_s(incvar(V[ARGIND]));
2632                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2633                         if (fname && *fname && !is_assignment(fname))
2634                                 F = afopen(fname, "r");
2635                 }
2636         } while (!F);
2637
2638         files_happen = TRUE;
2639         setvar_s(V[FILENAME], fname);
2640         rsm.F = F;
2641         return &rsm;
2642 }
2643
2644 int awk_main(int argc, char **argv)
2645 {
2646         unsigned opt;
2647         char *opt_F, *opt_v, *opt_W;
2648         int i, j, flen;
2649         var *v;
2650         var tv;
2651         char **envp;
2652         char *vnames = (char *)vNames; /* cheat */
2653         char *vvalues = (char *)vValues;
2654
2655         zero_out_var(&tv);
2656
2657         /* allocate global buffer */
2658         buf = xmalloc(MAXVARFMT + 1);
2659
2660         vhash = hash_init();
2661         ahash = hash_init();
2662         fdhash = hash_init();
2663         fnhash = hash_init();
2664
2665         /* initialize variables */
2666         for (i = 0; *vnames; i++) {
2667                 V[i] = v = newvar(nextword(&vnames));
2668                 if (*vvalues != '\377')
2669                         setvar_s(v, nextword(&vvalues));
2670                 else
2671                         setvar_i(v, 0);
2672
2673                 if (*vnames == '*') {
2674                         v->type |= VF_SPECIAL;
2675                         vnames++;
2676                 }
2677         }
2678
2679         handle_special(V[FS]);
2680         handle_special(V[RS]);
2681
2682         newfile("/dev/stdin")->F = stdin;
2683         newfile("/dev/stdout")->F = stdout;
2684         newfile("/dev/stderr")->F = stderr;
2685
2686         for (envp = environ; *envp; envp++) {
2687                 char *s = xstrdup(*envp);
2688                 char *s1 = strchr(s, '=');
2689                 if (s1) {
2690                         *s1++ = '\0';
2691                         setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2692                 }
2693                 free(s);
2694         }
2695
2696         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2697         argv += optind;
2698         argc -= optind;
2699         if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2700         if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2701         if (opt & 0x4) { // -f
2702                 char *s = s; /* die, gcc, die */
2703                 FILE *from_file = afopen(programname, "r");
2704                 /* one byte is reserved for some trick in next_token */
2705                 if (fseek(from_file, 0, SEEK_END) == 0) {
2706                         flen = ftell(from_file);
2707                         s = xmalloc(flen + 4);
2708                         fseek(from_file, 0, SEEK_SET);
2709                         i = 1 + fread(s + 1, 1, flen, from_file);
2710                 } else {
2711                         for (i = j = 1; j > 0; i += j) {
2712                                 s = xrealloc(s, i + 4096);
2713                                 j = fread(s + i, 1, 4094, from_file);
2714                         }
2715                 }
2716                 s[i] = '\0';
2717                 fclose(from_file);
2718                 parse_program(s + 1);
2719                 free(s);
2720         } else { // no -f: take program from 1st parameter
2721                 if (!argc)
2722                         bb_show_usage();
2723                 programname = "cmd. line";
2724                 parse_program(*argv++);
2725                 argc--;
2726         }
2727         if (opt & 0x8) // -W
2728                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2729
2730         /* fill in ARGV array */
2731         setvar_i(V[ARGC], argc + 1);
2732         setari_u(V[ARGV], 0, "awk");
2733         i = 0;
2734         while (*argv)
2735                 setari_u(V[ARGV], ++i, *argv++);
2736
2737         evaluate(beginseq.first, &tv);
2738         if (!mainseq.first && !endseq.first)
2739                 awk_exit(EXIT_SUCCESS);
2740
2741         /* input file could already be opened in BEGIN block */
2742         if (!iF) iF = next_input_file();
2743
2744         /* passing through input files */
2745         while (iF) {
2746                 nextfile = FALSE;
2747                 setvar_i(V[FNR], 0);
2748
2749                 while ((i = awk_getline(iF, V[F0])) > 0) {
2750                         nextrec = FALSE;
2751                         incvar(V[NR]);
2752                         incvar(V[FNR]);
2753                         evaluate(mainseq.first, &tv);
2754
2755                         if (nextfile)
2756                                 break;
2757                 }
2758
2759                 if (i < 0)
2760                         runtime_error(strerror(errno));
2761
2762                 iF = next_input_file();
2763         }
2764
2765         awk_exit(EXIT_SUCCESS);
2766         /*return 0;*/
2767 }