setlogcons, from Jan Kaszka.
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <unistd.h>
13 #include <errno.h>
14 #include <string.h>
15 #include <time.h>
16 #include <math.h>
17 #include <ctype.h>
18 #include <getopt.h>
19
20 #include "xregex.h"
21 #include "busybox.h"
22
23
/* Size limits: MAXVARFMT is the buffer length handed to fmt_num() when
 * getvar_s() formats a number; MINNVBLOCK is the minimum number of vars
 * allocated per temporary-variable block in nvalloc(). */
24 #define MAXVARFMT       240
25 #define MINNVBLOCK      64
26
27 /* variable flags (bits stored in var.type) */
28 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
29 #define VF_ARRAY        0x0002  /* 1 = it's an array */
30
31 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
32 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
33 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
34 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
35 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
36 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
37 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
38
39 /* these flags are static, don't change them when value is changed */
/* (clrvar() masks var.type with this value, so only these bits survive) */
40 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
41
42 /* Variable: a VF_* flag word, a numeric value, a string value and one
 * flag-dependent extra field (union x). */
43 typedef struct var_s {
44         unsigned short type;            /* flags */
45         double number;                  /* numeric value (see getvar_i) */
46         char *string;                   /* string value, NULL means "" (see getvar_s) */
47         union {
48                 int aidx;                               /* func arg index (on compilation stage) */
49                 struct xhash_s *array;  /* array ptr */
50                 struct var_s *parent;   /* for func args, ptr to actual parameter */
51                 char **walker;                  /* list of array elements (for..in) */
52         } x;
53 } var;
54
55 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
56 typedef struct chain_s {
57         struct node_s *first;           /* first node of the chain */
58         struct node_s *last;            /* last node of the chain */
59         char *programname;              /* NOTE(review): presumably the source name used in error messages - confirm */
60 } chain;
61
62 /* Function: stored in a hash_item (data.f) and obtained through newfunc() */
63 typedef struct func_s {
64         unsigned short nargs;           /* NOTE(review): presumably the declared argument count - confirm */
65         struct chain_s body;            /* node chain of the function body */
66 } func;
67
68 /* I/O stream: a stdio FILE plus buffer bookkeeping; stored in a hash_item
 * (data.rs) and obtained through newfile().
 * NOTE(review): the exact meaning of adv/size/pos is defined by the reader
 * code, which is not visible in this part of the file - confirm there. */
69 typedef struct rstream_s {
70         FILE *F;
71         char *buffer;
72         int adv;
73         int size;
74         int pos;
75         unsigned short is_pipe;
76 } rstream;
77
/* One hash table entry. 'data' must remain the first member: hash_search()
 * returns &data and hash_find() stores that pointer in a hash_item pointer,
 * so both addresses have to coincide. */
78 typedef struct hash_item_s {
79         union {
80                 struct var_s v;                 /* variable/array hash */
81                 struct rstream_s rs;    /* redirect streams hash */
82                 struct func_s f;                /* functions hash */
83         } data;
84         struct hash_item_s *next;       /* next in chain */
85         char name[1];                           /* really it's longer */
86 } hash_item;
87
/* Chained hash table keyed by name; buckets are singly linked hash_item lists */
88 typedef struct xhash_s {
89         unsigned int nel;                                       /* num of elements */
90         unsigned int csize;                                     /* current hash size */
91         unsigned int nprime;                            /* next hash size in PRIMES[] */
92         unsigned int glen;                                      /* summary length of item names */
93         struct hash_item_s **items;                     /* bucket array, csize entries */
94 } xhash;
95
96 /* Tree node: 'info' is an operator info word (see tokeninfo[] and the
 * OC_ / OF_ / P() definitions); l, r and a are operand slots whose active
 * union member depends on the operation.
 * NOTE(review): which member is valid for which opcode is decided by the
 * parser/evaluator, not visible here - confirm there. */
97 typedef struct node_s {
98         uint32_t info;
99         unsigned short lineno;
100         union {
101                 struct node_s *n;
102                 var *v;
103                 int i;
104                 char *s;
105                 regex_t *re;
106         } l;
107         union {
108                 struct node_s *n;
109                 regex_t *ire;
110                 func *f;
111                 int argno;
112         } r;
113         union {
114                 struct node_s *n;
115         } a;
116 } node;
117
118 /* Block of temporary variables, managed by nvalloc()/nvfree().
 * Blocks form a doubly linked list; nv[] is allocated together with the
 * header (sizeof(nvblock) + size * sizeof(var)). */
119 typedef struct nvblock_s {
120         int size;                       /* capacity of nv[] in vars */
121         var *pos;                       /* first unused element of nv[] */
122         struct nvblock_s *prev;
123         struct nvblock_s *next;
124         var nv[0];                      /* zero-length trailing array (compiler extension) */
125 } nvblock;
126
/* Splitter description: a node plus storage for two compiled regexes.
 * NOTE(review): presumably re[0]/re[1] back the node's l.re / r.ire
 * pointers (used for fsplitter and rsplitter) - confirm at the point of use. */
127 typedef struct tsplitter_s {
128         node n;
129         regex_t re[2];
130 } tsplitter;
131
/* Token classes are single-bit masks. next_token() starts with class bit 1
 * and shifts it left each time it meets an NTC marker in tokenlist, so bit
 * N here must describe the Nth group of tokenlist. */
132 /* simple token classes */
133 /* Order and hex values are very important!!!  See next_token() */
134 #define TC_SEQSTART      1                              /* ( */
135 #define TC_SEQTERM      (1 << 1)                /* ) */
136 #define TC_REGEXP       (1 << 2)                /* /.../ */
137 #define TC_OUTRDR       (1 << 3)                /* | > >> */
138 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
139 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
140 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
141 #define TC_IN           (1 << 7)
142 #define TC_COMMA        (1 << 8)
143 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
144 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
145 #define TC_ARRTERM      (1 << 11)               /* ] */
146 #define TC_GRPSTART     (1 << 12)               /* { */
147 #define TC_GRPTERM      (1 << 13)               /* } */
148 #define TC_SEMICOL      (1 << 14)
149 #define TC_NEWLINE      (1 << 15)
150 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
151 #define TC_WHILE        (1 << 17)
152 #define TC_ELSE         (1 << 18)
153 #define TC_BUILTIN      (1 << 19)
154 #define TC_GETLINE      (1 << 20)
155 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
156 #define TC_BEGIN        (1 << 22)
157 #define TC_END          (1 << 23)
158 #define TC_EOF          (1 << 24)
159 #define TC_VARIABLE     (1 << 25)
160 #define TC_ARRAY        (1 << 26)
161 #define TC_FUNCTION     (1 << 27)
162 #define TC_STRING       (1 << 28)
163 #define TC_NUMBER       (1 << 29)
164
165 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
166
167 /* combined token classes */
168 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
169 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
170 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
171         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
172
173 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
174 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
175
176 /* word tokens, cannot mean something else if not expected */
177 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
178         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
179
180 /* discard newlines after these */
181 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
182         TC_BINOP | TC_OPTERM)
183
184 /* what can expression begin with */
185 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
186 /* what can group begin with */
187 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
188
189 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
190 /* operator is inserted between them */
191 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
192         TC_STRING | TC_NUMBER | TC_UOPPOST)
193 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
194
/* Operand-handling flags stored in bits 16..21 of an info word.
 * NOTE(review): presumably OF_RESn = resolve operand n, OF_STRn = take it as
 * a string, OF_NUM1 = take operand 1 as a number - confirm in the evaluator,
 * which is outside this part of the file. */
195 #define OF_RES1         0x010000
196 #define OF_RES2         0x020000
197 #define OF_STR1         0x040000
198 #define OF_STR2         0x080000
199 #define OF_NUM1         0x100000
200 #define OF_CHECKED      0x200000
201
202 /* combined operator flags */
/* two-letter names: first letter describes operand 1, second operand 2;
 * x = no flag, V = RES only, S = RES|STR, N = RES|NUM */
203 #define xx      0
204 #define xV      OF_RES2
205 #define xS      (OF_RES2 | OF_STR2)
206 #define Vx      OF_RES1
207 #define VV      (OF_RES1 | OF_RES2)
208 #define Nx      (OF_RES1 | OF_NUM1)
209 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
210 #define Sx      (OF_RES1 | OF_STR1)
211 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
212 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
213
/* Masks for the low 16 bits of an info word: the operation class (OC_ and
 * ST_ values, bits 8..15) and the per-operation sub-code (bits 0..6). */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F

/* Operator priority is stored in the highest byte of an info word
 * (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * The macro argument is parenthesized so that an expression argument such
 * as P(a + b) shifts the whole expression, not just its last operand.
 */
#define P(x)    ((x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000

/* Operation classes */

/* NOTE(review): these equal OC_WALKINIT (0x0600) and OC_BINARY (0x1000);
 * presumably they delimit ranges of operation classes - confirm at the
 * point of use. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000
229
/* Operation class codes; each occupies the OPCLSMASK byte (bits 8..15) of
 * an info word. The numeric order matters: SHIFT_TIL_THIS equals
 * OC_WALKINIT and RECUR_FROM_THIS equals OC_BINARY. */
230 enum {
231         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
232         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
233
234         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
235         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
236         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
237
238         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
239         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
240         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
241         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
242         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
243         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
244         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
245         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
246         OC_DONE=0x2800,
247
        /* statement markers used for the if/do/for/while keywords in tokeninfo[] */
248         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
249         ST_WHILE=0x3300
250 };
251
252 /* simple builtins */
/* sub-codes OR-ed with OC_FBLTIN in tokeninfo[] (e.g. F_cl = close,
 * F_sy = system, F_ff = fflush, F_le = length, as paired there) */
253 enum {
254         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
255         F_ti,   F_le,   F_sy,   F_ff,   F_cl
256 };
257
258 /* builtins */
/* sub-codes OR-ed with OC_BUILTIN (OC_B) in tokeninfo[] (e.g. B_a2 = atan2,
 * B_ge = gensub, B_gs = gsub, B_su = sub, as paired there) */
259 enum {
260         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
261         B_ge,   B_gs,   B_su
262 };
263
264 /* tokens and their corresponding info values */
/* tokenlist is a packed table scanned by next_token(): each entry is one
 * length byte followed by that many token characters. An NTC byte ('\377')
 * ends a token class (the scanner shifts its class bit left), and a zero
 * length byte ends the table. The Nth entry (not counting NTC markers)
 * corresponds to tokeninfo[N], so both tables must be edited together. */
265
266 #define NTC             "\377"          /* switch to next token class (tc<<1) */
267 #define NTCC    '\377'
268
269 #define OC_B    OC_BUILTIN
270
271 static char * const tokenlist =
272         "\1("           NTC
273         "\1)"           NTC
274         "\1/"           NTC                                                                     /* REGEXP */
275         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
276         "\2++"          "\2--"          NTC                                             /* UOPPOST */
277         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
278         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
279         "\2*="          "\2/="          "\2%="          "\2^="
280         "\1+"           "\1-"           "\3**="         "\2**"
281         "\1/"           "\1%"           "\1^"           "\1*"
282         "\2!="          "\2>="          "\2<="          "\1>"
283         "\1<"           "\2!~"          "\1~"           "\2&&"
284         "\2||"          "\1?"           "\1:"           NTC
285         "\2in"          NTC
286         "\1,"           NTC
287         "\1|"           NTC
288         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
289         "\1]"           NTC
290         "\1{"           NTC
291         "\1}"           NTC
292         "\1;"           NTC
293         "\1\n"          NTC
294         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
295         "\10continue"                   "\6delete"      "\5print"
296         "\6printf"      "\4next"        "\10nextfile"
297         "\6return"      "\4exit"        NTC
298         "\5while"       NTC
299         "\4else"        NTC
300
301         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
302         "\3cos"         "\3exp"         "\3int"         "\3log"
303         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
304         "\6gensub"      "\4gsub"        "\5index"       "\6length"
305         "\5match"       "\5split"       "\7sprintf"     "\3sub"
306         "\6substr"      "\7systime"     "\10strftime"
307         "\7tolower"     "\7toupper"     NTC
308         "\7getline"     NTC
309         "\4func"        "\10function"   NTC
310         "\5BEGIN"       NTC
311         "\3END"         "\0"
312         ;
313
/* Info words, one per token of tokenlist and in the same order (next_token()
 * advances through both tables in lock step). A word combines an operation
 * class (OC_ or ST_ value), operand flags (xx..SS), a priority or builtin
 * argument descriptor P(...), and a sub-code in the low bits. Entries that
 * need no info are 0. */
314 static const uint32_t tokeninfo[] = {
315
316         0,
317         0,
318         OC_REGEXP,
319         xS|'a',         xS|'w',         xS|'|',
320         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
321         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
322                 OC_FIELD|xV|P(5),
323         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
324                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
325         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
326                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
327         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
328                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
329         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
330                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
331         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
332                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
333         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
334                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
335         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
336                 OC_COLON|xx|P(67)|':',
337         OC_IN|SV|P(49),
338         OC_COMMA|SS|P(80),
339         OC_PGETLINE|SV|P(37),
340         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
341                 OC_UNARY|xV|P(19)|'!',
342         0,
343         0,
344         0,
345         0,
346         0,
347         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
348         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
349         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
350         OC_RETURN|Vx,   OC_EXIT|Nx,
351         ST_WHILE,
352         0,
353
354         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
355         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
356         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
357         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
358         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
359         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
360         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
361         OC_GETLINE|SV|P(0),
362         0,      0,
363         0,
364         0
365 };
366
367 /* internal variable names and their initial values       */
368 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices into V[]; the order matches the names listed in vNames, and
 * _intvarcount_ is the number of internal variables (the size of V[]). */
369 enum {
370         CONVFMT=0,      OFMT,           FS,                     OFS,
371         ORS,            RS,                     RT,                     FILENAME,
372         SUBSEP,         ARGIND,         ARGC,           ARGV,
373         ERRNO,          FNR,
374         NR,                     NF,                     IGNORECASE,
375         ENVIRON,        F0,                     _intvarcount_
376 };
377
/* NUL-separated names of the internal variables, in enum order, ended by an
 * empty name. A '*' follows the terminating NUL of FS, RS, NF, IGNORECASE
 * and "$"; per the comment above it marks SPECIAL variables.
 * NOTE(review): the code that consumes these markers is outside this part
 * of the file - confirm how the '*' is skipped. */
378 static char * vNames =
379         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
380         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
381         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
382         "ERRNO\0"       "FNR\0"
383         "NR\0"          "NF\0*"         "IGNORECASE\0*"
384         "ENVIRON\0"     "$\0*"          "\0";
385
/* NUL-separated initial string values for the leading variables
 * (CONVFMT .. SUBSEP in enum order), ended by a '\377' byte.
 * NOTE(review): presumably the remaining variables get numeric or empty
 * defaults from the initialization code - confirm. */
386 static char * vValues =
387         "%.6g\0"        "%.6g\0"        " \0"           " \0"
388         "\n\0"          "\n\0"          "\0"            "\0"
389         "\034\0"
390         "\377";
391
/* Hash tables start with FIRST_PRIME buckets (see hash_init) and may grow,
 * one step per hash_rebuild() call, through the sizes listed in PRIMES[].
 * NPRIMES is the number of growth steps available.
 * FIRST_PRIME deliberately has no trailing semicolon so that it can be used
 * inside expressions as well as in plain assignments. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
396
397 /* globals */
398
399 extern char **environ;
400
/* V[] holds the internal variables, indexed by the CONVFMT..F0 enum */
401 static var * V[_intvarcount_];
402 static chain beginseq, mainseq, endseq, *seq;
403 static int nextrec, nextfile;
404 static node *break_ptr, *continue_ptr;
405 static rstream *iF;
/* vhash is used by newvar(), fdhash by newfile(), fnhash by newfunc() */
406 static xhash *vhash, *ahash, *fdhash, *fnhash;
/* programname and lineno are printed by syntax_error() */
407 static char *programname;
408 static short lineno;
409 static int is_f0_split;
410 static int nfields;
411 static var *Fields;
412 static tsplitter fsplitter, rsplitter;
/* cb: current block of temporaries, see nvalloc()/nvfree() */
413 static nvblock *cb;
/* pos: current position in the program text, read by next_token() */
414 static char *pos;
/* buf: scratch buffer; getvar_s() formats numbers into it */
415 static char *buf;
416 static int icase;
417 static int exiting;
418
/* t: the current token as filled in by next_token() (class, info word,
 * string or numeric payload, line counter, and a flag asking next_token()
 * to hand out the same token again) */
419 static struct {
420         uint32_t tclass;
421         uint32_t info;
422         char *string;
423         double number;
424         short lineno;
425         int rollback;
426 } t;
427
428 /* function prototypes */
/* forward declarations for functions that are used before their definitions */
429 static void handle_special(var *);
430 static node *parse_expr(uint32_t);
431 static void chain_group(void);
432 static var *evaluate(node *, var *);
433 static rstream *next_input_file(void);
434 static int fmt_num(char *, int, const char *, double, int);
435 static int awk_exit(int) ATTRIBUTE_NORETURN;
436
437 /* ---- error handling ---- */
438
/* message texts passed to syntax_error() / runtime_error() */
439 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
440 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
441 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
442 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
443 static const char EMSG_INV_FMT[] = "Invalid format specifier";
444 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
445 static const char EMSG_NOT_ARRAY[] = "Not an array";
446 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
447 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
/* only needed when math support is configured out */
448 #ifndef CONFIG_FEATURE_AWK_MATH
449 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
450 #endif
451
452 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
453 static void syntax_error(const char * const message)
454 {
455         bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
456 }
457
458 #define runtime_error(x) syntax_error(x)
459
460
461 /* ---- hash stuff ---- */
462
/* Hash a NUL-terminated name: for every character the running value is
 * multiplied by 63 and the character is added, all in unsigned arithmetic
 * (overflow wraps). Returns the raw hash; callers reduce it modulo the
 * table size. The empty string hashes to 0. */
static unsigned int hashidx(const char *name)
{
	unsigned int hash = 0;
	const char *c;

	for (c = name; *c != '\0'; c++)
		hash = hash * 63u + (unsigned int)*c;
	return hash;
}
470
471 /* create new hash */
472 static xhash *hash_init(void)
473 {
474         xhash *newhash;
475
476         newhash = (xhash *)xcalloc(1, sizeof(xhash));
477         newhash->csize = FIRST_PRIME;
478         newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
479
480         return newhash;
481 }
482
483 /* find item in hash, return ptr to data, NULL if not found */
484 static void *hash_search(xhash *hash, const char *name)
485 {
486         hash_item *hi;
487
488         hi = hash->items [ hashidx(name) % hash->csize ];
489         while (hi) {
490                 if (strcmp(hi->name, name) == 0)
491                         return &(hi->data);
492                 hi = hi->next;
493         }
494         return NULL;
495 }
496
497 /* grow hash if it becomes too big */
498 static void hash_rebuild(xhash *hash)
499 {
500         unsigned int newsize, i, idx;
501         hash_item **newitems, *hi, *thi;
502
503         if (hash->nprime == NPRIMES)
504                 return;
505
506         newsize = PRIMES[hash->nprime++];
507         newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
508
509         for (i=0; i<hash->csize; i++) {
510                 hi = hash->items[i];
511                 while (hi) {
512                         thi = hi;
513                         hi = thi->next;
514                         idx = hashidx(thi->name) % newsize;
515                         thi->next = newitems[idx];
516                         newitems[idx] = thi;
517                 }
518         }
519
520         free(hash->items);
521         hash->csize = newsize;
522         hash->items = newitems;
523 }
524
525 /* find item in hash, add it if necessary. Return ptr to data */
526 static void *hash_find(xhash *hash, const char *name)
527 {
528         hash_item *hi;
529         unsigned int idx;
530         int l;
531
532         hi = hash_search(hash, name);
533         if (! hi) {
534                 if (++hash->nel / hash->csize > 10)
535                         hash_rebuild(hash);
536
537                 l = bb_strlen(name) + 1;
538                 hi = xcalloc(sizeof(hash_item) + l, 1);
539                 memcpy(hi->name, name, l);
540
541                 idx = hashidx(name) % hash->csize;
542                 hi->next = hash->items[idx];
543                 hash->items[idx] = hi;
544                 hash->glen += l;
545         }
546         return &(hi->data);
547 }
548
/* Typed front ends to hash_find(): look an item up by name, creating it
 * when missing, and return its data as the proper type. findvar() works on
 * any table; the others are bound to vhash, fdhash and fnhash.
 * Each expansion is fully parenthesized so that the result can be followed
 * directly by "->" or used inside a larger expression. */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name) ((var *) hash_find(vhash, (name)))
#define newfile(name) ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name) ((func *) hash_find(fnhash, (name)))
553
554 static void hash_remove(xhash *hash, const char *name)
555 {
556         hash_item *hi, **phi;
557
558         phi = &(hash->items[ hashidx(name) % hash->csize ]);
559         while (*phi) {
560                 hi = *phi;
561                 if (strcmp(hi->name, name) == 0) {
562                         hash->glen -= (bb_strlen(name) + 1);
563                         hash->nel--;
564                         *phi = hi->next;
565                         free(hi);
566                         break;
567                 }
568                 phi = &(hi->next);
569         }
570 }
571
572 /* ------ some useful functions ------ */
573
574 static void skip_spaces(char **s)
575 {
576         register char *p = *s;
577
578         while(*p == ' ' || *p == '\t' ||
579                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
580                 p++;
581         }
582         *s = p;
583 }
584
/* Return the NUL-terminated word *s currently points at and move *s to the
 * byte just past that word's terminating NUL, i.e. to the start of the
 * following word in a packed list of strings. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
593
/* Fetch the next character from *s and advance *s past it. A backslash is
 * handed to bb_process_escape_sequence(); if that call still yields a
 * backslash without consuming any input, the character following the
 * backslash is taken literally instead. */
static char nextchar(char **s)
{
	char ch = **s;
	char *after;

	(*s)++;
	after = *s;		/* position right behind the first character */

	if (ch == '\\') {
		ch = bb_process_escape_sequence((const char **)s);
		if (ch == '\\' && *s == after) {
			ch = **s;
			(*s)++;
		}
	}
	return ch;
}
604
/* Return non-zero when c may be part of an identifier: an alphanumeric
 * character or an underscore.
 * Callers pass plain 'char' values, which can be negative where char is
 * signed; isalnum() is only defined for unsigned char values and EOF, so
 * the argument is converted to unsigned char first. */
static inline int isalnum_(int c)
{
	return (c == '_' || isalnum((unsigned char)c));
}
609
/* Open 'path' with 'mode' through bb_xfopen(), except that the name "-"
 * yields stdin without opening anything. */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;
	return bb_xfopen(path, mode);
}
614
615 /* -------- working with variables (set/get/copy/etc) -------- */
616
617 static xhash *iamarray(var *v)
618 {
619         var *a = v;
620
621         while (a->type & VF_CHILD)
622                 a = a->x.parent;
623
624         if (! (a->type & VF_ARRAY)) {
625                 a->type |= VF_ARRAY;
626                 a->x.array = hash_init();
627         }
628         return a->x.array;
629 }
630
631 static void clear_array(xhash *array)
632 {
633         unsigned int i;
634         hash_item *hi, *thi;
635
636         for (i=0; i<array->csize; i++) {
637                 hi = array->items[i];
638                 while (hi) {
639                         thi = hi;
640                         hi = hi->next;
641                         free(thi->data.v.string);
642                         free(thi);
643                 }
644                 array->items[i] = NULL;
645         }
646         array->glen = array->nel = 0;
647 }
648
649 /* clear a variable */
650 static var *clrvar(var *v)
651 {
652         if (!(v->type & VF_FSTR))
653                 free(v->string);
654
655         v->type &= VF_DONTTOUCH;
656         v->type |= VF_DIRTY;
657         v->string = NULL;
658         return v;
659 }
660
661 /* assign string value to variable */
662 static var *setvar_p(var *v, char *value)
663 {
664         clrvar(v);
665         v->string = value;
666         handle_special(v);
667
668         return v;
669 }
670
671 /* same as setvar_p but make a copy of string */
672 static var *setvar_s(var *v, const char *value)
673 {
674         return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
675 }
676
677 /* same as setvar_s but set USER flag */
678 static var *setvar_u(var *v, const char *value)
679 {
680         setvar_s(v, value);
681         v->type |= VF_USER;
682         return v;
683 }
684
685 /* set array element to user string */
686 static void setari_u(var *a, int idx, const char *s)
687 {
688         register var *v;
689         static char sidx[12];
690
691         sprintf(sidx, "%d", idx);
692         v = findvar(iamarray(a), sidx);
693         setvar_u(v, s);
694 }
695
696 /* assign numeric value to variable */
697 static var *setvar_i(var *v, double value)
698 {
699         clrvar(v);
700         v->type |= VF_NUMBER;
701         v->number = value;
702         handle_special(v);
703         return v;
704 }
705
706 static char *getvar_s(var *v)
707 {
708         /* if v is numeric and has no cached string, convert it to string */
709         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
710                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
711                 v->string = bb_xstrdup(buf);
712                 v->type |= VF_CACHED;
713         }
714         return (v->string == NULL) ? "" : v->string;
715 }
716
717 static double getvar_i(var *v)
718 {
719         char *s;
720
721         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
722                 v->number = 0;
723                 s = v->string;
724                 if (s && *s) {
725                         v->number = strtod(s, &s);
726                         if (v->type & VF_USER) {
727                                 skip_spaces(&s);
728                                 if (*s != '\0')
729                                         v->type &= ~VF_USER;
730                         }
731                 } else {
732                         v->type &= ~VF_USER;
733                 }
734                 v->type |= VF_CACHED;
735         }
736         return v->number;
737 }
738
739 static var *copyvar(var *dest, const var *src)
740 {
741         if (dest != src) {
742                 clrvar(dest);
743                 dest->type |= (src->type & ~VF_DONTTOUCH);
744                 dest->number = src->number;
745                 if (src->string)
746                         dest->string = bb_xstrdup(src->string);
747         }
748         handle_special(dest);
749         return dest;
750 }
751
752 static var *incvar(var *v)
753 {
754         return setvar_i(v, getvar_i(v)+1.);
755 }
756
757 /* return true if v is number or numeric string */
758 static int is_numeric(var *v)
759 {
760         getvar_i(v);
761         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
762 }
763
764 /* return 1 when value of v corresponds to true, 0 otherwise */
765 static int istrue(var *v)
766 {
767         if (is_numeric(v))
768                 return (v->number == 0) ? 0 : 1;
769         else
770                 return (v->string && *(v->string)) ? 1 : 0;
771 }
772
773 /* temporary variables allocator. Last allocated should be first freed */
774 static var *nvalloc(int n)
775 {
776         nvblock *pb = NULL;
777         var *v, *r;
778         int size;
779
780         while (cb) {
781                 pb = cb;
782                 if ((cb->pos - cb->nv) + n <= cb->size) break;
783                 cb = cb->next;
784         }
785
786         if (! cb) {
787                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
788                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
789                 cb->size = size;
790                 cb->pos = cb->nv;
791                 cb->prev = pb;
792                 cb->next = NULL;
793                 if (pb) pb->next = cb;
794         }
795
796         v = r = cb->pos;
797         cb->pos += n;
798
799         while (v < cb->pos) {
800                 v->type = 0;
801                 v->string = NULL;
802                 v++;
803         }
804
805         return r;
806 }
807
808 static void nvfree(var *v)
809 {
810         var *p;
811
812         if (v < cb->nv || v >= cb->pos)
813                 runtime_error(EMSG_INTERNAL_ERROR);
814
815         for (p=v; p<cb->pos; p++) {
816                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
817                         clear_array(iamarray(p));
818                         free(p->x.array->items);
819                         free(p->x.array);
820                 }
821                 if (p->type & VF_WALK)
822                         free(p->x.walker);
823
824                 clrvar(p);
825         }
826
827         cb->pos = v;
828         while (cb->prev && cb->pos == cb->nv) {
829                 cb = cb->prev;
830         }
831 }
832
833 /* ------- awk program text parsing ------- */
834
835 /* Parse next token pointed by global pos, place results into global t.
836  * If token isn't expected, give away. Return token class
837  */
838 static uint32_t next_token(uint32_t expected)
839 {
840         char *p, *pp, *s;
841         char *tl;
842         uint32_t tc;
843         const uint32_t *ti;
844         int l;
845         static int concat_inserted;
846         static uint32_t save_tclass, save_info;
847         static uint32_t ltclass = TC_OPTERM;
848
849         if (t.rollback) {
850
851                 t.rollback = FALSE;
852
853         } else if (concat_inserted) {
854
855                 concat_inserted = FALSE;
856                 t.tclass = save_tclass;
857                 t.info = save_info;
858
859         } else {
860
861                 p = pos;
862
863         readnext:
864                 skip_spaces(&p);
865                 lineno = t.lineno;
866                 if (*p == '#')
867                         while (*p != '\n' && *p != '\0') p++;
868
869                 if (*p == '\n')
870                         t.lineno++;
871
872                 if (*p == '\0') {
873                         tc = TC_EOF;
874
875                 } else if (*p == '\"') {
876                         /* it's a string */
877                         t.string = s = ++p;
878                         while (*p != '\"') {
879                                 if (*p == '\0' || *p == '\n')
880                                         syntax_error(EMSG_UNEXP_EOS);
881                                 *(s++) = nextchar(&p);
882                         }
883                         p++;
884                         *s = '\0';
885                         tc = TC_STRING;
886
887                 } else if ((expected & TC_REGEXP) && *p == '/') {
888                         /* it's regexp */
889                         t.string = s = ++p;
890                         while (*p != '/') {
891                                 if (*p == '\0' || *p == '\n')
892                                         syntax_error(EMSG_UNEXP_EOS);
893                                 if ((*s++ = *p++) == '\\') {
894                                         pp = p;
895                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
896                                         if (*pp == '\\') *s++ = '\\';
897                                         if (p == pp) *s++ = *p++;
898                                 }
899                         }
900                         p++;
901                         *s = '\0';
902                         tc = TC_REGEXP;
903
904                 } else if (*p == '.' || isdigit(*p)) {
905                         /* it's a number */
906                         t.number = strtod(p, &p);
907                         if (*p == '.')
908                                 syntax_error(EMSG_UNEXP_TOKEN);
909                         tc = TC_NUMBER;
910
911                 } else {
912                         /* search for something known */
913                         tl = tokenlist;
914                         tc = 0x00000001;
915                         ti = tokeninfo;
916                         while (*tl) {
917                                 l = *(tl++);
918                                 if (l == NTCC) {
919                                         tc <<= 1;
920                                         continue;
921                                 }
922                                 /* if token class is expected, token
923                                  * matches and it's not a longer word,
924                                  * then this is what we are looking for
925                                  */
926                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
927                                 *tl == *p && strncmp(p, tl, l) == 0 &&
928                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
929                                         t.info = *ti;
930                                         p += l;
931                                         break;
932                                 }
933                                 ti++;
934                                 tl += l;
935                         }
936
937                         if (! *tl) {
938                                 /* it's a name (var/array/function),
939                                  * otherwise it's something wrong
940                                  */
941                                 if (! isalnum_(*p))
942                                         syntax_error(EMSG_UNEXP_TOKEN);
943
944                                 t.string = --p;
945                                 while(isalnum_(*(++p))) {
946                                         *(p-1) = *p;
947                                 }
948                                 *(p-1) = '\0';
949                                 tc = TC_VARIABLE;
950                                 /* also consume whitespace between functionname and bracket */
951                                 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
952                                 if (*p == '(') {
953                                         tc = TC_FUNCTION;
954                                 } else {
955                                         if (*p == '[') {
956                                                 p++;
957                                                 tc = TC_ARRAY;
958                                         }
959                                 }
960                         }
961                 }
962                 pos = p;
963
964                 /* skipping newlines in some cases */
965                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
966                         goto readnext;
967
968                 /* insert concatenation operator when needed */
969                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
970                         concat_inserted = TRUE;
971                         save_tclass = tc;
972                         save_info = t.info;
973                         tc = TC_BINOP;
974                         t.info = OC_CONCAT | SS | P(35);
975                 }
976
977                 t.tclass = tc;
978         }
979         ltclass = t.tclass;
980
981         /* Are we ready for this? */
982         if (! (ltclass & expected))
983                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
984                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
985
986         return ltclass;
987 }
988
989 static void rollback_token(void) { t.rollback = TRUE; }
990
991 static node *new_node(uint32_t info)
992 {
993         register node *n;
994
995         n = (node *)xcalloc(sizeof(node), 1);
996         n->info = info;
997         n->lineno = lineno;
998         return n;
999 }
1000
1001 static node *mk_re_node(char *s, node *n, regex_t *re)
1002 {
1003         n->info = OC_REGEXP;
1004         n->l.re = re;
1005         n->r.ire = re + 1;
1006         xregcomp(re, s, REG_EXTENDED);
1007         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1008
1009         return n;
1010 }
1011
1012 static node *condition(void)
1013 {
1014         next_token(TC_SEQSTART);
1015         return parse_expr(TC_SEQTERM);
1016 }
1017
1018 /* parse expression terminated by given argument, return ptr
1019  * to built subtree. Terminator is eaten by parse_expr */
1020 static node *parse_expr(uint32_t iexp)
1021 {
1022         node sn;
1023         node *cn = &sn;
1024         node *vn, *glptr;
1025         uint32_t tc, xtc;
1026         var *v;
1027
1028         sn.info = PRIMASK;
1029         sn.r.n = glptr = NULL;
1030         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1031
1032         while (! ((tc = next_token(xtc)) & iexp)) {
1033                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1034                         /* input redirection (<) attached to glptr node */
1035                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1036                         cn->a.n = glptr;
1037                         xtc = TC_OPERAND | TC_UOPPRE;
1038                         glptr = NULL;
1039
1040                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1041                         /* for binary and postfix-unary operators, jump back over
1042                          * previous operators with higher priority */
1043                         vn = cn;
1044                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1045                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1046                                 vn = vn->a.n;
1047                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1048                                 t.info += P(6);
1049                         cn = vn->a.n->r.n = new_node(t.info);
1050                         cn->a.n = vn->a.n;
1051                         if (tc & TC_BINOP) {
1052                                 cn->l.n = vn;
1053                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1054                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1055                                         /* it's a pipe */
1056                                         next_token(TC_GETLINE);
1057                                         /* give maximum priority to this pipe */
1058                                         cn->info &= ~PRIMASK;
1059                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1060                                 }
1061                         } else {
1062                                 cn->r.n = vn;
1063                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1064                         }
1065                         vn->a.n = cn;
1066
1067                 } else {
1068                         /* for operands and prefix-unary operators, attach them
1069                          * to last node */
1070                         vn = cn;
1071                         cn = vn->r.n = new_node(t.info);
1072                         cn->a.n = vn;
1073                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1074                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1075                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1076                                 /* one should be very careful with switch on tclass -
1077                                  * only simple tclasses should be used! */
1078                                 switch (tc) {
1079                                   case TC_VARIABLE:
1080                                   case TC_ARRAY:
1081                                         cn->info = OC_VAR;
1082                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1083                                                 cn->info = OC_FNARG;
1084                                                 cn->l.i = v->x.aidx;
1085                                         } else {
1086                                                 cn->l.v = newvar(t.string);
1087                                         }
1088                                         if (tc & TC_ARRAY) {
1089                                                 cn->info |= xS;
1090                                                 cn->r.n = parse_expr(TC_ARRTERM);
1091                                         }
1092                                         break;
1093
1094                                   case TC_NUMBER:
1095                                   case TC_STRING:
1096                                         cn->info = OC_VAR;
1097                                         v = cn->l.v = xcalloc(sizeof(var), 1);
1098                                         if (tc & TC_NUMBER)
1099                                                 setvar_i(v, t.number);
1100                                         else
1101                                                 setvar_s(v, t.string);
1102                                         break;
1103
1104                                   case TC_REGEXP:
1105                                         mk_re_node(t.string, cn,
1106                                                                         (regex_t *)xcalloc(sizeof(regex_t),2));
1107                                         break;
1108
1109                                   case TC_FUNCTION:
1110                                         cn->info = OC_FUNC;
1111                                         cn->r.f = newfunc(t.string);
1112                                         cn->l.n = condition();
1113                                         break;
1114
1115                                   case TC_SEQSTART:
1116                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1117                                         cn->a.n = vn;
1118                                         break;
1119
1120                                   case TC_GETLINE:
1121                                         glptr = cn;
1122                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1123                                         break;
1124
1125                                   case TC_BUILTIN:
1126                                         cn->l.n = condition();
1127                                         break;
1128                                 }
1129                         }
1130                 }
1131         }
1132         return sn.r.n;
1133 }
1134
1135 /* add node to chain. Return ptr to alloc'd node */
1136 static node *chain_node(uint32_t info)
1137 {
1138         register node *n;
1139
1140         if (! seq->first)
1141                 seq->first = seq->last = new_node(0);
1142
1143         if (seq->programname != programname) {
1144                 seq->programname = programname;
1145                 n = chain_node(OC_NEWSOURCE);
1146                 n->l.s = bb_xstrdup(programname);
1147         }
1148
1149         n = seq->last;
1150         n->info = info;
1151         seq->last = n->a.n = new_node(OC_DONE);
1152
1153         return n;
1154 }
1155
1156 static void chain_expr(uint32_t info)
1157 {
1158         node *n;
1159
1160         n = chain_node(info);
1161         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1162         if (t.tclass & TC_GRPTERM)
1163                 rollback_token();
1164 }
1165
1166 static node *chain_loop(node *nn)
1167 {
1168         node *n, *n2, *save_brk, *save_cont;
1169
1170         save_brk = break_ptr;
1171         save_cont = continue_ptr;
1172
1173         n = chain_node(OC_BR | Vx);
1174         continue_ptr = new_node(OC_EXEC);
1175         break_ptr = new_node(OC_EXEC);
1176         chain_group();
1177         n2 = chain_node(OC_EXEC | Vx);
1178         n2->l.n = nn;
1179         n2->a.n = n;
1180         continue_ptr->a.n = n2;
1181         break_ptr->a.n = n->r.n = seq->last;
1182
1183         continue_ptr = save_cont;
1184         break_ptr = save_brk;
1185
1186         return n;
1187 }
1188
1189 /* parse group and attach it to chain */
1190 static void chain_group(void)
1191 {
1192         uint32_t c;
1193         node *n, *n2, *n3;
1194
1195         do {
1196                 c = next_token(TC_GRPSEQ);
1197         } while (c & TC_NEWLINE);
1198
1199         if (c & TC_GRPSTART) {
1200                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1201                         if (t.tclass & TC_NEWLINE) continue;
1202                         rollback_token();
1203                         chain_group();
1204                 }
1205         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1206                 rollback_token();
1207                 chain_expr(OC_EXEC | Vx);
1208         } else {                                                /* TC_STATEMNT */
1209                 switch (t.info & OPCLSMASK) {
1210                         case ST_IF:
1211                                 n = chain_node(OC_BR | Vx);
1212                                 n->l.n = condition();
1213                                 chain_group();
1214                                 n2 = chain_node(OC_EXEC);
1215                                 n->r.n = seq->last;
1216                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1217                                         chain_group();
1218                                         n2->a.n = seq->last;
1219                                 } else {
1220                                         rollback_token();
1221                                 }
1222                                 break;
1223
1224                         case ST_WHILE:
1225                                 n2 = condition();
1226                                 n = chain_loop(NULL);
1227                                 n->l.n = n2;
1228                                 break;
1229
1230                         case ST_DO:
1231                                 n2 = chain_node(OC_EXEC);
1232                                 n = chain_loop(NULL);
1233                                 n2->a.n = n->a.n;
1234                                 next_token(TC_WHILE);
1235                                 n->l.n = condition();
1236                                 break;
1237
1238                         case ST_FOR:
1239                                 next_token(TC_SEQSTART);
1240                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1241                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
1242                                         if ((n2->info & OPCLSMASK) != OC_IN)
1243                                                 syntax_error(EMSG_UNEXP_TOKEN);
1244                                         n = chain_node(OC_WALKINIT | VV);
1245                                         n->l.n = n2->l.n;
1246                                         n->r.n = n2->r.n;
1247                                         n = chain_loop(NULL);
1248                                         n->info = OC_WALKNEXT | Vx;
1249                                         n->l.n = n2->l.n;
1250                                 } else {                                                                        /* for(;;) */
1251                                         n = chain_node(OC_EXEC | Vx);
1252                                         n->l.n = n2;
1253                                         n2 = parse_expr(TC_SEMICOL);
1254                                         n3 = parse_expr(TC_SEQTERM);
1255                                         n = chain_loop(n3);
1256                                         n->l.n = n2;
1257                                         if (! n2)
1258                                                 n->info = OC_EXEC;
1259                                 }
1260                                 break;
1261
1262                         case OC_PRINT:
1263                         case OC_PRINTF:
1264                                 n = chain_node(t.info);
1265                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1266                                 if (t.tclass & TC_OUTRDR) {
1267                                         n->info |= t.info;
1268                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1269                                 }
1270                                 if (t.tclass & TC_GRPTERM)
1271                                         rollback_token();
1272                                 break;
1273
1274                         case OC_BREAK:
1275                                 n = chain_node(OC_EXEC);
1276                                 n->a.n = break_ptr;
1277                                 break;
1278
1279                         case OC_CONTINUE:
1280                                 n = chain_node(OC_EXEC);
1281                                 n->a.n = continue_ptr;
1282                                 break;
1283
1284                         /* delete, next, nextfile, return, exit */
1285                         default:
1286                                 chain_expr(t.info);
1287
1288                 }
1289         }
1290 }
1291
1292 static void parse_program(char *p)
1293 {
1294         uint32_t tclass;
1295         node *cn;
1296         func *f;
1297         var *v;
1298
1299         pos = p;
1300         t.lineno = 1;
1301         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1302                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1303
1304                 if (tclass & TC_OPTERM)
1305                         continue;
1306
1307                 seq = &mainseq;
1308                 if (tclass & TC_BEGIN) {
1309                         seq = &beginseq;
1310                         chain_group();
1311
1312                 } else if (tclass & TC_END) {
1313                         seq = &endseq;
1314                         chain_group();
1315
1316                 } else if (tclass & TC_FUNCDECL) {
1317                         next_token(TC_FUNCTION);
1318                         pos++;
1319                         f = newfunc(t.string);
1320                         f->body.first = NULL;
1321                         f->nargs = 0;
1322                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1323                                 v = findvar(ahash, t.string);
1324                                 v->x.aidx = (f->nargs)++;
1325
1326                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1327                                         break;
1328                         }
1329                         seq = &(f->body);
1330                         chain_group();
1331                         clear_array(ahash);
1332
1333                 } else if (tclass & TC_OPSEQ) {
1334                         rollback_token();
1335                         cn = chain_node(OC_TEST);
1336                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1337                         if (t.tclass & TC_GRPSTART) {
1338                                 rollback_token();
1339                                 chain_group();
1340                         } else {
1341                                 chain_node(OC_PRINT);
1342                         }
1343                         cn->r.n = mainseq.last;
1344
1345                 } else /* if (tclass & TC_GRPSTART) */ {
1346                         rollback_token();
1347                         chain_group();
1348                 }
1349         }
1350 }
1351
1352
1353 /* -------- program execution part -------- */
1354
1355 static node *mk_splitter(char *s, tsplitter *spl)
1356 {
1357         register regex_t *re, *ire;
1358         node *n;
1359
1360         re = &spl->re[0];
1361         ire = &spl->re[1];
1362         n = &spl->n;
1363         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1364                 regfree(re);
1365                 regfree(ire);
1366         }
1367         if (bb_strlen(s) > 1) {
1368                 mk_re_node(s, n, re);
1369         } else {
1370                 n->info = (uint32_t) *s;
1371         }
1372
1373         return n;
1374 }
1375
1376 /* use node as a regular expression. Supplied with node ptr and regex_t
1377  * storage space. Return ptr to regex (if result points to preg, it should
1378  * be later regfree'd manually
1379  */
1380 static regex_t *as_regex(node *op, regex_t *preg)
1381 {
1382         var *v;
1383         char *s;
1384
1385         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1386                 return icase ? op->r.ire : op->l.re;
1387         } else {
1388                 v = nvalloc(1);
1389                 s = getvar_s(evaluate(op, v));
1390                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1391                 nvfree(v);
1392                 return preg;
1393         }
1394 }
1395
/* Gradually increasing buffer: make sure *b is allocated and that *size
 * is greater than n. When growth is needed the new size is
 * n + n/2 + 80 bytes. *size is not read while *b is still NULL, so it may
 * be uninitialized on the first call. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1402
1403 /* resize field storage space */
1404 static void fsrealloc(int size)
1405 {
1406         static int maxfields = 0;
1407         int i;
1408
1409         if (size >= maxfields) {
1410                 i = maxfields;
1411                 maxfields = size + 16;
1412                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1413                 for (; i<maxfields; i++) {
1414                         Fields[i].type = VF_SPECIAL;
1415                         Fields[i].string = NULL;
1416                 }
1417         }
1418
1419         if (size < nfields) {
1420                 for (i=size; i<nfields; i++) {
1421                         clrvar(Fields+i);
1422                 }
1423         }
1424         nfields = size;
1425 }
1426
1427 static int awk_split(char *s, node *spl, char **slist)
1428 {
1429         int l, n=0;
1430         char c[4];
1431         char *s1;
1432         regmatch_t pmatch[2];
1433
1434         /* in worst case, each char would be a separate field */
1435         *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1436
1437         c[0] = c[1] = (char)spl->info;
1438         c[2] = c[3] = '\0';
1439         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1440
1441         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1442                 while (*s) {
1443                         l = strcspn(s, c+2);
1444                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1445                         pmatch[0].rm_so <= l) {
1446                                 l = pmatch[0].rm_so;
1447                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1448                         } else {
1449                                 pmatch[0].rm_eo = l;
1450                                 if (*(s+l)) pmatch[0].rm_eo++;
1451                         }
1452
1453                         memcpy(s1, s, l);
1454                         *(s1+l) = '\0';
1455                         nextword(&s1);
1456                         s += pmatch[0].rm_eo;
1457                         n++;
1458                 }
1459         } else if (c[0] == '\0') {              /* null split */
1460                 while(*s) {
1461                         *(s1++) = *(s++);
1462                         *(s1++) = '\0';
1463                         n++;
1464                 }
1465         } else if (c[0] != ' ') {               /* single-character split */
1466                 if (icase) {
1467                         c[0] = toupper(c[0]);
1468                         c[1] = tolower(c[1]);
1469                 }
1470                 if (*s1) n++;
1471                 while ((s1 = strpbrk(s1, c))) {
1472                         *(s1++) = '\0';
1473                         n++;
1474                 }
1475         } else {                                /* space split */
1476                 while (*s) {
1477                         while (isspace(*s)) s++;
1478                         if (! *s) break;
1479                         n++;
1480                         while (*s && !isspace(*s))
1481                                 *(s1++) = *(s++);
1482                         *(s1++) = '\0';
1483                 }
1484         }
1485         return n;
1486 }
1487
1488 static void split_f0(void)
1489 {
1490         static char *fstrings = NULL;
1491         int i, n;
1492         char *s;
1493
1494         if (is_f0_split)
1495                 return;
1496
1497         is_f0_split = TRUE;
1498         free(fstrings);
1499         fsrealloc(0);
1500         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1501         fsrealloc(n);
1502         s = fstrings;
1503         for (i=0; i<n; i++) {
1504                 Fields[i].string = nextword(&s);
1505                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1506         }
1507
1508         /* set NF manually to avoid side effects */
1509         clrvar(V[NF]);
1510         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1511         V[NF]->number = nfields;
1512 }
1513
1514 /* perform additional actions when some internal variables changed */
1515 static void handle_special(var *v)
1516 {
1517         int n;
1518         char *b, *sep, *s;
1519         int sl, l, len, i, bsize;
1520
1521         if (! (v->type & VF_SPECIAL))
1522                 return;
1523
1524         if (v == V[NF]) {
1525                 n = (int)getvar_i(v);
1526                 fsrealloc(n);
1527
1528                 /* recalculate $0 */
1529                 sep = getvar_s(V[OFS]);
1530                 sl = bb_strlen(sep);
1531                 b = NULL;
1532                 len = 0;
1533                 for (i=0; i<n; i++) {
1534                         s = getvar_s(&Fields[i]);
1535                         l = bb_strlen(s);
1536                         if (b) {
1537                                 memcpy(b+len, sep, sl);
1538                                 len += sl;
1539                         }
1540                         qrealloc(&b, len+l+sl, &bsize);
1541                         memcpy(b+len, s, l);
1542                         len += l;
1543                 }
1544                 if (b) b[len] = '\0';
1545                 setvar_p(V[F0], b);
1546                 is_f0_split = TRUE;
1547
1548         } else if (v == V[F0]) {
1549                 is_f0_split = FALSE;
1550
1551         } else if (v == V[FS]) {
1552                 mk_splitter(getvar_s(v), &fsplitter);
1553
1554         } else if (v == V[RS]) {
1555                 mk_splitter(getvar_s(v), &rsplitter);
1556
1557         } else if (v == V[IGNORECASE]) {
1558                 icase = istrue(v);
1559
1560         } else {                                                /* $n */
1561                 n = getvar_i(V[NF]);
1562                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1563                 /* right here v is invalid. Just to note... */
1564         }
1565 }
1566
1567 /* step through func/builtin/etc arguments */
1568 static node *nextarg(node **pn)
1569 {
1570         node *n;
1571
1572         n = *pn;
1573         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1574                 *pn = n->r.n;
1575                 n = n->l.n;
1576         } else {
1577                 *pn = NULL;
1578         }
1579         return n;
1580 }
1581
1582 static void hashwalk_init(var *v, xhash *array)
1583 {
1584         char **w;
1585         hash_item *hi;
1586         int i;
1587
1588         if (v->type & VF_WALK)
1589                 free(v->x.walker);
1590
1591         v->type |= VF_WALK;
1592         w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1593         *w = *(w+1) = (char *)(w + 2);
1594         for (i=0; i<array->csize; i++) {
1595                 hi = array->items[i];
1596                 while(hi) {
1597                         strcpy(*w, hi->name);
1598                         nextword(w);
1599                         hi = hi->next;
1600                 }
1601         }
1602 }
1603
1604 static int hashwalk_next(var *v)
1605 {
1606         char **w;
1607
1608         w = v->x.walker;
1609         if (*(w+1) == *w)
1610                 return FALSE;
1611
1612         setvar_s(v, nextword(w+1));
1613         return TRUE;
1614 }
1615
1616 /* evaluate node, return 1 when result is true, 0 otherwise */
1617 static int ptest(node *pattern)
1618 {
1619         static var v;
1620         return istrue(evaluate(pattern, &v));
1621 }
1622
1623 /* read next record from stream rsm into a variable v */
1624 static int awk_getline(rstream *rsm, var *v)
1625 {
1626         char *b;
1627         regmatch_t pmatch[2];
1628         int a, p, pp=0, size;
1629         int fd, so, eo, r, rp;
1630         char c, *m, *s;
1631
1632         /* we're using our own buffer since we need access to accumulating
1633          * characters
1634          */
1635         fd = fileno(rsm->F);
1636         m = rsm->buffer;
1637         a = rsm->adv;
1638         p = rsm->pos;
1639         size = rsm->size;
1640         c = (char) rsplitter.n.info;
1641         rp = 0;
1642
1643         if (! m) qrealloc(&m, 256, &size);
1644         do {
1645                 b = m + a;
1646                 so = eo = p;
1647                 r = 1;
1648                 if (p > 0) {
1649                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1650                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1651                                                                                                 b, 1, pmatch, 0) == 0) {
1652                                         so = pmatch[0].rm_so;
1653                                         eo = pmatch[0].rm_eo;
1654                                         if (b[eo] != '\0')
1655                                                 break;
1656                                 }
1657                         } else if (c != '\0') {
1658                                 s = strchr(b+pp, c);
1659                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1660                                 if (s) {
1661                                         so = eo = s-b;
1662                                         eo++;
1663                                         break;
1664                                 }
1665                         } else {
1666                                 while (b[rp] == '\n')
1667                                         rp++;
1668                                 s = strstr(b+rp, "\n\n");
1669                                 if (s) {
1670                                         so = eo = s-b;
1671                                         while (b[eo] == '\n') eo++;
1672                                         if (b[eo] != '\0')
1673                                                 break;
1674                                 }
1675                         }
1676                 }
1677
1678                 if (a > 0) {
1679                         memmove(m, (const void *)(m+a), p+1);
1680                         b = m;
1681                         a = 0;
1682                 }
1683
1684                 qrealloc(&m, a+p+128, &size);
1685                 b = m + a;
1686                 pp = p;
1687                 p += safe_read(fd, b+p, size-p-1);
1688                 if (p < pp) {
1689                         p = 0;
1690                         r = 0;
1691                         setvar_i(V[ERRNO], errno);
1692                 }
1693                 b[p] = '\0';
1694
1695         } while (p > pp);
1696
1697         if (p == 0) {
1698                 r--;
1699         } else {
1700                 c = b[so]; b[so] = '\0';
1701                 setvar_s(v, b+rp);
1702                 v->type |= VF_USER;
1703                 b[so] = c;
1704                 c = b[eo]; b[eo] = '\0';
1705                 setvar_s(V[RT], b+so);
1706                 b[eo] = c;
1707         }
1708
1709         rsm->buffer = m;
1710         rsm->adv = a + eo;
1711         rsm->pos = p - eo;
1712         rsm->size = size;
1713
1714         return r;
1715 }
1716
1717 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1718 {
1719         int r=0;
1720         char c;
1721         const char *s=format;
1722
1723         if (int_as_int && n == (int)n) {
1724                 r = snprintf(b, size, "%d", (int)n);
1725         } else {
1726                 do { c = *s; } while (*s && *++s);
1727                 if (strchr("diouxX", c)) {
1728                         r = snprintf(b, size, format, (int)n);
1729                 } else if (strchr("eEfgG", c)) {
1730                         r = snprintf(b, size, format, n);
1731                 } else {
1732                         runtime_error(EMSG_INV_FMT);
1733                 }
1734         }
1735         return r;
1736 }
1737
1738
1739 /* formatted output into an allocated buffer, return ptr to buffer */
1740 static char *awk_printf(node *n)
1741 {
1742         char *b = NULL;
1743         char *fmt, *s, *s1, *f;
1744         int i, j, incr, bsize;
1745         char c, c1;
1746         var *v, *arg;
1747
1748         v = nvalloc(1);
1749         fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1750
1751         i = 0;
1752         while (*f) {
1753                 s = f;
1754                 while (*f && (*f != '%' || *(++f) == '%'))
1755                         f++;
1756                 while (*f && !isalpha(*f))
1757                         f++;
1758
1759                 incr = (f - s) + MAXVARFMT;
1760                 qrealloc(&b, incr+i, &bsize);
1761                 c = *f; if (c != '\0') f++;
1762                 c1 = *f ; *f = '\0';
1763                 arg = evaluate(nextarg(&n), v);
1764
1765                 j = i;
1766                 if (c == 'c' || !c) {
1767                         i += sprintf(b+i, s,
1768                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1769
1770                 } else if (c == 's') {
1771                     s1 = getvar_s(arg);
1772                         qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1773                         i += sprintf(b+i, s, s1);
1774
1775                 } else {
1776                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1777                 }
1778                 *f = c1;
1779
1780                 /* if there was an error while sprintf, return value is negative */
1781                 if (i < j) i = j;
1782
1783         }
1784
1785         b = xrealloc(b, i+1);
1786         free(fmt);
1787         nvfree(v);
1788         b[i] = '\0';
1789         return b;
1790 }
1791
1792 /* common substitution routine
1793  * replace (nm) substring of (src) that match (n) with (repl), store
1794  * result into (dest), return number of substitutions. If nm=0, replace
1795  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1796  * subexpression matching (\1-\9)
1797  */
1798 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1799 {
1800         char *ds = NULL;
1801         char *sp, *s;
1802         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1803         regmatch_t pmatch[10];
1804         regex_t sreg, *re;
1805
1806         re = as_regex(rn, &sreg);
1807         if (! src) src = V[F0];
1808         if (! dest) dest = V[F0];
1809
1810         i = di = 0;
1811         sp = getvar_s(src);
1812         rl = bb_strlen(repl);
1813         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1814                 so = pmatch[0].rm_so;
1815                 eo = pmatch[0].rm_eo;
1816
1817                 qrealloc(&ds, di + eo + rl, &dssize);
1818                 memcpy(ds + di, sp, eo);
1819                 di += eo;
1820                 if (++i >= nm) {
1821                         /* replace */
1822                         di -= (eo - so);
1823                         nbs = 0;
1824                         for (s = repl; *s; s++) {
1825                                 ds[di++] = c = *s;
1826                                 if (c == '\\') {
1827                                         nbs++;
1828                                         continue;
1829                                 }
1830                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1831                                         di -= ((nbs + 3) >> 1);
1832                                         j = 0;
1833                                         if (c != '&') {
1834                                                 j = c - '0';
1835                                                 nbs++;
1836                                         }
1837                                         if (nbs % 2) {
1838                                                 ds[di++] = c;
1839                                         } else {
1840                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1841                                                 qrealloc(&ds, di + rl + n, &dssize);
1842                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1843                                                 di += n;
1844                                         }
1845                                 }
1846                                 nbs = 0;
1847                         }
1848                 }
1849
1850                 sp += eo;
1851                 if (i == nm) break;
1852                 if (eo == so) {
1853                         if (! (ds[di++] = *sp++)) break;
1854                 }
1855         }
1856
1857         qrealloc(&ds, di + strlen(sp), &dssize);
1858         strcpy(ds + di, sp);
1859         setvar_p(dest, ds);
1860         if (re == &sreg) regfree(re);
1861         return i;
1862 }
1863
1864 static var *exec_builtin(node *op, var *res)
1865 {
1866         int (*to_xxx)(int);
1867         var *tv;
1868         node *an[4];
1869         var  *av[4];
1870         char *as[4];
1871         regmatch_t pmatch[2];
1872         regex_t sreg, *re;
1873         static tsplitter tspl;
1874         node *spl;
1875         uint32_t isr, info;
1876         int nargs;
1877         time_t tt;
1878         char *s, *s1;
1879         int i, l, ll, n;
1880
1881         tv = nvalloc(4);
1882         isr = info = op->info;
1883         op = op->l.n;
1884
1885         av[2] = av[3] = NULL;
1886         for (i=0 ; i<4 && op ; i++) {
1887                 an[i] = nextarg(&op);
1888                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1889                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1890                 isr >>= 1;
1891         }
1892
1893         nargs = i;
1894         if (nargs < (info >> 30))
1895                 runtime_error(EMSG_TOO_FEW_ARGS);
1896
1897         switch (info & OPNMASK) {
1898
1899           case B_a2:
1900 #ifdef CONFIG_FEATURE_AWK_MATH
1901                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1902 #else
1903                 runtime_error(EMSG_NO_MATH);
1904 #endif
1905                 break;
1906
1907           case B_sp:
1908                 if (nargs > 2) {
1909                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1910                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1911                 } else {
1912                         spl = &fsplitter.n;
1913                 }
1914
1915                 n = awk_split(as[0], spl, &s);
1916                 s1 = s;
1917                 clear_array(iamarray(av[1]));
1918                 for (i=1; i<=n; i++)
1919                         setari_u(av[1], i, nextword(&s1));
1920                 free(s);
1921                 setvar_i(res, n);
1922                 break;
1923
1924           case B_ss:
1925                 l = bb_strlen(as[0]);
1926                 i = getvar_i(av[1]) - 1;
1927                 if (i>l) i=l; if (i<0) i=0;
1928                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1929                 if (n<0) n=0;
1930                 s = xmalloc(n+1);
1931                 strncpy(s, as[0]+i, n);
1932                 s[n] = '\0';
1933                 setvar_p(res, s);
1934                 break;
1935
1936           case B_lo:
1937                 to_xxx = tolower;
1938                 goto lo_cont;
1939
1940           case B_up:
1941                 to_xxx = toupper;
1942 lo_cont:
1943                 s1 = s = bb_xstrdup(as[0]);
1944                 while (*s1) {
1945                         *s1 = (*to_xxx)(*s1);
1946                         s1++;
1947                 }
1948                 setvar_p(res, s);
1949                 break;
1950
1951           case B_ix:
1952                 n = 0;
1953                 ll = bb_strlen(as[1]);
1954                 l = bb_strlen(as[0]) - ll;
1955                 if (ll > 0 && l >= 0) {
1956                         if (! icase) {
1957                                 s = strstr(as[0], as[1]);
1958                                 if (s) n = (s - as[0]) + 1;
1959                         } else {
1960                                 /* this piece of code is terribly slow and
1961                                  * really should be rewritten
1962                                  */
1963                                 for (i=0; i<=l; i++) {
1964                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1965                                                 n = i+1;
1966                                                 break;
1967                                         }
1968                                 }
1969                         }
1970                 }
1971                 setvar_i(res, n);
1972                 break;
1973
1974           case B_ti:
1975                 if (nargs > 1)
1976                         tt = getvar_i(av[1]);
1977                 else
1978                         time(&tt);
1979                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1980                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1981                 buf[i] = '\0';
1982                 setvar_s(res, buf);
1983                 break;
1984
1985           case B_ma:
1986                 re = as_regex(an[1], &sreg);
1987                 n = regexec(re, as[0], 1, pmatch, 0);
1988                 if (n == 0) {
1989                         pmatch[0].rm_so++;
1990                         pmatch[0].rm_eo++;
1991                 } else {
1992                         pmatch[0].rm_so = 0;
1993                         pmatch[0].rm_eo = -1;
1994                 }
1995                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1996                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1997                 setvar_i(res, pmatch[0].rm_so);
1998                 if (re == &sreg) regfree(re);
1999                 break;
2000
2001           case B_ge:
2002                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2003                 break;
2004
2005           case B_gs:
2006                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2007                 break;
2008
2009           case B_su:
2010                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2011                 break;
2012         }
2013
2014         nvfree(tv);
2015         return res;
2016 }
2017
2018 /*
2019  * Evaluate node - the heart of the program. Supplied with subtree
2020  * and place where to store result. returns ptr to result.
2021  */
2022 #define XC(n) ((n) >> 8)
2023
2024 static var *evaluate(node *op, var *res)
2025 {
2026         /* This procedure is recursive so we should count every byte */
2027         static var *fnargs = NULL;
2028         static unsigned int seed = 1;
2029         static regex_t sreg;
2030         node *op1;
2031         var *v1;
2032         union {
2033                 var *v;
2034                 char *s;
2035                 double d;
2036                 int i;
2037         } L, R;
2038         uint32_t opinfo;
2039         short opn;
2040         union {
2041                 char *s;
2042                 rstream *rsm;
2043                 FILE *F;
2044                 var *v;
2045                 regex_t *re;
2046                 uint32_t info;
2047         } X;
2048
2049         if (! op)
2050                 return setvar_s(res, NULL);
2051
2052         v1 = nvalloc(2);
2053
2054         while (op) {
2055
2056                 opinfo = op->info;
2057                 opn = (short)(opinfo & OPNMASK);
2058                 lineno = op->lineno;
2059
2060                 /* execute inevitable things */
2061                 op1 = op->l.n;
2062                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2063                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2064                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2065                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2066                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2067
2068                 switch (XC(opinfo & OPCLSMASK)) {
2069
2070                   /* -- iterative node type -- */
2071
2072                   /* test pattern */
2073                   case XC( OC_TEST ):
2074                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2075                                 /* it's range pattern */
2076                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2077                                         op->info |= OF_CHECKED;
2078                                         if (ptest(op1->r.n))
2079                                                 op->info &= ~OF_CHECKED;
2080
2081                                         op = op->a.n;
2082                                 } else {
2083                                         op = op->r.n;
2084                                 }
2085                         } else {
2086                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2087                         }
2088                         break;
2089
2090                   /* just evaluate an expression, also used as unconditional jump */
2091                   case XC( OC_EXEC ):
2092                         break;
2093
2094                   /* branch, used in if-else and various loops */
2095                   case XC( OC_BR ):
2096                         op = istrue(L.v) ? op->a.n : op->r.n;
2097                         break;
2098
2099                   /* initialize for-in loop */
2100                   case XC( OC_WALKINIT ):
2101                         hashwalk_init(L.v, iamarray(R.v));
2102                         break;
2103
2104                   /* get next array item */
2105                   case XC( OC_WALKNEXT ):
2106                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2107                         break;
2108
2109                   case XC( OC_PRINT ):
2110                   case XC( OC_PRINTF ):
2111                         X.F = stdout;
2112                         if (op->r.n) {
2113                                 X.rsm = newfile(R.s);
2114                                 if (! X.rsm->F) {
2115                                         if (opn == '|') {
2116                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2117                                                         bb_perror_msg_and_die("popen");
2118                                                 X.rsm->is_pipe = 1;
2119                                         } else {
2120                                                 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2121                                         }
2122                                 }
2123                                 X.F = X.rsm->F;
2124                         }
2125
2126                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2127                                 if (! op1) {
2128                                         fputs(getvar_s(V[F0]), X.F);
2129                                 } else {
2130                                         while (op1) {
2131                                                 L.v = evaluate(nextarg(&op1), v1);
2132                                                 if (L.v->type & VF_NUMBER) {
2133                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2134                                                                                                                 getvar_i(L.v), TRUE);
2135                                                         fputs(buf, X.F);
2136                                                 } else {
2137                                                         fputs(getvar_s(L.v), X.F);
2138                                                 }
2139
2140                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2141                                         }
2142                                 }
2143                                 fputs(getvar_s(V[ORS]), X.F);
2144
2145                         } else {        /* OC_PRINTF */
2146                                 L.s = awk_printf(op1);
2147                                 fputs(L.s, X.F);
2148                                 free(L.s);
2149                         }
2150                         fflush(X.F);
2151                         break;
2152
2153                   case XC( OC_DELETE ):
2154                         X.info = op1->info & OPCLSMASK;
2155                         if (X.info == OC_VAR) {
2156                                 R.v = op1->l.v;
2157                         } else if (X.info == OC_FNARG) {
2158                                 R.v = &fnargs[op1->l.i];
2159                         } else {
2160                                 runtime_error(EMSG_NOT_ARRAY);
2161                         }
2162
2163                         if (op1->r.n) {
2164                                 clrvar(L.v);
2165                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2166                                 hash_remove(iamarray(R.v), L.s);
2167                         } else {
2168                                 clear_array(iamarray(R.v));
2169                         }
2170                         break;
2171
2172                   case XC( OC_NEWSOURCE ):
2173                         programname = op->l.s;
2174                         break;
2175
2176                   case XC( OC_RETURN ):
2177                         copyvar(res, L.v);
2178                         break;
2179
2180                   case XC( OC_NEXTFILE ):
2181                         nextfile = TRUE;
2182                   case XC( OC_NEXT ):
2183                         nextrec = TRUE;
2184                   case XC( OC_DONE ):
2185                         clrvar(res);
2186                         break;
2187
2188                   case XC( OC_EXIT ):
2189                         awk_exit(L.d);
2190
2191                   /* -- recursive node type -- */
2192
2193                   case XC( OC_VAR ):
2194                         L.v = op->l.v;
2195                         if (L.v == V[NF])
2196                                 split_f0();
2197                         goto v_cont;
2198
2199                   case XC( OC_FNARG ):
2200                         L.v = &fnargs[op->l.i];
2201
2202 v_cont:
2203                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2204                         break;
2205
2206                   case XC( OC_IN ):
2207                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2208                         break;
2209
2210                   case XC( OC_REGEXP ):
2211                         op1 = op;
2212                         L.s = getvar_s(V[F0]);
2213                         goto re_cont;
2214
2215                   case XC( OC_MATCH ):
2216                         op1 = op->r.n;
2217 re_cont:
2218                         X.re = as_regex(op1, &sreg);
2219                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2220                         if (X.re == &sreg) regfree(X.re);
2221                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2222                         break;
2223
2224                   case XC( OC_MOVE ):
2225                         /* if source is a temporary string, just relink it to dest */
2226                         if (R.v == v1+1 && R.v->string) {
2227                                 res = setvar_p(L.v, R.v->string);
2228                                 R.v->string = NULL;
2229                         } else {
2230                                 res = copyvar(L.v, R.v);
2231                         }
2232                         break;
2233
2234                   case XC( OC_TERNARY ):
2235                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2236                                 runtime_error(EMSG_POSSIBLE_ERROR);
2237                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2238                         break;
2239
2240                   case XC( OC_FUNC ):
2241                         if (! op->r.f->body.first)
2242                                 runtime_error(EMSG_UNDEF_FUNC);
2243
2244                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2245                         while (op1) {
2246                                 L.v = evaluate(nextarg(&op1), v1);
2247                                 copyvar(R.v, L.v);
2248                                 R.v->type |= VF_CHILD;
2249                                 R.v->x.parent = L.v;
2250                                 if (++R.v - X.v >= op->r.f->nargs)
2251                                         break;
2252                         }
2253
2254                         R.v = fnargs;
2255                         fnargs = X.v;
2256
2257                         L.s = programname;
2258                         res = evaluate(op->r.f->body.first, res);
2259                         programname = L.s;
2260
2261                         nvfree(fnargs);
2262                         fnargs = R.v;
2263                         break;
2264
2265                   case XC( OC_GETLINE ):
2266                   case XC( OC_PGETLINE ):
2267                         if (op1) {
2268                                 X.rsm = newfile(L.s);
2269                                 if (! X.rsm->F) {
2270                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2271                                                 X.rsm->F = popen(L.s, "r");
2272                                                 X.rsm->is_pipe = TRUE;
2273                                         } else {
2274                                                 X.rsm->F = fopen(L.s, "r");             /* not bb_xfopen! */
2275                                         }
2276                                 }
2277                         } else {
2278                                 if (! iF) iF = next_input_file();
2279                                 X.rsm = iF;
2280                         }
2281
2282                         if (! X.rsm->F) {
2283                                 setvar_i(V[ERRNO], errno);
2284                                 setvar_i(res, -1);
2285                                 break;
2286                         }
2287
2288                         if (! op->r.n)
2289                                 R.v = V[F0];
2290
2291                         L.i = awk_getline(X.rsm, R.v);
2292                         if (L.i > 0) {
2293                                 if (! op1) {
2294                                         incvar(V[FNR]);
2295                                         incvar(V[NR]);
2296                                 }
2297                         }
2298                         setvar_i(res, L.i);
2299                         break;
2300
2301                   /* simple builtins */
2302                   case XC( OC_FBLTIN ):
2303                         switch (opn) {
2304
2305                           case F_in:
2306                                 R.d = (int)L.d;
2307                                 break;
2308
2309                           case F_rn:
2310                                 R.d =  (double)rand() / (double)RAND_MAX;
2311                                 break;
2312
2313 #ifdef CONFIG_FEATURE_AWK_MATH
2314                           case F_co:
2315                                 R.d = cos(L.d);
2316                                 break;
2317
2318                           case F_ex:
2319                                 R.d = exp(L.d);
2320                                 break;
2321
2322                           case F_lg:
2323                                 R.d = log(L.d);
2324                                 break;
2325
2326                           case F_si:
2327                                 R.d = sin(L.d);
2328                                 break;
2329
2330                           case F_sq:
2331                                 R.d = sqrt(L.d);
2332                                 break;
2333 #else
2334                           case F_co:
2335                           case F_ex:
2336                           case F_lg:
2337                           case F_si:
2338                           case F_sq:
2339                                 runtime_error(EMSG_NO_MATH);
2340                                 break;
2341 #endif
2342
2343                           case F_sr:
2344                                 R.d = (double)seed;
2345                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2346                                 srand(seed);
2347                                 break;
2348
2349                           case F_ti:
2350                                 R.d = time(NULL);
2351                                 break;
2352
2353                           case F_le:
2354                                 if (! op1)
2355                                         L.s = getvar_s(V[F0]);
2356                                 R.d = bb_strlen(L.s);
2357                                 break;
2358
2359                           case F_sy:
2360                                 fflush(NULL);
2361                                 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2362                                 break;
2363
2364                           case F_ff:
2365                                 if (! op1)
2366                                         fflush(stdout);
2367                                 else {
2368                                         if (L.s && *L.s) {
2369                                                 X.rsm = newfile(L.s);
2370                                                 fflush(X.rsm->F);
2371                                         } else {
2372                                                 fflush(NULL);
2373                                         }
2374                                 }
2375                                 break;
2376
2377                           case F_cl:
2378                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2379                                 if (X.rsm) {
2380                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2381                                         free(X.rsm->buffer);
2382                                         hash_remove(fdhash, L.s);
2383                                 }
2384                                 if (R.i != 0)
2385                                         setvar_i(V[ERRNO], errno);
2386                                 R.d = (double)R.i;
2387                                 break;
2388                         }
2389                         setvar_i(res, R.d);
2390                         break;
2391
2392                   case XC( OC_BUILTIN ):
2393                         res = exec_builtin(op, res);
2394                         break;
2395
2396                   case XC( OC_SPRINTF ):
2397                         setvar_p(res, awk_printf(op1));
2398                         break;
2399
2400                   case XC( OC_UNARY ):
2401                         X.v = R.v;
2402                         L.d = R.d = getvar_i(R.v);
2403                         switch (opn) {
2404                           case 'P':
2405                                 L.d = ++R.d;
2406                                 goto r_op_change;
2407                           case 'p':
2408                                 R.d++;
2409                                 goto r_op_change;
2410                           case 'M':
2411                                 L.d = --R.d;
2412                                 goto r_op_change;
2413                           case 'm':
2414                                 R.d--;
2415                                 goto r_op_change;
2416                           case '!':
2417                             L.d = istrue(X.v) ? 0 : 1;
2418                                 break;
2419                           case '-':
2420                                 L.d = -R.d;
2421                                 break;
2422                         r_op_change:
2423                                 setvar_i(X.v, R.d);
2424                         }
2425                         setvar_i(res, L.d);
2426                         break;
2427
2428                   case XC( OC_FIELD ):
2429                         R.i = (int)getvar_i(R.v);
2430                         if (R.i == 0) {
2431                                 res = V[F0];
2432                         } else {
2433                                 split_f0();
2434                                 if (R.i > nfields)
2435                                         fsrealloc(R.i);
2436
2437                                 res = &Fields[R.i-1];
2438                         }
2439                         break;
2440
2441                   /* concatenation (" ") and index joining (",") */
2442                   case XC( OC_CONCAT ):
2443                   case XC( OC_COMMA ):
2444                         opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2445                         X.s = (char *)xmalloc(opn);
2446                         strcpy(X.s, L.s);
2447                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2448                                 L.s = getvar_s(V[SUBSEP]);
2449                                 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2450                                 strcat(X.s, L.s);
2451                         }
2452                         strcat(X.s, R.s);
2453                         setvar_p(res, X.s);
2454                         break;
2455
2456                   case XC( OC_LAND ):
2457                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2458                         break;
2459
2460                   case XC( OC_LOR ):
2461                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2462                         break;
2463
2464                   case XC( OC_BINARY ):
2465                   case XC( OC_REPLACE ):
2466                         R.d = getvar_i(R.v);
2467                         switch (opn) {
2468                           case '+':
2469                                 L.d += R.d;
2470                                 break;
2471                           case '-':
2472                                 L.d -= R.d;
2473                                 break;
2474                           case '*':
2475                                 L.d *= R.d;
2476                                 break;
2477                           case '/':
2478                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2479                                 L.d /= R.d;
2480                                 break;
2481                           case '&':
2482 #ifdef CONFIG_FEATURE_AWK_MATH
2483                                 L.d = pow(L.d, R.d);
2484 #else
2485                                 runtime_error(EMSG_NO_MATH);
2486 #endif
2487                                 break;
2488                           case '%':
2489                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2490                                 L.d -= (int)(L.d / R.d) * R.d;
2491                                 break;
2492                         }
2493                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2494                         break;
2495
2496                   case XC( OC_COMPARE ):
2497                         if (is_numeric(L.v) && is_numeric(R.v)) {
2498                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2499                         } else {
2500                                 L.s = getvar_s(L.v);
2501                                 R.s = getvar_s(R.v);
2502                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2503                         }
2504                         switch (opn & 0xfe) {
2505                           case 0:
2506                                 R.i = (L.d > 0);
2507                                 break;
2508                           case 2:
2509                                 R.i = (L.d >= 0);
2510                                 break;
2511                           case 4:
2512                                 R.i = (L.d == 0);
2513                                 break;
2514                         }
2515                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2516                         break;
2517
2518                   default:
2519                         runtime_error(EMSG_POSSIBLE_ERROR);
2520                 }
2521                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2522                         op = op->a.n;
2523                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2524                         break;
2525                 if (nextrec)
2526                         break;
2527         }
2528         nvfree(v1);
2529         return res;
2530 }
2531
2532
2533 /* -------- main & co. -------- */
2534
2535 static int awk_exit(int r)
2536 {
2537         unsigned int i;
2538         hash_item *hi;
2539         static var tv;
2540
2541         if (! exiting) {
2542                 exiting = TRUE;
2543                 nextrec = FALSE;
2544                 evaluate(endseq.first, &tv);
2545         }
2546
2547         /* waiting for children */
2548         for (i=0; i<fdhash->csize; i++) {
2549                 hi = fdhash->items[i];
2550                 while(hi) {
2551                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2552                                 pclose(hi->data.rs.F);
2553                         hi = hi->next;
2554                 }
2555         }
2556
2557         exit(r);
2558 }
2559
2560 /* if expr looks like "var=value", perform assignment and return 1,
2561  * otherwise return 0 */
2562 static int is_assignment(const char *expr)
2563 {
2564         char *exprc, *s, *s0, *s1;
2565
2566         exprc = bb_xstrdup(expr);
2567         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2568                 free(exprc);
2569                 return FALSE;
2570         }
2571
2572         *(s++) = '\0';
2573         s0 = s1 = s;
2574         while (*s)
2575                 *(s1++) = nextchar(&s);
2576
2577         *s1 = '\0';
2578         setvar_u(newvar(exprc), s0);
2579         free(exprc);
2580         return TRUE;
2581 }
2582
2583 /* switch to next input file */
2584 static rstream *next_input_file(void)
2585 {
2586         static rstream rsm;
2587         FILE *F = NULL;
2588         char *fname, *ind;
2589         static int files_happen = FALSE;
2590
2591         if (rsm.F) fclose(rsm.F);
2592         rsm.F = NULL;
2593         rsm.pos = rsm.adv = 0;
2594
2595         do {
2596                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2597                         if (files_happen)
2598                                 return NULL;
2599                         fname = "-";
2600                         F = stdin;
2601                 } else {
2602                         ind = getvar_s(incvar(V[ARGIND]));
2603                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2604                         if (fname && *fname && !is_assignment(fname))
2605                                 F = afopen(fname, "r");
2606                 }
2607         } while (!F);
2608
2609         files_happen = TRUE;
2610         setvar_s(V[FILENAME], fname);
2611         rsm.F = F;
2612         return &rsm;
2613 }
2614
2615 int awk_main(int argc, char **argv)
2616 {
2617         char *s, *s1;
2618         int i, j, c, flen;
2619         var *v;
2620         static var tv;
2621         char **envp;
2622         static int from_file = FALSE;
2623         rstream *rsm;
2624         FILE *F, *stdfiles[3];
2625         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2626
2627         /* allocate global buffer */
2628         buf = xmalloc(MAXVARFMT+1);
2629
2630         vhash = hash_init();
2631         ahash = hash_init();
2632         fdhash = hash_init();
2633         fnhash = hash_init();
2634
2635         /* initialize variables */
2636         for (i=0;  *vNames;  i++) {
2637                 V[i] = v = newvar(nextword(&vNames));
2638                 if (*vValues != '\377')
2639                         setvar_s(v, nextword(&vValues));
2640                 else
2641                         setvar_i(v, 0);
2642
2643                 if (*vNames == '*') {
2644                         v->type |= VF_SPECIAL;
2645                         vNames++;
2646                 }
2647         }
2648
2649         handle_special(V[FS]);
2650         handle_special(V[RS]);
2651
2652         stdfiles[0] = stdin;
2653         stdfiles[1] = stdout;
2654         stdfiles[2] = stderr;
2655         for (i=0; i<3; i++) {
2656                 rsm = newfile(nextword(&stdnames));
2657                 rsm->F = stdfiles[i];
2658         }
2659
2660         for (envp=environ; *envp; envp++) {
2661                 s = bb_xstrdup(*envp);
2662                 s1 = strchr(s, '=');
2663                 if (!s1) {
2664                         goto keep_going;
2665                 }
2666                 *(s1++) = '\0';
2667                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2668 keep_going:
2669                 free(s);
2670         }
2671
2672         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2673                 switch (c) {
2674                         case 'F':
2675                                 setvar_s(V[FS], optarg);
2676                                 break;
2677                         case 'v':
2678                                 if (! is_assignment(optarg))
2679                                         bb_show_usage();
2680                                 break;
2681                         case 'f':
2682                                 from_file = TRUE;
2683                                 F = afopen(programname = optarg, "r");
2684                                 s = NULL;
2685                                 /* one byte is reserved for some trick in next_token */
2686                                 if (fseek(F, 0, SEEK_END) == 0) {
2687                                         flen = ftell(F);
2688                                         s = (char *)xmalloc(flen+4);
2689                                         fseek(F, 0, SEEK_SET);
2690                                         i = 1 + fread(s+1, 1, flen, F);
2691                                 } else {
2692                                         for (i=j=1; j>0; i+=j) {
2693                                                 s = (char *)xrealloc(s, i+4096);
2694                                                 j = fread(s+i, 1, 4094, F);
2695                                         }
2696                                 }
2697                                 s[i] = '\0';
2698                                 fclose(F);
2699                                 parse_program(s+1);
2700                                 free(s);
2701                                 break;
2702                         case 'W':
2703                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2704                                 break;
2705
2706                         default:
2707                                 bb_show_usage();
2708                 }
2709         }
2710
2711         if (!from_file) {
2712                 if (argc == optind)
2713                         bb_show_usage();
2714                 programname="cmd. line";
2715                 parse_program(argv[optind++]);
2716
2717         }
2718
2719         /* fill in ARGV array */
2720         setvar_i(V[ARGC], argc - optind + 1);
2721         setari_u(V[ARGV], 0, "awk");
2722         for(i=optind; i < argc; i++)
2723                 setari_u(V[ARGV], i+1-optind, argv[i]);
2724
2725         evaluate(beginseq.first, &tv);
2726         if (! mainseq.first && ! endseq.first)
2727                 awk_exit(EXIT_SUCCESS);
2728
2729         /* input file could already be opened in BEGIN block */
2730         if (! iF) iF = next_input_file();
2731
2732         /* passing through input files */
2733         while (iF) {
2734
2735                 nextfile = FALSE;
2736                 setvar_i(V[FNR], 0);
2737
2738                 while ((c = awk_getline(iF, V[F0])) > 0) {
2739
2740                         nextrec = FALSE;
2741                         incvar(V[NR]);
2742                         incvar(V[FNR]);
2743                         evaluate(mainseq.first, &tv);
2744
2745                         if (nextfile)
2746                                 break;
2747                 }
2748
2749                 if (c < 0)
2750                         runtime_error(strerror(errno));
2751
2752                 iF = next_input_file();
2753
2754         }
2755
2756         awk_exit(EXIT_SUCCESS);
2757
2758         return 0;
2759 }
2760