087be44a5d3f588708d8070e633f77528ef407f8
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <time.h>
29 #include <math.h>
30 #include <ctype.h>
31 #include <getopt.h>
32
33 #include "xregex.h"
34 #include "busybox.h"
35
36
/* limit passed to fmt_num() together with the shared buffer `buf` when a
 * number is converted to its string form (see getvar_s) */
37 #define MAXVARFMT       240
/* minimum number of var slots allocated per temporary-variable block
 * (see nvalloc) */
38 #define MINNVBLOCK      64
39
40 /* variable flags (bit mask kept in var.type) */
41 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
42 #define VF_ARRAY        0x0002  /* 1 = it's an array */
43
44 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
45 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
47 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer; clrvar() does not free it */
49 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly (set by clrvar() on every assignment) */
51
52 /* these flags describe the variable rather than its current value: clrvar() keeps them when the value is replaced */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
55 /* Variable: one awk value.  `type` holds the VF_* flags that tell which of
 * `number` / `string` is the primary value and which member of `x` is in use. */
56 typedef struct var_s {
57         unsigned short type;            /* VF_* flags */
58         double number;                  /* numeric value, or cached conversion of `string` (see getvar_i) */
59         char *string;                   /* string value, or cached conversion of `number` (see getvar_s); NULL reads as "" */
60         union {
61                 int aidx;                               /* func arg index (on compilation stage) */
62                 struct xhash_s *array;  /* array ptr */
63                 struct var_s *parent;   /* for func args, ptr to actual parameter */
64                 char **walker;                  /* list of array elements (for..in) */
65         } x;
66 } var;
67
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
70         struct node_s *first;           /* first node of the chain */
71         struct node_s *last;            /* last node of the chain */
72         char *programname;              /* NOTE(review): presumably the source name used in diagnostics - confirm against the parser */
73 } chain;
74
75 /* Function: entry stored in the functions hash (see hash_item.data.f) */
76 typedef struct func_s {
77         unsigned short nargs;           /* NOTE(review): presumably the number of declared parameters - confirm */
78         struct chain_s body;            /* node chain of the function body */
79 } func;
80
81 /* I/O stream: entry stored in the redirect-streams hash (see hash_item.data.rs) */
82 typedef struct rstream_s {
83         FILE *F;                        /* underlying stdio stream */
84         char *buffer;                   /* NOTE(review): buffer/adv/size/pos look like read-buffer bookkeeping; */
85         int adv;                        /* their exact meaning is defined by code outside this part of the file */
86         int size;
87         int pos;
88         unsigned short is_pipe;         /* NOTE(review): presumably nonzero when F was opened as a pipe - confirm */
89 } rstream;
90
/* One entry of an xhash bucket chain.  `data` must stay the first member:
 * hash_search()/hash_find() hand out &data, and hash_find() stores that
 * pointer in a hash_item * before using it. */
91 typedef struct hash_item_s {
92         union {
93                 struct var_s v;                 /* variable/array hash */
94                 struct rstream_s rs;    /* redirect streams hash */
95                 struct func_s f;                /* functions hash */
96         } data;
97         struct hash_item_s *next;       /* next in chain */
98         char name[1];                           /* really it's longer: hash_find() allocates room for the whole key */
99 } hash_item;
100
/* Chained hash table keyed by NUL-terminated strings. */
101 typedef struct xhash_s {
102         unsigned int nel;                                       /* num of elements */
103         unsigned int csize;                                     /* current hash size (number of buckets in items[]) */
104         unsigned int nprime;                            /* next hash size in PRIMES[] */
105         unsigned int glen;                                      /* summary length of item names (each counted with its NUL) */
106         struct hash_item_s **items;                             /* bucket array, csize chain heads */
107 } xhash;
108
109 /* Tree node of the parsed program.  Which member of each union is valid
 * depends on `info`; the interpreting code is outside this part of the file. */
110 typedef struct node_s {
111         uint32_t info;                  /* NOTE(review): presumably a tokeninfo[]-style word (class, flags, priority) - confirm */
112         unsigned short lineno;          /* source line number */
113         union {
114                 struct node_s *n;
115                 var *v;
116                 int i;
117                 char *s;
118                 regex_t *re;
119         } l;                            /* left operand */
120         union {
121                 struct node_s *n;
122                 regex_t *ire;
123                 func *f;
124                 int argno;
125         } r;                            /* right operand */
126         union {
127                 struct node_s *n;
128         } a;                            /* NOTE(review): third link; role not visible here */
129 } node;
130
131 /* Block of temporary variables, used as a stack by nvalloc()/nvfree().
 * Blocks are linked into a doubly linked list; the global `cb` points at
 * the block currently allocated from. */
132 typedef struct nvblock_s {
133         int size;                       /* capacity of nv[] in vars */
134         var *pos;                       /* first unused slot in nv[] */
135         struct nvblock_s *prev;
136         struct nvblock_s *next;
137         var nv[0];                      /* trailing array, allocated together with the header */
138 } nvblock;
139
/* Type of the globals fsplitter and rsplitter.
 * NOTE(review): presumably a splitter node plus compiled regexes for field /
 * record separation; the code that fills it is not in this part of the file. */
140 typedef struct tsplitter_s {
141         node n;
142         regex_t re[2];
143 } tsplitter;
144
145 /* simple token classes */
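146 /* Order and bit values are very important: next_token() starts with class bit 1 and shifts it left once per NTC separator in tokenlist, so the Nth group of tokenlist must correspond to the class with bit N-1 below. */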
147 #define TC_SEQSTART      1                              /* ( */
148 #define TC_SEQTERM      (1 << 1)                /* ) */
149 #define TC_REGEXP       (1 << 2)                /* /.../ */
150 #define TC_OUTRDR       (1 << 3)                /* | > >> */
151 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
152 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
153 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
154 #define TC_IN           (1 << 7)
155 #define TC_COMMA        (1 << 8)
156 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
157 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
158 #define TC_ARRTERM      (1 << 11)               /* ] */
159 #define TC_GRPSTART     (1 << 12)               /* { */
160 #define TC_GRPTERM      (1 << 13)               /* } */
161 #define TC_SEMICOL      (1 << 14)
162 #define TC_NEWLINE      (1 << 15)
163 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
164 #define TC_WHILE        (1 << 17)
165 #define TC_ELSE         (1 << 18)
166 #define TC_BUILTIN      (1 << 19)
167 #define TC_GETLINE      (1 << 20)
168 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
169 #define TC_BEGIN        (1 << 22)
170 #define TC_END          (1 << 23)
171 #define TC_EOF          (1 << 24)
172 #define TC_VARIABLE     (1 << 25)
173 #define TC_ARRAY        (1 << 26)
174 #define TC_FUNCTION     (1 << 27)
175 #define TC_STRING       (1 << 28)
176 #define TC_NUMBER       (1 << 29)
177
178 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
179
180 /* combined token classes */
181 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
187 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
188
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193 /* discard newlines after these */
194 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195         TC_BINOP | TC_OPTERM)
196
197 /* what can expression begin with */
198 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205         TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
207
208 #define OF_RES1         0x010000
209 #define OF_RES2         0x020000
210 #define OF_STR1         0x040000
211 #define OF_STR2         0x080000
212 #define OF_NUM1         0x100000
213 #define OF_CHECKED      0x200000
214
215 /* combined operator flags */
216 #define xx      0
217 #define xV      OF_RES2
218 #define xS      (OF_RES2 | OF_STR2)
219 #define Vx      OF_RES1
220 #define VV      (OF_RES1 | OF_RES2)
221 #define Nx      (OF_RES1 | OF_NUM1)
222 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx      (OF_RES1 | OF_STR1)
224 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
227 #define OPCLSMASK       0xFF00
228 #define OPNMASK         0x007F
229
230 /* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
231  * For builtins that byte has a different meaning, laid out as n n s3 s2 s1 v3 v2 v1:
232  * n n - min. number of args, vN - resolve Nth arg to var, sN - resolve Nth arg to string
233  */
/* Place x in the highest byte of a 32-bit info word.  The argument is
 * parenthesized and widened to uint32_t first: builtin descriptors such as
 * P(0x83) have bit 7 set, and shifting that into the sign bit of a plain
 * int is undefined behaviour. */
#define P(x)    ((uint32_t)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000
237
238 /* Operation classes */
239
240 #define SHIFT_TIL_THIS  0x0600
241 #define RECUR_FROM_THIS 0x1000
242
/* Operation class codes.  Every value occupies only bits 8-15, i.e. the
 * bits selected by OPCLSMASK (0xFF00).  OC_WALKINIT has the same value as
 * SHIFT_TIL_THIS and OC_BINARY the same value as RECUR_FROM_THIS, so the
 * numeric order of this list is significant. */
243 enum {
244         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
245         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
246
247         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
248         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
249         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
250
251         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
252         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
253         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
254         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
255         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
256         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
257         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
258         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
259         OC_DONE=0x2800,
260
261         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
262         ST_WHILE=0x3300
263 };
264
265 /* simple builtins (low bits of an OC_FBLTIN info word).  Going by the
 * tokenlist/tokeninfo pairing: in=int rn=rand co=cos ex=exp lg=log si=sin
 * sq=sqrt sr=srand ti=systime le=length sy=system ff=fflush cl=close */
266 enum {
267         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
268         F_ti,   F_le,   F_sy,   F_ff,   F_cl
269 };
270
271 /* builtins (low bits of an OC_BUILTIN info word).  Going by the
 * tokenlist/tokeninfo pairing: a2=atan2 ix=index ma=match sp=split
 * ss=substr ti=strftime lo=tolower up=toupper ge=gensub gs=gsub su=sub */
272 enum {
273         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
274         B_ge,   B_gs,   B_su
275 };
276
277 /* tokens and their corresponding info values */
278
279 #define NTC             "\377"          /* switch to next token class (tc<<1) */
280 #define NTCC    '\377'
281
282 #define OC_B    OC_BUILTIN
283
/* Table of literal tokens scanned by next_token().  Each entry is one
 * length byte (written as an octal escape) followed by that many characters
 * of token text.  An NTC byte ends the current token class and moves on to
 * the next class bit; a zero length byte ends the table.  Within a class,
 * entries are tried in the order written. */
284 static char * const tokenlist =
285         "\1("           NTC
286         "\1)"           NTC
287         "\1/"           NTC                                                                     /* REGEXP */
288         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
289         "\2++"          "\2--"          NTC                                             /* UOPPOST */
290         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
291         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
292         "\2*="          "\2/="          "\2%="          "\2^="
293         "\1+"           "\1-"           "\3**="         "\2**"
294         "\1/"           "\1%"           "\1^"           "\1*"
295         "\2!="          "\2>="          "\2<="          "\1>"
296         "\1<"           "\2!~"          "\1~"           "\2&&"
297         "\2||"          "\1?"           "\1:"           NTC
298         "\2in"          NTC
299         "\1,"           NTC
300         "\1|"           NTC
301         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
302         "\1]"           NTC
303         "\1{"           NTC
304         "\1}"           NTC
305         "\1;"           NTC
306         "\1\n"          NTC
307         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
308         "\10continue"                   "\6delete"      "\5print"
309         "\6printf"      "\4next"        "\10nextfile"
310         "\6return"      "\4exit"        NTC
311         "\5while"       NTC
312         "\4else"        NTC
313
314         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
315         "\3cos"         "\3exp"         "\3int"         "\3log"
316         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
317         "\6gensub"      "\4gsub"        "\5index"       "\6length"
318         "\5match"       "\5split"       "\7sprintf"     "\3sub"
319         "\6substr"      "\7systime"     "\10strftime"
320         "\7tolower"     "\7toupper"     NTC
321         "\7getline"     NTC
322         "\4func"        "\10function"   NTC
323         "\5BEGIN"       NTC
324         "\3END"         "\0"
325         ;
326
/* Info words for the tokens in tokenlist, one entry per token in the same
 * order (NTC separators have no entry).  An entry combines an operation
 * class (OC_ / ST_ value), operand flags (xx, xV, Vx, ... ), a P() priority
 * byte and a low-byte sub-code.
 * NOTE(review): next_token() points `ti` at this table while scanning; the
 * code that advances it is outside the visible part of the file. */
327 static uint32_t tokeninfo[] = {
328
329         0,
330         0,
331         OC_REGEXP,
332         xS|'a',         xS|'w',         xS|'|',
333         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
334         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
335                 OC_FIELD|xV|P(5),
336         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
337                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
338         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
339                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
340         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
341                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
342         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
343                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
344         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
345                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
346         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
347                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
348         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
349                 OC_COLON|xx|P(67)|':',
350         OC_IN|SV|P(49),
351         OC_COMMA|SS|P(80),
352         OC_PGETLINE|SV|P(37),
353         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
354                 OC_UNARY|xV|P(19)|'!',
355         0,
356         0,
357         0,
358         0,
359         0,
360         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
361         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
362         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
363         OC_RETURN|Vx,   OC_EXIT|Nx,
364         ST_WHILE,
365         0,
366
367         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
372         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
373         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374         OC_GETLINE|SV|P(0),
375         0,      0,
376         0,
377         0
378 };
379
380 /* internal variable names and their initial values       */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices of the internal variables in V[]; _intvarcount_ is the array
 * size.  The order matches the order of the names in vNames. */
382 enum {
383         CONVFMT=0,      OFMT,           FS,                     OFS,
384         ORS,            RS,                     RT,                     FILENAME,
385         SUBSEP,         ARGIND,         ARGC,           ARGV,
386         ERRNO,          FNR,
387         NR,                     NF,                     IGNORECASE,
388         ENVIRON,        F0,                     _intvarcount_
389 };
390
/* Names of the internal variables as consecutive NUL-terminated strings, in
 * the order of the enum above; an empty string ends the list.
 * NOTE(review): a '*' directly after a name's NUL appears to be the SPECIAL
 * marker for that name (FS, RS, NF, IGNORECASE, $) - the code that parses
 * this table is not in the visible part of the file, confirm there. */
391 static char * vNames =
392         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
393         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
394         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
395         "ERRNO\0"       "FNR\0"
396         "NR\0"          "NF\0*"         "IGNORECASE\0*"
397         "ENVIRON\0"     "$\0*"          "\0";
398
/* Initial string values as consecutive NUL-terminated strings, by position
 * for CONVFMT, OFMT, FS, OFS, ORS, RS, RT, FILENAME and SUBSEP.
 * NOTE(review): the trailing "\377" presumably tells the initialisation code
 * that no further string values follow - confirm where the table is read. */
399 static char * vValues =
400         "%.6g\0"        "%.6g\0"        " \0"           " \0"
401         "\n\0"          "\n\0"          "\0"            "\0"
402         "\034\0"
403         "\377";
404
/* Hash table sizes.  A new table starts with FIRST_PRIME buckets (see
 * hash_init) and hash_rebuild() steps through PRIMES[] as it fills up.
 * FIRST_PRIME is a plain constant without a trailing semicolon, so it can
 * be used inside any expression. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
409
410 /* globals */
411
412 extern char **environ;
413
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
418 static rstream *iF;
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
420 static char *programname;
421 static short lineno;
422 static int is_f0_split;
423 static int nfields = 0;
424 static var *Fields = NULL;
425 static tsplitter fsplitter, rsplitter;
426 static nvblock *cb = NULL;
427 static char *pos;
428 static char *buf;
429 static int icase = FALSE;
430 static int exiting = FALSE;
431
/* State of the most recently scanned token, filled in by next_token(). */
432 static struct {
433         uint32_t tclass;                /* token class (TC_* bit) */
434         uint32_t info;                  /* info word belonging to the token */
435         char *string;                   /* text of a string or regexp token */
436         double number;                  /* value of a numeric token */
437         short lineno;                   /* line counter, bumped on newlines and line continuations */
438         int rollback;                   /* when set, the next next_token() call clears it instead of scanning */
439 } t;
440
441 /* function prototypes */
442 static void handle_special(var *);
443 static node *parse_expr(uint32_t);
444 static void chain_group(void);
445 static var *evaluate(node *, var *);
446 static rstream *next_input_file(void);
447 static int fmt_num(char *, int, char *, double, int);
448 static int awk_exit(int);
449
450 /* ---- error handling ---- */
451
452 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
453 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
454 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
455 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
456 static const char EMSG_INV_FMT[] = "Invalid format specifier";
457 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
458 static const char EMSG_NOT_ARRAY[] = "Not an array";
459 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
460 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
461 #ifndef CONFIG_FEATURE_AWK_MATH
462 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
463 #endif
464
465 static void syntax_error(const char * const message)
466 {
467         bb_error_msg("%s:%i: %s", programname, lineno, message);
468         exit(1);
469 }
470
471 #define runtime_error(x) syntax_error(x)
472
473
474 /* ---- hash stuff ---- */
475
/*
 * Compute the hash of a NUL-terminated key: for every character the
 * running value is multiplied by 63 (written as (h << 6) - h) and the
 * character is added.  The caller reduces the result modulo the table size.
 */
static unsigned int hashidx(char *name)
{
	unsigned int h = 0;
	char *cp;

	for (cp = name; *cp != '\0'; cp++)
		h = *cp + (h << 6) - h;

	return h;
}
483
484 /* create new hash */
485 static xhash *hash_init(void)
486 {
487         xhash *newhash;
488
489         newhash = (xhash *)xcalloc(1, sizeof(xhash));
490         newhash->csize = FIRST_PRIME;
491         newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
492
493         return newhash;
494 }
495
496 /* find item in hash, return ptr to data, NULL if not found */
497 static void *hash_search(xhash *hash, char *name)
498 {
499         hash_item *hi;
500
501         hi = hash->items [ hashidx(name) % hash->csize ];
502         while (hi) {
503                 if (strcmp(hi->name, name) == 0)
504                         return &(hi->data);
505                 hi = hi->next;
506         }
507         return NULL;
508 }
509
510 /* grow hash if it becomes too big */
511 static void hash_rebuild(xhash *hash)
512 {
513         unsigned int newsize, i, idx;
514         hash_item **newitems, *hi, *thi;
515
516         if (hash->nprime == NPRIMES)
517                 return;
518
519         newsize = PRIMES[hash->nprime++];
520         newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
521
522         for (i=0; i<hash->csize; i++) {
523                 hi = hash->items[i];
524                 while (hi) {
525                         thi = hi;
526                         hi = thi->next;
527                         idx = hashidx(thi->name) % newsize;
528                         thi->next = newitems[idx];
529                         newitems[idx] = thi;
530                 }
531         }
532
533         free(hash->items);
534         hash->csize = newsize;
535         hash->items = newitems;
536 }
537
538 /* find item in hash, add it if necessary. Return ptr to data */
539 static void *hash_find(xhash *hash, char *name)
540 {
541         hash_item *hi;
542         unsigned int idx;
543         int l;
544
545         hi = hash_search(hash, name);
546         if (! hi) {
547                 if (++hash->nel / hash->csize > 10)
548                         hash_rebuild(hash);
549
550                 l = bb_strlen(name) + 1;
551                 hi = xcalloc(sizeof(hash_item) + l, 1);
552                 memcpy(hi->name, name, l);
553
554                 idx = hashidx(name) % hash->csize;
555                 hi->next = hash->items[idx];
556                 hash->items[idx] = hi;
557                 hash->glen += l;
558         }
559         return &(hi->data);
560 }
561
/* Typed wrappers around hash_find(): each returns the existing entry or
 * creates a new one.  findvar works on any given hash; the others use the
 * global vhash, fdhash and fnhash tables. */
562 #define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
563 #define newvar(name) (var *) hash_find ( vhash , (name) )
564 #define newfile(name) (rstream *) hash_find ( fdhash , (name) )
565 #define newfunc(name) (func *) hash_find ( fnhash , (name) )
566
567 static void hash_remove(xhash *hash, char *name)
568 {
569         hash_item *hi, **phi;
570
571         phi = &(hash->items[ hashidx(name) % hash->csize ]);
572         while (*phi) {
573                 hi = *phi;
574                 if (strcmp(hi->name, name) == 0) {
575                         hash->glen -= (bb_strlen(name) + 1);
576                         hash->nel--;
577                         *phi = hi->next;
578                         free(hi);
579                         break;
580                 }
581                 phi = &(hi->next);
582         }
583 }
584
585 /* ------ some useful functions ------ */
586
587 static void skip_spaces(char **s)
588 {
589         register char *p = *s;
590
591         while(*p == ' ' || *p == '\t' ||
592                                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
593                 p++;
594         }
595         *s = p;
596 }
597
/*
 * Return the NUL-terminated word *s points at and move *s to the first
 * byte after that word's terminating NUL.  Used to walk lists of
 * consecutive strings.
 */
static char *nextword(char **s)
{
	char *start = *s;
	char *scan = start;

	while (*scan != '\0')
		scan++;
	*s = scan + 1;

	return start;
}
606
/*
 * Fetch the next character from *s, advancing *s.  A backslash is handed
 * to bb_process_escape_sequence(); when that returns a backslash without
 * having consumed anything, the character after the backslash is taken
 * literally instead.
 */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char **)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
617
/*
 * Return 1 when c is an alphanumeric character or an underscore, else 0.
 * The value is converted to unsigned char before it reaches isalnum():
 * callers pass plain char values, and a negative char other than EOF is
 * undefined behaviour for the <ctype.h> functions.
 */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
622
/*
 * Open path with the given mode through bb_xfopen().  The name "-" is
 * not opened at all: it yields stdin, whatever mode was requested.
 */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return bb_xfopen(path, mode);
}
627
628 /* -------- working with variables (set/get/copy/etc) -------- */
629
630 static xhash *iamarray(var *v)
631 {
632         var *a = v;
633
634         while (a->type & VF_CHILD)
635                 a = a->x.parent;
636
637         if (! (a->type & VF_ARRAY)) {
638                 a->type |= VF_ARRAY;
639                 a->x.array = hash_init();
640         }
641         return a->x.array;
642 }
643
644 static void clear_array(xhash *array)
645 {
646         unsigned int i;
647         hash_item *hi, *thi;
648
649         for (i=0; i<array->csize; i++) {
650                 hi = array->items[i];
651                 while (hi) {
652                         thi = hi;
653                         hi = hi->next;
654                         free(thi->data.v.string);
655                         free(thi);
656                 }
657                 array->items[i] = NULL;
658         }
659         array->glen = array->nel = 0;
660 }
661
662 /* clear a variable */
663 static var *clrvar(var *v)
664 {
665         if (!(v->type & VF_FSTR))
666                 free(v->string);
667
668         v->type &= VF_DONTTOUCH;
669         v->type |= VF_DIRTY;
670         v->string = NULL;
671         return v;
672 }
673
674 /* assign string value to variable */
675 static var *setvar_p(var *v, char *value)
676 {
677         clrvar(v);
678         v->string = value;
679         handle_special(v);
680
681         return v;
682 }
683
684 /* same as setvar_p but make a copy of string */
685 static var *setvar_s(var *v, char *value)
686 {
687         return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
688 }
689
690 /* same as setvar_s but set USER flag */
691 static var *setvar_u(var *v, char *value)
692 {
693         setvar_s(v, value);
694         v->type |= VF_USER;
695         return v;
696 }
697
698 /* set array element to user string */
699 static void setari_u(var *a, int idx, char *s)
700 {
701         register var *v;
702         static char sidx[12];
703
704         sprintf(sidx, "%d", idx);
705         v = findvar(iamarray(a), sidx);
706         setvar_u(v, s);
707 }
708
709 /* assign numeric value to variable */
710 static var *setvar_i(var *v, double value)
711 {
712         clrvar(v);
713         v->type |= VF_NUMBER;
714         v->number = value;
715         handle_special(v);
716         return v;
717 }
718
719 static char *getvar_s(var *v)
720 {
721         /* if v is numeric and has no cached string, convert it to string */
722         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
723                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
724                 v->string = bb_xstrdup(buf);
725                 v->type |= VF_CACHED;
726         }
727         return (v->string == NULL) ? "" : v->string;
728 }
729
730 static double getvar_i(var *v)
731 {
732         char *s;
733
734         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
735                 v->number = 0;
736                 s = v->string;
737                 if (s && *s) {
738                         v->number = strtod(s, &s);
739                         if (v->type & VF_USER) {
740                                 skip_spaces(&s);
741                                 if (*s != '\0')
742                                         v->type &= ~VF_USER;
743                         }
744                 } else {
745                         v->type &= ~VF_USER;
746                 }
747                 v->type |= VF_CACHED;
748         }
749         return v->number;
750 }
751
752 static var *copyvar(var *dest, var *src)
753 {
754         if (dest != src) {
755                 clrvar(dest);
756                 dest->type |= (src->type & ~VF_DONTTOUCH);
757                 dest->number = src->number;
758                 if (src->string)
759                         dest->string = bb_xstrdup(src->string);
760         }
761         handle_special(dest);
762         return dest;
763 }
764
765 static var *incvar(var *v)
766 {
767         return setvar_i(v, getvar_i(v)+1.);
768 }
769
770 /* return true if v is number or numeric string */
771 static int is_numeric(var *v)
772 {
773         getvar_i(v);
774         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
775 }
776
777 /* return 1 when value of v corresponds to true, 0 otherwise */
778 static int istrue(var *v)
779 {
780         if (is_numeric(v))
781                 return (v->number == 0) ? 0 : 1;
782         else
783                 return (v->string && *(v->string)) ? 1 : 0;
784 }
785
786 /* temporary variables allocator. Last allocated should be first freed */
787 static var *nvalloc(int n)
788 {
789         nvblock *pb = NULL;
790         var *v, *r;
791         int size;
792
793         while (cb) {
794                 pb = cb;
795                 if ((cb->pos - cb->nv) + n <= cb->size) break;
796                 cb = cb->next;
797         }
798
799         if (! cb) {
800                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
801                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
802                 cb->size = size;
803                 cb->pos = cb->nv;
804                 cb->prev = pb;
805                 cb->next = NULL;
806                 if (pb) pb->next = cb;
807         }
808
809         v = r = cb->pos;
810         cb->pos += n;
811
812         while (v < cb->pos) {
813                 v->type = 0;
814                 v->string = NULL;
815                 v++;
816         }
817
818         return r;
819 }
820
821 static void nvfree(var *v)
822 {
823         var *p;
824
825         if (v < cb->nv || v >= cb->pos)
826                 runtime_error(EMSG_INTERNAL_ERROR);
827
828         for (p=v; p<cb->pos; p++) {
829                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
830                         clear_array(iamarray(p));
831                         free(p->x.array->items);
832                         free(p->x.array);
833                 }
834                 if (p->type & VF_WALK)
835                         free(p->x.walker);
836
837                 clrvar(p);
838         }
839
840         cb->pos = v;
841         while (cb->prev && cb->pos == cb->nv) {
842                 cb = cb->prev;
843         }
844 }
845
846 /* ------- awk program text parsing ------- */
847
848 /* Parse next token pointed by global pos, place results into global t.
849  * If token isn't expected, give away. Return token class
850  */
851 static uint32_t next_token(uint32_t expected)
852 {
853         char *p, *pp, *s;
854         char *tl;
855         uint32_t tc, *ti;
856         int l;
857         static int concat_inserted = FALSE;
858         static uint32_t save_tclass, save_info;
859         static uint32_t ltclass = TC_OPTERM;
860
861         if (t.rollback) {
862
863                 t.rollback = FALSE;
864
865         } else if (concat_inserted) {
866
867                 concat_inserted = FALSE;
868                 t.tclass = save_tclass;
869                 t.info = save_info;
870
871         } else {
872
873                 p = pos;
874
875         readnext:
876                 skip_spaces(&p);
877                 lineno = t.lineno;
878                 if (*p == '#')
879                         while (*p != '\n' && *p != '\0') p++;
880
881                 if (*p == '\n')
882                         t.lineno++;
883
884                 if (*p == '\0') {
885                         tc = TC_EOF;
886
887                 } else if (*p == '\"') {
888                         /* it's a string */
889                         t.string = s = ++p;
890                         while (*p != '\"') {
891                                 if (*p == '\0' || *p == '\n')
892                                         syntax_error(EMSG_UNEXP_EOS);
893                                 *(s++) = nextchar(&p);
894                         }
895                         p++;
896                         *s = '\0';
897                         tc = TC_STRING;
898
899                 } else if ((expected & TC_REGEXP) && *p == '/') {
900                         /* it's regexp */
901                         t.string = s = ++p;
902                         while (*p != '/') {
903                                 if (*p == '\0' || *p == '\n')
904                                         syntax_error(EMSG_UNEXP_EOS);
905                                 if ((*s++ = *p++) == '\\') {
906                                         pp = p;
907                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
908                                         if (*pp == '\\') *s++ = '\\';
909                                         if (p == pp) *s++ = *p++;
910                                 }
911                         }
912                         p++;
913                         *s = '\0';
914                         tc = TC_REGEXP;
915
916                 } else if (*p == '.' || isdigit(*p)) {
917                         /* it's a number */
918                         t.number = strtod(p, &p);
919                         if (*p == '.')
920                                 syntax_error(EMSG_UNEXP_TOKEN);
921                         tc = TC_NUMBER;
922
923                 } else {
924                         /* search for something known */
925                         tl = tokenlist;
926                         tc = 0x00000001;
927                         ti = tokeninfo;
928                         while (*tl) {
929                                 l = *(tl++);
930                                 if (l == NTCC) {
931                                         tc <<= 1;
932                                         continue;
933                                 }
934                                 /* if token class is expected, token
935                                  * matches and it's not a longer word,
936                                  * then this is what we are looking for
937                                  */
938                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
939                                 *tl == *p && strncmp(p, tl, l) == 0 &&
940                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
941                                         t.info = *ti;
942                                         p += l;
943                                         break;
944                                 }
945                                 ti++;
946                                 tl += l;
947                         }
948
949                         if (! *tl) {
950                                 /* it's a name (var/array/function),
951                                  * otherwise it's something wrong
952                                  */
953                                 if (! isalnum_(*p))
954                                         syntax_error(EMSG_UNEXP_TOKEN);
955
956                                 t.string = --p;
957                                 while(isalnum_(*(++p))) {
958                                         *(p-1) = *p;
959                                 }
960                                 *(p-1) = '\0';
961                                 tc = TC_VARIABLE;
962                                 if (*p == '(') {
963                                         tc = TC_FUNCTION;
964                                 } else {
965                                         skip_spaces(&p);
966                                         if (*p == '[') {
967                                                 p++;
968                                                 tc = TC_ARRAY;
969                                         }
970                                 }
971                         }
972                 }
973                 pos = p;
974
975                 /* skipping newlines in some cases */
976                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
977                         goto readnext;
978
979                 /* insert concatenation operator when needed */
980                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
981                         concat_inserted = TRUE;
982                         save_tclass = tc;
983                         save_info = t.info;
984                         tc = TC_BINOP;
985                         t.info = OC_CONCAT | SS | P(35);
986                 }
987
988                 t.tclass = tc;
989         }
990         ltclass = t.tclass;
991
992         /* Are we ready for this? */
993         if (! (ltclass & expected))
994                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
995                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
996
997         return ltclass;
998 }
999
1000 static void rollback_token(void) { t.rollback = TRUE; }
1001
1002 static node *new_node(uint32_t info)
1003 {
1004         register node *n;
1005
1006         n = (node *)xcalloc(sizeof(node), 1);
1007         n->info = info;
1008         n->lineno = lineno;
1009         return n;
1010 }
1011
1012 static node *mk_re_node(char *s, node *n, regex_t *re)
1013 {
1014         n->info = OC_REGEXP;
1015         n->l.re = re;
1016         n->r.ire = re + 1;
1017         xregcomp(re, s, REG_EXTENDED);
1018         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1019
1020         return n;
1021 }
1022
1023 static node *condition(void)
1024 {
1025         next_token(TC_SEQSTART);
1026         return parse_expr(TC_SEQTERM);
1027 }
1028
1029 /* parse expression terminated by given argument, return ptr
1030  * to built subtree. Terminator is eaten by parse_expr */
1031 static node *parse_expr(uint32_t iexp)
1032 {
1033         node sn;
1034         node *cn = &sn;
1035         node *vn, *glptr;
1036         uint32_t tc, xtc;
1037         var *v;
1038
1039         sn.info = PRIMASK;
1040         sn.r.n = glptr = NULL;
1041         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1042
1043         while (! ((tc = next_token(xtc)) & iexp)) {
1044                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1045                         /* input redirection (<) attached to glptr node */
1046                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1047                         cn->a.n = glptr;
1048                         xtc = TC_OPERAND | TC_UOPPRE;
1049                         glptr = NULL;
1050
1051                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1052                         /* for binary and postfix-unary operators, jump back over
1053                          * previous operators with higher priority */
1054                         vn = cn;
1055                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1056                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1057                                 vn = vn->a.n;
1058                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1059                                 t.info += P(6);
1060                         cn = vn->a.n->r.n = new_node(t.info);
1061                         cn->a.n = vn->a.n;
1062                         if (tc & TC_BINOP) {
1063                                 cn->l.n = vn;
1064                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1065                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1066                                         /* it's a pipe */
1067                                         next_token(TC_GETLINE);
1068                                         /* give maximum priority to this pipe */
1069                                         cn->info &= ~PRIMASK;
1070                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1071                                 }
1072                         } else {
1073                                 cn->r.n = vn;
1074                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1075                         }
1076                         vn->a.n = cn;
1077
1078                 } else {
1079                         /* for operands and prefix-unary operators, attach them
1080                          * to last node */
1081                         vn = cn;
1082                         cn = vn->r.n = new_node(t.info);
1083                         cn->a.n = vn;
1084                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1085                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1086                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1087                                 /* one should be very careful with switch on tclass -
1088                                  * only simple tclasses should be used! */
1089                                 switch (tc) {
1090                                   case TC_VARIABLE:
1091                                   case TC_ARRAY:
1092                                         cn->info = OC_VAR;
1093                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1094                                                 cn->info = OC_FNARG;
1095                                                 cn->l.i = v->x.aidx;
1096                                         } else {
1097                                                 cn->l.v = newvar(t.string);
1098                                         }
1099                                         if (tc & TC_ARRAY) {
1100                                                 cn->info |= xS;
1101                                                 cn->r.n = parse_expr(TC_ARRTERM);
1102                                         }
1103                                         break;
1104
1105                                   case TC_NUMBER:
1106                                   case TC_STRING:
1107                                         cn->info = OC_VAR;
1108                                         v = cn->l.v = xcalloc(sizeof(var), 1);
1109                                         if (tc & TC_NUMBER)
1110                                                 setvar_i(v, t.number);
1111                                         else
1112                                                 setvar_s(v, t.string);
1113                                         break;
1114
1115                                   case TC_REGEXP:
1116                                         mk_re_node(t.string, cn,
1117                                                                         (regex_t *)xcalloc(sizeof(regex_t),2));
1118                                         break;
1119
1120                                   case TC_FUNCTION:
1121                                         cn->info = OC_FUNC;
1122                                         cn->r.f = newfunc(t.string);
1123                                         cn->l.n = condition();
1124                                         break;
1125
1126                                   case TC_SEQSTART:
1127                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1128                                         cn->a.n = vn;
1129                                         break;
1130
1131                                   case TC_GETLINE:
1132                                         glptr = cn;
1133                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1134                                         break;
1135
1136                                   case TC_BUILTIN:
1137                                         cn->l.n = condition();
1138                                         break;
1139                                 }
1140                         }
1141                 }
1142         }
1143         return sn.r.n;
1144 }
1145
1146 /* add node to chain. Return ptr to alloc'd node */
1147 static node *chain_node(uint32_t info)
1148 {
1149         register node *n;
1150
1151         if (! seq->first)
1152                 seq->first = seq->last = new_node(0);
1153
1154         if (seq->programname != programname) {
1155                 seq->programname = programname;
1156                 n = chain_node(OC_NEWSOURCE);
1157                 n->l.s = bb_xstrdup(programname);
1158         }
1159
1160         n = seq->last;
1161         n->info = info;
1162         seq->last = n->a.n = new_node(OC_DONE);
1163
1164         return n;
1165 }
1166
1167 static void chain_expr(uint32_t info)
1168 {
1169         node *n;
1170
1171         n = chain_node(info);
1172         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1173         if (t.tclass & TC_GRPTERM)
1174                 rollback_token();
1175 }
1176
1177 static node *chain_loop(node *nn)
1178 {
1179         node *n, *n2, *save_brk, *save_cont;
1180
1181         save_brk = break_ptr;
1182         save_cont = continue_ptr;
1183
1184         n = chain_node(OC_BR | Vx);
1185         continue_ptr = new_node(OC_EXEC);
1186         break_ptr = new_node(OC_EXEC);
1187         chain_group();
1188         n2 = chain_node(OC_EXEC | Vx);
1189         n2->l.n = nn;
1190         n2->a.n = n;
1191         continue_ptr->a.n = n2;
1192         break_ptr->a.n = n->r.n = seq->last;
1193
1194         continue_ptr = save_cont;
1195         break_ptr = save_brk;
1196
1197         return n;
1198 }
1199
1200 /* parse group and attach it to chain */
1201 static void chain_group(void)
1202 {
1203         uint32_t c;
1204         node *n, *n2, *n3;
1205
1206         do {
1207                 c = next_token(TC_GRPSEQ);
1208         } while (c & TC_NEWLINE);
1209
1210         if (c & TC_GRPSTART) {
1211                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1212                         if (t.tclass & TC_NEWLINE) continue;
1213                         rollback_token();
1214                         chain_group();
1215                 }
1216         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1217                 rollback_token();
1218                 chain_expr(OC_EXEC | Vx);
1219         } else {                                                /* TC_STATEMNT */
1220                 switch (t.info & OPCLSMASK) {
1221                         case ST_IF:
1222                                 n = chain_node(OC_BR | Vx);
1223                                 n->l.n = condition();
1224                                 chain_group();
1225                                 n2 = chain_node(OC_EXEC);
1226                                 n->r.n = seq->last;
1227                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1228                                         chain_group();
1229                                         n2->a.n = seq->last;
1230                                 } else {
1231                                         rollback_token();
1232                                 }
1233                                 break;
1234
1235                         case ST_WHILE:
1236                                 n2 = condition();
1237                                 n = chain_loop(NULL);
1238                                 n->l.n = n2;
1239                                 break;
1240
1241                         case ST_DO:
1242                                 n2 = chain_node(OC_EXEC);
1243                                 n = chain_loop(NULL);
1244                                 n2->a.n = n->a.n;
1245                                 next_token(TC_WHILE);
1246                                 n->l.n = condition();
1247                                 break;
1248
1249                         case ST_FOR:
1250                                 next_token(TC_SEQSTART);
1251                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1252                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
1253                                         if ((n2->info & OPCLSMASK) != OC_IN)
1254                                                 syntax_error(EMSG_UNEXP_TOKEN);
1255                                         n = chain_node(OC_WALKINIT | VV);
1256                                         n->l.n = n2->l.n;
1257                                         n->r.n = n2->r.n;
1258                                         n = chain_loop(NULL);
1259                                         n->info = OC_WALKNEXT | Vx;
1260                                         n->l.n = n2->l.n;
1261                                 } else {                                                                        /* for(;;) */
1262                                         n = chain_node(OC_EXEC | Vx);
1263                                         n->l.n = n2;
1264                                         n2 = parse_expr(TC_SEMICOL);
1265                                         n3 = parse_expr(TC_SEQTERM);
1266                                         n = chain_loop(n3);
1267                                         n->l.n = n2;
1268                                         if (! n2)
1269                                                 n->info = OC_EXEC;
1270                                 }
1271                                 break;
1272
1273                         case OC_PRINT:
1274                         case OC_PRINTF:
1275                                 n = chain_node(t.info);
1276                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1277                                 if (t.tclass & TC_OUTRDR) {
1278                                         n->info |= t.info;
1279                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1280                                 }
1281                                 if (t.tclass & TC_GRPTERM)
1282                                         rollback_token();
1283                                 break;
1284
1285                         case OC_BREAK:
1286                                 n = chain_node(OC_EXEC);
1287                                 n->a.n = break_ptr;
1288                                 break;
1289
1290                         case OC_CONTINUE:
1291                                 n = chain_node(OC_EXEC);
1292                                 n->a.n = continue_ptr;
1293                                 break;
1294
1295                         /* delete, next, nextfile, return, exit */
1296                         default:
1297                                 chain_expr(t.info);
1298
1299                 }
1300         }
1301 }
1302
1303 static void parse_program(char *p)
1304 {
1305         uint32_t tclass;
1306         node *cn;
1307         func *f;
1308         var *v;
1309
1310         pos = p;
1311         t.lineno = 1;
1312         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1313                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1314
1315                 if (tclass & TC_OPTERM)
1316                         continue;
1317
1318                 seq = &mainseq;
1319                 if (tclass & TC_BEGIN) {
1320                         seq = &beginseq;
1321                         chain_group();
1322
1323                 } else if (tclass & TC_END) {
1324                         seq = &endseq;
1325                         chain_group();
1326
1327                 } else if (tclass & TC_FUNCDECL) {
1328                         next_token(TC_FUNCTION);
1329                         pos++;
1330                         f = newfunc(t.string);
1331                         f->body.first = NULL;
1332                         f->nargs = 0;
1333                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1334                                 v = findvar(ahash, t.string);
1335                                 v->x.aidx = (f->nargs)++;
1336
1337                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1338                                         break;
1339                         }
1340                         seq = &(f->body);
1341                         chain_group();
1342                         clear_array(ahash);
1343
1344                 } else if (tclass & TC_OPSEQ) {
1345                         rollback_token();
1346                         cn = chain_node(OC_TEST);
1347                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1348                         if (t.tclass & TC_GRPSTART) {
1349                                 rollback_token();
1350                                 chain_group();
1351                         } else {
1352                                 chain_node(OC_PRINT);
1353                         }
1354                         cn->r.n = mainseq.last;
1355
1356                 } else /* if (tclass & TC_GRPSTART) */ {
1357                         rollback_token();
1358                         chain_group();
1359                 }
1360         }
1361 }
1362
1363
1364 /* -------- program execution part -------- */
1365
1366 static node *mk_splitter(char *s, tsplitter *spl)
1367 {
1368         register regex_t *re, *ire;
1369         node *n;
1370
1371         re = &spl->re[0];
1372         ire = &spl->re[1];
1373         n = &spl->n;
1374         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1375                 regfree(re);
1376                 regfree(ire);
1377         }
1378         if (bb_strlen(s) > 1) {
1379                 mk_re_node(s, n, re);
1380         } else {
1381                 n->info = (uint32_t) *s;
1382         }
1383
1384         return n;
1385 }
1386
1387 /* use node as a regular expression. Supplied with node ptr and regex_t
1388  * storage space. Return ptr to regex (if result points to preg, it should
1389  * be later regfree'd manually
1390  */
1391 static regex_t *as_regex(node *op, regex_t *preg)
1392 {
1393         var *v;
1394         char *s;
1395
1396         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1397                 return icase ? op->r.ire : op->l.re;
1398         } else {
1399                 v = nvalloc(1);
1400                 s = getvar_s(evaluate(op, v));
1401                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1402                 nvfree(v);
1403                 return preg;
1404         }
1405 }
1406
1407 /* gradually increasing buffer */
1408 static void qrealloc(char **b, int n, int *size)
1409 {
1410         if (! *b || n >= *size)
1411                 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1412 }
1413
1414 /* resize field storage space */
1415 static void fsrealloc(int size)
1416 {
1417         static int maxfields = 0;
1418         int i;
1419
1420         if (size >= maxfields) {
1421                 i = maxfields;
1422                 maxfields = size + 16;
1423                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1424                 for (; i<maxfields; i++) {
1425                         Fields[i].type = VF_SPECIAL;
1426                         Fields[i].string = NULL;
1427                 }
1428         }
1429
1430         if (size < nfields) {
1431                 for (i=size; i<nfields; i++) {
1432                         clrvar(Fields+i);
1433                 }
1434         }
1435         nfields = size;
1436 }
1437
1438 static int awk_split(char *s, node *spl, char **slist)
1439 {
1440         int l, n=0;
1441         char c[4];
1442         char *s1;
1443         regmatch_t pmatch[2];
1444
1445         /* in worst case, each char would be a separate field */
1446         *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1447
1448         c[0] = c[1] = (char)spl->info;
1449         c[2] = c[3] = '\0';
1450         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1451
1452         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1453                 while (*s) {
1454                         l = strcspn(s, c+2);
1455                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1456                         pmatch[0].rm_so <= l) {
1457                                 l = pmatch[0].rm_so;
1458                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1459                         } else {
1460                                 pmatch[0].rm_eo = l;
1461                                 if (*(s+l)) pmatch[0].rm_eo++;
1462                         }
1463
1464                         memcpy(s1, s, l);
1465                         *(s1+l) = '\0';
1466                         nextword(&s1);
1467                         s += pmatch[0].rm_eo;
1468                         n++;
1469                 }
1470         } else if (c[0] == '\0') {              /* null split */
1471                 while(*s) {
1472                         *(s1++) = *(s++);
1473                         *(s1++) = '\0';
1474                         n++;
1475                 }
1476         } else if (c[0] != ' ') {               /* single-character split */
1477                 if (icase) {
1478                         c[0] = toupper(c[0]);
1479                         c[1] = tolower(c[1]);
1480                 }
1481                 if (*s1) n++;
1482                 while ((s1 = strpbrk(s1, c))) {
1483                         *(s1++) = '\0';
1484                         n++;
1485                 }
1486         } else {                                /* space split */
1487                 while (*s) {
1488                         while (isspace(*s)) s++;
1489                         if (! *s) break;
1490                         n++;
1491                         while (*s && !isspace(*s))
1492                                 *(s1++) = *(s++);
1493                         *(s1++) = '\0';
1494                 }
1495         }
1496         return n;
1497 }
1498
1499 static void split_f0(void)
1500 {
1501         static char *fstrings = NULL;
1502         int i, n;
1503         char *s;
1504
1505         if (is_f0_split)
1506                 return;
1507
1508         is_f0_split = TRUE;
1509         free(fstrings);
1510         fsrealloc(0);
1511         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1512         fsrealloc(n);
1513         s = fstrings;
1514         for (i=0; i<n; i++) {
1515                 Fields[i].string = nextword(&s);
1516                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1517         }
1518
1519         /* set NF manually to avoid side effects */
1520         clrvar(V[NF]);
1521         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1522         V[NF]->number = nfields;
1523 }
1524
1525 /* perform additional actions when some internal variables changed */
1526 static void handle_special(var *v)
1527 {
1528         int n;
1529         char *b, *sep, *s;
1530         int sl, l, len, i, bsize;
1531
1532         if (! (v->type & VF_SPECIAL))
1533                 return;
1534
1535         if (v == V[NF]) {
1536                 n = (int)getvar_i(v);
1537                 fsrealloc(n);
1538
1539                 /* recalculate $0 */
1540                 sep = getvar_s(V[OFS]);
1541                 sl = bb_strlen(sep);
1542                 b = NULL;
1543                 len = 0;
1544                 for (i=0; i<n; i++) {
1545                         s = getvar_s(&Fields[i]);
1546                         l = bb_strlen(s);
1547                         if (b) {
1548                                 memcpy(b+len, sep, sl);
1549                                 len += sl;
1550                         }
1551                         qrealloc(&b, len+l+sl, &bsize);
1552                         memcpy(b+len, s, l);
1553                         len += l;
1554                 }
1555                 if (b) b[len] = '\0';
1556                 setvar_p(V[F0], b);
1557                 is_f0_split = TRUE;
1558
1559         } else if (v == V[F0]) {
1560                 is_f0_split = FALSE;
1561
1562         } else if (v == V[FS]) {
1563                 mk_splitter(getvar_s(v), &fsplitter);
1564
1565         } else if (v == V[RS]) {
1566                 mk_splitter(getvar_s(v), &rsplitter);
1567
1568         } else if (v == V[IGNORECASE]) {
1569                 icase = istrue(v);
1570
1571         } else {                                                /* $n */
1572                 n = getvar_i(V[NF]);
1573                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1574                 /* right here v is invalid. Just to note... */
1575         }
1576 }
1577
1578 /* step through func/builtin/etc arguments */
1579 static node *nextarg(node **pn)
1580 {
1581         node *n;
1582
1583         n = *pn;
1584         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1585                 *pn = n->r.n;
1586                 n = n->l.n;
1587         } else {
1588                 *pn = NULL;
1589         }
1590         return n;
1591 }
1592
1593 static void hashwalk_init(var *v, xhash *array)
1594 {
1595         char **w;
1596         hash_item *hi;
1597         int i;
1598
1599         if (v->type & VF_WALK)
1600                 free(v->x.walker);
1601
1602         v->type |= VF_WALK;
1603         w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1604         *w = *(w+1) = (char *)(w + 2);
1605         for (i=0; i<array->csize; i++) {
1606                 hi = array->items[i];
1607                 while(hi) {
1608                         strcpy(*w, hi->name);
1609                         nextword(w);
1610                         hi = hi->next;
1611                 }
1612         }
1613 }
1614
1615 static int hashwalk_next(var *v)
1616 {
1617         char **w;
1618
1619         w = v->x.walker;
1620         if (*(w+1) == *w)
1621                 return FALSE;
1622
1623         setvar_s(v, nextword(w+1));
1624         return TRUE;
1625 }
1626
1627 /* evaluate node, return 1 when result is true, 0 otherwise */
1628 static int ptest(node *pattern)
1629 {
1630         static var v;
1631         return istrue(evaluate(pattern, &v));
1632 }
1633
1634 /* read next record from stream rsm into a variable v */
1635 static int awk_getline(rstream *rsm, var *v)
1636 {
1637         char *b;
1638         regmatch_t pmatch[2];
1639         int a, p, pp=0, size;
1640         int fd, so, eo, r, rp;
1641         char c, *m, *s;
1642
1643         /* we're using our own buffer since we need access to accumulating
1644          * characters
1645          */
1646         fd = fileno(rsm->F);
1647         m = rsm->buffer;
1648         a = rsm->adv;
1649         p = rsm->pos;
1650         size = rsm->size;
1651         c = (char) rsplitter.n.info;
1652         rp = 0;
1653
1654         if (! m) qrealloc(&m, 256, &size);
1655         do {
1656                 b = m + a;
1657                 so = eo = p;
1658                 r = 1;
1659                 if (p > 0) {
1660                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1661                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1662                                                                                                 b, 1, pmatch, 0) == 0) {
1663                                         so = pmatch[0].rm_so;
1664                                         eo = pmatch[0].rm_eo;
1665                                         if (b[eo] != '\0')
1666                                                 break;
1667                                 }
1668                         } else if (c != '\0') {
1669                                 s = strchr(b+pp, c);
1670                                 if (s) {
1671                                         so = eo = s-b;
1672                                         eo++;
1673                                         break;
1674                                 }
1675                         } else {
1676                                 while (b[rp] == '\n')
1677                                         rp++;
1678                                 s = strstr(b+rp, "\n\n");
1679                                 if (s) {
1680                                         so = eo = s-b;
1681                                         while (b[eo] == '\n') eo++;
1682                                         if (b[eo] != '\0')
1683                                                 break;
1684                                 }
1685                         }
1686                 }
1687
1688                 if (a > 0) {
1689                         memmove(m, (const void *)(m+a), p+1);
1690                         b = m;
1691                         a = 0;
1692                 }
1693
1694                 qrealloc(&m, a+p+128, &size);
1695                 b = m + a;
1696                 pp = p;
1697                 p += safe_read(fd, b+p, size-p-1);
1698                 if (p < pp) {
1699                         p = 0;
1700                         r = 0;
1701                         setvar_i(V[ERRNO], errno);
1702                 }
1703                 b[p] = '\0';
1704
1705         } while (p > pp);
1706
1707         if (p == 0) {
1708                 r--;
1709         } else {
1710                 c = b[so]; b[so] = '\0';
1711                 setvar_s(v, b+rp);
1712                 v->type |= VF_USER;
1713                 b[so] = c;
1714                 c = b[eo]; b[eo] = '\0';
1715                 setvar_s(V[RT], b+so);
1716                 b[eo] = c;
1717         }
1718
1719         rsm->buffer = m;
1720         rsm->adv = a + eo;
1721         rsm->pos = p - eo;
1722         rsm->size = size;
1723
1724         return r;
1725 }
1726
1727 static int fmt_num(char *b, int size, char *format, double n, int int_as_int)
1728 {
1729         int r=0;
1730         char c, *s=format;
1731
1732         if (int_as_int && n == (int)n) {
1733                 r = snprintf(b, size, "%d", (int)n);
1734         } else {
1735                 do { c = *s; } while (*s && *++s);
1736                 if (strchr("diouxX", c)) {
1737                         r = snprintf(b, size, format, (int)n);
1738                 } else if (strchr("eEfgG", c)) {
1739                         r = snprintf(b, size, format, n);
1740                 } else {
1741                         runtime_error(EMSG_INV_FMT);
1742                 }
1743         }
1744         return r;
1745 }
1746
1747
1748 /* formatted output into an allocated buffer, return ptr to buffer */
1749 static char *awk_printf(node *n)
1750 {
1751         char *b = NULL;
1752         char *fmt, *s, *s1, *f;
1753         int i, j, incr, bsize;
1754         char c, c1;
1755         var *v, *arg;
1756
1757         v = nvalloc(1);
1758         fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1759
1760         i = 0;
1761         while (*f) {
1762                 s = f;
1763                 while (*f && (*f != '%' || *(++f) == '%'))
1764                         f++;
1765                 while (*f && !isalpha(*f))
1766                         f++;
1767
1768                 incr = (f - s) + MAXVARFMT;
1769                 qrealloc(&b, incr+i, &bsize);
1770                 c = *f; if (c != '\0') f++;
1771                 c1 = *f ; *f = '\0';
1772                 arg = evaluate(nextarg(&n), v);
1773
1774                 j = i;
1775                 if (c == 'c' || !c) {
1776                         i += sprintf(b+i, s,
1777                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1778
1779                 } else if (c == 's') {
1780                     s1 = getvar_s(arg);
1781                         qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1782                         i += sprintf(b+i, s, s1);
1783
1784                 } else {
1785                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1786                 }
1787                 *f = c1;
1788
1789                 /* if there was an error while sprintf, return value is negative */
1790                 if (i < j) i = j;
1791
1792         }
1793
1794         b = xrealloc(b, i+1);
1795         free(fmt);
1796         nvfree(v);
1797         b[i] = '\0';
1798         return b;
1799 }
1800
1801 /* common substitution routine
1802  * replace (nm) substring of (src) that match (n) with (repl), store
1803  * result into (dest), return number of substitutions. If nm=0, replace
1804  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1805  * subexpression matching (\1-\9)
1806  */
1807 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1808 {
1809         char *ds = NULL;
1810         char *sp, *s;
1811         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1812         regmatch_t pmatch[10];
1813         regex_t sreg, *re;
1814
1815         re = as_regex(rn, &sreg);
1816         if (! src) src = V[F0];
1817         if (! dest) dest = V[F0];
1818
1819         i = di = 0;
1820         sp = getvar_s(src);
1821         rl = bb_strlen(repl);
1822         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1823                 so = pmatch[0].rm_so;
1824                 eo = pmatch[0].rm_eo;
1825
1826                 qrealloc(&ds, di + eo + rl, &dssize);
1827                 memcpy(ds + di, sp, eo);
1828                 di += eo;
1829                 if (++i >= nm) {
1830                         /* replace */
1831                         di -= (eo - so);
1832                         nbs = 0;
1833                         for (s = repl; *s; s++) {
1834                                 ds[di++] = c = *s;
1835                                 if (c == '\\') {
1836                                         nbs++;
1837                                         continue;
1838                                 }
1839                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1840                                         di -= ((nbs + 3) >> 1);
1841                                         j = 0;
1842                                         if (c != '&') {
1843                                                 j = c - '0';
1844                                                 nbs++;
1845                                         }
1846                                         if (nbs % 2) {
1847                                                 ds[di++] = c;
1848                                         } else {
1849                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1850                                                 qrealloc(&ds, di + rl + n, &dssize);
1851                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1852                                                 di += n;
1853                                         }
1854                                 }
1855                                 nbs = 0;
1856                         }
1857                 }
1858
1859                 sp += eo;
1860                 if (i == nm) break;
1861                 if (eo == so) {
1862                         if (! (ds[di++] = *sp++)) break;
1863                 }
1864         }
1865
1866         qrealloc(&ds, di + strlen(sp), &dssize);
1867         strcpy(ds + di, sp);
1868         setvar_p(dest, ds);
1869         if (re == &sreg) regfree(re);
1870         return i;
1871 }
1872
1873 static var *exec_builtin(node *op, var *res)
1874 {
1875         int (*to_xxx)(int);
1876         var *tv;
1877         node *an[4];
1878         var  *av[4];
1879         char *as[4];
1880         regmatch_t pmatch[2];
1881         regex_t sreg, *re;
1882         static tsplitter tspl;
1883         node *spl;
1884         uint32_t isr, info;
1885         int nargs;
1886         time_t tt;
1887         char *s, *s1;
1888         int i, l, ll, n;
1889
1890         tv = nvalloc(4);
1891         isr = info = op->info;
1892         op = op->l.n;
1893
1894         av[2] = av[3] = NULL;
1895         for (i=0 ; i<4 && op ; i++) {
1896                 an[i] = nextarg(&op);
1897                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1898                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1899                 isr >>= 1;
1900         }
1901
1902         nargs = i;
1903         if (nargs < (info >> 30))
1904                 runtime_error(EMSG_TOO_FEW_ARGS);
1905
1906         switch (info & OPNMASK) {
1907
1908           case B_a2:
1909 #ifdef CONFIG_FEATURE_AWK_MATH
1910                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1911 #else
1912                 runtime_error(EMSG_NO_MATH);
1913 #endif
1914                 break;
1915
1916           case B_sp:
1917                 if (nargs > 2) {
1918                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1919                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1920                 } else {
1921                         spl = &fsplitter.n;
1922                 }
1923
1924                 n = awk_split(as[0], spl, &s);
1925                 s1 = s;
1926                 clear_array(iamarray(av[1]));
1927                 for (i=1; i<=n; i++)
1928                         setari_u(av[1], i, nextword(&s1));
1929                 free(s);
1930                 setvar_i(res, n);
1931                 break;
1932
1933           case B_ss:
1934                 l = bb_strlen(as[0]);
1935                 i = getvar_i(av[1]) - 1;
1936                 if (i>l) i=l; if (i<0) i=0;
1937                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1938                 if (n<0) n=0;
1939                 s = xmalloc(n+1);
1940                 strncpy(s, as[0]+i, n);
1941                 s[n] = '\0';
1942                 setvar_p(res, s);
1943                 break;
1944
1945           case B_lo:
1946                 to_xxx = tolower;
1947                 goto lo_cont;
1948
1949           case B_up:
1950                 to_xxx = toupper;
1951 lo_cont:
1952                 s1 = s = bb_xstrdup(as[0]);
1953                 while (*s1) {
1954                         *s1 = (*to_xxx)(*s1);
1955                         s1++;
1956                 }
1957                 setvar_p(res, s);
1958                 break;
1959
1960           case B_ix:
1961                 n = 0;
1962                 ll = bb_strlen(as[1]);
1963                 l = bb_strlen(as[0]) - ll;
1964                 if (ll > 0 && l >= 0) {
1965                         if (! icase) {
1966                                 s = strstr(as[0], as[1]);
1967                                 if (s) n = (s - as[0]) + 1;
1968                         } else {
1969                                 /* this piece of code is terribly slow and
1970                                  * really should be rewritten
1971                                  */
1972                                 for (i=0; i<=l; i++) {
1973                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1974                                                 n = i+1;
1975                                                 break;
1976                                         }
1977                                 }
1978                         }
1979                 }
1980                 setvar_i(res, n);
1981                 break;
1982
1983           case B_ti:
1984                 if (nargs > 1)
1985                         tt = getvar_i(av[1]);
1986                 else
1987                         time(&tt);
1988                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1989                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1990                 buf[i] = '\0';
1991                 setvar_s(res, buf);
1992                 break;
1993
1994           case B_ma:
1995                 re = as_regex(an[1], &sreg);
1996                 n = regexec(re, as[0], 1, pmatch, 0);
1997                 if (n == 0) {
1998                         pmatch[0].rm_so++;
1999                         pmatch[0].rm_eo++;
2000                 } else {
2001                         pmatch[0].rm_so = 0;
2002                         pmatch[0].rm_eo = -1;
2003                 }
2004                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2005                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2006                 setvar_i(res, pmatch[0].rm_so);
2007                 if (re == &sreg) regfree(re);
2008                 break;
2009
2010           case B_ge:
2011                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2012                 break;
2013
2014           case B_gs:
2015                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2016                 break;
2017
2018           case B_su:
2019                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2020                 break;
2021         }
2022
2023         nvfree(tv);
2024         return res;
2025 }
2026
2027 /*
2028  * Evaluate node - the heart of the program. Supplied with subtree
2029  * and place where to store result. returns ptr to result.
2030  */
/* XC(n) shifts (n) right by 8 bits. evaluate() applies it both to the
 * switch operand (opinfo & OPCLSMASK) and to every OC_* case label, so
 * opcode classes are compared with their low 8 bits dropped.
 * NOTE(review): presumably this keeps the case values small and dense;
 * confirm against the OC_* definitions.
 */
2031 #define XC(n) ((n) >> 8)
2032
2033 static var *evaluate(node *op, var *res)
2034 {
2035         /* This procedure is recursive so we should count every byte */
2036         static var *fnargs = NULL;
2037         static unsigned int seed = 1;
2038         static regex_t sreg;
2039         node *op1;
2040         var *v1;
2041         union {
2042                 var *v;
2043                 char *s;
2044                 double d;
2045                 int i;
2046         } L, R;
2047         uint32_t opinfo;
2048         short opn;
2049         union {
2050                 char *s;
2051                 rstream *rsm;
2052                 FILE *F;
2053                 var *v;
2054                 regex_t *re;
2055                 uint32_t info;
2056         } X;
2057
2058         if (! op)
2059                 return setvar_s(res, NULL);
2060
2061         v1 = nvalloc(2);
2062
2063         while (op) {
2064
2065                 opinfo = op->info;
2066                 opn = (short)(opinfo & OPNMASK);
2067                 lineno = op->lineno;
2068
2069                 /* execute inevitable things */
2070                 op1 = op->l.n;
2071                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2072                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2073                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2074                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2075                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2076
2077                 switch (XC(opinfo & OPCLSMASK)) {
2078
2079                   /* -- iterative node type -- */
2080
2081                   /* test pattern */
2082                   case XC( OC_TEST ):
2083                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2084                                 /* it's range pattern */
2085                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2086                                         op->info |= OF_CHECKED;
2087                                         if (ptest(op1->r.n))
2088                                                 op->info &= ~OF_CHECKED;
2089
2090                                         op = op->a.n;
2091                                 } else {
2092                                         op = op->r.n;
2093                                 }
2094                         } else {
2095                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2096                         }
2097                         break;
2098
2099                   /* just evaluate an expression, also used as unconditional jump */
2100                   case XC( OC_EXEC ):
2101                         break;
2102
2103                   /* branch, used in if-else and various loops */
2104                   case XC( OC_BR ):
2105                         op = istrue(L.v) ? op->a.n : op->r.n;
2106                         break;
2107
2108                   /* initialize for-in loop */
2109                   case XC( OC_WALKINIT ):
2110                         hashwalk_init(L.v, iamarray(R.v));
2111                         break;
2112
2113                   /* get next array item */
2114                   case XC( OC_WALKNEXT ):
2115                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2116                         break;
2117
2118                   case XC( OC_PRINT ):
2119                   case XC( OC_PRINTF ):
2120                         X.F = stdout;
2121                         if (op->r.n) {
2122                                 X.rsm = newfile(R.s);
2123                                 if (! X.rsm->F) {
2124                                         if (opn == '|') {
2125                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2126                                                         bb_perror_msg_and_die("popen");
2127                                                 X.rsm->is_pipe = 1;
2128                                         } else {
2129                                                 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2130                                         }
2131                                 }
2132                                 X.F = X.rsm->F;
2133                         }
2134
2135                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2136                                 if (! op1) {
2137                                         fputs(getvar_s(V[F0]), X.F);
2138                                 } else {
2139                                         while (op1) {
2140                                                 L.v = evaluate(nextarg(&op1), v1);
2141                                                 if (L.v->type & VF_NUMBER) {
2142                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2143                                                                                                                 getvar_i(L.v), TRUE);
2144                                                         fputs(buf, X.F);
2145                                                 } else {
2146                                                         fputs(getvar_s(L.v), X.F);
2147                                                 }
2148
2149                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2150                                         }
2151                                 }
2152                                 fputs(getvar_s(V[ORS]), X.F);
2153
2154                         } else {        /* OC_PRINTF */
2155                                 L.s = awk_printf(op1);
2156                                 fputs(L.s, X.F);
2157                                 free(L.s);
2158                         }
2159                         fflush(X.F);
2160                         break;
2161
2162                   case XC( OC_DELETE ):
2163                         X.info = op1->info & OPCLSMASK;
2164                         if (X.info == OC_VAR) {
2165                                 R.v = op1->l.v;
2166                         } else if (X.info == OC_FNARG) {
2167                                 R.v = &fnargs[op1->l.i];
2168                         } else {
2169                                 runtime_error(EMSG_NOT_ARRAY);
2170                         }
2171
2172                         if (op1->r.n) {
2173                                 clrvar(L.v);
2174                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2175                                 hash_remove(iamarray(R.v), L.s);
2176                         } else {
2177                                 clear_array(iamarray(R.v));
2178                         }
2179                         break;
2180
2181                   case XC( OC_NEWSOURCE ):
2182                         programname = op->l.s;
2183                         break;
2184
2185                   case XC( OC_RETURN ):
2186                         copyvar(res, L.v);
2187                         break;
2188
2189                   case XC( OC_NEXTFILE ):
2190                         nextfile = TRUE;
2191                   case XC( OC_NEXT ):
2192                         nextrec = TRUE;
2193                   case XC( OC_DONE ):
2194                         clrvar(res);
2195                         break;
2196
2197                   case XC( OC_EXIT ):
2198                         awk_exit(L.d);
2199
2200                   /* -- recursive node type -- */
2201
2202                   case XC( OC_VAR ):
2203                         L.v = op->l.v;
2204                         if (L.v == V[NF])
2205                                 split_f0();
2206                         goto v_cont;
2207
2208                   case XC( OC_FNARG ):
2209                         L.v = &fnargs[op->l.i];
2210
2211 v_cont:
2212                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2213                         break;
2214
2215                   case XC( OC_IN ):
2216                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2217                         break;
2218
2219                   case XC( OC_REGEXP ):
2220                         op1 = op;
2221                         L.s = getvar_s(V[F0]);
2222                         goto re_cont;
2223
2224                   case XC( OC_MATCH ):
2225                         op1 = op->r.n;
2226 re_cont:
2227                         X.re = as_regex(op1, &sreg);
2228                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2229                         if (X.re == &sreg) regfree(X.re);
2230                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2231                         break;
2232
2233                   case XC( OC_MOVE ):
2234                         /* if source is a temporary string, just relink it to dest */
2235                         if (R.v == v1+1 && R.v->string) {
2236                                 res = setvar_p(L.v, R.v->string);
2237                                 R.v->string = NULL;
2238                         } else {
2239                                 res = copyvar(L.v, R.v);
2240                         }
2241                         break;
2242
2243                   case XC( OC_TERNARY ):
2244                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2245                                 runtime_error(EMSG_POSSIBLE_ERROR);
2246                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2247                         break;
2248
2249                   case XC( OC_FUNC ):
2250                         if (! op->r.f->body.first)
2251                                 runtime_error(EMSG_UNDEF_FUNC);
2252
2253                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2254                         while (op1) {
2255                                 L.v = evaluate(nextarg(&op1), v1);
2256                                 copyvar(R.v, L.v);
2257                                 R.v->type |= VF_CHILD;
2258                                 R.v->x.parent = L.v;
2259                                 if (++R.v - X.v >= op->r.f->nargs)
2260                                         break;
2261                         }
2262
2263                         R.v = fnargs;
2264                         fnargs = X.v;
2265
2266                         L.s = programname;
2267                         res = evaluate(op->r.f->body.first, res);
2268                         programname = L.s;
2269
2270                         nvfree(fnargs);
2271                         fnargs = R.v;
2272                         break;
2273
2274                   case XC( OC_GETLINE ):
2275                   case XC( OC_PGETLINE ):
2276                         if (op1) {
2277                                 X.rsm = newfile(L.s);
2278                                 if (! X.rsm->F) {
2279                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2280                                                 X.rsm->F = popen(L.s, "r");
2281                                                 X.rsm->is_pipe = TRUE;
2282                                         } else {
2283                                                 X.rsm->F = fopen(L.s, "r");             /* not bb_xfopen! */
2284                                         }
2285                                 }
2286                         } else {
2287                                 if (! iF) iF = next_input_file();
2288                                 X.rsm = iF;
2289                         }
2290
2291                         if (! X.rsm->F) {
2292                                 setvar_i(V[ERRNO], errno);
2293                                 setvar_i(res, -1);
2294                                 break;
2295                         }
2296
2297                         if (! op->r.n)
2298                                 R.v = V[F0];
2299
2300                         L.i = awk_getline(X.rsm, R.v);
2301                         if (L.i > 0) {
2302                                 if (! op1) {
2303                                         incvar(V[FNR]);
2304                                         incvar(V[NR]);
2305                                 }
2306                         }
2307                         setvar_i(res, L.i);
2308                         break;
2309
2310                   /* simple builtins */
2311                   case XC( OC_FBLTIN ):
2312                         switch (opn) {
2313
2314                           case F_in:
2315                                 R.d = (int)L.d;
2316                                 break;
2317
2318                           case F_rn:
2319                                 R.d =  (double)rand() / (double)RAND_MAX;
2320                                 break;
2321
2322 #ifdef CONFIG_FEATURE_AWK_MATH
2323                           case F_co:
2324                                 R.d = cos(L.d);
2325                                 break;
2326
2327                           case F_ex:
2328                                 R.d = exp(L.d);
2329                                 break;
2330
2331                           case F_lg:
2332                                 R.d = log(L.d);
2333                                 break;
2334
2335                           case F_si:
2336                                 R.d = sin(L.d);
2337                                 break;
2338
2339                           case F_sq:
2340                                 R.d = sqrt(L.d);
2341                                 break;
2342 #else
2343                           case F_co:
2344                           case F_ex:
2345                           case F_lg:
2346                           case F_si:
2347                           case F_sq:
2348                                 runtime_error(EMSG_NO_MATH);
2349                                 break;
2350 #endif
2351
2352                           case F_sr:
2353                                 R.d = (double)seed;
2354                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2355                                 srand(seed);
2356                                 break;
2357
2358                           case F_ti:
2359                                 R.d = time(NULL);
2360                                 break;
2361
2362                           case F_le:
2363                                 if (! op1)
2364                                         L.s = getvar_s(V[F0]);
2365                                 R.d = bb_strlen(L.s);
2366                                 break;
2367
2368                           case F_sy:
2369                                 fflush(NULL);
2370                                 R.d = (L.s && *L.s) ? system(L.s) : 0;
2371                                 break;
2372
2373                           case F_ff:
2374                                 if (! op1)
2375                                         fflush(stdout);
2376                                 else {
2377                                         if (L.s && *L.s) {
2378                                                 X.rsm = newfile(L.s);
2379                                                 fflush(X.rsm->F);
2380                                         } else {
2381                                                 fflush(NULL);
2382                                         }
2383                                 }
2384                                 break;
2385
2386                           case F_cl:
2387                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2388                                 if (X.rsm) {
2389                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2390                                         free(X.rsm->buffer);
2391                                         hash_remove(fdhash, L.s);
2392                                 }
2393                                 if (R.i != 0)
2394                                         setvar_i(V[ERRNO], errno);
2395                                 R.d = (double)R.i;
2396                                 break;
2397                         }
2398                         setvar_i(res, R.d);
2399                         break;
2400
2401                   case XC( OC_BUILTIN ):
2402                         res = exec_builtin(op, res);
2403                         break;
2404
2405                   case XC( OC_SPRINTF ):
2406                         setvar_p(res, awk_printf(op1));
2407                         break;
2408
2409                   case XC( OC_UNARY ):
2410                         X.v = R.v;
2411                         L.d = R.d = getvar_i(R.v);
2412                         switch (opn) {
2413                           case 'P':
2414                                 L.d = ++R.d;
2415                                 goto r_op_change;
2416                           case 'p':
2417                                 R.d++;
2418                                 goto r_op_change;
2419                           case 'M':
2420                                 L.d = --R.d;
2421                                 goto r_op_change;
2422                           case 'm':
2423                                 R.d--;
2424                                 goto r_op_change;
2425                           case '!':
2426                             L.d = istrue(X.v) ? 0 : 1;
2427                                 break;
2428                           case '-':
2429                                 L.d = -R.d;
2430                                 break;
2431                         r_op_change:
2432                                 setvar_i(X.v, R.d);
2433                         }
2434                         setvar_i(res, L.d);
2435                         break;
2436
2437                   case XC( OC_FIELD ):
2438                         R.i = (int)getvar_i(R.v);
2439                         if (R.i == 0) {
2440                                 res = V[F0];
2441                         } else {
2442                                 split_f0();
2443                                 if (R.i > nfields)
2444                                         fsrealloc(R.i);
2445
2446                                 res = &Fields[R.i-1];
2447                         }
2448                         break;
2449
2450                   /* concatenation (" ") and index joining (",") */
2451                   case XC( OC_CONCAT ):
2452                   case XC( OC_COMMA ):
2453                         opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2454                         X.s = (char *)xmalloc(opn);
2455                         strcpy(X.s, L.s);
2456                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2457                                 L.s = getvar_s(V[SUBSEP]);
2458                                 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2459                                 strcat(X.s, L.s);
2460                         }
2461                         strcat(X.s, R.s);
2462                         setvar_p(res, X.s);
2463                         break;
2464
2465                   case XC( OC_LAND ):
2466                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2467                         break;
2468
2469                   case XC( OC_LOR ):
2470                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2471                         break;
2472
2473                   case XC( OC_BINARY ):
2474                   case XC( OC_REPLACE ):
2475                         R.d = getvar_i(R.v);
2476                         switch (opn) {
2477                           case '+':
2478                                 L.d += R.d;
2479                                 break;
2480                           case '-':
2481                                 L.d -= R.d;
2482                                 break;
2483                           case '*':
2484                                 L.d *= R.d;
2485                                 break;
2486                           case '/':
2487                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2488                                 L.d /= R.d;
2489                                 break;
2490                           case '&':
2491 #ifdef CONFIG_FEATURE_AWK_MATH
2492                                 L.d = pow(L.d, R.d);
2493 #else
2494                                 runtime_error(EMSG_NO_MATH);
2495 #endif
2496                                 break;
2497                           case '%':
2498                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2499                                 L.d -= (int)(L.d / R.d) * R.d;
2500                                 break;
2501                         }
2502                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2503                         break;
2504
2505                   case XC( OC_COMPARE ):
2506                         if (is_numeric(L.v) && is_numeric(R.v)) {
2507                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2508                         } else {
2509                                 L.s = getvar_s(L.v);
2510                                 R.s = getvar_s(R.v);
2511                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2512                         }
2513                         switch (opn & 0xfe) {
2514                           case 0:
2515                                 R.i = (L.d > 0);
2516                                 break;
2517                           case 2:
2518                                 R.i = (L.d >= 0);
2519                                 break;
2520                           case 4:
2521                                 R.i = (L.d == 0);
2522                                 break;
2523                         }
2524                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2525                         break;
2526
2527                   default:
2528                         runtime_error(EMSG_POSSIBLE_ERROR);
2529                 }
2530                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2531                         op = op->a.n;
2532                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2533                         break;
2534                 if (nextrec)
2535                         break;
2536         }
2537         nvfree(v1);
2538         return res;
2539 }
2540
2541
2542 /* -------- main & co. -------- */
2543
2544 static int awk_exit(int r)
2545 {
2546         unsigned int i;
2547         hash_item *hi;
2548         static var tv;
2549
2550         if (! exiting) {
2551                 exiting = TRUE;
2552                 nextrec = FALSE;
2553                 evaluate(endseq.first, &tv);
2554         }
2555
2556         /* waiting for children */
2557         for (i=0; i<fdhash->csize; i++) {
2558                 hi = fdhash->items[i];
2559                 while(hi) {
2560                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2561                                 pclose(hi->data.rs.F);
2562                         hi = hi->next;
2563                 }
2564         }
2565
2566         exit(r);
2567 }
2568
2569 /* if expr looks like "var=value", perform assignment and return 1,
2570  * otherwise return 0 */
2571 static int is_assignment(char *expr)
2572 {
2573         char *exprc, *s, *s0, *s1;
2574
2575         exprc = bb_xstrdup(expr);
2576         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2577                 free(exprc);
2578                 return FALSE;
2579         }
2580
2581         *(s++) = '\0';
2582         s0 = s1 = s;
2583         while (*s)
2584                 *(s1++) = nextchar(&s);
2585
2586         *s1 = '\0';
2587         setvar_u(newvar(exprc), s0);
2588         free(exprc);
2589         return TRUE;
2590 }
2591
2592 /* switch to next input file */
2593 static rstream *next_input_file(void)
2594 {
2595         static rstream rsm;
2596         FILE *F = NULL;
2597         char *fname, *ind;
2598         static int files_happen = FALSE;
2599
2600         if (rsm.F) fclose(rsm.F);
2601         rsm.F = NULL;
2602         rsm.pos = rsm.adv = 0;
2603
2604         do {
2605                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2606                         if (files_happen)
2607                                 return NULL;
2608                         fname = "-";
2609                         F = stdin;
2610                 } else {
2611                         ind = getvar_s(incvar(V[ARGIND]));
2612                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2613                         if (fname && *fname && !is_assignment(fname))
2614                                 F = afopen(fname, "r");
2615                 }
2616         } while (!F);
2617
2618         files_happen = TRUE;
2619         setvar_s(V[FILENAME], fname);
2620         rsm.F = F;
2621         return &rsm;
2622 }
2623
2624 extern int awk_main(int argc, char **argv)
2625 {
2626         char *s, *s1;
2627         int i, j, c;
2628         var *v;
2629         static var tv;
2630         char **envp;
2631         static int from_file = FALSE;
2632         rstream *rsm;
2633         FILE *F, *stdfiles[3];
2634         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2635
2636         /* allocate global buffer */
2637         buf = xmalloc(MAXVARFMT+1);
2638
2639         vhash = hash_init();
2640         ahash = hash_init();
2641         fdhash = hash_init();
2642         fnhash = hash_init();
2643
2644         /* initialize variables */
2645         for (i=0;  *vNames;  i++) {
2646                 V[i] = v = newvar(nextword(&vNames));
2647                 if (*vValues != '\377')
2648                         setvar_s(v, nextword(&vValues));
2649                 else
2650                         setvar_i(v, 0);
2651
2652                 if (*vNames == '*') {
2653                         v->type |= VF_SPECIAL;
2654                         vNames++;
2655                 }
2656         }
2657
2658         handle_special(V[FS]);
2659         handle_special(V[RS]);
2660
2661         stdfiles[0] = stdin;
2662         stdfiles[1] = stdout;
2663         stdfiles[2] = stderr;
2664         for (i=0; i<3; i++) {
2665                 rsm = newfile(nextword(&stdnames));
2666                 rsm->F = stdfiles[i];
2667         }
2668
2669         for (envp=environ; *envp; envp++) {
2670                 s = bb_xstrdup(*envp);
2671                 s1 = strchr(s, '=');
2672                 if (!s1) {
2673                         goto keep_going;
2674                 }
2675                 *(s1++) = '\0';
2676                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2677 keep_going:
2678                 free(s);
2679         }
2680
2681         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2682                 switch (c) {
2683                         case 'F':
2684                                 setvar_s(V[FS], optarg);
2685                                 break;
2686                         case 'v':
2687                                 if (! is_assignment(optarg))
2688                                         bb_show_usage();
2689                                 break;
2690                         case 'f':
2691                                 from_file = TRUE;
2692                                 F = afopen(programname = optarg, "r");
2693                                 s = NULL;
2694                                 /* one byte is reserved for some trick in next_token */
2695                                 for (i=j=1; j>0; i+=j) {
2696                                         s = (char *)xrealloc(s, i+4096);
2697                                         j = fread(s+i, 1, 4094, F);
2698                                 }
2699                                 s[i] = '\0';
2700                                 fclose(F);
2701                                 parse_program(s+1);
2702                                 free(s);
2703                                 break;
2704                         case 'W':
2705                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2706                                 break;
2707
2708                         default:
2709                                 bb_show_usage();
2710                 }
2711         }
2712
2713         if (!from_file) {
2714                 if (argc == optind)
2715                         bb_show_usage();
2716                 programname="cmd. line";
2717                 parse_program(argv[optind++]);
2718
2719         }
2720
2721         /* fill in ARGV array */
2722         setvar_i(V[ARGC], argc - optind + 1);
2723         setari_u(V[ARGV], 0, "awk");
2724         for(i=optind; i < argc; i++)
2725                 setari_u(V[ARGV], i+1-optind, argv[i]);
2726
2727         evaluate(beginseq.first, &tv);
2728         if (! mainseq.first && ! endseq.first)
2729                 awk_exit(EXIT_SUCCESS);
2730
2731         /* input file could already be opened in BEGIN block */
2732         if (! iF) iF = next_input_file();
2733
2734         /* passing through input files */
2735         while (iF) {
2736
2737                 nextfile = FALSE;
2738                 setvar_i(V[FNR], 0);
2739
2740                 while ((c = awk_getline(iF, V[F0])) > 0) {
2741
2742                         nextrec = FALSE;
2743                         incvar(V[NR]);
2744                         incvar(V[FNR]);
2745                         evaluate(mainseq.first, &tv);
2746
2747                         if (nextfile)
2748                                 break;
2749                 }
2750
2751                 if (c < 0)
2752                         runtime_error(strerror(errno));
2753
2754                 iF = next_input_file();
2755
2756         }
2757
2758         awk_exit(EXIT_SUCCESS);
2759
2760         return 0;
2761 }
2762