d00fcafb171003e51ff90cf1e596992d3a31707a
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <time.h>
29 #include <math.h>
30 #include <ctype.h>
31 #include <getopt.h>
32
33 #include "xregex.h"
34 #include "busybox.h"
35
36
37 #define MAXVARFMT       240
38 #define MINNVBLOCK      64
39
40 /* variable flags */
41 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
42 #define VF_ARRAY        0x0002  /* 1 = it's an array */
43
44 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
45 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
47 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
49 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
51
52 /* these flags are static, don't change them when value is changed */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
55 /* Variable */
56 typedef struct var_s {
57         unsigned short type;            /* flags */
58         double number;
59         char *string;
60         union {
61                 int aidx;                               /* func arg index (on compilation stage) */
62                 struct xhash_s *array;  /* array ptr */
63                 struct var_s *parent;   /* for func args, ptr to actual parameter */
64                 char **walker;                  /* list of array elements (for..in) */
65         } x;
66 } var;
67
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
70         struct node_s *first;
71         struct node_s *last;
72         char *programname;
73 } chain;
74
75 /* Function */
76 typedef struct func_s {
77         unsigned short nargs;
78         struct chain_s body;
79 } func;
80
81 /* I/O stream */
82 typedef struct rstream_s {
83         FILE *F;
84         char *buffer;
85         int adv;
86         int size;
87         int pos;
88         unsigned short is_pipe;
89 } rstream;
90
91 typedef struct hash_item_s {
92         union {
93                 struct var_s v;                 /* variable/array hash */
94                 struct rstream_s rs;    /* redirect streams hash */
95                 struct func_s f;                /* functions hash */
96         } data;
97         struct hash_item_s *next;       /* next in chain */
98         char name[1];                           /* really it's longer */
99 } hash_item;
100
101 typedef struct xhash_s {
102         unsigned int nel;                                       /* num of elements */
103         unsigned int csize;                                     /* current hash size */
104         unsigned int nprime;                            /* next hash size in PRIMES[] */
105         unsigned int glen;                                      /* summary length of item names */
106         struct hash_item_s **items;
107 } xhash;
108
109 /* Tree node */
110 typedef struct node_s {
111         uint32_t info;
112         unsigned short lineno;
113         union {
114                 struct node_s *n;
115                 var *v;
116                 int i;
117                 char *s;
118                 regex_t *re;
119         } l;
120         union {
121                 struct node_s *n;
122                 regex_t *ire;
123                 func *f;
124                 int argno;
125         } r;
126         union {
127                 struct node_s *n;
128         } a;
129 } node;
130
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
133         int size;
134         var *pos;
135         struct nvblock_s *prev;
136         struct nvblock_s *next;
137         var nv[0];
138 } nvblock;
139
140 typedef struct tsplitter_s {
141         node n;
142         regex_t re[2];
143 } tsplitter;
144
145 /* simple token classes */
146 /* Order and hex values are very important!!!  See next_token() */
147 #define TC_SEQSTART      1                              /* ( */
148 #define TC_SEQTERM      (1 << 1)                /* ) */
149 #define TC_REGEXP       (1 << 2)                /* /.../ */
150 #define TC_OUTRDR       (1 << 3)                /* | > >> */
151 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
152 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
153 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
154 #define TC_IN           (1 << 7)
155 #define TC_COMMA        (1 << 8)
156 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
157 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
158 #define TC_ARRTERM      (1 << 11)               /* ] */
159 #define TC_GRPSTART     (1 << 12)               /* { */
160 #define TC_GRPTERM      (1 << 13)               /* } */
161 #define TC_SEMICOL      (1 << 14)
162 #define TC_NEWLINE      (1 << 15)
163 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
164 #define TC_WHILE        (1 << 17)
165 #define TC_ELSE         (1 << 18)
166 #define TC_BUILTIN      (1 << 19)
167 #define TC_GETLINE      (1 << 20)
168 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
169 #define TC_BEGIN        (1 << 22)
170 #define TC_END          (1 << 23)
171 #define TC_EOF          (1 << 24)
172 #define TC_VARIABLE     (1 << 25)
173 #define TC_ARRAY        (1 << 26)
174 #define TC_FUNCTION     (1 << 27)
175 #define TC_STRING       (1 << 28)
176 #define TC_NUMBER       (1 << 29)
177
178 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
179
180 /* combined token classes */
181 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
187 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
188
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193 /* discard newlines after these */
194 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195         TC_BINOP | TC_OPTERM)
196
197 /* what can expression begin with */
198 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205         TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
207
208 #define OF_RES1         0x010000
209 #define OF_RES2         0x020000
210 #define OF_STR1         0x040000
211 #define OF_STR2         0x080000
212 #define OF_NUM1         0x100000
213 #define OF_CHECKED      0x200000
214
215 /* combined operator flags */
216 #define xx      0
217 #define xV      OF_RES2
218 #define xS      (OF_RES2 | OF_STR2)
219 #define Vx      OF_RES1
220 #define VV      (OF_RES1 | OF_RES2)
221 #define Nx      (OF_RES1 | OF_NUM1)
222 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx      (OF_RES1 | OF_STR1)
224 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
227 #define OPCLSMASK       0xFF00
228 #define OPNMASK         0x007F
229
230 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
231  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
233  */
234 #define P(x)    (x << 24)
235 #define PRIMASK         0x7F000000
236 #define PRIMASK2        0x7E000000
237
238 /* Operation classes */
239
240 #define SHIFT_TIL_THIS  0x0600
241 #define RECUR_FROM_THIS 0x1000
242
243 enum {
244         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
245         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
246
247         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
248         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
249         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
250
251         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
252         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
253         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
254         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
255         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
256         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
257         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
258         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
259         OC_DONE=0x2800,
260
261         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
262         ST_WHILE=0x3300
263 };
264
265 /* simple builtins */
266 enum {
267         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
268         F_ti,   F_le,   F_sy,   F_ff,   F_cl
269 };
270
271 /* builtins */
272 enum {
273         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
274         B_ge,   B_gs,   B_su
275 };
276
277 /* tokens and their corresponding info values */
278
279 #define NTC             "\377"          /* switch to next token class (tc<<1) */
280 #define NTCC    '\377'
281
282 #define OC_B    OC_BUILTIN
283
284 static char * const tokenlist =
285         "\1("           NTC
286         "\1)"           NTC
287         "\1/"           NTC                                                                     /* REGEXP */
288         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
289         "\2++"          "\2--"          NTC                                             /* UOPPOST */
290         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
291         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
292         "\2*="          "\2/="          "\2%="          "\2^="
293         "\1+"           "\1-"           "\3**="         "\2**"
294         "\1/"           "\1%"           "\1^"           "\1*"
295         "\2!="          "\2>="          "\2<="          "\1>"
296         "\1<"           "\2!~"          "\1~"           "\2&&"
297         "\2||"          "\1?"           "\1:"           NTC
298         "\2in"          NTC
299         "\1,"           NTC
300         "\1|"           NTC
301         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
302         "\1]"           NTC
303         "\1{"           NTC
304         "\1}"           NTC
305         "\1;"           NTC
306         "\1\n"          NTC
307         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
308         "\10continue"                   "\6delete"      "\5print"
309         "\6printf"      "\4next"        "\10nextfile"
310         "\6return"      "\4exit"        NTC
311         "\5while"       NTC
312         "\4else"        NTC
313
314         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
315         "\3cos"         "\3exp"         "\3int"         "\3log"
316         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
317         "\6gensub"      "\4gsub"        "\5index"       "\6length"
318         "\5match"       "\5split"       "\7sprintf"     "\3sub"
319         "\6substr"      "\7systime"     "\10strftime"
320         "\7tolower"     "\7toupper"     NTC
321         "\7getline"     NTC
322         "\4func"        "\10function"   NTC
323         "\5BEGIN"       NTC
324         "\3END"         "\0"
325         ;
326
327 static uint32_t tokeninfo[] = {
328
329         0,
330         0,
331         OC_REGEXP,
332         xS|'a',         xS|'w',         xS|'|',
333         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
334         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
335                 OC_FIELD|xV|P(5),
336         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
337                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
338         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
339                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
340         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
341                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
342         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
343                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
344         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
345                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
346         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
347                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
348         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
349                 OC_COLON|xx|P(67)|':',
350         OC_IN|SV|P(49),
351         OC_COMMA|SS|P(80),
352         OC_PGETLINE|SV|P(37),
353         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
354                 OC_UNARY|xV|P(19)|'!',
355         0,
356         0,
357         0,
358         0,
359         0,
360         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
361         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
362         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
363         OC_RETURN|Vx,   OC_EXIT|Nx,
364         ST_WHILE,
365         0,
366
367         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
372         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
373         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374         OC_GETLINE|SV|P(0),
375         0,      0,
376         0,
377         0
378 };
379
380 /* internal variable names and their initial values       */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
382 enum {
383         CONVFMT=0,      OFMT,           FS,                     OFS,
384         ORS,            RS,                     RT,                     FILENAME,
385         SUBSEP,         ARGIND,         ARGC,           ARGV,
386         ERRNO,          FNR,
387         NR,                     NF,                     IGNORECASE,
388         ENVIRON,        F0,                     _intvarcount_
389 };
390
391 static char * vNames =
392         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
393         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
394         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
395         "ERRNO\0"       "FNR\0"
396         "NR\0"          "NF\0*"         "IGNORECASE\0*"
397         "ENVIRON\0"     "$\0*"          "\0";
398
399 static char * vValues =
400         "%.6g\0"        "%.6g\0"        " \0"           " \0"
401         "\n\0"          "\n\0"          "\0"            "\0"
402         "\034\0"
403         "\377";
404
405 /* hash size may grow to these values */
406 #define FIRST_PRIME 61;
407 static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
408 static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(unsigned int);
409
410 /* globals */
411
412 extern char **environ;
413
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
418 static rstream *iF;
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
420 static char *programname;
421 static short lineno;
422 static int is_f0_split;
423 static int nfields = 0;
424 static var *Fields = NULL;
425 static tsplitter fsplitter, rsplitter;
426 static nvblock *cb = NULL;
427 static char *pos;
428 static char *buf;
429 static int icase = FALSE;
430 static int exiting = FALSE;
431
432 static struct {
433         uint32_t tclass;
434         uint32_t info;
435         char *string;
436         double number;
437         short lineno;
438         int rollback;
439 } t;
440
441 /* function prototypes */
442 static void handle_special(var *);
443 static node *parse_expr(uint32_t);
444 static void chain_group(void);
445 static var *evaluate(node *, var *);
446 static rstream *next_input_file(void);
447 static int fmt_num(char *, int, char *, double, int);
448 static int awk_exit(int);
449
450 /* ---- error handling ---- */
451
452 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
453 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
454 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
455 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
456 static const char EMSG_INV_FMT[] = "Invalid format specifier";
457 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
458 static const char EMSG_NOT_ARRAY[] = "Not an array";
459 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
460 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
461 #ifndef CONFIG_FEATURE_AWK_MATH
462 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
463 #endif
464
465 static void syntax_error(const char * const message)
466 {
467         bb_error_msg("%s:%i: %s", programname, lineno, message);
468         exit(1);
469 }
470
471 #define runtime_error(x) syntax_error(x)
472
473
474 /* ---- hash stuff ---- */
475
476 static unsigned int hashidx(char *name)
477 {
478         register unsigned int idx=0;
479
480         while (*name)  idx = *name++ + (idx << 6) - idx;
481         return idx;
482 }
483
484 /* create new hash */
485 static xhash *hash_init(void)
486 {
487         xhash *newhash;
488
489         newhash = (xhash *)xcalloc(1, sizeof(xhash));
490         newhash->csize = FIRST_PRIME;
491         newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
492
493         return newhash;
494 }
495
496 /* find item in hash, return ptr to data, NULL if not found */
497 static void *hash_search(xhash *hash, char *name)
498 {
499         hash_item *hi;
500
501         hi = hash->items [ hashidx(name) % hash->csize ];
502         while (hi) {
503                 if (strcmp(hi->name, name) == 0)
504                         return &(hi->data);
505                 hi = hi->next;
506         }
507         return NULL;
508 }
509
510 /* grow hash if it becomes too big */
511 static void hash_rebuild(xhash *hash)
512 {
513         unsigned int newsize, i, idx;
514         hash_item **newitems, *hi, *thi;
515
516         if (hash->nprime == NPRIMES)
517                 return;
518
519         newsize = PRIMES[hash->nprime++];
520         newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
521
522         for (i=0; i<hash->csize; i++) {
523                 hi = hash->items[i];
524                 while (hi) {
525                         thi = hi;
526                         hi = thi->next;
527                         idx = hashidx(thi->name) % newsize;
528                         thi->next = newitems[idx];
529                         newitems[idx] = thi;
530                 }
531         }
532
533         free(hash->items);
534         hash->csize = newsize;
535         hash->items = newitems;
536 }
537
538 /* find item in hash, add it if necessary. Return ptr to data */
539 static void *hash_find(xhash *hash, char *name)
540 {
541         hash_item *hi;
542         unsigned int idx;
543         int l;
544
545         hi = hash_search(hash, name);
546         if (! hi) {
547                 if (++hash->nel / hash->csize > 10)
548                         hash_rebuild(hash);
549
550                 l = bb_strlen(name) + 1;
551                 hi = xcalloc(sizeof(hash_item) + l, 1);
552                 memcpy(hi->name, name, l);
553
554                 idx = hashidx(name) % hash->csize;
555                 hi->next = hash->items[idx];
556                 hash->items[idx] = hi;
557                 hash->glen += l;
558         }
559         return &(hi->data);
560 }
561
562 #define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
563 #define newvar(name) (var *) hash_find ( vhash , (name) )
564 #define newfile(name) (rstream *) hash_find ( fdhash , (name) )
565 #define newfunc(name) (func *) hash_find ( fnhash , (name) )
566
567 static void hash_remove(xhash *hash, char *name)
568 {
569         hash_item *hi, **phi;
570
571         phi = &(hash->items[ hashidx(name) % hash->csize ]);
572         while (*phi) {
573                 hi = *phi;
574                 if (strcmp(hi->name, name) == 0) {
575                         hash->glen -= (bb_strlen(name) + 1);
576                         hash->nel--;
577                         *phi = hi->next;
578                         free(hi);
579                         break;
580                 }
581                 phi = &(hi->next);
582         }
583 }
584
585 /* ------ some useful functions ------ */
586
587 static void skip_spaces(char **s)
588 {
589         register char *p = *s;
590
591         while(*p == ' ' || *p == '\t' ||
592                                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
593                 p++;
594         }
595         *s = p;
596 }
597
598 static char *nextword(char **s)
599 {
600         register char *p = *s;
601
602         while (*(*s)++) ;
603
604         return p;
605 }
606
607 static char nextchar(char **s)
608 {
609         register char c, *pps;
610
611         c = *((*s)++);
612         pps = *s;
613         if (c == '\\') c = bb_process_escape_sequence((const char**)s);
614         if (c == '\\' && *s == pps) c = *((*s)++);
615         return c;
616 }
617
618 static inline int isalnum_(int c)
619 {
620         return (isalnum(c) || c == '_');
621 }
622
623 static FILE *afopen(const char *path, const char *mode)
624 {
625         return (*path == '-' && *(path+1) == '\0') ? stdin : bb_xfopen(path, mode);
626 }
627
628 /* -------- working with variables (set/get/copy/etc) -------- */
629
630 static xhash *iamarray(var *v)
631 {
632         var *a = v;
633
634         while (a->type & VF_CHILD)
635                 a = a->x.parent;
636
637         if (! (a->type & VF_ARRAY)) {
638                 a->type |= VF_ARRAY;
639                 a->x.array = hash_init();
640         }
641         return a->x.array;
642 }
643
644 static void clear_array(xhash *array)
645 {
646         unsigned int i;
647         hash_item *hi, *thi;
648
649         for (i=0; i<array->csize; i++) {
650                 hi = array->items[i];
651                 while (hi) {
652                         thi = hi;
653                         hi = hi->next;
654                         free(thi->data.v.string);
655                         free(thi);
656                 }
657                 array->items[i] = NULL;
658         }
659         array->glen = array->nel = 0;
660 }
661
662 /* clear a variable */
663 static var *clrvar(var *v)
664 {
665         if (!(v->type & VF_FSTR))
666                 free(v->string);
667
668         v->type &= VF_DONTTOUCH;
669         v->type |= VF_DIRTY;
670         v->string = NULL;
671         return v;
672 }
673
674 /* assign string value to variable */
675 static var *setvar_p(var *v, char *value)
676 {
677         clrvar(v);
678         v->string = value;
679         handle_special(v);
680
681         return v;
682 }
683
684 /* same as setvar_p but make a copy of string */
685 static var *setvar_s(var *v, char *value)
686 {
687         return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
688 }
689
690 /* same as setvar_s but set USER flag */
691 static var *setvar_u(var *v, char *value)
692 {
693         setvar_s(v, value);
694         v->type |= VF_USER;
695         return v;
696 }
697
698 /* set array element to user string */
699 static void setari_u(var *a, int idx, char *s)
700 {
701         register var *v;
702         static char sidx[12];
703
704         sprintf(sidx, "%d", idx);
705         v = findvar(iamarray(a), sidx);
706         setvar_u(v, s);
707 }
708
709 /* assign numeric value to variable */
710 static var *setvar_i(var *v, double value)
711 {
712         clrvar(v);
713         v->type |= VF_NUMBER;
714         v->number = value;
715         handle_special(v);
716         return v;
717 }
718
719 static char *getvar_s(var *v)
720 {
721         /* if v is numeric and has no cached string, convert it to string */
722         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
723                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
724                 v->string = bb_xstrdup(buf);
725                 v->type |= VF_CACHED;
726         }
727         return (v->string == NULL) ? "" : v->string;
728 }
729
730 static double getvar_i(var *v)
731 {
732         char *s;
733
734         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
735                 v->number = 0;
736                 s = v->string;
737                 if (s && *s) {
738                         v->number = strtod(s, &s);
739                         if (v->type & VF_USER) {
740                                 skip_spaces(&s);
741                                 if (*s != '\0')
742                                         v->type &= ~VF_USER;
743                         }
744                 } else {
745                         v->type &= ~VF_USER;
746                 }
747                 v->type |= VF_CACHED;
748         }
749         return v->number;
750 }
751
752 static var *copyvar(var *dest, var *src)
753 {
754         if (dest != src) {
755                 clrvar(dest);
756                 dest->type |= (src->type & ~VF_DONTTOUCH);
757                 dest->number = src->number;
758                 if (src->string)
759                         dest->string = bb_xstrdup(src->string);
760         }
761         handle_special(dest);
762         return dest;
763 }
764
765 static var *incvar(var *v)
766 {
767         return setvar_i(v, getvar_i(v)+1.);
768 }
769
770 /* return true if v is number or numeric string */
771 static int is_numeric(var *v)
772 {
773         getvar_i(v);
774         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
775 }
776
777 /* return 1 when value of v corresponds to true, 0 otherwise */
778 static int istrue(var *v)
779 {
780         if (is_numeric(v))
781                 return (v->number == 0) ? 0 : 1;
782         else
783                 return (v->string && *(v->string)) ? 1 : 0;
784 }
785
786 /* temporary variables allocator. Last allocated should be first freed */
787 static var *nvalloc(int n)
788 {
789         nvblock *pb = NULL;
790         var *v, *r;
791         int size;
792
793         while (cb) {
794                 pb = cb;
795                 if ((cb->pos - cb->nv) + n <= cb->size) break;
796                 cb = cb->next;
797         }
798
799         if (! cb) {
800                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
801                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
802                 cb->size = size;
803                 cb->pos = cb->nv;
804                 cb->prev = pb;
805                 cb->next = NULL;
806                 if (pb) pb->next = cb;
807         }
808
809         v = r = cb->pos;
810         cb->pos += n;
811
812         while (v < cb->pos) {
813                 v->type = 0;
814                 v->string = NULL;
815                 v++;
816         }
817
818         return r;
819 }
820
821 static void nvfree(var *v)
822 {
823         var *p;
824
825         if (v < cb->nv || v >= cb->pos)
826                 runtime_error(EMSG_INTERNAL_ERROR);
827
828         for (p=v; p<cb->pos; p++) {
829                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
830                         clear_array(iamarray(p));
831                         free(p->x.array->items);
832                         free(p->x.array);
833                 }
834                 if (p->type & VF_WALK)
835                         free(p->x.walker);
836
837                 clrvar(p);
838         }
839
840         cb->pos = v;
841         while (cb->prev && cb->pos == cb->nv) {
842                 cb = cb->prev;
843         }
844 }
845
846 /* ------- awk program text parsing ------- */
847
848 /* Parse next token pointed by global pos, place results into global t.
849  * If token isn't expected, give away. Return token class
850  */
851 static uint32_t next_token(uint32_t expected)
852 {
853         char *p, *pp, *s;
854         char *tl;
855         uint32_t tc, *ti;
856         int l;
857         static int concat_inserted = FALSE;
858         static uint32_t save_tclass, save_info;
859         static uint32_t ltclass = TC_OPTERM;
860
861         if (t.rollback) {
862
863                 t.rollback = FALSE;
864
865         } else if (concat_inserted) {
866
867                 concat_inserted = FALSE;
868                 t.tclass = save_tclass;
869                 t.info = save_info;
870
871         } else {
872
873                 p = pos;
874
875         readnext:
876                 skip_spaces(&p);
877                 lineno = t.lineno;
878                 if (*p == '#')
879                         while (*p != '\n' && *p != '\0') p++;
880
881                 if (*p == '\n')
882                         t.lineno++;
883
884                 if (*p == '\0') {
885                         tc = TC_EOF;
886
887                 } else if (*p == '\"') {
888                         /* it's a string */
889                         t.string = s = ++p;
890                         while (*p != '\"') {
891                                 if (*p == '\0' || *p == '\n')
892                                         syntax_error(EMSG_UNEXP_EOS);
893                                 *(s++) = nextchar(&p);
894                         }
895                         p++;
896                         *s = '\0';
897                         tc = TC_STRING;
898
899                 } else if ((expected & TC_REGEXP) && *p == '/') {
900                         /* it's regexp */
901                         t.string = s = ++p;
902                         while (*p != '/') {
903                                 if (*p == '\0' || *p == '\n')
904                                         syntax_error(EMSG_UNEXP_EOS);
905                                 if ((*s++ = *p++) == '\\') {
906                                         pp = p;
907                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
908                                         if (*pp == '\\') *s++ = '\\';
909                                         if (p == pp) *s++ = *p++;
910                                 }
911                         }
912                         p++;
913                         *s = '\0';
914                         tc = TC_REGEXP;
915
916                 } else if (*p == '.' || isdigit(*p)) {
917                         /* it's a number */
918                         t.number = strtod(p, &p);
919                         if (*p == '.')
920                                 syntax_error(EMSG_UNEXP_TOKEN);
921                         tc = TC_NUMBER;
922
923                 } else {
924                         /* search for something known */
925                         tl = tokenlist;
926                         tc = 0x00000001;
927                         ti = tokeninfo;
928                         while (*tl) {
929                                 l = *(tl++);
930                                 if (l == NTCC) {
931                                         tc <<= 1;
932                                         continue;
933                                 }
934                                 /* if token class is expected, token
935                                  * matches and it's not a longer word,
936                                  * then this is what we are looking for
937                                  */
938                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
939                                 *tl == *p && strncmp(p, tl, l) == 0 &&
940                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
941                                         t.info = *ti;
942                                         p += l;
943                                         break;
944                                 }
945                                 ti++;
946                                 tl += l;
947                         }
948
949                         if (! *tl) {
950                                 /* it's a name (var/array/function),
951                                  * otherwise it's something wrong
952                                  */
953                                 if (! isalnum_(*p))
954                                         syntax_error(EMSG_UNEXP_TOKEN);
955
956                                 t.string = --p;
957                                 while(isalnum_(*(++p))) {
958                                         *(p-1) = *p;
959                                 }
960                                 *(p-1) = '\0';
961                                 tc = TC_VARIABLE;
962                                 /* also consume whitespace between functionname and bracket */
963                                 skip_spaces(&p);
964                                 if (*p == '(') {
965                                         tc = TC_FUNCTION;
966                                 } else {
967                                         if (*p == '[') {
968                                                 p++;
969                                                 tc = TC_ARRAY;
970                                         }
971                                 }
972                         }
973                 }
974                 pos = p;
975
976                 /* skipping newlines in some cases */
977                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
978                         goto readnext;
979
980                 /* insert concatenation operator when needed */
981                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
982                         concat_inserted = TRUE;
983                         save_tclass = tc;
984                         save_info = t.info;
985                         tc = TC_BINOP;
986                         t.info = OC_CONCAT | SS | P(35);
987                 }
988
989                 t.tclass = tc;
990         }
991         ltclass = t.tclass;
992
993         /* Are we ready for this? */
994         if (! (ltclass & expected))
995                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
996                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
997
998         return ltclass;
999 }
1000
1001 static void rollback_token(void) { t.rollback = TRUE; }
1002
1003 static node *new_node(uint32_t info)
1004 {
1005         register node *n;
1006
1007         n = (node *)xcalloc(sizeof(node), 1);
1008         n->info = info;
1009         n->lineno = lineno;
1010         return n;
1011 }
1012
1013 static node *mk_re_node(char *s, node *n, regex_t *re)
1014 {
1015         n->info = OC_REGEXP;
1016         n->l.re = re;
1017         n->r.ire = re + 1;
1018         xregcomp(re, s, REG_EXTENDED);
1019         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1020
1021         return n;
1022 }
1023
1024 static node *condition(void)
1025 {
1026         next_token(TC_SEQSTART);
1027         return parse_expr(TC_SEQTERM);
1028 }
1029
1030 /* parse expression terminated by given argument, return ptr
1031  * to built subtree. Terminator is eaten by parse_expr */
1032 static node *parse_expr(uint32_t iexp)
1033 {
1034         node sn;
1035         node *cn = &sn;
1036         node *vn, *glptr;
1037         uint32_t tc, xtc;
1038         var *v;
1039
1040         sn.info = PRIMASK;
1041         sn.r.n = glptr = NULL;
1042         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1043
1044         while (! ((tc = next_token(xtc)) & iexp)) {
1045                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1046                         /* input redirection (<) attached to glptr node */
1047                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1048                         cn->a.n = glptr;
1049                         xtc = TC_OPERAND | TC_UOPPRE;
1050                         glptr = NULL;
1051
1052                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1053                         /* for binary and postfix-unary operators, jump back over
1054                          * previous operators with higher priority */
1055                         vn = cn;
1056                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1057                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1058                                 vn = vn->a.n;
1059                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1060                                 t.info += P(6);
1061                         cn = vn->a.n->r.n = new_node(t.info);
1062                         cn->a.n = vn->a.n;
1063                         if (tc & TC_BINOP) {
1064                                 cn->l.n = vn;
1065                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1066                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1067                                         /* it's a pipe */
1068                                         next_token(TC_GETLINE);
1069                                         /* give maximum priority to this pipe */
1070                                         cn->info &= ~PRIMASK;
1071                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1072                                 }
1073                         } else {
1074                                 cn->r.n = vn;
1075                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1076                         }
1077                         vn->a.n = cn;
1078
1079                 } else {
1080                         /* for operands and prefix-unary operators, attach them
1081                          * to last node */
1082                         vn = cn;
1083                         cn = vn->r.n = new_node(t.info);
1084                         cn->a.n = vn;
1085                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1086                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1087                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1088                                 /* one should be very careful with switch on tclass -
1089                                  * only simple tclasses should be used! */
1090                                 switch (tc) {
1091                                   case TC_VARIABLE:
1092                                   case TC_ARRAY:
1093                                         cn->info = OC_VAR;
1094                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1095                                                 cn->info = OC_FNARG;
1096                                                 cn->l.i = v->x.aidx;
1097                                         } else {
1098                                                 cn->l.v = newvar(t.string);
1099                                         }
1100                                         if (tc & TC_ARRAY) {
1101                                                 cn->info |= xS;
1102                                                 cn->r.n = parse_expr(TC_ARRTERM);
1103                                         }
1104                                         break;
1105
1106                                   case TC_NUMBER:
1107                                   case TC_STRING:
1108                                         cn->info = OC_VAR;
1109                                         v = cn->l.v = xcalloc(sizeof(var), 1);
1110                                         if (tc & TC_NUMBER)
1111                                                 setvar_i(v, t.number);
1112                                         else
1113                                                 setvar_s(v, t.string);
1114                                         break;
1115
1116                                   case TC_REGEXP:
1117                                         mk_re_node(t.string, cn,
1118                                                                         (regex_t *)xcalloc(sizeof(regex_t),2));
1119                                         break;
1120
1121                                   case TC_FUNCTION:
1122                                         cn->info = OC_FUNC;
1123                                         cn->r.f = newfunc(t.string);
1124                                         cn->l.n = condition();
1125                                         break;
1126
1127                                   case TC_SEQSTART:
1128                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1129                                         cn->a.n = vn;
1130                                         break;
1131
1132                                   case TC_GETLINE:
1133                                         glptr = cn;
1134                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1135                                         break;
1136
1137                                   case TC_BUILTIN:
1138                                         cn->l.n = condition();
1139                                         break;
1140                                 }
1141                         }
1142                 }
1143         }
1144         return sn.r.n;
1145 }
1146
1147 /* add node to chain. Return ptr to alloc'd node */
1148 static node *chain_node(uint32_t info)
1149 {
1150         register node *n;
1151
1152         if (! seq->first)
1153                 seq->first = seq->last = new_node(0);
1154
1155         if (seq->programname != programname) {
1156                 seq->programname = programname;
1157                 n = chain_node(OC_NEWSOURCE);
1158                 n->l.s = bb_xstrdup(programname);
1159         }
1160
1161         n = seq->last;
1162         n->info = info;
1163         seq->last = n->a.n = new_node(OC_DONE);
1164
1165         return n;
1166 }
1167
1168 static void chain_expr(uint32_t info)
1169 {
1170         node *n;
1171
1172         n = chain_node(info);
1173         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1174         if (t.tclass & TC_GRPTERM)
1175                 rollback_token();
1176 }
1177
1178 static node *chain_loop(node *nn)
1179 {
1180         node *n, *n2, *save_brk, *save_cont;
1181
1182         save_brk = break_ptr;
1183         save_cont = continue_ptr;
1184
1185         n = chain_node(OC_BR | Vx);
1186         continue_ptr = new_node(OC_EXEC);
1187         break_ptr = new_node(OC_EXEC);
1188         chain_group();
1189         n2 = chain_node(OC_EXEC | Vx);
1190         n2->l.n = nn;
1191         n2->a.n = n;
1192         continue_ptr->a.n = n2;
1193         break_ptr->a.n = n->r.n = seq->last;
1194
1195         continue_ptr = save_cont;
1196         break_ptr = save_brk;
1197
1198         return n;
1199 }
1200
1201 /* parse group and attach it to chain */
1202 static void chain_group(void)
1203 {
1204         uint32_t c;
1205         node *n, *n2, *n3;
1206
1207         do {
1208                 c = next_token(TC_GRPSEQ);
1209         } while (c & TC_NEWLINE);
1210
1211         if (c & TC_GRPSTART) {
1212                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1213                         if (t.tclass & TC_NEWLINE) continue;
1214                         rollback_token();
1215                         chain_group();
1216                 }
1217         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1218                 rollback_token();
1219                 chain_expr(OC_EXEC | Vx);
1220         } else {                                                /* TC_STATEMNT */
1221                 switch (t.info & OPCLSMASK) {
1222                         case ST_IF:
1223                                 n = chain_node(OC_BR | Vx);
1224                                 n->l.n = condition();
1225                                 chain_group();
1226                                 n2 = chain_node(OC_EXEC);
1227                                 n->r.n = seq->last;
1228                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1229                                         chain_group();
1230                                         n2->a.n = seq->last;
1231                                 } else {
1232                                         rollback_token();
1233                                 }
1234                                 break;
1235
1236                         case ST_WHILE:
1237                                 n2 = condition();
1238                                 n = chain_loop(NULL);
1239                                 n->l.n = n2;
1240                                 break;
1241
1242                         case ST_DO:
1243                                 n2 = chain_node(OC_EXEC);
1244                                 n = chain_loop(NULL);
1245                                 n2->a.n = n->a.n;
1246                                 next_token(TC_WHILE);
1247                                 n->l.n = condition();
1248                                 break;
1249
1250                         case ST_FOR:
1251                                 next_token(TC_SEQSTART);
1252                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1253                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
1254                                         if ((n2->info & OPCLSMASK) != OC_IN)
1255                                                 syntax_error(EMSG_UNEXP_TOKEN);
1256                                         n = chain_node(OC_WALKINIT | VV);
1257                                         n->l.n = n2->l.n;
1258                                         n->r.n = n2->r.n;
1259                                         n = chain_loop(NULL);
1260                                         n->info = OC_WALKNEXT | Vx;
1261                                         n->l.n = n2->l.n;
1262                                 } else {                                                                        /* for(;;) */
1263                                         n = chain_node(OC_EXEC | Vx);
1264                                         n->l.n = n2;
1265                                         n2 = parse_expr(TC_SEMICOL);
1266                                         n3 = parse_expr(TC_SEQTERM);
1267                                         n = chain_loop(n3);
1268                                         n->l.n = n2;
1269                                         if (! n2)
1270                                                 n->info = OC_EXEC;
1271                                 }
1272                                 break;
1273
1274                         case OC_PRINT:
1275                         case OC_PRINTF:
1276                                 n = chain_node(t.info);
1277                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1278                                 if (t.tclass & TC_OUTRDR) {
1279                                         n->info |= t.info;
1280                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1281                                 }
1282                                 if (t.tclass & TC_GRPTERM)
1283                                         rollback_token();
1284                                 break;
1285
1286                         case OC_BREAK:
1287                                 n = chain_node(OC_EXEC);
1288                                 n->a.n = break_ptr;
1289                                 break;
1290
1291                         case OC_CONTINUE:
1292                                 n = chain_node(OC_EXEC);
1293                                 n->a.n = continue_ptr;
1294                                 break;
1295
1296                         /* delete, next, nextfile, return, exit */
1297                         default:
1298                                 chain_expr(t.info);
1299
1300                 }
1301         }
1302 }
1303
1304 static void parse_program(char *p)
1305 {
1306         uint32_t tclass;
1307         node *cn;
1308         func *f;
1309         var *v;
1310
1311         pos = p;
1312         t.lineno = 1;
1313         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1314                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1315
1316                 if (tclass & TC_OPTERM)
1317                         continue;
1318
1319                 seq = &mainseq;
1320                 if (tclass & TC_BEGIN) {
1321                         seq = &beginseq;
1322                         chain_group();
1323
1324                 } else if (tclass & TC_END) {
1325                         seq = &endseq;
1326                         chain_group();
1327
1328                 } else if (tclass & TC_FUNCDECL) {
1329                         next_token(TC_FUNCTION);
1330                         pos++;
1331                         f = newfunc(t.string);
1332                         f->body.first = NULL;
1333                         f->nargs = 0;
1334                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1335                                 v = findvar(ahash, t.string);
1336                                 v->x.aidx = (f->nargs)++;
1337
1338                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1339                                         break;
1340                         }
1341                         seq = &(f->body);
1342                         chain_group();
1343                         clear_array(ahash);
1344
1345                 } else if (tclass & TC_OPSEQ) {
1346                         rollback_token();
1347                         cn = chain_node(OC_TEST);
1348                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1349                         if (t.tclass & TC_GRPSTART) {
1350                                 rollback_token();
1351                                 chain_group();
1352                         } else {
1353                                 chain_node(OC_PRINT);
1354                         }
1355                         cn->r.n = mainseq.last;
1356
1357                 } else /* if (tclass & TC_GRPSTART) */ {
1358                         rollback_token();
1359                         chain_group();
1360                 }
1361         }
1362 }
1363
1364
1365 /* -------- program execution part -------- */
1366
1367 static node *mk_splitter(char *s, tsplitter *spl)
1368 {
1369         register regex_t *re, *ire;
1370         node *n;
1371
1372         re = &spl->re[0];
1373         ire = &spl->re[1];
1374         n = &spl->n;
1375         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1376                 regfree(re);
1377                 regfree(ire);
1378         }
1379         if (bb_strlen(s) > 1) {
1380                 mk_re_node(s, n, re);
1381         } else {
1382                 n->info = (uint32_t) *s;
1383         }
1384
1385         return n;
1386 }
1387
1388 /* use node as a regular expression. Supplied with node ptr and regex_t
1389  * storage space. Return ptr to regex (if result points to preg, it should
1390  * be later regfree'd manually
1391  */
1392 static regex_t *as_regex(node *op, regex_t *preg)
1393 {
1394         var *v;
1395         char *s;
1396
1397         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1398                 return icase ? op->r.ire : op->l.re;
1399         } else {
1400                 v = nvalloc(1);
1401                 s = getvar_s(evaluate(op, v));
1402                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1403                 nvfree(v);
1404                 return preg;
1405         }
1406 }
1407
1408 /* gradually increasing buffer */
1409 static void qrealloc(char **b, int n, int *size)
1410 {
1411         if (! *b || n >= *size)
1412                 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1413 }
1414
1415 /* resize field storage space */
1416 static void fsrealloc(int size)
1417 {
1418         static int maxfields = 0;
1419         int i;
1420
1421         if (size >= maxfields) {
1422                 i = maxfields;
1423                 maxfields = size + 16;
1424                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1425                 for (; i<maxfields; i++) {
1426                         Fields[i].type = VF_SPECIAL;
1427                         Fields[i].string = NULL;
1428                 }
1429         }
1430
1431         if (size < nfields) {
1432                 for (i=size; i<nfields; i++) {
1433                         clrvar(Fields+i);
1434                 }
1435         }
1436         nfields = size;
1437 }
1438
1439 static int awk_split(char *s, node *spl, char **slist)
1440 {
1441         int l, n=0;
1442         char c[4];
1443         char *s1;
1444         regmatch_t pmatch[2];
1445
1446         /* in worst case, each char would be a separate field */
1447         *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1448
1449         c[0] = c[1] = (char)spl->info;
1450         c[2] = c[3] = '\0';
1451         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1452
1453         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1454                 while (*s) {
1455                         l = strcspn(s, c+2);
1456                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1457                         pmatch[0].rm_so <= l) {
1458                                 l = pmatch[0].rm_so;
1459                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1460                         } else {
1461                                 pmatch[0].rm_eo = l;
1462                                 if (*(s+l)) pmatch[0].rm_eo++;
1463                         }
1464
1465                         memcpy(s1, s, l);
1466                         *(s1+l) = '\0';
1467                         nextword(&s1);
1468                         s += pmatch[0].rm_eo;
1469                         n++;
1470                 }
1471         } else if (c[0] == '\0') {              /* null split */
1472                 while(*s) {
1473                         *(s1++) = *(s++);
1474                         *(s1++) = '\0';
1475                         n++;
1476                 }
1477         } else if (c[0] != ' ') {               /* single-character split */
1478                 if (icase) {
1479                         c[0] = toupper(c[0]);
1480                         c[1] = tolower(c[1]);
1481                 }
1482                 if (*s1) n++;
1483                 while ((s1 = strpbrk(s1, c))) {
1484                         *(s1++) = '\0';
1485                         n++;
1486                 }
1487         } else {                                /* space split */
1488                 while (*s) {
1489                         while (isspace(*s)) s++;
1490                         if (! *s) break;
1491                         n++;
1492                         while (*s && !isspace(*s))
1493                                 *(s1++) = *(s++);
1494                         *(s1++) = '\0';
1495                 }
1496         }
1497         return n;
1498 }
1499
1500 static void split_f0(void)
1501 {
1502         static char *fstrings = NULL;
1503         int i, n;
1504         char *s;
1505
1506         if (is_f0_split)
1507                 return;
1508
1509         is_f0_split = TRUE;
1510         free(fstrings);
1511         fsrealloc(0);
1512         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1513         fsrealloc(n);
1514         s = fstrings;
1515         for (i=0; i<n; i++) {
1516                 Fields[i].string = nextword(&s);
1517                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1518         }
1519
1520         /* set NF manually to avoid side effects */
1521         clrvar(V[NF]);
1522         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1523         V[NF]->number = nfields;
1524 }
1525
1526 /* perform additional actions when some internal variables changed */
1527 static void handle_special(var *v)
1528 {
1529         int n;
1530         char *b, *sep, *s;
1531         int sl, l, len, i, bsize;
1532
1533         if (! (v->type & VF_SPECIAL))
1534                 return;
1535
1536         if (v == V[NF]) {
1537                 n = (int)getvar_i(v);
1538                 fsrealloc(n);
1539
1540                 /* recalculate $0 */
1541                 sep = getvar_s(V[OFS]);
1542                 sl = bb_strlen(sep);
1543                 b = NULL;
1544                 len = 0;
1545                 for (i=0; i<n; i++) {
1546                         s = getvar_s(&Fields[i]);
1547                         l = bb_strlen(s);
1548                         if (b) {
1549                                 memcpy(b+len, sep, sl);
1550                                 len += sl;
1551                         }
1552                         qrealloc(&b, len+l+sl, &bsize);
1553                         memcpy(b+len, s, l);
1554                         len += l;
1555                 }
1556                 if (b) b[len] = '\0';
1557                 setvar_p(V[F0], b);
1558                 is_f0_split = TRUE;
1559
1560         } else if (v == V[F0]) {
1561                 is_f0_split = FALSE;
1562
1563         } else if (v == V[FS]) {
1564                 mk_splitter(getvar_s(v), &fsplitter);
1565
1566         } else if (v == V[RS]) {
1567                 mk_splitter(getvar_s(v), &rsplitter);
1568
1569         } else if (v == V[IGNORECASE]) {
1570                 icase = istrue(v);
1571
1572         } else {                                                /* $n */
1573                 n = getvar_i(V[NF]);
1574                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1575                 /* right here v is invalid. Just to note... */
1576         }
1577 }
1578
1579 /* step through func/builtin/etc arguments */
1580 static node *nextarg(node **pn)
1581 {
1582         node *n;
1583
1584         n = *pn;
1585         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1586                 *pn = n->r.n;
1587                 n = n->l.n;
1588         } else {
1589                 *pn = NULL;
1590         }
1591         return n;
1592 }
1593
1594 static void hashwalk_init(var *v, xhash *array)
1595 {
1596         char **w;
1597         hash_item *hi;
1598         int i;
1599
1600         if (v->type & VF_WALK)
1601                 free(v->x.walker);
1602
1603         v->type |= VF_WALK;
1604         w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1605         *w = *(w+1) = (char *)(w + 2);
1606         for (i=0; i<array->csize; i++) {
1607                 hi = array->items[i];
1608                 while(hi) {
1609                         strcpy(*w, hi->name);
1610                         nextword(w);
1611                         hi = hi->next;
1612                 }
1613         }
1614 }
1615
1616 static int hashwalk_next(var *v)
1617 {
1618         char **w;
1619
1620         w = v->x.walker;
1621         if (*(w+1) == *w)
1622                 return FALSE;
1623
1624         setvar_s(v, nextword(w+1));
1625         return TRUE;
1626 }
1627
1628 /* evaluate node, return 1 when result is true, 0 otherwise */
1629 static int ptest(node *pattern)
1630 {
1631         static var v;
1632         return istrue(evaluate(pattern, &v));
1633 }
1634
1635 /* read next record from stream rsm into a variable v */
1636 static int awk_getline(rstream *rsm, var *v)
1637 {
1638         char *b;
1639         regmatch_t pmatch[2];
1640         int a, p, pp=0, size;
1641         int fd, so, eo, r, rp;
1642         char c, *m, *s;
1643
1644         /* we're using our own buffer since we need access to accumulating
1645          * characters
1646          */
1647         fd = fileno(rsm->F);
1648         m = rsm->buffer;
1649         a = rsm->adv;
1650         p = rsm->pos;
1651         size = rsm->size;
1652         c = (char) rsplitter.n.info;
1653         rp = 0;
1654
1655         if (! m) qrealloc(&m, 256, &size);
1656         do {
1657                 b = m + a;
1658                 so = eo = p;
1659                 r = 1;
1660                 if (p > 0) {
1661                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1662                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1663                                                                                                 b, 1, pmatch, 0) == 0) {
1664                                         so = pmatch[0].rm_so;
1665                                         eo = pmatch[0].rm_eo;
1666                                         if (b[eo] != '\0')
1667                                                 break;
1668                                 }
1669                         } else if (c != '\0') {
1670                                 s = strchr(b+pp, c);
1671                                 if (s) {
1672                                         so = eo = s-b;
1673                                         eo++;
1674                                         break;
1675                                 }
1676                         } else {
1677                                 while (b[rp] == '\n')
1678                                         rp++;
1679                                 s = strstr(b+rp, "\n\n");
1680                                 if (s) {
1681                                         so = eo = s-b;
1682                                         while (b[eo] == '\n') eo++;
1683                                         if (b[eo] != '\0')
1684                                                 break;
1685                                 }
1686                         }
1687                 }
1688
1689                 if (a > 0) {
1690                         memmove(m, (const void *)(m+a), p+1);
1691                         b = m;
1692                         a = 0;
1693                 }
1694
1695                 qrealloc(&m, a+p+128, &size);
1696                 b = m + a;
1697                 pp = p;
1698                 p += safe_read(fd, b+p, size-p-1);
1699                 if (p < pp) {
1700                         p = 0;
1701                         r = 0;
1702                         setvar_i(V[ERRNO], errno);
1703                 }
1704                 b[p] = '\0';
1705
1706         } while (p > pp);
1707
1708         if (p == 0) {
1709                 r--;
1710         } else {
1711                 c = b[so]; b[so] = '\0';
1712                 setvar_s(v, b+rp);
1713                 v->type |= VF_USER;
1714                 b[so] = c;
1715                 c = b[eo]; b[eo] = '\0';
1716                 setvar_s(V[RT], b+so);
1717                 b[eo] = c;
1718         }
1719
1720         rsm->buffer = m;
1721         rsm->adv = a + eo;
1722         rsm->pos = p - eo;
1723         rsm->size = size;
1724
1725         return r;
1726 }
1727
1728 static int fmt_num(char *b, int size, char *format, double n, int int_as_int)
1729 {
1730         int r=0;
1731         char c, *s=format;
1732
1733         if (int_as_int && n == (int)n) {
1734                 r = snprintf(b, size, "%d", (int)n);
1735         } else {
1736                 do { c = *s; } while (*s && *++s);
1737                 if (strchr("diouxX", c)) {
1738                         r = snprintf(b, size, format, (int)n);
1739                 } else if (strchr("eEfgG", c)) {
1740                         r = snprintf(b, size, format, n);
1741                 } else {
1742                         runtime_error(EMSG_INV_FMT);
1743                 }
1744         }
1745         return r;
1746 }
1747
1748
1749 /* formatted output into an allocated buffer, return ptr to buffer */
1750 static char *awk_printf(node *n)
1751 {
1752         char *b = NULL;
1753         char *fmt, *s, *s1, *f;
1754         int i, j, incr, bsize;
1755         char c, c1;
1756         var *v, *arg;
1757
1758         v = nvalloc(1);
1759         fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1760
1761         i = 0;
1762         while (*f) {
1763                 s = f;
1764                 while (*f && (*f != '%' || *(++f) == '%'))
1765                         f++;
1766                 while (*f && !isalpha(*f))
1767                         f++;
1768
1769                 incr = (f - s) + MAXVARFMT;
1770                 qrealloc(&b, incr+i, &bsize);
1771                 c = *f; if (c != '\0') f++;
1772                 c1 = *f ; *f = '\0';
1773                 arg = evaluate(nextarg(&n), v);
1774
1775                 j = i;
1776                 if (c == 'c' || !c) {
1777                         i += sprintf(b+i, s,
1778                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1779
1780                 } else if (c == 's') {
1781                     s1 = getvar_s(arg);
1782                         qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1783                         i += sprintf(b+i, s, s1);
1784
1785                 } else {
1786                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1787                 }
1788                 *f = c1;
1789
1790                 /* if there was an error while sprintf, return value is negative */
1791                 if (i < j) i = j;
1792
1793         }
1794
1795         b = xrealloc(b, i+1);
1796         free(fmt);
1797         nvfree(v);
1798         b[i] = '\0';
1799         return b;
1800 }
1801
1802 /* common substitution routine
1803  * replace (nm) substring of (src) that match (n) with (repl), store
1804  * result into (dest), return number of substitutions. If nm=0, replace
1805  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1806  * subexpression matching (\1-\9)
1807  */
1808 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1809 {
1810         char *ds = NULL;
1811         char *sp, *s;
1812         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1813         regmatch_t pmatch[10];
1814         regex_t sreg, *re;
1815
1816         re = as_regex(rn, &sreg);
1817         if (! src) src = V[F0];
1818         if (! dest) dest = V[F0];
1819
1820         i = di = 0;
1821         sp = getvar_s(src);
1822         rl = bb_strlen(repl);
1823         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1824                 so = pmatch[0].rm_so;
1825                 eo = pmatch[0].rm_eo;
1826
1827                 qrealloc(&ds, di + eo + rl, &dssize);
1828                 memcpy(ds + di, sp, eo);
1829                 di += eo;
1830                 if (++i >= nm) {
1831                         /* replace */
1832                         di -= (eo - so);
1833                         nbs = 0;
1834                         for (s = repl; *s; s++) {
1835                                 ds[di++] = c = *s;
1836                                 if (c == '\\') {
1837                                         nbs++;
1838                                         continue;
1839                                 }
1840                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1841                                         di -= ((nbs + 3) >> 1);
1842                                         j = 0;
1843                                         if (c != '&') {
1844                                                 j = c - '0';
1845                                                 nbs++;
1846                                         }
1847                                         if (nbs % 2) {
1848                                                 ds[di++] = c;
1849                                         } else {
1850                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1851                                                 qrealloc(&ds, di + rl + n, &dssize);
1852                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1853                                                 di += n;
1854                                         }
1855                                 }
1856                                 nbs = 0;
1857                         }
1858                 }
1859
1860                 sp += eo;
1861                 if (i == nm) break;
1862                 if (eo == so) {
1863                         if (! (ds[di++] = *sp++)) break;
1864                 }
1865         }
1866
1867         qrealloc(&ds, di + strlen(sp), &dssize);
1868         strcpy(ds + di, sp);
1869         setvar_p(dest, ds);
1870         if (re == &sreg) regfree(re);
1871         return i;
1872 }
1873
1874 static var *exec_builtin(node *op, var *res)
1875 {
1876         int (*to_xxx)(int);
1877         var *tv;
1878         node *an[4];
1879         var  *av[4];
1880         char *as[4];
1881         regmatch_t pmatch[2];
1882         regex_t sreg, *re;
1883         static tsplitter tspl;
1884         node *spl;
1885         uint32_t isr, info;
1886         int nargs;
1887         time_t tt;
1888         char *s, *s1;
1889         int i, l, ll, n;
1890
1891         tv = nvalloc(4);
1892         isr = info = op->info;
1893         op = op->l.n;
1894
1895         av[2] = av[3] = NULL;
1896         for (i=0 ; i<4 && op ; i++) {
1897                 an[i] = nextarg(&op);
1898                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1899                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1900                 isr >>= 1;
1901         }
1902
1903         nargs = i;
1904         if (nargs < (info >> 30))
1905                 runtime_error(EMSG_TOO_FEW_ARGS);
1906
1907         switch (info & OPNMASK) {
1908
1909           case B_a2:
1910 #ifdef CONFIG_FEATURE_AWK_MATH
1911                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1912 #else
1913                 runtime_error(EMSG_NO_MATH);
1914 #endif
1915                 break;
1916
1917           case B_sp:
1918                 if (nargs > 2) {
1919                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1920                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1921                 } else {
1922                         spl = &fsplitter.n;
1923                 }
1924
1925                 n = awk_split(as[0], spl, &s);
1926                 s1 = s;
1927                 clear_array(iamarray(av[1]));
1928                 for (i=1; i<=n; i++)
1929                         setari_u(av[1], i, nextword(&s1));
1930                 free(s);
1931                 setvar_i(res, n);
1932                 break;
1933
1934           case B_ss:
1935                 l = bb_strlen(as[0]);
1936                 i = getvar_i(av[1]) - 1;
1937                 if (i>l) i=l; if (i<0) i=0;
1938                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1939                 if (n<0) n=0;
1940                 s = xmalloc(n+1);
1941                 strncpy(s, as[0]+i, n);
1942                 s[n] = '\0';
1943                 setvar_p(res, s);
1944                 break;
1945
1946           case B_lo:
1947                 to_xxx = tolower;
1948                 goto lo_cont;
1949
1950           case B_up:
1951                 to_xxx = toupper;
1952 lo_cont:
1953                 s1 = s = bb_xstrdup(as[0]);
1954                 while (*s1) {
1955                         *s1 = (*to_xxx)(*s1);
1956                         s1++;
1957                 }
1958                 setvar_p(res, s);
1959                 break;
1960
1961           case B_ix:
1962                 n = 0;
1963                 ll = bb_strlen(as[1]);
1964                 l = bb_strlen(as[0]) - ll;
1965                 if (ll > 0 && l >= 0) {
1966                         if (! icase) {
1967                                 s = strstr(as[0], as[1]);
1968                                 if (s) n = (s - as[0]) + 1;
1969                         } else {
1970                                 /* this piece of code is terribly slow and
1971                                  * really should be rewritten
1972                                  */
1973                                 for (i=0; i<=l; i++) {
1974                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1975                                                 n = i+1;
1976                                                 break;
1977                                         }
1978                                 }
1979                         }
1980                 }
1981                 setvar_i(res, n);
1982                 break;
1983
1984           case B_ti:
1985                 if (nargs > 1)
1986                         tt = getvar_i(av[1]);
1987                 else
1988                         time(&tt);
1989                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1990                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1991                 buf[i] = '\0';
1992                 setvar_s(res, buf);
1993                 break;
1994
1995           case B_ma:
1996                 re = as_regex(an[1], &sreg);
1997                 n = regexec(re, as[0], 1, pmatch, 0);
1998                 if (n == 0) {
1999                         pmatch[0].rm_so++;
2000                         pmatch[0].rm_eo++;
2001                 } else {
2002                         pmatch[0].rm_so = 0;
2003                         pmatch[0].rm_eo = -1;
2004                 }
2005                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2006                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2007                 setvar_i(res, pmatch[0].rm_so);
2008                 if (re == &sreg) regfree(re);
2009                 break;
2010
2011           case B_ge:
2012                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2013                 break;
2014
2015           case B_gs:
2016                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2017                 break;
2018
2019           case B_su:
2020                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2021                 break;
2022         }
2023
2024         nvfree(tv);
2025         return res;
2026 }
2027
2028 /*
2029  * Evaluate node - the heart of the program. Supplied with subtree
2030  * and place where to store result. returns ptr to result.
2031  */
2032 #define XC(n) ((n) >> 8)
2033
2034 static var *evaluate(node *op, var *res)
2035 {
2036         /* This procedure is recursive so we should count every byte */
2037         static var *fnargs = NULL;
2038         static unsigned int seed = 1;
2039         static regex_t sreg;
2040         node *op1;
2041         var *v1;
2042         union {
2043                 var *v;
2044                 char *s;
2045                 double d;
2046                 int i;
2047         } L, R;
2048         uint32_t opinfo;
2049         short opn;
2050         union {
2051                 char *s;
2052                 rstream *rsm;
2053                 FILE *F;
2054                 var *v;
2055                 regex_t *re;
2056                 uint32_t info;
2057         } X;
2058
2059         if (! op)
2060                 return setvar_s(res, NULL);
2061
2062         v1 = nvalloc(2);
2063
2064         while (op) {
2065
2066                 opinfo = op->info;
2067                 opn = (short)(opinfo & OPNMASK);
2068                 lineno = op->lineno;
2069
2070                 /* execute inevitable things */
2071                 op1 = op->l.n;
2072                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2073                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2074                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2075                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2076                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2077
2078                 switch (XC(opinfo & OPCLSMASK)) {
2079
2080                   /* -- iterative node type -- */
2081
2082                   /* test pattern */
2083                   case XC( OC_TEST ):
2084                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2085                                 /* it's range pattern */
2086                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2087                                         op->info |= OF_CHECKED;
2088                                         if (ptest(op1->r.n))
2089                                                 op->info &= ~OF_CHECKED;
2090
2091                                         op = op->a.n;
2092                                 } else {
2093                                         op = op->r.n;
2094                                 }
2095                         } else {
2096                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2097                         }
2098                         break;
2099
2100                   /* just evaluate an expression, also used as unconditional jump */
2101                   case XC( OC_EXEC ):
2102                         break;
2103
2104                   /* branch, used in if-else and various loops */
2105                   case XC( OC_BR ):
2106                         op = istrue(L.v) ? op->a.n : op->r.n;
2107                         break;
2108
2109                   /* initialize for-in loop */
2110                   case XC( OC_WALKINIT ):
2111                         hashwalk_init(L.v, iamarray(R.v));
2112                         break;
2113
2114                   /* get next array item */
2115                   case XC( OC_WALKNEXT ):
2116                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2117                         break;
2118
2119                   case XC( OC_PRINT ):
2120                   case XC( OC_PRINTF ):
2121                         X.F = stdout;
2122                         if (op->r.n) {
2123                                 X.rsm = newfile(R.s);
2124                                 if (! X.rsm->F) {
2125                                         if (opn == '|') {
2126                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2127                                                         bb_perror_msg_and_die("popen");
2128                                                 X.rsm->is_pipe = 1;
2129                                         } else {
2130                                                 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2131                                         }
2132                                 }
2133                                 X.F = X.rsm->F;
2134                         }
2135
2136                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2137                                 if (! op1) {
2138                                         fputs(getvar_s(V[F0]), X.F);
2139                                 } else {
2140                                         while (op1) {
2141                                                 L.v = evaluate(nextarg(&op1), v1);
2142                                                 if (L.v->type & VF_NUMBER) {
2143                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2144                                                                                                                 getvar_i(L.v), TRUE);
2145                                                         fputs(buf, X.F);
2146                                                 } else {
2147                                                         fputs(getvar_s(L.v), X.F);
2148                                                 }
2149
2150                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2151                                         }
2152                                 }
2153                                 fputs(getvar_s(V[ORS]), X.F);
2154
2155                         } else {        /* OC_PRINTF */
2156                                 L.s = awk_printf(op1);
2157                                 fputs(L.s, X.F);
2158                                 free(L.s);
2159                         }
2160                         fflush(X.F);
2161                         break;
2162
2163                   case XC( OC_DELETE ):
2164                         X.info = op1->info & OPCLSMASK;
2165                         if (X.info == OC_VAR) {
2166                                 R.v = op1->l.v;
2167                         } else if (X.info == OC_FNARG) {
2168                                 R.v = &fnargs[op1->l.i];
2169                         } else {
2170                                 runtime_error(EMSG_NOT_ARRAY);
2171                         }
2172
2173                         if (op1->r.n) {
2174                                 clrvar(L.v);
2175                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2176                                 hash_remove(iamarray(R.v), L.s);
2177                         } else {
2178                                 clear_array(iamarray(R.v));
2179                         }
2180                         break;
2181
2182                   case XC( OC_NEWSOURCE ):
2183                         programname = op->l.s;
2184                         break;
2185
2186                   case XC( OC_RETURN ):
2187                         copyvar(res, L.v);
2188                         break;
2189
2190                   case XC( OC_NEXTFILE ):
2191                         nextfile = TRUE;
2192                   case XC( OC_NEXT ):
2193                         nextrec = TRUE;
2194                   case XC( OC_DONE ):
2195                         clrvar(res);
2196                         break;
2197
2198                   case XC( OC_EXIT ):
2199                         awk_exit(L.d);
2200
2201                   /* -- recursive node type -- */
2202
2203                   case XC( OC_VAR ):
2204                         L.v = op->l.v;
2205                         if (L.v == V[NF])
2206                                 split_f0();
2207                         goto v_cont;
2208
2209                   case XC( OC_FNARG ):
2210                         L.v = &fnargs[op->l.i];
2211
2212 v_cont:
2213                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2214                         break;
2215
2216                   case XC( OC_IN ):
2217                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2218                         break;
2219
2220                   case XC( OC_REGEXP ):
2221                         op1 = op;
2222                         L.s = getvar_s(V[F0]);
2223                         goto re_cont;
2224
2225                   case XC( OC_MATCH ):
2226                         op1 = op->r.n;
2227 re_cont:
2228                         X.re = as_regex(op1, &sreg);
2229                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2230                         if (X.re == &sreg) regfree(X.re);
2231                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2232                         break;
2233
2234                   case XC( OC_MOVE ):
2235                         /* if source is a temporary string, jusk relink it to dest */
2236                         if (R.v == v1+1 && R.v->string) {
2237                                 res = setvar_p(L.v, R.v->string);
2238                                 R.v->string = NULL;
2239                         } else {
2240                                 res = copyvar(L.v, R.v);
2241                         }
2242                         break;
2243
2244                   case XC( OC_TERNARY ):
2245                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2246                                 runtime_error(EMSG_POSSIBLE_ERROR);
2247                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2248                         break;
2249
2250                   case XC( OC_FUNC ):
2251                         if (! op->r.f->body.first)
2252                                 runtime_error(EMSG_UNDEF_FUNC);
2253
2254                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2255                         while (op1) {
2256                                 L.v = evaluate(nextarg(&op1), v1);
2257                                 copyvar(R.v, L.v);
2258                                 R.v->type |= VF_CHILD;
2259                                 R.v->x.parent = L.v;
2260                                 if (++R.v - X.v >= op->r.f->nargs)
2261                                         break;
2262                         }
2263
2264                         R.v = fnargs;
2265                         fnargs = X.v;
2266
2267                         L.s = programname;
2268                         res = evaluate(op->r.f->body.first, res);
2269                         programname = L.s;
2270
2271                         nvfree(fnargs);
2272                         fnargs = R.v;
2273                         break;
2274
2275                   case XC( OC_GETLINE ):
2276                   case XC( OC_PGETLINE ):
2277                         if (op1) {
2278                                 X.rsm = newfile(L.s);
2279                                 if (! X.rsm->F) {
2280                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2281                                                 X.rsm->F = popen(L.s, "r");
2282                                                 X.rsm->is_pipe = TRUE;
2283                                         } else {
2284                                                 X.rsm->F = fopen(L.s, "r");             /* not bb_xfopen! */
2285                                         }
2286                                 }
2287                         } else {
2288                                 if (! iF) iF = next_input_file();
2289                                 X.rsm = iF;
2290                         }
2291
2292                         if (! X.rsm->F) {
2293                                 setvar_i(V[ERRNO], errno);
2294                                 setvar_i(res, -1);
2295                                 break;
2296                         }
2297
2298                         if (! op->r.n)
2299                                 R.v = V[F0];
2300
2301                         L.i = awk_getline(X.rsm, R.v);
2302                         if (L.i > 0) {
2303                                 if (! op1) {
2304                                         incvar(V[FNR]);
2305                                         incvar(V[NR]);
2306                                 }
2307                         }
2308                         setvar_i(res, L.i);
2309                         break;
2310
2311                   /* simple builtins */
2312                   case XC( OC_FBLTIN ):
2313                         switch (opn) {
2314
2315                           case F_in:
2316                                 R.d = (int)L.d;
2317                                 break;
2318
2319                           case F_rn:
2320                                 R.d =  (double)rand() / (double)RAND_MAX;
2321                                 break;
2322
2323 #ifdef CONFIG_FEATURE_AWK_MATH
2324                           case F_co:
2325                                 R.d = cos(L.d);
2326                                 break;
2327
2328                           case F_ex:
2329                                 R.d = exp(L.d);
2330                                 break;
2331
2332                           case F_lg:
2333                                 R.d = log(L.d);
2334                                 break;
2335
2336                           case F_si:
2337                                 R.d = sin(L.d);
2338                                 break;
2339
2340                           case F_sq:
2341                                 R.d = sqrt(L.d);
2342                                 break;
2343 #else
2344                           case F_co:
2345                           case F_ex:
2346                           case F_lg:
2347                           case F_si:
2348                           case F_sq:
2349                                 runtime_error(EMSG_NO_MATH);
2350                                 break;
2351 #endif
2352
2353                           case F_sr:
2354                                 R.d = (double)seed;
2355                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2356                                 srand(seed);
2357                                 break;
2358
2359                           case F_ti:
2360                                 R.d = time(NULL);
2361                                 break;
2362
2363                           case F_le:
2364                                 if (! op1)
2365                                         L.s = getvar_s(V[F0]);
2366                                 R.d = bb_strlen(L.s);
2367                                 break;
2368
2369                           case F_sy:
2370                                 fflush(NULL);
2371                                 R.d = (L.s && *L.s) ? system(L.s) : 0;
2372                                 break;
2373
2374                           case F_ff:
2375                                 if (! op1)
2376                                         fflush(stdout);
2377                                 else {
2378                                         if (L.s && *L.s) {
2379                                                 X.rsm = newfile(L.s);
2380                                                 fflush(X.rsm->F);
2381                                         } else {
2382                                                 fflush(NULL);
2383                                         }
2384                                 }
2385                                 break;
2386
2387                           case F_cl:
2388                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2389                                 if (X.rsm) {
2390                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2391                                         free(X.rsm->buffer);
2392                                         hash_remove(fdhash, L.s);
2393                                 }
2394                                 if (R.i != 0)
2395                                         setvar_i(V[ERRNO], errno);
2396                                 R.d = (double)R.i;
2397                                 break;
2398                         }
2399                         setvar_i(res, R.d);
2400                         break;
2401
2402                   case XC( OC_BUILTIN ):
2403                         res = exec_builtin(op, res);
2404                         break;
2405
2406                   case XC( OC_SPRINTF ):
2407                         setvar_p(res, awk_printf(op1));
2408                         break;
2409
2410                   case XC( OC_UNARY ):
2411                         X.v = R.v;
2412                         L.d = R.d = getvar_i(R.v);
2413                         switch (opn) {
2414                           case 'P':
2415                                 L.d = ++R.d;
2416                                 goto r_op_change;
2417                           case 'p':
2418                                 R.d++;
2419                                 goto r_op_change;
2420                           case 'M':
2421                                 L.d = --R.d;
2422                                 goto r_op_change;
2423                           case 'm':
2424                                 R.d--;
2425                                 goto r_op_change;
2426                           case '!':
2427                             L.d = istrue(X.v) ? 0 : 1;
2428                                 break;
2429                           case '-':
2430                                 L.d = -R.d;
2431                                 break;
2432                         r_op_change:
2433                                 setvar_i(X.v, R.d);
2434                         }
2435                         setvar_i(res, L.d);
2436                         break;
2437
2438                   case XC( OC_FIELD ):
2439                         R.i = (int)getvar_i(R.v);
2440                         if (R.i == 0) {
2441                                 res = V[F0];
2442                         } else {
2443                                 split_f0();
2444                                 if (R.i > nfields)
2445                                         fsrealloc(R.i);
2446
2447                                 res = &Fields[R.i-1];
2448                         }
2449                         break;
2450
2451                   /* concatenation (" ") and index joining (",") */
2452                   case XC( OC_CONCAT ):
2453                   case XC( OC_COMMA ):
2454                         opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2455                         X.s = (char *)xmalloc(opn);
2456                         strcpy(X.s, L.s);
2457                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2458                                 L.s = getvar_s(V[SUBSEP]);
2459                                 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2460                                 strcat(X.s, L.s);
2461                         }
2462                         strcat(X.s, R.s);
2463                         setvar_p(res, X.s);
2464                         break;
2465
2466                   case XC( OC_LAND ):
2467                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2468                         break;
2469
2470                   case XC( OC_LOR ):
2471                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2472                         break;
2473
2474                   case XC( OC_BINARY ):
2475                   case XC( OC_REPLACE ):
2476                         R.d = getvar_i(R.v);
2477                         switch (opn) {
2478                           case '+':
2479                                 L.d += R.d;
2480                                 break;
2481                           case '-':
2482                                 L.d -= R.d;
2483                                 break;
2484                           case '*':
2485                                 L.d *= R.d;
2486                                 break;
2487                           case '/':
2488                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2489                                 L.d /= R.d;
2490                                 break;
2491                           case '&':
2492 #ifdef CONFIG_FEATURE_AWK_MATH
2493                                 L.d = pow(L.d, R.d);
2494 #else
2495                                 runtime_error(EMSG_NO_MATH);
2496 #endif
2497                                 break;
2498                           case '%':
2499                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2500                                 L.d -= (int)(L.d / R.d) * R.d;
2501                                 break;
2502                         }
2503                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2504                         break;
2505
2506                   case XC( OC_COMPARE ):
2507                         if (is_numeric(L.v) && is_numeric(R.v)) {
2508                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2509                         } else {
2510                                 L.s = getvar_s(L.v);
2511                                 R.s = getvar_s(R.v);
2512                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2513                         }
2514                         switch (opn & 0xfe) {
2515                           case 0:
2516                                 R.i = (L.d > 0);
2517                                 break;
2518                           case 2:
2519                                 R.i = (L.d >= 0);
2520                                 break;
2521                           case 4:
2522                                 R.i = (L.d == 0);
2523                                 break;
2524                         }
2525                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2526                         break;
2527
2528                   default:
2529                         runtime_error(EMSG_POSSIBLE_ERROR);
2530                 }
2531                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2532                         op = op->a.n;
2533                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2534                         break;
2535                 if (nextrec)
2536                         break;
2537         }
2538         nvfree(v1);
2539         return res;
2540 }
2541
2542
2543 /* -------- main & co. -------- */
2544
2545 static int awk_exit(int r)
2546 {
2547         unsigned int i;
2548         hash_item *hi;
2549         static var tv;
2550
2551         if (! exiting) {
2552                 exiting = TRUE;
2553                 nextrec = FALSE;
2554                 evaluate(endseq.first, &tv);
2555         }
2556
2557         /* waiting for children */
2558         for (i=0; i<fdhash->csize; i++) {
2559                 hi = fdhash->items[i];
2560                 while(hi) {
2561                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2562                                 pclose(hi->data.rs.F);
2563                         hi = hi->next;
2564                 }
2565         }
2566
2567         exit(r);
2568 }
2569
2570 /* if expr looks like "var=value", perform assignment and return 1,
2571  * otherwise return 0 */
2572 static int is_assignment(char *expr)
2573 {
2574         char *exprc, *s, *s0, *s1;
2575
2576         exprc = bb_xstrdup(expr);
2577         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2578                 free(exprc);
2579                 return FALSE;
2580         }
2581
2582         *(s++) = '\0';
2583         s0 = s1 = s;
2584         while (*s)
2585                 *(s1++) = nextchar(&s);
2586
2587         *s1 = '\0';
2588         setvar_u(newvar(exprc), s0);
2589         free(exprc);
2590         return TRUE;
2591 }
2592
2593 /* switch to next input file */
2594 static rstream *next_input_file(void)
2595 {
2596         static rstream rsm;
2597         FILE *F = NULL;
2598         char *fname, *ind;
2599         static int files_happen = FALSE;
2600
2601         if (rsm.F) fclose(rsm.F);
2602         rsm.F = NULL;
2603         rsm.pos = rsm.adv = 0;
2604
2605         do {
2606                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2607                         if (files_happen)
2608                                 return NULL;
2609                         fname = "-";
2610                         F = stdin;
2611                 } else {
2612                         ind = getvar_s(incvar(V[ARGIND]));
2613                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2614                         if (fname && *fname && !is_assignment(fname))
2615                                 F = afopen(fname, "r");
2616                 }
2617         } while (!F);
2618
2619         files_happen = TRUE;
2620         setvar_s(V[FILENAME], fname);
2621         rsm.F = F;
2622         return &rsm;
2623 }
2624
2625 extern int awk_main(int argc, char **argv)
2626 {
2627         char *s, *s1;
2628         int i, j, c;
2629         var *v;
2630         static var tv;
2631         char **envp;
2632         static int from_file = FALSE;
2633         rstream *rsm;
2634         FILE *F, *stdfiles[3];
2635         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2636
2637         /* allocate global buffer */
2638         buf = xmalloc(MAXVARFMT+1);
2639
2640         vhash = hash_init();
2641         ahash = hash_init();
2642         fdhash = hash_init();
2643         fnhash = hash_init();
2644
2645         /* initialize variables */
2646         for (i=0;  *vNames;  i++) {
2647                 V[i] = v = newvar(nextword(&vNames));
2648                 if (*vValues != '\377')
2649                         setvar_s(v, nextword(&vValues));
2650                 else
2651                         setvar_i(v, 0);
2652
2653                 if (*vNames == '*') {
2654                         v->type |= VF_SPECIAL;
2655                         vNames++;
2656                 }
2657         }
2658
2659         handle_special(V[FS]);
2660         handle_special(V[RS]);
2661
2662         stdfiles[0] = stdin;
2663         stdfiles[1] = stdout;
2664         stdfiles[2] = stderr;
2665         for (i=0; i<3; i++) {
2666                 rsm = newfile(nextword(&stdnames));
2667                 rsm->F = stdfiles[i];
2668         }
2669
2670         for (envp=environ; *envp; envp++) {
2671                 s = bb_xstrdup(*envp);
2672                 s1 = strchr(s, '=');
2673                 if (!s1) {
2674                         goto keep_going;
2675                 }
2676                 *(s1++) = '\0';
2677                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2678 keep_going:
2679                 free(s);
2680         }
2681
2682         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2683                 switch (c) {
2684                         case 'F':
2685                                 setvar_s(V[FS], optarg);
2686                                 break;
2687                         case 'v':
2688                                 if (! is_assignment(optarg))
2689                                         bb_show_usage();
2690                                 break;
2691                         case 'f':
2692                                 from_file = TRUE;
2693                                 F = afopen(programname = optarg, "r");
2694                                 s = NULL;
2695                                 /* one byte is reserved for some trick in next_token */
2696                                 for (i=j=1; j>0; i+=j) {
2697                                         s = (char *)xrealloc(s, i+4096);
2698                                         j = fread(s+i, 1, 4094, F);
2699                                 }
2700                                 s[i] = '\0';
2701                                 fclose(F);
2702                                 parse_program(s+1);
2703                                 free(s);
2704                                 break;
2705                         case 'W':
2706                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2707                                 break;
2708
2709                         default:
2710                                 bb_show_usage();
2711                 }
2712         }
2713
2714         if (!from_file) {
2715                 if (argc == optind)
2716                         bb_show_usage();
2717                 programname="cmd. line";
2718                 parse_program(argv[optind++]);
2719
2720         }
2721
2722         /* fill in ARGV array */
2723         setvar_i(V[ARGC], argc - optind + 1);
2724         setari_u(V[ARGV], 0, "awk");
2725         for(i=optind; i < argc; i++)
2726                 setari_u(V[ARGV], i+1-optind, argv[i]);
2727
2728         evaluate(beginseq.first, &tv);
2729         if (! mainseq.first && ! endseq.first)
2730                 awk_exit(EXIT_SUCCESS);
2731
2732         /* input file could already be opened in BEGIN block */
2733         if (! iF) iF = next_input_file();
2734
2735         /* passing through input files */
2736         while (iF) {
2737
2738                 nextfile = FALSE;
2739                 setvar_i(V[FNR], 0);
2740
2741                 while ((c = awk_getline(iF, V[F0])) > 0) {
2742
2743                         nextrec = FALSE;
2744                         incvar(V[NR]);
2745                         incvar(V[FNR]);
2746                         evaluate(mainseq.first, &tv);
2747
2748                         if (nextfile)
2749                                 break;
2750                 }
2751
2752                 if (c < 0)
2753                         runtime_error(strerror(errno));
2754
2755                 iF = next_input_file();
2756
2757         }
2758
2759         awk_exit(EXIT_SUCCESS);
2760
2761         return 0;
2762 }
2763