cb54d2597e3ca46916f69d5d99b585d06323b9d6
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <time.h>
29 #include <math.h>
30 #include <ctype.h>
31 #include <getopt.h>
32
33 #include "xregex.h"
34 #include "busybox.h"
35
36
/* buffer length handed to fmt_num() when getvar_s() formats a number */
#define MAXVARFMT       240
/* smallest number of var slots nvalloc() puts into a new nvblock */
#define MINNVBLOCK      64

/* variable flags (bits of var.type) */
#define VF_NUMBER       0x0001  /* 1 = primary type is number */
#define VF_ARRAY        0x0002  /* 1 = it's an array */

#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed
 * (clrvar() keeps exactly these bits and drops all the others) */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
/* Variable: one awk value (scalar, array handle or function argument) */
typedef struct var_s {
	unsigned short type;            /* flags (VF_*) */
	double number;                  /* numeric value, or cached conversion of string */
	char *string;                   /* string value; NULL is read back as "" by getvar_s() */
	union {
		int aidx;                               /* func arg index (on compilation stage) */
		struct xhash_s *array;  /* array ptr */
		struct var_s *parent;   /* for func args, ptr to actual parameter */
		char **walker;                  /* list of array elements (for..in) */
	} x;
} var;
67
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;   /* first node of the chain */
	struct node_s *last;    /* last node of the chain */
	char *programname;      /* NOTE(review): presumably the source name the chain was parsed from */
} chain;
74
/* Function: user-defined awk function (stored in the functions hash) */
typedef struct func_s {
	unsigned short nargs;   /* number of arguments */
	struct chain_s body;    /* node chain of the function body */
} func;
80
/* I/O stream (entry of the redirect-streams hash, see hash_item) */
typedef struct rstream_s {
	FILE *F;                        /* underlying stdio stream */
	char *buffer;                   /* read buffer; adv/size/pos describe it -- their
	                                 * exact meaning is set by code outside this view */
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;         /* NOTE(review): presumably nonzero for popen()ed streams */
} rstream;
90
/* One entry of an xhash bucket chain.
 * 'data' must stay the first member: hash_search() hands out &data and
 * hash_find() stores that pointer back into a hash_item pointer.
 */
typedef struct hash_item_s {
	union {
		struct var_s v;                 /* variable/array hash */
		struct rstream_s rs;    /* redirect streams hash */
		struct func_s f;                /* functions hash */
	} data;
	struct hash_item_s *next;       /* next in chain */
	char name[1];                           /* really it's longer: hash_find() allocates
	                                         * sizeof(hash_item) + strlen(name) + 1 */
} hash_item;
100
/* Chained hash table keyed by NUL-terminated names */
typedef struct xhash_s {
	unsigned int nel;                                       /* num of elements */
	unsigned int csize;                                     /* current hash size (number of buckets) */
	unsigned int nprime;                            /* next hash size in PRIMES[] (index) */
	unsigned int glen;                                      /* summary length of item names, each
	                                                         * counted with its trailing NUL */
	struct hash_item_s **items;                             /* bucket array, csize entries */
} xhash;
108
/* Tree node of the parsed program.
 * l, r and a are operand slots; which union member is valid depends on
 * the operation encoded in info (decided by code outside this view).
 */
typedef struct node_s {
	uint32_t info;                  /* operation word */
	unsigned short lineno;          /* source line number */
	union {
		struct node_s *n;
		var *v;
		int i;
		char *s;
		regex_t *re;
	} l;
	union {
		struct node_s *n;
		regex_t *ire;
		func *f;
		int argno;
	} r;
	union {
		struct node_s *n;
	} a;
} node;
130
/* Block of temporary variables.
 * Blocks form a doubly linked list; nvalloc() carves vars out of nv[]
 * and nvfree() gives them back.
 */
typedef struct nvblock_s {
	int size;                       /* number of var slots in nv[] */
	var *pos;                       /* first unused slot in nv[] */
	struct nvblock_s *prev;
	struct nvblock_s *next;
	var nv[0];                      /* slots; storage is allocated together with the block */
} nvblock;
139
/* Splitter state: a node plus two regex_t objects (used for the
 * fsplitter and rsplitter globals).
 * NOTE(review): presumably re[0]/re[1] are the case-sensitive and
 * case-insensitive compilations -- confirm against the code that fills them.
 */
typedef struct tsplitter_s {
	node n;
	regex_t re[2];
} tsplitter;
144
/* simple token classes */
/* Order and hex values are very important!!!  See next_token():
 * it starts with class bit 1 and shifts it left once for every NTC marker
 * met in tokenlist, so bit N here must describe the Nth group there.
 */
#define TC_SEQSTART      1                              /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
/* the classes below have no tokenlist group; next_token() assigns
 * TC_EOF, TC_STRING and TC_NUMBER directly */
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

#define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
	TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT     (TC_STATX | TC_WHILE)
#define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
	TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
	TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
	TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
207
/* operator flags: bits 16..21 of an info word.
 * NOTE(review): judging by the builtin description further down
 * ("resolve Nth arg to var / to string"), RESn presumably asks for operand n
 * to be evaluated and STRn/NUM1 for its string/numeric form -- confirm in
 * the evaluator.
 */
#define OF_RES1         0x010000
#define OF_RES2         0x020000
#define OF_STR1         0x040000
#define OF_STR2         0x080000
#define OF_NUM1         0x100000
#define OF_CHECKED      0x200000

/* combined operator flags: first letter describes operand 1, second
 * letter operand 2 (x = no flag, V = RES, S = RES+STR, N = RES+NUM) */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class (OC_ and ST_ values) and the low 7 bits of an info word */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F
229
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() shifts as uint32_t: builtin codes such as P(0x83) or P(0xd6) set
 * bit 31, and shifting a signed int that far is undefined behaviour.
 * The argument is parenthesized so expressions can be passed safely.
 */
#define P(x)    ((uint32_t)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000
237
/* Operation classes: occupy the OPCLSMASK byte (bits 8..15) of an info word */

/* Range markers inside the numbering below: SHIFT_TIL_THIS equals
 * OC_WALKINIT and RECUR_FROM_THIS equals OC_BINARY.
 * NOTE(review): how the evaluator uses these two boundaries is outside
 * this view.
 */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
	OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
	OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,

	OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
	OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
	OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,

	OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
	OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
	OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
	OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
	OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
	OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
	OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
	OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
	OC_DONE=0x2800,

	/* statement keywords (if/do/for/while), as used in tokeninfo[] */
	ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
	ST_WHILE=0x3300
};
264
/* simple builtins: sub-codes OR-ed with OC_FBLTIN in tokeninfo[]
 * (in, rn=rand, co=cos, ex=exp, lg=log, si=sin, sq=sqrt, sr=srand,
 *  ti=systime, le=length, sy=system, ff=fflush, cl=close) */
enum {
	F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
	F_ti,   F_le,   F_sy,   F_ff,   F_cl
};

/* builtins: sub-codes OR-ed with OC_BUILTIN in tokeninfo[]
 * (a2=atan2, ix=index, ma=match, sp=split, ss=substr, ti=strftime,
 *  lo=tolower, up=toupper, ge=gensub, gs=gsub, su=sub) */
enum {
	B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
	B_ge,   B_gs,   B_su
};
276
/* tokens and their corresponding info values */

#define NTC             "\377"          /* switch to next token class (tc<<1) */
#define NTCC    '\377'

#define OC_B    OC_BUILTIN

/* Token table scanned by next_token().
 * Every entry is one length byte (written as an octal escape) followed by
 * the token text.  NTC closes a group: the scanner then moves on to the
 * next TC_* class bit.  A zero length byte ends the table.
 * Groups appear in TC_* bit order and the entries line up one-to-one with
 * tokeninfo[]; keep all three in sync.
 */
static char * const tokenlist =
	"\1("           NTC
	"\1)"           NTC
	"\1/"           NTC                                                                     /* REGEXP */
	"\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
	"\2++"          "\2--"          NTC                                             /* UOPPOST */
	"\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
	"\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
	"\2*="          "\2/="          "\2%="          "\2^="
	"\1+"           "\1-"           "\3**="         "\2**"
	"\1/"           "\1%"           "\1^"           "\1*"
	"\2!="          "\2>="          "\2<="          "\1>"
	"\1<"           "\2!~"          "\1~"           "\2&&"
	"\2||"          "\1?"           "\1:"           NTC
	"\2in"          NTC
	"\1,"           NTC
	"\1|"           NTC
	"\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
	"\1]"           NTC
	"\1{"           NTC
	"\1}"           NTC
	"\1;"           NTC
	"\1\n"          NTC
	"\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
	"\10continue"                   "\6delete"      "\5print"
	"\6printf"      "\4next"        "\10nextfile"
	"\6return"      "\4exit"        NTC
	"\5while"       NTC
	"\4else"        NTC

	"\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
	"\3cos"         "\3exp"         "\3int"         "\3log"
	"\4rand"        "\3sin"         "\4sqrt"        "\5srand"
	"\6gensub"      "\4gsub"        "\5index"       "\6length"
	"\5match"       "\5split"       "\7sprintf"     "\3sub"
	"\6substr"      "\7systime"     "\10strftime"
	"\7tolower"     "\7toupper"     NTC
	"\7getline"     NTC
	"\4func"        "\10function"   NTC
	"\5BEGIN"       NTC
	"\3END"         "\0"
	;
326
/* Info word for every entry of tokenlist, in the same order (one value per
 * token; NTC markers have no slot here).  A word combines an operation
 * class (OC_ or ST_), operand flags (xx..SS), a priority P(n) and a low-byte
 * sub-code; 0 means the token carries no info.
 */
static const uint32_t tokeninfo[] = {

	0,                                                      /* ( */
	0,                                                      /* ) */
	OC_REGEXP,
	xS|'a',         xS|'w',         xS|'|',                 /* >> > | */
	OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',   /* postfix ++ -- */
	OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',   /* prefix ++ -- */
		OC_FIELD|xV|P(5),                               /* $ */
	OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
		OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
	OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
		OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
	OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
		OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
	OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
		OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
	OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
		OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
	OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
		OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
	OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
		OC_COLON|xx|P(67)|':',
	OC_IN|SV|P(49),
	OC_COMMA|SS|P(80),
	OC_PGETLINE|SV|P(37),
	OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
		OC_UNARY|xV|P(19)|'!',
	0,                                                      /* ] */
	0,                                                      /* { */
	0,                                                      /* } */
	0,                                                      /* ; */
	0,                                                      /* newline */
	ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
	OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
	OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
	OC_RETURN|Vx,   OC_EXIT|Nx,
	ST_WHILE,
	0,                                                      /* else */

	OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
	OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
	OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
	OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
	OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
	OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
	OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
	OC_GETLINE|SV|P(0),
	0,      0,                                              /* func function */
	0,                                                      /* BEGIN */
	0                                                       /* END */
};
379
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum gives the index of every internal variable in V[]; its order
 * must match the order of names in vNames. */
enum {
	CONVFMT=0,      OFMT,           FS,                     OFS,
	ORS,            RS,                     RT,                     FILENAME,
	SUBSEP,         ARGIND,         ARGC,           ARGV,
	ERRNO,          FNR,
	NR,                     NF,                     IGNORECASE,
	ENVIRON,        F0,                     _intvarcount_
};

/* NUL-separated list of names, closed by an empty name.  A '*' placed
 * right after a name's NUL flags that variable (FS, RS, NF, IGNORECASE, $).
 * NOTE(review): the code that walks this list is outside this view. */
static char * vNames =
	"CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
	"ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
	"SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
	"ERRNO\0"       "FNR\0"
	"NR\0"          "NF\0*"         "IGNORECASE\0*"
	"ENVIRON\0"     "$\0*"          "\0";

/* NUL-separated initial string values for the first nine variables
 * (CONVFMT .. SUBSEP), followed by a \377 marker.
 * NOTE(review): presumably \377 means "no string value from here on". */
static char * vValues =
	"%.6g\0"        "%.6g\0"        " \0"           " \0"
	"\n\0"          "\n\0"          "\0"            "\0"
	"\034\0"
	"\377";
404
/* hash size may grow to these values.
 * A table starts with FIRST_PRIME buckets (hash_init) and hash_rebuild()
 * steps through PRIMES[] until all NPRIMES sizes have been used.
 * FIRST_PRIME carries no trailing semicolon so it can be used in expressions.
 */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
409
/* globals */

extern char **environ;

static var * V[_intvarcount_];                  /* internal variables, indexed by the CONVFMT.. enum */
static chain beginseq, mainseq, endseq, *seq;
static int nextrec, nextfile;
static node *break_ptr, *continue_ptr;
static rstream *iF;
static xhash *vhash, *ahash, *fdhash, *fnhash;  /* vhash: variables, fdhash: streams, fnhash: functions
                                                 * (see newvar/newfile/newfunc) */
static char *programname;                       /* printed as message prefix by syntax_error() */
static short lineno;                            /* line reported in errors; next_token() copies t.lineno here */
static int is_f0_split;
static int nfields;
static var *Fields;
static tsplitter fsplitter, rsplitter;
static nvblock *cb;                             /* current block of temporaries (nvalloc/nvfree) */
static char *pos;                               /* parse position in the program text (next_token) */
static char *buf;                               /* scratch buffer; getvar_s() formats numbers into it */
static int icase;
static int exiting;

/* current token, filled in by next_token() */
static struct {
	uint32_t tclass;
	uint32_t info;
	char *string;
	double number;
	short lineno;           /* line counter advanced while scanning */
	int rollback;           /* when set, next_token() clears it and leaves this token unchanged */
} t;
440
/* function prototypes (forward declarations for functions used before
 * their definition) */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) attribute_noreturn;
449
/* ---- error handling ---- */

/* message texts passed to syntax_error() / runtime_error() */
static const char EMSG_INTERNAL_ERROR[] = "Internal error";
static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
static const char EMSG_INV_FMT[] = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
/* only needed when math support is configured out */
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[] = "Math support is not compiled in";
#endif
464
/* Report "<programname>:<lineno>: <message>" through bb_error_msg_and_die().
 * Declared attribute_noreturn: callers rely on it not coming back.
 */
static void syntax_error(const char * const message) attribute_noreturn;
static void syntax_error(const char * const message)
{
	bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
}

/* runtime errors are reported exactly like syntax errors */
#define runtime_error(x) syntax_error(x)
472
473
474 /* ---- hash stuff ---- */
475
/* Hash a NUL-terminated name.
 * Every character is folded in as  acc = acc*63 + c  (the multiply is
 * spelled as shift-and-subtract) with unsigned wrap-around.  The empty
 * string hashes to 0.  Callers reduce the result modulo the table size.
 */
static unsigned int hashidx(const char *name)
{
	unsigned int acc = 0;
	const char *cp;

	for (cp = name; *cp != '\0'; cp++)
		acc = (acc << 6) - acc + *cp;
	return acc;
}
483
484 /* create new hash */
485 static xhash *hash_init(void)
486 {
487         xhash *newhash;
488
489         newhash = (xhash *)xcalloc(1, sizeof(xhash));
490         newhash->csize = FIRST_PRIME;
491         newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
492
493         return newhash;
494 }
495
496 /* find item in hash, return ptr to data, NULL if not found */
497 static void *hash_search(xhash *hash, const char *name)
498 {
499         hash_item *hi;
500
501         hi = hash->items [ hashidx(name) % hash->csize ];
502         while (hi) {
503                 if (strcmp(hi->name, name) == 0)
504                         return &(hi->data);
505                 hi = hi->next;
506         }
507         return NULL;
508 }
509
510 /* grow hash if it becomes too big */
511 static void hash_rebuild(xhash *hash)
512 {
513         unsigned int newsize, i, idx;
514         hash_item **newitems, *hi, *thi;
515
516         if (hash->nprime == NPRIMES)
517                 return;
518
519         newsize = PRIMES[hash->nprime++];
520         newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
521
522         for (i=0; i<hash->csize; i++) {
523                 hi = hash->items[i];
524                 while (hi) {
525                         thi = hi;
526                         hi = thi->next;
527                         idx = hashidx(thi->name) % newsize;
528                         thi->next = newitems[idx];
529                         newitems[idx] = thi;
530                 }
531         }
532
533         free(hash->items);
534         hash->csize = newsize;
535         hash->items = newitems;
536 }
537
538 /* find item in hash, add it if necessary. Return ptr to data */
539 static void *hash_find(xhash *hash, const char *name)
540 {
541         hash_item *hi;
542         unsigned int idx;
543         int l;
544
545         hi = hash_search(hash, name);
546         if (! hi) {
547                 if (++hash->nel / hash->csize > 10)
548                         hash_rebuild(hash);
549
550                 l = bb_strlen(name) + 1;
551                 hi = xcalloc(sizeof(hash_item) + l, 1);
552                 memcpy(hi->name, name, l);
553
554                 idx = hashidx(name) % hash->csize;
555                 hi->next = hash->items[idx];
556                 hash->items[idx] = hi;
557                 hash->glen += l;
558         }
559         return &(hi->data);
560 }
561
/* Typed wrappers around hash_find(): look up (or create) an entry and
 * return it as the data type kept in that hash.  Each expansion is fully
 * parenthesized so the cast applies before any operator the caller adds,
 * e.g. newvar(name)->type.
 */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name) ((var *) hash_find(vhash, (name)))
#define newfile(name) ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name) ((func *) hash_find(fnhash, (name)))
566
567 static void hash_remove(xhash *hash, const char *name)
568 {
569         hash_item *hi, **phi;
570
571         phi = &(hash->items[ hashidx(name) % hash->csize ]);
572         while (*phi) {
573                 hi = *phi;
574                 if (strcmp(hi->name, name) == 0) {
575                         hash->glen -= (bb_strlen(name) + 1);
576                         hash->nel--;
577                         *phi = hi->next;
578                         free(hi);
579                         break;
580                 }
581                 phi = &(hi->next);
582         }
583 }
584
585 /* ------ some useful functions ------ */
586
587 static void skip_spaces(char **s)
588 {
589         register char *p = *s;
590
591         while(*p == ' ' || *p == '\t' ||
592                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
593                 p++;
594         }
595         *s = p;
596 }
597
/* Return the NUL-terminated word *s points at and move *s to the byte
 * just behind that word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;
	return word;
}
606
/* Fetch the next character from *s and advance *s.
 * A backslash is handed to bb_process_escape_sequence().  When that call
 * returns a backslash without consuming anything, the character after
 * the backslash is returned instead.
 */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	(*s)++;
	after = *s;

	if (ch == '\\')
		ch = bb_process_escape_sequence((const char **)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
617
/* Return 1 if c is alphanumeric or an underscore, 0 otherwise. */
static inline int isalnum_(int c)
{
	if (c == '_')
		return 1;
	return isalnum(c) ? 1 : 0;
}
622
/* Open a file for awk: the path "-" yields stdin (mode is ignored then),
 * every other path is opened through bb_xfopen().
 */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;
	return bb_xfopen(path, mode);
}
627
628 /* -------- working with variables (set/get/copy/etc) -------- */
629
630 static xhash *iamarray(var *v)
631 {
632         var *a = v;
633
634         while (a->type & VF_CHILD)
635                 a = a->x.parent;
636
637         if (! (a->type & VF_ARRAY)) {
638                 a->type |= VF_ARRAY;
639                 a->x.array = hash_init();
640         }
641         return a->x.array;
642 }
643
644 static void clear_array(xhash *array)
645 {
646         unsigned int i;
647         hash_item *hi, *thi;
648
649         for (i=0; i<array->csize; i++) {
650                 hi = array->items[i];
651                 while (hi) {
652                         thi = hi;
653                         hi = hi->next;
654                         free(thi->data.v.string);
655                         free(thi);
656                 }
657                 array->items[i] = NULL;
658         }
659         array->glen = array->nel = 0;
660 }
661
662 /* clear a variable */
663 static var *clrvar(var *v)
664 {
665         if (!(v->type & VF_FSTR))
666                 free(v->string);
667
668         v->type &= VF_DONTTOUCH;
669         v->type |= VF_DIRTY;
670         v->string = NULL;
671         return v;
672 }
673
674 /* assign string value to variable */
675 static var *setvar_p(var *v, char *value)
676 {
677         clrvar(v);
678         v->string = value;
679         handle_special(v);
680
681         return v;
682 }
683
684 /* same as setvar_p but make a copy of string */
685 static var *setvar_s(var *v, const char *value)
686 {
687         return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
688 }
689
690 /* same as setvar_s but set USER flag */
691 static var *setvar_u(var *v, const char *value)
692 {
693         setvar_s(v, value);
694         v->type |= VF_USER;
695         return v;
696 }
697
698 /* set array element to user string */
699 static void setari_u(var *a, int idx, const char *s)
700 {
701         register var *v;
702         static char sidx[12];
703
704         sprintf(sidx, "%d", idx);
705         v = findvar(iamarray(a), sidx);
706         setvar_u(v, s);
707 }
708
709 /* assign numeric value to variable */
710 static var *setvar_i(var *v, double value)
711 {
712         clrvar(v);
713         v->type |= VF_NUMBER;
714         v->number = value;
715         handle_special(v);
716         return v;
717 }
718
719 static char *getvar_s(var *v)
720 {
721         /* if v is numeric and has no cached string, convert it to string */
722         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
723                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
724                 v->string = bb_xstrdup(buf);
725                 v->type |= VF_CACHED;
726         }
727         return (v->string == NULL) ? "" : v->string;
728 }
729
730 static double getvar_i(var *v)
731 {
732         char *s;
733
734         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
735                 v->number = 0;
736                 s = v->string;
737                 if (s && *s) {
738                         v->number = strtod(s, &s);
739                         if (v->type & VF_USER) {
740                                 skip_spaces(&s);
741                                 if (*s != '\0')
742                                         v->type &= ~VF_USER;
743                         }
744                 } else {
745                         v->type &= ~VF_USER;
746                 }
747                 v->type |= VF_CACHED;
748         }
749         return v->number;
750 }
751
752 static var *copyvar(var *dest, const var *src)
753 {
754         if (dest != src) {
755                 clrvar(dest);
756                 dest->type |= (src->type & ~VF_DONTTOUCH);
757                 dest->number = src->number;
758                 if (src->string)
759                         dest->string = bb_xstrdup(src->string);
760         }
761         handle_special(dest);
762         return dest;
763 }
764
765 static var *incvar(var *v)
766 {
767         return setvar_i(v, getvar_i(v)+1.);
768 }
769
770 /* return true if v is number or numeric string */
771 static int is_numeric(var *v)
772 {
773         getvar_i(v);
774         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
775 }
776
777 /* return 1 when value of v corresponds to true, 0 otherwise */
778 static int istrue(var *v)
779 {
780         if (is_numeric(v))
781                 return (v->number == 0) ? 0 : 1;
782         else
783                 return (v->string && *(v->string)) ? 1 : 0;
784 }
785
786 /* temporary variables allocator. Last allocated should be first freed */
787 static var *nvalloc(int n)
788 {
789         nvblock *pb = NULL;
790         var *v, *r;
791         int size;
792
793         while (cb) {
794                 pb = cb;
795                 if ((cb->pos - cb->nv) + n <= cb->size) break;
796                 cb = cb->next;
797         }
798
799         if (! cb) {
800                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
801                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
802                 cb->size = size;
803                 cb->pos = cb->nv;
804                 cb->prev = pb;
805                 cb->next = NULL;
806                 if (pb) pb->next = cb;
807         }
808
809         v = r = cb->pos;
810         cb->pos += n;
811
812         while (v < cb->pos) {
813                 v->type = 0;
814                 v->string = NULL;
815                 v++;
816         }
817
818         return r;
819 }
820
821 static void nvfree(var *v)
822 {
823         var *p;
824
825         if (v < cb->nv || v >= cb->pos)
826                 runtime_error(EMSG_INTERNAL_ERROR);
827
828         for (p=v; p<cb->pos; p++) {
829                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
830                         clear_array(iamarray(p));
831                         free(p->x.array->items);
832                         free(p->x.array);
833                 }
834                 if (p->type & VF_WALK)
835                         free(p->x.walker);
836
837                 clrvar(p);
838         }
839
840         cb->pos = v;
841         while (cb->prev && cb->pos == cb->nv) {
842                 cb = cb->prev;
843         }
844 }
845
846 /* ------- awk program text parsing ------- */
847
848 /* Parse next token pointed by global pos, place results into global t.
849  * If token isn't expected, give away. Return token class
850  */
851 static uint32_t next_token(uint32_t expected)
852 {
853         char *p, *pp, *s;
854         char *tl;
855         uint32_t tc;
856         const uint32_t *ti;
857         int l;
858         static int concat_inserted;
859         static uint32_t save_tclass, save_info;
860         static uint32_t ltclass = TC_OPTERM;
861
862         if (t.rollback) {
863
864                 t.rollback = FALSE;
865
866         } else if (concat_inserted) {
867
868                 concat_inserted = FALSE;
869                 t.tclass = save_tclass;
870                 t.info = save_info;
871
872         } else {
873
874                 p = pos;
875
876         readnext:
877                 skip_spaces(&p);
878                 lineno = t.lineno;
879                 if (*p == '#')
880                         while (*p != '\n' && *p != '\0') p++;
881
882                 if (*p == '\n')
883                         t.lineno++;
884
885                 if (*p == '\0') {
886                         tc = TC_EOF;
887
888                 } else if (*p == '\"') {
889                         /* it's a string */
890                         t.string = s = ++p;
891                         while (*p != '\"') {
892                                 if (*p == '\0' || *p == '\n')
893                                         syntax_error(EMSG_UNEXP_EOS);
894                                 *(s++) = nextchar(&p);
895                         }
896                         p++;
897                         *s = '\0';
898                         tc = TC_STRING;
899
900                 } else if ((expected & TC_REGEXP) && *p == '/') {
901                         /* it's regexp */
902                         t.string = s = ++p;
903                         while (*p != '/') {
904                                 if (*p == '\0' || *p == '\n')
905                                         syntax_error(EMSG_UNEXP_EOS);
906                                 if ((*s++ = *p++) == '\\') {
907                                         pp = p;
908                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
909                                         if (*pp == '\\') *s++ = '\\';
910                                         if (p == pp) *s++ = *p++;
911                                 }
912                         }
913                         p++;
914                         *s = '\0';
915                         tc = TC_REGEXP;
916
917                 } else if (*p == '.' || isdigit(*p)) {
918                         /* it's a number */
919                         t.number = strtod(p, &p);
920                         if (*p == '.')
921                                 syntax_error(EMSG_UNEXP_TOKEN);
922                         tc = TC_NUMBER;
923
924                 } else {
925                         /* search for something known */
926                         tl = tokenlist;
927                         tc = 0x00000001;
928                         ti = tokeninfo;
929                         while (*tl) {
930                                 l = *(tl++);
931                                 if (l == NTCC) {
932                                         tc <<= 1;
933                                         continue;
934                                 }
935                                 /* if token class is expected, token
936                                  * matches and it's not a longer word,
937                                  * then this is what we are looking for
938                                  */
939                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940                                 *tl == *p && strncmp(p, tl, l) == 0 &&
941                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
942                                         t.info = *ti;
943                                         p += l;
944                                         break;
945                                 }
946                                 ti++;
947                                 tl += l;
948                         }
949
950                         if (! *tl) {
951                                 /* it's a name (var/array/function),
952                                  * otherwise it's something wrong
953                                  */
954                                 if (! isalnum_(*p))
955                                         syntax_error(EMSG_UNEXP_TOKEN);
956
957                                 t.string = --p;
958                                 while(isalnum_(*(++p))) {
959                                         *(p-1) = *p;
960                                 }
961                                 *(p-1) = '\0';
962                                 tc = TC_VARIABLE;
963                                 /* also consume whitespace between functionname and bracket */
964                                 skip_spaces(&p);
965                                 if (*p == '(') {
966                                         tc = TC_FUNCTION;
967                                 } else {
968                                         if (*p == '[') {
969                                                 p++;
970                                                 tc = TC_ARRAY;
971                                         }
972                                 }
973                         }
974                 }
975                 pos = p;
976
977                 /* skipping newlines in some cases */
978                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
979                         goto readnext;
980
981                 /* insert concatenation operator when needed */
982                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
983                         concat_inserted = TRUE;
984                         save_tclass = tc;
985                         save_info = t.info;
986                         tc = TC_BINOP;
987                         t.info = OC_CONCAT | SS | P(35);
988                 }
989
990                 t.tclass = tc;
991         }
992         ltclass = t.tclass;
993
994         /* Are we ready for this? */
995         if (! (ltclass & expected))
996                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
997                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
998
999         return ltclass;
1000 }
1001
1002 static void rollback_token(void) { t.rollback = TRUE; }
1003
1004 static node *new_node(uint32_t info)
1005 {
1006         register node *n;
1007
1008         n = (node *)xcalloc(sizeof(node), 1);
1009         n->info = info;
1010         n->lineno = lineno;
1011         return n;
1012 }
1013
1014 static node *mk_re_node(char *s, node *n, regex_t *re)
1015 {
1016         n->info = OC_REGEXP;
1017         n->l.re = re;
1018         n->r.ire = re + 1;
1019         xregcomp(re, s, REG_EXTENDED);
1020         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1021
1022         return n;
1023 }
1024
1025 static node *condition(void)
1026 {
1027         next_token(TC_SEQSTART);
1028         return parse_expr(TC_SEQTERM);
1029 }
1030
1031 /* parse expression terminated by given argument, return ptr
1032  * to built subtree. Terminator is eaten by parse_expr */
1033 static node *parse_expr(uint32_t iexp)
1034 {
1035         node sn;
1036         node *cn = &sn;
1037         node *vn, *glptr;
1038         uint32_t tc, xtc;
1039         var *v;
1040
1041         sn.info = PRIMASK;
1042         sn.r.n = glptr = NULL;
1043         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1044
1045         while (! ((tc = next_token(xtc)) & iexp)) {
1046                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1047                         /* input redirection (<) attached to glptr node */
1048                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1049                         cn->a.n = glptr;
1050                         xtc = TC_OPERAND | TC_UOPPRE;
1051                         glptr = NULL;
1052
1053                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1054                         /* for binary and postfix-unary operators, jump back over
1055                          * previous operators with higher priority */
1056                         vn = cn;
1057                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1058                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1059                                 vn = vn->a.n;
1060                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1061                                 t.info += P(6);
1062                         cn = vn->a.n->r.n = new_node(t.info);
1063                         cn->a.n = vn->a.n;
1064                         if (tc & TC_BINOP) {
1065                                 cn->l.n = vn;
1066                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1067                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1068                                         /* it's a pipe */
1069                                         next_token(TC_GETLINE);
1070                                         /* give maximum priority to this pipe */
1071                                         cn->info &= ~PRIMASK;
1072                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1073                                 }
1074                         } else {
1075                                 cn->r.n = vn;
1076                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1077                         }
1078                         vn->a.n = cn;
1079
1080                 } else {
1081                         /* for operands and prefix-unary operators, attach them
1082                          * to last node */
1083                         vn = cn;
1084                         cn = vn->r.n = new_node(t.info);
1085                         cn->a.n = vn;
1086                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1087                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1088                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1089                                 /* one should be very careful with switch on tclass -
1090                                  * only simple tclasses should be used! */
1091                                 switch (tc) {
1092                                   case TC_VARIABLE:
1093                                   case TC_ARRAY:
1094                                         cn->info = OC_VAR;
1095                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1096                                                 cn->info = OC_FNARG;
1097                                                 cn->l.i = v->x.aidx;
1098                                         } else {
1099                                                 cn->l.v = newvar(t.string);
1100                                         }
1101                                         if (tc & TC_ARRAY) {
1102                                                 cn->info |= xS;
1103                                                 cn->r.n = parse_expr(TC_ARRTERM);
1104                                         }
1105                                         break;
1106
1107                                   case TC_NUMBER:
1108                                   case TC_STRING:
1109                                         cn->info = OC_VAR;
1110                                         v = cn->l.v = xcalloc(sizeof(var), 1);
1111                                         if (tc & TC_NUMBER)
1112                                                 setvar_i(v, t.number);
1113                                         else
1114                                                 setvar_s(v, t.string);
1115                                         break;
1116
1117                                   case TC_REGEXP:
1118                                         mk_re_node(t.string, cn,
1119                                                                         (regex_t *)xcalloc(sizeof(regex_t),2));
1120                                         break;
1121
1122                                   case TC_FUNCTION:
1123                                         cn->info = OC_FUNC;
1124                                         cn->r.f = newfunc(t.string);
1125                                         cn->l.n = condition();
1126                                         break;
1127
1128                                   case TC_SEQSTART:
1129                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1130                                         cn->a.n = vn;
1131                                         break;
1132
1133                                   case TC_GETLINE:
1134                                         glptr = cn;
1135                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1136                                         break;
1137
1138                                   case TC_BUILTIN:
1139                                         cn->l.n = condition();
1140                                         break;
1141                                 }
1142                         }
1143                 }
1144         }
1145         return sn.r.n;
1146 }
1147
1148 /* add node to chain. Return ptr to alloc'd node */
1149 static node *chain_node(uint32_t info)
1150 {
1151         register node *n;
1152
1153         if (! seq->first)
1154                 seq->first = seq->last = new_node(0);
1155
1156         if (seq->programname != programname) {
1157                 seq->programname = programname;
1158                 n = chain_node(OC_NEWSOURCE);
1159                 n->l.s = bb_xstrdup(programname);
1160         }
1161
1162         n = seq->last;
1163         n->info = info;
1164         seq->last = n->a.n = new_node(OC_DONE);
1165
1166         return n;
1167 }
1168
1169 static void chain_expr(uint32_t info)
1170 {
1171         node *n;
1172
1173         n = chain_node(info);
1174         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1175         if (t.tclass & TC_GRPTERM)
1176                 rollback_token();
1177 }
1178
1179 static node *chain_loop(node *nn)
1180 {
1181         node *n, *n2, *save_brk, *save_cont;
1182
1183         save_brk = break_ptr;
1184         save_cont = continue_ptr;
1185
1186         n = chain_node(OC_BR | Vx);
1187         continue_ptr = new_node(OC_EXEC);
1188         break_ptr = new_node(OC_EXEC);
1189         chain_group();
1190         n2 = chain_node(OC_EXEC | Vx);
1191         n2->l.n = nn;
1192         n2->a.n = n;
1193         continue_ptr->a.n = n2;
1194         break_ptr->a.n = n->r.n = seq->last;
1195
1196         continue_ptr = save_cont;
1197         break_ptr = save_brk;
1198
1199         return n;
1200 }
1201
1202 /* parse group and attach it to chain */
1203 static void chain_group(void)
1204 {
1205         uint32_t c;
1206         node *n, *n2, *n3;
1207
1208         do {
1209                 c = next_token(TC_GRPSEQ);
1210         } while (c & TC_NEWLINE);
1211
1212         if (c & TC_GRPSTART) {
1213                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1214                         if (t.tclass & TC_NEWLINE) continue;
1215                         rollback_token();
1216                         chain_group();
1217                 }
1218         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1219                 rollback_token();
1220                 chain_expr(OC_EXEC | Vx);
1221         } else {                                                /* TC_STATEMNT */
1222                 switch (t.info & OPCLSMASK) {
1223                         case ST_IF:
1224                                 n = chain_node(OC_BR | Vx);
1225                                 n->l.n = condition();
1226                                 chain_group();
1227                                 n2 = chain_node(OC_EXEC);
1228                                 n->r.n = seq->last;
1229                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1230                                         chain_group();
1231                                         n2->a.n = seq->last;
1232                                 } else {
1233                                         rollback_token();
1234                                 }
1235                                 break;
1236
1237                         case ST_WHILE:
1238                                 n2 = condition();
1239                                 n = chain_loop(NULL);
1240                                 n->l.n = n2;
1241                                 break;
1242
1243                         case ST_DO:
1244                                 n2 = chain_node(OC_EXEC);
1245                                 n = chain_loop(NULL);
1246                                 n2->a.n = n->a.n;
1247                                 next_token(TC_WHILE);
1248                                 n->l.n = condition();
1249                                 break;
1250
1251                         case ST_FOR:
1252                                 next_token(TC_SEQSTART);
1253                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1254                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
1255                                         if ((n2->info & OPCLSMASK) != OC_IN)
1256                                                 syntax_error(EMSG_UNEXP_TOKEN);
1257                                         n = chain_node(OC_WALKINIT | VV);
1258                                         n->l.n = n2->l.n;
1259                                         n->r.n = n2->r.n;
1260                                         n = chain_loop(NULL);
1261                                         n->info = OC_WALKNEXT | Vx;
1262                                         n->l.n = n2->l.n;
1263                                 } else {                                                                        /* for(;;) */
1264                                         n = chain_node(OC_EXEC | Vx);
1265                                         n->l.n = n2;
1266                                         n2 = parse_expr(TC_SEMICOL);
1267                                         n3 = parse_expr(TC_SEQTERM);
1268                                         n = chain_loop(n3);
1269                                         n->l.n = n2;
1270                                         if (! n2)
1271                                                 n->info = OC_EXEC;
1272                                 }
1273                                 break;
1274
1275                         case OC_PRINT:
1276                         case OC_PRINTF:
1277                                 n = chain_node(t.info);
1278                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1279                                 if (t.tclass & TC_OUTRDR) {
1280                                         n->info |= t.info;
1281                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1282                                 }
1283                                 if (t.tclass & TC_GRPTERM)
1284                                         rollback_token();
1285                                 break;
1286
1287                         case OC_BREAK:
1288                                 n = chain_node(OC_EXEC);
1289                                 n->a.n = break_ptr;
1290                                 break;
1291
1292                         case OC_CONTINUE:
1293                                 n = chain_node(OC_EXEC);
1294                                 n->a.n = continue_ptr;
1295                                 break;
1296
1297                         /* delete, next, nextfile, return, exit */
1298                         default:
1299                                 chain_expr(t.info);
1300
1301                 }
1302         }
1303 }
1304
1305 static void parse_program(char *p)
1306 {
1307         uint32_t tclass;
1308         node *cn;
1309         func *f;
1310         var *v;
1311
1312         pos = p;
1313         t.lineno = 1;
1314         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1315                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1316
1317                 if (tclass & TC_OPTERM)
1318                         continue;
1319
1320                 seq = &mainseq;
1321                 if (tclass & TC_BEGIN) {
1322                         seq = &beginseq;
1323                         chain_group();
1324
1325                 } else if (tclass & TC_END) {
1326                         seq = &endseq;
1327                         chain_group();
1328
1329                 } else if (tclass & TC_FUNCDECL) {
1330                         next_token(TC_FUNCTION);
1331                         pos++;
1332                         f = newfunc(t.string);
1333                         f->body.first = NULL;
1334                         f->nargs = 0;
1335                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1336                                 v = findvar(ahash, t.string);
1337                                 v->x.aidx = (f->nargs)++;
1338
1339                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1340                                         break;
1341                         }
1342                         seq = &(f->body);
1343                         chain_group();
1344                         clear_array(ahash);
1345
1346                 } else if (tclass & TC_OPSEQ) {
1347                         rollback_token();
1348                         cn = chain_node(OC_TEST);
1349                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1350                         if (t.tclass & TC_GRPSTART) {
1351                                 rollback_token();
1352                                 chain_group();
1353                         } else {
1354                                 chain_node(OC_PRINT);
1355                         }
1356                         cn->r.n = mainseq.last;
1357
1358                 } else /* if (tclass & TC_GRPSTART) */ {
1359                         rollback_token();
1360                         chain_group();
1361                 }
1362         }
1363 }
1364
1365
1366 /* -------- program execution part -------- */
1367
1368 static node *mk_splitter(char *s, tsplitter *spl)
1369 {
1370         register regex_t *re, *ire;
1371         node *n;
1372
1373         re = &spl->re[0];
1374         ire = &spl->re[1];
1375         n = &spl->n;
1376         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1377                 regfree(re);
1378                 regfree(ire);
1379         }
1380         if (bb_strlen(s) > 1) {
1381                 mk_re_node(s, n, re);
1382         } else {
1383                 n->info = (uint32_t) *s;
1384         }
1385
1386         return n;
1387 }
1388
1389 /* use node as a regular expression. Supplied with node ptr and regex_t
1390  * storage space. Return ptr to regex (if result points to preg, it should
1391  * be later regfree'd manually
1392  */
1393 static regex_t *as_regex(node *op, regex_t *preg)
1394 {
1395         var *v;
1396         char *s;
1397
1398         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1399                 return icase ? op->r.ire : op->l.re;
1400         } else {
1401                 v = nvalloc(1);
1402                 s = getvar_s(evaluate(op, v));
1403                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1404                 nvfree(v);
1405                 return preg;
1406         }
1407 }
1408
/* Gradually growing buffer: make sure *b can hold more than n bytes.
 * When *b is NULL or n has reached the recorded size, the buffer is
 * reallocated to n + n/2 + 80 bytes and *size is updated accordingly.
 * (*size is not read while *b is NULL.)
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1415
1416 /* resize field storage space */
1417 static void fsrealloc(int size)
1418 {
1419         static int maxfields = 0;
1420         int i;
1421
1422         if (size >= maxfields) {
1423                 i = maxfields;
1424                 maxfields = size + 16;
1425                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1426                 for (; i<maxfields; i++) {
1427                         Fields[i].type = VF_SPECIAL;
1428                         Fields[i].string = NULL;
1429                 }
1430         }
1431
1432         if (size < nfields) {
1433                 for (i=size; i<nfields; i++) {
1434                         clrvar(Fields+i);
1435                 }
1436         }
1437         nfields = size;
1438 }
1439
1440 static int awk_split(char *s, node *spl, char **slist)
1441 {
1442         int l, n=0;
1443         char c[4];
1444         char *s1;
1445         regmatch_t pmatch[2];
1446
1447         /* in worst case, each char would be a separate field */
1448         *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1449
1450         c[0] = c[1] = (char)spl->info;
1451         c[2] = c[3] = '\0';
1452         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1453
1454         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1455                 while (*s) {
1456                         l = strcspn(s, c+2);
1457                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1458                         pmatch[0].rm_so <= l) {
1459                                 l = pmatch[0].rm_so;
1460                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1461                         } else {
1462                                 pmatch[0].rm_eo = l;
1463                                 if (*(s+l)) pmatch[0].rm_eo++;
1464                         }
1465
1466                         memcpy(s1, s, l);
1467                         *(s1+l) = '\0';
1468                         nextword(&s1);
1469                         s += pmatch[0].rm_eo;
1470                         n++;
1471                 }
1472         } else if (c[0] == '\0') {              /* null split */
1473                 while(*s) {
1474                         *(s1++) = *(s++);
1475                         *(s1++) = '\0';
1476                         n++;
1477                 }
1478         } else if (c[0] != ' ') {               /* single-character split */
1479                 if (icase) {
1480                         c[0] = toupper(c[0]);
1481                         c[1] = tolower(c[1]);
1482                 }
1483                 if (*s1) n++;
1484                 while ((s1 = strpbrk(s1, c))) {
1485                         *(s1++) = '\0';
1486                         n++;
1487                 }
1488         } else {                                /* space split */
1489                 while (*s) {
1490                         while (isspace(*s)) s++;
1491                         if (! *s) break;
1492                         n++;
1493                         while (*s && !isspace(*s))
1494                                 *(s1++) = *(s++);
1495                         *(s1++) = '\0';
1496                 }
1497         }
1498         return n;
1499 }
1500
1501 static void split_f0(void)
1502 {
1503         static char *fstrings = NULL;
1504         int i, n;
1505         char *s;
1506
1507         if (is_f0_split)
1508                 return;
1509
1510         is_f0_split = TRUE;
1511         free(fstrings);
1512         fsrealloc(0);
1513         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1514         fsrealloc(n);
1515         s = fstrings;
1516         for (i=0; i<n; i++) {
1517                 Fields[i].string = nextword(&s);
1518                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1519         }
1520
1521         /* set NF manually to avoid side effects */
1522         clrvar(V[NF]);
1523         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1524         V[NF]->number = nfields;
1525 }
1526
1527 /* perform additional actions when some internal variables changed */
1528 static void handle_special(var *v)
1529 {
1530         int n;
1531         char *b, *sep, *s;
1532         int sl, l, len, i, bsize;
1533
1534         if (! (v->type & VF_SPECIAL))
1535                 return;
1536
1537         if (v == V[NF]) {
1538                 n = (int)getvar_i(v);
1539                 fsrealloc(n);
1540
1541                 /* recalculate $0 */
1542                 sep = getvar_s(V[OFS]);
1543                 sl = bb_strlen(sep);
1544                 b = NULL;
1545                 len = 0;
1546                 for (i=0; i<n; i++) {
1547                         s = getvar_s(&Fields[i]);
1548                         l = bb_strlen(s);
1549                         if (b) {
1550                                 memcpy(b+len, sep, sl);
1551                                 len += sl;
1552                         }
1553                         qrealloc(&b, len+l+sl, &bsize);
1554                         memcpy(b+len, s, l);
1555                         len += l;
1556                 }
1557                 if (b) b[len] = '\0';
1558                 setvar_p(V[F0], b);
1559                 is_f0_split = TRUE;
1560
1561         } else if (v == V[F0]) {
1562                 is_f0_split = FALSE;
1563
1564         } else if (v == V[FS]) {
1565                 mk_splitter(getvar_s(v), &fsplitter);
1566
1567         } else if (v == V[RS]) {
1568                 mk_splitter(getvar_s(v), &rsplitter);
1569
1570         } else if (v == V[IGNORECASE]) {
1571                 icase = istrue(v);
1572
1573         } else {                                                /* $n */
1574                 n = getvar_i(V[NF]);
1575                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1576                 /* right here v is invalid. Just to note... */
1577         }
1578 }
1579
1580 /* step through func/builtin/etc arguments */
1581 static node *nextarg(node **pn)
1582 {
1583         node *n;
1584
1585         n = *pn;
1586         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1587                 *pn = n->r.n;
1588                 n = n->l.n;
1589         } else {
1590                 *pn = NULL;
1591         }
1592         return n;
1593 }
1594
1595 static void hashwalk_init(var *v, xhash *array)
1596 {
1597         char **w;
1598         hash_item *hi;
1599         int i;
1600
1601         if (v->type & VF_WALK)
1602                 free(v->x.walker);
1603
1604         v->type |= VF_WALK;
1605         w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1606         *w = *(w+1) = (char *)(w + 2);
1607         for (i=0; i<array->csize; i++) {
1608                 hi = array->items[i];
1609                 while(hi) {
1610                         strcpy(*w, hi->name);
1611                         nextword(w);
1612                         hi = hi->next;
1613                 }
1614         }
1615 }
1616
1617 static int hashwalk_next(var *v)
1618 {
1619         char **w;
1620
1621         w = v->x.walker;
1622         if (*(w+1) == *w)
1623                 return FALSE;
1624
1625         setvar_s(v, nextword(w+1));
1626         return TRUE;
1627 }
1628
1629 /* evaluate node, return 1 when result is true, 0 otherwise */
1630 static int ptest(node *pattern)
1631 {
1632         static var v;
1633         return istrue(evaluate(pattern, &v));
1634 }
1635
1636 /* read next record from stream rsm into a variable v */
1637 static int awk_getline(rstream *rsm, var *v)
1638 {
1639         char *b;
1640         regmatch_t pmatch[2];
1641         int a, p, pp=0, size;
1642         int fd, so, eo, r, rp;
1643         char c, *m, *s;
1644
1645         /* we're using our own buffer since we need access to accumulating
1646          * characters
1647          */
1648         fd = fileno(rsm->F);
1649         m = rsm->buffer;
1650         a = rsm->adv;
1651         p = rsm->pos;
1652         size = rsm->size;
1653         c = (char) rsplitter.n.info;
1654         rp = 0;
1655
1656         if (! m) qrealloc(&m, 256, &size);
1657         do {
1658                 b = m + a;
1659                 so = eo = p;
1660                 r = 1;
1661                 if (p > 0) {
1662                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1663                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1664                                                                                                 b, 1, pmatch, 0) == 0) {
1665                                         so = pmatch[0].rm_so;
1666                                         eo = pmatch[0].rm_eo;
1667                                         if (b[eo] != '\0')
1668                                                 break;
1669                                 }
1670                         } else if (c != '\0') {
1671                                 s = strchr(b+pp, c);
1672                                 if (s) {
1673                                         so = eo = s-b;
1674                                         eo++;
1675                                         break;
1676                                 }
1677                         } else {
1678                                 while (b[rp] == '\n')
1679                                         rp++;
1680                                 s = strstr(b+rp, "\n\n");
1681                                 if (s) {
1682                                         so = eo = s-b;
1683                                         while (b[eo] == '\n') eo++;
1684                                         if (b[eo] != '\0')
1685                                                 break;
1686                                 }
1687                         }
1688                 }
1689
1690                 if (a > 0) {
1691                         memmove(m, (const void *)(m+a), p+1);
1692                         b = m;
1693                         a = 0;
1694                 }
1695
1696                 qrealloc(&m, a+p+128, &size);
1697                 b = m + a;
1698                 pp = p;
1699                 p += safe_read(fd, b+p, size-p-1);
1700                 if (p < pp) {
1701                         p = 0;
1702                         r = 0;
1703                         setvar_i(V[ERRNO], errno);
1704                 }
1705                 b[p] = '\0';
1706
1707         } while (p > pp);
1708
1709         if (p == 0) {
1710                 r--;
1711         } else {
1712                 c = b[so]; b[so] = '\0';
1713                 setvar_s(v, b+rp);
1714                 v->type |= VF_USER;
1715                 b[so] = c;
1716                 c = b[eo]; b[eo] = '\0';
1717                 setvar_s(V[RT], b+so);
1718                 b[eo] = c;
1719         }
1720
1721         rsm->buffer = m;
1722         rsm->adv = a + eo;
1723         rsm->pos = p - eo;
1724         rsm->size = size;
1725
1726         return r;
1727 }
1728
1729 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1730 {
1731         int r=0;
1732         char c;
1733         const char *s=format;
1734
1735         if (int_as_int && n == (int)n) {
1736                 r = snprintf(b, size, "%d", (int)n);
1737         } else {
1738                 do { c = *s; } while (*s && *++s);
1739                 if (strchr("diouxX", c)) {
1740                         r = snprintf(b, size, format, (int)n);
1741                 } else if (strchr("eEfgG", c)) {
1742                         r = snprintf(b, size, format, n);
1743                 } else {
1744                         runtime_error(EMSG_INV_FMT);
1745                 }
1746         }
1747         return r;
1748 }
1749
1750
1751 /* formatted output into an allocated buffer, return ptr to buffer */
1752 static char *awk_printf(node *n)
1753 {
1754         char *b = NULL;
1755         char *fmt, *s, *s1, *f;
1756         int i, j, incr, bsize;
1757         char c, c1;
1758         var *v, *arg;
1759
1760         v = nvalloc(1);
1761         fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1762
1763         i = 0;
1764         while (*f) {
1765                 s = f;
1766                 while (*f && (*f != '%' || *(++f) == '%'))
1767                         f++;
1768                 while (*f && !isalpha(*f))
1769                         f++;
1770
1771                 incr = (f - s) + MAXVARFMT;
1772                 qrealloc(&b, incr+i, &bsize);
1773                 c = *f; if (c != '\0') f++;
1774                 c1 = *f ; *f = '\0';
1775                 arg = evaluate(nextarg(&n), v);
1776
1777                 j = i;
1778                 if (c == 'c' || !c) {
1779                         i += sprintf(b+i, s,
1780                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1781
1782                 } else if (c == 's') {
1783                     s1 = getvar_s(arg);
1784                         qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1785                         i += sprintf(b+i, s, s1);
1786
1787                 } else {
1788                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1789                 }
1790                 *f = c1;
1791
1792                 /* if there was an error while sprintf, return value is negative */
1793                 if (i < j) i = j;
1794
1795         }
1796
1797         b = xrealloc(b, i+1);
1798         free(fmt);
1799         nvfree(v);
1800         b[i] = '\0';
1801         return b;
1802 }
1803
1804 /* common substitution routine
1805  * replace (nm) substring of (src) that match (n) with (repl), store
1806  * result into (dest), return number of substitutions. If nm=0, replace
1807  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1808  * subexpression matching (\1-\9)
1809  */
1810 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1811 {
1812         char *ds = NULL;
1813         char *sp, *s;
1814         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1815         regmatch_t pmatch[10];
1816         regex_t sreg, *re;
1817
1818         re = as_regex(rn, &sreg);
1819         if (! src) src = V[F0];
1820         if (! dest) dest = V[F0];
1821
1822         i = di = 0;
1823         sp = getvar_s(src);
1824         rl = bb_strlen(repl);
1825         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1826                 so = pmatch[0].rm_so;
1827                 eo = pmatch[0].rm_eo;
1828
1829                 qrealloc(&ds, di + eo + rl, &dssize);
1830                 memcpy(ds + di, sp, eo);
1831                 di += eo;
1832                 if (++i >= nm) {
1833                         /* replace */
1834                         di -= (eo - so);
1835                         nbs = 0;
1836                         for (s = repl; *s; s++) {
1837                                 ds[di++] = c = *s;
1838                                 if (c == '\\') {
1839                                         nbs++;
1840                                         continue;
1841                                 }
1842                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1843                                         di -= ((nbs + 3) >> 1);
1844                                         j = 0;
1845                                         if (c != '&') {
1846                                                 j = c - '0';
1847                                                 nbs++;
1848                                         }
1849                                         if (nbs % 2) {
1850                                                 ds[di++] = c;
1851                                         } else {
1852                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1853                                                 qrealloc(&ds, di + rl + n, &dssize);
1854                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1855                                                 di += n;
1856                                         }
1857                                 }
1858                                 nbs = 0;
1859                         }
1860                 }
1861
1862                 sp += eo;
1863                 if (i == nm) break;
1864                 if (eo == so) {
1865                         if (! (ds[di++] = *sp++)) break;
1866                 }
1867         }
1868
1869         qrealloc(&ds, di + strlen(sp), &dssize);
1870         strcpy(ds + di, sp);
1871         setvar_p(dest, ds);
1872         if (re == &sreg) regfree(re);
1873         return i;
1874 }
1875
1876 static var *exec_builtin(node *op, var *res)
1877 {
1878         int (*to_xxx)(int);
1879         var *tv;
1880         node *an[4];
1881         var  *av[4];
1882         char *as[4];
1883         regmatch_t pmatch[2];
1884         regex_t sreg, *re;
1885         static tsplitter tspl;
1886         node *spl;
1887         uint32_t isr, info;
1888         int nargs;
1889         time_t tt;
1890         char *s, *s1;
1891         int i, l, ll, n;
1892
1893         tv = nvalloc(4);
1894         isr = info = op->info;
1895         op = op->l.n;
1896
1897         av[2] = av[3] = NULL;
1898         for (i=0 ; i<4 && op ; i++) {
1899                 an[i] = nextarg(&op);
1900                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1901                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1902                 isr >>= 1;
1903         }
1904
1905         nargs = i;
1906         if (nargs < (info >> 30))
1907                 runtime_error(EMSG_TOO_FEW_ARGS);
1908
1909         switch (info & OPNMASK) {
1910
1911           case B_a2:
1912 #ifdef CONFIG_FEATURE_AWK_MATH
1913                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1914 #else
1915                 runtime_error(EMSG_NO_MATH);
1916 #endif
1917                 break;
1918
1919           case B_sp:
1920                 if (nargs > 2) {
1921                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1922                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1923                 } else {
1924                         spl = &fsplitter.n;
1925                 }
1926
1927                 n = awk_split(as[0], spl, &s);
1928                 s1 = s;
1929                 clear_array(iamarray(av[1]));
1930                 for (i=1; i<=n; i++)
1931                         setari_u(av[1], i, nextword(&s1));
1932                 free(s);
1933                 setvar_i(res, n);
1934                 break;
1935
1936           case B_ss:
1937                 l = bb_strlen(as[0]);
1938                 i = getvar_i(av[1]) - 1;
1939                 if (i>l) i=l; if (i<0) i=0;
1940                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1941                 if (n<0) n=0;
1942                 s = xmalloc(n+1);
1943                 strncpy(s, as[0]+i, n);
1944                 s[n] = '\0';
1945                 setvar_p(res, s);
1946                 break;
1947
1948           case B_lo:
1949                 to_xxx = tolower;
1950                 goto lo_cont;
1951
1952           case B_up:
1953                 to_xxx = toupper;
1954 lo_cont:
1955                 s1 = s = bb_xstrdup(as[0]);
1956                 while (*s1) {
1957                         *s1 = (*to_xxx)(*s1);
1958                         s1++;
1959                 }
1960                 setvar_p(res, s);
1961                 break;
1962
1963           case B_ix:
1964                 n = 0;
1965                 ll = bb_strlen(as[1]);
1966                 l = bb_strlen(as[0]) - ll;
1967                 if (ll > 0 && l >= 0) {
1968                         if (! icase) {
1969                                 s = strstr(as[0], as[1]);
1970                                 if (s) n = (s - as[0]) + 1;
1971                         } else {
1972                                 /* this piece of code is terribly slow and
1973                                  * really should be rewritten
1974                                  */
1975                                 for (i=0; i<=l; i++) {
1976                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1977                                                 n = i+1;
1978                                                 break;
1979                                         }
1980                                 }
1981                         }
1982                 }
1983                 setvar_i(res, n);
1984                 break;
1985
1986           case B_ti:
1987                 if (nargs > 1)
1988                         tt = getvar_i(av[1]);
1989                 else
1990                         time(&tt);
1991                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1992                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1993                 buf[i] = '\0';
1994                 setvar_s(res, buf);
1995                 break;
1996
1997           case B_ma:
1998                 re = as_regex(an[1], &sreg);
1999                 n = regexec(re, as[0], 1, pmatch, 0);
2000                 if (n == 0) {
2001                         pmatch[0].rm_so++;
2002                         pmatch[0].rm_eo++;
2003                 } else {
2004                         pmatch[0].rm_so = 0;
2005                         pmatch[0].rm_eo = -1;
2006                 }
2007                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2008                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2009                 setvar_i(res, pmatch[0].rm_so);
2010                 if (re == &sreg) regfree(re);
2011                 break;
2012
2013           case B_ge:
2014                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2015                 break;
2016
2017           case B_gs:
2018                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2019                 break;
2020
2021           case B_su:
2022                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2023                 break;
2024         }
2025
2026         nvfree(tv);
2027         return res;
2028 }
2029
2030 /*
2031  * Evaluate node - the heart of the program. Supplied with subtree
2032  * and place where to store result. returns ptr to result.
2033  */
2034 #define XC(n) ((n) >> 8)
2035
2036 static var *evaluate(node *op, var *res)
2037 {
2038         /* This procedure is recursive so we should count every byte */
2039         static var *fnargs = NULL;
2040         static unsigned int seed = 1;
2041         static regex_t sreg;
2042         node *op1;
2043         var *v1;
2044         union {
2045                 var *v;
2046                 char *s;
2047                 double d;
2048                 int i;
2049         } L, R;
2050         uint32_t opinfo;
2051         short opn;
2052         union {
2053                 char *s;
2054                 rstream *rsm;
2055                 FILE *F;
2056                 var *v;
2057                 regex_t *re;
2058                 uint32_t info;
2059         } X;
2060
2061         if (! op)
2062                 return setvar_s(res, NULL);
2063
2064         v1 = nvalloc(2);
2065
2066         while (op) {
2067
2068                 opinfo = op->info;
2069                 opn = (short)(opinfo & OPNMASK);
2070                 lineno = op->lineno;
2071
2072                 /* execute inevitable things */
2073                 op1 = op->l.n;
2074                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2075                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2076                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2077                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2078                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2079
2080                 switch (XC(opinfo & OPCLSMASK)) {
2081
2082                   /* -- iterative node type -- */
2083
2084                   /* test pattern */
2085                   case XC( OC_TEST ):
2086                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2087                                 /* it's range pattern */
2088                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2089                                         op->info |= OF_CHECKED;
2090                                         if (ptest(op1->r.n))
2091                                                 op->info &= ~OF_CHECKED;
2092
2093                                         op = op->a.n;
2094                                 } else {
2095                                         op = op->r.n;
2096                                 }
2097                         } else {
2098                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2099                         }
2100                         break;
2101
2102                   /* just evaluate an expression, also used as unconditional jump */
2103                   case XC( OC_EXEC ):
2104                         break;
2105
2106                   /* branch, used in if-else and various loops */
2107                   case XC( OC_BR ):
2108                         op = istrue(L.v) ? op->a.n : op->r.n;
2109                         break;
2110
2111                   /* initialize for-in loop */
2112                   case XC( OC_WALKINIT ):
2113                         hashwalk_init(L.v, iamarray(R.v));
2114                         break;
2115
2116                   /* get next array item */
2117                   case XC( OC_WALKNEXT ):
2118                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2119                         break;
2120
2121                   case XC( OC_PRINT ):
2122                   case XC( OC_PRINTF ):
2123                         X.F = stdout;
2124                         if (op->r.n) {
2125                                 X.rsm = newfile(R.s);
2126                                 if (! X.rsm->F) {
2127                                         if (opn == '|') {
2128                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2129                                                         bb_perror_msg_and_die("popen");
2130                                                 X.rsm->is_pipe = 1;
2131                                         } else {
2132                                                 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2133                                         }
2134                                 }
2135                                 X.F = X.rsm->F;
2136                         }
2137
2138                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2139                                 if (! op1) {
2140                                         fputs(getvar_s(V[F0]), X.F);
2141                                 } else {
2142                                         while (op1) {
2143                                                 L.v = evaluate(nextarg(&op1), v1);
2144                                                 if (L.v->type & VF_NUMBER) {
2145                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2146                                                                                                                 getvar_i(L.v), TRUE);
2147                                                         fputs(buf, X.F);
2148                                                 } else {
2149                                                         fputs(getvar_s(L.v), X.F);
2150                                                 }
2151
2152                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2153                                         }
2154                                 }
2155                                 fputs(getvar_s(V[ORS]), X.F);
2156
2157                         } else {        /* OC_PRINTF */
2158                                 L.s = awk_printf(op1);
2159                                 fputs(L.s, X.F);
2160                                 free(L.s);
2161                         }
2162                         fflush(X.F);
2163                         break;
2164
2165                   case XC( OC_DELETE ):
2166                         X.info = op1->info & OPCLSMASK;
2167                         if (X.info == OC_VAR) {
2168                                 R.v = op1->l.v;
2169                         } else if (X.info == OC_FNARG) {
2170                                 R.v = &fnargs[op1->l.i];
2171                         } else {
2172                                 runtime_error(EMSG_NOT_ARRAY);
2173                         }
2174
2175                         if (op1->r.n) {
2176                                 clrvar(L.v);
2177                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2178                                 hash_remove(iamarray(R.v), L.s);
2179                         } else {
2180                                 clear_array(iamarray(R.v));
2181                         }
2182                         break;
2183
2184                   case XC( OC_NEWSOURCE ):
2185                         programname = op->l.s;
2186                         break;
2187
2188                   case XC( OC_RETURN ):
2189                         copyvar(res, L.v);
2190                         break;
2191
2192                   case XC( OC_NEXTFILE ):
2193                         nextfile = TRUE;
2194                   case XC( OC_NEXT ):
2195                         nextrec = TRUE;
2196                   case XC( OC_DONE ):
2197                         clrvar(res);
2198                         break;
2199
2200                   case XC( OC_EXIT ):
2201                         awk_exit(L.d);
2202
2203                   /* -- recursive node type -- */
2204
2205                   case XC( OC_VAR ):
2206                         L.v = op->l.v;
2207                         if (L.v == V[NF])
2208                                 split_f0();
2209                         goto v_cont;
2210
2211                   case XC( OC_FNARG ):
2212                         L.v = &fnargs[op->l.i];
2213
2214 v_cont:
2215                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2216                         break;
2217
2218                   case XC( OC_IN ):
2219                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2220                         break;
2221
2222                   case XC( OC_REGEXP ):
2223                         op1 = op;
2224                         L.s = getvar_s(V[F0]);
2225                         goto re_cont;
2226
2227                   case XC( OC_MATCH ):
2228                         op1 = op->r.n;
2229 re_cont:
2230                         X.re = as_regex(op1, &sreg);
2231                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2232                         if (X.re == &sreg) regfree(X.re);
2233                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2234                         break;
2235
2236                   case XC( OC_MOVE ):
2237                         /* if source is a temporary string, jusk relink it to dest */
2238                         if (R.v == v1+1 && R.v->string) {
2239                                 res = setvar_p(L.v, R.v->string);
2240                                 R.v->string = NULL;
2241                         } else {
2242                                 res = copyvar(L.v, R.v);
2243                         }
2244                         break;
2245
2246                   case XC( OC_TERNARY ):
2247                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2248                                 runtime_error(EMSG_POSSIBLE_ERROR);
2249                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2250                         break;
2251
2252                   case XC( OC_FUNC ):
2253                         if (! op->r.f->body.first)
2254                                 runtime_error(EMSG_UNDEF_FUNC);
2255
2256                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2257                         while (op1) {
2258                                 L.v = evaluate(nextarg(&op1), v1);
2259                                 copyvar(R.v, L.v);
2260                                 R.v->type |= VF_CHILD;
2261                                 R.v->x.parent = L.v;
2262                                 if (++R.v - X.v >= op->r.f->nargs)
2263                                         break;
2264                         }
2265
2266                         R.v = fnargs;
2267                         fnargs = X.v;
2268
2269                         L.s = programname;
2270                         res = evaluate(op->r.f->body.first, res);
2271                         programname = L.s;
2272
2273                         nvfree(fnargs);
2274                         fnargs = R.v;
2275                         break;
2276
2277                   case XC( OC_GETLINE ):
2278                   case XC( OC_PGETLINE ):
2279                         if (op1) {
2280                                 X.rsm = newfile(L.s);
2281                                 if (! X.rsm->F) {
2282                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2283                                                 X.rsm->F = popen(L.s, "r");
2284                                                 X.rsm->is_pipe = TRUE;
2285                                         } else {
2286                                                 X.rsm->F = fopen(L.s, "r");             /* not bb_xfopen! */
2287                                         }
2288                                 }
2289                         } else {
2290                                 if (! iF) iF = next_input_file();
2291                                 X.rsm = iF;
2292                         }
2293
2294                         if (! X.rsm->F) {
2295                                 setvar_i(V[ERRNO], errno);
2296                                 setvar_i(res, -1);
2297                                 break;
2298                         }
2299
2300                         if (! op->r.n)
2301                                 R.v = V[F0];
2302
2303                         L.i = awk_getline(X.rsm, R.v);
2304                         if (L.i > 0) {
2305                                 if (! op1) {
2306                                         incvar(V[FNR]);
2307                                         incvar(V[NR]);
2308                                 }
2309                         }
2310                         setvar_i(res, L.i);
2311                         break;
2312
2313                   /* simple builtins */
2314                   case XC( OC_FBLTIN ):
2315                         switch (opn) {
2316
2317                           case F_in:
2318                                 R.d = (int)L.d;
2319                                 break;
2320
2321                           case F_rn:
2322                                 R.d =  (double)rand() / (double)RAND_MAX;
2323                                 break;
2324
2325 #ifdef CONFIG_FEATURE_AWK_MATH
2326                           case F_co:
2327                                 R.d = cos(L.d);
2328                                 break;
2329
2330                           case F_ex:
2331                                 R.d = exp(L.d);
2332                                 break;
2333
2334                           case F_lg:
2335                                 R.d = log(L.d);
2336                                 break;
2337
2338                           case F_si:
2339                                 R.d = sin(L.d);
2340                                 break;
2341
2342                           case F_sq:
2343                                 R.d = sqrt(L.d);
2344                                 break;
2345 #else
2346                           case F_co:
2347                           case F_ex:
2348                           case F_lg:
2349                           case F_si:
2350                           case F_sq:
2351                                 runtime_error(EMSG_NO_MATH);
2352                                 break;
2353 #endif
2354
2355                           case F_sr:
2356                                 R.d = (double)seed;
2357                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2358                                 srand(seed);
2359                                 break;
2360
2361                           case F_ti:
2362                                 R.d = time(NULL);
2363                                 break;
2364
2365                           case F_le:
2366                                 if (! op1)
2367                                         L.s = getvar_s(V[F0]);
2368                                 R.d = bb_strlen(L.s);
2369                                 break;
2370
2371                           case F_sy:
2372                                 fflush(NULL);
2373                                 R.d = (L.s && *L.s) ? system(L.s) : 0;
2374                                 break;
2375
2376                           case F_ff:
2377                                 if (! op1)
2378                                         fflush(stdout);
2379                                 else {
2380                                         if (L.s && *L.s) {
2381                                                 X.rsm = newfile(L.s);
2382                                                 fflush(X.rsm->F);
2383                                         } else {
2384                                                 fflush(NULL);
2385                                         }
2386                                 }
2387                                 break;
2388
2389                           case F_cl:
2390                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2391                                 if (X.rsm) {
2392                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2393                                         free(X.rsm->buffer);
2394                                         hash_remove(fdhash, L.s);
2395                                 }
2396                                 if (R.i != 0)
2397                                         setvar_i(V[ERRNO], errno);
2398                                 R.d = (double)R.i;
2399                                 break;
2400                         }
2401                         setvar_i(res, R.d);
2402                         break;
2403
2404                   case XC( OC_BUILTIN ):
2405                         res = exec_builtin(op, res);
2406                         break;
2407
2408                   case XC( OC_SPRINTF ):
2409                         setvar_p(res, awk_printf(op1));
2410                         break;
2411
2412                   case XC( OC_UNARY ):
2413                         X.v = R.v;
2414                         L.d = R.d = getvar_i(R.v);
2415                         switch (opn) {
2416                           case 'P':
2417                                 L.d = ++R.d;
2418                                 goto r_op_change;
2419                           case 'p':
2420                                 R.d++;
2421                                 goto r_op_change;
2422                           case 'M':
2423                                 L.d = --R.d;
2424                                 goto r_op_change;
2425                           case 'm':
2426                                 R.d--;
2427                                 goto r_op_change;
2428                           case '!':
2429                             L.d = istrue(X.v) ? 0 : 1;
2430                                 break;
2431                           case '-':
2432                                 L.d = -R.d;
2433                                 break;
2434                         r_op_change:
2435                                 setvar_i(X.v, R.d);
2436                         }
2437                         setvar_i(res, L.d);
2438                         break;
2439
2440                   case XC( OC_FIELD ):
2441                         R.i = (int)getvar_i(R.v);
2442                         if (R.i == 0) {
2443                                 res = V[F0];
2444                         } else {
2445                                 split_f0();
2446                                 if (R.i > nfields)
2447                                         fsrealloc(R.i);
2448
2449                                 res = &Fields[R.i-1];
2450                         }
2451                         break;
2452
2453                   /* concatenation (" ") and index joining (",") */
2454                   case XC( OC_CONCAT ):
2455                   case XC( OC_COMMA ):
2456                         opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2457                         X.s = (char *)xmalloc(opn);
2458                         strcpy(X.s, L.s);
2459                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2460                                 L.s = getvar_s(V[SUBSEP]);
2461                                 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2462                                 strcat(X.s, L.s);
2463                         }
2464                         strcat(X.s, R.s);
2465                         setvar_p(res, X.s);
2466                         break;
2467
2468                   case XC( OC_LAND ):
2469                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2470                         break;
2471
2472                   case XC( OC_LOR ):
2473                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2474                         break;
2475
2476                   case XC( OC_BINARY ):
2477                   case XC( OC_REPLACE ):
2478                         R.d = getvar_i(R.v);
2479                         switch (opn) {
2480                           case '+':
2481                                 L.d += R.d;
2482                                 break;
2483                           case '-':
2484                                 L.d -= R.d;
2485                                 break;
2486                           case '*':
2487                                 L.d *= R.d;
2488                                 break;
2489                           case '/':
2490                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2491                                 L.d /= R.d;
2492                                 break;
2493                           case '&':
2494 #ifdef CONFIG_FEATURE_AWK_MATH
2495                                 L.d = pow(L.d, R.d);
2496 #else
2497                                 runtime_error(EMSG_NO_MATH);
2498 #endif
2499                                 break;
2500                           case '%':
2501                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2502                                 L.d -= (int)(L.d / R.d) * R.d;
2503                                 break;
2504                         }
2505                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2506                         break;
2507
2508                   case XC( OC_COMPARE ):
2509                         if (is_numeric(L.v) && is_numeric(R.v)) {
2510                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2511                         } else {
2512                                 L.s = getvar_s(L.v);
2513                                 R.s = getvar_s(R.v);
2514                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2515                         }
2516                         switch (opn & 0xfe) {
2517                           case 0:
2518                                 R.i = (L.d > 0);
2519                                 break;
2520                           case 2:
2521                                 R.i = (L.d >= 0);
2522                                 break;
2523                           case 4:
2524                                 R.i = (L.d == 0);
2525                                 break;
2526                         }
2527                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2528                         break;
2529
2530                   default:
2531                         runtime_error(EMSG_POSSIBLE_ERROR);
2532                 }
2533                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2534                         op = op->a.n;
2535                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2536                         break;
2537                 if (nextrec)
2538                         break;
2539         }
2540         nvfree(v1);
2541         return res;
2542 }
2543
2544
2545 /* -------- main & co. -------- */
2546
2547 static int awk_exit(int r)
2548 {
2549         unsigned int i;
2550         hash_item *hi;
2551         static var tv;
2552
2553         if (! exiting) {
2554                 exiting = TRUE;
2555                 nextrec = FALSE;
2556                 evaluate(endseq.first, &tv);
2557         }
2558
2559         /* waiting for children */
2560         for (i=0; i<fdhash->csize; i++) {
2561                 hi = fdhash->items[i];
2562                 while(hi) {
2563                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2564                                 pclose(hi->data.rs.F);
2565                         hi = hi->next;
2566                 }
2567         }
2568
2569         exit(r);
2570 }
2571
2572 /* if expr looks like "var=value", perform assignment and return 1,
2573  * otherwise return 0 */
2574 static int is_assignment(const char *expr)
2575 {
2576         char *exprc, *s, *s0, *s1;
2577
2578         exprc = bb_xstrdup(expr);
2579         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2580                 free(exprc);
2581                 return FALSE;
2582         }
2583
2584         *(s++) = '\0';
2585         s0 = s1 = s;
2586         while (*s)
2587                 *(s1++) = nextchar(&s);
2588
2589         *s1 = '\0';
2590         setvar_u(newvar(exprc), s0);
2591         free(exprc);
2592         return TRUE;
2593 }
2594
2595 /* switch to next input file */
2596 static rstream *next_input_file(void)
2597 {
2598         static rstream rsm;
2599         FILE *F = NULL;
2600         char *fname, *ind;
2601         static int files_happen = FALSE;
2602
2603         if (rsm.F) fclose(rsm.F);
2604         rsm.F = NULL;
2605         rsm.pos = rsm.adv = 0;
2606
2607         do {
2608                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2609                         if (files_happen)
2610                                 return NULL;
2611                         fname = "-";
2612                         F = stdin;
2613                 } else {
2614                         ind = getvar_s(incvar(V[ARGIND]));
2615                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2616                         if (fname && *fname && !is_assignment(fname))
2617                                 F = afopen(fname, "r");
2618                 }
2619         } while (!F);
2620
2621         files_happen = TRUE;
2622         setvar_s(V[FILENAME], fname);
2623         rsm.F = F;
2624         return &rsm;
2625 }
2626
2627 extern int awk_main(int argc, char **argv)
2628 {
2629         char *s, *s1;
2630         int i, j, c;
2631         var *v;
2632         static var tv;
2633         char **envp;
2634         static int from_file = FALSE;
2635         rstream *rsm;
2636         FILE *F, *stdfiles[3];
2637         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2638
2639         /* allocate global buffer */
2640         buf = xmalloc(MAXVARFMT+1);
2641
2642         vhash = hash_init();
2643         ahash = hash_init();
2644         fdhash = hash_init();
2645         fnhash = hash_init();
2646
2647         /* initialize variables */
2648         for (i=0;  *vNames;  i++) {
2649                 V[i] = v = newvar(nextword(&vNames));
2650                 if (*vValues != '\377')
2651                         setvar_s(v, nextword(&vValues));
2652                 else
2653                         setvar_i(v, 0);
2654
2655                 if (*vNames == '*') {
2656                         v->type |= VF_SPECIAL;
2657                         vNames++;
2658                 }
2659         }
2660
2661         handle_special(V[FS]);
2662         handle_special(V[RS]);
2663
2664         stdfiles[0] = stdin;
2665         stdfiles[1] = stdout;
2666         stdfiles[2] = stderr;
2667         for (i=0; i<3; i++) {
2668                 rsm = newfile(nextword(&stdnames));
2669                 rsm->F = stdfiles[i];
2670         }
2671
2672         for (envp=environ; *envp; envp++) {
2673                 s = bb_xstrdup(*envp);
2674                 s1 = strchr(s, '=');
2675                 if (!s1) {
2676                         goto keep_going;
2677                 }
2678                 *(s1++) = '\0';
2679                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2680 keep_going:
2681                 free(s);
2682         }
2683
2684         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2685                 switch (c) {
2686                         case 'F':
2687                                 setvar_s(V[FS], optarg);
2688                                 break;
2689                         case 'v':
2690                                 if (! is_assignment(optarg))
2691                                         bb_show_usage();
2692                                 break;
2693                         case 'f':
2694                                 from_file = TRUE;
2695                                 F = afopen(programname = optarg, "r");
2696                                 s = NULL;
2697                                 /* one byte is reserved for some trick in next_token */
2698                                 for (i=j=1; j>0; i+=j) {
2699                                         s = (char *)xrealloc(s, i+4096);
2700                                         j = fread(s+i, 1, 4094, F);
2701                                 }
2702                                 s[i] = '\0';
2703                                 fclose(F);
2704                                 parse_program(s+1);
2705                                 free(s);
2706                                 break;
2707                         case 'W':
2708                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2709                                 break;
2710
2711                         default:
2712                                 bb_show_usage();
2713                 }
2714         }
2715
2716         if (!from_file) {
2717                 if (argc == optind)
2718                         bb_show_usage();
2719                 programname="cmd. line";
2720                 parse_program(argv[optind++]);
2721
2722         }
2723
2724         /* fill in ARGV array */
2725         setvar_i(V[ARGC], argc - optind + 1);
2726         setari_u(V[ARGV], 0, "awk");
2727         for(i=optind; i < argc; i++)
2728                 setari_u(V[ARGV], i+1-optind, argv[i]);
2729
2730         evaluate(beginseq.first, &tv);
2731         if (! mainseq.first && ! endseq.first)
2732                 awk_exit(EXIT_SUCCESS);
2733
2734         /* input file could already be opened in BEGIN block */
2735         if (! iF) iF = next_input_file();
2736
2737         /* passing through input files */
2738         while (iF) {
2739
2740                 nextfile = FALSE;
2741                 setvar_i(V[FNR], 0);
2742
2743                 while ((c = awk_getline(iF, V[F0])) > 0) {
2744
2745                         nextrec = FALSE;
2746                         incvar(V[NR]);
2747                         incvar(V[FNR]);
2748                         evaluate(mainseq.first, &tv);
2749
2750                         if (nextfile)
2751                                 break;
2752                 }
2753
2754                 if (c < 0)
2755                         runtime_error(strerror(errno));
2756
2757                 iF = next_input_file();
2758
2759         }
2760
2761         awk_exit(EXIT_SUCCESS);
2762
2763         return 0;
2764 }
2765