8f746b48ce75726e883b5c3564422032d84a5633
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <time.h>
29 #include <math.h>
30 #include <ctype.h>
31 #include <getopt.h>
32 #include <regex.h>
33
34 #include "busybox.h"
35
36
/* MAXVARFMT is handed to fmt_num() together with the global buf in getvar_s()
 * -- presumably the capacity limit for one formatted number (TODO confirm) */
37 #define MAXVARFMT       240
/* smallest number of var slots nvalloc() reserves in a new temporary-variable block */
38 #define MINNVBLOCK      64
39
40 /* variable flags */
/* (bit values stored in var.type) */
41 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
42 #define VF_ARRAY        0x0002  /* 1 = it's an array */
43
44 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
45 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
47 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
49 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
51
52 /* these flags are static, don't change them when value is changed */
/* (clrvar() masks var.type with VF_DONTTOUCH, so only these bits survive a reset) */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
55 /* Variable */
/* One awk value. type holds VF_* bits; number and string carry the numeric
 * and textual forms. getvar_i()/getvar_s() compute the missing form on demand
 * and mark it with VF_CACHED. The members of x share storage (union). */
56 typedef struct var_s {
57         unsigned short type;            /* flags */
58         double number;
59         char *string;
60         union {
61                 int aidx;                               /* func arg index (on compilation stage) */
62                 struct xhash_s *array;  /* array ptr */
63                 struct var_s *parent;   /* for func args, ptr to actual parameter */
64                 char **walker;                  /* list of array elements (for..in) */
65         } x;
66 } var;
67
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
/* first and last point at the two ends of the node list; programname is a
 * string kept with the chain (presumably the name of the source the nodes
 * were parsed from -- TODO confirm against the parser) */
69 typedef struct chain_s {
70         struct node_s *first;
71         struct node_s *last;
72         char *programname;
73 } chain;
74
75 /* Function */
/* Entry stored in the fnhash table (see newfunc()): nargs is an argument
 * count (presumably the number of declared parameters -- TODO confirm) and
 * body is the node chain of the function. */
76 typedef struct func_s {
77         unsigned short nargs;
78         struct chain_s body;
79 } func;
80
81 /* I/O stream */
/* Entry stored in the fdhash table (see newfile()); also the type of the
 * global iF. The code using the individual fields lies outside this part of
 * the file, so their exact meaning is not documented here. */
82 typedef struct rstream_s {
83         FILE *F;
84         char *buffer;
85         int adv;
86         int size;
87         int pos;
88         unsigned short is_pipe;
89 } rstream;
90
/* One entry of an xhash bucket chain. data is the first member, so a pointer
 * to data has the same address as the item itself. hash_find() allocates
 * sizeof(hash_item) plus the length of the key, which is why name[] can hold
 * more than one byte. */
91 typedef struct hash_item_s {
92         union {
93                 struct var_s v;                 /* variable/array hash */
94                 struct rstream_s rs;    /* redirect streams hash */
95                 struct func_s f;                /* functions hash */
96         } data;
97         struct hash_item_s *next;       /* next in chain */
98         char name[1];                           /* really it's longer */
99 } hash_item;
100
/* Chained hash table keyed by string. items[] has csize buckets; nprime is
 * the index of the next size to take from PRIMES[] when hash_rebuild() grows
 * the table; glen accumulates strlen(name)+1 for every stored key. */
101 typedef struct xhash_s {
102         unsigned int nel;                                       /* num of elements */
103         unsigned int csize;                                     /* current hash size */
104         unsigned int nprime;                            /* next hash size in PRIMES[] */
105         unsigned int glen;                                      /* summary length of item names */
106         struct hash_item_s **items;
107 } xhash;
108
109 /* Tree node */
/* info is an operator word (presumably built from the OC_*, OF_* and P()
 * values listed in tokeninfo[] -- TODO confirm); lineno records a source
 * line. l, r and a are three operand slots, each a union whose active member
 * depends on the kind of node. */
110 typedef struct node_s {
111         unsigned long info;
112         unsigned short lineno;
113         union {
114                 struct node_s *n;
115                 var *v;
116                 int i;
117                 char *s;
118                 regex_t *re;
119         } l;
120         union {
121                 struct node_s *n;
122                 regex_t *ire;
123                 func *f;
124                 int argno;
125         } r;
126         union {
127                 struct node_s *n;
128         } a;
129 } node;
130
131 /* Block of temporary variables */
/* Managed by nvalloc()/nvfree(): size is the capacity of nv[] counted in
 * vars, pos points at the first unused slot, prev/next link the blocks into
 * a doubly linked list, and nv[] is trailing storage allocated together with
 * the header (zero-length array). */
132 typedef struct nvblock_s {
133         int size;
134         var *pos;
135         struct nvblock_s *prev;
136         struct nvblock_s *next;
137         var nv[0];
138 } nvblock;
139
/* A tree node bundled with two regex_t objects; this is the type of the
 * globals fsplitter and rsplitter (presumably the field and record
 * separators -- TODO confirm where they are initialised). */
140 typedef struct tsplitter_s {
141         node n;
142         regex_t re[2];
143 } tsplitter;
144
145 /* simple token classes */
146 /* Order and hex values are very important!!!  See next_token() */
/* next_token() starts with class value 1 and shifts it left by one bit each
 * time it meets an NTC marker in tokenlist, so the n-th group of tokenlist
 * belongs to the class whose value is (1 << n). */
147 #define TC_SEQSTART      1                              /* ( */
148 #define TC_SEQTERM      (1 << 1)                /* ) */
149 #define TC_REGEXP       (1 << 2)                /* /.../ */
150 #define TC_OUTRDR       (1 << 3)                /* | > >> */
151 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
152 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
153 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
154 #define TC_IN           (1 << 7)
155 #define TC_COMMA        (1 << 8)
156 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
157 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
158 #define TC_ARRTERM      (1 << 11)               /* ] */
159 #define TC_GRPSTART     (1 << 12)               /* { */
160 #define TC_GRPTERM      (1 << 13)               /* } */
161 #define TC_SEMICOL      (1 << 14)
162 #define TC_NEWLINE      (1 << 15)
163 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
164 #define TC_WHILE        (1 << 17)
165 #define TC_ELSE         (1 << 18)
166 #define TC_BUILTIN      (1 << 19)
167 #define TC_GETLINE      (1 << 20)
168 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
169 #define TC_BEGIN        (1 << 22)
170 #define TC_END          (1 << 23)
171 #define TC_EOF          (1 << 24)
172 #define TC_VARIABLE     (1 << 25)
173 #define TC_ARRAY        (1 << 26)
174 #define TC_FUNCTION     (1 << 27)
175 #define TC_STRING       (1 << 28)
176 #define TC_NUMBER       (1 << 29)
177
178 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
179
180 /* combined token classes */
181 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
187 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
188
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193 /* discard newlines after these */
194 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195         TC_BINOP | TC_OPTERM)
196
197 /* what can expression begin with */
198 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205         TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
207
/* operator flags, OR-ed into the info words of tokeninfo[]; judging by the
 * names and the builtin note below they select how operand 1 / operand 2 is
 * resolved (value, string, number) -- TODO confirm against the evaluator */
208 #define OF_RES1         0x010000
209 #define OF_RES2         0x020000
210 #define OF_STR1         0x040000
211 #define OF_STR2         0x080000
212 #define OF_NUM1         0x100000
213 #define OF_CHECKED      0x200000
214
215 /* combined operator flags */
/* (two-letter shorthands: first letter describes operand 1, second letter
 * operand 2; x = no flag, V = OF_RESn, S = OF_RESn|OF_STRn, N = OF_RES1|OF_NUM1) */
216 #define xx      0
217 #define xV      OF_RES2
218 #define xS      (OF_RES2 | OF_STR2)
219 #define Vx      OF_RES1
220 #define VV      (OF_RES1 | OF_RES2)
221 #define Nx      (OF_RES1 | OF_NUM1)
222 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx      (OF_RES1 | OF_STR1)
224 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
/* OPCLSMASK covers the byte that holds the OC_ and ST_ class values; OPNMASK
 * covers the low seven bits (the character / index OR-ed in by tokeninfo[]) */
227 #define OPCLSMASK       0xFF00
228 #define OPNMASK         0x007F
229
230 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
231  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
233  */
/* NOTE(review): P() neither parenthesizes x nor widens it, so P(0x83) and the
 * other builtin values >= 0x80 shift a 1 into the sign bit of int; confirm
 * that the resulting tokeninfo[] words are what the evaluator expects where
 * unsigned long is wider than int. */
234 #define P(x)    (x << 24)
235 #define PRIMASK         0x7F000000
236 #define PRIMASK2        0x7E000000
237
238 /* Operation classes */
/* Every class value is a multiple of 0x100 and lies inside OPCLSMASK.
 * SHIFT_TIL_THIS has the value of OC_WALKINIT and RECUR_FROM_THIS the value
 * of OC_BINARY; presumably they split the list into ranges that the evaluator
 * treats differently (TODO confirm). */
239
240 #define SHIFT_TIL_THIS  0x0600
241 #define RECUR_FROM_THIS 0x1000
242
243 enum {
244         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
245         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
246
247         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
248         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
249         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
250
251         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
252         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
253         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
254         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
255         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
256         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
257         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
258         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
259         OC_DONE=0x2800,
260
261         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
262         ST_WHILE=0x3300
263 };
264
265 /* simple builtins */
/* (F_* values are OR-ed with OC_FBLTIN in tokeninfo[]) */
266 enum {
267         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
268         F_ti,   F_le,   F_sy,   F_ff,   F_cl
269 };
270
271 /* builtins */
/* (B_* values are OR-ed with OC_BUILTIN, abbreviated OC_B, in tokeninfo[]) */
272 enum {
273         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
274         B_ge,   B_gs,   B_su
275 };
276
277 /* tokens and their corresponding info values */
278
/* NTC is the marker as a string piece for building tokenlist; NTCC is the
 * same byte as a character constant, which next_token() compares against. */
279 #define NTC             "\377"          /* switch to next token class (tc<<1) */
280 #define NTCC    '\377'
281
/* shorthand used in the tokeninfo[] initializer */
282 #define OC_B    OC_BUILTIN
283
/* Table of literal tokens scanned by next_token(). Each token is stored as a
 * length byte (octal escape) followed by its text; an NTC byte closes the
 * current token class and moves on to the next TC_* bit; a zero length byte
 * ends the table. The order of the entries must match tokeninfo[]. */
284 static char * const tokenlist =
285         "\1("           NTC
286         "\1)"           NTC
287         "\1/"           NTC                                                                     /* REGEXP */
288         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
289         "\2++"          "\2--"          NTC                                             /* UOPPOST */
290         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
291         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
292         "\2*="          "\2/="          "\2%="          "\2^="
293         "\1+"           "\1-"           "\3**="         "\2**"
294         "\1/"           "\1%"           "\1^"           "\1*"
295         "\2!="          "\2>="          "\2<="          "\1>"
296         "\1<"           "\2!~"          "\1~"           "\2&&"
297         "\2||"          "\1?"           "\1:"           NTC
298         "\2in"          NTC
299         "\1,"           NTC
300         "\1|"           NTC
301         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
302         "\1]"           NTC
303         "\1{"           NTC
304         "\1}"           NTC
305         "\1;"           NTC
306         "\1\n"          NTC
307         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
308         "\10continue"                   "\6delete"      "\5print"
309         "\6printf"      "\4next"        "\10nextfile"
310         "\6return"      "\4exit"        NTC
311         "\5while"       NTC
312         "\4else"        NTC
313
314         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
315         "\3cos"         "\3exp"         "\3int"         "\3log"
316         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
317         "\6gensub"      "\4gsub"        "\5index"       "\6length"
318         "\5match"       "\5split"       "\7sprintf"     "\3sub"
319         "\6substr"      "\7systime"     "\10strftime"
320         "\7tolower"     "\7toupper"     NTC
321         "\7getline"     NTC
322         "\4func"        "\10function"   NTC
323         "\5BEGIN"       NTC
324         "\3END"         "\0"
325         ;
326
/* Info words for the tokens of tokenlist: one entry per token, in exactly the
 * same order (NTC markers have no entry). An entry combines an operation
 * class (OC_ or ST_ value), operand flags (xx .. SS), a priority or builtin
 * descriptor via P(), and a low-order character or F_/B_ index. Keep both
 * tables in step when adding or removing a token. */
327 static unsigned long tokeninfo[] = {
328
329         0,
330         0,
331         OC_REGEXP,
332         xS|'a',         xS|'w',         xS|'|',
333         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
334         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
335                 OC_FIELD|xV|P(5),
336         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
337                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
338         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
339                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
340         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
341                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
342         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
343                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
344         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
345                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
346         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
347                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
348         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
349                 OC_COLON|xx|P(67)|':',
350         OC_IN|SV|P(49),
351         OC_COMMA|SS|P(80),
352         OC_PGETLINE|SV|P(37),
353         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
354                 OC_UNARY|xV|P(19)|'!',
355         0,
356         0,
357         0,
358         0,
359         0,
360         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
361         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
362         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
363         OC_RETURN|Vx,   OC_EXIT|Nx,
364         ST_WHILE,
365         0,
366
367         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
372         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
373         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374         OC_GETLINE|SV|P(0),
375         0,      0,
376         0,
377         0
378 };
379
380 /* internal variable names and their initial values       */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */ 
/* The enumerators index the global V[] array and are listed in the same
 * order as the names packed into vNames; _intvarcount_ is the number of
 * internal variables. */
382 enum {
383         CONVFMT=0,      OFMT,           FS,                     OFS,
384         ORS,            RS,                     RT,                     FILENAME,
385         SUBSEP,         ARGIND,         ARGC,           ARGV,
386         ERRNO,          FNR,
387         NR,                     NF,                     IGNORECASE,
388         ENVIRON,        F0,                     _intvarcount_
389 };
390
/* NUL-separated names; the list ends with an empty name */
391 static char * vNames =
392         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
393         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"    
394         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
395         "ERRNO\0"       "FNR\0"
396         "NR\0"          "NF\0*"         "IGNORECASE\0*"
397         "ENVIRON\0"     "$\0*"          "\0";
398
/* NUL-separated initial strings for the leading variables (CONVFMT .. SUBSEP
 * by position); the \377 byte presumably tells the initialisation code where
 * the string defaults stop -- TODO confirm */
399 static char * vValues =
400         "%.6g\0"        "%.6g\0"        " \0"           " \0"
401         "\n\0"          "\n\0"          "\0"            "\0"
402         "\034\0"
403         "\377";
404
/* Bucket counts of an xhash: a new table starts with FIRST_PRIME buckets and
 * hash_rebuild() moves through PRIMES[] one step at a time; NPRIMES is the
 * number of entries in PRIMES[].
 * FIRST_PRIME must not end in a semicolon, otherwise it cannot be used
 * inside an expression. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
409
410 /* globals */
411
412 extern char **environ;
413
/* V[] holds the internal variables, indexed by the CONVFMT .. F0 enumerators */
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
418 static rstream *iF;
/* vhash: variables (newvar), fdhash: streams (newfile), fnhash: functions
 * (newfunc); ahash is not used in this part of the file */
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
/* programname and lineno form the location prefix printed by syntax_error() */
420 static char *programname;
421 static short lineno;
422 static int is_f0_split;
423 static int nfields = 0;
424 static var *Fields = NULL;
425 static tsplitter fsplitter, rsplitter;
/* cb: the nvblock that nvalloc()/nvfree() are currently working in */
426 static nvblock *cb = NULL;
/* pos: where next_token() resumes scanning the program text */
427 static char *pos;
/* buf: scratch buffer that getvar_s() formats numbers into */
428 static char *buf;
429 static int icase = FALSE;
430 static int exiting = FALSE;
431
/* t: the token most recently produced by next_token(); t.lineno is also
 * advanced by skip_spaces() on backslash-newline, and a set t.rollback makes
 * the next call to next_token() skip scanning once */
432 static struct {
433         unsigned long tclass;
434         unsigned long info;
435         char *string;
436         double number;
437         short lineno;
438         int rollback;
439 } t;
440
441 /* function prototypes */
/* xregcomp() is defined outside this file; the static functions below are
 * declared ahead of their definitions because earlier code calls them */
442 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
443 static void handle_special(var *);
444 static node *parse_expr(unsigned long);
445 static void chain_group(void);
446 static var *evaluate(node *, var *);
447 static rstream *next_input_file(void);
448 static int fmt_num(char *, int, char *, double, int);
449 static int awk_exit(int);
450
451 /* ---- error handling ---- */
452
/* message texts passed to syntax_error() / runtime_error() */
453 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
454 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
455 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
456 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
457 static const char EMSG_INV_FMT[] = "Invalid format specifier";
458 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
459 static const char EMSG_NOT_ARRAY[] = "Not an array";
460 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
461 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
/* only defined when math support is configured out */
462 #ifndef CONFIG_FEATURE_AWK_MATH
463 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
464 #endif
465
466 static void syntax_error(const char * const message)
467 {
468         bb_error_msg("%s:%i: %s", programname, lineno, message);
469         exit(1);
470 }
471
472 #define runtime_error(x) syntax_error(x)
473
474
475 /* ---- hash stuff ---- */
476
/*
 * Hash a NUL-terminated key: for every character the running value is
 * multiplied by 63 and the character is added (unsigned arithmetic, so the
 * result wraps). The caller reduces the result modulo the table size.
 */
static unsigned int hashidx(char *name) {

	unsigned int h = 0;
	char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63u + (unsigned int)*p;

	return h;
}
484
485 /* create new hash */
486 static xhash *hash_init(void) {
487
488         xhash *newhash;
489         
490         newhash = (xhash *)xcalloc(1, sizeof(xhash));
491         newhash->csize = FIRST_PRIME;
492         newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
493
494         return newhash;
495 }
496
497 /* find item in hash, return ptr to data, NULL if not found */
498 static void *hash_search(xhash *hash, char *name) {
499
500         hash_item *hi;
501
502         hi = hash->items [ hashidx(name) % hash->csize ];
503         while (hi) {
504                 if (strcmp(hi->name, name) == 0)
505                         return &(hi->data);
506                 hi = hi->next;
507         }
508         return NULL;
509 }
510
511 /* grow hash if it becomes too big */
512 static void hash_rebuild(xhash *hash) {
513
514         unsigned int newsize, i, idx;
515         hash_item **newitems, *hi, *thi;
516
517         if (hash->nprime == NPRIMES)
518                 return;
519
520         newsize = PRIMES[hash->nprime++];
521         newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
522
523         for (i=0; i<hash->csize; i++) {
524                 hi = hash->items[i];
525                 while (hi) {
526                         thi = hi;
527                         hi = thi->next;
528                         idx = hashidx(thi->name) % newsize;
529                         thi->next = newitems[idx];
530                         newitems[idx] = thi;
531                 }
532         }
533
534         free(hash->items);
535         hash->csize = newsize;
536         hash->items = newitems;
537 }
538
539 /* find item in hash, add it if necessary. Return ptr to data */
540 static void *hash_find(xhash *hash, char *name) {
541
542         hash_item *hi;
543         unsigned int idx;
544         int l;
545
546         hi = hash_search(hash, name);
547         if (! hi) {
548                 if (++hash->nel / hash->csize > 10)
549                         hash_rebuild(hash);
550
551                 l = bb_strlen(name) + 1;
552                 hi = xcalloc(sizeof(hash_item) + l, 1);
553                 memcpy(hi->name, name, l);
554
555                 idx = hashidx(name) % hash->csize;
556                 hi->next = hash->items[idx];
557                 hash->items[idx] = hi;
558                 hash->glen += l;
559         }
560         return &(hi->data);
561 }
562
/* typed front ends for hash_find(); each expansion is fully parenthesized
 * so that the result can be used directly in a larger expression, e.g.
 * newvar(x)->type */
#define findvar(hash, name) ((var *) hash_find ( (hash) , (name) ))
#define newvar(name) ((var *) hash_find ( vhash , (name) ))
#define newfile(name) ((rstream *) hash_find ( fdhash , (name) ))
#define newfunc(name) ((func *) hash_find ( fnhash , (name) ))
567
568 static void hash_remove(xhash *hash, char *name) {
569
570         hash_item *hi, **phi;
571
572         phi = &(hash->items[ hashidx(name) % hash->csize ]);
573         while (*phi) {
574                 hi = *phi;
575                 if (strcmp(hi->name, name) == 0) {
576                         hash->glen -= (bb_strlen(name) + 1);
577                         hash->nel--;
578                         *phi = hi->next;
579                         free(hi);
580                         break;
581                 }
582                 phi = &(hi->next);
583         }
584 }
585
586 /* ------ some useful functions ------ */
587
588 static void skip_spaces(char **s) {
589
590         register char *p = *s;
591
592         while(*p == ' ' || *p == '\t' ||
593                                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
594                 p++;
595         }
596         *s = p;
597 }
598
/*
 * Return the NUL-terminated word *s points at and move *s just behind the
 * terminator of that word, i.e. to the start of the next word in a
 * NUL-separated list.
 */
static char *nextword(char **s) {

	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
607
/*
 * Fetch one character from *s and advance *s. A backslash is handed to
 * bb_process_escape_sequence(); if that returns a backslash without having
 * moved *s, the character following the backslash is returned literally.
 */
static char nextchar(char **s) {

	char ch = *(*s)++;
	char *after = *s;

	if (ch != '\\')
		return ch;

	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after)
		ch = *(*s)++;
	return ch;
}
618
/* Return 1 if c is an underscore or isalnum() accepts it, 0 otherwise. */
static inline int isalnum_(int c) {

	if (isalnum(c))
		return 1;
	return (c == '_') ? 1 : 0;
}
623
/*
 * Open path with bb_xfopen(), except that the name "-" yields stdin (mode
 * is ignored in that case).
 */
static FILE *afopen(const char *path, const char *mode) {

	if (strcmp(path, "-") == 0)
		return stdin;
	return bb_xfopen(path, mode);
}
628
629 /* -------- working with variables (set/get/copy/etc) -------- */
630
631 static xhash *iamarray(var *v) {
632
633         var *a = v;
634
635         while (a->type & VF_CHILD)
636                 a = a->x.parent;
637
638         if (! (a->type & VF_ARRAY)) {
639                 a->type |= VF_ARRAY;
640                 a->x.array = hash_init();
641         }
642         return a->x.array;
643 }
644
645 static void clear_array(xhash *array) {
646
647         unsigned int i;
648         hash_item *hi, *thi;
649
650         for (i=0; i<array->csize; i++) {
651                 hi = array->items[i];
652                 while (hi) {
653                         thi = hi;
654                         hi = hi->next;
655                         free(thi->data.v.string);
656                         free(thi);
657                 }
658                 array->items[i] = NULL;
659         }
660         array->glen = array->nel = 0;
661 }
662
663 /* clear a variable */
664 static var *clrvar(var *v) {
665
666         if (!(v->type & VF_FSTR))
667                 free(v->string);
668
669         v->type &= VF_DONTTOUCH;
670         v->type |= VF_DIRTY;
671         v->string = NULL;
672         return v;
673 }
674
675 /* assign string value to variable */
676 static var *setvar_p(var *v, char *value) {
677
678         clrvar(v);
679         v->string = value;
680         handle_special(v);
681
682         return v;
683 }
684
685 /* same as setvar_p but make a copy of string */
686 static var *setvar_s(var *v, char *value) {
687
688         return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
689 }
690
691 /* same as setvar_s but set USER flag */
692 static var *setvar_u(var *v, char *value) {
693
694         setvar_s(v, value);
695         v->type |= VF_USER;
696         return v;
697 }
698
699 /* set array element to user string */
700 static void setari_u(var *a, int idx, char *s) {
701
702         register var *v;
703         static char sidx[12];
704
705         sprintf(sidx, "%d", idx);
706         v = findvar(iamarray(a), sidx);
707         setvar_u(v, s);
708 }
709
710 /* assign numeric value to variable */
711 static var *setvar_i(var *v, double value) {
712
713         clrvar(v);
714         v->type |= VF_NUMBER;
715         v->number = value;
716         handle_special(v);
717         return v;
718 }
719
720 static char *getvar_s(var *v) {
721
722         /* if v is numeric and has no cached string, convert it to string */
723         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
724                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
725                 v->string = bb_xstrdup(buf);
726                 v->type |= VF_CACHED;
727         }
728         return (v->string == NULL) ? "" : v->string;
729 }
730
731 static double getvar_i(var *v) {
732
733         char *s;
734
735         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
736                 v->number = 0;
737                 s = v->string;
738                 if (s && *s) {
739                         v->number = strtod(s, &s);
740                         if (v->type & VF_USER) {
741                                 skip_spaces(&s);
742                                 if (*s != '\0')
743                                         v->type &= ~VF_USER;
744                         }
745                 } else {
746                         v->type &= ~VF_USER;
747                 }
748                 v->type |= VF_CACHED;
749         }
750         return v->number;
751 }
752
753 static var *copyvar(var *dest, var *src) {
754
755         if (dest != src) {
756                 clrvar(dest);
757                 dest->type |= (src->type & ~VF_DONTTOUCH);
758                 dest->number = src->number;
759                 if (src->string)
760                         dest->string = bb_xstrdup(src->string);
761         }
762         handle_special(dest);
763         return dest;
764 }
765
766 static var *incvar(var *v) {
767
768         return setvar_i(v, getvar_i(v)+1.);
769 }
770
771 /* return true if v is number or numeric string */
772 static int is_numeric(var *v) {
773
774         getvar_i(v);
775         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
776 }
777
778 /* return 1 when value of v corresponds to true, 0 otherwise */
779 static int istrue(var *v) {
780
781         if (is_numeric(v))
782                 return (v->number == 0) ? 0 : 1;
783         else
784                 return (v->string && *(v->string)) ? 1 : 0;
785 }
786
787 /* temporary varables allocator. Last allocated should be first freed */
788 static var *nvalloc(int n) {
789
790         nvblock *pb = NULL;
791         var *v, *r;
792         int size;
793
794         while (cb) {
795                 pb = cb;
796                 if ((cb->pos - cb->nv) + n <= cb->size) break;
797                 cb = cb->next;
798         }
799
800         if (! cb) {
801                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
802                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
803                 cb->size = size;
804                 cb->pos = cb->nv;
805                 cb->prev = pb;
806                 cb->next = NULL;
807                 if (pb) pb->next = cb;
808         }
809
810         v = r = cb->pos;
811         cb->pos += n;
812
813         while (v < cb->pos) {
814                 v->type = 0;
815                 v->string = NULL;
816                 v++;
817         }
818
819         return r;
820 }
821
822 static void nvfree(var *v) {
823
824         var *p;
825
826         if (v < cb->nv || v >= cb->pos)
827                 runtime_error(EMSG_INTERNAL_ERROR);
828
829         for (p=v; p<cb->pos; p++) {
830                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
831                         clear_array(iamarray(p));
832                         free(p->x.array->items);
833                         free(p->x.array);
834                 }
835                 if (p->type & VF_WALK)
836                         free(p->x.walker);
837
838                 clrvar(p);
839         }
840
841         cb->pos = v;
842         while (cb->prev && cb->pos == cb->nv) {
843                 cb = cb->prev;
844         }
845 }
846
847 /* ------- awk program text parsing ------- */
848
849 /* Parse the next token at the global pos and place the results into the global t.
850  * If the token isn't one of the expected classes, give up. Returns the token class.
851  */
852 static unsigned long next_token(unsigned long expected) {
853
854         char *p, *pp, *s;
855         char *tl;
856         unsigned long tc, *ti;
857         int l;
858         static int concat_inserted = FALSE;
859         static unsigned long save_tclass, save_info;
860         static unsigned long ltclass = TC_OPTERM;
861
862         if (t.rollback) {
863
864                 t.rollback = FALSE;
865
866         } else if (concat_inserted) {
867
868                 concat_inserted = FALSE;
869                 t.tclass = save_tclass;
870                 t.info = save_info;
871
872         } else {
873
874                 p = pos;
875
876         readnext:
877                 skip_spaces(&p);
878                 lineno = t.lineno;
879                 if (*p == '#')
880                         while (*p != '\n' && *p != '\0') p++;
881
882                 if (*p == '\n')
883                         t.lineno++;
884
885                 if (*p == '\0') {
886                         tc = TC_EOF;
887
888                 } else if (*p == '\"') {
889                         /* it's a string */
890                         t.string = s = ++p;
891                         while (*p != '\"') {
892                                 if (*p == '\0' || *p == '\n')
893                                         syntax_error(EMSG_UNEXP_EOS);
894                                 *(s++) = nextchar(&p);
895                         }
896                         p++;
897                         *s = '\0';
898                         tc = TC_STRING;
899
900                 } else if ((expected & TC_REGEXP) && *p == '/') {
901                         /* it's regexp */
902                         t.string = s = ++p;
903                         while (*p != '/') {
904                                 if (*p == '\0' || *p == '\n')
905                                         syntax_error(EMSG_UNEXP_EOS);
906                                 if ((*s++ = *p++) == '\\') {
907                                         pp = p;
908                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
909                                         if (*pp == '\\') *s++ = '\\';
910                                         if (p == pp) *s++ = *p++;
911                                 }
912                         }
913                         p++;
914                         *s = '\0';
915                         tc = TC_REGEXP;
916
917                 } else if (*p == '.' || isdigit(*p)) {
918                         /* it's a number */
919                         t.number = strtod(p, &p);
920                         if (*p == '.')
921                                 syntax_error(EMSG_UNEXP_TOKEN);
922                         tc = TC_NUMBER;
923
924                 } else {
925                         /* search for something known */
926                         tl = tokenlist;
927                         tc = 0x00000001;
928                         ti = tokeninfo;
929                         while (*tl) {
930                                 l = *(tl++);
931                                 if (l == NTCC) {
932                                         tc <<= 1;
933                                         continue;
934                                 }
935                                 /* if token class is expected, token
936                                  * matches and it's not a longer word,
937                                  * then this is what we are looking for
938                                  */
939                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940                                 *tl == *p && strncmp(p, tl, l) == 0 &&
941                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
942                                         t.info = *ti;
943                                         p += l;
944                                         break;
945                                 }
946                                 ti++;
947                                 tl += l;
948                         }
949
950                         if (! *tl) {
951                                 /* it's a name (var/array/function),
952                                  * otherwise it's something wrong
953                                  */
954                                 if (! isalnum_(*p))
955                                         syntax_error(EMSG_UNEXP_TOKEN);
956
957                                 t.string = --p;
958                                 while(isalnum_(*(++p))) {
959                                         *(p-1) = *p;
960                                 }
961                                 *(p-1) = '\0';
962                                 tc = TC_VARIABLE;
963                                 if (*p == '(') {
964                                         tc = TC_FUNCTION;
965                                 } else {
966                                         skip_spaces(&p);
967                                         if (*p == '[') {
968                                                 p++;
969                                                 tc = TC_ARRAY;
970                                         }
971                                 }
972                         }
973                 }
974                 pos = p;
975
976                 /* skipping newlines in some cases */
977                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
978                         goto readnext;
979
980                 /* insert concatenation operator when needed */
981                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
982                         concat_inserted = TRUE;
983                         save_tclass = tc;
984                         save_info = t.info;
985                         tc = TC_BINOP;
986                         t.info = OC_CONCAT | SS | P(35);
987                 }
988
989                 t.tclass = tc;
990         }
991         ltclass = t.tclass;
992
993         /* Are we ready for this? */
994         if (! (ltclass & expected))
995                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
996                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
997
998         return ltclass;
999 }
1000
1001 static void rollback_token(void) { t.rollback = TRUE; }
1002
1003 static node *new_node(unsigned long info) {
1004
1005         register node *n;
1006
1007         n = (node *)xcalloc(sizeof(node), 1);
1008         n->info = info;
1009         n->lineno = lineno;
1010         return n;
1011 }
1012
1013 static node *mk_re_node(char *s, node *n, regex_t *re) {
1014
1015         n->info = OC_REGEXP;
1016         n->l.re = re;
1017         n->r.ire = re + 1;
1018         xregcomp(re, s, REG_EXTENDED);
1019         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1020
1021         return n;
1022 }
1023
1024 static node *condition(void) {
1025
1026         next_token(TC_SEQSTART);
1027         return parse_expr(TC_SEQTERM);
1028 }
1029
1030 /* parse expression terminated by given argument, return ptr
1031  * to built subtree. Terminator is eaten by parse_expr */
/*
 * Reads tokens with next_token() until one whose class is in iexp shows
 * up, building an expression tree on the way.
 *
 *   sn    - dummy root living on the stack; whatever ends up in sn.r.n
 *           is the returned subtree (NULL for an empty expression)
 *   cn    - the node attached most recently
 *   xtc   - token classes acceptable for the next token
 *   glptr - a getline node that may still receive a `<' redirection,
 *           NULL otherwise
 *
 * Every attached node keeps, in a.n, a link to the node it was attached
 * under; the operator branch walks these links upwards while comparing
 * the priority bits of the info words.
 */
1032 static node *parse_expr(unsigned long iexp) {
1033
1034         node sn;
1035         node *cn = &sn;
1036         node *vn, *glptr;
1037         unsigned long tc, xtc;
1038         var *v;
1039
        /* NOTE(review): the dummy root gets all PRIMASK bits, presumably so
         * that the upward priority walk below never steps past it; this
         * depends on the PRIMASK/PRIMASK2 definitions, not visible here */
1040         sn.info = PRIMASK;
1041         sn.r.n = glptr = NULL;
1042         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1043
1044         while (! ((tc = next_token(xtc)) & iexp)) {
1045                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1046                         /* input redirection (<) attached to glptr node */
1047                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1048                         xtc = TC_OPERAND | TC_UOPPRE;
1049                         glptr = NULL;
1050
1051                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1052                         /* for binary and postfix-unary operators, jump back over
1053                          * previous operators with higher priority */
1054                         vn = cn;
1055                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) || 
1056                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1057                                 vn = vn->a.n;
                        /* NOTE(review): the ternary operator has its info word
                         * raised by P(6) only after the walk above; the
                         * rationale is not visible in this chunk */
1058                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1059                                 t.info += P(6);
                        /* splice the new operator node in where vn used to
                         * hang: it takes vn's place under vn's old parent */
1060                         cn = vn->a.n->r.n = new_node(t.info);
1061                         cn->a.n = vn->a.n;
1062                         if (tc & TC_BINOP) {
                                /* binary: vn becomes the left operand */
1063                                 cn->l.n = vn;
1064                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1065                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1066                                         /* it's a pipe */
1067                                         next_token(TC_GETLINE);
1068                                         /* give maximum priority to this pipe */
1069                                         cn->info &= ~PRIMASK;
1070                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1071                                 }
1072                         } else {
                                /* postfix unary: vn is stored in the r link */
1073                                 cn->r.n = vn;
1074                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1075                         }
1076                         vn->a.n = cn;
1077
1078                 } else {
1079                         /* for operands and prefix-unary operators, attach them
1080                          * to last node */
1081                         vn = cn;
1082                         cn = vn->r.n = new_node(t.info);
1083                         cn->a.n = vn;
1084                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1085                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1086                                 xtc = TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1087                                 /* one should be very careful with switch on tclass - 
1088                                  * only simple tclasses should be used! */
1089                                 switch (tc) {
1090                                   case TC_VARIABLE:
1091                                   case TC_ARRAY:
                                        /* a name found in ahash is referenced by
                                         * its stored index (OC_FNARG); any other
                                         * name goes through newvar() */
1092                                         cn->info = OC_VAR;
1093                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1094                                                 cn->info = OC_FNARG;
1095                                                 cn->l.i = v->x.aidx;
1096                                         } else {
1097                                                 cn->l.v = newvar(t.string);
1098                                         }
1099                                         if (tc & TC_ARRAY) {
                                                /* subscript expression goes into r.n */
1100                                                 cn->info |= xS;
1101                                                 cn->r.n = parse_expr(TC_ARRTERM);
1102                                         }
1103                                         xtc = TC_UOPPOST | TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1104                                         break;
1105                                         
1106                                   case TC_NUMBER:
1107                                   case TC_STRING:
                                        /* constants live in a freshly allocated var */
1108                                         cn->info = OC_VAR;
1109                                         v = cn->l.v = xcalloc(sizeof(var), 1);
1110                                         if (tc & TC_NUMBER)
1111                                                 setvar_i(v, t.number);
1112                                         else
1113                                                 setvar_s(v, t.string);
1114                                         break;
1115
1116                                   case TC_REGEXP:
                                        /* room for two regex_t: mk_re_node() fills
                                         * both re and re+1 */
1117                                         mk_re_node(t.string, cn,
1118                                                                         (regex_t *)xcalloc(sizeof(regex_t),2));
1119                                         break;
1120
1121                                   case TC_FUNCTION:
                                        /* argument list is parsed by condition() */
1122                                         cn->info = OC_FUNC;
1123                                         cn->r.f = newfunc(t.string);
1124                                         cn->l.n = condition();
1125                                         break;
1126
1127                                   case TC_SEQSTART:
                                        /* parenthesized subexpression replaces the
                                         * node created above for the '(' token */
1128                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1129                                         cn->a.n = vn;
1130                                         break;
1131
1132                                   case TC_GETLINE:
                                        /* remember the node so that a following
                                         * `<' can be attached to it */
1133                                         glptr = cn;
1134                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1135                                         break;
1136
1137                                   case TC_BUILTIN:
1138                                         cn->l.n = condition();
1139                                         break;
1140                                 }
1141                         }
1142                 }
1143         }
1144         return sn.r.n;
1145 }
1146
1147 /* add node to chain. Return ptr to alloc'd node */
1148 static node *chain_node(unsigned long info) {
1149
1150         register node *n;
1151
1152         if (! seq->first)
1153                 seq->first = seq->last = new_node(0);
1154
1155         if (seq->programname != programname) {
1156                 seq->programname = programname;
1157                 n = chain_node(OC_NEWSOURCE);
1158                 n->l.s = bb_xstrdup(programname);
1159         }
1160
1161         n = seq->last;
1162         n->info = info;
1163         seq->last = n->a.n = new_node(OC_DONE);
1164
1165         return n;
1166 }
1167
1168 static void chain_expr(unsigned long info) {
1169
1170         node *n;
1171
1172         n = chain_node(info);
1173         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1174         if (t.tclass & TC_GRPTERM)
1175                 rollback_token();
1176 }
1177
1178 static node *chain_loop(node *nn) {
1179
1180         node *n, *n2, *save_brk, *save_cont;
1181
1182         save_brk = break_ptr;
1183         save_cont = continue_ptr;
1184
1185         n = chain_node(OC_BR | Vx);
1186         continue_ptr = new_node(OC_EXEC);
1187         break_ptr = new_node(OC_EXEC);
1188         chain_group();
1189         n2 = chain_node(OC_EXEC | Vx);
1190         n2->l.n = nn;
1191         n2->a.n = n;
1192         continue_ptr->a.n = n2;
1193         break_ptr->a.n = n->r.n = seq->last;
1194
1195         continue_ptr = save_cont;
1196         break_ptr = save_brk;
1197
1198         return n;
1199 }
1200
1201 /* parse group and attach it to chain */
/*
 * Reads one statement, or one { ... } group of statements, from the token
 * stream and appends the resulting nodes to the current sequence using
 * chain_node(), chain_expr() and chain_loop().  Leading newline tokens are
 * skipped.  Calls itself for nested groups and for the bodies of control
 * statements.
 */
1202 static void chain_group(void) {
1203
1204         unsigned long c;
1205         node *n, *n2, *n3;
1206
1207         do {
1208                 c = next_token(TC_GRPSEQ);
1209         } while (c & TC_NEWLINE);
1210
1211         if (c & TC_GRPSTART) {
                /* block: keep chaining statements until the group terminator */
1212                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1213                         rollback_token();
1214                         chain_group();
1215                 }
1216         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                /* plain expression statement (possibly empty) */
1217                 rollback_token();
1218                 chain_expr(OC_EXEC | Vx);
1219         } else {                                                /* TC_STATEMNT */
1220                 switch (t.info & OPCLSMASK) {
1221                         case ST_IF:
                                /* n: branch node holding the condition;
                                 * n2: node chained right after the `then' part.
                                 * n->r.n is set to the node following n2 --
                                 * presumably the target when the condition
                                 * fails (TODO confirm against the evaluator).
                                 * With an else part, n2 is redirected past it. */
1222                                 n = chain_node(OC_BR | Vx);
1223                                 n->l.n = condition();
1224                                 chain_group();
1225                                 n2 = chain_node(OC_EXEC);
1226                                 n->r.n = seq->last;
1227                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1228                                         chain_group();
1229                                         n2->a.n = seq->last;
1230                                 } else {
1231                                         rollback_token();
1232                                 }
1233                                 break;
1234
1235                         case ST_WHILE:
                                /* the condition is parsed first, then stored in
                                 * the branch node returned by chain_loop() */
1236                                 n2 = condition();
1237                                 n = chain_loop(NULL);
1238                                 n->l.n = n2;
1239                                 break;
1240
1241                         case ST_DO:
                                /* n2 is chained ahead of the loop and its
                                 * successor is redirected to the node after the
                                 * loop's branch node, so the first pass skips
                                 * the branch node; the condition follows the
                                 * `while' keyword after the body */
1242                                 n2 = chain_node(OC_EXEC);
1243                                 n = chain_loop(NULL);
1244                                 n2->a.n = n->a.n;
1245                                 next_token(TC_WHILE);
1246                                 n->l.n = condition();
1247                                 break;
1248
1249                         case ST_FOR:
1250                                 next_token(TC_SEQSTART);
1251                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1252                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
                                        /* the only expression must be an `in'
                                         * node; its operands feed a WALKINIT
                                         * node, and the loop's branch node is
                                         * turned into WALKNEXT */
1253                                         if ((n2->info & OPCLSMASK) != OC_IN)
1254                                                 syntax_error(EMSG_UNEXP_TOKEN);
1255                                         n = chain_node(OC_WALKINIT | VV);
1256                                         n->l.n = n2->l.n;
1257                                         n->r.n = n2->r.n;
1258                                         n = chain_loop(NULL);
1259                                         n->info = OC_WALKNEXT | Vx;
1260                                         n->l.n = n2->l.n;
1261                                 } else {                                                                        /* for(;;) */
                                        /* first expression is chained as an
                                         * ordinary statement; the second (n2) is
                                         * the condition, the third (n3) is handed
                                         * to chain_loop().  Without a condition
                                         * the branch node becomes plain OC_EXEC. */
1262                                         n = chain_node(OC_EXEC | Vx);
1263                                         n->l.n = n2;
1264                                         n2 = parse_expr(TC_SEMICOL);
1265                                         n3 = parse_expr(TC_SEQTERM);
1266                                         n = chain_loop(n3);
1267                                         n->l.n = n2;
1268                                         if (! n2)
1269                                                 n->info = OC_EXEC;
1270                                 }
1271                                 break;
1272
1273                         case OC_PRINT:
1274                         case OC_PRINTF:
                                /* argument list in l.n; when an output
                                 * redirection token follows, its info bits are
                                 * merged into the node and the target
                                 * expression goes into r.n */
1275                                 n = chain_node(t.info);
1276                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1277                                 if (t.tclass & TC_OUTRDR) {
1278                                         n->info |= t.info;
1279                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1280                                 }
1281                                 if (t.tclass & TC_GRPTERM)
1282                                         rollback_token();
1283                                 break;
1284
1285                         case OC_BREAK:
                                /* jump via the innermost loop's break node */
1286                                 n = chain_node(OC_EXEC);
1287                                 n->a.n = break_ptr;
1288                                 break;
1289
1290                         case OC_CONTINUE:
                                /* jump via the innermost loop's continue node */
1291                                 n = chain_node(OC_EXEC);
1292                                 n->a.n = continue_ptr;
1293                                 break;
1294
1295                         /* delete, next, nextfile, return, exit */
1296                         default:
1297                                 chain_expr(t.info);
1298
1299                 }
1300         }
1301 }
1302
1303 static void parse_program(char *p) {
1304
1305         unsigned long tclass;
1306         node *cn;
1307         func *f;
1308         var *v;
1309
1310         pos = p;
1311         t.lineno = 1;
1312         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1313                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1314
1315                 if (tclass & TC_OPTERM)
1316                         continue;
1317
1318                 seq = &mainseq;
1319                 if (tclass & TC_BEGIN) {
1320                         seq = &beginseq;
1321                         chain_group();
1322
1323                 } else if (tclass & TC_END) {
1324                         seq = &endseq;
1325                         chain_group();
1326
1327                 } else if (tclass & TC_FUNCDECL) {
1328                         next_token(TC_FUNCTION);
1329                         pos++;
1330                         f = newfunc(t.string);
1331                         f->body.first = NULL;
1332                         f->nargs = 0;
1333                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1334                                 v = findvar(ahash, t.string);
1335                                 v->x.aidx = (f->nargs)++;
1336
1337                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1338                                         break;
1339                         }
1340                         seq = &(f->body);
1341                         chain_group();
1342                         clear_array(ahash);
1343
1344                 } else if (tclass & TC_OPSEQ) {
1345                         rollback_token();
1346                         cn = chain_node(OC_TEST);
1347                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1348                         if (t.tclass & TC_GRPSTART) {
1349                                 rollback_token();
1350                                 chain_group();
1351                         } else {
1352                                 chain_node(OC_PRINT);
1353                         }
1354                         cn->r.n = mainseq.last;
1355
1356                 } else /* if (tclass & TC_GRPSTART) */ {
1357                         rollback_token();
1358                         chain_group();
1359                 }
1360         }
1361 }
1362
1363
1364 /* -------- program execution part -------- */
1365
1366 static node *mk_splitter(char *s, tsplitter *spl) {
1367
1368         register regex_t *re, *ire;
1369         node *n;
1370
1371         re = &spl->re[0];
1372         ire = &spl->re[1];
1373         n = &spl->n;
1374         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1375                 regfree(re);
1376                 regfree(ire);
1377         }
1378         if (bb_strlen(s) > 1) {
1379                 mk_re_node(s, n, re);
1380         } else {
1381                 n->info = (unsigned long) *s;
1382         }
1383
1384         return n;
1385 }
1386
1387 /* use node as a regular expression. Supplied with node ptr and regex_t
1388  * storage space. Return ptr to regex (if result points to preg, it shuold
1389  * be later regfree'd manually
1390  */
1391 static regex_t *as_regex(node *op, regex_t *preg) {
1392
1393         var *v;
1394         char *s;
1395
1396         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1397                 return icase ? op->r.ire : op->l.re;
1398         } else {
1399                 v = nvalloc(1);
1400                 s = getvar_s(evaluate(op, v));
1401                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1402                 nvfree(v);
1403                 return preg;
1404         }
1405 }
1406
/* Gradually growing buffer: make sure *b can hold more than n bytes.
 * Nothing happens while the buffer exists and n is below *size;
 * otherwise it is reallocated to n + n/2 + 80 bytes and *size updated. */
static void qrealloc(char **b, int n, int *size) {

	if (*b != NULL && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1413
1414 /* resize field storage space */
1415 static void fsrealloc(int size) {
1416
1417         static int maxfields = 0;
1418         int i;
1419
1420         if (size >= maxfields) {
1421                 i = maxfields;
1422                 maxfields = size + 16;
1423                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1424                 for (; i<maxfields; i++) {
1425                         Fields[i].type = VF_SPECIAL;
1426                         Fields[i].string = NULL;
1427                 }
1428         }
1429
1430         if (size < nfields) {
1431                 for (i=size; i<nfields; i++) {
1432                         clrvar(Fields+i);
1433                 }
1434         }
1435         nfields = size;
1436 }
1437
1438 static int awk_split(char *s, node *spl, char **slist) {
1439
1440         int l, n=0;
1441         char c[4];
1442         char *s1;
1443         regmatch_t pmatch[2];
1444
1445         /* in worst case, each char would be a separate field */
1446         *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1447
1448         c[0] = c[1] = (char)spl->info;
1449         c[2] = c[3] = '\0';
1450         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1451
1452         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1453                 while (*s) {
1454                         l = strcspn(s, c+2);
1455                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1456                         pmatch[0].rm_so <= l) {
1457                                 l = pmatch[0].rm_so;
1458                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1459                         } else {
1460                                 pmatch[0].rm_eo = l;
1461                                 if (*(s+l)) pmatch[0].rm_eo++;
1462                         }
1463
1464                         memcpy(s1, s, l);
1465                         *(s1+l) = '\0';
1466                         nextword(&s1);
1467                         s += pmatch[0].rm_eo;
1468                         n++;
1469                 }
1470         } else if (c[0] == '\0') {              /* null split */
1471                 while(*s) {
1472                         *(s1++) = *(s++);
1473                         *(s1++) = '\0';
1474                         n++;
1475                 }
1476         } else if (c[0] != ' ') {               /* single-character split */
1477                 if (icase) {
1478                         c[0] = toupper(c[0]);
1479                         c[1] = tolower(c[1]);
1480                 }
1481                 if (*s1) n++;
1482                 while ((s1 = strpbrk(s1, c))) {
1483                         *(s1++) = '\0';
1484                         n++;
1485                 }
1486         } else {                                /* space split */
1487                 while (*s) {
1488                         while (isspace(*s)) s++;
1489                         if (! *s) break;
1490                         n++;
1491                         while (*s && !isspace(*s))
1492                                 *(s1++) = *(s++);
1493                         *(s1++) = '\0';
1494                 }
1495         }
1496         return n;
1497 }
1498
1499 static void split_f0(void) {
1500
1501         static char *fstrings = NULL;
1502         int i, n;
1503         char *s;
1504
1505         if (is_f0_split)
1506                 return;
1507
1508         is_f0_split = TRUE;
1509         free(fstrings);
1510         fsrealloc(0);
1511         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1512         fsrealloc(n);
1513         s = fstrings;
1514         for (i=0; i<n; i++) {
1515                 Fields[i].string = nextword(&s);
1516                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1517         }
1518
1519         /* set NF manually to avoid side effects */
1520         clrvar(V[NF]);
1521         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1522         V[NF]->number = nfields;
1523 }
1524
1525 /* perform additional actions when some internal variables changed */
1526 static void handle_special(var *v) {
1527
1528         int n;
1529         char *b, *sep, *s;
1530         int sl, l, len, i, bsize;
1531
1532         if (! (v->type & VF_SPECIAL))
1533                 return;
1534
1535         if (v == V[NF]) {
1536                 n = (int)getvar_i(v);
1537                 fsrealloc(n);
1538
1539                 /* recalculate $0 */
1540                 sep = getvar_s(V[OFS]);
1541                 sl = bb_strlen(sep);
1542                 b = NULL;
1543                 len = 0;
1544                 for (i=0; i<n; i++) {
1545                         s = getvar_s(&Fields[i]);
1546                         l = bb_strlen(s);
1547                         if (b) {
1548                                 memcpy(b+len, sep, sl);
1549                                 len += sl;
1550                         }
1551                         qrealloc(&b, len+l+sl, &bsize);
1552                         memcpy(b+len, s, l);
1553                         len += l;
1554                 }
1555                 b[len] = '\0';
1556                 setvar_p(V[F0], b);
1557                 is_f0_split = TRUE;
1558
1559         } else if (v == V[F0]) {
1560                 is_f0_split = FALSE;
1561
1562         } else if (v == V[FS]) {
1563                 mk_splitter(getvar_s(v), &fsplitter);
1564
1565         } else if (v == V[RS]) {
1566                 mk_splitter(getvar_s(v), &rsplitter);
1567
1568         } else if (v == V[IGNORECASE]) {
1569                 icase = istrue(v);
1570
1571         } else {                                                /* $n */
1572                 n = getvar_i(V[NF]);
1573                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1574                 /* right here v is invalid. Just to note... */
1575         }
1576 }
1577
1578 /* step through func/builtin/etc arguments */
1579 static node *nextarg(node **pn) {
1580
1581         node *n;
1582
1583         n = *pn;
1584         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1585                 *pn = n->r.n;
1586                 n = n->l.n;
1587         } else {
1588                 *pn = NULL;
1589         }
1590         return n;
1591 }
1592
1593 static void hashwalk_init(var *v, xhash *array) {
1594
1595         char **w;
1596         hash_item *hi;
1597         int i;
1598
1599         if (v->type & VF_WALK)
1600                 free(v->x.walker);
1601
1602         v->type |= VF_WALK;
1603         w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1604         *w = *(w+1) = (char *)(w + 2);
1605         for (i=0; i<array->csize; i++) {
1606                 hi = array->items[i];
1607                 while(hi) {
1608                         strcpy(*w, hi->name);
1609                         nextword(w);
1610                         hi = hi->next;
1611                 }
1612         }
1613 }
1614
1615 static int hashwalk_next(var *v) {
1616
1617         char **w;
1618
1619         w = v->x.walker;
1620         if (*(w+1) == *w)
1621                 return FALSE;
1622
1623         setvar_s(v, nextword(w+1));
1624         return TRUE;
1625 }
1626
1627 /* evaluate node, return 1 when result is true, 0 otherwise */
1628 static int ptest(node *pattern) {
1629         static var v;
1630
1631         return istrue(evaluate(pattern, &v));
1632 }
1633
1634 /* read next record from stream rsm into a variable v */
/*
 * Reads the next record from stream rsm into v and stores the text that
 * terminated it into RT.
 *
 * Returns 1 when a record was stored, 0 when the input ended with nothing
 * left in the buffer, and -1 when the read failed (ERRNO is set then).
 *
 * Working variables:
 *   m     - base of the stream's buffer
 *   a     - offset of the not yet consumed data inside m (rsm->adv)
 *   p     - number of not yet consumed bytes (rsm->pos)
 *   b     - m + a, i.e. start of the not yet consumed data
 *   pp    - value of p before the most recent read
 *   so/eo - offsets (relative to b) where the record terminator starts
 *           and ends
 *   rp    - offset (relative to b) where the record text starts; nonzero
 *           only in the empty-RS mode, which skips leading newlines
 */
1635 static int awk_getline(rstream *rsm, var *v) {
1636
1637         char *b;
1638         regmatch_t pmatch[2];
1639         int a, p, pp=0, size;
1640         int fd, so, eo, r, rp;
1641         char c, *m, *s;
1642
1643         /* we're using our own buffer since we need access to accumulating
1644          * characters
1645          */
1646         fd = fileno(rsm->F);
1647         m = rsm->buffer;
1648         a = rsm->adv;
1649         p = rsm->pos;
1650         size = rsm->size;
1651         c = (char) rsplitter.n.info;
1652         rp = 0;
1653
1654         if (! m) qrealloc(&m, 256, &size);
1655         do {
1656                 b = m + a;
                /* default: the record is everything buffered, no terminator */
1657                 so = eo = p;
1658                 r = 1;
1659                 if (p > 0) {
1660                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1661                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1662                                                                                                 b, 1, pmatch, 0) == 0) {
1663                                         so = pmatch[0].rm_so;
1664                                         eo = pmatch[0].rm_eo;
                                        /* a match touching the end of the
                                         * buffered data is not accepted yet:
                                         * more input is read first */
1665                                         if (b[eo] != '\0')
1666                                                 break;
1667                                 }
1668                         } else if (c != '\0') {
                                /* single-character RS; only the bytes added
                                 * by the last read need to be searched */
1669                                 s = strchr(b+pp, c);
1670                                 if (s) {
1671                                         so = eo = s-b;
1672                                         eo++;
1673                                         break;
1674                                 }
1675                         } else {
                                /* empty RS: skip leading newlines, then the
                                 * record ends at a run of two or more
                                 * newlines */
1676                                 while (b[rp] == '\n')
1677                                         rp++;
1678                                 s = strstr(b+rp, "\n\n");
1679                                 if (s) {
1680                                         so = eo = s-b;
1681                                         while (b[eo] == '\n') eo++;
1682                                         if (b[eo] != '\0')
1683                                                 break;
1684                                 }
1685                         }
1686                 }
1687
                /* no complete record yet: move the pending data to the
                 * front of the buffer (including its NUL), make room and
                 * read more */
1688                 if (a > 0) {
1689                         memmove(m, (const void *)(m+a), p+1);
1690                         b = m;
1691                         a = 0;
1692                 }
1693
1694                 qrealloc(&m, a+p+128, &size);
1695                 b = m + a;
1696                 pp = p;
1697                 p += safe_read(fd, b+p, size-p-1);
                /* p shrinks only when safe_read() returned a negative
                 * value, which is treated as a read error */
1698                 if (p < pp) {
1699                         p = 0;
1700                         r = 0;
1701                         setvar_i(V[ERRNO], errno);
1702                 }
1703                 b[p] = '\0';
1704
        /* keep going while the last read delivered new bytes */
1705         } while (p > pp);
1706
1707         if (p == 0) {
                /* nothing buffered: 1 becomes 0 (end of input), 0 becomes
                 * -1 (read error) */
1708                 r--;
1709         } else {
                /* temporarily NUL-terminate the record and then the
                 * terminator text so each can be copied out as a string */
1710                 c = b[so]; b[so] = '\0';
1711                 setvar_s(v, b+rp);
1712                 v->type |= VF_USER;
1713                 b[so] = c;
1714                 c = b[eo]; b[eo] = '\0';
1715                 setvar_s(V[RT], b+so);
1716                 b[eo] = c;
1717         }
1718
        /* save the buffer state; the consumed record and its terminator
         * are skipped by advancing past eo */
1719         rsm->buffer = m;
1720         rsm->adv = a + eo;
1721         rsm->pos = p - eo;
1722         rsm->size = size;
1723
1724         return r;
1725 }
1726
1727 static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1728
1729         int r=0;
1730         char c, *s=format;
1731
1732         if (int_as_int && n == (int)n) {
1733                 r = snprintf(b, size, "%d", (int)n);
1734         } else {
1735                 do { c = *s; } while (*s && *++s);
1736                 if (strchr("diouxX", c)) {
1737                         r = snprintf(b, size, format, (int)n);
1738                 } else if (strchr("eEfgG", c)) {
1739                         r = snprintf(b, size, format, n);
1740                 } else {
1741                         runtime_error(EMSG_INV_FMT);
1742                 }
1743         }
1744         return r;
1745 }
1746
1747
1748 /* formatted output into an allocated buffer, return ptr to buffer */
1749 static char *awk_printf(node *n) {
1750
1751         char *b = NULL;
1752         char *fmt, *s, *s1, *f;
1753         int i, j, incr, bsize;
1754         char c, c1;
1755         var *v, *arg;
1756
1757         v = nvalloc(1);
1758         fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1759
1760         i = 0;
1761         while (*f) {
1762                 s = f;
1763                 while (*f && (*f != '%' || *(++f) == '%'))
1764                         f++;
1765                 while (*f && !isalpha(*f)) 
1766                         f++;
1767
1768                 incr = (f - s) + MAXVARFMT;
1769                 qrealloc(&b, incr+i, &bsize);
1770                 c = *f; if (c != '\0') f++;
1771                 c1 = *f ; *f = '\0';
1772                 arg = evaluate(nextarg(&n), v);
1773
1774                 j = i;
1775                 if (c == 'c' || !c) {
1776                         i += sprintf(b+i, s,
1777                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1778
1779                 } else if (c == 's') {
1780                     s1 = getvar_s(arg);
1781                         qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1782                         i += sprintf(b+i, s, s1);
1783
1784                 } else {
1785                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1786                 }
1787                 *f = c1;
1788
1789                 /* if there was an error while sprintf, return value is negative */
1790                 if (i < j) i = j;
1791
1792         }
1793
1794         b = xrealloc(b, i+1);
1795         free(fmt);
1796         nvfree(v);
1797         b[i] = '\0';
1798         return b;
1799 }
1800
1801 /* common substitution routine
1802  * replace (nm) substring of (src) that match (n) with (repl), store
1803  * result into (dest), return number of substitutions. If nm=0, replace
1804  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1805  * subexpression matching (\1-\9)
1806  */
1807 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1808
1809         char *ds = NULL;
1810         char *sp, *s;
1811         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1812         regmatch_t pmatch[10];
1813         regex_t sreg, *re;
1814
1815         re = as_regex(rn, &sreg);
1816         if (! src) src = V[F0];
1817         if (! dest) dest = V[F0];
1818
1819         i = di = 0;
1820         sp = getvar_s(src);
1821         rl = bb_strlen(repl);
1822         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1823                 so = pmatch[0].rm_so;
1824                 eo = pmatch[0].rm_eo;
1825
1826                 qrealloc(&ds, di + eo + rl, &dssize);
1827                 memcpy(ds + di, sp, eo);
1828                 di += eo;
1829                 if (++i >= nm) {
1830                         /* replace */
1831                         di -= (eo - so);
1832                         nbs = 0;
1833                         for (s = repl; *s; s++) {
1834                                 ds[di++] = c = *s;
1835                                 if (c == '\\') {
1836                                         nbs++;
1837                                         continue;
1838                                 }
1839                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1840                                         di -= ((nbs + 3) >> 1);
1841                                         j = 0;
1842                                         if (c != '&') {
1843                                                 j = c - '0';
1844                                                 nbs++;
1845                                         }
1846                                         if (nbs % 2) {
1847                                                 ds[di++] = c;
1848                                         } else {
1849                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1850                                                 qrealloc(&ds, di + rl + n, &dssize);
1851                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1852                                                 di += n;
1853                                         }
1854                                 }
1855                                 nbs = 0;
1856                         }
1857                 }
1858
1859                 sp += eo;
1860                 if (i == nm) break;
1861                 if (eo == so) {
1862                         if (! (ds[di++] = *sp++)) break;
1863                 }
1864         }
1865
1866         qrealloc(&ds, di + strlen(sp), &dssize);
1867         strcpy(ds + di, sp);
1868         setvar_p(dest, ds);
1869         if (re == &sreg) regfree(re);
1870         return i;
1871 }
1872
1873 static var *exec_builtin(node *op, var *res) {
1874
1875         int (*to_xxx)(int);
1876         var *tv;
1877         node *an[4];
1878         var  *av[4];
1879         char *as[4];
1880         regmatch_t pmatch[2];
1881         regex_t sreg, *re;
1882         static tsplitter tspl;
1883         node *spl;
1884         unsigned long isr, info;
1885         int nargs;
1886         time_t tt;
1887         char *s, *s1;
1888         int i, l, ll, n;
1889
1890         tv = nvalloc(4);
1891         isr = info = op->info;
1892         op = op->l.n;
1893
1894         av[2] = av[3] = NULL;
1895         for (i=0 ; i<4 && op ; i++) {
1896                 an[i] = nextarg(&op);
1897                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1898                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1899                 isr >>= 1;
1900         }
1901
1902         nargs = i;
1903         if (nargs < (info >> 30))
1904                 runtime_error(EMSG_TOO_FEW_ARGS);
1905
1906         switch (info & OPNMASK) {
1907
1908           case B_a2:
1909 #ifdef CONFIG_FEATURE_AWK_MATH
1910                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1911 #else
1912                 runtime_error(EMSG_NO_MATH);
1913 #endif
1914                 break;
1915
1916           case B_sp:
1917                 if (nargs > 2) {
1918                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1919                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1920                 } else {
1921                         spl = &fsplitter.n;
1922                 }
1923
1924                 n = awk_split(as[0], spl, &s);
1925                 s1 = s;
1926                 clear_array(iamarray(av[1]));
1927                 for (i=1; i<=n; i++)
1928                         setari_u(av[1], i, nextword(&s1));
1929                 free(s);
1930                 setvar_i(res, n);
1931                 break;
1932
1933           case B_ss:
1934                 l = bb_strlen(as[0]);
1935                 i = getvar_i(av[1]) - 1;
1936                 if (i>l) i=l; if (i<0) i=0;
1937                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1938                 if (n<0) n=0;
1939                 s = xmalloc(n+1);
1940                 strncpy(s, as[0]+i, n);
1941                 s[n] = '\0';
1942                 setvar_p(res, s);
1943                 break;
1944
1945           case B_lo:
1946                 to_xxx = tolower;
1947                 goto lo_cont;
1948
1949           case B_up:
1950                 to_xxx = toupper;
1951 lo_cont:
1952                 s1 = s = bb_xstrdup(as[0]);
1953                 while (*s1) {
1954                         *s1 = (*to_xxx)(*s1);
1955                         s1++;
1956                 }
1957                 setvar_p(res, s);
1958                 break;
1959
1960           case B_ix:
1961                 n = 0;
1962                 ll = bb_strlen(as[1]);
1963                 l = bb_strlen(as[0]) - ll;
1964                 if (ll > 0 && l >= 0) {
1965                         if (! icase) {
1966                                 s = strstr(as[0], as[1]);
1967                                 if (s) n = (s - as[0]) + 1;
1968                         } else {
1969                                 /* this piece of code is terribly slow and
1970                                  * really should be rewritten
1971                                  */
1972                                 for (i=0; i<=l; i++) {
1973                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1974                                                 n = i+1;
1975                                                 break;
1976                                         }
1977                                 }
1978                         }
1979                 }
1980                 setvar_i(res, n);
1981                 break;
1982
1983           case B_ti:
1984                 if (nargs > 1)
1985                         tt = getvar_i(av[1]);
1986                 else
1987                         time(&tt);
1988                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1989                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1990                 buf[i] = '\0';
1991                 setvar_s(res, buf);
1992                 break;
1993
1994           case B_ma:
1995                 re = as_regex(an[1], &sreg);
1996                 n = regexec(re, as[0], 1, pmatch, 0);
1997                 if (n == 0) {
1998                         pmatch[0].rm_so++;
1999                         pmatch[0].rm_eo++;
2000                 } else {
2001                         pmatch[0].rm_so = 0;
2002                         pmatch[0].rm_eo = -1;
2003                 }
2004                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2005                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2006                 setvar_i(res, pmatch[0].rm_so);
2007                 if (re == &sreg) regfree(re);
2008                 break;
2009
2010           case B_ge:
2011                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2012                 break;
2013
2014           case B_gs:
2015                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2016                 break;
2017
2018           case B_su:
2019                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2020                 break;
2021         }
2022
2023         nvfree(tv);
2024         return res;
2025 }
2026
2027 /*
2028  * Evaluate node - the heart of the program. Supplied with subtree
2029  * and place where to store result. returns ptr to result.
2030  */
/* XC(n) shifts n right by 8 bits; evaluate() applies it both to the value it
 * switches on (opinfo & OPCLSMASK) and to each OC_* case label. */
2031 #define XC(n) ((n) >> 8)
2032
2033 static var *evaluate(node *op, var *res) {
2034
2035         /* This procedure is recursive so we should count every byte */
2036         static var *fnargs = NULL;
2037         static unsigned int seed = 1;
2038         static regex_t sreg;
2039         node *op1;
2040         var *v1;
2041         union {
2042                 var *v;
2043                 char *s;
2044                 double d;
2045                 int i;
2046         } L, R;
2047         unsigned long opinfo;
2048         short opn;
2049         union {
2050                 char *s;
2051                 rstream *rsm;
2052                 FILE *F;
2053                 var *v;
2054                 regex_t *re;
2055                 unsigned long info;
2056         } X;
2057
2058         if (! op)
2059                 return setvar_s(res, NULL);
2060
2061         v1 = nvalloc(2);
2062
2063         while (op) {
2064
2065                 opinfo = op->info;
2066                 opn = (short)(opinfo & OPNMASK);
2067                 lineno = op->lineno;
2068
2069                 /* execute inevitable things */
2070                 op1 = op->l.n;
2071                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2072                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2073                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2074                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2075                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2076
2077                 switch (XC(opinfo & OPCLSMASK)) {
2078
2079                   /* -- iterative node type -- */
2080
2081                   /* test pattern */
2082                   case XC( OC_TEST ):
2083                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2084                                 /* it's range pattern */
2085                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2086                                         op->info |= OF_CHECKED;
2087                                         if (ptest(op1->r.n))
2088                                                 op->info &= ~OF_CHECKED;
2089
2090                                         op = op->a.n;
2091                                 } else {
2092                                         op = op->r.n;
2093                                 }
2094                         } else {
2095                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2096                         }
2097                         break;
2098
2099                   /* just evaluate an expression, also used as unconditional jump */
2100                   case XC( OC_EXEC ):
2101                         break;
2102
2103                   /* branch, used in if-else and various loops */
2104                   case XC( OC_BR ):
2105                         op = istrue(L.v) ? op->a.n : op->r.n;
2106                         break;
2107
2108                   /* initialize for-in loop */
2109                   case XC( OC_WALKINIT ):
2110                         hashwalk_init(L.v, iamarray(R.v));
2111                         break;
2112
2113                   /* get next array item */
2114                   case XC( OC_WALKNEXT ):
2115                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2116                         break;
2117
2118                   case XC( OC_PRINT ):
2119                   case XC( OC_PRINTF ):
2120                         X.F = stdout;
2121                         if (op->r.n) {
2122                                 X.rsm = newfile(R.s);
2123                                 if (! X.rsm->F) {
2124                                         if (opn == '|') {
2125                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2126                                                         bb_perror_msg_and_die("popen");
2127                                                 X.rsm->is_pipe = 1;
2128                                         } else {
2129                                                 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2130                                         }
2131                                 }
2132                                 X.F = X.rsm->F;
2133                         }
2134
2135                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2136                                 if (! op1) {
2137                                         fputs(getvar_s(V[F0]), X.F);
2138                                 } else {
2139                                         while (op1) {
2140                                                 L.v = evaluate(nextarg(&op1), v1);
2141                                                 if (L.v->type & VF_NUMBER) {
2142                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2143                                                                                                                 getvar_i(L.v), TRUE);
2144                                                         fputs(buf, X.F);
2145                                                 } else {
2146                                                         fputs(getvar_s(L.v), X.F);
2147                                                 }
2148
2149                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2150                                         }
2151                                 }
2152                                 fputs(getvar_s(V[ORS]), X.F);
2153
2154                         } else {        /* OC_PRINTF */
2155                                 L.s = awk_printf(op1);
2156                                 fputs(L.s, X.F);
2157                                 free(L.s);
2158                         }
2159                         fflush(X.F);
2160                         break;
2161
2162                   case XC( OC_DELETE ):
2163                         X.info = op1->info & OPCLSMASK;
2164                         if (X.info == OC_VAR) {
2165                                 R.v = op1->l.v;
2166                         } else if (X.info == OC_FNARG) {
2167                                 R.v = &fnargs[op1->l.i];
2168                         } else {
2169                                 runtime_error(EMSG_NOT_ARRAY);
2170                         }
2171
2172                         if (op1->r.n) {
2173                                 clrvar(L.v);
2174                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2175                                 hash_remove(iamarray(R.v), L.s);
2176                         } else {
2177                                 clear_array(iamarray(R.v));
2178                         }
2179                         break;
2180
2181                   case XC( OC_NEWSOURCE ):
2182                         programname = op->l.s;
2183                         break;
2184
2185                   case XC( OC_RETURN ):
2186                         copyvar(res, L.v);
2187                         break;
2188
2189                   case XC( OC_NEXTFILE ):
2190                         nextfile = TRUE;
2191                   case XC( OC_NEXT ):
2192                         nextrec = TRUE;
2193                   case XC( OC_DONE ):
2194                         clrvar(res);
2195                         break;
2196
2197                   case XC( OC_EXIT ):
2198                         awk_exit(L.d);
2199
2200                   /* -- recursive node type -- */
2201
2202                   case XC( OC_VAR ):
2203                         L.v = op->l.v;
2204                         if (L.v == V[NF])
2205                                 split_f0();
2206                         goto v_cont;
2207
2208                   case XC( OC_FNARG ):
2209                         L.v = &fnargs[op->l.i];
2210
2211 v_cont:
2212                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2213                         break;
2214
2215                   case XC( OC_IN ):
2216                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2217                         break;
2218
2219                   case XC( OC_REGEXP ):
2220                         op1 = op;
2221                         L.s = getvar_s(V[F0]);
2222                         goto re_cont;
2223
2224                   case XC( OC_MATCH ):
2225                         op1 = op->r.n;
2226 re_cont:
2227                         X.re = as_regex(op1, &sreg);
2228                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2229                         if (X.re == &sreg) regfree(X.re);
2230                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2231                         break;
2232
2233                   case XC( OC_MOVE ):
2234                         /* if source is a temporary string, jusk relink it to dest */
2235                         if (R.v == v1+1 && R.v->string) {
2236                                 res = setvar_p(L.v, R.v->string);
2237                                 R.v->string = NULL;
2238                         } else {
2239                                 res = copyvar(L.v, R.v);
2240                         }
2241                         break;
2242
2243                   case XC( OC_TERNARY ):
2244                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2245                                 runtime_error(EMSG_POSSIBLE_ERROR);
2246                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2247                         break;
2248
2249                   case XC( OC_FUNC ):
2250                         if (! op->r.f->body.first)
2251                                 runtime_error(EMSG_UNDEF_FUNC);
2252
2253                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2254                         while (op1) {
2255                                 L.v = evaluate(nextarg(&op1), v1);
2256                                 copyvar(R.v, L.v);
2257                                 R.v->type |= VF_CHILD;
2258                                 R.v->x.parent = L.v;
2259                                 if (++R.v - X.v >= op->r.f->nargs)
2260                                         break;
2261                         }
2262
2263                         R.v = fnargs;
2264                         fnargs = X.v;
2265
2266                         L.s = programname;
2267                         res = evaluate(op->r.f->body.first, res);
2268                         programname = L.s;
2269
2270                         nvfree(fnargs);
2271                         fnargs = R.v;
2272                         break;
2273
2274                   case XC( OC_GETLINE ):
2275                   case XC( OC_PGETLINE ):
2276                         if (op1) {
2277                                 X.rsm = newfile(L.s);
2278                                 if (! X.rsm->F) {
2279                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2280                                                 X.rsm->F = popen(L.s, "r");
2281                                                 X.rsm->is_pipe = TRUE;
2282                                         } else {
2283                                                 X.rsm->F = fopen(L.s, "r");             /* not bb_xfopen! */
2284                                         }
2285                                 }
2286                         } else {
2287                                 if (! iF) iF = next_input_file();
2288                                 X.rsm = iF;
2289                         }
2290
2291                         if (! X.rsm->F) {
2292                                 setvar_i(V[ERRNO], errno);
2293                                 setvar_i(res, -1);
2294                                 break;
2295                         }
2296
2297                         if (! op->r.n)
2298                                 R.v = V[F0];
2299
2300                         L.i = awk_getline(X.rsm, R.v);
2301                         if (L.i > 0) {
2302                                 if (! op1) {
2303                                         incvar(V[FNR]);
2304                                         incvar(V[NR]);
2305                                 }
2306                         }
2307                         setvar_i(res, L.i);
2308                         break;
2309
2310                   /* simple builtins */
2311                   case XC( OC_FBLTIN ):
2312                         switch (opn) {
2313
2314                           case F_in:
2315                                 R.d = (int)L.d;
2316                                 break;
2317
2318                           case F_rn:
2319                                 R.d =  (double)rand() / (double)RAND_MAX;
2320                                 break;
2321
2322 #ifdef CONFIG_FEATURE_AWK_MATH
2323                           case F_co:
2324                                 R.d = cos(L.d);
2325                                 break;
2326
2327                           case F_ex:
2328                                 R.d = exp(L.d);
2329                                 break;
2330
2331                           case F_lg:
2332                                 R.d = log(L.d);
2333                                 break;
2334
2335                           case F_si:
2336                                 R.d = sin(L.d);
2337                                 break;
2338
2339                           case F_sq:
2340                                 R.d = sqrt(L.d);
2341                                 break;
2342 #else
2343                           case F_co:
2344                           case F_ex:
2345                           case F_lg:
2346                           case F_si:
2347                           case F_sq:
2348                                 runtime_error(EMSG_NO_MATH);
2349                                 break;
2350 #endif
2351
2352                           case F_sr:
2353                                 R.d = (double)seed;
2354                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2355                                 srand(seed);
2356                                 break;
2357
2358                           case F_ti:
2359                                 R.d = time(NULL);
2360                                 break;
2361
2362                           case F_le:
2363                                 if (! op1)
2364                                         L.s = getvar_s(V[F0]);
2365                                 R.d = bb_strlen(L.s);
2366                                 break;
2367
2368                           case F_sy:
2369                                 fflush(NULL);
2370                                 R.d = (L.s && *L.s) ? system(L.s) : 0;
2371                                 break;
2372
2373                           case F_ff:
2374                                 if (! op1)
2375                                         fflush(stdout);
2376                                 else {
2377                                         if (L.s && *L.s) {
2378                                                 X.rsm = newfile(L.s);
2379                                                 fflush(X.rsm->F);
2380                                         } else {
2381                                                 fflush(NULL);
2382                                         }
2383                                 }
2384                                 break;
2385
2386                           case F_cl:
2387                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2388                                 if (X.rsm) {
2389                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2390                                         free(X.rsm->buffer);
2391                                         hash_remove(fdhash, L.s);
2392                                 }
2393                                 if (R.i != 0)
2394                                         setvar_i(V[ERRNO], errno);
2395                                 R.d = (double)R.i;
2396                                 break;
2397                         }
2398                         setvar_i(res, R.d);
2399                         break;
2400
2401                   case XC( OC_BUILTIN ):
2402                         res = exec_builtin(op, res);
2403                         break;
2404
2405                   case XC( OC_SPRINTF ):
2406                         setvar_p(res, awk_printf(op1));
2407                         break;
2408
2409                   case XC( OC_UNARY ):
2410                         X.v = R.v;
2411                         L.d = R.d = getvar_i(R.v);
2412                         switch (opn) {
2413                           case 'P':
2414                                 L.d = ++R.d;
2415                                 goto r_op_change;
2416                           case 'p':
2417                                 R.d++;
2418                                 goto r_op_change;
2419                           case 'M':
2420                                 L.d = --R.d;
2421                                 goto r_op_change;
2422                           case 'm':
2423                                 R.d--;
2424                                 goto r_op_change;
2425                           case '!':
2426                             L.d = istrue(X.v) ? 0 : 1;
2427                                 break;
2428                           case '-':
2429                                 L.d = -R.d;
2430                                 break;
2431                         r_op_change:
2432                                 setvar_i(X.v, R.d);
2433                         }
2434                         setvar_i(res, L.d);
2435                         break;
2436
2437                   case XC( OC_FIELD ):
2438                         R.i = (int)getvar_i(R.v);
2439                         if (R.i == 0) {
2440                                 res = V[F0];
2441                         } else {
2442                                 split_f0();
2443                                 if (R.i > nfields)
2444                                         fsrealloc(R.i);
2445
2446                                 res = &Fields[R.i-1];
2447                         }
2448                         break;
2449
2450                   /* concatenation (" ") and index joining (",") */
2451                   case XC( OC_CONCAT ):
2452                   case XC( OC_COMMA ):
2453                         opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2454                         X.s = (char *)xmalloc(opn);
2455                         strcpy(X.s, L.s);
2456                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2457                                 L.s = getvar_s(V[SUBSEP]);
2458                                 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2459                                 strcat(X.s, L.s);
2460                         }
2461                         strcat(X.s, R.s);
2462                         setvar_p(res, X.s);
2463                         break;
2464
2465                   case XC( OC_LAND ):
2466                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2467                         break;
2468
2469                   case XC( OC_LOR ):
2470                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2471                         break;
2472
2473                   case XC( OC_BINARY ):
2474                   case XC( OC_REPLACE ):
2475                         R.d = getvar_i(R.v);
2476                         switch (opn) {
2477                           case '+':
2478                                 L.d += R.d;
2479                                 break;
2480                           case '-':
2481                                 L.d -= R.d;
2482                                 break;
2483                           case '*':
2484                                 L.d *= R.d;
2485                                 break;
2486                           case '/':
2487                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2488                                 L.d /= R.d;
2489                                 break;
2490                           case '&':
2491 #ifdef CONFIG_FEATURE_AWK_MATH
2492                                 L.d = pow(L.d, R.d);
2493 #else
2494                                 runtime_error(EMSG_NO_MATH);
2495 #endif
2496                                 break;
2497                           case '%':
2498                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2499                                 L.d -= (int)(L.d / R.d) * R.d;
2500                                 break;
2501                         }
2502                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2503                         break;
2504
2505                   case XC( OC_COMPARE ):
2506                         if (is_numeric(L.v) && is_numeric(R.v)) {
2507                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2508                         } else {
2509                                 L.s = getvar_s(L.v);
2510                                 R.s = getvar_s(R.v);
2511                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2512                         }
2513                         switch (opn & 0xfe) {
2514                           case 0:
2515                                 R.i = (L.d > 0);
2516                                 break;
2517                           case 2:
2518                                 R.i = (L.d >= 0);
2519                                 break;
2520                           case 4:
2521                                 R.i = (L.d == 0);
2522                                 break;
2523                         }
2524                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2525                         break;
2526
2527                   default:
2528                         runtime_error(EMSG_POSSIBLE_ERROR);
2529                 }
2530                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2531                         op = op->a.n;
2532                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2533                         break;
2534                 if (nextrec)
2535                         break;
2536         }
2537         nvfree(v1);
2538         return res;
2539 }
2540
2541
2542 /* -------- main & co. -------- */
2543
2544 static int awk_exit(int r) {
2545
2546         unsigned int i;
2547         hash_item *hi;
2548         static var tv;
2549
2550         if (! exiting) {
2551                 exiting = TRUE;
2552                 evaluate(endseq.first, &tv);
2553         }
2554
2555         /* waiting for children */
2556         for (i=0; i<fdhash->csize; i++) {
2557                 hi = fdhash->items[i];
2558                 while(hi) {
2559                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2560                                 pclose(hi->data.rs.F);
2561                         hi = hi->next;
2562                 }
2563         }
2564
2565         exit(r);
2566 }
2567
2568 /* if expr looks like "var=value", perform assignment and return 1,
2569  * otherwise return 0 */
2570 static int is_assignment(char *expr) {
2571
2572         char *exprc, *s, *s0, *s1;
2573
2574         exprc = bb_xstrdup(expr);
2575         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2576                 free(exprc);
2577                 return FALSE;
2578         }
2579
2580         *(s++) = '\0';
2581         s0 = s1 = s;
2582         while (*s)
2583                 *(s1++) = nextchar(&s);
2584
2585         *s1 = '\0';
2586         setvar_u(newvar(exprc), s0);
2587         free(exprc);
2588         return TRUE;
2589 }
2590
2591 /* switch to next input file */
2592 static rstream *next_input_file(void) {
2593
2594         static rstream rsm;
2595         FILE *F = NULL;
2596         char *fname, *ind;
2597         static int files_happen = FALSE;
2598
2599         if (rsm.F) fclose(rsm.F);
2600         rsm.F = NULL;
2601         rsm.pos = rsm.adv = 0;
2602
2603         do {
2604                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2605                         if (files_happen)
2606                                 return NULL;
2607                         fname = "-";
2608                         F = stdin;
2609                 } else {
2610                         ind = getvar_s(incvar(V[ARGIND]));
2611                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2612                         if (fname && *fname && !is_assignment(fname))
2613                                 F = afopen(fname, "r");
2614                 }
2615         } while (!F);
2616
2617         files_happen = TRUE;
2618         setvar_s(V[FILENAME], fname);
2619         rsm.F = F;
2620         return &rsm;
2621 }
2622
2623 extern int awk_main(int argc, char **argv) {
2624
2625         char *s, *s1;
2626         int i, j, c;
2627         var *v;
2628         static var tv;
2629         char **envp;
2630         static int from_file = FALSE;
2631         rstream *rsm;
2632         FILE *F, *stdfiles[3];
2633         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2634
2635         /* allocate global buffer */
2636         buf = xmalloc(MAXVARFMT+1);
2637
2638         vhash = hash_init();
2639         ahash = hash_init();
2640         fdhash = hash_init();
2641         fnhash = hash_init();
2642
2643         /* initialize variables */
2644         for (i=0;  *vNames;  i++) {
2645                 V[i] = v = newvar(nextword(&vNames));
2646                 if (*vValues != '\377')
2647                         setvar_s(v, nextword(&vValues));
2648                 else
2649                         setvar_i(v, 0);
2650
2651                 if (*vNames == '*') {
2652                         v->type |= VF_SPECIAL;
2653                         vNames++;
2654                 }
2655         }
2656
2657         handle_special(V[FS]);
2658         handle_special(V[RS]);
2659
2660         stdfiles[0] = stdin;
2661         stdfiles[1] = stdout;
2662         stdfiles[2] = stderr;
2663         for (i=0; i<3; i++) {
2664                 rsm = newfile(nextword(&stdnames));
2665                 rsm->F = stdfiles[i];
2666         }
2667
2668         for (envp=environ; *envp; envp++) {
2669                 s = bb_xstrdup(*envp);
2670                 s1 = strchr(s, '=');
2671                 *(s1++) = '\0';
2672                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2673                 free(s);
2674         }
2675
2676         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2677                 switch (c) {
2678                         case 'F':
2679                                 setvar_s(V[FS], optarg);
2680                                 break;
2681                         case 'v':
2682                                 if (! is_assignment(optarg))
2683                                         bb_show_usage();
2684                                 break;
2685                         case 'f':
2686                                 from_file = TRUE;
2687                                 F = afopen(programname = optarg, "r");
2688                                 s = NULL;
2689                                 /* one byte is reserved for some trick in next_token */
2690                                 for (i=j=1; j>0; i+=j) {
2691                                         s = (char *)xrealloc(s, i+4096);
2692                                         j = fread(s+i, 1, 4094, F);
2693                                 }
2694                                 s[i] = '\0';
2695                                 fclose(F);
2696                                 parse_program(s+1);
2697                                 free(s);
2698                                 break;
2699                         case 'W':
2700                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2701                                 break;
2702
2703                         default:
2704                                 bb_show_usage();
2705                 }
2706         }
2707
2708         if (!from_file) {
2709                 if (argc == optind)
2710                         bb_show_usage();
2711                 programname="cmd. line";
2712                 parse_program(argv[optind++]);
2713
2714         }
2715
2716         /* fill in ARGV array */
2717         setvar_i(V[ARGC], argc - optind + 1);
2718         setari_u(V[ARGV], 0, "awk");
2719         for(i=optind; i < argc; i++)
2720                 setari_u(V[ARGV], i+1-optind, argv[i]);
2721
2722         evaluate(beginseq.first, &tv);
2723         if (! mainseq.first && ! endseq.first)
2724                 awk_exit(EXIT_SUCCESS);
2725
2726         /* input file could already be opened in BEGIN block */
2727         if (! iF) iF = next_input_file();
2728
2729         /* passing through input files */
2730         while (iF) {
2731
2732                 nextfile = FALSE;
2733                 setvar_i(V[FNR], 0);
2734
2735                 while ((c = awk_getline(iF, V[F0])) > 0) {
2736
2737                         nextrec = FALSE;
2738                         incvar(V[NR]);
2739                         incvar(V[FNR]);
2740                         evaluate(mainseq.first, &tv);
2741
2742                         if (nextfile)
2743                                 break;
2744                 }
2745
2746                 if (c < 0)
2747                         runtime_error(strerror(errno));
2748
2749                 iF = next_input_file();
2750
2751         }
2752
2753         awk_exit(EXIT_SUCCESS);
2754
2755         return 0;
2756 }
2757