Patch from Mike Castle to cleanup some modutils issues, in
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <time.h>
29 #include <math.h>
30 #include <ctype.h>
31 #include <getopt.h>
32 #include <regex.h>
33
34 #include "busybox.h"
35
36
37 #define MAXVARFMT       240
38 #define MINNVBLOCK      64
39
40 /* variable flags */
41 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
42 #define VF_ARRAY        0x0002  /* 1 = it's an array */
43
44 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
45 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
47 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
49 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
51
52 /* these flags are static, don't change them when value is changed */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
/* Variable: one awk value (scalar, array, or function argument).
 * `type` is a bit set of the VF_* flags; the flags decide which member of
 * the union `x` is meaningful and whether `number`/`string` hold a cached
 * conversion of each other (VF_CACHED).
 */
typedef struct var_s {
        unsigned short type;            /* flags */
        double number;                  /* numeric value, or cached numeric conversion of string */
        char *string;                   /* string value, or cached string conversion; may be NULL */
        union {
                int aidx;                               /* func arg index (on compilation stage) */
                struct xhash_s *array;  /* array ptr */
                struct var_s *parent;   /* for func args, ptr to actual parameter */
                char **walker;                  /* list of array elements (for..in) */
        } x;
} var;
67
/* Node chain (pattern-action chain, BEGIN, END, function bodies).
 * Keeps pointers to both ends of a sequence of nodes.
 */
typedef struct chain_s {
        struct node_s *first;   /* first node of the chain */
        struct node_s *last;    /* last node of the chain */
        char *programname;      /* NOTE(review): presumably the program source name used in diagnostics - confirm */
} chain;
74
/* Function: entry stored in the functions hash (see hash_item.data.f) */
typedef struct func_s {
        unsigned short nargs;   /* NOTE(review): presumably the number of declared arguments - confirm */
        struct chain_s body;    /* node chain of the function body */
} func;
80
/* I/O stream: entry stored in the redirect streams hash (see hash_item.data.rs) */
typedef struct rstream_s {
        FILE *F;                /* underlying stdio stream */
        char *buffer;           /* NOTE(review): presumably the read buffer; adv/size/pos look like its bookkeeping - confirm */
        int adv;
        int size;
        int pos;
        unsigned short is_pipe; /* NOTE(review): presumably nonzero when F was opened as a pipe - confirm */
} rstream;
90
/* One entry of an xhash bucket chain.
 * `data` must stay the first member: hash_search() hands out &data and
 * hash_find() stores that pointer back into a hash_item pointer.
 * `name` is the key; hash_find() allocates the item with extra room so the
 * whole NUL-terminated key fits behind the struct.
 */
typedef struct hash_item_s {
        union {
                struct var_s v;                 /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;                /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        char name[1];                           /* really it's longer */
} hash_item;
100
/* Chained hash table keyed by string.
 * `items` is an array of `csize` bucket heads; hash_rebuild() grows it to
 * PRIMES[nprime] and advances `nprime`.
 */
typedef struct xhash_s {
        unsigned int nel;                                       /* num of elements */
        unsigned int csize;                                     /* current hash size */
        unsigned int nprime;                            /* next hash size in PRIMES[] */
        unsigned int glen;                                      /* summary length of item names */
        struct hash_item_s **items;
} xhash;
108
/* Tree node of the parsed program.
 * NOTE(review): `info` presumably carries the operation class, operand
 * flags and priority bits in the same layout as the tokeninfo[] entries,
 * and `l`/`r`/`a` look like left operand, right operand and an additional
 * link - confirm against the parser and evaluate().
 */
typedef struct node_s {
        unsigned long info;
        unsigned short lineno;
        union {
                struct node_s *n;
                var *v;
                int i;
                char *s;
                regex_t *re;
        } l;
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
                int argno;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
130
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
133         int size;
134         var *pos;
135         struct nvblock_s *prev;
136         struct nvblock_s *next;
137         var nv[0];
138 } nvblock;
139
/* A splitter: a node bundled with storage for two compiled regexes.
 * NOTE(review): the two regex_t slots presumably hold the case-sensitive
 * and case-insensitive compilations referenced by n.l.re / n.r.ire - confirm.
 */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
144
145 /* simple token classes */
146 /* Order and hex values are very important!!!  See next_token() */
147 #define TC_SEQSTART      1                              /* ( */
148 #define TC_SEQTERM      (1 << 1)                /* ) */
149 #define TC_REGEXP       (1 << 2)                /* /.../ */
150 #define TC_OUTRDR       (1 << 3)                /* | > >> */
151 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
152 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
153 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
154 #define TC_IN           (1 << 7)
155 #define TC_COMMA        (1 << 8)
156 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
157 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
158 #define TC_ARRTERM      (1 << 11)               /* ] */
159 #define TC_GRPSTART     (1 << 12)               /* { */
160 #define TC_GRPTERM      (1 << 13)               /* } */
161 #define TC_SEMICOL      (1 << 14)
162 #define TC_NEWLINE      (1 << 15)
163 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
164 #define TC_WHILE        (1 << 17)
165 #define TC_ELSE         (1 << 18)
166 #define TC_BUILTIN      (1 << 19)
167 #define TC_GETLINE      (1 << 20)
168 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
169 #define TC_BEGIN        (1 << 22)
170 #define TC_END          (1 << 23)
171 #define TC_EOF          (1 << 24)
172 #define TC_VARIABLE     (1 << 25)
173 #define TC_ARRAY        (1 << 26)
174 #define TC_FUNCTION     (1 << 27)
175 #define TC_STRING       (1 << 28)
176 #define TC_NUMBER       (1 << 29)
177
178 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
179
180 /* combined token classes */
181 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
187 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
188
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193 /* discard newlines after these */
194 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195         TC_BINOP | TC_OPTERM)
196
197 /* what can expression begin with */
198 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205         TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
207
208 #define OF_RES1         0x010000
209 #define OF_RES2         0x020000
210 #define OF_STR1         0x040000
211 #define OF_STR2         0x080000
212 #define OF_NUM1         0x100000
213 #define OF_CHECKED      0x200000
214
215 /* combined operator flags */
216 #define xx      0
217 #define xV      OF_RES2
218 #define xS      (OF_RES2 | OF_STR2)
219 #define Vx      OF_RES1
220 #define VV      (OF_RES1 | OF_RES2)
221 #define Nx      (OF_RES1 | OF_NUM1)
222 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx      (OF_RES1 | OF_STR1)
224 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
227 #define OPCLSMASK       0xFF00
228 #define OPNMASK         0x007F
229
230 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
231  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
233  */
234 #define P(x)    (x << 24)
235 #define PRIMASK         0x7F000000
236 #define PRIMASK2        0x7E000000
237
238 /* Operation classes */
239
240 #define SHIFT_TIL_THIS  0x0600
241 #define RECUR_FROM_THIS 0x1000
242
243 enum {
244         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
245         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
246
247         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
248         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
249         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
250
251         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
252         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
253         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
254         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
255         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
256         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
257         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
258         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
259         OC_DONE=0x2800,
260
261         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
262         ST_WHILE=0x3300
263 };
264
265 /* simple builtins */
266 enum {
267         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
268         F_ti,   F_le,   F_sy,   F_ff,   F_cl
269 };
270
271 /* builtins */
272 enum {
273         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
274         B_ge,   B_gs,   B_su
275 };
276
277 /* tokens and their corresponding info values */
278
279 #define NTC             "\377"          /* switch to next token class (tc<<1) */
280 #define NTCC    '\377'
281
282 #define OC_B    OC_BUILTIN
283
284 static char * const tokenlist =
285         "\1("           NTC
286         "\1)"           NTC
287         "\1/"           NTC                                                                     /* REGEXP */
288         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
289         "\2++"          "\2--"          NTC                                             /* UOPPOST */
290         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
291         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
292         "\2*="          "\2/="          "\2%="          "\2^="
293         "\1+"           "\1-"           "\3**="         "\2**"
294         "\1/"           "\1%"           "\1^"           "\1*"
295         "\2!="          "\2>="          "\2<="          "\1>"
296         "\1<"           "\2!~"          "\1~"           "\2&&"
297         "\2||"          "\1?"           "\1:"           NTC
298         "\2in"          NTC
299         "\1,"           NTC
300         "\1|"           NTC
301         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
302         "\1]"           NTC
303         "\1{"           NTC
304         "\1}"           NTC
305         "\1;"           NTC
306         "\1\n"          NTC
307         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
308         "\10continue"                   "\6delete"      "\5print"
309         "\6printf"      "\4next"        "\10nextfile"
310         "\6return"      "\4exit"        NTC
311         "\5while"       NTC
312         "\4else"        NTC
313
314         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
315         "\3cos"         "\3exp"         "\3int"         "\3log"
316         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
317         "\6gensub"      "\4gsub"        "\5index"       "\6length"
318         "\5match"       "\5split"       "\7sprintf"     "\3sub"
319         "\6substr"      "\7systime"     "\10strftime"
320         "\7tolower"     "\7toupper"     NTC
321         "\7getline"     NTC
322         "\4func"        "\10function"   NTC
323         "\5BEGIN"       NTC
324         "\3END"         "\0"
325         ;
326
327 static unsigned long tokeninfo[] = {
328
329         0,
330         0,
331         OC_REGEXP,
332         xS|'a',         xS|'w',         xS|'|',
333         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
334         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
335                 OC_FIELD|xV|P(5),
336         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
337                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
338         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
339                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
340         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
341                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
342         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
343                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
344         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
345                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
346         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
347                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
348         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
349                 OC_COLON|xx|P(67)|':',
350         OC_IN|SV|P(49),
351         OC_COMMA|SS|P(80),
352         OC_PGETLINE|SV|P(37),
353         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
354                 OC_UNARY|xV|P(19)|'!',
355         0,
356         0,
357         0,
358         0,
359         0,
360         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
361         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
362         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
363         OC_RETURN|Vx,   OC_EXIT|Nx,
364         ST_WHILE,
365         0,
366
367         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
372         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
373         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374         OC_GETLINE|SV|P(0),
375         0,      0,
376         0,
377         0
378 };
379
380 /* internal variable names and their initial values       */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
382 enum {
383         CONVFMT=0,      OFMT,           FS,                     OFS,
384         ORS,            RS,                     RT,                     FILENAME,
385         SUBSEP,         ARGIND,         ARGC,           ARGV,
386         ERRNO,          FNR,
387         NR,                     NF,                     IGNORECASE,
388         ENVIRON,        F0,                     _intvarcount_
389 };
390
391 static char * vNames =
392         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
393         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
394         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
395         "ERRNO\0"       "FNR\0"
396         "NR\0"          "NF\0*"         "IGNORECASE\0*"
397         "ENVIRON\0"     "$\0*"          "\0";
398
399 static char * vValues =
400         "%.6g\0"        "%.6g\0"        " \0"           " \0"
401         "\n\0"          "\n\0"          "\0"            "\0"
402         "\034\0"
403         "\377";
404
/* hash size may grow to these values */
/* FIRST_PRIME is the initial bucket count; it must not end in a semicolon
 * so that it can be used inside expressions as well as in assignments.
 */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
409
410 /* globals */
411
412 extern char **environ;
413
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
418 static rstream *iF;
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
420 static char *programname;
421 static short lineno;
422 static int is_f0_split;
423 static int nfields = 0;
424 static var *Fields = NULL;
425 static tsplitter fsplitter, rsplitter;
426 static nvblock *cb = NULL;
427 static char *pos;
428 static char *buf;
429 static int icase = FALSE;
430 static int exiting = FALSE;
431
432 static struct {
433         unsigned long tclass;
434         unsigned long info;
435         char *string;
436         double number;
437         short lineno;
438         int rollback;
439 } t;
440
441 /* function prototypes */
442 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
443 static void handle_special(var *);
444 static node *parse_expr(unsigned long);
445 static void chain_group(void);
446 static var *evaluate(node *, var *);
447 static rstream *next_input_file(void);
448 static int fmt_num(char *, int, char *, double, int);
449 static int awk_exit(int);
450
451 /* ---- error handling ---- */
452
453 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
454 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
455 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
456 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
457 static const char EMSG_INV_FMT[] = "Invalid format specifier";
458 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
459 static const char EMSG_NOT_ARRAY[] = "Not an array";
460 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
461 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
462 #ifndef CONFIG_FEATURE_AWK_MATH
463 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
464 #endif
465
466 static void syntax_error(const char * const message)
467 {
468         bb_error_msg("%s:%i: %s", programname, lineno, message);
469         exit(1);
470 }
471
472 #define runtime_error(x) syntax_error(x)
473
474
475 /* ---- hash stuff ---- */
476
/* Hash a NUL-terminated name.
 * Every character c is folded in as  h = c + h * 63  (spelled as a shift
 * and a subtraction) using unsigned wrap-around arithmetic. Callers reduce
 * the result modulo the table size.
 */
static unsigned int hashidx(char *name)
{
	unsigned int h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = *p + (h << 6) - h;

	return h;
}
484
485 /* create new hash */
486 static xhash *hash_init(void) {
487
488         xhash *newhash;
489
490         newhash = (xhash *)xcalloc(1, sizeof(xhash));
491         newhash->csize = FIRST_PRIME;
492         newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
493
494         return newhash;
495 }
496
497 /* find item in hash, return ptr to data, NULL if not found */
498 static void *hash_search(xhash *hash, char *name) {
499
500         hash_item *hi;
501
502         hi = hash->items [ hashidx(name) % hash->csize ];
503         while (hi) {
504                 if (strcmp(hi->name, name) == 0)
505                         return &(hi->data);
506                 hi = hi->next;
507         }
508         return NULL;
509 }
510
511 /* grow hash if it becomes too big */
512 static void hash_rebuild(xhash *hash) {
513
514         unsigned int newsize, i, idx;
515         hash_item **newitems, *hi, *thi;
516
517         if (hash->nprime == NPRIMES)
518                 return;
519
520         newsize = PRIMES[hash->nprime++];
521         newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
522
523         for (i=0; i<hash->csize; i++) {
524                 hi = hash->items[i];
525                 while (hi) {
526                         thi = hi;
527                         hi = thi->next;
528                         idx = hashidx(thi->name) % newsize;
529                         thi->next = newitems[idx];
530                         newitems[idx] = thi;
531                 }
532         }
533
534         free(hash->items);
535         hash->csize = newsize;
536         hash->items = newitems;
537 }
538
539 /* find item in hash, add it if necessary. Return ptr to data */
540 static void *hash_find(xhash *hash, char *name) {
541
542         hash_item *hi;
543         unsigned int idx;
544         int l;
545
546         hi = hash_search(hash, name);
547         if (! hi) {
548                 if (++hash->nel / hash->csize > 10)
549                         hash_rebuild(hash);
550
551                 l = bb_strlen(name) + 1;
552                 hi = xcalloc(sizeof(hash_item) + l, 1);
553                 memcpy(hi->name, name, l);
554
555                 idx = hashidx(name) % hash->csize;
556                 hi->next = hash->items[idx];
557                 hash->items[idx] = hi;
558                 hash->glen += l;
559         }
560         return &(hi->data);
561 }
562
563 #define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
564 #define newvar(name) (var *) hash_find ( vhash , (name) )
565 #define newfile(name) (rstream *) hash_find ( fdhash , (name) )
566 #define newfunc(name) (func *) hash_find ( fnhash , (name) )
567
568 static void hash_remove(xhash *hash, char *name) {
569
570         hash_item *hi, **phi;
571
572         phi = &(hash->items[ hashidx(name) % hash->csize ]);
573         while (*phi) {
574                 hi = *phi;
575                 if (strcmp(hi->name, name) == 0) {
576                         hash->glen -= (bb_strlen(name) + 1);
577                         hash->nel--;
578                         *phi = hi->next;
579                         free(hi);
580                         break;
581                 }
582                 phi = &(hi->next);
583         }
584 }
585
586 /* ------ some useful functions ------ */
587
588 static void skip_spaces(char **s) {
589
590         register char *p = *s;
591
592         while(*p == ' ' || *p == '\t' ||
593                                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
594                 p++;
595         }
596         *s = p;
597 }
598
/* Return the NUL-terminated word *s points at and move *s just past that
 * word's terminating NUL, i.e. to the start of the next packed word.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
607
/* Fetch the next character from *s, advancing *s.
 * A backslash is handed to bb_process_escape_sequence(); when that call
 * returns a backslash without consuming anything, the character following
 * the backslash is taken literally instead.
 */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char **)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
618
/* Return 1 when c is alphanumeric (per isalnum()) or an underscore,
 * 0 otherwise.
 */
static inline int isalnum_(int c)
{
	if (isalnum(c))
		return 1;
	return c == '_';
}
623
/* Open `path` with bb_xfopen(), except that the name "-" yields stdin
 * (whatever `mode` says).
 */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return bb_xfopen(path, mode);
}
628
629 /* -------- working with variables (set/get/copy/etc) -------- */
630
631 static xhash *iamarray(var *v) {
632
633         var *a = v;
634
635         while (a->type & VF_CHILD)
636                 a = a->x.parent;
637
638         if (! (a->type & VF_ARRAY)) {
639                 a->type |= VF_ARRAY;
640                 a->x.array = hash_init();
641         }
642         return a->x.array;
643 }
644
645 static void clear_array(xhash *array) {
646
647         unsigned int i;
648         hash_item *hi, *thi;
649
650         for (i=0; i<array->csize; i++) {
651                 hi = array->items[i];
652                 while (hi) {
653                         thi = hi;
654                         hi = hi->next;
655                         free(thi->data.v.string);
656                         free(thi);
657                 }
658                 array->items[i] = NULL;
659         }
660         array->glen = array->nel = 0;
661 }
662
663 /* clear a variable */
664 static var *clrvar(var *v) {
665
666         if (!(v->type & VF_FSTR))
667                 free(v->string);
668
669         v->type &= VF_DONTTOUCH;
670         v->type |= VF_DIRTY;
671         v->string = NULL;
672         return v;
673 }
674
675 /* assign string value to variable */
676 static var *setvar_p(var *v, char *value) {
677
678         clrvar(v);
679         v->string = value;
680         handle_special(v);
681
682         return v;
683 }
684
685 /* same as setvar_p but make a copy of string */
686 static var *setvar_s(var *v, char *value) {
687
688         return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
689 }
690
691 /* same as setvar_s but set USER flag */
692 static var *setvar_u(var *v, char *value) {
693
694         setvar_s(v, value);
695         v->type |= VF_USER;
696         return v;
697 }
698
699 /* set array element to user string */
700 static void setari_u(var *a, int idx, char *s) {
701
702         register var *v;
703         static char sidx[12];
704
705         sprintf(sidx, "%d", idx);
706         v = findvar(iamarray(a), sidx);
707         setvar_u(v, s);
708 }
709
710 /* assign numeric value to variable */
711 static var *setvar_i(var *v, double value) {
712
713         clrvar(v);
714         v->type |= VF_NUMBER;
715         v->number = value;
716         handle_special(v);
717         return v;
718 }
719
720 static char *getvar_s(var *v) {
721
722         /* if v is numeric and has no cached string, convert it to string */
723         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
724                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
725                 v->string = bb_xstrdup(buf);
726                 v->type |= VF_CACHED;
727         }
728         return (v->string == NULL) ? "" : v->string;
729 }
730
731 static double getvar_i(var *v) {
732
733         char *s;
734
735         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
736                 v->number = 0;
737                 s = v->string;
738                 if (s && *s) {
739                         v->number = strtod(s, &s);
740                         if (v->type & VF_USER) {
741                                 skip_spaces(&s);
742                                 if (*s != '\0')
743                                         v->type &= ~VF_USER;
744                         }
745                 } else {
746                         v->type &= ~VF_USER;
747                 }
748                 v->type |= VF_CACHED;
749         }
750         return v->number;
751 }
752
753 static var *copyvar(var *dest, var *src) {
754
755         if (dest != src) {
756                 clrvar(dest);
757                 dest->type |= (src->type & ~VF_DONTTOUCH);
758                 dest->number = src->number;
759                 if (src->string)
760                         dest->string = bb_xstrdup(src->string);
761         }
762         handle_special(dest);
763         return dest;
764 }
765
766 static var *incvar(var *v) {
767
768         return setvar_i(v, getvar_i(v)+1.);
769 }
770
771 /* return true if v is number or numeric string */
772 static int is_numeric(var *v) {
773
774         getvar_i(v);
775         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
776 }
777
778 /* return 1 when value of v corresponds to true, 0 otherwise */
779 static int istrue(var *v) {
780
781         if (is_numeric(v))
782                 return (v->number == 0) ? 0 : 1;
783         else
784                 return (v->string && *(v->string)) ? 1 : 0;
785 }
786
787 /* temporary variables allocator. Last allocated should be first freed */
788 static var *nvalloc(int n) {
789
790         nvblock *pb = NULL;
791         var *v, *r;
792         int size;
793
794         while (cb) {
795                 pb = cb;
796                 if ((cb->pos - cb->nv) + n <= cb->size) break;
797                 cb = cb->next;
798         }
799
800         if (! cb) {
801                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
802                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
803                 cb->size = size;
804                 cb->pos = cb->nv;
805                 cb->prev = pb;
806                 cb->next = NULL;
807                 if (pb) pb->next = cb;
808         }
809
810         v = r = cb->pos;
811         cb->pos += n;
812
813         while (v < cb->pos) {
814                 v->type = 0;
815                 v->string = NULL;
816                 v++;
817         }
818
819         return r;
820 }
821
822 static void nvfree(var *v) {
823
824         var *p;
825
826         if (v < cb->nv || v >= cb->pos)
827                 runtime_error(EMSG_INTERNAL_ERROR);
828
829         for (p=v; p<cb->pos; p++) {
830                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
831                         clear_array(iamarray(p));
832                         free(p->x.array->items);
833                         free(p->x.array);
834                 }
835                 if (p->type & VF_WALK)
836                         free(p->x.walker);
837
838                 clrvar(p);
839         }
840
841         cb->pos = v;
842         while (cb->prev && cb->pos == cb->nv) {
843                 cb = cb->prev;
844         }
845 }
846
847 /* ------- awk program text parsing ------- */
848
849 /* Parse next token pointed by global pos, place results into global t.
850  * If token isn't expected, give away. Return token class
851  */
852 static unsigned long next_token(unsigned long expected) {
853
854         char *p, *pp, *s;
855         char *tl;
856         unsigned long tc, *ti;
857         int l;
858         static int concat_inserted = FALSE;
859         static unsigned long save_tclass, save_info;
860         static unsigned long ltclass = TC_OPTERM;
861
862         if (t.rollback) {
863
864                 t.rollback = FALSE;
865
866         } else if (concat_inserted) {
867
868                 concat_inserted = FALSE;
869                 t.tclass = save_tclass;
870                 t.info = save_info;
871
872         } else {
873
874                 p = pos;
875
876         readnext:
877                 skip_spaces(&p);
878                 lineno = t.lineno;
879                 if (*p == '#')
880                         while (*p != '\n' && *p != '\0') p++;
881
882                 if (*p == '\n')
883                         t.lineno++;
884
885                 if (*p == '\0') {
886                         tc = TC_EOF;
887
888                 } else if (*p == '\"') {
889                         /* it's a string */
890                         t.string = s = ++p;
891                         while (*p != '\"') {
892                                 if (*p == '\0' || *p == '\n')
893                                         syntax_error(EMSG_UNEXP_EOS);
894                                 *(s++) = nextchar(&p);
895                         }
896                         p++;
897                         *s = '\0';
898                         tc = TC_STRING;
899
900                 } else if ((expected & TC_REGEXP) && *p == '/') {
901                         /* it's regexp */
902                         t.string = s = ++p;
903                         while (*p != '/') {
904                                 if (*p == '\0' || *p == '\n')
905                                         syntax_error(EMSG_UNEXP_EOS);
906                                 if ((*s++ = *p++) == '\\') {
907                                         pp = p;
908                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
909                                         if (*pp == '\\') *s++ = '\\';
910                                         if (p == pp) *s++ = *p++;
911                                 }
912                         }
913                         p++;
914                         *s = '\0';
915                         tc = TC_REGEXP;
916
917                 } else if (*p == '.' || isdigit(*p)) {
918                         /* it's a number */
919                         t.number = strtod(p, &p);
920                         if (*p == '.')
921                                 syntax_error(EMSG_UNEXP_TOKEN);
922                         tc = TC_NUMBER;
923
924                 } else {
925                         /* search for something known */
926                         tl = tokenlist;
927                         tc = 0x00000001;
928                         ti = tokeninfo;
929                         while (*tl) {
930                                 l = *(tl++);
931                                 if (l == NTCC) {
932                                         tc <<= 1;
933                                         continue;
934                                 }
935                                 /* if token class is expected, token
936                                  * matches and it's not a longer word,
937                                  * then this is what we are looking for
938                                  */
939                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940                                 *tl == *p && strncmp(p, tl, l) == 0 &&
941                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
942                                         t.info = *ti;
943                                         p += l;
944                                         break;
945                                 }
946                                 ti++;
947                                 tl += l;
948                         }
949
950                         if (! *tl) {
951                                 /* it's a name (var/array/function),
952                                  * otherwise it's something wrong
953                                  */
954                                 if (! isalnum_(*p))
955                                         syntax_error(EMSG_UNEXP_TOKEN);
956
957                                 t.string = --p;
958                                 while(isalnum_(*(++p))) {
959                                         *(p-1) = *p;
960                                 }
961                                 *(p-1) = '\0';
962                                 tc = TC_VARIABLE;
963                                 if (*p == '(') {
964                                         tc = TC_FUNCTION;
965                                 } else {
966                                         skip_spaces(&p);
967                                         if (*p == '[') {
968                                                 p++;
969                                                 tc = TC_ARRAY;
970                                         }
971                                 }
972                         }
973                 }
974                 pos = p;
975
976                 /* skipping newlines in some cases */
977                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
978                         goto readnext;
979
980                 /* insert concatenation operator when needed */
981                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
982                         concat_inserted = TRUE;
983                         save_tclass = tc;
984                         save_info = t.info;
985                         tc = TC_BINOP;
986                         t.info = OC_CONCAT | SS | P(35);
987                 }
988
989                 t.tclass = tc;
990         }
991         ltclass = t.tclass;
992
993         /* Are we ready for this? */
994         if (! (ltclass & expected))
995                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
996                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
997
998         return ltclass;
999 }
1000
1001 static void rollback_token(void) { t.rollback = TRUE; }
1002
1003 static node *new_node(unsigned long info) {
1004
1005         register node *n;
1006
1007         n = (node *)xcalloc(sizeof(node), 1);
1008         n->info = info;
1009         n->lineno = lineno;
1010         return n;
1011 }
1012
1013 static node *mk_re_node(char *s, node *n, regex_t *re) {
1014
1015         n->info = OC_REGEXP;
1016         n->l.re = re;
1017         n->r.ire = re + 1;
1018         xregcomp(re, s, REG_EXTENDED);
1019         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1020
1021         return n;
1022 }
1023
1024 static node *condition(void) {
1025
1026         next_token(TC_SEQSTART);
1027         return parse_expr(TC_SEQTERM);
1028 }
1029
1030 /* parse expression terminated by given argument, return ptr
1031  * to built subtree. Terminator is eaten by parse_expr */
1032 static node *parse_expr(unsigned long iexp) {
1033
1034         node sn;
1035         node *cn = &sn;
1036         node *vn, *glptr;
1037         unsigned long tc, xtc;
1038         var *v;
1039
1040         sn.info = PRIMASK;
1041         sn.r.n = glptr = NULL;
1042         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1043
1044         while (! ((tc = next_token(xtc)) & iexp)) {
1045                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1046                         /* input redirection (<) attached to glptr node */
1047                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1048                         cn->a.n = glptr;
1049                         xtc = TC_OPERAND | TC_UOPPRE;
1050                         glptr = NULL;
1051
1052                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1053                         /* for binary and postfix-unary operators, jump back over
1054                          * previous operators with higher priority */
1055                         vn = cn;
1056                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1057                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1058                                 vn = vn->a.n;
1059                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1060                                 t.info += P(6);
1061                         cn = vn->a.n->r.n = new_node(t.info);
1062                         cn->a.n = vn->a.n;
1063                         if (tc & TC_BINOP) {
1064                                 cn->l.n = vn;
1065                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1066                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1067                                         /* it's a pipe */
1068                                         next_token(TC_GETLINE);
1069                                         /* give maximum priority to this pipe */
1070                                         cn->info &= ~PRIMASK;
1071                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1072                                 }
1073                         } else {
1074                                 cn->r.n = vn;
1075                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1076                         }
1077                         vn->a.n = cn;
1078
1079                 } else {
1080                         /* for operands and prefix-unary operators, attach them
1081                          * to last node */
1082                         vn = cn;
1083                         cn = vn->r.n = new_node(t.info);
1084                         cn->a.n = vn;
1085                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1086                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1087                                 xtc = TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1088                                 /* one should be very careful with switch on tclass -
1089                                  * only simple tclasses should be used! */
1090                                 switch (tc) {
1091                                   case TC_VARIABLE:
1092                                   case TC_ARRAY:
1093                                         cn->info = OC_VAR;
1094                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1095                                                 cn->info = OC_FNARG;
1096                                                 cn->l.i = v->x.aidx;
1097                                         } else {
1098                                                 cn->l.v = newvar(t.string);
1099                                         }
1100                                         if (tc & TC_ARRAY) {
1101                                                 cn->info |= xS;
1102                                                 cn->r.n = parse_expr(TC_ARRTERM);
1103                                         }
1104                                         xtc = TC_UOPPOST | TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1105                                         break;
1106                                 
1107                                   case TC_NUMBER:
1108                                   case TC_STRING:
1109                                         cn->info = OC_VAR;
1110                                         v = cn->l.v = xcalloc(sizeof(var), 1);
1111                                         if (tc & TC_NUMBER)
1112                                                 setvar_i(v, t.number);
1113                                         else
1114                                                 setvar_s(v, t.string);
1115                                         break;
1116
1117                                   case TC_REGEXP:
1118                                         mk_re_node(t.string, cn,
1119                                                                         (regex_t *)xcalloc(sizeof(regex_t),2));
1120                                         break;
1121
1122                                   case TC_FUNCTION:
1123                                         cn->info = OC_FUNC;
1124                                         cn->r.f = newfunc(t.string);
1125                                         cn->l.n = condition();
1126                                         break;
1127
1128                                   case TC_SEQSTART:
1129                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1130                                         cn->a.n = vn;
1131                                         break;
1132
1133                                   case TC_GETLINE:
1134                                         glptr = cn;
1135                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1136                                         break;
1137
1138                                   case TC_BUILTIN:
1139                                         cn->l.n = condition();
1140                                         break;
1141                                 }
1142                         }
1143                 }
1144         }
1145         return sn.r.n;
1146 }
1147
1148 /* add node to chain. Return ptr to alloc'd node */
1149 static node *chain_node(unsigned long info) {
1150
1151         register node *n;
1152
1153         if (! seq->first)
1154                 seq->first = seq->last = new_node(0);
1155
1156         if (seq->programname != programname) {
1157                 seq->programname = programname;
1158                 n = chain_node(OC_NEWSOURCE);
1159                 n->l.s = bb_xstrdup(programname);
1160         }
1161
1162         n = seq->last;
1163         n->info = info;
1164         seq->last = n->a.n = new_node(OC_DONE);
1165
1166         return n;
1167 }
1168
1169 static void chain_expr(unsigned long info) {
1170
1171         node *n;
1172
1173         n = chain_node(info);
1174         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1175         if (t.tclass & TC_GRPTERM)
1176                 rollback_token();
1177 }
1178
1179 static node *chain_loop(node *nn) {
1180
1181         node *n, *n2, *save_brk, *save_cont;
1182
1183         save_brk = break_ptr;
1184         save_cont = continue_ptr;
1185
1186         n = chain_node(OC_BR | Vx);
1187         continue_ptr = new_node(OC_EXEC);
1188         break_ptr = new_node(OC_EXEC);
1189         chain_group();
1190         n2 = chain_node(OC_EXEC | Vx);
1191         n2->l.n = nn;
1192         n2->a.n = n;
1193         continue_ptr->a.n = n2;
1194         break_ptr->a.n = n->r.n = seq->last;
1195
1196         continue_ptr = save_cont;
1197         break_ptr = save_brk;
1198
1199         return n;
1200 }
1201
1202 /* parse group and attach it to chain */
1203 static void chain_group(void) {
1204
1205         unsigned long c;
1206         node *n, *n2, *n3;
1207
1208         do {
1209                 c = next_token(TC_GRPSEQ);
1210         } while (c & TC_NEWLINE);
1211
1212         if (c & TC_GRPSTART) {
1213                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1214                         if (t.tclass & TC_NEWLINE) continue;    
1215                         rollback_token();
1216                         chain_group();
1217                 }
1218         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1219                 rollback_token();
1220                 chain_expr(OC_EXEC | Vx);
1221         } else {                                                /* TC_STATEMNT */
1222                 switch (t.info & OPCLSMASK) {
1223                         case ST_IF:
1224                                 n = chain_node(OC_BR | Vx);
1225                                 n->l.n = condition();
1226                                 chain_group();
1227                                 n2 = chain_node(OC_EXEC);
1228                                 n->r.n = seq->last;
1229                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1230                                         chain_group();
1231                                         n2->a.n = seq->last;
1232                                 } else {
1233                                         rollback_token();
1234                                 }
1235                                 break;
1236
1237                         case ST_WHILE:
1238                                 n2 = condition();
1239                                 n = chain_loop(NULL);
1240                                 n->l.n = n2;
1241                                 break;
1242
1243                         case ST_DO:
1244                                 n2 = chain_node(OC_EXEC);
1245                                 n = chain_loop(NULL);
1246                                 n2->a.n = n->a.n;
1247                                 next_token(TC_WHILE);
1248                                 n->l.n = condition();
1249                                 break;
1250
1251                         case ST_FOR:
1252                                 next_token(TC_SEQSTART);
1253                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1254                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
1255                                         if ((n2->info & OPCLSMASK) != OC_IN)
1256                                                 syntax_error(EMSG_UNEXP_TOKEN);
1257                                         n = chain_node(OC_WALKINIT | VV);
1258                                         n->l.n = n2->l.n;
1259                                         n->r.n = n2->r.n;
1260                                         n = chain_loop(NULL);
1261                                         n->info = OC_WALKNEXT | Vx;
1262                                         n->l.n = n2->l.n;
1263                                 } else {                                                                        /* for(;;) */
1264                                         n = chain_node(OC_EXEC | Vx);
1265                                         n->l.n = n2;
1266                                         n2 = parse_expr(TC_SEMICOL);
1267                                         n3 = parse_expr(TC_SEQTERM);
1268                                         n = chain_loop(n3);
1269                                         n->l.n = n2;
1270                                         if (! n2)
1271                                                 n->info = OC_EXEC;
1272                                 }
1273                                 break;
1274
1275                         case OC_PRINT:
1276                         case OC_PRINTF:
1277                                 n = chain_node(t.info);
1278                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1279                                 if (t.tclass & TC_OUTRDR) {
1280                                         n->info |= t.info;
1281                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1282                                 }
1283                                 if (t.tclass & TC_GRPTERM)
1284                                         rollback_token();
1285                                 break;
1286
1287                         case OC_BREAK:
1288                                 n = chain_node(OC_EXEC);
1289                                 n->a.n = break_ptr;
1290                                 break;
1291
1292                         case OC_CONTINUE:
1293                                 n = chain_node(OC_EXEC);
1294                                 n->a.n = continue_ptr;
1295                                 break;
1296
1297                         /* delete, next, nextfile, return, exit */
1298                         default:
1299                                 chain_expr(t.info);
1300
1301                 }
1302         }
1303 }
1304
1305 static void parse_program(char *p) {
1306
1307         unsigned long tclass;
1308         node *cn;
1309         func *f;
1310         var *v;
1311
1312         pos = p;
1313         t.lineno = 1;
1314         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1315                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1316
1317                 if (tclass & TC_OPTERM)
1318                         continue;
1319
1320                 seq = &mainseq;
1321                 if (tclass & TC_BEGIN) {
1322                         seq = &beginseq;
1323                         chain_group();
1324
1325                 } else if (tclass & TC_END) {
1326                         seq = &endseq;
1327                         chain_group();
1328
1329                 } else if (tclass & TC_FUNCDECL) {
1330                         next_token(TC_FUNCTION);
1331                         pos++;
1332                         f = newfunc(t.string);
1333                         f->body.first = NULL;
1334                         f->nargs = 0;
1335                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1336                                 v = findvar(ahash, t.string);
1337                                 v->x.aidx = (f->nargs)++;
1338
1339                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1340                                         break;
1341                         }
1342                         seq = &(f->body);
1343                         chain_group();
1344                         clear_array(ahash);
1345
1346                 } else if (tclass & TC_OPSEQ) {
1347                         rollback_token();
1348                         cn = chain_node(OC_TEST);
1349                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1350                         if (t.tclass & TC_GRPSTART) {
1351                                 rollback_token();
1352                                 chain_group();
1353                         } else {
1354                                 chain_node(OC_PRINT);
1355                         }
1356                         cn->r.n = mainseq.last;
1357
1358                 } else /* if (tclass & TC_GRPSTART) */ {
1359                         rollback_token();
1360                         chain_group();
1361                 }
1362         }
1363 }
1364
1365
1366 /* -------- program execution part -------- */
1367
1368 static node *mk_splitter(char *s, tsplitter *spl) {
1369
1370         register regex_t *re, *ire;
1371         node *n;
1372
1373         re = &spl->re[0];
1374         ire = &spl->re[1];
1375         n = &spl->n;
1376         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1377                 regfree(re);
1378                 regfree(ire);
1379         }
1380         if (bb_strlen(s) > 1) {
1381                 mk_re_node(s, n, re);
1382         } else {
1383                 n->info = (unsigned long) *s;
1384         }
1385
1386         return n;
1387 }
1388
1389 /* use node as a regular expression. Supplied with node ptr and regex_t
1390  * storage space. Return ptr to regex (if result points to preg, it should
1391  * be later regfree'd manually
1392  */
1393 static regex_t *as_regex(node *op, regex_t *preg) {
1394
1395         var *v;
1396         char *s;
1397
1398         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1399                 return icase ? op->r.ire : op->l.re;
1400         } else {
1401                 v = nvalloc(1);
1402                 s = getvar_s(evaluate(op, v));
1403                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1404                 nvfree(v);
1405                 return preg;
1406         }
1407 }
1408
/* Gradually growing buffer: make sure *b is allocated and that n is below
 * its recorded capacity *size.  When it has to grow, the new capacity is
 * n plus half of n plus 80 bytes. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n>>1) + 80;
	*b = xrealloc(*b, *size);
}
1415
1416 /* resize field storage space */
1417 static void fsrealloc(int size) {
1418
1419         static int maxfields = 0;
1420         int i;
1421
1422         if (size >= maxfields) {
1423                 i = maxfields;
1424                 maxfields = size + 16;
1425                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1426                 for (; i<maxfields; i++) {
1427                         Fields[i].type = VF_SPECIAL;
1428                         Fields[i].string = NULL;
1429                 }
1430         }
1431
1432         if (size < nfields) {
1433                 for (i=size; i<nfields; i++) {
1434                         clrvar(Fields+i);
1435                 }
1436         }
1437         nfields = size;
1438 }
1439
1440 static int awk_split(char *s, node *spl, char **slist) {
1441
1442         int l, n=0;
1443         char c[4];
1444         char *s1;
1445         regmatch_t pmatch[2];
1446
1447         /* in worst case, each char would be a separate field */
1448         *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1449
1450         c[0] = c[1] = (char)spl->info;
1451         c[2] = c[3] = '\0';
1452         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1453
1454         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1455                 while (*s) {
1456                         l = strcspn(s, c+2);
1457                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1458                         pmatch[0].rm_so <= l) {
1459                                 l = pmatch[0].rm_so;
1460                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1461                         } else {
1462                                 pmatch[0].rm_eo = l;
1463                                 if (*(s+l)) pmatch[0].rm_eo++;
1464                         }
1465
1466                         memcpy(s1, s, l);
1467                         *(s1+l) = '\0';
1468                         nextword(&s1);
1469                         s += pmatch[0].rm_eo;
1470                         n++;
1471                 }
1472         } else if (c[0] == '\0') {              /* null split */
1473                 while(*s) {
1474                         *(s1++) = *(s++);
1475                         *(s1++) = '\0';
1476                         n++;
1477                 }
1478         } else if (c[0] != ' ') {               /* single-character split */
1479                 if (icase) {
1480                         c[0] = toupper(c[0]);
1481                         c[1] = tolower(c[1]);
1482                 }
1483                 if (*s1) n++;
1484                 while ((s1 = strpbrk(s1, c))) {
1485                         *(s1++) = '\0';
1486                         n++;
1487                 }
1488         } else {                                /* space split */
1489                 while (*s) {
1490                         while (isspace(*s)) s++;
1491                         if (! *s) break;
1492                         n++;
1493                         while (*s && !isspace(*s))
1494                                 *(s1++) = *(s++);
1495                         *(s1++) = '\0';
1496                 }
1497         }
1498         return n;
1499 }
1500
1501 static void split_f0(void) {
1502
1503         static char *fstrings = NULL;
1504         int i, n;
1505         char *s;
1506
1507         if (is_f0_split)
1508                 return;
1509
1510         is_f0_split = TRUE;
1511         free(fstrings);
1512         fsrealloc(0);
1513         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1514         fsrealloc(n);
1515         s = fstrings;
1516         for (i=0; i<n; i++) {
1517                 Fields[i].string = nextword(&s);
1518                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1519         }
1520
1521         /* set NF manually to avoid side effects */
1522         clrvar(V[NF]);
1523         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1524         V[NF]->number = nfields;
1525 }
1526
1527 /* perform additional actions when some internal variables changed */
1528 static void handle_special(var *v) {
1529
1530         int n;
1531         char *b, *sep, *s;
1532         int sl, l, len, i, bsize;
1533
1534         if (! (v->type & VF_SPECIAL))
1535                 return;
1536
1537         if (v == V[NF]) {
1538                 n = (int)getvar_i(v);
1539                 fsrealloc(n);
1540
1541                 /* recalculate $0 */
1542                 sep = getvar_s(V[OFS]);
1543                 sl = bb_strlen(sep);
1544                 b = NULL;
1545                 len = 0;
1546                 for (i=0; i<n; i++) {
1547                         s = getvar_s(&Fields[i]);
1548                         l = bb_strlen(s);
1549                         if (b) {
1550                                 memcpy(b+len, sep, sl);
1551                                 len += sl;
1552                         }
1553                         qrealloc(&b, len+l+sl, &bsize);
1554                         memcpy(b+len, s, l);
1555                         len += l;
1556                 }
1557                 b[len] = '\0';
1558                 setvar_p(V[F0], b);
1559                 is_f0_split = TRUE;
1560
1561         } else if (v == V[F0]) {
1562                 is_f0_split = FALSE;
1563
1564         } else if (v == V[FS]) {
1565                 mk_splitter(getvar_s(v), &fsplitter);
1566
1567         } else if (v == V[RS]) {
1568                 mk_splitter(getvar_s(v), &rsplitter);
1569
1570         } else if (v == V[IGNORECASE]) {
1571                 icase = istrue(v);
1572
1573         } else {                                                /* $n */
1574                 n = getvar_i(V[NF]);
1575                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1576                 /* right here v is invalid. Just to note... */
1577         }
1578 }
1579
1580 /* step through func/builtin/etc arguments */
1581 static node *nextarg(node **pn) {
1582
1583         node *n;
1584
1585         n = *pn;
1586         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1587                 *pn = n->r.n;
1588                 n = n->l.n;
1589         } else {
1590                 *pn = NULL;
1591         }
1592         return n;
1593 }
1594
1595 static void hashwalk_init(var *v, xhash *array) {
1596
1597         char **w;
1598         hash_item *hi;
1599         int i;
1600
1601         if (v->type & VF_WALK)
1602                 free(v->x.walker);
1603
1604         v->type |= VF_WALK;
1605         w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1606         *w = *(w+1) = (char *)(w + 2);
1607         for (i=0; i<array->csize; i++) {
1608                 hi = array->items[i];
1609                 while(hi) {
1610                         strcpy(*w, hi->name);
1611                         nextword(w);
1612                         hi = hi->next;
1613                 }
1614         }
1615 }
1616
1617 static int hashwalk_next(var *v) {
1618
1619         char **w;
1620
1621         w = v->x.walker;
1622         if (*(w+1) == *w)
1623                 return FALSE;
1624
1625         setvar_s(v, nextword(w+1));
1626         return TRUE;
1627 }
1628
1629 /* evaluate node, return 1 when result is true, 0 otherwise */
1630 static int ptest(node *pattern) {
1631         static var v;
1632
1633         return istrue(evaluate(pattern, &v));
1634 }
1635
1636 /* read next record from stream rsm into a variable v */
1637 static int awk_getline(rstream *rsm, var *v) {
1638
1639         char *b;
1640         regmatch_t pmatch[2];
1641         int a, p, pp=0, size;
1642         int fd, so, eo, r, rp;
1643         char c, *m, *s;
1644
1645         /* we're using our own buffer since we need access to accumulating
1646          * characters
1647          */
1648         fd = fileno(rsm->F);
1649         m = rsm->buffer;
1650         a = rsm->adv;
1651         p = rsm->pos;
1652         size = rsm->size;
1653         c = (char) rsplitter.n.info;
1654         rp = 0;
1655
1656         if (! m) qrealloc(&m, 256, &size);
1657         do {
1658                 b = m + a;
1659                 so = eo = p;
1660                 r = 1;
1661                 if (p > 0) {
1662                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1663                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1664                                                                                                 b, 1, pmatch, 0) == 0) {
1665                                         so = pmatch[0].rm_so;
1666                                         eo = pmatch[0].rm_eo;
1667                                         if (b[eo] != '\0')
1668                                                 break;
1669                                 }
1670                         } else if (c != '\0') {
1671                                 s = strchr(b+pp, c);
1672                                 if (s) {
1673                                         so = eo = s-b;
1674                                         eo++;
1675                                         break;
1676                                 }
1677                         } else {
1678                                 while (b[rp] == '\n')
1679                                         rp++;
1680                                 s = strstr(b+rp, "\n\n");
1681                                 if (s) {
1682                                         so = eo = s-b;
1683                                         while (b[eo] == '\n') eo++;
1684                                         if (b[eo] != '\0')
1685                                                 break;
1686                                 }
1687                         }
1688                 }
1689
1690                 if (a > 0) {
1691                         memmove(m, (const void *)(m+a), p+1);
1692                         b = m;
1693                         a = 0;
1694                 }
1695
1696                 qrealloc(&m, a+p+128, &size);
1697                 b = m + a;
1698                 pp = p;
1699                 p += safe_read(fd, b+p, size-p-1);
1700                 if (p < pp) {
1701                         p = 0;
1702                         r = 0;
1703                         setvar_i(V[ERRNO], errno);
1704                 }
1705                 b[p] = '\0';
1706
1707         } while (p > pp);
1708
1709         if (p == 0) {
1710                 r--;
1711         } else {
1712                 c = b[so]; b[so] = '\0';
1713                 setvar_s(v, b+rp);
1714                 v->type |= VF_USER;
1715                 b[so] = c;
1716                 c = b[eo]; b[eo] = '\0';
1717                 setvar_s(V[RT], b+so);
1718                 b[eo] = c;
1719         }
1720
1721         rsm->buffer = m;
1722         rsm->adv = a + eo;
1723         rsm->pos = p - eo;
1724         rsm->size = size;
1725
1726         return r;
1727 }
1728
1729 static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1730
1731         int r=0;
1732         char c, *s=format;
1733
1734         if (int_as_int && n == (int)n) {
1735                 r = snprintf(b, size, "%d", (int)n);
1736         } else {
1737                 do { c = *s; } while (*s && *++s);
1738                 if (strchr("diouxX", c)) {
1739                         r = snprintf(b, size, format, (int)n);
1740                 } else if (strchr("eEfgG", c)) {
1741                         r = snprintf(b, size, format, n);
1742                 } else {
1743                         runtime_error(EMSG_INV_FMT);
1744                 }
1745         }
1746         return r;
1747 }
1748
1749
1750 /* formatted output into an allocated buffer, return ptr to buffer */
1751 static char *awk_printf(node *n) {
1752
1753         char *b = NULL;
1754         char *fmt, *s, *s1, *f;
1755         int i, j, incr, bsize;
1756         char c, c1;
1757         var *v, *arg;
1758
1759         v = nvalloc(1);
1760         fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1761
1762         i = 0;
1763         while (*f) {
1764                 s = f;
1765                 while (*f && (*f != '%' || *(++f) == '%'))
1766                         f++;
1767                 while (*f && !isalpha(*f))
1768                         f++;
1769
1770                 incr = (f - s) + MAXVARFMT;
1771                 qrealloc(&b, incr+i, &bsize);
1772                 c = *f; if (c != '\0') f++;
1773                 c1 = *f ; *f = '\0';
1774                 arg = evaluate(nextarg(&n), v);
1775
1776                 j = i;
1777                 if (c == 'c' || !c) {
1778                         i += sprintf(b+i, s,
1779                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1780
1781                 } else if (c == 's') {
1782                     s1 = getvar_s(arg);
1783                         qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1784                         i += sprintf(b+i, s, s1);
1785
1786                 } else {
1787                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1788                 }
1789                 *f = c1;
1790
1791                 /* if there was an error while sprintf, return value is negative */
1792                 if (i < j) i = j;
1793
1794         }
1795
1796         b = xrealloc(b, i+1);
1797         free(fmt);
1798         nvfree(v);
1799         b[i] = '\0';
1800         return b;
1801 }
1802
1803 /* common substitution routine
1804  * replace (nm) substring of (src) that match (n) with (repl), store
1805  * result into (dest), return number of substitutions. If nm=0, replace
1806  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1807  * subexpression matching (\1-\9)
1808  */
1809 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1810
1811         char *ds = NULL;
1812         char *sp, *s;
1813         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1814         regmatch_t pmatch[10];
1815         regex_t sreg, *re;
1816
1817         re = as_regex(rn, &sreg);
1818         if (! src) src = V[F0];
1819         if (! dest) dest = V[F0];
1820
1821         i = di = 0;
1822         sp = getvar_s(src);
1823         rl = bb_strlen(repl);
1824         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1825                 so = pmatch[0].rm_so;
1826                 eo = pmatch[0].rm_eo;
1827
1828                 qrealloc(&ds, di + eo + rl, &dssize);
1829                 memcpy(ds + di, sp, eo);
1830                 di += eo;
1831                 if (++i >= nm) {
1832                         /* replace */
1833                         di -= (eo - so);
1834                         nbs = 0;
1835                         for (s = repl; *s; s++) {
1836                                 ds[di++] = c = *s;
1837                                 if (c == '\\') {
1838                                         nbs++;
1839                                         continue;
1840                                 }
1841                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1842                                         di -= ((nbs + 3) >> 1);
1843                                         j = 0;
1844                                         if (c != '&') {
1845                                                 j = c - '0';
1846                                                 nbs++;
1847                                         }
1848                                         if (nbs % 2) {
1849                                                 ds[di++] = c;
1850                                         } else {
1851                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1852                                                 qrealloc(&ds, di + rl + n, &dssize);
1853                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1854                                                 di += n;
1855                                         }
1856                                 }
1857                                 nbs = 0;
1858                         }
1859                 }
1860
1861                 sp += eo;
1862                 if (i == nm) break;
1863                 if (eo == so) {
1864                         if (! (ds[di++] = *sp++)) break;
1865                 }
1866         }
1867
1868         qrealloc(&ds, di + strlen(sp), &dssize);
1869         strcpy(ds + di, sp);
1870         setvar_p(dest, ds);
1871         if (re == &sreg) regfree(re);
1872         return i;
1873 }
1874
1875 static var *exec_builtin(node *op, var *res) {
1876
1877         int (*to_xxx)(int);
1878         var *tv;
1879         node *an[4];
1880         var  *av[4];
1881         char *as[4];
1882         regmatch_t pmatch[2];
1883         regex_t sreg, *re;
1884         static tsplitter tspl;
1885         node *spl;
1886         unsigned long isr, info;
1887         int nargs;
1888         time_t tt;
1889         char *s, *s1;
1890         int i, l, ll, n;
1891
1892         tv = nvalloc(4);
1893         isr = info = op->info;
1894         op = op->l.n;
1895
1896         av[2] = av[3] = NULL;
1897         for (i=0 ; i<4 && op ; i++) {
1898                 an[i] = nextarg(&op);
1899                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1900                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1901                 isr >>= 1;
1902         }
1903
1904         nargs = i;
1905         if (nargs < (info >> 30))
1906                 runtime_error(EMSG_TOO_FEW_ARGS);
1907
1908         switch (info & OPNMASK) {
1909
1910           case B_a2:
1911 #ifdef CONFIG_FEATURE_AWK_MATH
1912                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1913 #else
1914                 runtime_error(EMSG_NO_MATH);
1915 #endif
1916                 break;
1917
1918           case B_sp:
1919                 if (nargs > 2) {
1920                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1921                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1922                 } else {
1923                         spl = &fsplitter.n;
1924                 }
1925
1926                 n = awk_split(as[0], spl, &s);
1927                 s1 = s;
1928                 clear_array(iamarray(av[1]));
1929                 for (i=1; i<=n; i++)
1930                         setari_u(av[1], i, nextword(&s1));
1931                 free(s);
1932                 setvar_i(res, n);
1933                 break;
1934
1935           case B_ss:
1936                 l = bb_strlen(as[0]);
1937                 i = getvar_i(av[1]) - 1;
1938                 if (i>l) i=l; if (i<0) i=0;
1939                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1940                 if (n<0) n=0;
1941                 s = xmalloc(n+1);
1942                 strncpy(s, as[0]+i, n);
1943                 s[n] = '\0';
1944                 setvar_p(res, s);
1945                 break;
1946
1947           case B_lo:
1948                 to_xxx = tolower;
1949                 goto lo_cont;
1950
1951           case B_up:
1952                 to_xxx = toupper;
1953 lo_cont:
1954                 s1 = s = bb_xstrdup(as[0]);
1955                 while (*s1) {
1956                         *s1 = (*to_xxx)(*s1);
1957                         s1++;
1958                 }
1959                 setvar_p(res, s);
1960                 break;
1961
1962           case B_ix:
1963                 n = 0;
1964                 ll = bb_strlen(as[1]);
1965                 l = bb_strlen(as[0]) - ll;
1966                 if (ll > 0 && l >= 0) {
1967                         if (! icase) {
1968                                 s = strstr(as[0], as[1]);
1969                                 if (s) n = (s - as[0]) + 1;
1970                         } else {
1971                                 /* this piece of code is terribly slow and
1972                                  * really should be rewritten
1973                                  */
1974                                 for (i=0; i<=l; i++) {
1975                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1976                                                 n = i+1;
1977                                                 break;
1978                                         }
1979                                 }
1980                         }
1981                 }
1982                 setvar_i(res, n);
1983                 break;
1984
1985           case B_ti:
1986                 if (nargs > 1)
1987                         tt = getvar_i(av[1]);
1988                 else
1989                         time(&tt);
1990                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1991                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1992                 buf[i] = '\0';
1993                 setvar_s(res, buf);
1994                 break;
1995
1996           case B_ma:
1997                 re = as_regex(an[1], &sreg);
1998                 n = regexec(re, as[0], 1, pmatch, 0);
1999                 if (n == 0) {
2000                         pmatch[0].rm_so++;
2001                         pmatch[0].rm_eo++;
2002                 } else {
2003                         pmatch[0].rm_so = 0;
2004                         pmatch[0].rm_eo = -1;
2005                 }
2006                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2007                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2008                 setvar_i(res, pmatch[0].rm_so);
2009                 if (re == &sreg) regfree(re);
2010                 break;
2011
2012           case B_ge:
2013                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2014                 break;
2015
2016           case B_gs:
2017                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2018                 break;
2019
2020           case B_su:
2021                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2022                 break;
2023         }
2024
2025         nvfree(tv);
2026         return res;
2027 }
2028
2029 /*
2030  * Evaluate node - the heart of the program. Supplied with subtree
2031  * and place where to store result. returns ptr to result.
2032  */
2033 #define XC(n) ((n) >> 8)
2034
2035 static var *evaluate(node *op, var *res) {
2036
2037         /* This procedure is recursive so we should count every byte */
2038         static var *fnargs = NULL;
2039         static unsigned int seed = 1;
2040         static regex_t sreg;
2041         node *op1;
2042         var *v1;
2043         union {
2044                 var *v;
2045                 char *s;
2046                 double d;
2047                 int i;
2048         } L, R;
2049         unsigned long opinfo;
2050         short opn;
2051         union {
2052                 char *s;
2053                 rstream *rsm;
2054                 FILE *F;
2055                 var *v;
2056                 regex_t *re;
2057                 unsigned long info;
2058         } X;
2059
2060         if (! op)
2061                 return setvar_s(res, NULL);
2062
2063         v1 = nvalloc(2);
2064
2065         while (op) {
2066
2067                 opinfo = op->info;
2068                 opn = (short)(opinfo & OPNMASK);
2069                 lineno = op->lineno;
2070
2071                 /* execute inevitable things */
2072                 op1 = op->l.n;
2073                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2074                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2075                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2076                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2077                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2078
2079                 switch (XC(opinfo & OPCLSMASK)) {
2080
2081                   /* -- iterative node type -- */
2082
2083                   /* test pattern */
2084                   case XC( OC_TEST ):
2085                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2086                                 /* it's range pattern */
2087                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2088                                         op->info |= OF_CHECKED;
2089                                         if (ptest(op1->r.n))
2090                                                 op->info &= ~OF_CHECKED;
2091
2092                                         op = op->a.n;
2093                                 } else {
2094                                         op = op->r.n;
2095                                 }
2096                         } else {
2097                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2098                         }
2099                         break;
2100
2101                   /* just evaluate an expression, also used as unconditional jump */
2102                   case XC( OC_EXEC ):
2103                         break;
2104
2105                   /* branch, used in if-else and various loops */
2106                   case XC( OC_BR ):
2107                         op = istrue(L.v) ? op->a.n : op->r.n;
2108                         break;
2109
2110                   /* initialize for-in loop */
2111                   case XC( OC_WALKINIT ):
2112                         hashwalk_init(L.v, iamarray(R.v));
2113                         break;
2114
2115                   /* get next array item */
2116                   case XC( OC_WALKNEXT ):
2117                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2118                         break;
2119
2120                   case XC( OC_PRINT ):
2121                   case XC( OC_PRINTF ):
2122                         X.F = stdout;
2123                         if (op->r.n) {
2124                                 X.rsm = newfile(R.s);
2125                                 if (! X.rsm->F) {
2126                                         if (opn == '|') {
2127                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2128                                                         bb_perror_msg_and_die("popen");
2129                                                 X.rsm->is_pipe = 1;
2130                                         } else {
2131                                                 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2132                                         }
2133                                 }
2134                                 X.F = X.rsm->F;
2135                         }
2136
2137                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2138                                 if (! op1) {
2139                                         fputs(getvar_s(V[F0]), X.F);
2140                                 } else {
2141                                         while (op1) {
2142                                                 L.v = evaluate(nextarg(&op1), v1);
2143                                                 if (L.v->type & VF_NUMBER) {
2144                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2145                                                                                                                 getvar_i(L.v), TRUE);
2146                                                         fputs(buf, X.F);
2147                                                 } else {
2148                                                         fputs(getvar_s(L.v), X.F);
2149                                                 }
2150
2151                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2152                                         }
2153                                 }
2154                                 fputs(getvar_s(V[ORS]), X.F);
2155
2156                         } else {        /* OC_PRINTF */
2157                                 L.s = awk_printf(op1);
2158                                 fputs(L.s, X.F);
2159                                 free(L.s);
2160                         }
2161                         fflush(X.F);
2162                         break;
2163
2164                   case XC( OC_DELETE ):
2165                         X.info = op1->info & OPCLSMASK;
2166                         if (X.info == OC_VAR) {
2167                                 R.v = op1->l.v;
2168                         } else if (X.info == OC_FNARG) {
2169                                 R.v = &fnargs[op1->l.i];
2170                         } else {
2171                                 runtime_error(EMSG_NOT_ARRAY);
2172                         }
2173
2174                         if (op1->r.n) {
2175                                 clrvar(L.v);
2176                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2177                                 hash_remove(iamarray(R.v), L.s);
2178                         } else {
2179                                 clear_array(iamarray(R.v));
2180                         }
2181                         break;
2182
2183                   case XC( OC_NEWSOURCE ):
2184                         programname = op->l.s;
2185                         break;
2186
2187                   case XC( OC_RETURN ):
2188                         copyvar(res, L.v);
2189                         break;
2190
2191                   case XC( OC_NEXTFILE ):
2192                         nextfile = TRUE;
2193                   case XC( OC_NEXT ):
2194                         nextrec = TRUE;
2195                   case XC( OC_DONE ):
2196                         clrvar(res);
2197                         break;
2198
2199                   case XC( OC_EXIT ):
2200                         awk_exit(L.d);
2201
2202                   /* -- recursive node type -- */
2203
2204                   case XC( OC_VAR ):
2205                         L.v = op->l.v;
2206                         if (L.v == V[NF])
2207                                 split_f0();
2208                         goto v_cont;
2209
2210                   case XC( OC_FNARG ):
2211                         L.v = &fnargs[op->l.i];
2212
2213 v_cont:
2214                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2215                         break;
2216
2217                   case XC( OC_IN ):
2218                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2219                         break;
2220
2221                   case XC( OC_REGEXP ):
2222                         op1 = op;
2223                         L.s = getvar_s(V[F0]);
2224                         goto re_cont;
2225
2226                   case XC( OC_MATCH ):
2227                         op1 = op->r.n;
2228 re_cont:
2229                         X.re = as_regex(op1, &sreg);
2230                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2231                         if (X.re == &sreg) regfree(X.re);
2232                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2233                         break;
2234
2235                   case XC( OC_MOVE ):
2236                         /* if source is a temporary string, just relink it to dest */
2237                         if (R.v == v1+1 && R.v->string) {
2238                                 res = setvar_p(L.v, R.v->string);
2239                                 R.v->string = NULL;
2240                         } else {
2241                                 res = copyvar(L.v, R.v);
2242                         }
2243                         break;
2244
2245                   case XC( OC_TERNARY ):
2246                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2247                                 runtime_error(EMSG_POSSIBLE_ERROR);
2248                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2249                         break;
2250
2251                   case XC( OC_FUNC ):
2252                         if (! op->r.f->body.first)
2253                                 runtime_error(EMSG_UNDEF_FUNC);
2254
2255                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2256                         while (op1) {
2257                                 L.v = evaluate(nextarg(&op1), v1);
2258                                 copyvar(R.v, L.v);
2259                                 R.v->type |= VF_CHILD;
2260                                 R.v->x.parent = L.v;
2261                                 if (++R.v - X.v >= op->r.f->nargs)
2262                                         break;
2263                         }
2264
2265                         R.v = fnargs;
2266                         fnargs = X.v;
2267
2268                         L.s = programname;
2269                         res = evaluate(op->r.f->body.first, res);
2270                         programname = L.s;
2271
2272                         nvfree(fnargs);
2273                         fnargs = R.v;
2274                         break;
2275
2276                   case XC( OC_GETLINE ):
2277                   case XC( OC_PGETLINE ):
2278                         if (op1) {
2279                                 X.rsm = newfile(L.s);
2280                                 if (! X.rsm->F) {
2281                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2282                                                 X.rsm->F = popen(L.s, "r");
2283                                                 X.rsm->is_pipe = TRUE;
2284                                         } else {
2285                                                 X.rsm->F = fopen(L.s, "r");             /* not bb_xfopen! */
2286                                         }
2287                                 }
2288                         } else {
2289                                 if (! iF) iF = next_input_file();
2290                                 X.rsm = iF;
2291                         }
2292
2293                         if (! X.rsm->F) {
2294                                 setvar_i(V[ERRNO], errno);
2295                                 setvar_i(res, -1);
2296                                 break;
2297                         }
2298
2299                         if (! op->r.n)
2300                                 R.v = V[F0];
2301
2302                         L.i = awk_getline(X.rsm, R.v);
2303                         if (L.i > 0) {
2304                                 if (! op1) {
2305                                         incvar(V[FNR]);
2306                                         incvar(V[NR]);
2307                                 }
2308                         }
2309                         setvar_i(res, L.i);
2310                         break;
2311
2312                   /* simple builtins */
2313                   case XC( OC_FBLTIN ):
2314                         switch (opn) {
2315
2316                           case F_in:
2317                                 R.d = (int)L.d;
2318                                 break;
2319
2320                           case F_rn:
2321                                 R.d =  (double)rand() / (double)RAND_MAX;
2322                                 break;
2323
2324 #ifdef CONFIG_FEATURE_AWK_MATH
2325                           case F_co:
2326                                 R.d = cos(L.d);
2327                                 break;
2328
2329                           case F_ex:
2330                                 R.d = exp(L.d);
2331                                 break;
2332
2333                           case F_lg:
2334                                 R.d = log(L.d);
2335                                 break;
2336
2337                           case F_si:
2338                                 R.d = sin(L.d);
2339                                 break;
2340
2341                           case F_sq:
2342                                 R.d = sqrt(L.d);
2343                                 break;
2344 #else
2345                           case F_co:
2346                           case F_ex:
2347                           case F_lg:
2348                           case F_si:
2349                           case F_sq:
2350                                 runtime_error(EMSG_NO_MATH);
2351                                 break;
2352 #endif
2353
2354                           case F_sr:
2355                                 R.d = (double)seed;
2356                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2357                                 srand(seed);
2358                                 break;
2359
2360                           case F_ti:
2361                                 R.d = time(NULL);
2362                                 break;
2363
2364                           case F_le:
2365                                 if (! op1)
2366                                         L.s = getvar_s(V[F0]);
2367                                 R.d = bb_strlen(L.s);
2368                                 break;
2369
2370                           case F_sy:
2371                                 fflush(NULL);
2372                                 R.d = (L.s && *L.s) ? system(L.s) : 0;
2373                                 break;
2374
2375                           case F_ff:
2376                                 if (! op1)
2377                                         fflush(stdout);
2378                                 else {
2379                                         if (L.s && *L.s) {
2380                                                 X.rsm = newfile(L.s);
2381                                                 fflush(X.rsm->F);
2382                                         } else {
2383                                                 fflush(NULL);
2384                                         }
2385                                 }
2386                                 break;
2387
2388                           case F_cl:
2389                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2390                                 if (X.rsm) {
2391                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2392                                         free(X.rsm->buffer);
2393                                         hash_remove(fdhash, L.s);
2394                                 }
2395                                 if (R.i != 0)
2396                                         setvar_i(V[ERRNO], errno);
2397                                 R.d = (double)R.i;
2398                                 break;
2399                         }
2400                         setvar_i(res, R.d);
2401                         break;
2402
2403                   case XC( OC_BUILTIN ):
2404                         res = exec_builtin(op, res);
2405                         break;
2406
2407                   case XC( OC_SPRINTF ):
2408                         setvar_p(res, awk_printf(op1));
2409                         break;
2410
2411                   case XC( OC_UNARY ):
2412                         X.v = R.v;
2413                         L.d = R.d = getvar_i(R.v);
2414                         switch (opn) {
2415                           case 'P':
2416                                 L.d = ++R.d;
2417                                 goto r_op_change;
2418                           case 'p':
2419                                 R.d++;
2420                                 goto r_op_change;
2421                           case 'M':
2422                                 L.d = --R.d;
2423                                 goto r_op_change;
2424                           case 'm':
2425                                 R.d--;
2426                                 goto r_op_change;
2427                           case '!':
2428                             L.d = istrue(X.v) ? 0 : 1;
2429                                 break;
2430                           case '-':
2431                                 L.d = -R.d;
2432                                 break;
2433                         r_op_change:
2434                                 setvar_i(X.v, R.d);
2435                         }
2436                         setvar_i(res, L.d);
2437                         break;
2438
2439                   case XC( OC_FIELD ):
2440                         R.i = (int)getvar_i(R.v);
2441                         if (R.i == 0) {
2442                                 res = V[F0];
2443                         } else {
2444                                 split_f0();
2445                                 if (R.i > nfields)
2446                                         fsrealloc(R.i);
2447
2448                                 res = &Fields[R.i-1];
2449                         }
2450                         break;
2451
2452                   /* concatenation (" ") and index joining (",") */
2453                   case XC( OC_CONCAT ):
2454                   case XC( OC_COMMA ):
2455                         opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2456                         X.s = (char *)xmalloc(opn);
2457                         strcpy(X.s, L.s);
2458                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2459                                 L.s = getvar_s(V[SUBSEP]);
2460                                 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2461                                 strcat(X.s, L.s);
2462                         }
2463                         strcat(X.s, R.s);
2464                         setvar_p(res, X.s);
2465                         break;
2466
2467                   case XC( OC_LAND ):
2468                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2469                         break;
2470
2471                   case XC( OC_LOR ):
2472                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2473                         break;
2474
2475                   case XC( OC_BINARY ):
2476                   case XC( OC_REPLACE ):
2477                         R.d = getvar_i(R.v);
2478                         switch (opn) {
2479                           case '+':
2480                                 L.d += R.d;
2481                                 break;
2482                           case '-':
2483                                 L.d -= R.d;
2484                                 break;
2485                           case '*':
2486                                 L.d *= R.d;
2487                                 break;
2488                           case '/':
2489                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2490                                 L.d /= R.d;
2491                                 break;
2492                           case '&':
2493 #ifdef CONFIG_FEATURE_AWK_MATH
2494                                 L.d = pow(L.d, R.d);
2495 #else
2496                                 runtime_error(EMSG_NO_MATH);
2497 #endif
2498                                 break;
2499                           case '%':
2500                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2501                                 L.d -= (int)(L.d / R.d) * R.d;
2502                                 break;
2503                         }
2504                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2505                         break;
2506
2507                   case XC( OC_COMPARE ):
2508                         if (is_numeric(L.v) && is_numeric(R.v)) {
2509                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2510                         } else {
2511                                 L.s = getvar_s(L.v);
2512                                 R.s = getvar_s(R.v);
2513                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2514                         }
2515                         switch (opn & 0xfe) {
2516                           case 0:
2517                                 R.i = (L.d > 0);
2518                                 break;
2519                           case 2:
2520                                 R.i = (L.d >= 0);
2521                                 break;
2522                           case 4:
2523                                 R.i = (L.d == 0);
2524                                 break;
2525                         }
2526                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2527                         break;
2528
2529                   default:
2530                         runtime_error(EMSG_POSSIBLE_ERROR);
2531                 }
2532                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2533                         op = op->a.n;
2534                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2535                         break;
2536                 if (nextrec)
2537                         break;
2538         }
2539         nvfree(v1);
2540         return res;
2541 }
2542
2543
2544 /* -------- main & co. -------- */
2545
2546 static int awk_exit(int r) {
2547
2548         unsigned int i;
2549         hash_item *hi;
2550         static var tv;
2551
2552         if (! exiting) {
2553                 exiting = TRUE;
2554                 evaluate(endseq.first, &tv);
2555         }
2556
2557         /* waiting for children */
2558         for (i=0; i<fdhash->csize; i++) {
2559                 hi = fdhash->items[i];
2560                 while(hi) {
2561                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2562                                 pclose(hi->data.rs.F);
2563                         hi = hi->next;
2564                 }
2565         }
2566
2567         exit(r);
2568 }
2569
2570 /* if expr looks like "var=value", perform assignment and return 1,
2571  * otherwise return 0 */
2572 static int is_assignment(char *expr) {
2573
2574         char *exprc, *s, *s0, *s1;
2575
2576         exprc = bb_xstrdup(expr);
2577         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2578                 free(exprc);
2579                 return FALSE;
2580         }
2581
2582         *(s++) = '\0';
2583         s0 = s1 = s;
2584         while (*s)
2585                 *(s1++) = nextchar(&s);
2586
2587         *s1 = '\0';
2588         setvar_u(newvar(exprc), s0);
2589         free(exprc);
2590         return TRUE;
2591 }
2592
2593 /* switch to next input file */
2594 static rstream *next_input_file(void) {
2595
2596         static rstream rsm;
2597         FILE *F = NULL;
2598         char *fname, *ind;
2599         static int files_happen = FALSE;
2600
2601         if (rsm.F) fclose(rsm.F);
2602         rsm.F = NULL;
2603         rsm.pos = rsm.adv = 0;
2604
2605         do {
2606                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2607                         if (files_happen)
2608                                 return NULL;
2609                         fname = "-";
2610                         F = stdin;
2611                 } else {
2612                         ind = getvar_s(incvar(V[ARGIND]));
2613                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2614                         if (fname && *fname && !is_assignment(fname))
2615                                 F = afopen(fname, "r");
2616                 }
2617         } while (!F);
2618
2619         files_happen = TRUE;
2620         setvar_s(V[FILENAME], fname);
2621         rsm.F = F;
2622         return &rsm;
2623 }
2624
2625 extern int awk_main(int argc, char **argv) {
2626
2627         char *s, *s1;
2628         int i, j, c;
2629         var *v;
2630         static var tv;
2631         char **envp;
2632         static int from_file = FALSE;
2633         rstream *rsm;
2634         FILE *F, *stdfiles[3];
2635         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2636
2637         /* allocate global buffer */
2638         buf = xmalloc(MAXVARFMT+1);
2639
2640         vhash = hash_init();
2641         ahash = hash_init();
2642         fdhash = hash_init();
2643         fnhash = hash_init();
2644
2645         /* initialize variables */
2646         for (i=0;  *vNames;  i++) {
2647                 V[i] = v = newvar(nextword(&vNames));
2648                 if (*vValues != '\377')
2649                         setvar_s(v, nextword(&vValues));
2650                 else
2651                         setvar_i(v, 0);
2652
2653                 if (*vNames == '*') {
2654                         v->type |= VF_SPECIAL;
2655                         vNames++;
2656                 }
2657         }
2658
2659         handle_special(V[FS]);
2660         handle_special(V[RS]);
2661
2662         stdfiles[0] = stdin;
2663         stdfiles[1] = stdout;
2664         stdfiles[2] = stderr;
2665         for (i=0; i<3; i++) {
2666                 rsm = newfile(nextword(&stdnames));
2667                 rsm->F = stdfiles[i];
2668         }
2669
2670         for (envp=environ; *envp; envp++) {
2671                 s = bb_xstrdup(*envp);
2672                 s1 = strchr(s, '=');
2673                 if (!s1) {
2674                         goto keep_going;
2675                 }
2676                 *(s1++) = '\0';
2677                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2678 keep_going:
2679                 free(s);
2680         }
2681
2682         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2683                 switch (c) {
2684                         case 'F':
2685                                 setvar_s(V[FS], optarg);
2686                                 break;
2687                         case 'v':
2688                                 if (! is_assignment(optarg))
2689                                         bb_show_usage();
2690                                 break;
2691                         case 'f':
2692                                 from_file = TRUE;
2693                                 F = afopen(programname = optarg, "r");
2694                                 s = NULL;
2695                                 /* one byte is reserved for some trick in next_token */
2696                                 for (i=j=1; j>0; i+=j) {
2697                                         s = (char *)xrealloc(s, i+4096);
2698                                         j = fread(s+i, 1, 4094, F);
2699                                 }
2700                                 s[i] = '\0';
2701                                 fclose(F);
2702                                 parse_program(s+1);
2703                                 free(s);
2704                                 break;
2705                         case 'W':
2706                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2707                                 break;
2708
2709                         default:
2710                                 bb_show_usage();
2711                 }
2712         }
2713
2714         if (!from_file) {
2715                 if (argc == optind)
2716                         bb_show_usage();
2717                 programname="cmd. line";
2718                 parse_program(argv[optind++]);
2719
2720         }
2721
2722         /* fill in ARGV array */
2723         setvar_i(V[ARGC], argc - optind + 1);
2724         setari_u(V[ARGV], 0, "awk");
2725         for(i=optind; i < argc; i++)
2726                 setari_u(V[ARGV], i+1-optind, argv[i]);
2727
2728         evaluate(beginseq.first, &tv);
2729         if (! mainseq.first && ! endseq.first)
2730                 awk_exit(EXIT_SUCCESS);
2731
2732         /* input file could already be opened in BEGIN block */
2733         if (! iF) iF = next_input_file();
2734
2735         /* passing through input files */
2736         while (iF) {
2737
2738                 nextfile = FALSE;
2739                 setvar_i(V[FNR], 0);
2740
2741                 while ((c = awk_getline(iF, V[F0])) > 0) {
2742
2743                         nextrec = FALSE;
2744                         incvar(V[NR]);
2745                         incvar(V[FNR]);
2746                         evaluate(mainseq.first, &tv);
2747
2748                         if (nextfile)
2749                                 break;
2750                 }
2751
2752                 if (c < 0)
2753                         runtime_error(strerror(errno));
2754
2755                 iF = next_input_file();
2756
2757         }
2758
2759         awk_exit(EXIT_SUCCESS);
2760
2761         return 0;
2762 }
2763