ed8b0f20f5abfd1557766c962637623313a7663a
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <time.h>
29 #include <math.h>
30 #include <ctype.h>
31 #include <getopt.h>
32 #include <regex.h>
33
34 #include "busybox.h"
35
36
37 #define MAXVARFMT       240
38 #define MINNVBLOCK      64
39
40 /* variable flags */
41 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
42 #define VF_ARRAY        0x0002  /* 1 = it's an array */
43
44 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
45 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
47 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
49 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
51
52 /* these flags are static, don't change them when value is changed */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
55 /* Variable */
56 typedef struct var_s {
57         unsigned short type;            /* flags */
58         double number;
59         char *string;
60         union {
61                 int aidx;                               /* func arg index (on compilation stage) */
62                 struct xhash_s *array;  /* array ptr */
63                 struct var_s *parent;   /* for func args, ptr to actual parameter */
64                 char **walker;                  /* list of array elements (for..in) */
65         } x;
66 } var;
67
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
70         struct node_s *first;
71         struct node_s *last;
72         char *programname;
73 } chain;
74
75 /* Function */
76 typedef struct func_s {
77         unsigned short nargs;
78         struct chain_s body;
79 } func;
80
81 /* I/O stream */
82 typedef struct rstream_s {
83         FILE *F;
84         char *buffer;
85         int adv;
86         int size;
87         int pos;
88         unsigned short is_pipe;
89 } rstream;
90
91 typedef struct hash_item_s {
92         union {
93                 struct var_s v;                 /* variable/array hash */
94                 struct rstream_s rs;    /* redirect streams hash */
95                 struct func_s f;                /* functions hash */
96         } data;
97         struct hash_item_s *next;       /* next in chain */
98         char name[1];                           /* really it's longer */
99 } hash_item;
100
101 typedef struct xhash_s {
102         unsigned int nel;                                       /* num of elements */
103         unsigned int csize;                                     /* current hash size */
104         unsigned int nprime;                            /* next hash size in PRIMES[] */
105         unsigned int glen;                                      /* summary length of item names */
106         struct hash_item_s **items;
107 } xhash;
108
109 /* Tree node */
110 typedef struct node_s {
111         unsigned long info;
112         unsigned short lineno;
113         union {
114                 struct node_s *n;
115                 var *v;
116                 int i;
117                 char *s;
118                 regex_t *re;
119         } l;
120         union {
121                 struct node_s *n;
122                 regex_t *ire;
123                 func *f;
124                 int argno;
125         } r;
126         union {
127                 struct node_s *n;
128         } a;
129 } node;
130
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
133         int size;
134         var *pos;
135         struct nvblock_s *prev;
136         struct nvblock_s *next;
137         var nv[0];
138 } nvblock;
139
140 typedef struct tsplitter_s {
141         node n;
142         regex_t re[2];
143 } tsplitter;
144
145 /* simple token classes */
146 /* Order and hex values are very important!!!  See next_token() */
147 #define TC_SEQSTART      1                              /* ( */
148 #define TC_SEQTERM      (1 << 1)                /* ) */
149 #define TC_REGEXP       (1 << 2)                /* /.../ */
150 #define TC_OUTRDR       (1 << 3)                /* | > >> */
151 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
152 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
153 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
154 #define TC_IN           (1 << 7)
155 #define TC_COMMA        (1 << 8)
156 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
157 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
158 #define TC_ARRTERM      (1 << 11)               /* ] */
159 #define TC_GRPSTART     (1 << 12)               /* { */
160 #define TC_GRPTERM      (1 << 13)               /* } */
161 #define TC_SEMICOL      (1 << 14)
162 #define TC_NEWLINE      (1 << 15)
163 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
164 #define TC_WHILE        (1 << 17)
165 #define TC_ELSE         (1 << 18)
166 #define TC_BUILTIN      (1 << 19)
167 #define TC_GETLINE      (1 << 20)
168 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
169 #define TC_BEGIN        (1 << 22)
170 #define TC_END          (1 << 23)
171 #define TC_EOF          (1 << 24)
172 #define TC_VARIABLE     (1 << 25)
173 #define TC_ARRAY        (1 << 26)
174 #define TC_FUNCTION     (1 << 27)
175 #define TC_STRING       (1 << 28)
176 #define TC_NUMBER       (1 << 29)
177
178 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
179
180 /* combined token classes */
181 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
182 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
183 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
184         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
185
186 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
187 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
188
189 /* word tokens, cannot mean something else if not expected */
190 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
191         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
192
193 /* discard newlines after these */
194 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
195         TC_BINOP | TC_OPTERM)
196
197 /* what can expression begin with */
198 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
199 /* what can group begin with */
200 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
201
202 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
203 /* operator is inserted between them */
204 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
205         TC_STRING | TC_NUMBER | TC_UOPPOST)
206 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
207
208 #define OF_RES1         0x010000
209 #define OF_RES2         0x020000
210 #define OF_STR1         0x040000
211 #define OF_STR2         0x080000
212 #define OF_NUM1         0x100000
213 #define OF_CHECKED      0x200000
214
215 /* combined operator flags */
216 #define xx      0
217 #define xV      OF_RES2
218 #define xS      (OF_RES2 | OF_STR2)
219 #define Vx      OF_RES1
220 #define VV      (OF_RES1 | OF_RES2)
221 #define Nx      (OF_RES1 | OF_NUM1)
222 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
223 #define Sx      (OF_RES1 | OF_STR1)
224 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
225 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
226
227 #define OPCLSMASK       0xFF00
228 #define OPNMASK         0x007F
229
230 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
231  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
232  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
233  */
234 #define P(x)    (x << 24)
235 #define PRIMASK         0x7F000000
236 #define PRIMASK2        0x7E000000
237
238 /* Operation classes */
239
240 #define SHIFT_TIL_THIS  0x0600
241 #define RECUR_FROM_THIS 0x1000
242
243 enum {
244         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
245         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
246
247         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
248         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
249         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
250
251         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
252         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
253         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
254         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
255         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
256         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
257         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
258         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
259         OC_DONE=0x2800,
260
261         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
262         ST_WHILE=0x3300
263 };
264
265 /* simple builtins */
266 enum {
267         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
268         F_ti,   F_le,   F_sy,   F_ff,   F_cl
269 };
270
271 /* builtins */
272 enum {
273         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
274         B_ge,   B_gs,   B_su
275 };
276
277 /* tokens and their corresponding info values */
278
279 #define NTC             "\377"          /* switch to next token class (tc<<1) */
280 #define NTCC    '\377'
281
282 #define OC_B    OC_BUILTIN
283
284 static char * const tokenlist =
285         "\1("           NTC
286         "\1)"           NTC
287         "\1/"           NTC                                                                     /* REGEXP */
288         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
289         "\2++"          "\2--"          NTC                                             /* UOPPOST */
290         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
291         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
292         "\2*="          "\2/="          "\2%="          "\2^="
293         "\1+"           "\1-"           "\3**="         "\2**"
294         "\1/"           "\1%"           "\1^"           "\1*"
295         "\2!="          "\2>="          "\2<="          "\1>"
296         "\1<"           "\2!~"          "\1~"           "\2&&"
297         "\2||"          "\1?"           "\1:"           NTC
298         "\2in"          NTC
299         "\1,"           NTC
300         "\1|"           NTC
301         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
302         "\1]"           NTC
303         "\1{"           NTC
304         "\1}"           NTC
305         "\1;"           NTC
306         "\1\n"          NTC
307         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
308         "\10continue"                   "\6delete"      "\5print"
309         "\6printf"      "\4next"        "\10nextfile"
310         "\6return"      "\4exit"        NTC
311         "\5while"       NTC
312         "\4else"        NTC
313
314         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
315         "\3cos"         "\3exp"         "\3int"         "\3log"
316         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
317         "\6gensub"      "\4gsub"        "\5index"       "\6length"
318         "\5match"       "\5split"       "\7sprintf"     "\3sub"
319         "\6substr"      "\7systime"     "\10strftime"
320         "\7tolower"     "\7toupper"     NTC
321         "\7getline"     NTC
322         "\4func"        "\10function"   NTC
323         "\5BEGIN"       NTC
324         "\3END"         "\0"
325         ;
326
327 static unsigned long tokeninfo[] = {
328
329         0,
330         0,
331         OC_REGEXP,
332         xS|'a',         xS|'w',         xS|'|',
333         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
334         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
335                 OC_FIELD|xV|P(5),
336         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
337                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
338         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
339                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
340         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
341                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
342         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
343                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
344         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
345                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
346         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
347                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
348         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
349                 OC_COLON|xx|P(67)|':',
350         OC_IN|SV|P(49),
351         OC_COMMA|SS|P(80),
352         OC_PGETLINE|SV|P(37),
353         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
354                 OC_UNARY|xV|P(19)|'!',
355         0,
356         0,
357         0,
358         0,
359         0,
360         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
361         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
362         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
363         OC_RETURN|Vx,   OC_EXIT|Nx,
364         ST_WHILE,
365         0,
366
367         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
368         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
369         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
370         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
371         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
372         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
373         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
374         OC_GETLINE|SV|P(0),
375         0,      0,
376         0,
377         0
378 };
379
380 /* internal variable names and their initial values       */
381 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
382 enum {
383         CONVFMT=0,      OFMT,           FS,                     OFS,
384         ORS,            RS,                     RT,                     FILENAME,
385         SUBSEP,         ARGIND,         ARGC,           ARGV,
386         ERRNO,          FNR,
387         NR,                     NF,                     IGNORECASE,
388         ENVIRON,        F0,                     _intvarcount_
389 };
390
391 static char * vNames =
392         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
393         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
394         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
395         "ERRNO\0"       "FNR\0"
396         "NR\0"          "NF\0*"         "IGNORECASE\0*"
397         "ENVIRON\0"     "$\0*"          "\0";
398
399 static char * vValues =
400         "%.6g\0"        "%.6g\0"        " \0"           " \0"
401         "\n\0"          "\n\0"          "\0"            "\0"
402         "\034\0"
403         "\377";
404
405 /* hash size may grow to these values */
406 #define FIRST_PRIME 61;
407 static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
408 static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(unsigned int);
409
410 /* globals */
411
412 extern char **environ;
413
414 static var * V[_intvarcount_];
415 static chain beginseq, mainseq, endseq, *seq;
416 static int nextrec, nextfile;
417 static node *break_ptr, *continue_ptr;
418 static rstream *iF;
419 static xhash *vhash, *ahash, *fdhash, *fnhash;
420 static char *programname;
421 static short lineno;
422 static int is_f0_split;
423 static int nfields = 0;
424 static var *Fields = NULL;
425 static tsplitter fsplitter, rsplitter;
426 static nvblock *cb = NULL;
427 static char *pos;
428 static char *buf;
429 static int icase = FALSE;
430 static int exiting = FALSE;
431
432 static struct {
433         unsigned long tclass;
434         unsigned long info;
435         char *string;
436         double number;
437         short lineno;
438         int rollback;
439 } t;
440
441 /* function prototypes */
442 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
443 static void handle_special(var *);
444 static node *parse_expr(unsigned long);
445 static void chain_group(void);
446 static var *evaluate(node *, var *);
447 static rstream *next_input_file(void);
448 static int fmt_num(char *, int, char *, double, int);
449 static int awk_exit(int);
450
451 /* ---- error handling ---- */
452
453 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
454 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
455 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
456 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
457 static const char EMSG_INV_FMT[] = "Invalid format specifier";
458 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
459 static const char EMSG_NOT_ARRAY[] = "Not an array";
460 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
461 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
462 #ifndef CONFIG_FEATURE_AWK_MATH
463 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
464 #endif
465
466 static void syntax_error(const char * const message)
467 {
468         bb_error_msg("%s:%i: %s", programname, lineno, message);
469         exit(1);
470 }
471
472 #define runtime_error(x) syntax_error(x)
473
474
475 /* ---- hash stuff ---- */
476
477 static unsigned int hashidx(char *name) {
478
479         register unsigned int idx=0;
480
481         while (*name)  idx = *name++ + (idx << 6) - idx;
482         return idx;
483 }
484
485 /* create new hash */
486 static xhash *hash_init(void) {
487
488         xhash *newhash;
489
490         newhash = (xhash *)xcalloc(1, sizeof(xhash));
491         newhash->csize = FIRST_PRIME;
492         newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
493
494         return newhash;
495 }
496
497 /* find item in hash, return ptr to data, NULL if not found */
498 static void *hash_search(xhash *hash, char *name) {
499
500         hash_item *hi;
501
502         hi = hash->items [ hashidx(name) % hash->csize ];
503         while (hi) {
504                 if (strcmp(hi->name, name) == 0)
505                         return &(hi->data);
506                 hi = hi->next;
507         }
508         return NULL;
509 }
510
511 /* grow hash if it becomes too big */
512 static void hash_rebuild(xhash *hash) {
513
514         unsigned int newsize, i, idx;
515         hash_item **newitems, *hi, *thi;
516
517         if (hash->nprime == NPRIMES)
518                 return;
519
520         newsize = PRIMES[hash->nprime++];
521         newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
522
523         for (i=0; i<hash->csize; i++) {
524                 hi = hash->items[i];
525                 while (hi) {
526                         thi = hi;
527                         hi = thi->next;
528                         idx = hashidx(thi->name) % newsize;
529                         thi->next = newitems[idx];
530                         newitems[idx] = thi;
531                 }
532         }
533
534         free(hash->items);
535         hash->csize = newsize;
536         hash->items = newitems;
537 }
538
539 /* find item in hash, add it if necessary. Return ptr to data */
540 static void *hash_find(xhash *hash, char *name) {
541
542         hash_item *hi;
543         unsigned int idx;
544         int l;
545
546         hi = hash_search(hash, name);
547         if (! hi) {
548                 if (++hash->nel / hash->csize > 10)
549                         hash_rebuild(hash);
550
551                 l = bb_strlen(name) + 1;
552                 hi = xcalloc(sizeof(hash_item) + l, 1);
553                 memcpy(hi->name, name, l);
554
555                 idx = hashidx(name) % hash->csize;
556                 hi->next = hash->items[idx];
557                 hash->items[idx] = hi;
558                 hash->glen += l;
559         }
560         return &(hi->data);
561 }
562
563 #define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
564 #define newvar(name) (var *) hash_find ( vhash , (name) )
565 #define newfile(name) (rstream *) hash_find ( fdhash , (name) )
566 #define newfunc(name) (func *) hash_find ( fnhash , (name) )
567
568 static void hash_remove(xhash *hash, char *name) {
569
570         hash_item *hi, **phi;
571
572         phi = &(hash->items[ hashidx(name) % hash->csize ]);
573         while (*phi) {
574                 hi = *phi;
575                 if (strcmp(hi->name, name) == 0) {
576                         hash->glen -= (bb_strlen(name) + 1);
577                         hash->nel--;
578                         *phi = hi->next;
579                         free(hi);
580                         break;
581                 }
582                 phi = &(hi->next);
583         }
584 }
585
586 /* ------ some useful functions ------ */
587
588 static void skip_spaces(char **s) {
589
590         register char *p = *s;
591
592         while(*p == ' ' || *p == '\t' ||
593                                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
594                 p++;
595         }
596         *s = p;
597 }
598
599 static char *nextword(char **s) {
600
601         register char *p = *s;
602
603         while (*(*s)++) ;
604
605         return p;
606 }
607
608 static char nextchar(char **s) {
609
610         register char c, *pps;
611
612         c = *((*s)++);
613         pps = *s;
614         if (c == '\\') c = bb_process_escape_sequence((const char**)s);
615         if (c == '\\' && *s == pps) c = *((*s)++);
616         return c;
617 }
618
619 static inline int isalnum_(int c) {
620
621         return (isalnum(c) || c == '_');
622 }
623
624 static FILE *afopen(const char *path, const char *mode) {
625
626         return (*path == '-' && *(path+1) == '\0') ? stdin : bb_xfopen(path, mode);
627 }
628
629 /* -------- working with variables (set/get/copy/etc) -------- */
630
631 static xhash *iamarray(var *v) {
632
633         var *a = v;
634
635         while (a->type & VF_CHILD)
636                 a = a->x.parent;
637
638         if (! (a->type & VF_ARRAY)) {
639                 a->type |= VF_ARRAY;
640                 a->x.array = hash_init();
641         }
642         return a->x.array;
643 }
644
645 static void clear_array(xhash *array) {
646
647         unsigned int i;
648         hash_item *hi, *thi;
649
650         for (i=0; i<array->csize; i++) {
651                 hi = array->items[i];
652                 while (hi) {
653                         thi = hi;
654                         hi = hi->next;
655                         free(thi->data.v.string);
656                         free(thi);
657                 }
658                 array->items[i] = NULL;
659         }
660         array->glen = array->nel = 0;
661 }
662
663 /* clear a variable */
664 static var *clrvar(var *v) {
665
666         if (!(v->type & VF_FSTR))
667                 free(v->string);
668
669         v->type &= VF_DONTTOUCH;
670         v->type |= VF_DIRTY;
671         v->string = NULL;
672         return v;
673 }
674
675 /* assign string value to variable */
676 static var *setvar_p(var *v, char *value) {
677
678         clrvar(v);
679         v->string = value;
680         handle_special(v);
681
682         return v;
683 }
684
685 /* same as setvar_p but make a copy of string */
686 static var *setvar_s(var *v, char *value) {
687
688         return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
689 }
690
691 /* same as setvar_s but set USER flag */
692 static var *setvar_u(var *v, char *value) {
693
694         setvar_s(v, value);
695         v->type |= VF_USER;
696         return v;
697 }
698
699 /* set array element to user string */
700 static void setari_u(var *a, int idx, char *s) {
701
702         register var *v;
703         static char sidx[12];
704
705         sprintf(sidx, "%d", idx);
706         v = findvar(iamarray(a), sidx);
707         setvar_u(v, s);
708 }
709
710 /* assign numeric value to variable */
711 static var *setvar_i(var *v, double value) {
712
713         clrvar(v);
714         v->type |= VF_NUMBER;
715         v->number = value;
716         handle_special(v);
717         return v;
718 }
719
720 static char *getvar_s(var *v) {
721
722         /* if v is numeric and has no cached string, convert it to string */
723         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
724                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
725                 v->string = bb_xstrdup(buf);
726                 v->type |= VF_CACHED;
727         }
728         return (v->string == NULL) ? "" : v->string;
729 }
730
731 static double getvar_i(var *v) {
732
733         char *s;
734
735         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
736                 v->number = 0;
737                 s = v->string;
738                 if (s && *s) {
739                         v->number = strtod(s, &s);
740                         if (v->type & VF_USER) {
741                                 skip_spaces(&s);
742                                 if (*s != '\0')
743                                         v->type &= ~VF_USER;
744                         }
745                 } else {
746                         v->type &= ~VF_USER;
747                 }
748                 v->type |= VF_CACHED;
749         }
750         return v->number;
751 }
752
753 static var *copyvar(var *dest, var *src) {
754
755         if (dest != src) {
756                 clrvar(dest);
757                 dest->type |= (src->type & ~VF_DONTTOUCH);
758                 dest->number = src->number;
759                 if (src->string)
760                         dest->string = bb_xstrdup(src->string);
761         }
762         handle_special(dest);
763         return dest;
764 }
765
766 static var *incvar(var *v) {
767
768         return setvar_i(v, getvar_i(v)+1.);
769 }
770
771 /* return true if v is number or numeric string */
772 static int is_numeric(var *v) {
773
774         getvar_i(v);
775         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
776 }
777
778 /* return 1 when value of v corresponds to true, 0 otherwise */
779 static int istrue(var *v) {
780
781         if (is_numeric(v))
782                 return (v->number == 0) ? 0 : 1;
783         else
784                 return (v->string && *(v->string)) ? 1 : 0;
785 }
786
787 /* temporary variables allocator. Last allocated should be first freed */
788 static var *nvalloc(int n) {
789
790         nvblock *pb = NULL;
791         var *v, *r;
792         int size;
793
794         while (cb) {
795                 pb = cb;
796                 if ((cb->pos - cb->nv) + n <= cb->size) break;
797                 cb = cb->next;
798         }
799
800         if (! cb) {
801                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
802                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
803                 cb->size = size;
804                 cb->pos = cb->nv;
805                 cb->prev = pb;
806                 cb->next = NULL;
807                 if (pb) pb->next = cb;
808         }
809
810         v = r = cb->pos;
811         cb->pos += n;
812
813         while (v < cb->pos) {
814                 v->type = 0;
815                 v->string = NULL;
816                 v++;
817         }
818
819         return r;
820 }
821
822 static void nvfree(var *v) {
823
824         var *p;
825
826         if (v < cb->nv || v >= cb->pos)
827                 runtime_error(EMSG_INTERNAL_ERROR);
828
829         for (p=v; p<cb->pos; p++) {
830                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
831                         clear_array(iamarray(p));
832                         free(p->x.array->items);
833                         free(p->x.array);
834                 }
835                 if (p->type & VF_WALK)
836                         free(p->x.walker);
837
838                 clrvar(p);
839         }
840
841         cb->pos = v;
842         while (cb->prev && cb->pos == cb->nv) {
843                 cb = cb->prev;
844         }
845 }
846
847 /* ------- awk program text parsing ------- */
848
849 /* Parse next token pointed by global pos, place results into global t.
850  * If token isn't expected, give away. Return token class
851  */
852 static unsigned long next_token(unsigned long expected) {
853
854         char *p, *pp, *s;
855         char *tl;
856         unsigned long tc, *ti;
857         int l;
858         static int concat_inserted = FALSE;
859         static unsigned long save_tclass, save_info;
860         static unsigned long ltclass = TC_OPTERM;
861
862         if (t.rollback) {
863
864                 t.rollback = FALSE;
865
866         } else if (concat_inserted) {
867
868                 concat_inserted = FALSE;
869                 t.tclass = save_tclass;
870                 t.info = save_info;
871
872         } else {
873
874                 p = pos;
875
876         readnext:
877                 skip_spaces(&p);
878                 lineno = t.lineno;
879                 if (*p == '#')
880                         while (*p != '\n' && *p != '\0') p++;
881
882                 if (*p == '\n')
883                         t.lineno++;
884
885                 if (*p == '\0') {
886                         tc = TC_EOF;
887
888                 } else if (*p == '\"') {
889                         /* it's a string */
890                         t.string = s = ++p;
891                         while (*p != '\"') {
892                                 if (*p == '\0' || *p == '\n')
893                                         syntax_error(EMSG_UNEXP_EOS);
894                                 *(s++) = nextchar(&p);
895                         }
896                         p++;
897                         *s = '\0';
898                         tc = TC_STRING;
899
900                 } else if ((expected & TC_REGEXP) && *p == '/') {
901                         /* it's regexp */
902                         t.string = s = ++p;
903                         while (*p != '/') {
904                                 if (*p == '\0' || *p == '\n')
905                                         syntax_error(EMSG_UNEXP_EOS);
906                                 if ((*s++ = *p++) == '\\') {
907                                         pp = p;
908                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
909                                         if (*pp == '\\') *s++ = '\\';
910                                         if (p == pp) *s++ = *p++;
911                                 }
912                         }
913                         p++;
914                         *s = '\0';
915                         tc = TC_REGEXP;
916
917                 } else if (*p == '.' || isdigit(*p)) {
918                         /* it's a number */
919                         t.number = strtod(p, &p);
920                         if (*p == '.')
921                                 syntax_error(EMSG_UNEXP_TOKEN);
922                         tc = TC_NUMBER;
923
924                 } else {
925                         /* search for something known */
926                         tl = tokenlist;
927                         tc = 0x00000001;
928                         ti = tokeninfo;
929                         while (*tl) {
930                                 l = *(tl++);
931                                 if (l == NTCC) {
932                                         tc <<= 1;
933                                         continue;
934                                 }
935                                 /* if token class is expected, token
936                                  * matches and it's not a longer word,
937                                  * then this is what we are looking for
938                                  */
939                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
940                                 *tl == *p && strncmp(p, tl, l) == 0 &&
941                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
942                                         t.info = *ti;
943                                         p += l;
944                                         break;
945                                 }
946                                 ti++;
947                                 tl += l;
948                         }
949
950                         if (! *tl) {
951                                 /* it's a name (var/array/function),
952                                  * otherwise it's something wrong
953                                  */
954                                 if (! isalnum_(*p))
955                                         syntax_error(EMSG_UNEXP_TOKEN);
956
957                                 t.string = --p;
958                                 while(isalnum_(*(++p))) {
959                                         *(p-1) = *p;
960                                 }
961                                 *(p-1) = '\0';
962                                 tc = TC_VARIABLE;
963                                 if (*p == '(') {
964                                         tc = TC_FUNCTION;
965                                 } else {
966                                         skip_spaces(&p);
967                                         if (*p == '[') {
968                                                 p++;
969                                                 tc = TC_ARRAY;
970                                         }
971                                 }
972                         }
973                 }
974                 pos = p;
975
976                 /* skipping newlines in some cases */
977                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
978                         goto readnext;
979
980                 /* insert concatenation operator when needed */
981                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
982                         concat_inserted = TRUE;
983                         save_tclass = tc;
984                         save_info = t.info;
985                         tc = TC_BINOP;
986                         t.info = OC_CONCAT | SS | P(35);
987                 }
988
989                 t.tclass = tc;
990         }
991         ltclass = t.tclass;
992
993         /* Are we ready for this? */
994         if (! (ltclass & expected))
995                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
996                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
997
998         return ltclass;
999 }
1000
1001 static void rollback_token(void) { t.rollback = TRUE; }
1002
1003 static node *new_node(unsigned long info) {
1004
1005         register node *n;
1006
1007         n = (node *)xcalloc(sizeof(node), 1);
1008         n->info = info;
1009         n->lineno = lineno;
1010         return n;
1011 }
1012
1013 static node *mk_re_node(char *s, node *n, regex_t *re) {
1014
1015         n->info = OC_REGEXP;
1016         n->l.re = re;
1017         n->r.ire = re + 1;
1018         xregcomp(re, s, REG_EXTENDED);
1019         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1020
1021         return n;
1022 }
1023
1024 static node *condition(void) {
1025
1026         next_token(TC_SEQSTART);
1027         return parse_expr(TC_SEQTERM);
1028 }
1029
1030 /* parse expression terminated by given argument, return ptr
1031  * to built subtree. Terminator is eaten by parse_expr */
1032 static node *parse_expr(unsigned long iexp) {
1033
1034         node sn;
1035         node *cn = &sn;
1036         node *vn, *glptr;
1037         unsigned long tc, xtc;
1038         var *v;
1039
1040         sn.info = PRIMASK;
1041         sn.r.n = glptr = NULL;
1042         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1043
1044         while (! ((tc = next_token(xtc)) & iexp)) {
1045                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1046                         /* input redirection (<) attached to glptr node */
1047                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1048                         cn->a.n = glptr;
1049                         xtc = TC_OPERAND | TC_UOPPRE;
1050                         glptr = NULL;
1051
1052                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1053                         /* for binary and postfix-unary operators, jump back over
1054                          * previous operators with higher priority */
1055                         vn = cn;
1056                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1057                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1058                                 vn = vn->a.n;
1059                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1060                                 t.info += P(6);
1061                         cn = vn->a.n->r.n = new_node(t.info);
1062                         cn->a.n = vn->a.n;
1063                         if (tc & TC_BINOP) {
1064                                 cn->l.n = vn;
1065                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1066                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1067                                         /* it's a pipe */
1068                                         next_token(TC_GETLINE);
1069                                         /* give maximum priority to this pipe */
1070                                         cn->info &= ~PRIMASK;
1071                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1072                                 }
1073                         } else {
1074                                 cn->r.n = vn;
1075                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1076                         }
1077                         vn->a.n = cn;
1078
1079                 } else {
1080                         /* for operands and prefix-unary operators, attach them
1081                          * to last node */
1082                         vn = cn;
1083                         cn = vn->r.n = new_node(t.info);
1084                         cn->a.n = vn;
1085                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1086                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1087                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1088                                 /* one should be very careful with switch on tclass -
1089                                  * only simple tclasses should be used! */
1090                                 switch (tc) {
1091                                   case TC_VARIABLE:
1092                                   case TC_ARRAY:
1093                                         cn->info = OC_VAR;
1094                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1095                                                 cn->info = OC_FNARG;
1096                                                 cn->l.i = v->x.aidx;
1097                                         } else {
1098                                                 cn->l.v = newvar(t.string);
1099                                         }
1100                                         if (tc & TC_ARRAY) {
1101                                                 cn->info |= xS;
1102                                                 cn->r.n = parse_expr(TC_ARRTERM);
1103                                         }
1104                                         break;
1105                                 
1106                                   case TC_NUMBER:
1107                                   case TC_STRING:
1108                                         cn->info = OC_VAR;
1109                                         v = cn->l.v = xcalloc(sizeof(var), 1);
1110                                         if (tc & TC_NUMBER)
1111                                                 setvar_i(v, t.number);
1112                                         else
1113                                                 setvar_s(v, t.string);
1114                                         break;
1115
1116                                   case TC_REGEXP:
1117                                         mk_re_node(t.string, cn,
1118                                                                         (regex_t *)xcalloc(sizeof(regex_t),2));
1119                                         break;
1120
1121                                   case TC_FUNCTION:
1122                                         cn->info = OC_FUNC;
1123                                         cn->r.f = newfunc(t.string);
1124                                         cn->l.n = condition();
1125                                         break;
1126
1127                                   case TC_SEQSTART:
1128                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1129                                         cn->a.n = vn;
1130                                         break;
1131
1132                                   case TC_GETLINE:
1133                                         glptr = cn;
1134                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1135                                         break;
1136
1137                                   case TC_BUILTIN:
1138                                         cn->l.n = condition();
1139                                         break;
1140                                 }
1141                         }
1142                 }
1143         }
1144         return sn.r.n;
1145 }
1146
1147 /* add node to chain. Return ptr to alloc'd node */
1148 static node *chain_node(unsigned long info) {
1149
1150         register node *n;
1151
1152         if (! seq->first)
1153                 seq->first = seq->last = new_node(0);
1154
1155         if (seq->programname != programname) {
1156                 seq->programname = programname;
1157                 n = chain_node(OC_NEWSOURCE);
1158                 n->l.s = bb_xstrdup(programname);
1159         }
1160
1161         n = seq->last;
1162         n->info = info;
1163         seq->last = n->a.n = new_node(OC_DONE);
1164
1165         return n;
1166 }
1167
1168 static void chain_expr(unsigned long info) {
1169
1170         node *n;
1171
1172         n = chain_node(info);
1173         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1174         if (t.tclass & TC_GRPTERM)
1175                 rollback_token();
1176 }
1177
1178 static node *chain_loop(node *nn) {
1179
1180         node *n, *n2, *save_brk, *save_cont;
1181
1182         save_brk = break_ptr;
1183         save_cont = continue_ptr;
1184
1185         n = chain_node(OC_BR | Vx);
1186         continue_ptr = new_node(OC_EXEC);
1187         break_ptr = new_node(OC_EXEC);
1188         chain_group();
1189         n2 = chain_node(OC_EXEC | Vx);
1190         n2->l.n = nn;
1191         n2->a.n = n;
1192         continue_ptr->a.n = n2;
1193         break_ptr->a.n = n->r.n = seq->last;
1194
1195         continue_ptr = save_cont;
1196         break_ptr = save_brk;
1197
1198         return n;
1199 }
1200
1201 /* parse group and attach it to chain */
1202 static void chain_group(void) {
1203
1204         unsigned long c;
1205         node *n, *n2, *n3;
1206
1207         do {
1208                 c = next_token(TC_GRPSEQ);
1209         } while (c & TC_NEWLINE);
1210
1211         if (c & TC_GRPSTART) {
1212                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1213                         if (t.tclass & TC_NEWLINE) continue;    
1214                         rollback_token();
1215                         chain_group();
1216                 }
1217         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1218                 rollback_token();
1219                 chain_expr(OC_EXEC | Vx);
1220         } else {                                                /* TC_STATEMNT */
1221                 switch (t.info & OPCLSMASK) {
1222                         case ST_IF:
1223                                 n = chain_node(OC_BR | Vx);
1224                                 n->l.n = condition();
1225                                 chain_group();
1226                                 n2 = chain_node(OC_EXEC);
1227                                 n->r.n = seq->last;
1228                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1229                                         chain_group();
1230                                         n2->a.n = seq->last;
1231                                 } else {
1232                                         rollback_token();
1233                                 }
1234                                 break;
1235
1236                         case ST_WHILE:
1237                                 n2 = condition();
1238                                 n = chain_loop(NULL);
1239                                 n->l.n = n2;
1240                                 break;
1241
1242                         case ST_DO:
1243                                 n2 = chain_node(OC_EXEC);
1244                                 n = chain_loop(NULL);
1245                                 n2->a.n = n->a.n;
1246                                 next_token(TC_WHILE);
1247                                 n->l.n = condition();
1248                                 break;
1249
1250                         case ST_FOR:
1251                                 next_token(TC_SEQSTART);
1252                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1253                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
1254                                         if ((n2->info & OPCLSMASK) != OC_IN)
1255                                                 syntax_error(EMSG_UNEXP_TOKEN);
1256                                         n = chain_node(OC_WALKINIT | VV);
1257                                         n->l.n = n2->l.n;
1258                                         n->r.n = n2->r.n;
1259                                         n = chain_loop(NULL);
1260                                         n->info = OC_WALKNEXT | Vx;
1261                                         n->l.n = n2->l.n;
1262                                 } else {                                                                        /* for(;;) */
1263                                         n = chain_node(OC_EXEC | Vx);
1264                                         n->l.n = n2;
1265                                         n2 = parse_expr(TC_SEMICOL);
1266                                         n3 = parse_expr(TC_SEQTERM);
1267                                         n = chain_loop(n3);
1268                                         n->l.n = n2;
1269                                         if (! n2)
1270                                                 n->info = OC_EXEC;
1271                                 }
1272                                 break;
1273
1274                         case OC_PRINT:
1275                         case OC_PRINTF:
1276                                 n = chain_node(t.info);
1277                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1278                                 if (t.tclass & TC_OUTRDR) {
1279                                         n->info |= t.info;
1280                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1281                                 }
1282                                 if (t.tclass & TC_GRPTERM)
1283                                         rollback_token();
1284                                 break;
1285
1286                         case OC_BREAK:
1287                                 n = chain_node(OC_EXEC);
1288                                 n->a.n = break_ptr;
1289                                 break;
1290
1291                         case OC_CONTINUE:
1292                                 n = chain_node(OC_EXEC);
1293                                 n->a.n = continue_ptr;
1294                                 break;
1295
1296                         /* delete, next, nextfile, return, exit */
1297                         default:
1298                                 chain_expr(t.info);
1299
1300                 }
1301         }
1302 }
1303
1304 static void parse_program(char *p) {
1305
1306         unsigned long tclass;
1307         node *cn;
1308         func *f;
1309         var *v;
1310
1311         pos = p;
1312         t.lineno = 1;
1313         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1314                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1315
1316                 if (tclass & TC_OPTERM)
1317                         continue;
1318
1319                 seq = &mainseq;
1320                 if (tclass & TC_BEGIN) {
1321                         seq = &beginseq;
1322                         chain_group();
1323
1324                 } else if (tclass & TC_END) {
1325                         seq = &endseq;
1326                         chain_group();
1327
1328                 } else if (tclass & TC_FUNCDECL) {
1329                         next_token(TC_FUNCTION);
1330                         pos++;
1331                         f = newfunc(t.string);
1332                         f->body.first = NULL;
1333                         f->nargs = 0;
1334                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1335                                 v = findvar(ahash, t.string);
1336                                 v->x.aidx = (f->nargs)++;
1337
1338                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1339                                         break;
1340                         }
1341                         seq = &(f->body);
1342                         chain_group();
1343                         clear_array(ahash);
1344
1345                 } else if (tclass & TC_OPSEQ) {
1346                         rollback_token();
1347                         cn = chain_node(OC_TEST);
1348                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1349                         if (t.tclass & TC_GRPSTART) {
1350                                 rollback_token();
1351                                 chain_group();
1352                         } else {
1353                                 chain_node(OC_PRINT);
1354                         }
1355                         cn->r.n = mainseq.last;
1356
1357                 } else /* if (tclass & TC_GRPSTART) */ {
1358                         rollback_token();
1359                         chain_group();
1360                 }
1361         }
1362 }
1363
1364
1365 /* -------- program execution part -------- */
1366
1367 static node *mk_splitter(char *s, tsplitter *spl) {
1368
1369         register regex_t *re, *ire;
1370         node *n;
1371
1372         re = &spl->re[0];
1373         ire = &spl->re[1];
1374         n = &spl->n;
1375         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1376                 regfree(re);
1377                 regfree(ire);
1378         }
1379         if (bb_strlen(s) > 1) {
1380                 mk_re_node(s, n, re);
1381         } else {
1382                 n->info = (unsigned long) *s;
1383         }
1384
1385         return n;
1386 }
1387
1388 /* use node as a regular expression. Supplied with node ptr and regex_t
1389  * storage space. Return ptr to regex (if result points to preg, it should
1390  * be later regfree'd manually
1391  */
1392 static regex_t *as_regex(node *op, regex_t *preg) {
1393
1394         var *v;
1395         char *s;
1396
1397         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1398                 return icase ? op->r.ire : op->l.re;
1399         } else {
1400                 v = nvalloc(1);
1401                 s = getvar_s(evaluate(op, v));
1402                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1403                 nvfree(v);
1404                 return preg;
1405         }
1406 }
1407
1408 /* gradually increasing buffer */
1409 static void qrealloc(char **b, int n, int *size) {
1410
1411         if (! *b || n >= *size)
1412                 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1413 }
1414
1415 /* resize field storage space */
1416 static void fsrealloc(int size) {
1417
1418         static int maxfields = 0;
1419         int i;
1420
1421         if (size >= maxfields) {
1422                 i = maxfields;
1423                 maxfields = size + 16;
1424                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1425                 for (; i<maxfields; i++) {
1426                         Fields[i].type = VF_SPECIAL;
1427                         Fields[i].string = NULL;
1428                 }
1429         }
1430
1431         if (size < nfields) {
1432                 for (i=size; i<nfields; i++) {
1433                         clrvar(Fields+i);
1434                 }
1435         }
1436         nfields = size;
1437 }
1438
1439 static int awk_split(char *s, node *spl, char **slist) {
1440
1441         int l, n=0;
1442         char c[4];
1443         char *s1;
1444         regmatch_t pmatch[2];
1445
1446         /* in worst case, each char would be a separate field */
1447         *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
1448
1449         c[0] = c[1] = (char)spl->info;
1450         c[2] = c[3] = '\0';
1451         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1452
1453         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1454                 while (*s) {
1455                         l = strcspn(s, c+2);
1456                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1457                         pmatch[0].rm_so <= l) {
1458                                 l = pmatch[0].rm_so;
1459                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1460                         } else {
1461                                 pmatch[0].rm_eo = l;
1462                                 if (*(s+l)) pmatch[0].rm_eo++;
1463                         }
1464
1465                         memcpy(s1, s, l);
1466                         *(s1+l) = '\0';
1467                         nextword(&s1);
1468                         s += pmatch[0].rm_eo;
1469                         n++;
1470                 }
1471         } else if (c[0] == '\0') {              /* null split */
1472                 while(*s) {
1473                         *(s1++) = *(s++);
1474                         *(s1++) = '\0';
1475                         n++;
1476                 }
1477         } else if (c[0] != ' ') {               /* single-character split */
1478                 if (icase) {
1479                         c[0] = toupper(c[0]);
1480                         c[1] = tolower(c[1]);
1481                 }
1482                 if (*s1) n++;
1483                 while ((s1 = strpbrk(s1, c))) {
1484                         *(s1++) = '\0';
1485                         n++;
1486                 }
1487         } else {                                /* space split */
1488                 while (*s) {
1489                         while (isspace(*s)) s++;
1490                         if (! *s) break;
1491                         n++;
1492                         while (*s && !isspace(*s))
1493                                 *(s1++) = *(s++);
1494                         *(s1++) = '\0';
1495                 }
1496         }
1497         return n;
1498 }
1499
1500 static void split_f0(void) {
1501
1502         static char *fstrings = NULL;
1503         int i, n;
1504         char *s;
1505
1506         if (is_f0_split)
1507                 return;
1508
1509         is_f0_split = TRUE;
1510         free(fstrings);
1511         fsrealloc(0);
1512         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1513         fsrealloc(n);
1514         s = fstrings;
1515         for (i=0; i<n; i++) {
1516                 Fields[i].string = nextword(&s);
1517                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1518         }
1519
1520         /* set NF manually to avoid side effects */
1521         clrvar(V[NF]);
1522         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1523         V[NF]->number = nfields;
1524 }
1525
1526 /* perform additional actions when some internal variables changed */
1527 static void handle_special(var *v) {
1528
1529         int n;
1530         char *b, *sep, *s;
1531         int sl, l, len, i, bsize;
1532
1533         if (! (v->type & VF_SPECIAL))
1534                 return;
1535
1536         if (v == V[NF]) {
1537                 n = (int)getvar_i(v);
1538                 fsrealloc(n);
1539
1540                 /* recalculate $0 */
1541                 sep = getvar_s(V[OFS]);
1542                 sl = bb_strlen(sep);
1543                 b = NULL;
1544                 len = 0;
1545                 for (i=0; i<n; i++) {
1546                         s = getvar_s(&Fields[i]);
1547                         l = bb_strlen(s);
1548                         if (b) {
1549                                 memcpy(b+len, sep, sl);
1550                                 len += sl;
1551                         }
1552                         qrealloc(&b, len+l+sl, &bsize);
1553                         memcpy(b+len, s, l);
1554                         len += l;
1555                 }
1556                 if (b) b[len] = '\0';
1557                 setvar_p(V[F0], b);
1558                 is_f0_split = TRUE;
1559
1560         } else if (v == V[F0]) {
1561                 is_f0_split = FALSE;
1562
1563         } else if (v == V[FS]) {
1564                 mk_splitter(getvar_s(v), &fsplitter);
1565
1566         } else if (v == V[RS]) {
1567                 mk_splitter(getvar_s(v), &rsplitter);
1568
1569         } else if (v == V[IGNORECASE]) {
1570                 icase = istrue(v);
1571
1572         } else {                                                /* $n */
1573                 n = getvar_i(V[NF]);
1574                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1575                 /* right here v is invalid. Just to note... */
1576         }
1577 }
1578
1579 /* step through func/builtin/etc arguments */
1580 static node *nextarg(node **pn) {
1581
1582         node *n;
1583
1584         n = *pn;
1585         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1586                 *pn = n->r.n;
1587                 n = n->l.n;
1588         } else {
1589                 *pn = NULL;
1590         }
1591         return n;
1592 }
1593
1594 static void hashwalk_init(var *v, xhash *array) {
1595
1596         char **w;
1597         hash_item *hi;
1598         int i;
1599
1600         if (v->type & VF_WALK)
1601                 free(v->x.walker);
1602
1603         v->type |= VF_WALK;
1604         w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1605         *w = *(w+1) = (char *)(w + 2);
1606         for (i=0; i<array->csize; i++) {
1607                 hi = array->items[i];
1608                 while(hi) {
1609                         strcpy(*w, hi->name);
1610                         nextword(w);
1611                         hi = hi->next;
1612                 }
1613         }
1614 }
1615
1616 static int hashwalk_next(var *v) {
1617
1618         char **w;
1619
1620         w = v->x.walker;
1621         if (*(w+1) == *w)
1622                 return FALSE;
1623
1624         setvar_s(v, nextword(w+1));
1625         return TRUE;
1626 }
1627
1628 /* evaluate node, return 1 when result is true, 0 otherwise */
1629 static int ptest(node *pattern) {
1630         static var v;
1631
1632         return istrue(evaluate(pattern, &v));
1633 }
1634
1635 /* read next record from stream rsm into a variable v */
1636 static int awk_getline(rstream *rsm, var *v) {
1637
1638         char *b;
1639         regmatch_t pmatch[2];
1640         int a, p, pp=0, size;
1641         int fd, so, eo, r, rp;
1642         char c, *m, *s;
1643
1644         /* we're using our own buffer since we need access to accumulating
1645          * characters
1646          */
1647         fd = fileno(rsm->F);
1648         m = rsm->buffer;
1649         a = rsm->adv;
1650         p = rsm->pos;
1651         size = rsm->size;
1652         c = (char) rsplitter.n.info;
1653         rp = 0;
1654
1655         if (! m) qrealloc(&m, 256, &size);
1656         do {
1657                 b = m + a;
1658                 so = eo = p;
1659                 r = 1;
1660                 if (p > 0) {
1661                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1662                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1663                                                                                                 b, 1, pmatch, 0) == 0) {
1664                                         so = pmatch[0].rm_so;
1665                                         eo = pmatch[0].rm_eo;
1666                                         if (b[eo] != '\0')
1667                                                 break;
1668                                 }
1669                         } else if (c != '\0') {
1670                                 s = strchr(b+pp, c);
1671                                 if (s) {
1672                                         so = eo = s-b;
1673                                         eo++;
1674                                         break;
1675                                 }
1676                         } else {
1677                                 while (b[rp] == '\n')
1678                                         rp++;
1679                                 s = strstr(b+rp, "\n\n");
1680                                 if (s) {
1681                                         so = eo = s-b;
1682                                         while (b[eo] == '\n') eo++;
1683                                         if (b[eo] != '\0')
1684                                                 break;
1685                                 }
1686                         }
1687                 }
1688
1689                 if (a > 0) {
1690                         memmove(m, (const void *)(m+a), p+1);
1691                         b = m;
1692                         a = 0;
1693                 }
1694
1695                 qrealloc(&m, a+p+128, &size);
1696                 b = m + a;
1697                 pp = p;
1698                 p += safe_read(fd, b+p, size-p-1);
1699                 if (p < pp) {
1700                         p = 0;
1701                         r = 0;
1702                         setvar_i(V[ERRNO], errno);
1703                 }
1704                 b[p] = '\0';
1705
1706         } while (p > pp);
1707
1708         if (p == 0) {
1709                 r--;
1710         } else {
1711                 c = b[so]; b[so] = '\0';
1712                 setvar_s(v, b+rp);
1713                 v->type |= VF_USER;
1714                 b[so] = c;
1715                 c = b[eo]; b[eo] = '\0';
1716                 setvar_s(V[RT], b+so);
1717                 b[eo] = c;
1718         }
1719
1720         rsm->buffer = m;
1721         rsm->adv = a + eo;
1722         rsm->pos = p - eo;
1723         rsm->size = size;
1724
1725         return r;
1726 }
1727
1728 static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1729
1730         int r=0;
1731         char c, *s=format;
1732
1733         if (int_as_int && n == (int)n) {
1734                 r = snprintf(b, size, "%d", (int)n);
1735         } else {
1736                 do { c = *s; } while (*s && *++s);
1737                 if (strchr("diouxX", c)) {
1738                         r = snprintf(b, size, format, (int)n);
1739                 } else if (strchr("eEfgG", c)) {
1740                         r = snprintf(b, size, format, n);
1741                 } else {
1742                         runtime_error(EMSG_INV_FMT);
1743                 }
1744         }
1745         return r;
1746 }
1747
1748
1749 /* formatted output into an allocated buffer, return ptr to buffer */
1750 static char *awk_printf(node *n) {
1751
1752         char *b = NULL;
1753         char *fmt, *s, *s1, *f;
1754         int i, j, incr, bsize;
1755         char c, c1;
1756         var *v, *arg;
1757
1758         v = nvalloc(1);
1759         fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1760
1761         i = 0;
1762         while (*f) {
1763                 s = f;
1764                 while (*f && (*f != '%' || *(++f) == '%'))
1765                         f++;
1766                 while (*f && !isalpha(*f))
1767                         f++;
1768
1769                 incr = (f - s) + MAXVARFMT;
1770                 qrealloc(&b, incr+i, &bsize);
1771                 c = *f; if (c != '\0') f++;
1772                 c1 = *f ; *f = '\0';
1773                 arg = evaluate(nextarg(&n), v);
1774
1775                 j = i;
1776                 if (c == 'c' || !c) {
1777                         i += sprintf(b+i, s,
1778                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1779
1780                 } else if (c == 's') {
1781                     s1 = getvar_s(arg);
1782                         qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
1783                         i += sprintf(b+i, s, s1);
1784
1785                 } else {
1786                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1787                 }
1788                 *f = c1;
1789
1790                 /* if there was an error while sprintf, return value is negative */
1791                 if (i < j) i = j;
1792
1793         }
1794
1795         b = xrealloc(b, i+1);
1796         free(fmt);
1797         nvfree(v);
1798         b[i] = '\0';
1799         return b;
1800 }
1801
1802 /* common substitution routine
1803  * replace (nm) substring of (src) that match (n) with (repl), store
1804  * result into (dest), return number of substitutions. If nm=0, replace
1805  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1806  * subexpression matching (\1-\9)
1807  */
1808 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1809
1810         char *ds = NULL;
1811         char *sp, *s;
1812         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1813         regmatch_t pmatch[10];
1814         regex_t sreg, *re;
1815
1816         re = as_regex(rn, &sreg);
1817         if (! src) src = V[F0];
1818         if (! dest) dest = V[F0];
1819
1820         i = di = 0;
1821         sp = getvar_s(src);
1822         rl = bb_strlen(repl);
1823         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1824                 so = pmatch[0].rm_so;
1825                 eo = pmatch[0].rm_eo;
1826
1827                 qrealloc(&ds, di + eo + rl, &dssize);
1828                 memcpy(ds + di, sp, eo);
1829                 di += eo;
1830                 if (++i >= nm) {
1831                         /* replace */
1832                         di -= (eo - so);
1833                         nbs = 0;
1834                         for (s = repl; *s; s++) {
1835                                 ds[di++] = c = *s;
1836                                 if (c == '\\') {
1837                                         nbs++;
1838                                         continue;
1839                                 }
1840                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1841                                         di -= ((nbs + 3) >> 1);
1842                                         j = 0;
1843                                         if (c != '&') {
1844                                                 j = c - '0';
1845                                                 nbs++;
1846                                         }
1847                                         if (nbs % 2) {
1848                                                 ds[di++] = c;
1849                                         } else {
1850                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1851                                                 qrealloc(&ds, di + rl + n, &dssize);
1852                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1853                                                 di += n;
1854                                         }
1855                                 }
1856                                 nbs = 0;
1857                         }
1858                 }
1859
1860                 sp += eo;
1861                 if (i == nm) break;
1862                 if (eo == so) {
1863                         if (! (ds[di++] = *sp++)) break;
1864                 }
1865         }
1866
1867         qrealloc(&ds, di + strlen(sp), &dssize);
1868         strcpy(ds + di, sp);
1869         setvar_p(dest, ds);
1870         if (re == &sreg) regfree(re);
1871         return i;
1872 }
1873
1874 static var *exec_builtin(node *op, var *res) {
1875
1876         int (*to_xxx)(int);
1877         var *tv;
1878         node *an[4];
1879         var  *av[4];
1880         char *as[4];
1881         regmatch_t pmatch[2];
1882         regex_t sreg, *re;
1883         static tsplitter tspl;
1884         node *spl;
1885         unsigned long isr, info;
1886         int nargs;
1887         time_t tt;
1888         char *s, *s1;
1889         int i, l, ll, n;
1890
1891         tv = nvalloc(4);
1892         isr = info = op->info;
1893         op = op->l.n;
1894
1895         av[2] = av[3] = NULL;
1896         for (i=0 ; i<4 && op ; i++) {
1897                 an[i] = nextarg(&op);
1898                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1899                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1900                 isr >>= 1;
1901         }
1902
1903         nargs = i;
1904         if (nargs < (info >> 30))
1905                 runtime_error(EMSG_TOO_FEW_ARGS);
1906
1907         switch (info & OPNMASK) {
1908
1909           case B_a2:
1910 #ifdef CONFIG_FEATURE_AWK_MATH
1911                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1912 #else
1913                 runtime_error(EMSG_NO_MATH);
1914 #endif
1915                 break;
1916
1917           case B_sp:
1918                 if (nargs > 2) {
1919                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1920                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1921                 } else {
1922                         spl = &fsplitter.n;
1923                 }
1924
1925                 n = awk_split(as[0], spl, &s);
1926                 s1 = s;
1927                 clear_array(iamarray(av[1]));
1928                 for (i=1; i<=n; i++)
1929                         setari_u(av[1], i, nextword(&s1));
1930                 free(s);
1931                 setvar_i(res, n);
1932                 break;
1933
1934           case B_ss:
1935                 l = bb_strlen(as[0]);
1936                 i = getvar_i(av[1]) - 1;
1937                 if (i>l) i=l; if (i<0) i=0;
1938                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1939                 if (n<0) n=0;
1940                 s = xmalloc(n+1);
1941                 strncpy(s, as[0]+i, n);
1942                 s[n] = '\0';
1943                 setvar_p(res, s);
1944                 break;
1945
1946           case B_lo:
1947                 to_xxx = tolower;
1948                 goto lo_cont;
1949
1950           case B_up:
1951                 to_xxx = toupper;
1952 lo_cont:
1953                 s1 = s = bb_xstrdup(as[0]);
1954                 while (*s1) {
1955                         *s1 = (*to_xxx)(*s1);
1956                         s1++;
1957                 }
1958                 setvar_p(res, s);
1959                 break;
1960
1961           case B_ix:
1962                 n = 0;
1963                 ll = bb_strlen(as[1]);
1964                 l = bb_strlen(as[0]) - ll;
1965                 if (ll > 0 && l >= 0) {
1966                         if (! icase) {
1967                                 s = strstr(as[0], as[1]);
1968                                 if (s) n = (s - as[0]) + 1;
1969                         } else {
1970                                 /* this piece of code is terribly slow and
1971                                  * really should be rewritten
1972                                  */
1973                                 for (i=0; i<=l; i++) {
1974                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1975                                                 n = i+1;
1976                                                 break;
1977                                         }
1978                                 }
1979                         }
1980                 }
1981                 setvar_i(res, n);
1982                 break;
1983
1984           case B_ti:
1985                 if (nargs > 1)
1986                         tt = getvar_i(av[1]);
1987                 else
1988                         time(&tt);
1989                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1990                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1991                 buf[i] = '\0';
1992                 setvar_s(res, buf);
1993                 break;
1994
1995           case B_ma:
1996                 re = as_regex(an[1], &sreg);
1997                 n = regexec(re, as[0], 1, pmatch, 0);
1998                 if (n == 0) {
1999                         pmatch[0].rm_so++;
2000                         pmatch[0].rm_eo++;
2001                 } else {
2002                         pmatch[0].rm_so = 0;
2003                         pmatch[0].rm_eo = -1;
2004                 }
2005                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2006                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2007                 setvar_i(res, pmatch[0].rm_so);
2008                 if (re == &sreg) regfree(re);
2009                 break;
2010
2011           case B_ge:
2012                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2013                 break;
2014
2015           case B_gs:
2016                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2017                 break;
2018
2019           case B_su:
2020                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2021                 break;
2022         }
2023
2024         nvfree(tv);
2025         return res;
2026 }
2027
2028 /*
2029  * Evaluate node - the heart of the program. Supplied with subtree
2030  * and place where to store result. returns ptr to result.
2031  */
2032 #define XC(n) ((n) >> 8)
2033
2034 static var *evaluate(node *op, var *res) {
2035
2036         /* This procedure is recursive so we should count every byte */
2037         static var *fnargs = NULL;
2038         static unsigned int seed = 1;
2039         static regex_t sreg;
2040         node *op1;
2041         var *v1;
2042         union {
2043                 var *v;
2044                 char *s;
2045                 double d;
2046                 int i;
2047         } L, R;
2048         unsigned long opinfo;
2049         short opn;
2050         union {
2051                 char *s;
2052                 rstream *rsm;
2053                 FILE *F;
2054                 var *v;
2055                 regex_t *re;
2056                 unsigned long info;
2057         } X;
2058
2059         if (! op)
2060                 return setvar_s(res, NULL);
2061
2062         v1 = nvalloc(2);
2063
2064         while (op) {
2065
2066                 opinfo = op->info;
2067                 opn = (short)(opinfo & OPNMASK);
2068                 lineno = op->lineno;
2069
2070                 /* execute inevitable things */
2071                 op1 = op->l.n;
2072                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2073                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2074                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2075                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2076                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2077
2078                 switch (XC(opinfo & OPCLSMASK)) {
2079
2080                   /* -- iterative node type -- */
2081
2082                   /* test pattern */
2083                   case XC( OC_TEST ):
2084                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2085                                 /* it's range pattern */
2086                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2087                                         op->info |= OF_CHECKED;
2088                                         if (ptest(op1->r.n))
2089                                                 op->info &= ~OF_CHECKED;
2090
2091                                         op = op->a.n;
2092                                 } else {
2093                                         op = op->r.n;
2094                                 }
2095                         } else {
2096                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2097                         }
2098                         break;
2099
2100                   /* just evaluate an expression, also used as unconditional jump */
2101                   case XC( OC_EXEC ):
2102                         break;
2103
2104                   /* branch, used in if-else and various loops */
2105                   case XC( OC_BR ):
2106                         op = istrue(L.v) ? op->a.n : op->r.n;
2107                         break;
2108
2109                   /* initialize for-in loop */
2110                   case XC( OC_WALKINIT ):
2111                         hashwalk_init(L.v, iamarray(R.v));
2112                         break;
2113
2114                   /* get next array item */
2115                   case XC( OC_WALKNEXT ):
2116                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2117                         break;
2118
2119                   case XC( OC_PRINT ):
2120                   case XC( OC_PRINTF ):
2121                         X.F = stdout;
2122                         if (op->r.n) {
2123                                 X.rsm = newfile(R.s);
2124                                 if (! X.rsm->F) {
2125                                         if (opn == '|') {
2126                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2127                                                         bb_perror_msg_and_die("popen");
2128                                                 X.rsm->is_pipe = 1;
2129                                         } else {
2130                                                 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2131                                         }
2132                                 }
2133                                 X.F = X.rsm->F;
2134                         }
2135
2136                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2137                                 if (! op1) {
2138                                         fputs(getvar_s(V[F0]), X.F);
2139                                 } else {
2140                                         while (op1) {
2141                                                 L.v = evaluate(nextarg(&op1), v1);
2142                                                 if (L.v->type & VF_NUMBER) {
2143                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2144                                                                                                                 getvar_i(L.v), TRUE);
2145                                                         fputs(buf, X.F);
2146                                                 } else {
2147                                                         fputs(getvar_s(L.v), X.F);
2148                                                 }
2149
2150                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2151                                         }
2152                                 }
2153                                 fputs(getvar_s(V[ORS]), X.F);
2154
2155                         } else {        /* OC_PRINTF */
2156                                 L.s = awk_printf(op1);
2157                                 fputs(L.s, X.F);
2158                                 free(L.s);
2159                         }
2160                         fflush(X.F);
2161                         break;
2162
2163                   case XC( OC_DELETE ):
2164                         X.info = op1->info & OPCLSMASK;
2165                         if (X.info == OC_VAR) {
2166                                 R.v = op1->l.v;
2167                         } else if (X.info == OC_FNARG) {
2168                                 R.v = &fnargs[op1->l.i];
2169                         } else {
2170                                 runtime_error(EMSG_NOT_ARRAY);
2171                         }
2172
2173                         if (op1->r.n) {
2174                                 clrvar(L.v);
2175                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2176                                 hash_remove(iamarray(R.v), L.s);
2177                         } else {
2178                                 clear_array(iamarray(R.v));
2179                         }
2180                         break;
2181
2182                   case XC( OC_NEWSOURCE ):
2183                         programname = op->l.s;
2184                         break;
2185
2186                   case XC( OC_RETURN ):
2187                         copyvar(res, L.v);
2188                         break;
2189
2190                   case XC( OC_NEXTFILE ):
2191                         nextfile = TRUE;
2192                   case XC( OC_NEXT ):
2193                         nextrec = TRUE;
2194                   case XC( OC_DONE ):
2195                         clrvar(res);
2196                         break;
2197
2198                   case XC( OC_EXIT ):
2199                         awk_exit(L.d);
2200
2201                   /* -- recursive node type -- */
2202
2203                   case XC( OC_VAR ):
2204                         L.v = op->l.v;
2205                         if (L.v == V[NF])
2206                                 split_f0();
2207                         goto v_cont;
2208
2209                   case XC( OC_FNARG ):
2210                         L.v = &fnargs[op->l.i];
2211
2212 v_cont:
2213                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2214                         break;
2215
2216                   case XC( OC_IN ):
2217                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2218                         break;
2219
2220                   case XC( OC_REGEXP ):
2221                         op1 = op;
2222                         L.s = getvar_s(V[F0]);
2223                         goto re_cont;
2224
2225                   case XC( OC_MATCH ):
2226                         op1 = op->r.n;
2227 re_cont:
2228                         X.re = as_regex(op1, &sreg);
2229                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2230                         if (X.re == &sreg) regfree(X.re);
2231                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2232                         break;
2233
2234                   case XC( OC_MOVE ):
2235                         /* if source is a temporary string, jusk relink it to dest */
2236                         if (R.v == v1+1 && R.v->string) {
2237                                 res = setvar_p(L.v, R.v->string);
2238                                 R.v->string = NULL;
2239                         } else {
2240                                 res = copyvar(L.v, R.v);
2241                         }
2242                         break;
2243
2244                   case XC( OC_TERNARY ):
2245                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2246                                 runtime_error(EMSG_POSSIBLE_ERROR);
2247                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2248                         break;
2249
2250                   case XC( OC_FUNC ):
2251                         if (! op->r.f->body.first)
2252                                 runtime_error(EMSG_UNDEF_FUNC);
2253
2254                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2255                         while (op1) {
2256                                 L.v = evaluate(nextarg(&op1), v1);
2257                                 copyvar(R.v, L.v);
2258                                 R.v->type |= VF_CHILD;
2259                                 R.v->x.parent = L.v;
2260                                 if (++R.v - X.v >= op->r.f->nargs)
2261                                         break;
2262                         }
2263
2264                         R.v = fnargs;
2265                         fnargs = X.v;
2266
2267                         L.s = programname;
2268                         res = evaluate(op->r.f->body.first, res);
2269                         programname = L.s;
2270
2271                         nvfree(fnargs);
2272                         fnargs = R.v;
2273                         break;
2274
2275                   case XC( OC_GETLINE ):
2276                   case XC( OC_PGETLINE ):
2277                         if (op1) {
2278                                 X.rsm = newfile(L.s);
2279                                 if (! X.rsm->F) {
2280                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2281                                                 X.rsm->F = popen(L.s, "r");
2282                                                 X.rsm->is_pipe = TRUE;
2283                                         } else {
2284                                                 X.rsm->F = fopen(L.s, "r");             /* not bb_xfopen! */
2285                                         }
2286                                 }
2287                         } else {
2288                                 if (! iF) iF = next_input_file();
2289                                 X.rsm = iF;
2290                         }
2291
2292                         if (! X.rsm->F) {
2293                                 setvar_i(V[ERRNO], errno);
2294                                 setvar_i(res, -1);
2295                                 break;
2296                         }
2297
2298                         if (! op->r.n)
2299                                 R.v = V[F0];
2300
2301                         L.i = awk_getline(X.rsm, R.v);
2302                         if (L.i > 0) {
2303                                 if (! op1) {
2304                                         incvar(V[FNR]);
2305                                         incvar(V[NR]);
2306                                 }
2307                         }
2308                         setvar_i(res, L.i);
2309                         break;
2310
2311                   /* simple builtins */
2312                   case XC( OC_FBLTIN ):
2313                         switch (opn) {
2314
2315                           case F_in:
2316                                 R.d = (int)L.d;
2317                                 break;
2318
2319                           case F_rn:
2320                                 R.d =  (double)rand() / (double)RAND_MAX;
2321                                 break;
2322
2323 #ifdef CONFIG_FEATURE_AWK_MATH
2324                           case F_co:
2325                                 R.d = cos(L.d);
2326                                 break;
2327
2328                           case F_ex:
2329                                 R.d = exp(L.d);
2330                                 break;
2331
2332                           case F_lg:
2333                                 R.d = log(L.d);
2334                                 break;
2335
2336                           case F_si:
2337                                 R.d = sin(L.d);
2338                                 break;
2339
2340                           case F_sq:
2341                                 R.d = sqrt(L.d);
2342                                 break;
2343 #else
2344                           case F_co:
2345                           case F_ex:
2346                           case F_lg:
2347                           case F_si:
2348                           case F_sq:
2349                                 runtime_error(EMSG_NO_MATH);
2350                                 break;
2351 #endif
2352
2353                           case F_sr:
2354                                 R.d = (double)seed;
2355                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2356                                 srand(seed);
2357                                 break;
2358
2359                           case F_ti:
2360                                 R.d = time(NULL);
2361                                 break;
2362
2363                           case F_le:
2364                                 if (! op1)
2365                                         L.s = getvar_s(V[F0]);
2366                                 R.d = bb_strlen(L.s);
2367                                 break;
2368
2369                           case F_sy:
2370                                 fflush(NULL);
2371                                 R.d = (L.s && *L.s) ? system(L.s) : 0;
2372                                 break;
2373
2374                           case F_ff:
2375                                 if (! op1)
2376                                         fflush(stdout);
2377                                 else {
2378                                         if (L.s && *L.s) {
2379                                                 X.rsm = newfile(L.s);
2380                                                 fflush(X.rsm->F);
2381                                         } else {
2382                                                 fflush(NULL);
2383                                         }
2384                                 }
2385                                 break;
2386
2387                           case F_cl:
2388                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2389                                 if (X.rsm) {
2390                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2391                                         free(X.rsm->buffer);
2392                                         hash_remove(fdhash, L.s);
2393                                 }
2394                                 if (R.i != 0)
2395                                         setvar_i(V[ERRNO], errno);
2396                                 R.d = (double)R.i;
2397                                 break;
2398                         }
2399                         setvar_i(res, R.d);
2400                         break;
2401
2402                   case XC( OC_BUILTIN ):
2403                         res = exec_builtin(op, res);
2404                         break;
2405
2406                   case XC( OC_SPRINTF ):
2407                         setvar_p(res, awk_printf(op1));
2408                         break;
2409
2410                   case XC( OC_UNARY ):
2411                         X.v = R.v;
2412                         L.d = R.d = getvar_i(R.v);
2413                         switch (opn) {
2414                           case 'P':
2415                                 L.d = ++R.d;
2416                                 goto r_op_change;
2417                           case 'p':
2418                                 R.d++;
2419                                 goto r_op_change;
2420                           case 'M':
2421                                 L.d = --R.d;
2422                                 goto r_op_change;
2423                           case 'm':
2424                                 R.d--;
2425                                 goto r_op_change;
2426                           case '!':
2427                             L.d = istrue(X.v) ? 0 : 1;
2428                                 break;
2429                           case '-':
2430                                 L.d = -R.d;
2431                                 break;
2432                         r_op_change:
2433                                 setvar_i(X.v, R.d);
2434                         }
2435                         setvar_i(res, L.d);
2436                         break;
2437
2438                   case XC( OC_FIELD ):
2439                         R.i = (int)getvar_i(R.v);
2440                         if (R.i == 0) {
2441                                 res = V[F0];
2442                         } else {
2443                                 split_f0();
2444                                 if (R.i > nfields)
2445                                         fsrealloc(R.i);
2446
2447                                 res = &Fields[R.i-1];
2448                         }
2449                         break;
2450
2451                   /* concatenation (" ") and index joining (",") */
2452                   case XC( OC_CONCAT ):
2453                   case XC( OC_COMMA ):
2454                         opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
2455                         X.s = (char *)xmalloc(opn);
2456                         strcpy(X.s, L.s);
2457                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2458                                 L.s = getvar_s(V[SUBSEP]);
2459                                 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
2460                                 strcat(X.s, L.s);
2461                         }
2462                         strcat(X.s, R.s);
2463                         setvar_p(res, X.s);
2464                         break;
2465
2466                   case XC( OC_LAND ):
2467                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2468                         break;
2469
2470                   case XC( OC_LOR ):
2471                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2472                         break;
2473
2474                   case XC( OC_BINARY ):
2475                   case XC( OC_REPLACE ):
2476                         R.d = getvar_i(R.v);
2477                         switch (opn) {
2478                           case '+':
2479                                 L.d += R.d;
2480                                 break;
2481                           case '-':
2482                                 L.d -= R.d;
2483                                 break;
2484                           case '*':
2485                                 L.d *= R.d;
2486                                 break;
2487                           case '/':
2488                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2489                                 L.d /= R.d;
2490                                 break;
2491                           case '&':
2492 #ifdef CONFIG_FEATURE_AWK_MATH
2493                                 L.d = pow(L.d, R.d);
2494 #else
2495                                 runtime_error(EMSG_NO_MATH);
2496 #endif
2497                                 break;
2498                           case '%':
2499                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2500                                 L.d -= (int)(L.d / R.d) * R.d;
2501                                 break;
2502                         }
2503                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2504                         break;
2505
2506                   case XC( OC_COMPARE ):
2507                         if (is_numeric(L.v) && is_numeric(R.v)) {
2508                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2509                         } else {
2510                                 L.s = getvar_s(L.v);
2511                                 R.s = getvar_s(R.v);
2512                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2513                         }
2514                         switch (opn & 0xfe) {
2515                           case 0:
2516                                 R.i = (L.d > 0);
2517                                 break;
2518                           case 2:
2519                                 R.i = (L.d >= 0);
2520                                 break;
2521                           case 4:
2522                                 R.i = (L.d == 0);
2523                                 break;
2524                         }
2525                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2526                         break;
2527
2528                   default:
2529                         runtime_error(EMSG_POSSIBLE_ERROR);
2530                 }
2531                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2532                         op = op->a.n;
2533                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2534                         break;
2535                 if (nextrec)
2536                         break;
2537         }
2538         nvfree(v1);
2539         return res;
2540 }
2541
2542
2543 /* -------- main & co. -------- */
2544
2545 static int awk_exit(int r) {
2546
2547         unsigned int i;
2548         hash_item *hi;
2549         static var tv;
2550
2551         if (! exiting) {
2552                 exiting = TRUE;
2553                 nextrec = FALSE;
2554                 evaluate(endseq.first, &tv);
2555         }
2556
2557         /* waiting for children */
2558         for (i=0; i<fdhash->csize; i++) {
2559                 hi = fdhash->items[i];
2560                 while(hi) {
2561                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2562                                 pclose(hi->data.rs.F);
2563                         hi = hi->next;
2564                 }
2565         }
2566
2567         exit(r);
2568 }
2569
2570 /* if expr looks like "var=value", perform assignment and return 1,
2571  * otherwise return 0 */
2572 static int is_assignment(char *expr) {
2573
2574         char *exprc, *s, *s0, *s1;
2575
2576         exprc = bb_xstrdup(expr);
2577         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2578                 free(exprc);
2579                 return FALSE;
2580         }
2581
2582         *(s++) = '\0';
2583         s0 = s1 = s;
2584         while (*s)
2585                 *(s1++) = nextchar(&s);
2586
2587         *s1 = '\0';
2588         setvar_u(newvar(exprc), s0);
2589         free(exprc);
2590         return TRUE;
2591 }
2592
2593 /* switch to next input file */
2594 static rstream *next_input_file(void) {
2595
2596         static rstream rsm;
2597         FILE *F = NULL;
2598         char *fname, *ind;
2599         static int files_happen = FALSE;
2600
2601         if (rsm.F) fclose(rsm.F);
2602         rsm.F = NULL;
2603         rsm.pos = rsm.adv = 0;
2604
2605         do {
2606                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2607                         if (files_happen)
2608                                 return NULL;
2609                         fname = "-";
2610                         F = stdin;
2611                 } else {
2612                         ind = getvar_s(incvar(V[ARGIND]));
2613                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2614                         if (fname && *fname && !is_assignment(fname))
2615                                 F = afopen(fname, "r");
2616                 }
2617         } while (!F);
2618
2619         files_happen = TRUE;
2620         setvar_s(V[FILENAME], fname);
2621         rsm.F = F;
2622         return &rsm;
2623 }
2624
2625 extern int awk_main(int argc, char **argv) {
2626
2627         char *s, *s1;
2628         int i, j, c;
2629         var *v;
2630         static var tv;
2631         char **envp;
2632         static int from_file = FALSE;
2633         rstream *rsm;
2634         FILE *F, *stdfiles[3];
2635         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2636
2637         /* allocate global buffer */
2638         buf = xmalloc(MAXVARFMT+1);
2639
2640         vhash = hash_init();
2641         ahash = hash_init();
2642         fdhash = hash_init();
2643         fnhash = hash_init();
2644
2645         /* initialize variables */
2646         for (i=0;  *vNames;  i++) {
2647                 V[i] = v = newvar(nextword(&vNames));
2648                 if (*vValues != '\377')
2649                         setvar_s(v, nextword(&vValues));
2650                 else
2651                         setvar_i(v, 0);
2652
2653                 if (*vNames == '*') {
2654                         v->type |= VF_SPECIAL;
2655                         vNames++;
2656                 }
2657         }
2658
2659         handle_special(V[FS]);
2660         handle_special(V[RS]);
2661
2662         stdfiles[0] = stdin;
2663         stdfiles[1] = stdout;
2664         stdfiles[2] = stderr;
2665         for (i=0; i<3; i++) {
2666                 rsm = newfile(nextword(&stdnames));
2667                 rsm->F = stdfiles[i];
2668         }
2669
2670         for (envp=environ; *envp; envp++) {
2671                 s = bb_xstrdup(*envp);
2672                 s1 = strchr(s, '=');
2673                 if (!s1) {
2674                         goto keep_going;
2675                 }
2676                 *(s1++) = '\0';
2677                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2678 keep_going:
2679                 free(s);
2680         }
2681
2682         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2683                 switch (c) {
2684                         case 'F':
2685                                 setvar_s(V[FS], optarg);
2686                                 break;
2687                         case 'v':
2688                                 if (! is_assignment(optarg))
2689                                         bb_show_usage();
2690                                 break;
2691                         case 'f':
2692                                 from_file = TRUE;
2693                                 F = afopen(programname = optarg, "r");
2694                                 s = NULL;
2695                                 /* one byte is reserved for some trick in next_token */
2696                                 for (i=j=1; j>0; i+=j) {
2697                                         s = (char *)xrealloc(s, i+4096);
2698                                         j = fread(s+i, 1, 4094, F);
2699                                 }
2700                                 s[i] = '\0';
2701                                 fclose(F);
2702                                 parse_program(s+1);
2703                                 free(s);
2704                                 break;
2705                         case 'W':
2706                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2707                                 break;
2708
2709                         default:
2710                                 bb_show_usage();
2711                 }
2712         }
2713
2714         if (!from_file) {
2715                 if (argc == optind)
2716                         bb_show_usage();
2717                 programname="cmd. line";
2718                 parse_program(argv[optind++]);
2719
2720         }
2721
2722         /* fill in ARGV array */
2723         setvar_i(V[ARGC], argc - optind + 1);
2724         setari_u(V[ARGV], 0, "awk");
2725         for(i=optind; i < argc; i++)
2726                 setari_u(V[ARGV], i+1-optind, argv[i]);
2727
2728         evaluate(beginseq.first, &tv);
2729         if (! mainseq.first && ! endseq.first)
2730                 awk_exit(EXIT_SUCCESS);
2731
2732         /* input file could already be opened in BEGIN block */
2733         if (! iF) iF = next_input_file();
2734
2735         /* passing through input files */
2736         while (iF) {
2737
2738                 nextfile = FALSE;
2739                 setvar_i(V[FNR], 0);
2740
2741                 while ((c = awk_getline(iF, V[F0])) > 0) {
2742
2743                         nextrec = FALSE;
2744                         incvar(V[NR]);
2745                         incvar(V[FNR]);
2746                         evaluate(mainseq.first, &tv);
2747
2748                         if (nextfile)
2749                                 break;
2750                 }
2751
2752                 if (c < 0)
2753                         runtime_error(strerror(errno));
2754
2755                 iF = next_input_file();
2756
2757         }
2758
2759         awk_exit(EXIT_SUCCESS);
2760
2761         return 0;
2762 }
2763