aea852b0d1498d7f8055deb7bad9b1e6938e96f7
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  *
21  */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <time.h>
29 #include <math.h>
30 #include <ctype.h>
31 #include <getopt.h>
32 #include <regex.h>
33
34 #include "busybox.h"
35
36
/* size limit handed to fmt_num() when getvar_s() formats a number into buf */
37 #define MAXVARFMT       240
/* smallest number of vars a temporary-variable block is created with (nvalloc) */
38 #define MINNVBLOCK      64
39
40 /* variable flags (bit mask kept in var.type) */
41 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
42 #define VF_ARRAY        0x0002  /* 1 = it's an array */
43
44 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
45 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
46 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
47 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
48 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
49 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
50 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
51
52 /* these flags are static, don't change them when value is changed */
/* (clrvar() masks var.type with this value, so only these bits survive a new assignment) */
53 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
55 /* Variable: one value cell (hash entries and nvalloc() temporaries both use it) */
56 typedef struct var_s {
57         unsigned short type;            /* VF_* flags */
58         double number;                  /* numeric value, or the cached conversion of string */
59         char *string;                   /* string value; NULL is read back as "" by getvar_s() */
60         union {
61                 int aidx;                               /* func arg index (on compilation stage) */
62                 struct xhash_s *array;  /* array ptr */
63                 struct var_s *parent;   /* for func args, ptr to actual parameter */
64                 char **walker;                  /* list of array elements (for..in) */
65         } x;
66 } var;
67
68 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
69 typedef struct chain_s {
70         struct node_s *first;   /* head of the chain */
71         struct node_s *last;    /* tail of the chain */
72         char *programname;      /* presumably the source name used in diagnostics - TODO confirm */
73 } chain;
74
75 /* Function: argument count plus the node chain of its body */
76 typedef struct func_s {
77         unsigned short nargs;
78         struct chain_s body;
79 } func;
80
81 /* I/O stream (stored in the redirect-streams hash, see hash_item) */
82 typedef struct rstream_s {
83         FILE *F;
84         char *buffer;
85         int size;                       /* NOTE(review): looks like buffer capacity - confirm */
86         int pos;                        /* NOTE(review): looks like an offset into buffer - confirm */
87         unsigned short is_pipe;
88 } rstream;
89
/* One hash table entry. data must stay the first member: hash_search() returns
 * &data and hash_find() stores that pointer back into a hash_item pointer.
 */
90 typedef struct hash_item_s {
91         union {
92                 struct var_s v;                 /* variable/array hash */
93                 struct rstream_s rs;    /* redirect streams hash */
94                 struct func_s f;                /* functions hash */
95         } data;
96         struct hash_item_s *next;       /* next in chain */
97         char name[1];                           /* really it's longer: hash_find() allocates room for the whole key */
98 } hash_item;
99
/* Chained hash table keyed by string */
100 typedef struct xhash_s {
101         unsigned int nel;                                       /* num of elements */
102         unsigned int csize;                                     /* current hash size */
103         unsigned int nprime;                            /* next hash size in PRIMES[] (index used by hash_rebuild) */
104         unsigned int glen;                                      /* summary length of item names (each counted with its NUL) */
105         struct hash_item_s **items;                     /* csize bucket heads */
106 } xhash;
107
108 /* Tree node */
/* l, r and a are the node's operand slots; which union member is meaningful
 * is not determined by anything in this struct.
 * NOTE(review): presumably selected by the operation class held in info - confirm.
 */
109 typedef struct node_s {
110         unsigned long info;
111         unsigned short lineno;
112         union {
113                 struct node_s *n;
114                 var *v;
115                 int i;
116                 char *s;
117                 regex_t *re;
118         } l;
119         union {
120                 struct node_s *n;
121                 regex_t *ire;
122                 func *f;
123                 int argno;
124         } r;
125         union {
126                 struct node_s *n;
127         } a;
128 } node;
129
130 /* Block of temporary variables (managed by nvalloc/nvfree) */
131 typedef struct nvblock_s {
132         int size;                       /* capacity of nv[] in vars */
133         var *pos;                       /* first unused var inside nv[] */
134         struct nvblock_s *prev;         /* blocks form a doubly linked list */
135         struct nvblock_s *next;
136         var nv[0];                      /* storage, allocated together with the header */
137 } nvblock;
138
/* A node bundled with two regex_t objects; used for the fsplitter and
 * rsplitter globals.
 * NOTE(review): presumably the field and record separators - confirm.
 */
139 typedef struct tsplitter_s {
140         node n;
141         regex_t re[2];
142 } tsplitter;
143
144 /* simple token classes */
145 /* Order and hex values are very important!!!  See next_token() */
/* next_token() starts with class bit 1 and shifts it left at every NTC marker
 * in tokenlist, so bit N here must describe the N-th group of that table.
 */
146 #define TC_SEQSTART      1                              /* ( */
147 #define TC_SEQTERM      (1 << 1)                /* ) */
148 #define TC_REGEXP       (1 << 2)                /* /.../ */
149 #define TC_OUTRDR       (1 << 3)                /* | > >> */
150 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
151 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
152 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
153 #define TC_IN           (1 << 7)
154 #define TC_COMMA        (1 << 8)
155 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
156 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
157 #define TC_ARRTERM      (1 << 11)               /* ] */
158 #define TC_GRPSTART     (1 << 12)               /* { */
159 #define TC_GRPTERM      (1 << 13)               /* } */
160 #define TC_SEMICOL      (1 << 14)
161 #define TC_NEWLINE      (1 << 15)
162 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
163 #define TC_WHILE        (1 << 17)
164 #define TC_ELSE         (1 << 18)
165 #define TC_BUILTIN      (1 << 19)
166 #define TC_GETLINE      (1 << 20)
167 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
168 #define TC_BEGIN        (1 << 22)
169 #define TC_END          (1 << 23)
/* the classes below have no tokenlist group; next_token() assigns TC_EOF,
 * TC_STRING and TC_NUMBER directly
 */
170 #define TC_EOF          (1 << 24)
171 #define TC_VARIABLE     (1 << 25)
172 #define TC_ARRAY        (1 << 26)
173 #define TC_FUNCTION     (1 << 27)
174 #define TC_STRING       (1 << 28)
175 #define TC_NUMBER       (1 << 29)
176
177 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
178
179 /* combined token classes */
180 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
181 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
182 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
183         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
184
185 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
186 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
187
188 /* word tokens, cannot mean something else if not expected */
189 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
190         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
191
192 /* discard newlines after these */
193 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
194         TC_BINOP | TC_OPTERM)
195
196 /* what can expression begin with */
197 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
198 /* what can group begin with */
199 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
200
201 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
202 /* operator is inserted between them */
203 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
204         TC_STRING | TC_NUMBER | TC_UOPPOST)
205 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
206
/* operand flags, OR-ed into the info words of tokeninfo[] */
/* NOTE(review): the 1/2 suffix presumably selects the left/right operand and
 * RES/STR/NUM the way it is pre-evaluated - confirm against evaluate()
 */
207 #define OF_RES1         0x010000
208 #define OF_RES2         0x020000
209 #define OF_STR1         0x040000
210 #define OF_STR2         0x080000
211 #define OF_NUM1         0x100000
212 #define OF_CHECKED      0x200000
213
214 /* combined operator flags */
/* two-letter names: first letter covers the *1 flags, second the *2 flags;
 * x = none, V = RES only, S = RES|STR, N = RES|NUM
 */
215 #define xx      0
216 #define xV      OF_RES2
217 #define xS      (OF_RES2 | OF_STR2)
218 #define Vx      OF_RES1
219 #define VV      (OF_RES1 | OF_RES2)
220 #define Nx      (OF_RES1 | OF_NUM1)
221 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
222 #define Sx      (OF_RES1 | OF_STR1)
223 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
224 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
225
/* masks for the operation class (OC_xxx / ST_xxx) and the low option bits of an info word */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument in bits 24..31 of an info word.  The argument is
 * parenthesized and converted to unsigned long before shifting: the builtin
 * entries pass values >= 0x80, and shifting those as a signed int overflows
 * (undefined behavior) and sign-extends into the unsigned long info word on
 * platforms where long is wider than int.
 */
#define P(x)    ((unsigned long)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000
236
237 /* Operation classes */
/* every class value occupies the OPCLSMASK byte (bits 8..15) of an info word */
238
/* boundary values inside the class numbering: SHIFT_TIL_THIS equals
 * OC_WALKINIT and RECUR_FROM_THIS equals OC_BINARY
 */
239 #define SHIFT_TIL_THIS  0x0600
240 #define RECUR_FROM_THIS 0x1000
241
242 enum {
243         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
244         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
245
246         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
247         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
248         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
249
250         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
251         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
252         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
253         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
254         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
255         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
256         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
257         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
258         OC_DONE=0x2800,
259
260         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
261         ST_WHILE=0x3300
262 };
263
264 /* simple builtins */
/* sub-codes OR-ed with OC_FBLTIN in tokeninfo[]; matching entries by position
 * with tokenlist gives: in=int rn=rand co=cos ex=exp lg=log si=sin sq=sqrt
 * sr=srand ti=systime le=length sy=system ff=fflush cl=close
 */
265 enum {
266         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
267         F_ti,   F_le,   F_sy,   F_ff,   F_cl
268 };
269
270 /* builtins */
/* sub-codes OR-ed with OC_BUILTIN in tokeninfo[]; by position in tokenlist:
 * a2=atan2 ix=index ma=match sp=split ss=substr ti=strftime lo=tolower
 * up=toupper ge=gensub gs=gsub su=sub
 */
271 enum {
272         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
273         B_ge,   B_gs,   B_su
274 };
275
276 /* tokens and their corresponding info values */
277
/* NTC is the string form and NTCC the character form of the same marker byte */
278 #define NTC             "\377"          /* switch to next token class (tc<<1) */
279 #define NTCC    '\377'
280
/* shorthand that keeps the tokeninfo[] rows short */
281 #define OC_B    OC_BUILTIN
282
/* Token table scanned by next_token().  Every token is stored as one length
 * byte (written as an octal escape) followed by its text.  NTC closes the
 * current token class, so the N-th NTC-separated group belongs to class bit
 * (1 << N), in the order of the TC_* defines.  A zero length byte ends the
 * table.  tokeninfo[] holds one info word per token, in the same order.
 */
283 static char * const tokenlist =
284         "\1("           NTC
285         "\1)"           NTC
286         "\1/"           NTC                                                                     /* REGEXP */
287         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
288         "\2++"          "\2--"          NTC                                             /* UOPPOST */
289         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
290         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
291         "\2*="          "\2/="          "\2%="          "\2^="
292         "\1+"           "\1-"           "\3**="         "\2**"
293         "\1/"           "\1%"           "\1^"           "\1*"
294         "\2!="          "\2>="          "\2<="          "\1>"
295         "\1<"           "\2!~"          "\1~"           "\2&&"
296         "\2||"          "\1?"           "\1:"           NTC
297         "\2in"          NTC
298         "\1,"           NTC
299         "\1|"           NTC
300         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
301         "\1]"           NTC
302         "\1{"           NTC
303         "\1}"           NTC
304         "\1;"           NTC
305         "\1\n"          NTC
306         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
307         "\10continue"                   "\6delete"      "\5print"
308         "\6printf"      "\4next"        "\10nextfile"
309         "\6return"      "\4exit"        NTC
310         "\5while"       NTC
311         "\4else"        NTC
312
313         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
314         "\3cos"         "\3exp"         "\3int"         "\3log"
315         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
316         "\6gensub"      "\4gsub"        "\5index"       "\6length"
317         "\5match"       "\5split"       "\7sprintf"     "\3sub"
318         "\6substr"      "\7systime"     "\10strftime"
319         "\7tolower"     "\7toupper"     NTC
320         "\7getline"     NTC
321         "\4func"        "\10function"   NTC
322         "\5BEGIN"       NTC
323         "\3END"         "\0"
324         ;
325
/* Info word for every token of tokenlist, in exactly the same order (NTC
 * markers have no entry).  A word combines an operation class (OC_xxx or
 * ST_xxx), operand flags (xx..SS), a priority or builtin descriptor placed
 * by P(), and a low-byte option such as an operator character or an F_xxx /
 * B_xxx code.  Tokens that need none of this are 0.
 */
326 static unsigned long tokeninfo[] = {
327
328         0,
329         0,
330         OC_REGEXP,
331         xS|'a',         xS|'w',         xS|'|',
332         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
333         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
334                 OC_FIELD|xV|P(5),
335         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
336                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
337         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
338                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
339         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
340                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
341         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
342                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
343         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
344                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
345         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
346                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
347         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
348                 OC_COLON|xx|P(67)|':',
349         OC_IN|SV|P(49),
350         OC_COMMA|SS|P(80),
351         OC_PGETLINE|SV|P(37),
352         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
353                 OC_UNARY|xV|P(19)|'!',
354         0,
355         0,
356         0,
357         0,
358         0,
359         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
360         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
361         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
362         OC_RETURN|Vx,   OC_EXIT|Nx,
363         ST_WHILE,
364         0,
365
366         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
367         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
368         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
369         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
370         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
371         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
372         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
373         OC_GETLINE|SV|P(0),
374         0,      0,
375         0,
376         0
377 };
378
379 /* internal variable names and their initial values       */
380 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* the enum gives the index of each internal variable in V[]; its order matches
 * the order of the names in vNames
 */
381 enum {
382         CONVFMT=0,      OFMT,           FS,                     OFS,
383         ORS,            RS,                     RT,                     FILENAME,
384         SUBSEP,         ARGIND,         ARGC,           ARGV,
385         ERRNO,          FNR,
386         NR,                     NF,                     IGNORECASE,
387         ENVIRON,        F0,                     _intvarcount_
388 };
389
/* NUL-separated list of names, closed by an empty name; the '*' marker is
 * stored right after the NUL of the name it applies to
 */
390 static char * vNames =
391         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
392         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"    
393         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
394         "ERRNO\0"       "FNR\0"
395         "NR\0"          "NF\0*"         "IGNORECASE\0*"
396         "ENVIRON\0"     "$\0*"          "\0";
397
/* NUL-separated initial string values for the first names of vNames, closed by
 * the byte \377
 * NOTE(review): how variables past the end of this list are initialised is
 * decided by code outside this view - confirm
 */
398 static char * vValues =
399         "%.6g\0"        "%.6g\0"        " \0"           " \0"
400         "\n\0"          "\n\0"          "\0"            "\0"
401         "\034\0"
402         "\377";
403
/* A new hash starts with FIRST_PRIME buckets; hash_rebuild() then steps
 * through PRIMES[] each time the table has to grow.
 * FIRST_PRIME carries no trailing semicolon, so it can be used inside any
 * expression as well as in a plain assignment.
 */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]);
408
409 /* globals */
410
411 extern char **environ;
412
/* internal variables, indexed by the CONVFMT.._intvarcount_ enum */
413 static var * V[_intvarcount_];
414 static chain beginseq, mainseq, endseq, *seq;
415 static int nextrec, nextfile;
416 static node *break_ptr, *continue_ptr;
417 static rstream *iF;
/* hash tables; newvar/newfile/newfunc add to vhash, fdhash and fnhash */
418 static xhash *vhash, *ahash, *fdhash, *fnhash;
/* program name and line number printed by syntax_error() */
419 static char *programname;
420 static short lineno;
421 static int is_f0_split;
422 static int nfields = 0;
423 static var *Fields = NULL;
424 static tsplitter fsplitter, rsplitter;
/* current block of the temporary-variable allocator (nvalloc/nvfree) */
425 static nvblock *cb = NULL;
/* where next_token() starts reading */
426 static char *pos;
/* scratch buffer; getvar_s() formats numbers into it */
427 static char *buf;
428 static int icase = FALSE;
429
/* current token, filled in by next_token() */
430 static struct {
431         unsigned long tclass;
432         unsigned long info;
433         char *string;
434         double number;
435         short lineno;
436         int rollback;
437 } t;
438
439 /* function prototypes */
/* xregcomp is the only extern one; the static ones are forward declarations
 * for functions defined later in this file
 */
440 extern void xregcomp(regex_t *preg, const char *regex, int cflags);
441 static void handle_special(var *);
442 static node *parse_expr(unsigned long);
443 static void chain_group(void);
444 static var *evaluate(node *, var *);
445 static rstream *next_input_file(void);
446 static int fmt_num(char *, int, char *, double, int);
447 static int awk_exit(int);
448
449 /* ---- error handling ---- */
450
/* message texts passed to syntax_error() / runtime_error() */
451 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
452 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
453 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
454 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
455 static const char EMSG_INV_FMT[] = "Invalid format specifier";
456 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
457 static const char EMSG_NOT_ARRAY[] = "Not an array";
458 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
459 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
/* only needed when the math builtins are configured out */
460 #ifndef CONFIG_FEATURE_AWK_MATH
461 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
462 #endif
463
464 static void syntax_error(const char * const message)
465 {
466         error_msg("%s:%i: %s", programname, lineno, message);
467         awk_exit(1);
468 }
469
470 #define runtime_error(x) syntax_error(x)
471
472
473 /* ---- hash stuff ---- */
474
/* Hash a NUL-terminated key: h = h * 63 + c for every character, in unsigned
 * arithmetic.  Callers reduce the result modulo the table size.
 */
static unsigned int hashidx(char *name)
{
	unsigned int h = 0;
	const char *p;

	/* (h << 6) - h is h * 63 */
	for (p = name; *p != '\0'; p++)
		h = h * 63u + *p;

	return h;
}
482
483 /* create new hash */
484 static xhash *hash_init(void) {
485
486         xhash *newhash;
487         
488         newhash = (xhash *)xcalloc(1, sizeof(xhash));
489         newhash->csize = FIRST_PRIME;
490         newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
491
492         return newhash;
493 }
494
495 /* find item in hash, return ptr to data, NULL if not found */
496 static void *hash_search(xhash *hash, char *name) {
497
498         hash_item *hi;
499
500         hi = hash->items [ hashidx(name) % hash->csize ];
501         while (hi) {
502                 if (strcmp(hi->name, name) == 0)
503                         return &(hi->data);
504                 hi = hi->next;
505         }
506         return NULL;
507 }
508
509 /* grow hash if it becomes too big */
510 static void hash_rebuild(xhash *hash) {
511
512         unsigned int newsize, i, idx;
513         hash_item **newitems, *hi, *thi;
514
515         if (hash->nprime == NPRIMES)
516                 return;
517
518         newsize = PRIMES[hash->nprime++];
519         newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
520
521         for (i=0; i<hash->csize; i++) {
522                 hi = hash->items[i];
523                 while (hi) {
524                         thi = hi;
525                         hi = thi->next;
526                         idx = hashidx(thi->name) % newsize;
527                         thi->next = newitems[idx];
528                         newitems[idx] = thi;
529                 }
530         }
531
532         free(hash->items);
533         hash->csize = newsize;
534         hash->items = newitems;
535 }
536
537 /* find item in hash, add it if necessary. Return ptr to data */
538 static void *hash_find(xhash *hash, char *name) {
539
540         hash_item *hi;
541         unsigned int idx;
542         int l;
543
544         hi = hash_search(hash, name);
545         if (! hi) {
546                 if (++hash->nel / hash->csize > 10)
547                         hash_rebuild(hash);
548
549                 l = xstrlen(name) + 1;
550                 hi = xcalloc(sizeof(hash_item) + l, 1);
551                 memcpy(hi->name, name, l);
552
553                 idx = hashidx(name) % hash->csize;
554                 hi->next = hash->items[idx];
555                 hash->items[idx] = hi;
556                 hash->glen += l;
557         }
558         return &(hi->data);
559 }
560
/* Typed front ends to hash_find().  Each expansion is wrapped in parentheses
 * so the cast applies to the call result even when the macro is used inside a
 * larger expression (for example directly followed by ->member).
 */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name) ((var *) hash_find(vhash, (name)))
#define newfile(name) ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name) ((func *) hash_find(fnhash, (name)))
565
566 static void hash_remove(xhash *hash, char *name) {
567
568         hash_item *hi, **phi;
569
570         phi = &(hash->items[ hashidx(name) % hash->csize ]);
571         while (*phi) {
572                 hi = *phi;
573                 if (strcmp(hi->name, name) == 0) {
574                         hash->glen -= (xstrlen(name) + 1);
575                         hash->nel--;
576                         *phi = hi->next;
577                         free(hi);
578                         break;
579                 }
580                 phi = &(hi->next);
581         }
582 }
583
584 /* ------ some useful functions ------ */
585
586 static void skip_spaces(char **s) {
587
588         register char *p = *s;
589
590         while(*p == ' ' || *p == '\t' ||
591                                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
592                 p++;
593         }
594         *s = p;
595 }
596
/* Return the NUL-terminated word at *s and move *s just past its NUL, i.e. to
 * the start of the following word in a NUL-separated list.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
605
/* Read one character from *s, translating a backslash escape through
 * process_escape_sequence(), and advance *s past what was consumed.
 */
static char nextchar(char **s)
{
	char c;
	char *after;

	c = **s;
	(*s)++;
	after = *s;

	if (c == '\\') {
		c = process_escape_sequence((const char **)s);
		/* the helper handed back a backslash without advancing:
		 * take the character after the backslash as it is */
		if (c == '\\' && *s == after) {
			c = **s;
			(*s)++;
		}
	}
	return c;
}
616
/* Return 1 when c is alphanumeric or an underscore, 0 otherwise */
static inline int isalnum_(int c)
{
	if (isalnum(c))
		return 1;
	return c == '_';
}
621
/* Open path with xfopen(); the single-character name "-" yields stdin */
static FILE *afopen(const char *path, const char *mode)
{
	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return xfopen(path, mode);
}
626
627 /* -------- working with variables (set/get/copy/etc) -------- */
628
629 static xhash *iamarray(var *v) {
630
631         var *a = v;
632
633         while (a->type & VF_CHILD)
634                 a = a->x.parent;
635
636         if (! (a->type & VF_ARRAY)) {
637                 a->type |= VF_ARRAY;
638                 a->x.array = hash_init();
639         }
640         return a->x.array;
641 }
642
643 static void clear_array(xhash *array) {
644
645         unsigned int i;
646         hash_item *hi, *thi;
647
648         for (i=0; i<array->csize; i++) {
649                 hi = array->items[i];
650                 while (hi) {
651                         thi = hi;
652                         hi = hi->next;
653                         if (thi->data.v.string) free(thi->data.v.string);
654                         free(thi);
655                 }
656                 array->items[i] = NULL;
657         }
658         array->glen = array->nel = 0;
659 }
660
661 /* clear a variable */
662 static var *clrvar(var *v) {
663
664         if (v->string && !(v->type & VF_FSTR))
665                 free(v->string);
666
667         v->type &= VF_DONTTOUCH;
668         v->type |= VF_DIRTY;
669         v->string = NULL;
670         return v;
671 }
672
673 /* assign string value to variable */
674 static var *setvar_p(var *v, char *value) {
675
676         clrvar(v);
677         v->string = value;
678         handle_special(v);
679
680         return v;
681 }
682
683 /* same as setvar_p but make a copy of string */
684 static var *setvar_s(var *v, char *value) {
685
686         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
687 }
688
689 /* same as setvar_s but set USER flag */
690 static var *setvar_u(var *v, char *value) {
691
692         setvar_s(v, value);
693         v->type |= VF_USER;
694         return v;
695 }
696
697 /* set array element to user string */
698 static void setari_u(var *a, int idx, char *s) {
699
700         register var *v;
701         static char sidx[12];
702
703         sprintf(sidx, "%d", idx);
704         v = findvar(iamarray(a), sidx);
705         setvar_u(v, s);
706 }
707
708 /* assign numeric value to variable */
709 static var *setvar_i(var *v, double value) {
710
711         clrvar(v);
712         v->type |= VF_NUMBER;
713         v->number = value;
714         handle_special(v);
715         return v;
716 }
717
718 static char *getvar_s(var *v) {
719
720         /* if v is numeric and has no cached string, convert it to string */
721         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
722                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
723                 v->string = xstrdup(buf);
724                 v->type |= VF_CACHED;
725         }
726         return (v->string == NULL) ? "" : v->string;
727 }
728
729 static double getvar_i(var *v) {
730
731         char *s;
732
733         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
734                 v->number = 0;
735                 s = v->string;
736                 if (s && *s) {
737                         v->number = strtod(s, &s);
738                         if (v->type & VF_USER) {
739                                 skip_spaces(&s);
740                                 if (*s != '\0')
741                                         v->type &= ~VF_USER;
742                         }
743                 } else {
744                         v->type &= ~VF_USER;
745                 }
746                 v->type |= VF_CACHED;
747         }
748         return v->number;
749 }
750
751 static var *copyvar(var *dest, var *src) {
752
753         if (dest != src) {
754                 clrvar(dest);
755                 dest->type |= (src->type & ~VF_DONTTOUCH);
756                 dest->number = src->number;
757                 if (src->string)
758                         dest->string = xstrdup(src->string);
759         }
760         handle_special(dest);
761         return dest;
762 }
763
764 static var *incvar(var *v) {
765
766         return setvar_i(v, getvar_i(v)+1.);
767 }
768
769 /* return true if v is number or numeric string */
770 static int is_numeric(var *v) {
771
772         getvar_i(v);
773         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
774 }
775
776 /* return 1 when value of v corresponds to true, 0 otherwise */
777 static int istrue(var *v) {
778
779         if (is_numeric(v))
780                 return (v->number == 0) ? 0 : 1;
781         else
782                 return (v->string && *(v->string)) ? 1 : 0;
783 }
784
785 /* temporary varables allocator. Last allocated should be first freed */
786 static var *nvalloc(int n) {
787
788         nvblock *pb = NULL;
789         var *v, *r;
790         int size;
791
792         while (cb) {
793                 pb = cb;
794                 if ((cb->pos - cb->nv) + n <= cb->size) break;
795                 cb = cb->next;
796         }
797
798         if (! cb) {
799                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
800                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
801                 cb->size = size;
802                 cb->pos = cb->nv;
803                 cb->prev = pb;
804                 cb->next = NULL;
805                 if (pb) pb->next = cb;
806         }
807
808         v = r = cb->pos;
809         cb->pos += n;
810
811         while (v < cb->pos) {
812                 v->type = 0;
813                 v->string = NULL;
814                 v++;
815         }
816
817         return r;
818 }
819
820 static void nvfree(var *v) {
821
822         var *p;
823
824         if (v < cb->nv || v >= cb->pos)
825                 runtime_error(EMSG_INTERNAL_ERROR);
826
827         for (p=v; p<cb->pos; p++) {
828                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
829                         clear_array(iamarray(p));
830                         free(p->x.array->items);
831                         free(p->x.array);
832                 }
833                 if (p->type & VF_WALK)
834                         free(p->x.walker);
835
836                 clrvar(p);
837         }
838
839         cb->pos = v;
840         while (cb->prev && cb->pos == cb->nv) {
841                 cb = cb->prev;
842         }
843 }
844
845 /* ------- awk program text parsing ------- */
846
847 /* Read the next token at the global cursor pos and describe it in the
848  * global t (tclass, info, and string or number). If the class of the token
849  * is not contained in the 'expected' mask, syntax_error() is raised.
     * Returns the class of the token that was delivered.
     *
     * Besides scanning fresh text the function can also (a) re-deliver the
     * previous token after rollback_token(), and (b) deliver a token that was
     * held back because a concatenation operator had to be synthesized first.
     */
850 static unsigned long next_token(unsigned long expected) {
851
852         char *p, *pp, *s;
853         char *tl;
854         unsigned long tc, *ti;
855         int l;
856         static int concat_inserted = FALSE;     /* a synthesized concat was returned last time */
857         static unsigned long save_tclass, save_info;    /* the real token held back meanwhile */
858         static unsigned long ltclass = TC_OPTERM;       /* class of the last delivered token */
859
860         if (t.rollback) {
861
                    /* rollback_token() was called: t still holds the token, hand it out again */
862                 t.rollback = FALSE;
863
864         } else if (concat_inserted) {
865
                    /* now deliver the token that triggered the synthesized concatenation */
866                 concat_inserted = FALSE;
867                 t.tclass = save_tclass;
868                 t.info = save_info;
869
870         } else {
871
                    /* scan a fresh token from the program text */
872                 p = pos;
873
874         readnext:
875                 skip_spaces(&p);
876                 lineno = t.lineno;
                    /* '#' starts a comment that extends to the end of the line */
877                 if (*p == '#')
878                         while (*p != '\n' && *p != '\0') p++;
879
                    /* only the counter is bumped here; p is not advanced, the newline
                     * itself is classified further down */
880                 if (*p == '\n')
881                         t.lineno++;
882
883                 if (*p == '\0') {
884                         tc = TC_EOF;
885
886                 } else if (*p == '\"') {
887                         /* it's a string */
                            /* characters are fetched through nextchar() and written back
                             * into the program buffer starting just after the opening quote */
888                         t.string = s = ++p;
889                         while (*p != '\"') {
890                                 if (*p == '\0' || *p == '\n')
891                                         syntax_error(EMSG_UNEXP_EOS);
892                                 *(s++) = nextchar(&p);
893                         }
894                         p++;
895                         *s = '\0';
896                         tc = TC_STRING;
897
898                 } else if ((expected & TC_REGEXP) && *p == '/') {
899                         /* it's regexp */
                            /* '/' is only taken as a regexp delimiter when the caller
                             * expects a regexp; the text is rewritten in place like a string */
900                         t.string = s = ++p;
901                         while (*p != '/') {
902                                 if (*p == '\0' || *p == '\n')
903                                         syntax_error(EMSG_UNEXP_EOS);
904                                 if ((*s++ = *p++) == '\\') {
                                            /* replace the copied backslash with the result of
                                             * process_escape_sequence(); a double backslash is
                                             * kept as two, and if nothing was consumed the
                                             * following character is copied verbatim */
905                                         pp = p;
906                                         *(s-1) = process_escape_sequence((const char **)&p);
907                                         if (*pp == '\\') *s++ = '\\';
908                                         if (p == pp) *s++ = *p++;
909                                 }
910                         }
911                         p++;
912                         *s = '\0';
913                         tc = TC_REGEXP;
914
                    /* NOTE(review): *p is a plain char; isdigit() on a negative value is
                     * undefined - consider casting to unsigned char */
915                 } else if (*p == '.' || isdigit(*p)) {
916                         /* it's a number */
                            /* strtod() advances p past the literal; a '.' right after it
                             * is rejected as an unexpected token */
917                         t.number = strtod(p, &p);
918                         if (*p == '.')
919                                 syntax_error(EMSG_UNEXP_TOKEN);
920                         tc = TC_NUMBER;
921
922                 } else {
923                         /* search for something known */
                            /* tokenlist is walked as a sequence of entries, each a length
                             * byte followed by that many characters; an NTCC byte moves tc
                             * on to the next class bit. tokeninfo advances in step with
                             * the entries. */
924                         tl = tokenlist;
925                         tc = 0x00000001;
926                         ti = tokeninfo;
927                         while (*tl) {
928                                 l = *(tl++);
929                                 if (l == NTCC) {
930                                         tc <<= 1;
931                                         continue;
932                                 }
933                                 /* if token class is expected, token
934                                  * matches and it's not a longer word,
935                                  * then this is what we are looking for
936                                  */
937                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
938                                 *tl == *p && strncmp(p, tl, l) == 0 &&
939                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
940                                         t.info = *ti;
941                                         p += l;
942                                         break;
943                                 }
944                                 ti++;
945                                 tl += l;
946                         }
947
948                         if (! *tl) {
949                                 /* it's a name (var/array/function),
950                                  * otherwise it's something wrong
951                                  */
952                                 if (! isalnum_(*p))
953                                         syntax_error(EMSG_UNEXP_TOKEN);
954
                                    /* the name is moved one byte to the left (over the
                                     * character preceding it) so that a terminating NUL
                                     * can be stored without destroying the character
                                     * that follows the name */
955                                 t.string = --p;
956                                 while(isalnum_(*(++p))) {
957                                         *(p-1) = *p;
958                                 }
959                                 *(p-1) = '\0';
960                                 tc = TC_VARIABLE;
                                    /* '(' must follow immediately for a function call and is
                                     * left unconsumed; '[' may follow after blanks and is
                                     * consumed */
961                                 if (*p == '(') {
962                                         tc = TC_FUNCTION;
963                                 } else {
964                                         skip_spaces(&p);
965                                         if (*p == '[') {
966                                                 p++;
967                                                 tc = TC_ARRAY;
968                                         }
969                                 }
970                         }
971                 }
972                 pos = p;
973
974                 /* skipping newlines in some cases */
                    /* i.e. when the previously delivered token belongs to TC_NOTERM */
975                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
976                         goto readnext;
977
978                 /* insert concatenation operator when needed */
                    /* the scanned token is parked in save_tclass/save_info and a
                     * TC_BINOP concat token is delivered in its place */
979                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
980                         concat_inserted = TRUE;
981                         save_tclass = tc;
982                         save_info = t.info;
983                         tc = TC_BINOP;
984                         t.info = OC_CONCAT | SS | P(35);
985                 }
986
987                 t.tclass = tc;
988         }
989         ltclass = t.tclass;
990
991         /* Are we ready for this? */
992         if (! (ltclass & expected))
993                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
994                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
995
996         return ltclass;
997 }
998
999 static void rollback_token(void) { t.rollback = TRUE; }
1000
1001 static node *new_node(unsigned long info) {
1002
1003         register node *n;
1004
1005         n = (node *)xcalloc(sizeof(node), 1);
1006         n->info = info;
1007         n->lineno = lineno;
1008         return n;
1009 }
1010
1011 static node *mk_re_node(char *s, node *n, regex_t *re) {
1012
1013         n->info = OC_REGEXP;
1014         n->l.re = re;
1015         n->r.ire = re + 1;
1016         xregcomp(re, s, REG_EXTENDED);
1017         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1018
1019         return n;
1020 }
1021
1022 static node *condition(void) {
1023
1024         next_token(TC_SEQSTART);
1025         return parse_expr(TC_SEQTERM);
1026 }
1027
1028 /* parse expression terminated by given argument, return ptr
1029  * to built subtree. Terminator is eaten by parse_expr.
      *
      * iexp is the mask of token classes that end the expression. While the
      * tree is being built, a.n of every node is used as a link to the node it
      * hangs under; the local node sn acts as a dummy root and the finished
      * tree is whatever ends up in sn.r.n (NULL for an empty expression). */
1030 static node *parse_expr(unsigned long iexp) {
1031
1032         node sn;
1033         node *cn = &sn;         /* node created most recently */
1034         node *vn, *glptr;       /* glptr: getline node that may still take a '<' */
1035         unsigned long tc, xtc;  /* xtc: token classes acceptable next */
1036         var *v;
1037
1038         sn.info = PRIMASK;
1039         sn.r.n = glptr = NULL;
1040         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1041
1042         while (! ((tc = next_token(xtc)) & iexp)) {
1043                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1044                         /* input redirection (<) attached to glptr node */
1045                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1046                         xtc = TC_OPERAND | TC_UOPPRE;
1047                         glptr = NULL;
1048
1049                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1050                         /* for binary and postfix-unary operators, jump back over
1051                          * previous operators with higher priority */
1052                         vn = cn;
1053                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) || 
1054                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1055                                 vn = vn->a.n;
1056                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1057                                 t.info += P(6);
                             /* the new operator node takes vn's place under vn's
                              * parent; vn itself becomes its operand below */
1058                         cn = vn->a.n->r.n = new_node(t.info);
1059                         cn->a.n = vn->a.n;
1060                         if (tc & TC_BINOP) {
1061                                 cn->l.n = vn;
1062                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1063                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1064                                         /* it's a pipe */
                                             /* the token after it must be getline */
1065                                         next_token(TC_GETLINE);
1066                                         /* give maximum priority to this pipe */
1067                                         cn->info &= ~PRIMASK;
1068                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1069                                 }
1070                         } else {
                                     /* postfix operator: operand goes to the right */
1071                                 cn->r.n = vn;
1072                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1073                         }
1074                         vn->a.n = cn;
1075
1076                 } else {
1077                         /* for operands and prefix-unary operators, attach them
1078                          * to last node */
1079                         vn = cn;
1080                         cn = vn->r.n = new_node(t.info);
1081                         cn->a.n = vn;
1082                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1083                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1084                                 xtc = TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1085                                 /* one should be very careful with switch on tclass - 
1086                                  * only simple tclasses should be used! */
1087                                 switch (tc) {
1088                                   case TC_VARIABLE:
1089                                   case TC_ARRAY:
                                             /* a name found in ahash is referenced by its
                                              * stored index (OC_FNARG); any other name is
                                              * resolved through newvar() */
1090                                         cn->info = OC_VAR;
1091                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1092                                                 cn->info = OC_FNARG;
1093                                                 cn->l.i = v->x.aidx;
1094                                         } else {
1095                                                 cn->l.v = newvar(t.string);
1096                                         }
1097                                         if (tc & TC_ARRAY) {
                                                     /* subscript expression, up to TC_ARRTERM */
1098                                                 cn->info |= xS;
1099                                                 cn->r.n = parse_expr(TC_ARRTERM);
1100                                         }
1101                                         xtc = TC_UOPPOST | TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1102                                         break;
1103                                         
1104                                   case TC_NUMBER:
1105                                   case TC_STRING:
                                             /* constants become anonymous variables */
1106                                         cn->info = OC_VAR;
1107                                         v = cn->l.v = xcalloc(sizeof(var), 1);
1108                                         if (tc & TC_NUMBER)
1109                                                 setvar_i(v, t.number);
1110                                         else
1111                                                 setvar_s(v, t.string);
1112                                         break;
1113
1114                                   case TC_REGEXP:
                                             /* storage for both the case-sensitive and the
                                              * case-insensitive compiled pattern */
1115                                         mk_re_node(t.string, cn,
1116                                                                         (regex_t *)xcalloc(sizeof(regex_t),2));
1117                                         break;
1118
1119                                   case TC_FUNCTION:
                                             /* call: r.f is the function, l.n its
                                              * parenthesized argument expression */
1120                                         cn->info = OC_FUNC;
1121                                         cn->r.f = newfunc(t.string);
1122                                         cn->l.n = condition();
1123                                         break;
1124
1125                                   case TC_SEQSTART:
                                             /* parenthesized subexpression replaces the node
                                              * that was just created for the TC_SEQSTART token */
1126                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1127                                         cn->a.n = vn;
1128                                         break;
1129
1130                                   case TC_GETLINE:
                                             /* remember it: a following '<' is a redirection */
1131                                         glptr = cn;
1132                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1133                                         break;
1134
1135                                   case TC_BUILTIN:
1136                                         cn->l.n = condition();
1137                                         break;
1138                                 }
1139                         }
1140                 }
1141         }
1142         return sn.r.n;
1143 }
1144
1145 /* add node to chain. Return ptr to alloc'd node */
1146 static node *chain_node(unsigned long info) {
1147
1148         register node *n;
1149
1150         if (! seq->first)
1151                 seq->first = seq->last = new_node(0);
1152
1153         if (seq->programname != programname) {
1154                 seq->programname = programname;
1155                 n = chain_node(OC_NEWSOURCE);
1156                 n->l.s = xstrdup(programname);
1157         }
1158
1159         n = seq->last;
1160         n->info = info;
1161         seq->last = n->a.n = new_node(OC_DONE);
1162
1163         return n;
1164 }
1165
1166 static void chain_expr(unsigned long info) {
1167
1168         node *n;
1169
1170         n = chain_node(info);
1171         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1172         if (t.tclass & TC_GRPTERM)
1173                 rollback_token();
1174 }
1175
1176 static node *chain_loop(node *nn) {
1177
1178         node *n, *n2, *save_brk, *save_cont;
1179
1180         save_brk = break_ptr;
1181         save_cont = continue_ptr;
1182
1183         n = chain_node(OC_BR | Vx);
1184         continue_ptr = new_node(OC_EXEC);
1185         break_ptr = new_node(OC_EXEC);
1186         chain_group();
1187         n2 = chain_node(OC_EXEC | Vx);
1188         n2->l.n = nn;
1189         n2->a.n = n;
1190         continue_ptr->a.n = n2;
1191         break_ptr->a.n = n->r.n = seq->last;
1192
1193         continue_ptr = save_cont;
1194         break_ptr = save_brk;
1195
1196         return n;
1197 }
1198
1199 /* parse group and attach it to chain.
      *
      * A "group" here is either a block delimited by TC_GRPSTART/TC_GRPTERM
      * (handled by recursion), a single expression statement, or one of the
      * statement keywords dispatched in the switch below. The generated nodes
      * are appended to the current sequence through chain_node(). */
1200 static void chain_group(void) {
1201
1202         unsigned long c;
1203         node *n, *n2, *n3;
1204
             /* leading newlines are skipped */
1205         do {
1206                 c = next_token(TC_GRPSEQ);
1207         } while (c & TC_NEWLINE);
1208
1209         if (c & TC_GRPSTART) {
                     /* block: chain groups until the closing token shows up */
1210                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1211                         rollback_token();
1212                         chain_group();
1213                 }
1214         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                     /* plain expression statement (possibly empty) */
1215                 rollback_token();
1216                 chain_expr(OC_EXEC | Vx);
1217         } else {                                                /* TC_STATEMNT */
1218                 switch (t.info & OPCLSMASK) {
1219                         case ST_IF:
                                     /* n: OC_BR node with the condition in l.n and, in r.n,
                                      * the node following the first branch.
                                      * n2: OC_EXEC node chained after the first branch; with
                                      * an else part its a.n is redirected past that part */
1220                                 n = chain_node(OC_BR | Vx);
1221                                 n->l.n = condition();
1222                                 chain_group();
1223                                 n2 = chain_node(OC_EXEC);
1224                                 n->r.n = seq->last;
1225                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1226                                         chain_group();
1227                                         n2->a.n = seq->last;
1228                                 } else {
1229                                         rollback_token();
1230                                 }
1231                                 break;
1232
1233                         case ST_WHILE:
                                     /* condition is parsed first, then stored in the loop head */
1234                                 n2 = condition();
1235                                 n = chain_loop(NULL);
1236                                 n->l.n = n2;
1237                                 break;
1238
1239                         case ST_DO:
                                     /* n2 is chained in front of the loop and linked to the
                                      * node after the loop head, so the head's condition is
                                      * not on the path of the first pass; the condition is
                                      * read after the body, following the 'while' token */
1240                                 n2 = chain_node(OC_EXEC);
1241                                 n = chain_loop(NULL);
1242                                 n2->a.n = n->a.n;
1243                                 next_token(TC_WHILE);
1244                                 n->l.n = condition();
1245                                 break;
1246
1247                         case ST_FOR:
1248                                 next_token(TC_SEQSTART);
1249                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1250                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
                                             /* the parenthesized expression must be an OC_IN node;
                                              * its operands are handed to OC_WALKINIT and its
                                              * left operand also to the OC_WALKNEXT loop head */
1251                                         if ((n2->info & OPCLSMASK) != OC_IN)
1252                                                 syntax_error(EMSG_UNEXP_TOKEN);
1253                                         n = chain_node(OC_WALKINIT | VV);
1254                                         n->l.n = n2->l.n;
1255                                         n->r.n = n2->r.n;
1256                                         n = chain_loop(NULL);
1257                                         n->info = OC_WALKNEXT | Vx;
1258                                         n->l.n = n2->l.n;
1259                                 } else {                                                                        /* for(;;) */
                                             /* first expression is chained as a statement, the
                                              * second becomes the loop condition, the third is
                                              * passed to chain_loop(); a missing condition
                                              * turns the head into a plain OC_EXEC */
1260                                         n = chain_node(OC_EXEC | Vx);
1261                                         n->l.n = n2;
1262                                         n2 = parse_expr(TC_SEMICOL);
1263                                         n3 = parse_expr(TC_SEQTERM);
1264                                         n = chain_loop(n3);
1265                                         n->l.n = n2;
1266                                         if (! n2)
1267                                                 n->info = OC_EXEC;
1268                                 }
1269                                 break;
1270
1271                         case OC_PRINT:
1272                         case OC_PRINTF:
                                     /* l.n: argument expression; when it was ended by an
                                      * output redirection token, that token's info is merged
                                      * into the node and r.n receives the target expression */
1273                                 n = chain_node(t.info);
1274                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1275                                 if (t.tclass & TC_OUTRDR) {
1276                                         n->info |= t.info;
1277                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1278                                 }
1279                                 if (t.tclass & TC_GRPTERM)
1280                                         rollback_token();
1281                                 break;
1282
1283                         case OC_BREAK:
                                     /* link to the current loop's break helper node */
1284                                 n = chain_node(OC_EXEC);
1285                                 n->a.n = break_ptr;
1286                                 break;
1287
1288                         case OC_CONTINUE:
                                     /* link to the current loop's continue helper node */
1289                                 n = chain_node(OC_EXEC);
1290                                 n->a.n = continue_ptr;
1291                                 break;
1292
1293                         /* delete, next, nextfile, return, exit */
1294                         default:
1295                                 chain_expr(t.info);
1296
1297                 }
1298         }
1299 }
1300
1301 static void parse_program(char *p) {
1302
1303         unsigned long tclass;
1304         node *cn;
1305         func *f;
1306         var *v;
1307
1308         pos = p;
1309         t.lineno = 1;
1310         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1311                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1312
1313                 if (tclass & TC_OPTERM)
1314                         continue;
1315
1316                 seq = &mainseq;
1317                 if (tclass & TC_BEGIN) {
1318                         seq = &beginseq;
1319                         chain_group();
1320
1321                 } else if (tclass & TC_END) {
1322                         seq = &endseq;
1323                         chain_group();
1324
1325                 } else if (tclass & TC_FUNCDECL) {
1326                         next_token(TC_FUNCTION);
1327                         pos++;
1328                         f = newfunc(t.string);
1329                         f->body.first = NULL;
1330                         f->nargs = 0;
1331                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1332                                 v = findvar(ahash, t.string);
1333                                 v->x.aidx = (f->nargs)++;
1334
1335                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1336                                         break;
1337                         }
1338                         seq = &(f->body);
1339                         chain_group();
1340                         clear_array(ahash);
1341
1342                 } else if (tclass & TC_OPSEQ) {
1343                         rollback_token();
1344                         cn = chain_node(OC_TEST);
1345                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1346                         if (t.tclass & TC_GRPSTART) {
1347                                 rollback_token();
1348                                 chain_group();
1349                         } else {
1350                                 chain_node(OC_PRINT);
1351                         }
1352                         cn->r.n = mainseq.last;
1353
1354                 } else /* if (tclass & TC_GRPSTART) */ {
1355                         rollback_token();
1356                         chain_group();
1357                 }
1358         }
1359 }
1360
1361
1362 /* -------- program execution part -------- */
1363
1364 static node *mk_splitter(char *s, tsplitter *spl) {
1365
1366         register regex_t *re, *ire;
1367         node *n;
1368
1369         re = &spl->re[0];
1370         ire = &spl->re[1];
1371         n = &spl->n;
1372         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1373                 regfree(re);
1374                 regfree(ire);
1375         }
1376         if (xstrlen(s) > 1) {
1377                 mk_re_node(s, n, re);
1378         } else {
1379                 n->info = (unsigned long) *s;
1380         }
1381
1382         return n;
1383 }
1384
1385 /* use node as a regular expression. Supplied with node ptr and regex_t
1386  * storage space. Return ptr to regex (if result points to preg, it shuold
1387  * be later regfree'd manually
1388  */
1389 static regex_t *as_regex(node *op, regex_t *preg) {
1390
1391         var *v;
1392         char *s;
1393
1394         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1395                 return icase ? op->r.ire : op->l.re;
1396         } else {
1397                 v = nvalloc(1);
1398                 s = getvar_s(evaluate(op, v));
1399                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1400                 nvfree(v);
1401                 return preg;
1402         }
1403 }
1404
/* Gradually growing buffer: make sure *b is allocated and that its recorded
 * capacity *size is greater than n. When it has to grow, the new capacity
 * is n plus half of n plus 80 bytes. *size is not read while *b is NULL. */
static void qrealloc(char **b, int n, int *size)
{
        if (*b != NULL && n < *size)
                return;

        *size = n + (n >> 1) + 80;
        *b = xrealloc(*b, *size);
}
1411
1412 /* resize field storage space */
1413 static void fsrealloc(int size) {
1414
1415         static int maxfields = 0;
1416         int i;
1417
1418         if (size >= maxfields) {
1419                 i = maxfields;
1420                 maxfields = size + 16;
1421                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1422                 for (; i<maxfields; i++) {
1423                         Fields[i].type = VF_SPECIAL;
1424                         Fields[i].string = NULL;
1425                 }
1426         }
1427
1428         if (size < nfields) {
1429                 for (i=size; i<nfields; i++) {
1430                         clrvar(Fields+i);
1431                 }
1432         }
1433         nfields = size;
1434 }
1435
1436 static int awk_split(char *s, node *spl, char **slist) {
1437
1438         int l, n=0;
1439         char c[4];
1440         char *s1;
1441         regmatch_t pmatch[2];
1442
1443         /* in worst case, each char would be a separate field */
1444         *slist = s1 = xstrndup(s, xstrlen(s) * 2 + 3);
1445
1446         c[0] = c[1] = (char)spl->info;
1447         c[2] = c[3] = '\0';
1448         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1449
1450         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1451                 while (*s) {
1452                         l = strcspn(s, c+2);
1453                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1454                         pmatch[0].rm_so <= l) {
1455                                 l = pmatch[0].rm_so;
1456                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1457                         } else {
1458                                 pmatch[0].rm_eo = l;
1459                                 if (*(s+l)) pmatch[0].rm_eo++;
1460                         }
1461
1462                         memcpy(s1, s, l);
1463                         *(s1+l) = '\0';
1464                         nextword(&s1);
1465                         s += pmatch[0].rm_eo;
1466                         n++;
1467                 }
1468         } else if (c[0] == '\0') {              /* null split */
1469                 while(*s) {
1470                         *(s1++) = *(s++);
1471                         *(s1++) = '\0';
1472                         n++;
1473                 }
1474         } else if (c[0] != ' ') {               /* single-character split */
1475                 if (icase) {
1476                         c[0] = toupper(c[0]);
1477                         c[1] = tolower(c[1]);
1478                 }
1479                 if (*s1) n++;
1480                 while ((s1 = strpbrk(s1, c))) {
1481                         *(s1++) = '\0';
1482                         n++;
1483                 }
1484         } else {                                /* space split */
1485                 while (*s) {
1486                         while (isspace(*s)) s++;
1487                         if (! *s) break;
1488                         n++;
1489                         while (*s && !isspace(*s))
1490                                 *(s1++) = *(s++);
1491                         *(s1++) = '\0';
1492                 }
1493         }
1494         return n;
1495 }
1496
1497 static void split_f0(void) {
1498
1499         static char *fstrings = NULL;
1500         int i, n;
1501         char *s;
1502
1503         if (is_f0_split)
1504                 return;
1505
1506         is_f0_split = TRUE;
1507         if (fstrings) free(fstrings);
1508         fsrealloc(0);
1509         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1510         fsrealloc(n);
1511         s = fstrings;
1512         for (i=0; i<n; i++) {
1513                 Fields[i].string = nextword(&s);
1514                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1515         }
1516
1517         /* set NF manually to avoid side effects */
1518         clrvar(V[NF]);
1519         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1520         V[NF]->number = nfields;
1521 }
1522
1523 /* perform additional actions when some internal variables changed */
1524 static void handle_special(var *v) {
1525
1526         int n;
1527         char *b, *sep, *s;
1528         int sl, l, len, i, bsize;
1529
1530         if (! (v->type & VF_SPECIAL))
1531                 return;
1532
1533         if (v == V[NF]) {
1534                 n = (int)getvar_i(v);
1535                 fsrealloc(n);
1536
1537                 /* recalculate $0 */
1538                 sep = getvar_s(V[OFS]);
1539                 sl = xstrlen(sep);
1540                 b = NULL;
1541                 len = 0;
1542                 for (i=0; i<n; i++) {
1543                         s = getvar_s(&Fields[i]);
1544                         l = xstrlen(s);
1545                         if (b) {
1546                                 memcpy(b+len, sep, sl);
1547                                 len += sl;
1548                         }
1549                         qrealloc(&b, len+l+sl, &bsize);
1550                         memcpy(b+len, s, l);
1551                         len += l;
1552                 }
1553                 b[len] = '\0';
1554                 setvar_p(V[F0], b);
1555                 is_f0_split = TRUE;
1556
1557         } else if (v == V[F0]) {
1558                 is_f0_split = FALSE;
1559
1560         } else if (v == V[FS]) {
1561                 mk_splitter(getvar_s(v), &fsplitter);
1562
1563         } else if (v == V[RS]) {
1564                 mk_splitter(getvar_s(v), &rsplitter);
1565
1566         } else if (v == V[IGNORECASE]) {
1567                 icase = istrue(v);
1568
1569         } else {                                                /* $n */
1570                 n = getvar_i(V[NF]);
1571                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1572                 /* right here v is invalid. Just to note... */
1573         }
1574 }
1575
1576 /* step through func/builtin/etc arguments */
1577 static node *nextarg(node **pn) {
1578
1579         node *n;
1580
1581         n = *pn;
1582         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1583                 *pn = n->r.n;
1584                 n = n->l.n;
1585         } else {
1586                 *pn = NULL;
1587         }
1588         return n;
1589 }
1590
1591 static void hashwalk_init(var *v, xhash *array) {
1592
1593         char **w;
1594         hash_item *hi;
1595         int i;
1596
1597         if (v->type & VF_WALK)
1598                 free(v->x.walker);
1599
1600         v->type |= VF_WALK;
1601         w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1602         *w = *(w+1) = (char *)(w + 2);
1603         for (i=0; i<array->csize; i++) {
1604                 hi = array->items[i];
1605                 while(hi) {
1606                         strcpy(*w, hi->name);
1607                         nextword(w);
1608                         hi = hi->next;
1609                 }
1610         }
1611 }
1612
1613 static int hashwalk_next(var *v) {
1614
1615         char **w;
1616
1617         w = v->x.walker;
1618         if (*(w+1) == *w)
1619                 return FALSE;
1620
1621         setvar_s(v, nextword(w+1));
1622         return TRUE;
1623 }
1624
1625 /* evaluate node, return 1 when result is true, 0 otherwise */
1626 static int ptest(node *pattern) {
1627         static var v;
1628
1629         return istrue(evaluate(pattern, &v));
1630 }
1631
1632 /* read next record from stream rsm into a variable v */
1633 static int awk_getline(rstream *rsm, var *v) {
1634
1635         char *b;
1636         regmatch_t pmatch[2];
1637         int p, pp=0, size;
1638         int fd, so, eo, r, rp;
1639         char c, *s;
1640
1641         /* we're using our own buffer since we need access to accumulating
1642          * characters
1643          */
1644         fd = fileno(rsm->F);
1645         b = rsm->buffer;
1646         p = rsm->pos;
1647         size = rsm->size;
1648         c = (char) rsplitter.n.info;
1649         rp = 0;
1650         do {
1651                 qrealloc(&b, p+128, &size);
1652                 so = eo = p;
1653                 r = 1;
1654                 if (p > 0) {
1655                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1656                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1657                                                                                                 b, 1, pmatch, 0) == 0) {
1658                                         so = pmatch[0].rm_so;
1659                                         eo = pmatch[0].rm_eo;
1660                                         if (b[eo] != '\0')
1661                                                 break;
1662                                 }
1663                         } else if (c != '\0') {
1664                                 s = strchr(b+pp, c);
1665                                 if (s) {
1666                                         so = eo = s-b;
1667                                         eo++;
1668                                         break;
1669                                 }
1670                         } else {
1671                                 while (b[rp] == '\n')
1672                                         rp++;
1673                                 s = strstr(b+rp, "\n\n");
1674                                 if (s) {
1675                                         so = eo = s-b;
1676                                         while (b[eo] == '\n') eo++;
1677                                         if (b[eo] != '\0')
1678                                                 break;
1679                                 }
1680                         }
1681                 }
1682
1683                 pp = p;
1684                 p += safe_read(fd, b+p, size-p-1);
1685                 if (p < pp) {
1686                         p = 0;
1687                         r = 0;
1688                         setvar_i(V[ERRNO], errno);
1689                 }
1690                 b[p] = '\0';
1691
1692         } while (p > pp);
1693
1694         if (p == 0) {
1695                 r--;
1696         } else {
1697                 c = b[so]; b[so] = '\0';
1698                 setvar_s(v, b+rp);
1699                 v->type |= VF_USER;
1700                 b[so] = c;
1701                 c = b[eo]; b[eo] = '\0';
1702                 setvar_s(V[RT], b+so);
1703                 b[eo] = c;
1704         }
1705
1706         p -= eo;
1707         if (p) memmove(b, (const void *)(b+eo), p+1);
1708
1709         rsm->buffer = b;
1710         rsm->pos = p;
1711         rsm->size = size;
1712
1713         return r;
1714 }
1715
1716 static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1717
1718         int r=0;
1719         char c, *s=format;
1720
1721         if (int_as_int && n == (int)n) {
1722                 r = snprintf(b, size, "%d", (int)n);
1723         } else {
1724                 do { c = *s; } while (*s && *++s);
1725                 if (strchr("diouxX", c)) {
1726                         r = snprintf(b, size, format, (int)n);
1727                 } else if (strchr("eEfgG", c)) {
1728                         r = snprintf(b, size, format, n);
1729                 } else {
1730                         runtime_error(EMSG_INV_FMT);
1731                 }
1732         }
1733         return r;
1734 }
1735
1736
1737 /* formatted output into an allocated buffer, return ptr to buffer */
1738 static char *awk_printf(node *n) {
1739
1740         char *b = NULL;
1741         char *fmt, *s, *s1, *f;
1742         int i, j, incr, bsize;
1743         char c, c1;
1744         var *v, *arg;
1745
1746         v = nvalloc(1);
1747         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1748
1749         i = 0;
1750         while (*f) {
1751                 s = f;
1752                 while (*f && (*f != '%' || *(++f) == '%'))
1753                         f++;
1754                 while (*f && !isalpha(*f)) 
1755                         f++;
1756
1757                 incr = (f - s) + MAXVARFMT;
1758                 qrealloc(&b, incr+i, &bsize);
1759                 c = *f; if (c != '\0') f++;
1760                 c1 = *f ; *f = '\0';
1761                 arg = evaluate(nextarg(&n), v);
1762
1763                 j = i;
1764                 if (c == 'c' || !c) {
1765                         i += sprintf(b+i, s,
1766                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1767
1768                 } else if (c == 's') {
1769                     s1 = getvar_s(arg);
1770                         qrealloc(&b, incr+i+xstrlen(s1), &bsize);
1771                         i += sprintf(b+i, s, s1);
1772
1773                 } else {
1774                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1775                 }
1776                 *f = c1;
1777
1778                 /* if there was an error while sprintf, return value is negative */
1779                 if (i < j) i = j;
1780
1781         }
1782
1783         b = xrealloc(b, i+1);
1784         free(fmt);
1785         nvfree(v);
1786         b[i] = '\0';
1787         return b;
1788 }
1789
1790 /* common substitution routine
1791  * replace (nm) substring of (src) that match (n) with (repl), store
1792  * result into (dest), return number of substitutions. If nm=0, replace
1793  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1794  * subexpression matching (\1-\9)
1795  */
1796 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1797
1798         char *ds = NULL;
1799         char *sp, *s;
1800         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1801         regmatch_t pmatch[10];
1802         regex_t sreg, *re;
1803
1804         re = as_regex(rn, &sreg);
1805         if (! src) src = V[F0];
1806         if (! dest) dest = V[F0];
1807
1808         i = di = 0;
1809         sp = getvar_s(src);
1810         rl = xstrlen(repl);
1811         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1812                 so = pmatch[0].rm_so;
1813                 eo = pmatch[0].rm_eo;
1814
1815                 qrealloc(&ds, di + eo + rl, &dssize);
1816                 memcpy(ds + di, sp, eo);
1817                 di += eo;
1818                 if (++i >= nm) {
1819                         /* replace */
1820                         di -= (eo - so);
1821                         nbs = 0;
1822                         for (s = repl; *s; s++) {
1823                                 ds[di++] = c = *s;
1824                                 if (c == '\\') {
1825                                         nbs++;
1826                                         continue;
1827                                 }
1828                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1829                                         di -= ((nbs + 3) >> 1);
1830                                         j = 0;
1831                                         if (c != '&') {
1832                                                 j = c - '0';
1833                                                 nbs++;
1834                                         }
1835                                         if (nbs % 2) {
1836                                                 ds[di++] = c;
1837                                         } else {
1838                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1839                                                 qrealloc(&ds, di + rl + n, &dssize);
1840                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1841                                                 di += n;
1842                                         }
1843                                 }
1844                                 nbs = 0;
1845                         }
1846                 }
1847
1848                 sp += eo;
1849                 if (i == nm) break;
1850                 if (eo == so) {
1851                         if (! (ds[di++] = *sp++)) break;
1852                 }
1853         }
1854
1855         qrealloc(&ds, di + strlen(sp), &dssize);
1856         strcpy(ds + di, sp);
1857         setvar_p(dest, ds);
1858         if (re == &sreg) regfree(re);
1859         return i;
1860 }
1861
1862 static var *exec_builtin(node *op, var *res) {
1863
1864         int (*to_xxx)(int);
1865         var *tv;
1866         node *an[4];
1867         var  *av[4];
1868         char *as[4];
1869         regmatch_t pmatch[2];
1870         regex_t sreg, *re;
1871         static tsplitter tspl;
1872         node *spl;
1873         unsigned long isr, info;
1874         int nargs;
1875         time_t tt;
1876         char *s, *s1;
1877         int i, l, ll, n;
1878
1879         tv = nvalloc(4);
1880         isr = info = op->info;
1881         op = op->l.n;
1882
1883         av[2] = av[3] = NULL;
1884         for (i=0 ; i<4 && op ; i++) {
1885                 an[i] = nextarg(&op);
1886                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1887                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1888                 isr >>= 1;
1889         }
1890
1891         nargs = i;
1892         if (nargs < (info >> 30))
1893                 runtime_error(EMSG_TOO_FEW_ARGS);
1894
1895         switch (info & OPNMASK) {
1896
1897           case B_a2:
1898 #ifdef CONFIG_FEATURE_AWK_MATH
1899                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1900 #else
1901                 runtime_error(EMSG_NO_MATH);
1902 #endif
1903                 break;
1904
1905           case B_sp:
1906                 if (nargs > 2) {
1907                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1908                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1909                 } else {
1910                         spl = &fsplitter.n;
1911                 }
1912
1913                 n = awk_split(as[0], spl, &s);
1914                 s1 = s;
1915                 clear_array(iamarray(av[1]));
1916                 for (i=1; i<=n; i++)
1917                         setari_u(av[1], i, nextword(&s1));
1918                 free(s);
1919                 setvar_i(res, n);
1920                 break;
1921
1922           case B_ss:
1923                 l = xstrlen(as[0]);
1924                 i = getvar_i(av[1]) - 1;
1925                 if (i>l) i=l; if (i<0) i=0;
1926                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1927                 if (n<0) n=0;
1928                 s = xmalloc(n+1);
1929                 strncpy(s, as[0]+i, n);
1930                 s[n] = '\0';
1931                 setvar_p(res, s);
1932                 break;
1933
1934           case B_lo:
1935                 to_xxx = tolower;
1936                 goto lo_cont;
1937
1938           case B_up:
1939                 to_xxx = toupper;
1940 lo_cont:
1941                 s1 = s = xstrdup(as[0]);
1942                 while (*s1) {
1943                         *s1 = (*to_xxx)(*s1);
1944                         s1++;
1945                 }
1946                 setvar_p(res, s);
1947                 break;
1948
1949           case B_ix:
1950                 n = 0;
1951                 ll = xstrlen(as[1]);
1952                 l = xstrlen(as[0]) - ll;
1953                 if (ll > 0 && l >= 0) {
1954                         if (! icase) {
1955                                 s = strstr(as[0], as[1]);
1956                                 if (s) n = (s - as[0]) + 1;
1957                         } else {
1958                                 /* this piece of code is terribly slow and
1959                                  * really should be rewritten
1960                                  */
1961                                 for (i=0; i<=l; i++) {
1962                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1963                                                 n = i+1;
1964                                                 break;
1965                                         }
1966                                 }
1967                         }
1968                 }
1969                 setvar_i(res, n);
1970                 break;
1971
1972           case B_ti:
1973                 if (nargs > 1)
1974                         tt = getvar_i(av[1]);
1975                 else
1976                         time(&tt);
1977                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1978                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1979                 buf[i] = '\0';
1980                 setvar_s(res, buf);
1981                 break;
1982
1983           case B_ma:
1984                 re = as_regex(an[1], &sreg);
1985                 n = regexec(re, as[0], 1, pmatch, 0);
1986                 if (n == 0) {
1987                         pmatch[0].rm_so++;
1988                         pmatch[0].rm_eo++;
1989                 } else {
1990                         pmatch[0].rm_so = 0;
1991                         pmatch[0].rm_eo = -1;
1992                 }
1993                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1994                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1995                 setvar_i(res, pmatch[0].rm_so);
1996                 if (re == &sreg) regfree(re);
1997                 break;
1998
1999           case B_ge:
2000                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2001                 break;
2002
2003           case B_gs:
2004                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2005                 break;
2006
2007           case B_su:
2008                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2009                 break;
2010         }
2011
2012         nvfree(tv);
2013         return res;
2014 }
2015
2016 /*
2017  * Evaluate node - the heart of the program. Supplied with subtree
2018  * and place where to store result. returns ptr to result.
2019  */
2020 #define XC(n) ((n) >> 8)
2021
2022 static var *evaluate(node *op, var *res) {
2023
2024         /* This procedure is recursive so we should count every byte */
2025         static var *fnargs = NULL;
2026         static unsigned int seed = 1;
2027         static regex_t sreg;
2028         node *op1;
2029         var *v1;
2030         union {
2031                 var *v;
2032                 char *s;
2033                 double d;
2034                 int i;
2035         } L, R;
2036         unsigned long opinfo;
2037         short opn;
2038         union {
2039                 char *s;
2040                 rstream *rsm;
2041                 FILE *F;
2042                 var *v;
2043                 regex_t *re;
2044                 unsigned long info;
2045         } X;
2046
2047         if (! op)
2048                 return setvar_s(res, NULL);
2049
2050         v1 = nvalloc(2);
2051
2052         while (op) {
2053
2054                 opinfo = op->info;
2055                 opn = (short)(opinfo & OPNMASK);
2056                 lineno = op->lineno;
2057
2058                 /* execute inevitable things */
2059                 op1 = op->l.n;
2060                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2061                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2062                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2063                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2064                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2065
2066                 switch (XC(opinfo & OPCLSMASK)) {
2067
2068                   /* -- iterative node type -- */
2069
2070                   /* test pattern */
2071                   case XC( OC_TEST ):
2072                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2073                                 /* it's range pattern */
2074                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2075                                         op->info |= OF_CHECKED;
2076                                         if (ptest(op1->r.n))
2077                                                 op->info &= ~OF_CHECKED;
2078
2079                                         op = op->a.n;
2080                                 } else {
2081                                         op = op->r.n;
2082                                 }
2083                         } else {
2084                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2085                         }
2086                         break;
2087
2088                   /* just evaluate an expression, also used as unconditional jump */
2089                   case XC( OC_EXEC ):
2090                         break;
2091
2092                   /* branch, used in if-else and various loops */
2093                   case XC( OC_BR ):
2094                         op = istrue(L.v) ? op->a.n : op->r.n;
2095                         break;
2096
2097                   /* initialize for-in loop */
2098                   case XC( OC_WALKINIT ):
2099                         hashwalk_init(L.v, iamarray(R.v));
2100                         break;
2101
2102                   /* get next array item */
2103                   case XC( OC_WALKNEXT ):
2104                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2105                         break;
2106
2107                   case XC( OC_PRINT ):
2108                   case XC( OC_PRINTF ):
2109                         X.F = stdout;
2110                         if (op->r.n) {
2111                                 X.rsm = newfile(R.s);
2112                                 if (! X.rsm->F) {
2113                                         if (opn == '|') {
2114                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2115                                                         perror_msg_and_die("popen");
2116                                                 X.rsm->is_pipe = 1;
2117                                         } else {
2118                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2119                                         }
2120                                 }
2121                                 X.F = X.rsm->F;
2122                         }
2123
2124                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2125                                 if (! op1) {
2126                                         fputs(getvar_s(V[F0]), X.F);
2127                                 } else {
2128                                         while (op1) {
2129                                                 L.v = evaluate(nextarg(&op1), v1);
2130                                                 if (L.v->type & VF_NUMBER) {
2131                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2132                                                                                                                 getvar_i(L.v), TRUE);
2133                                                         fputs(buf, X.F);
2134                                                 } else {
2135                                                         fputs(getvar_s(L.v), X.F);
2136                                                 }
2137
2138                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2139                                         }
2140                                 }
2141                                 fputs(getvar_s(V[ORS]), X.F);
2142
2143                         } else {        /* OC_PRINTF */
2144                                 L.s = awk_printf(op1);
2145                                 fputs(L.s, X.F);
2146                                 free(L.s);
2147                         }
2148                         fflush(X.F);
2149                         break;
2150
2151                   case XC( OC_DELETE ):
2152                         X.info = op1->info & OPCLSMASK;
2153                         if (X.info == OC_VAR) {
2154                                 R.v = op1->l.v;
2155                         } else if (X.info == OC_FNARG) {
2156                                 R.v = &fnargs[op1->l.i];
2157                         } else {
2158                                 runtime_error(EMSG_NOT_ARRAY);
2159                         }
2160
2161                         if (op1->r.n) {
2162                                 clrvar(L.v);
2163                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2164                                 hash_remove(iamarray(R.v), L.s);
2165                         } else {
2166                                 clear_array(iamarray(R.v));
2167                         }
2168                         break;
2169
2170                   case XC( OC_NEWSOURCE ):
2171                         programname = op->l.s;
2172                         break;
2173
2174                   case XC( OC_RETURN ):
2175                         copyvar(res, L.v);
2176                         break;
2177
2178                   case XC( OC_NEXTFILE ):
2179                         nextfile = TRUE;
2180                   case XC( OC_NEXT ):
2181                         nextrec = TRUE;
2182                   case XC( OC_DONE ):
2183                         clrvar(res);
2184                         break;
2185
2186                   case XC( OC_EXIT ):
2187                         awk_exit(L.d);
2188
2189                   /* -- recursive node type -- */
2190
2191                   case XC( OC_VAR ):
2192                         L.v = op->l.v;
2193                         if (L.v == V[NF])
2194                                 split_f0();
2195                         goto v_cont;
2196
2197                   case XC( OC_FNARG ):
2198                         L.v = &fnargs[op->l.i];
2199
2200 v_cont:
2201                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2202                         break;
2203
2204                   case XC( OC_IN ):
2205                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2206                         break;
2207
2208                   case XC( OC_REGEXP ):
2209                         op1 = op;
2210                         L.s = getvar_s(V[F0]);
2211                         goto re_cont;
2212
2213                   case XC( OC_MATCH ):
2214                         op1 = op->r.n;
2215 re_cont:
2216                         X.re = as_regex(op1, &sreg);
2217                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2218                         if (X.re == &sreg) regfree(X.re);
2219                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2220                         break;
2221
2222                   case XC( OC_MOVE ):
2223                         /* if source is a temporary string, jusk relink it to dest */
2224                         if (R.v == v1+1 && R.v->string) {
2225                                 res = setvar_p(L.v, R.v->string);
2226                                 R.v->string = NULL;
2227                         } else {
2228                                 res = copyvar(L.v, R.v);
2229                         }
2230                         break;
2231
2232                   case XC( OC_TERNARY ):
2233                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2234                                 runtime_error(EMSG_POSSIBLE_ERROR);
2235                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2236                         break;
2237
2238                   case XC( OC_FUNC ):
2239                         if (! op->r.f->body.first)
2240                                 runtime_error(EMSG_UNDEF_FUNC);
2241
2242                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2243                         while (op1) {
2244                                 L.v = evaluate(nextarg(&op1), v1);
2245                                 copyvar(R.v, L.v);
2246                                 R.v->type |= VF_CHILD;
2247                                 R.v->x.parent = L.v;
2248                                 if (++R.v - X.v >= op->r.f->nargs)
2249                                         break;
2250                         }
2251
2252                         R.v = fnargs;
2253                         fnargs = X.v;
2254
2255                         L.s = programname;
2256                         res = evaluate(op->r.f->body.first, res);
2257                         programname = L.s;
2258
2259                         nvfree(fnargs);
2260                         fnargs = R.v;
2261                         break;
2262
2263                   case XC( OC_GETLINE ):
2264                   case XC( OC_PGETLINE ):
2265                         if (op1) {
2266                                 X.rsm = newfile(L.s);
2267                                 if (! X.rsm->F) {
2268                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2269                                                 X.rsm->F = popen(L.s, "r");
2270                                                 X.rsm->is_pipe = TRUE;
2271                                         } else {
2272                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2273                                         }
2274                                 }
2275                         } else {
2276                                 if (! iF) iF = next_input_file();
2277                                 X.rsm = iF;
2278                         }
2279
2280                         if (! X.rsm->F) {
2281                                 setvar_i(V[ERRNO], errno);
2282                                 setvar_i(res, -1);
2283                                 break;
2284                         }
2285
2286                         if (! op->r.n)
2287                                 R.v = V[F0];
2288
2289                         L.i = awk_getline(X.rsm, R.v);
2290                         if (L.i > 0) {
2291                                 if (! op1) {
2292                                         incvar(V[FNR]);
2293                                         incvar(V[NR]);
2294                                 }
2295                         }
2296                         setvar_i(res, L.i);
2297                         break;
2298
2299                   /* simple builtins */
2300                   case XC( OC_FBLTIN ):
2301                         switch (opn) {
2302
2303                           case F_in:
2304                                 R.d = (int)L.d;
2305                                 break;
2306
2307                           case F_rn:
2308                                 R.d =  (double)rand() / (double)RAND_MAX;
2309                                 break;
2310
2311 #ifdef CONFIG_FEATURE_AWK_MATH
2312                           case F_co:
2313                                 R.d = cos(L.d);
2314                                 break;
2315
2316                           case F_ex:
2317                                 R.d = exp(L.d);
2318                                 break;
2319
2320                           case F_lg:
2321                                 R.d = log(L.d);
2322                                 break;
2323
2324                           case F_si:
2325                                 R.d = sin(L.d);
2326                                 break;
2327
2328                           case F_sq:
2329                                 R.d = sqrt(L.d);
2330                                 break;
2331 #else
2332                           case F_co:
2333                           case F_ex:
2334                           case F_lg:
2335                           case F_si:
2336                           case F_sq:
2337                                 runtime_error(EMSG_NO_MATH);
2338                                 break;
2339 #endif
2340
2341                           case F_sr:
2342                                 R.d = (double)seed;
2343                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2344                                 srand(seed);
2345                                 break;
2346
2347                           case F_ti:
2348                                 R.d = time(NULL);
2349                                 break;
2350
2351                           case F_le:
2352                                 if (! op1)
2353                                         L.s = getvar_s(V[F0]);
2354                                 R.d = xstrlen(L.s);
2355                                 break;
2356
2357                           case F_sy:
2358                                 fflush(NULL);
2359                                 R.d = (L.s && *L.s) ? system(L.s) : 0;
2360                                 break;
2361
2362                           case F_ff:
2363                                 if (! op1)
2364                                         fflush(stdout);
2365                                 else {
2366                                         if (L.s && *L.s) {
2367                                                 X.rsm = newfile(L.s);
2368                                                 fflush(X.rsm->F);
2369                                         } else {
2370                                                 fflush(NULL);
2371                                         }
2372                                 }
2373                                 break;
2374
2375                           case F_cl:
2376                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2377                                 if (X.rsm) {
2378                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2379                                         if (X.rsm->buffer)
2380                                                 free(X.rsm->buffer);
2381                                         hash_remove(fdhash, L.s);
2382                                 }
2383                                 if (R.i != 0)
2384                                         setvar_i(V[ERRNO], errno);
2385                                 R.d = (double)R.i;
2386                                 break;
2387                         }
2388                         setvar_i(res, R.d);
2389                         break;
2390
2391                   case XC( OC_BUILTIN ):
2392                         res = exec_builtin(op, res);
2393                         break;
2394
2395                   case XC( OC_SPRINTF ):
2396                         setvar_p(res, awk_printf(op1));
2397                         break;
2398
2399                   case XC( OC_UNARY ):
2400                         X.v = R.v;
2401                         L.d = R.d = getvar_i(R.v);
2402                         switch (opn) {
2403                           case 'P':
2404                                 L.d = ++R.d;
2405                                 goto r_op_change;
2406                           case 'p':
2407                                 R.d++;
2408                                 goto r_op_change;
2409                           case 'M':
2410                                 L.d = --R.d;
2411                                 goto r_op_change;
2412                           case 'm':
2413                                 R.d--;
2414                                 goto r_op_change;
2415                           case '!':
2416                             L.d = istrue(X.v) ? 0 : 1;
2417                                 break;
2418                           case '-':
2419                                 L.d = -R.d;
2420                                 break;
2421                         r_op_change:
2422                                 setvar_i(X.v, R.d);
2423                         }
2424                         setvar_i(res, L.d);
2425                         break;
2426
2427                   case XC( OC_FIELD ):
2428                         R.i = (int)getvar_i(R.v);
2429                         if (R.i == 0) {
2430                                 res = V[F0];
2431                         } else {
2432                                 split_f0();
2433                                 if (R.i > nfields)
2434                                         fsrealloc(R.i);
2435
2436                                 res = &Fields[R.i-1];
2437                         }
2438                         break;
2439
2440                   /* concatenation (" ") and index joining (",") */
2441                   case XC( OC_CONCAT ):
2442                   case XC( OC_COMMA ):
2443                         opn = xstrlen(L.s) + xstrlen(R.s) + 2;
2444                         X.s = (char *)xmalloc(opn);
2445                         strcpy(X.s, L.s);
2446                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2447                                 L.s = getvar_s(V[SUBSEP]);
2448                                 X.s = (char *)xrealloc(X.s, opn + xstrlen(L.s));
2449                                 strcat(X.s, L.s);
2450                         }
2451                         strcat(X.s, R.s);
2452                         setvar_p(res, X.s);
2453                         break;
2454
2455                   case XC( OC_LAND ):
2456                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2457                         break;
2458
2459                   case XC( OC_LOR ):
2460                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2461                         break;
2462
2463                   case XC( OC_BINARY ):
2464                   case XC( OC_REPLACE ):
2465                         R.d = getvar_i(R.v);
2466                         switch (opn) {
2467                           case '+':
2468                                 L.d += R.d;
2469                                 break;
2470                           case '-':
2471                                 L.d -= R.d;
2472                                 break;
2473                           case '*':
2474                                 L.d *= R.d;
2475                                 break;
2476                           case '/':
2477                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2478                                 L.d /= R.d;
2479                                 break;
2480                           case '&':
2481 #ifdef CONFIG_FEATURE_AWK_MATH
2482                                 L.d = pow(L.d, R.d);
2483 #else
2484                                 runtime_error(EMSG_NO_MATH);
2485 #endif
2486                                 break;
2487                           case '%':
2488                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2489                                 L.d -= (int)(L.d / R.d) * R.d;
2490                                 break;
2491                         }
2492                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2493                         break;
2494
2495                   case XC( OC_COMPARE ):
2496                         if (is_numeric(L.v) && is_numeric(R.v)) {
2497                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2498                         } else {
2499                                 L.s = getvar_s(L.v);
2500                                 R.s = getvar_s(R.v);
2501                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2502                         }
2503                         switch (opn & 0xfe) {
2504                           case 0:
2505                                 R.i = (L.d > 0);
2506                                 break;
2507                           case 2:
2508                                 R.i = (L.d >= 0);
2509                                 break;
2510                           case 4:
2511                                 R.i = (L.d == 0);
2512                                 break;
2513                         }
2514                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2515                         break;
2516
2517                   default:
2518                         runtime_error(EMSG_POSSIBLE_ERROR);
2519                 }
2520                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2521                         op = op->a.n;
2522                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2523                         break;
2524                 if (nextrec)
2525                         break;
2526         }
2527         nvfree(v1);
2528         return res;
2529 }
2530
2531
2532 /* -------- main & co. -------- */
2533
2534 static int awk_exit(int r) {
2535
2536         unsigned int i;
2537         hash_item *hi;
2538
2539         /* waiting for children */
2540         for (i=0; i<fdhash->csize; i++) {
2541                 hi = fdhash->items[i];
2542                 while(hi) {
2543                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2544                                 pclose(hi->data.rs.F);
2545                         hi = hi->next;
2546                 }
2547         }
2548
2549         exit(r);
2550 }
2551
2552 /* if expr looks like "var=value", perform assignment and return 1,
2553  * otherwise return 0 */
2554 static int is_assignment(char *expr) {
2555
2556         char *exprc, *s, *s0, *s1;
2557
2558         exprc = xstrdup(expr);
2559         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2560                 free(exprc);
2561                 return FALSE;
2562         }
2563
2564         *(s++) = '\0';
2565         s0 = s1 = s;
2566         while (*s)
2567                 *(s1++) = nextchar(&s);
2568
2569         *s1 = '\0';
2570         setvar_u(newvar(exprc), s0);
2571         free(exprc);
2572         return TRUE;
2573 }
2574
2575 /* switch to next input file */
2576 static rstream *next_input_file(void) {
2577
2578         static rstream rsm;
2579         FILE *F = NULL;
2580         char *fname, *ind;
2581         static int files_happen = FALSE;
2582
2583         if (rsm.F) fclose(rsm.F);
2584         rsm.F = NULL;
2585         rsm.pos = 0;
2586
2587         do {
2588                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2589                         if (files_happen)
2590                                 return NULL;
2591                         fname = "-";
2592                         F = stdin;
2593                 } else {
2594                         ind = getvar_s(incvar(V[ARGIND]));
2595                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2596                         if (fname && *fname && !is_assignment(fname))
2597                                 F = afopen(fname, "r");
2598                 }
2599         } while (!F);
2600
2601         files_happen = TRUE;
2602         setvar_s(V[FILENAME], fname);
2603         rsm.F = F;
2604         return &rsm;
2605 }
2606
2607 extern int awk_main(int argc, char **argv) {
2608
2609         char *s, *s1;
2610         int i, j, c;
2611         var *v;
2612         static var tv;
2613         char **envp;
2614         static int from_file = FALSE;
2615         rstream *rsm;
2616         FILE *F, *stdfiles[3];
2617         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2618
2619         /* allocate global buffer */
2620         buf = xmalloc(MAXVARFMT+1);
2621
2622         vhash = hash_init();
2623         ahash = hash_init();
2624         fdhash = hash_init();
2625         fnhash = hash_init();
2626
2627         /* initialize variables */
2628         for (i=0;  *vNames;  i++) {
2629                 V[i] = v = newvar(nextword(&vNames));
2630                 if (*vValues != '\377')
2631                         setvar_s(v, nextword(&vValues));
2632                 else
2633                         setvar_i(v, 0);
2634
2635                 if (*vNames == '*') {
2636                         v->type |= VF_SPECIAL;
2637                         vNames++;
2638                 }
2639         }
2640
2641         handle_special(V[FS]);
2642         handle_special(V[RS]);
2643
2644         stdfiles[0] = stdin;
2645         stdfiles[1] = stdout;
2646         stdfiles[2] = stderr;
2647         for (i=0; i<3; i++) {
2648                 rsm = newfile(nextword(&stdnames));
2649                 rsm->F = stdfiles[i];
2650         }
2651
2652         for (envp=environ; *envp; envp++) {
2653                 s = xstrdup(*envp);
2654                 s1 = strchr(s, '=');
2655                 *(s1++) = '\0';
2656                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2657                 free(s);
2658         }
2659
2660         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2661                 switch (c) {
2662                         case 'F':
2663                                 setvar_s(V[FS], optarg);
2664                                 break;
2665                         case 'v':
2666                                 if (! is_assignment(optarg))
2667                                         show_usage();
2668                                 break;
2669                         case 'f':
2670                                 from_file = TRUE;
2671                                 F = afopen(programname = optarg, "r");
2672                                 s = NULL;
2673                                 /* one byte is reserved for some trick in next_token */
2674                                 for (i=j=1; j>0; i+=j) {
2675                                         s = (char *)xrealloc(s, i+4096);
2676                                         j = fread(s+i, 1, 4094, F);
2677                                 }
2678                                 s[i] = '\0';
2679                                 fclose(F);
2680                                 parse_program(s+1);
2681                                 free(s);
2682                                 break;
2683                         case 'W':
2684                                 error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2685                                 break;
2686
2687                         default:
2688                                 show_usage();
2689                 }
2690         }
2691
2692         if (!from_file) {
2693                 if (argc == optind)
2694                         show_usage();
2695                 programname="cmd. line";
2696                 parse_program(argv[optind++]);
2697
2698         }
2699
2700         /* fill in ARGV array */
2701         setvar_i(V[ARGC], argc - optind + 1);
2702         setari_u(V[ARGV], 0, "awk");
2703         for(i=optind; i < argc; i++)
2704                 setari_u(V[ARGV], i+1-optind, argv[i]);
2705
2706         evaluate(beginseq.first, &tv);
2707         if (! mainseq.first && ! endseq.first)
2708                 awk_exit(EXIT_SUCCESS);
2709
2710         /* input file could already be opened in BEGIN block */
2711         if (! iF) iF = next_input_file();
2712
2713         /* passing through input files */
2714         while (iF) {
2715
2716                 nextfile = FALSE;
2717                 setvar_i(V[FNR], 0);
2718
2719                 while ((c = awk_getline(iF, V[F0])) > 0) {
2720
2721                         nextrec = FALSE;
2722                         incvar(V[NR]);
2723                         incvar(V[FNR]);
2724                         evaluate(mainseq.first, &tv);
2725
2726                         if (nextfile)
2727                                 break;
2728                 }
2729
2730                 if (c < 0)
2731                         runtime_error(strerror(errno));
2732
2733                 iF = next_input_file();
2734
2735         }
2736
2737         evaluate(endseq.first, &tv);
2738         awk_exit(EXIT_SUCCESS);
2739
2740         return 0;
2741 }
2742