Remove bb_ prefixes from xfuncs.c (and a few other places), consolidate
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "busybox.h"
11 #include "xregex.h"
12 #include <math.h>
13
14
/* size limit handed to fmt_num() when a number is converted to a string */
#define MAXVARFMT       240
/* smallest number of vars allocated per temporary-variable block */
#define MINNVBLOCK      64

/* variable flags (bits of var.type) */
#define VF_NUMBER       (1 << 0)    /* primary type is number */
#define VF_ARRAY        (1 << 1)    /* it's an array */

#define VF_CACHED       (1 << 8)    /* num/str value has cached str/num equivalent */
#define VF_USER         (1 << 9)    /* user input (may be numeric string) */
#define VF_SPECIAL      (1 << 10)   /* requires extra handling when changed */
#define VF_WALK         (1 << 11)   /* variable has alloc'd x.walker list */
#define VF_FSTR         (1 << 12)   /* string points to fstring buffer */
#define VF_CHILD        (1 << 13)   /* function arg; x.parent points to source */
#define VF_DIRTY        (1 << 14)   /* variable was set explicitly */

/* flags that survive a value change: clrvar() masks type with this set */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
32
/* Variable: a flag word, a numeric value, a string value and one
 * flag-dependent extra pointer/index kept in the union x.
 */
typedef struct var_s var;
struct var_s {
	unsigned short type;            /* VF_* flags */
	double number;                  /* numeric value */
	char *string;                   /* string value; NULL reads back as "" */
	union {
		int aidx;               /* func arg idx (for compilation stage) */
		struct xhash_s *array;  /* array ptr (VF_ARRAY) */
		struct var_s *parent;   /* for func args (VF_CHILD), ptr to actual parameter */
		char **walker;          /* list of array elements for for..in (VF_WALK) */
	} x;
};
45
/* Node chain (pattern-action chain, BEGIN, END, function bodies):
 * pointers to the first and last node plus the owning program's name.
 */
typedef struct chain_s chain;
struct chain_s {
	struct node_s *first;   /* head of the chain */
	struct node_s *last;    /* tail of the chain */
	char *programname;      /* program name associated with the chain */
};
52
53 /* Function */
54 typedef struct func_s {
55         unsigned short nargs;
56         struct chain_s body;
57 } func;
58
/* I/O stream: a stdio handle with its own buffer bookkeeping.
 * NOTE(review): the exact meaning of adv/size/pos is defined by the
 * reader code, which is not part of this section - confirm there.
 */
typedef struct rstream_s rstream;
struct rstream_s {
	FILE *F;                    /* underlying stdio stream */
	char *buffer;               /* data buffer */
	int adv;
	int size;
	int pos;
	unsigned short is_pipe;     /* nonzero for a pipe (presumably opened via popen) */
};
68
69 typedef struct hash_item_s {
70         union {
71                 struct var_s v;                 /* variable/array hash */
72                 struct rstream_s rs;    /* redirect streams hash */
73                 struct func_s f;                /* functions hash */
74         } data;
75         struct hash_item_s *next;       /* next in chain */
76         char name[1];                           /* really it's longer */
77 } hash_item;
78
/* Chained hash table keyed by name */
typedef struct xhash_s xhash;
struct xhash_s {
	unsigned int nel;               /* num of elements */
	unsigned int csize;             /* current hash size (bucket count) */
	unsigned int nprime;            /* next hash size in PRIMES[] */
	unsigned int glen;              /* summary length of item names, NULs included */
	struct hash_item_s **items;     /* bucket array, csize entries */
};
86
87 /* Tree node */
88 typedef struct node_s {
89         uint32_t info;
90         unsigned short lineno;
91         union {
92                 struct node_s *n;
93                 var *v;
94                 int i;
95                 char *s;
96                 regex_t *re;
97         } l;
98         union {
99                 struct node_s *n;
100                 regex_t *ire;
101                 func *f;
102                 int argno;
103         } r;
104         union {
105                 struct node_s *n;
106         } a;
107 } node;
108
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
111         int size;
112         var *pos;
113         struct nvblock_s *prev;
114         struct nvblock_s *next;
115         var nv[0];
116 } nvblock;
117
118 typedef struct tsplitter_s {
119         node n;
120         regex_t re[2];
121 } tsplitter;
122
/* Token classes are single-bit masks so that sets of acceptable classes can
 * be OR-ed together and passed to next_token() as `expected'.
 * next_token() scans tokenlist starting with class bit 1 and shifts the
 * class left by one at every NTC marker, so the Nth NTC-separated group of
 * tokenlist belongs to the class with value (1 << (N-1)).  Classes from
 * TC_EOF upwards have no tokenlist group; next_token() assigns TC_EOF,
 * TC_STRING and TC_NUMBER (and TC_REGEXP) directly.
 */
123 /* simple token classes */
124 /* Order and hex values are very important!!!  See next_token() */
125 #define TC_SEQSTART      1                              /* ( */
126 #define TC_SEQTERM      (1 << 1)                /* ) */
127 #define TC_REGEXP       (1 << 2)                /* /.../ */
128 #define TC_OUTRDR       (1 << 3)                /* | > >> */
129 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
130 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
131 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
132 #define TC_IN           (1 << 7)
133 #define TC_COMMA        (1 << 8)
134 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
135 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
136 #define TC_ARRTERM      (1 << 11)               /* ] */
137 #define TC_GRPSTART     (1 << 12)               /* { */
138 #define TC_GRPTERM      (1 << 13)               /* } */
139 #define TC_SEMICOL      (1 << 14)
140 #define TC_NEWLINE      (1 << 15)
141 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
142 #define TC_WHILE        (1 << 17)
143 #define TC_ELSE         (1 << 18)
144 #define TC_BUILTIN      (1 << 19)
145 #define TC_GETLINE      (1 << 20)
146 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
147 #define TC_BEGIN        (1 << 22)
148 #define TC_END          (1 << 23)
149 #define TC_EOF          (1 << 24)
150 #define TC_VARIABLE     (1 << 25)
151 #define TC_ARRAY        (1 << 26)
152 #define TC_FUNCTION     (1 << 27)
153 #define TC_STRING       (1 << 28)
154 #define TC_NUMBER       (1 << 29)
155
156 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
157
158 /* combined token classes */
159 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
163
164 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
165 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
166
167 /* word tokens, cannot mean something else if not expected */
/* (next_token() always tries TC_WORD and TC_NEWLINE entries, whatever the
 * caller expects, and rejects a word match that is followed by another
 * identifier character)
 */
168 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
170
171 /* discard newlines after these */
172 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173         TC_BINOP | TC_OPTERM)
174
175 /* what can expression begin with */
176 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
179
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183         TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
185
/* Operator flags stored in bits 16..21 of an info word.
 * NOTE(review): going by the combined names below, the "1"/"2" suffix
 * presumably refers to the left/right operand and RES/STR/NUM to how that
 * operand is resolved before the operation runs - confirm in evaluate().
 */
#define OF_RES1         0x010000
#define OF_RES2         0x020000
#define OF_STR1         0x040000
#define OF_STR2         0x080000
#define OF_NUM1         0x100000
#define OF_CHECKED      0x200000

/* combined operator flags: first letter describes operand 1, second
 * operand 2 (x = none, V = RES, S = RES|STR, N = RES|NUM)
 */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class (bits 8..15) and the low 7 bits of
 * an info word
 */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F

/* Operator priority lives in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument in that byte.  The argument is parenthesized and
 * converted to uint32_t first, so compound arguments expand correctly and
 * values >= 0x80 (used by the builtin entries) are not shifted into the
 * sign bit of a signed int.
 */
#define P(x)    ((uint32_t)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000
215
/* Operation class codes.  Every value is a multiple of 0x100, i.e. it
 * occupies exactly the bits selected by OPCLSMASK (0xFF00) in an info word.
 * NOTE(review): SHIFT_TIL_THIS (== OC_WALKINIT) and RECUR_FROM_THIS
 * (== OC_BINARY) look like range boundaries within this numbering; their
 * users are outside this section - confirm before renumbering anything.
 */
216 /* Operation classes */
217
218 #define SHIFT_TIL_THIS  0x0600
219 #define RECUR_FROM_THIS 0x1000
220
221 enum {
222         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
223         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
224
225         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
226         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
227         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
228
229         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
230         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
231         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
232         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
233         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
234         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
235         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
236         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
237         OC_DONE=0x2800,
238
239         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
240         ST_WHILE=0x3300
241 };
242
/* Builtin selectors, OR-ed into the low bits of an info word in tokeninfo[].
 * F_* values accompany OC_FBLTIN, B_* values accompany OC_BUILTIN (OC_B).
 * Going by the tokenlist/tokeninfo pairing:
 *   F_cl close, F_sy system, F_ff fflush, F_co cos, F_ex exp, F_in int,
 *   F_lg log, F_rn rand, F_si sin, F_sq sqrt, F_sr srand, F_le length,
 *   F_ti systime;
 *   B_a2 atan2, B_ge gensub, B_gs gsub, B_ix index, B_ma match, B_sp split,
 *   B_su sub, B_ss substr, B_ti strftime, B_lo tolower, B_up toupper.
 */
243 /* simple builtins */
244 enum {
245         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
246         F_ti,   F_le,   F_sy,   F_ff,   F_cl
247 };
248
249 /* builtins */
250 enum {
251         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
252         B_ge,   B_gs,   B_su
253 };
254
/* tokenlist is a packed table scanned by next_token().  Each entry is one
 * length byte (written as an octal escape) followed by that many characters
 * of token text.  An NTC byte in the length position closes the current
 * token class and makes the scanner move to the next class bit; a zero
 * length byte ends the table.  Entries are tried in order, so where one
 * token is a prefix of another in the same class the longer one comes first
 * (e.g. ">>" before ">").  tokeninfo[] holds one info word per entry here,
 * in the same order, so the two tables must be edited together.
 */
255 /* tokens and their corresponding info values */
256
257 #define NTC             "\377"          /* switch to next token class (tc<<1) */
258 #define NTCC    '\377'
259
260 #define OC_B    OC_BUILTIN
261
262 static char * const tokenlist =
263         "\1("           NTC
264         "\1)"           NTC
265         "\1/"           NTC                                                                     /* REGEXP */
266         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
267         "\2++"          "\2--"          NTC                                             /* UOPPOST */
268         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
269         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
270         "\2*="          "\2/="          "\2%="          "\2^="
271         "\1+"           "\1-"           "\3**="         "\2**"
272         "\1/"           "\1%"           "\1^"           "\1*"
273         "\2!="          "\2>="          "\2<="          "\1>"
274         "\1<"           "\2!~"          "\1~"           "\2&&"
275         "\2||"          "\1?"           "\1:"           NTC
276         "\2in"          NTC
277         "\1,"           NTC
278         "\1|"           NTC
279         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
280         "\1]"           NTC
281         "\1{"           NTC
282         "\1}"           NTC
283         "\1;"           NTC
284         "\1\n"          NTC
285         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
286         "\10continue"                   "\6delete"      "\5print"
287         "\6printf"      "\4next"        "\10nextfile"
288         "\6return"      "\4exit"        NTC
289         "\5while"       NTC
290         "\4else"        NTC
291
292         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
293         "\3cos"         "\3exp"         "\3int"         "\3log"
294         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
295         "\6gensub"      "\4gsub"        "\5index"       "\6length"
296         "\5match"       "\5split"       "\7sprintf"     "\3sub"
297         "\6substr"      "\7systime"     "\10strftime"
298         "\7tolower"     "\7toupper"     NTC
299         "\7getline"     NTC
300         "\4func"        "\10function"   NTC
301         "\5BEGIN"       NTC
302         "\3END"         "\0"
303         ;
304
/* Info words for the tokens in tokenlist, one per entry and in the same
 * order: next_token() advances its index into this array in lockstep with
 * the tokenlist entries and copies the matching word into t.info.
 * A word combines an operation class (OC_ or ST_ value), operand flags
 * (xV, NV, SS, ...), a priority or builtin argument descriptor placed by
 * P(), and a low-order selector (a character, comparison code, F_ or B_ id).
 * Entries that are 0 belong to tokens that carry no extra information.
 */
305 static const uint32_t tokeninfo[] = {
306
307         0,
308         0,
309         OC_REGEXP,
310         xS|'a',         xS|'w',         xS|'|',
311         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
312         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
313                 OC_FIELD|xV|P(5),
314         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
315                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
316         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
317                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
318         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
319                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
320         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
321                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
322         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
323                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
324         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
325                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
326         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
327                 OC_COLON|xx|P(67)|':',
328         OC_IN|SV|P(49),
329         OC_COMMA|SS|P(80),
330         OC_PGETLINE|SV|P(37),
331         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
332                 OC_UNARY|xV|P(19)|'!',
333         0,
334         0,
335         0,
336         0,
337         0,
338         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
339         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
340         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
341         OC_RETURN|Vx,   OC_EXIT|Nx,
342         ST_WHILE,
343         0,
344
345         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
346         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
347         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
348         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
349         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
350         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
351         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
352         OC_GETLINE|SV|P(0),
353         0,      0,
354         0,
355         0
356 };
357
/* Internal variables.  The enum supplies the index of each variable in the
 * global V[] array; vNames lists the names in the same order as
 * NUL-separated words and vValues lists initial string values.
 * In vNames the '*' is written after the terminating NUL of a name
 * (e.g. "FS\0*"), so it presumably applies to the name just before it.
 * NOTE(review): vValues has fewer entries than vNames and ends in "\377",
 * which appears to mean "no further string defaults"; the code consuming
 * both lists is outside this section - confirm there.
 */
358 /* internal variable names and their initial values       */
359 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
360 enum {
361         CONVFMT=0,      OFMT,           FS,                     OFS,
362         ORS,            RS,                     RT,                     FILENAME,
363         SUBSEP,         ARGIND,         ARGC,           ARGV,
364         ERRNO,          FNR,
365         NR,                     NF,                     IGNORECASE,
366         ENVIRON,        F0,                     _intvarcount_
367 };
368
369 static char * vNames =
370         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
371         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
372         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
373         "ERRNO\0"       "FNR\0"
374         "NR\0"          "NF\0*"         "IGNORECASE\0*"
375         "ENVIRON\0"     "$\0*"          "\0";
376
377 static char * vValues =
378         "%.6g\0"        "%.6g\0"        " \0"           " \0"
379         "\n\0"          "\n\0"          "\0"            "\0"
380         "\034\0"
381         "\377";
382
/* A new hash starts with FIRST_PRIME buckets; hash_rebuild() then steps
 * through PRIMES[] each time the table has to grow.
 * (FIRST_PRIME used to end in a stray ';', which only worked because its
 * single use is a complete assignment statement.)
 */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
387
/* File-scope interpreter state.  Comments are limited to what the code in
 * this part of the file shows; the remaining variables are used further
 * down.
 */
388 /* globals */
389
390 extern char **environ;
391
392 static var * V[_intvarcount_];	/* internal variables, indexed by CONVFMT..F0 */
393 static chain beginseq, mainseq, endseq, *seq;
394 static int nextrec, nextfile;
395 static node *break_ptr, *continue_ptr;
396 static rstream *iF;
397 static xhash *vhash, *ahash, *fdhash, *fnhash;	/* vhash: newvar(), fdhash: newfile(), fnhash: newfunc() */
398 static char *programname;	/* printed as prefix by syntax_error() */
399 static short lineno;	/* line printed by syntax_error(); next_token() copies t.lineno here */
400 static int is_f0_split;
401 static int nfields;
402 static var *Fields;
403 static tsplitter fsplitter, rsplitter;
404 static nvblock *cb;	/* current temporary-variable block for nvalloc()/nvfree() */
405 static char *pos;	/* position where next_token() starts reading */
406 static char *buf;	/* scratch buffer; getvar_s() formats numbers into it */
407 static int icase;
408 static int exiting;
409
/* current token, filled in by next_token() */
410 static struct {
411         uint32_t tclass;
412         uint32_t info;
413         char *string;
414         double number;
415         short lineno;
416         int rollback;
417 } t;
418
/* Forward declarations for functions that are defined later in the file
 * but called earlier (handle_special() by the setvar helpers and copyvar(),
 * fmt_num() by getvar_s()).
 */
419 /* function prototypes */
420 static void handle_special(var *);
421 static node *parse_expr(uint32_t);
422 static void chain_group(void);
423 static var *evaluate(node *, var *);
424 static rstream *next_input_file(void);
425 static int fmt_num(char *, int, const char *, double, int);
426 static int awk_exit(int) ATTRIBUTE_NORETURN;
427
428 /* ---- error handling ---- */
429
/* Message texts passed to syntax_error()/runtime_error().
 * EMSG_NO_MATH exists only when CONFIG_FEATURE_AWK_MATH is not defined.
 */
430 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
431 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
432 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
433 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
434 static const char EMSG_INV_FMT[] = "Invalid format specifier";
435 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
436 static const char EMSG_NOT_ARRAY[] = "Not an array";
437 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
438 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
439 #ifndef CONFIG_FEATURE_AWK_MATH
440 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
441 #endif
442
/* Report `message' as "<programname>:<lineno>: <message>" through
 * bb_error_msg_and_die().  Declared ATTRIBUTE_NORETURN: callers do not
 * expect control to come back.
 */
443 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
444 static void syntax_error(const char * const message)
445 {
446         bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
447 }
448
/* runtime errors are reported through exactly the same path */
449 #define runtime_error(x) syntax_error(x)
450
451
452 /* ---- hash stuff ---- */
453
/* Hash a NUL-terminated name: h = h * 63 + c for every character c.
 * The caller reduces the result modulo the table size.
 */
static unsigned int hashidx(const char *name)
{
	unsigned int h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63 + *p;
	return h;
}
461
462 /* create new hash */
463 static xhash *hash_init(void)
464 {
465         xhash *newhash;
466
467         newhash = (xhash *)xzalloc(sizeof(xhash));
468         newhash->csize = FIRST_PRIME;
469         newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
470
471         return newhash;
472 }
473
474 /* find item in hash, return ptr to data, NULL if not found */
475 static void *hash_search(xhash *hash, const char *name)
476 {
477         hash_item *hi;
478
479         hi = hash->items [ hashidx(name) % hash->csize ];
480         while (hi) {
481                 if (strcmp(hi->name, name) == 0)
482                         return &(hi->data);
483                 hi = hi->next;
484         }
485         return NULL;
486 }
487
488 /* grow hash if it becomes too big */
489 static void hash_rebuild(xhash *hash)
490 {
491         unsigned int newsize, i, idx;
492         hash_item **newitems, *hi, *thi;
493
494         if (hash->nprime == NPRIMES)
495                 return;
496
497         newsize = PRIMES[hash->nprime++];
498         newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
499
500         for (i=0; i<hash->csize; i++) {
501                 hi = hash->items[i];
502                 while (hi) {
503                         thi = hi;
504                         hi = thi->next;
505                         idx = hashidx(thi->name) % newsize;
506                         thi->next = newitems[idx];
507                         newitems[idx] = thi;
508                 }
509         }
510
511         free(hash->items);
512         hash->csize = newsize;
513         hash->items = newitems;
514 }
515
516 /* find item in hash, add it if necessary. Return ptr to data */
517 static void *hash_find(xhash *hash, const char *name)
518 {
519         hash_item *hi;
520         unsigned int idx;
521         int l;
522
523         hi = hash_search(hash, name);
524         if (! hi) {
525                 if (++hash->nel / hash->csize > 10)
526                         hash_rebuild(hash);
527
528                 l = strlen(name) + 1;
529                 hi = xzalloc(sizeof(hash_item) + l);
530                 memcpy(hi->name, name, l);
531
532                 idx = hashidx(name) % hash->csize;
533                 hi->next = hash->items[idx];
534                 hash->items[idx] = hi;
535                 hash->glen += l;
536         }
537         return &(hi->data);
538 }
539
/* Typed wrappers around hash_find().  Each expansion is fully
 * parenthesized so the result can be used directly in a larger expression
 * such as findvar(h, n)->type; without the outer parentheses the cast
 * would bind to the whole postfix expression instead.
 */
#define findvar(hash, name) ((var *) hash_find((hash), (name)))
#define newvar(name) ((var *) hash_find(vhash, (name)))
#define newfile(name) ((rstream *) hash_find(fdhash, (name)))
#define newfunc(name) ((func *) hash_find(fnhash, (name)))
544
545 static void hash_remove(xhash *hash, const char *name)
546 {
547         hash_item *hi, **phi;
548
549         phi = &(hash->items[ hashidx(name) % hash->csize ]);
550         while (*phi) {
551                 hi = *phi;
552                 if (strcmp(hi->name, name) == 0) {
553                         hash->glen -= (strlen(name) + 1);
554                         hash->nel--;
555                         *phi = hi->next;
556                         free(hi);
557                         break;
558                 }
559                 phi = &(hi->next);
560         }
561 }
562
563 /* ------ some useful functions ------ */
564
565 static void skip_spaces(char **s)
566 {
567         char *p = *s;
568
569         while(*p == ' ' || *p == '\t' ||
570                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
571                 p++;
572         }
573         *s = p;
574 }
575
/* Return the NUL-terminated word at *s and move *s to the first byte
 * after that word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
584
/* Fetch one character from *s and advance *s.  A backslash is handed to
 * bb_process_escape_sequence(); if that returns a backslash without
 * consuming anything, the character following the backslash is returned
 * (and consumed) instead.
 */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		(*s)++;
	}
	return ch;
}
595
/* Return 1 if c can be part of an identifier (letter, digit or '_'),
 * 0 otherwise.  Callers pass plain char values, which may be negative
 * where char is signed; the value is converted to unsigned char first
 * because isalnum() is only defined for EOF and unsigned char values.
 */
static inline int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
600
/* Open `path' with xfopen(), except that the name "-" yields stdin
 * (in which case `mode' is not used).
 */
static FILE *afopen(const char *path, const char *mode)
{
	if (strcmp(path, "-") == 0)
		return stdin;
	return xfopen(path, mode);
}
605
606 /* -------- working with variables (set/get/copy/etc) -------- */
607
608 static xhash *iamarray(var *v)
609 {
610         var *a = v;
611
612         while (a->type & VF_CHILD)
613                 a = a->x.parent;
614
615         if (! (a->type & VF_ARRAY)) {
616                 a->type |= VF_ARRAY;
617                 a->x.array = hash_init();
618         }
619         return a->x.array;
620 }
621
622 static void clear_array(xhash *array)
623 {
624         unsigned int i;
625         hash_item *hi, *thi;
626
627         for (i=0; i<array->csize; i++) {
628                 hi = array->items[i];
629                 while (hi) {
630                         thi = hi;
631                         hi = hi->next;
632                         free(thi->data.v.string);
633                         free(thi);
634                 }
635                 array->items[i] = NULL;
636         }
637         array->glen = array->nel = 0;
638 }
639
640 /* clear a variable */
641 static var *clrvar(var *v)
642 {
643         if (!(v->type & VF_FSTR))
644                 free(v->string);
645
646         v->type &= VF_DONTTOUCH;
647         v->type |= VF_DIRTY;
648         v->string = NULL;
649         return v;
650 }
651
652 /* assign string value to variable */
653 static var *setvar_p(var *v, char *value)
654 {
655         clrvar(v);
656         v->string = value;
657         handle_special(v);
658
659         return v;
660 }
661
662 /* same as setvar_p but make a copy of string */
663 static var *setvar_s(var *v, const char *value)
664 {
665         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
666 }
667
668 /* same as setvar_s but set USER flag */
669 static var *setvar_u(var *v, const char *value)
670 {
671         setvar_s(v, value);
672         v->type |= VF_USER;
673         return v;
674 }
675
676 /* set array element to user string */
677 static void setari_u(var *a, int idx, const char *s)
678 {
679         var *v;
680         static char sidx[12];
681
682         sprintf(sidx, "%d", idx);
683         v = findvar(iamarray(a), sidx);
684         setvar_u(v, s);
685 }
686
687 /* assign numeric value to variable */
688 static var *setvar_i(var *v, double value)
689 {
690         clrvar(v);
691         v->type |= VF_NUMBER;
692         v->number = value;
693         handle_special(v);
694         return v;
695 }
696
697 static char *getvar_s(var *v)
698 {
699         /* if v is numeric and has no cached string, convert it to string */
700         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
701                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
702                 v->string = xstrdup(buf);
703                 v->type |= VF_CACHED;
704         }
705         return (v->string == NULL) ? "" : v->string;
706 }
707
708 static double getvar_i(var *v)
709 {
710         char *s;
711
712         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
713                 v->number = 0;
714                 s = v->string;
715                 if (s && *s) {
716                         v->number = strtod(s, &s);
717                         if (v->type & VF_USER) {
718                                 skip_spaces(&s);
719                                 if (*s != '\0')
720                                         v->type &= ~VF_USER;
721                         }
722                 } else {
723                         v->type &= ~VF_USER;
724                 }
725                 v->type |= VF_CACHED;
726         }
727         return v->number;
728 }
729
730 static var *copyvar(var *dest, const var *src)
731 {
732         if (dest != src) {
733                 clrvar(dest);
734                 dest->type |= (src->type & ~VF_DONTTOUCH);
735                 dest->number = src->number;
736                 if (src->string)
737                         dest->string = xstrdup(src->string);
738         }
739         handle_special(dest);
740         return dest;
741 }
742
743 static var *incvar(var *v)
744 {
745         return setvar_i(v, getvar_i(v)+1.);
746 }
747
748 /* return true if v is number or numeric string */
749 static int is_numeric(var *v)
750 {
751         getvar_i(v);
752         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
753 }
754
755 /* return 1 when value of v corresponds to true, 0 otherwise */
756 static int istrue(var *v)
757 {
758         if (is_numeric(v))
759                 return (v->number == 0) ? 0 : 1;
760         else
761                 return (v->string && *(v->string)) ? 1 : 0;
762 }
763
764 /* temporary variables allocator. Last allocated should be first freed */
765 static var *nvalloc(int n)
766 {
767         nvblock *pb = NULL;
768         var *v, *r;
769         int size;
770
771         while (cb) {
772                 pb = cb;
773                 if ((cb->pos - cb->nv) + n <= cb->size) break;
774                 cb = cb->next;
775         }
776
777         if (! cb) {
778                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
779                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
780                 cb->size = size;
781                 cb->pos = cb->nv;
782                 cb->prev = pb;
783                 cb->next = NULL;
784                 if (pb) pb->next = cb;
785         }
786
787         v = r = cb->pos;
788         cb->pos += n;
789
790         while (v < cb->pos) {
791                 v->type = 0;
792                 v->string = NULL;
793                 v++;
794         }
795
796         return r;
797 }
798
799 static void nvfree(var *v)
800 {
801         var *p;
802
803         if (v < cb->nv || v >= cb->pos)
804                 runtime_error(EMSG_INTERNAL_ERROR);
805
806         for (p=v; p<cb->pos; p++) {
807                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
808                         clear_array(iamarray(p));
809                         free(p->x.array->items);
810                         free(p->x.array);
811                 }
812                 if (p->type & VF_WALK)
813                         free(p->x.walker);
814
815                 clrvar(p);
816         }
817
818         cb->pos = v;
819         while (cb->prev && cb->pos == cb->nv) {
820                 cb = cb->prev;
821         }
822 }
823
824 /* ------- awk program text parsing ------- */
825
826 /* Parse next token pointed by global pos, place results into global t.
827  * If token isn't expected, give away. Return token class
828  */
829 static uint32_t next_token(uint32_t expected)
830 {
831         char *p, *pp, *s;
832         char *tl;
833         uint32_t tc;
834         const uint32_t *ti;
835         int l;
836         static int concat_inserted;
837         static uint32_t save_tclass, save_info;
838         static uint32_t ltclass = TC_OPTERM;
839
840         if (t.rollback) {
841
842                 t.rollback = FALSE;
843
844         } else if (concat_inserted) {
845
846                 concat_inserted = FALSE;
847                 t.tclass = save_tclass;
848                 t.info = save_info;
849
850         } else {
851
852                 p = pos;
853
854         readnext:
855                 skip_spaces(&p);
856                 lineno = t.lineno;
857                 if (*p == '#')
858                         while (*p != '\n' && *p != '\0') p++;
859
860                 if (*p == '\n')
861                         t.lineno++;
862
863                 if (*p == '\0') {
864                         tc = TC_EOF;
865
866                 } else if (*p == '\"') {
867                         /* it's a string */
868                         t.string = s = ++p;
869                         while (*p != '\"') {
870                                 if (*p == '\0' || *p == '\n')
871                                         syntax_error(EMSG_UNEXP_EOS);
872                                 *(s++) = nextchar(&p);
873                         }
874                         p++;
875                         *s = '\0';
876                         tc = TC_STRING;
877
878                 } else if ((expected & TC_REGEXP) && *p == '/') {
879                         /* it's regexp */
880                         t.string = s = ++p;
881                         while (*p != '/') {
882                                 if (*p == '\0' || *p == '\n')
883                                         syntax_error(EMSG_UNEXP_EOS);
884                                 if ((*s++ = *p++) == '\\') {
885                                         pp = p;
886                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
887                                         if (*pp == '\\') *s++ = '\\';
888                                         if (p == pp) *s++ = *p++;
889                                 }
890                         }
891                         p++;
892                         *s = '\0';
893                         tc = TC_REGEXP;
894
895                 } else if (*p == '.' || isdigit(*p)) {
896                         /* it's a number */
897                         t.number = strtod(p, &p);
898                         if (*p == '.')
899                                 syntax_error(EMSG_UNEXP_TOKEN);
900                         tc = TC_NUMBER;
901
902                 } else {
903                         /* search for something known */
904                         tl = tokenlist;
905                         tc = 0x00000001;
906                         ti = tokeninfo;
907                         while (*tl) {
908                                 l = *(tl++);
909                                 if (l == NTCC) {
910                                         tc <<= 1;
911                                         continue;
912                                 }
913                                 /* if token class is expected, token
914                                  * matches and it's not a longer word,
915                                  * then this is what we are looking for
916                                  */
917                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
918                                 *tl == *p && strncmp(p, tl, l) == 0 &&
919                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
920                                         t.info = *ti;
921                                         p += l;
922                                         break;
923                                 }
924                                 ti++;
925                                 tl += l;
926                         }
927
928                         if (! *tl) {
929                                 /* it's a name (var/array/function),
930                                  * otherwise it's something wrong
931                                  */
932                                 if (! isalnum_(*p))
933                                         syntax_error(EMSG_UNEXP_TOKEN);
934
935                                 t.string = --p;
936                                 while(isalnum_(*(++p))) {
937                                         *(p-1) = *p;
938                                 }
939                                 *(p-1) = '\0';
940                                 tc = TC_VARIABLE;
941                                 /* also consume whitespace between functionname and bracket */
942                                 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
943                                 if (*p == '(') {
944                                         tc = TC_FUNCTION;
945                                 } else {
946                                         if (*p == '[') {
947                                                 p++;
948                                                 tc = TC_ARRAY;
949                                         }
950                                 }
951                         }
952                 }
953                 pos = p;
954
955                 /* skipping newlines in some cases */
956                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
957                         goto readnext;
958
959                 /* insert concatenation operator when needed */
960                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
961                         concat_inserted = TRUE;
962                         save_tclass = tc;
963                         save_info = t.info;
964                         tc = TC_BINOP;
965                         t.info = OC_CONCAT | SS | P(35);
966                 }
967
968                 t.tclass = tc;
969         }
970         ltclass = t.tclass;
971
972         /* Are we ready for this? */
973         if (! (ltclass & expected))
974                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
975                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
976
977         return ltclass;
978 }
979
980 static void rollback_token(void) { t.rollback = TRUE; }
981
982 static node *new_node(uint32_t info)
983 {
984         node *n;
985
986         n = (node *)xzalloc(sizeof(node));
987         n->info = info;
988         n->lineno = lineno;
989         return n;
990 }
991
992 static node *mk_re_node(char *s, node *n, regex_t *re)
993 {
994         n->info = OC_REGEXP;
995         n->l.re = re;
996         n->r.ire = re + 1;
997         xregcomp(re, s, REG_EXTENDED);
998         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
999
1000         return n;
1001 }
1002
1003 static node *condition(void)
1004 {
1005         next_token(TC_SEQSTART);
1006         return parse_expr(TC_SEQTERM);
1007 }
1008
1009 /* parse expression terminated by given argument, return ptr
1010  * to built subtree. Terminator is eaten by parse_expr.
 *
 * iexp: token class(es) that terminate the expression.
 * Returns sn.r.n, the subtree hanging off the local dummy root; this is
 * NULL when the terminator is the very first token read.
 */
1011 static node *parse_expr(uint32_t iexp)
1012 {
1013         node sn;
1014         node *cn = &sn;
1015         node *vn, *glptr;
1016         uint32_t tc, xtc;
1017         var *v;
1018
             /* sn is a stack-allocated dummy root; cn always points at the
              * node added most recently. glptr remembers a getline node so a
              * following '<' can be attached to it. */
1019         sn.info = PRIMASK;
1020         sn.r.n = glptr = NULL;
             /* xtc is the set of token classes accepted by the next
              * next_token() call; every branch below updates it */
1021         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1022
1023         while (! ((tc = next_token(xtc)) & iexp)) {
1024                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1025                         /* input redirection (<) attached to glptr node */
1026                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1027                         cn->a.n = glptr;
1028                         xtc = TC_OPERAND | TC_UOPPRE;
1029                         glptr = NULL;
1030
1031                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1032                         /* for binary and postfix-unary operators, jump back over
1033                          * previous operators with higher priority */
1034                         vn = cn;
1035                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1036                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1037                                 vn = vn->a.n;
1038                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1039                                 t.info += P(6);
                             /* splice the new operator node in between vn and the
                              * node vn->a.n that currently sits above it */
1040                         cn = vn->a.n->r.n = new_node(t.info);
1041                         cn->a.n = vn->a.n;
1042                         if (tc & TC_BINOP) {
1043                                 cn->l.n = vn;
1044                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1045                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1046                                         /* it's a pipe */
1047                                         next_token(TC_GETLINE);
1048                                         /* give maximum priority to this pipe */
1049                                         cn->info &= ~PRIMASK;
1050                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1051                                 }
1052                         } else {
1053                                 cn->r.n = vn;
1054                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1055                         }
1056                         vn->a.n = cn;
1057
1058                 } else {
1059                         /* for operands and prefix-unary operators, attach them
1060                          * to last node */
1061                         vn = cn;
1062                         cn = vn->r.n = new_node(t.info);
1063                         cn->a.n = vn;
1064                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1065                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1066                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1067                                 /* one should be very careful with switch on tclass -
1068                                  * only simple tclasses should be used! */
1069                                 switch (tc) {
1070                                   case TC_VARIABLE:
1071                                   case TC_ARRAY:
                                             /* a name found in ahash becomes an OC_FNARG
                                              * carrying its argument index; any other name
                                              * is resolved through newvar() */
1072                                         cn->info = OC_VAR;
1073                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1074                                                 cn->info = OC_FNARG;
1075                                                 cn->l.i = v->x.aidx;
1076                                         } else {
1077                                                 cn->l.v = newvar(t.string);
1078                                         }
1079                                         if (tc & TC_ARRAY) {
                                                     /* subscript expression, parsed up to
                                                      * the TC_ARRTERM token */
1080                                                 cn->info |= xS;
1081                                                 cn->r.n = parse_expr(TC_ARRTERM);
1082                                         }
1083                                         break;
1084
1085                                   case TC_NUMBER:
1086                                   case TC_STRING:
                                             /* literal: stored in a freshly allocated var */
1087                                         cn->info = OC_VAR;
1088                                         v = cn->l.v = xzalloc(sizeof(var));
1089                                         if (tc & TC_NUMBER)
1090                                                 setvar_i(v, t.number);
1091                                         else
1092                                                 setvar_s(v, t.string);
1093                                         break;
1094
1095                                   case TC_REGEXP:
                                             /* storage for two regex_t: mk_re_node() uses
                                              * re and re+1 */
1096                                         mk_re_node(t.string, cn,
1097                                                                         (regex_t *)xzalloc(sizeof(regex_t)*2));
1098                                         break;
1099
1100                                   case TC_FUNCTION:
1101                                         cn->info = OC_FUNC;
1102                                         cn->r.f = newfunc(t.string);
1103                                         cn->l.n = condition();
1104                                         break;
1105
1106                                   case TC_SEQSTART:
                                             /* parenthesized sub-expression: its subtree
                                              * takes the place of the node created above.
                                              * NOTE(review): that node is no longer
                                              * referenced after this assignment. */
1107                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1108                                         cn->a.n = vn;
1109                                         break;
1110
1111                                   case TC_GETLINE:
                                             /* remembered so the glptr test at the top of
                                              * the loop can attach a following '<' */
1112                                         glptr = cn;
1113                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1114                                         break;
1115
1116                                   case TC_BUILTIN:
1117                                         cn->l.n = condition();
1118                                         break;
1119                                 }
1120                         }
1121                 }
1122         }
1123         return sn.r.n;
1124 }
1125
1126 /* add node to chain. Return ptr to alloc'd node */
1127 static node *chain_node(uint32_t info)
1128 {
1129         node *n;
1130
1131         if (! seq->first)
1132                 seq->first = seq->last = new_node(0);
1133
1134         if (seq->programname != programname) {
1135                 seq->programname = programname;
1136                 n = chain_node(OC_NEWSOURCE);
1137                 n->l.s = xstrdup(programname);
1138         }
1139
1140         n = seq->last;
1141         n->info = info;
1142         seq->last = n->a.n = new_node(OC_DONE);
1143
1144         return n;
1145 }
1146
1147 static void chain_expr(uint32_t info)
1148 {
1149         node *n;
1150
1151         n = chain_node(info);
1152         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1153         if (t.tclass & TC_GRPTERM)
1154                 rollback_token();
1155 }
1156
1157 static node *chain_loop(node *nn)
1158 {
1159         node *n, *n2, *save_brk, *save_cont;
1160
1161         save_brk = break_ptr;
1162         save_cont = continue_ptr;
1163
1164         n = chain_node(OC_BR | Vx);
1165         continue_ptr = new_node(OC_EXEC);
1166         break_ptr = new_node(OC_EXEC);
1167         chain_group();
1168         n2 = chain_node(OC_EXEC | Vx);
1169         n2->l.n = nn;
1170         n2->a.n = n;
1171         continue_ptr->a.n = n2;
1172         break_ptr->a.n = n->r.n = seq->last;
1173
1174         continue_ptr = save_cont;
1175         break_ptr = save_brk;
1176
1177         return n;
1178 }
1179
1180 /* parse group and attach it to chain.
 *
 * Leading newline tokens are skipped. The next token then selects one of:
 *  - TC_GRPSTART: recurse for each item until TC_GRPTERM is read;
 *  - TC_OPSEQ / TC_OPTERM: an expression statement (OC_EXEC | Vx);
 *  - otherwise a statement keyword, dispatched on t.info & OPCLSMASK.
 */
1181 static void chain_group(void)
1182 {
1183         uint32_t c;
1184         node *n, *n2, *n3;
1185
1186         do {
1187                 c = next_token(TC_GRPSEQ);
1188         } while (c & TC_NEWLINE);
1189
1190         if (c & TC_GRPSTART) {
1191                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1192                         if (t.tclass & TC_NEWLINE) continue;
                             /* not a newline: give the token back and let the
                              * recursive call parse it */
1193                         rollback_token();
1194                         chain_group();
1195                 }
1196         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1197                 rollback_token();
1198                 chain_expr(OC_EXEC | Vx);
1199         } else {                                                /* TC_STATEMNT */
1200                 switch (t.info & OPCLSMASK) {
1201                         case ST_IF:
                                     /* n: branch node holding the condition.
                                      * n2: OC_EXEC node chained right after the
                                      * "then" body. n->r.n is the node following n2;
                                      * with an else part, n2->a.n is redirected to
                                      * the node following the else body. */
1202                                 n = chain_node(OC_BR | Vx);
1203                                 n->l.n = condition();
1204                                 chain_group();
1205                                 n2 = chain_node(OC_EXEC);
1206                                 n->r.n = seq->last;
1207                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1208                                         chain_group();
1209                                         n2->a.n = seq->last;
1210                                 } else {
1211                                         rollback_token();
1212                                 }
1213                                 break;
1214
1215                         case ST_WHILE:
                                     /* condition is parsed first, then stored in the
                                      * head node returned by chain_loop() */
1216                                 n2 = condition();
1217                                 n = chain_loop(NULL);
1218                                 n->l.n = n2;
1219                                 break;
1220
1221                         case ST_DO:
                                     /* n2 is chained before the loop and linked to
                                      * the node following the loop head; the
                                      * condition is read after the body, following
                                      * a TC_WHILE token */
1222                                 n2 = chain_node(OC_EXEC);
1223                                 n = chain_loop(NULL);
1224                                 n2->a.n = n->a.n;
1225                                 next_token(TC_WHILE);
1226                                 n->l.n = condition();
1227                                 break;
1228
1229                         case ST_FOR:
1230                                 next_token(TC_SEQSTART);
1231                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1232                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
                                             /* the whole header must be a single
                                              * OC_IN expression */
1233                                         if ((n2->info & OPCLSMASK) != OC_IN)
1234                                                 syntax_error(EMSG_UNEXP_TOKEN);
1235                                         n = chain_node(OC_WALKINIT | VV);
1236                                         n->l.n = n2->l.n;
1237                                         n->r.n = n2->r.n;
1238                                         n = chain_loop(NULL);
1239                                         n->info = OC_WALKNEXT | Vx;
1240                                         n->l.n = n2->l.n;
1241                                 } else {                                                                        /* for(;;) */
                                             /* n2 is first the init expression, then
                                              * the condition; n3 is the third
                                              * expression, handed to chain_loop() */
1242                                         n = chain_node(OC_EXEC | Vx);
1243                                         n->l.n = n2;
1244                                         n2 = parse_expr(TC_SEMICOL);
1245                                         n3 = parse_expr(TC_SEQTERM);
1246                                         n = chain_loop(n3);
1247                                         n->l.n = n2;
                                             /* empty condition: the head becomes a
                                              * plain OC_EXEC node */
1248                                         if (! n2)
1249                                                 n->info = OC_EXEC;
1250                                 }
1251                                 break;
1252
1253                         case OC_PRINT:
1254                         case OC_PRINTF:
1255                                 n = chain_node(t.info);
1256                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1257                                 if (t.tclass & TC_OUTRDR) {
                                             /* output redirection: merge the token's
                                              * info bits and parse the target */
1258                                         n->info |= t.info;
1259                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1260                                 }
1261                                 if (t.tclass & TC_GRPTERM)
1262                                         rollback_token();
1263                                 break;
1264
1265                         case OC_BREAK:
1266                                 n = chain_node(OC_EXEC);
1267                                 n->a.n = break_ptr;
1268                                 break;
1269
1270                         case OC_CONTINUE:
1271                                 n = chain_node(OC_EXEC);
1272                                 n->a.n = continue_ptr;
1273                                 break;
1274
1275                         /* delete, next, nextfile, return, exit */
1276                         default:
1277                                 chain_expr(t.info);
1278
1279                 }
1280         }
1281 }
1282
/* Parse the program text p.
 *
 * Tokens are read until TC_EOF. Each top-level item is chained into
 * beginseq (BEGIN), endseq (END), the body of a newly declared function,
 * or mainseq (pattern and/or action).
 */
1283 static void parse_program(char *p)
1284 {
1285         uint32_t tclass;
1286         node *cn;
1287         func *f;
1288         var *v;
1289
1290         pos = p;
1291         t.lineno = 1;
1292         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1293                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1294
                     /* stray terminators between items are ignored */
1295                 if (tclass & TC_OPTERM)
1296                         continue;
1297
1298                 seq = &mainseq;
1299                 if (tclass & TC_BEGIN) {
1300                         seq = &beginseq;
1301                         chain_group();
1302
1303                 } else if (tclass & TC_END) {
1304                         seq = &endseq;
1305                         chain_group();
1306
1307                 } else if (tclass & TC_FUNCDECL) {
1308                         next_token(TC_FUNCTION);
                             /* next_token() leaves pos on the '(' of a
                              * TC_FUNCTION token; step over it */
1309                         pos++;
1310                         f = newfunc(t.string);
1311                         f->body.first = NULL;
1312                         f->nargs = 0;
                             /* record each parameter name in ahash together with
                              * its position in the argument list */
1313                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1314                                 v = findvar(ahash, t.string);
1315                                 v->x.aidx = (f->nargs)++;
1316
1317                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1318                                         break;
1319                         }
1320                         seq = &(f->body);
1321                         chain_group();
                             /* parameter names are only needed while the body
                              * is being parsed */
1322                         clear_array(ahash);
1323
1324                 } else if (tclass & TC_OPSEQ) {
                             /* pattern: cn tests the expression; it is followed
                              * by the action group or, when none is given, by an
                              * OC_PRINT node. cn->r.n is set to the node after
                              * that. */
1325                         rollback_token();
1326                         cn = chain_node(OC_TEST);
1327                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1328                         if (t.tclass & TC_GRPSTART) {
1329                                 rollback_token();
1330                                 chain_group();
1331                         } else {
1332                                 chain_node(OC_PRINT);
1333                         }
1334                         cn->r.n = mainseq.last;
1335
1336                 } else /* if (tclass & TC_GRPSTART) */ {
1337                         rollback_token();
1338                         chain_group();
1339                 }
1340         }
1341 }
1342
1343
1344 /* -------- program execution part -------- */
1345
1346 static node *mk_splitter(char *s, tsplitter *spl)
1347 {
1348         regex_t *re, *ire;
1349         node *n;
1350
1351         re = &spl->re[0];
1352         ire = &spl->re[1];
1353         n = &spl->n;
1354         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1355                 regfree(re);
1356                 regfree(ire);
1357         }
1358         if (strlen(s) > 1) {
1359                 mk_re_node(s, n, re);
1360         } else {
1361                 n->info = (uint32_t) *s;
1362         }
1363
1364         return n;
1365 }
1366
1367 /* use node as a regular expression. Supplied with node ptr and regex_t
1368  * storage space. Return ptr to regex (if result points to preg, it should
1369  * be later regfree'd manually
1370  */
1371 static regex_t *as_regex(node *op, regex_t *preg)
1372 {
1373         var *v;
1374         char *s;
1375
1376         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1377                 return icase ? op->r.ire : op->l.re;
1378         } else {
1379                 v = nvalloc(1);
1380                 s = getvar_s(evaluate(op, v));
1381                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1382                 nvfree(v);
1383                 return preg;
1384         }
1385 }
1386
/* Gradually growing buffer: make sure *b can hold more than n bytes.
 *
 * Nothing happens when *b is already allocated and n is below *size.
 * Otherwise *size becomes n + n/2 + 80 and *b is reallocated to it. */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1393
1394 /* resize field storage space */
1395 static void fsrealloc(int size)
1396 {
1397         static int maxfields = 0;
1398         int i;
1399
1400         if (size >= maxfields) {
1401                 i = maxfields;
1402                 maxfields = size + 16;
1403                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1404                 for (; i<maxfields; i++) {
1405                         Fields[i].type = VF_SPECIAL;
1406                         Fields[i].string = NULL;
1407                 }
1408         }
1409
1410         if (size < nfields) {
1411                 for (i=size; i<nfields; i++) {
1412                         clrvar(Fields+i);
1413                 }
1414         }
1415         nfields = size;
1416 }
1417
1418 static int awk_split(char *s, node *spl, char **slist)
1419 {
1420         int l, n=0;
1421         char c[4];
1422         char *s1;
1423         regmatch_t pmatch[2];
1424
1425         /* in worst case, each char would be a separate field */
1426         *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1427
1428         c[0] = c[1] = (char)spl->info;
1429         c[2] = c[3] = '\0';
1430         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1431
1432         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1433                 while (*s) {
1434                         l = strcspn(s, c+2);
1435                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1436                         pmatch[0].rm_so <= l) {
1437                                 l = pmatch[0].rm_so;
1438                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1439                         } else {
1440                                 pmatch[0].rm_eo = l;
1441                                 if (*(s+l)) pmatch[0].rm_eo++;
1442                         }
1443
1444                         memcpy(s1, s, l);
1445                         *(s1+l) = '\0';
1446                         nextword(&s1);
1447                         s += pmatch[0].rm_eo;
1448                         n++;
1449                 }
1450         } else if (c[0] == '\0') {              /* null split */
1451                 while(*s) {
1452                         *(s1++) = *(s++);
1453                         *(s1++) = '\0';
1454                         n++;
1455                 }
1456         } else if (c[0] != ' ') {               /* single-character split */
1457                 if (icase) {
1458                         c[0] = toupper(c[0]);
1459                         c[1] = tolower(c[1]);
1460                 }
1461                 if (*s1) n++;
1462                 while ((s1 = strpbrk(s1, c))) {
1463                         *(s1++) = '\0';
1464                         n++;
1465                 }
1466         } else {                                /* space split */
1467                 while (*s) {
1468                         while (isspace(*s)) s++;
1469                         if (! *s) break;
1470                         n++;
1471                         while (*s && !isspace(*s))
1472                                 *(s1++) = *(s++);
1473                         *(s1++) = '\0';
1474                 }
1475         }
1476         return n;
1477 }
1478
1479 static void split_f0(void)
1480 {
1481         static char *fstrings = NULL;
1482         int i, n;
1483         char *s;
1484
1485         if (is_f0_split)
1486                 return;
1487
1488         is_f0_split = TRUE;
1489         free(fstrings);
1490         fsrealloc(0);
1491         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1492         fsrealloc(n);
1493         s = fstrings;
1494         for (i=0; i<n; i++) {
1495                 Fields[i].string = nextword(&s);
1496                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1497         }
1498
1499         /* set NF manually to avoid side effects */
1500         clrvar(V[NF]);
1501         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1502         V[NF]->number = nfields;
1503 }
1504
1505 /* perform additional actions when some internal variables changed */
1506 static void handle_special(var *v)
1507 {
1508         int n;
1509         char *b, *sep, *s;
1510         int sl, l, len, i, bsize;
1511
1512         if (! (v->type & VF_SPECIAL))
1513                 return;
1514
1515         if (v == V[NF]) {
1516                 n = (int)getvar_i(v);
1517                 fsrealloc(n);
1518
1519                 /* recalculate $0 */
1520                 sep = getvar_s(V[OFS]);
1521                 sl = strlen(sep);
1522                 b = NULL;
1523                 len = 0;
1524                 for (i=0; i<n; i++) {
1525                         s = getvar_s(&Fields[i]);
1526                         l = strlen(s);
1527                         if (b) {
1528                                 memcpy(b+len, sep, sl);
1529                                 len += sl;
1530                         }
1531                         qrealloc(&b, len+l+sl, &bsize);
1532                         memcpy(b+len, s, l);
1533                         len += l;
1534                 }
1535                 if (b) b[len] = '\0';
1536                 setvar_p(V[F0], b);
1537                 is_f0_split = TRUE;
1538
1539         } else if (v == V[F0]) {
1540                 is_f0_split = FALSE;
1541
1542         } else if (v == V[FS]) {
1543                 mk_splitter(getvar_s(v), &fsplitter);
1544
1545         } else if (v == V[RS]) {
1546                 mk_splitter(getvar_s(v), &rsplitter);
1547
1548         } else if (v == V[IGNORECASE]) {
1549                 icase = istrue(v);
1550
1551         } else {                                                /* $n */
1552                 n = getvar_i(V[NF]);
1553                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1554                 /* right here v is invalid. Just to note... */
1555         }
1556 }
1557
1558 /* step through func/builtin/etc arguments */
1559 static node *nextarg(node **pn)
1560 {
1561         node *n;
1562
1563         n = *pn;
1564         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1565                 *pn = n->r.n;
1566                 n = n->l.n;
1567         } else {
1568                 *pn = NULL;
1569         }
1570         return n;
1571 }
1572
1573 static void hashwalk_init(var *v, xhash *array)
1574 {
1575         char **w;
1576         hash_item *hi;
1577         int i;
1578
1579         if (v->type & VF_WALK)
1580                 free(v->x.walker);
1581
1582         v->type |= VF_WALK;
1583         w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
1584         *w = *(w+1) = (char *)(w + 2);
1585         for (i=0; i<array->csize; i++) {
1586                 hi = array->items[i];
1587                 while(hi) {
1588                         strcpy(*w, hi->name);
1589                         nextword(w);
1590                         hi = hi->next;
1591                 }
1592         }
1593 }
1594
1595 static int hashwalk_next(var *v)
1596 {
1597         char **w;
1598
1599         w = v->x.walker;
1600         if (*(w+1) == *w)
1601                 return FALSE;
1602
1603         setvar_s(v, nextword(w+1));
1604         return TRUE;
1605 }
1606
1607 /* evaluate node, return 1 when result is true, 0 otherwise */
1608 static int ptest(node *pattern)
1609 {
1610         static var v;
1611         return istrue(evaluate(pattern, &v));
1612 }
1613
1614 /* read next record from stream rsm into a variable v.
 *
 * Returns 1 when a record was stored in v (the text that terminated it
 * goes to RT), 0 when no data is left, and -1 after a read error (ERRNO
 * is set in that case).
 */
1615 static int awk_getline(rstream *rsm, var *v)
1616 {
1617         char *b;
1618         regmatch_t pmatch[2];
1619         int a, p, pp=0, size;
1620         int fd, so, eo, r, rp;
1621         char c, *m, *s;
1622
1623         /* we're using our own buffer since we need access to accumulating
1624          * characters
1625          */
             /* m: buffer, a: offset of the unconsumed data in it,
              * p: number of unconsumed bytes, size: buffer capacity.
              * c: low byte of the record splitter's info word. */
1626         fd = fileno(rsm->F);
1627         m = rsm->buffer;
1628         a = rsm->adv;
1629         p = rsm->pos;
1630         size = rsm->size;
1631         c = (char) rsplitter.n.info;
1632         rp = 0;
1633
1634         if (! m) qrealloc(&m, 256, &size);
1635         do {
                     /* so/eo: offsets (relative to b) of the start and end of
                      * the record terminator; they default to "all buffered
                      * data is the record, no terminator" */
1636                 b = m + a;
1637                 so = eo = p;
1638                 r = 1;
1639                 if (p > 0) {
1640                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1641                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1642                                                                                                 b, 1, pmatch, 0) == 0) {
1643                                         so = pmatch[0].rm_so;
1644                                         eo = pmatch[0].rm_eo;
                                             /* a match that reaches the end of the
                                              * buffered data is not accepted yet;
                                              * more input is read first */
1645                                         if (b[eo] != '\0')
1646                                                 break;
1647                                 }
1648                         } else if (c != '\0') {
                                     /* single-character separator; only the bytes
                                      * added by the last read (from pp on) are
                                      * searched. A NUL byte inside that data also
                                      * ends the record. */
1649                                 s = strchr(b+pp, c);
1650                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1651                                 if (s) {
1652                                         so = eo = s-b;
1653                                         eo++;
1654                                         break;
1655                                 }
1656                         } else {
                                     /* separator byte is '\0': records end at
                                      * "\n\n"; leading newlines are skipped (rp)
                                      * and the whole run of newlines forms the
                                      * terminator */
1657                                 while (b[rp] == '\n')
1658                                         rp++;
1659                                 s = strstr(b+rp, "\n\n");
1660                                 if (s) {
1661                                         so = eo = s-b;
1662                                         while (b[eo] == '\n') eo++;
1663                                         if (b[eo] != '\0')
1664                                                 break;
1665                                 }
1666                         }
1667                 }
1668
                     /* no complete record yet: move the pending data to the
                      * front of the buffer, grow it and read more */
1669                 if (a > 0) {
1670                         memmove(m, (const void *)(m+a), p+1);
1671                         b = m;
1672                         a = 0;
1673                 }
1674
1675                 qrealloc(&m, a+p+128, &size);
1676                 b = m + a;
1677                 pp = p;
1678                 p += safe_read(fd, b+p, size-p-1);
                     /* p shrank, i.e. safe_read() returned a negative value */
1679                 if (p < pp) {
1680                         p = 0;
1681                         r = 0;
1682                         setvar_i(V[ERRNO], errno);
1683                 }
1684                 b[p] = '\0';
1685
                     /* loop ends when the read added nothing */
1686         } while (p > pp);
1687
1688         if (p == 0) {
                     /* nothing buffered: r becomes 0, or -1 after an error */
1689                 r--;
1690         } else {
                     /* temporarily NUL-terminate the record and then the
                      * terminator so each can be copied as a string */
1691                 c = b[so]; b[so] = '\0';
1692                 setvar_s(v, b+rp);
1693                 v->type |= VF_USER;
1694                 b[so] = c;
1695                 c = b[eo]; b[eo] = '\0';
1696                 setvar_s(V[RT], b+so);
1697                 b[eo] = c;
1698         }
1699
             /* save the buffer state, skipping the record just consumed */
1700         rsm->buffer = m;
1701         rsm->adv = a + eo;
1702         rsm->pos = p - eo;
1703         rsm->size = size;
1704
1705         return r;
1706 }
1707
1708 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1709 {
1710         int r=0;
1711         char c;
1712         const char *s=format;
1713
1714         if (int_as_int && n == (int)n) {
1715                 r = snprintf(b, size, "%d", (int)n);
1716         } else {
1717                 do { c = *s; } while (*s && *++s);
1718                 if (strchr("diouxX", c)) {
1719                         r = snprintf(b, size, format, (int)n);
1720                 } else if (strchr("eEfgG", c)) {
1721                         r = snprintf(b, size, format, n);
1722                 } else {
1723                         runtime_error(EMSG_INV_FMT);
1724                 }
1725         }
1726         return r;
1727 }
1728
1729
1730 /* formatted output into an allocated buffer, return ptr to buffer */
1731 static char *awk_printf(node *n)
1732 {
1733         char *b = NULL;
1734         char *fmt, *s, *s1, *f;
1735         int i, j, incr, bsize;
1736         char c, c1;
1737         var *v, *arg;
1738
1739         v = nvalloc(1);
1740         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1741
1742         i = 0;
1743         while (*f) {
1744                 s = f;
1745                 while (*f && (*f != '%' || *(++f) == '%'))
1746                         f++;
1747                 while (*f && !isalpha(*f))
1748                         f++;
1749
1750                 incr = (f - s) + MAXVARFMT;
1751                 qrealloc(&b, incr+i, &bsize);
1752                 c = *f; if (c != '\0') f++;
1753                 c1 = *f ; *f = '\0';
1754                 arg = evaluate(nextarg(&n), v);
1755
1756                 j = i;
1757                 if (c == 'c' || !c) {
1758                         i += sprintf(b+i, s,
1759                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1760
1761                 } else if (c == 's') {
1762                     s1 = getvar_s(arg);
1763                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1764                         i += sprintf(b+i, s, s1);
1765
1766                 } else {
1767                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1768                 }
1769                 *f = c1;
1770
1771                 /* if there was an error while sprintf, return value is negative */
1772                 if (i < j) i = j;
1773
1774         }
1775
1776         b = xrealloc(b, i+1);
1777         free(fmt);
1778         nvfree(v);
1779         b[i] = '\0';
1780         return b;
1781 }
1782
1783 /* common substitution routine
1784  * replace (nm) substring of (src) that match (n) with (repl), store
1785  * result into (dest), return number of substitutions. If nm=0, replace
1786  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1787  * subexpression matching (\1-\9)
1788  */
1789 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1790 {
1791         char *ds = NULL;
1792         char *sp, *s;
1793         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1794         regmatch_t pmatch[10];
1795         regex_t sreg, *re;
1796
1797         re = as_regex(rn, &sreg);
1798         if (! src) src = V[F0];
1799         if (! dest) dest = V[F0];
1800
1801         i = di = 0;
1802         sp = getvar_s(src);
1803         rl = strlen(repl);
1804         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1805                 so = pmatch[0].rm_so;
1806                 eo = pmatch[0].rm_eo;
1807
1808                 qrealloc(&ds, di + eo + rl, &dssize);
1809                 memcpy(ds + di, sp, eo);
1810                 di += eo;
1811                 if (++i >= nm) {
1812                         /* replace */
1813                         di -= (eo - so);
1814                         nbs = 0;
1815                         for (s = repl; *s; s++) {
1816                                 ds[di++] = c = *s;
1817                                 if (c == '\\') {
1818                                         nbs++;
1819                                         continue;
1820                                 }
1821                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1822                                         di -= ((nbs + 3) >> 1);
1823                                         j = 0;
1824                                         if (c != '&') {
1825                                                 j = c - '0';
1826                                                 nbs++;
1827                                         }
1828                                         if (nbs % 2) {
1829                                                 ds[di++] = c;
1830                                         } else {
1831                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1832                                                 qrealloc(&ds, di + rl + n, &dssize);
1833                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1834                                                 di += n;
1835                                         }
1836                                 }
1837                                 nbs = 0;
1838                         }
1839                 }
1840
1841                 sp += eo;
1842                 if (i == nm) break;
1843                 if (eo == so) {
1844                         if (! (ds[di++] = *sp++)) break;
1845                 }
1846         }
1847
1848         qrealloc(&ds, di + strlen(sp), &dssize);
1849         strcpy(ds + di, sp);
1850         setvar_p(dest, ds);
1851         if (re == &sreg) regfree(re);
1852         return i;
1853 }
1854
1855 static var *exec_builtin(node *op, var *res)
1856 {
1857         int (*to_xxx)(int);
1858         var *tv;
1859         node *an[4];
1860         var  *av[4];
1861         char *as[4];
1862         regmatch_t pmatch[2];
1863         regex_t sreg, *re;
1864         static tsplitter tspl;
1865         node *spl;
1866         uint32_t isr, info;
1867         int nargs;
1868         time_t tt;
1869         char *s, *s1;
1870         int i, l, ll, n;
1871
1872         tv = nvalloc(4);
1873         isr = info = op->info;
1874         op = op->l.n;
1875
1876         av[2] = av[3] = NULL;
1877         for (i=0 ; i<4 && op ; i++) {
1878                 an[i] = nextarg(&op);
1879                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1880                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1881                 isr >>= 1;
1882         }
1883
1884         nargs = i;
1885         if (nargs < (info >> 30))
1886                 runtime_error(EMSG_TOO_FEW_ARGS);
1887
1888         switch (info & OPNMASK) {
1889
1890           case B_a2:
1891 #ifdef CONFIG_FEATURE_AWK_MATH
1892                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1893 #else
1894                 runtime_error(EMSG_NO_MATH);
1895 #endif
1896                 break;
1897
1898           case B_sp:
1899                 if (nargs > 2) {
1900                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1901                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1902                 } else {
1903                         spl = &fsplitter.n;
1904                 }
1905
1906                 n = awk_split(as[0], spl, &s);
1907                 s1 = s;
1908                 clear_array(iamarray(av[1]));
1909                 for (i=1; i<=n; i++)
1910                         setari_u(av[1], i, nextword(&s1));
1911                 free(s);
1912                 setvar_i(res, n);
1913                 break;
1914
1915           case B_ss:
1916                 l = strlen(as[0]);
1917                 i = getvar_i(av[1]) - 1;
1918                 if (i>l) i=l; if (i<0) i=0;
1919                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1920                 if (n<0) n=0;
1921                 s = xmalloc(n+1);
1922                 strncpy(s, as[0]+i, n);
1923                 s[n] = '\0';
1924                 setvar_p(res, s);
1925                 break;
1926
1927           case B_lo:
1928                 to_xxx = tolower;
1929                 goto lo_cont;
1930
1931           case B_up:
1932                 to_xxx = toupper;
1933 lo_cont:
1934                 s1 = s = xstrdup(as[0]);
1935                 while (*s1) {
1936                         *s1 = (*to_xxx)(*s1);
1937                         s1++;
1938                 }
1939                 setvar_p(res, s);
1940                 break;
1941
1942           case B_ix:
1943                 n = 0;
1944                 ll = strlen(as[1]);
1945                 l = strlen(as[0]) - ll;
1946                 if (ll > 0 && l >= 0) {
1947                         if (! icase) {
1948                                 s = strstr(as[0], as[1]);
1949                                 if (s) n = (s - as[0]) + 1;
1950                         } else {
1951                                 /* this piece of code is terribly slow and
1952                                  * really should be rewritten
1953                                  */
1954                                 for (i=0; i<=l; i++) {
1955                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1956                                                 n = i+1;
1957                                                 break;
1958                                         }
1959                                 }
1960                         }
1961                 }
1962                 setvar_i(res, n);
1963                 break;
1964
1965           case B_ti:
1966                 if (nargs > 1)
1967                         tt = getvar_i(av[1]);
1968                 else
1969                         time(&tt);
1970                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1971                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1972                 buf[i] = '\0';
1973                 setvar_s(res, buf);
1974                 break;
1975
1976           case B_ma:
1977                 re = as_regex(an[1], &sreg);
1978                 n = regexec(re, as[0], 1, pmatch, 0);
1979                 if (n == 0) {
1980                         pmatch[0].rm_so++;
1981                         pmatch[0].rm_eo++;
1982                 } else {
1983                         pmatch[0].rm_so = 0;
1984                         pmatch[0].rm_eo = -1;
1985                 }
1986                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1987                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1988                 setvar_i(res, pmatch[0].rm_so);
1989                 if (re == &sreg) regfree(re);
1990                 break;
1991
1992           case B_ge:
1993                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
1994                 break;
1995
1996           case B_gs:
1997                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
1998                 break;
1999
2000           case B_su:
2001                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2002                 break;
2003         }
2004
2005         nvfree(tv);
2006         return res;
2007 }
2008
2009 /*
2010  * Evaluate node - the heart of the program. Supplied with subtree
2011  * and place where to store result. returns ptr to result.
2012  */
2013 #define XC(n) ((n) >> 8)
2014
2015 static var *evaluate(node *op, var *res)
2016 {
2017         /* This procedure is recursive so we should count every byte */
2018         static var *fnargs = NULL;
2019         static unsigned int seed = 1;
2020         static regex_t sreg;
2021         node *op1;
2022         var *v1;
2023         union {
2024                 var *v;
2025                 char *s;
2026                 double d;
2027                 int i;
2028         } L, R;
2029         uint32_t opinfo;
2030         short opn;
2031         union {
2032                 char *s;
2033                 rstream *rsm;
2034                 FILE *F;
2035                 var *v;
2036                 regex_t *re;
2037                 uint32_t info;
2038         } X;
2039
2040         if (! op)
2041                 return setvar_s(res, NULL);
2042
2043         v1 = nvalloc(2);
2044
2045         while (op) {
2046
2047                 opinfo = op->info;
2048                 opn = (short)(opinfo & OPNMASK);
2049                 lineno = op->lineno;
2050
2051                 /* execute inevitable things */
2052                 op1 = op->l.n;
2053                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2054                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2055                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2056                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2057                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2058
2059                 switch (XC(opinfo & OPCLSMASK)) {
2060
2061                   /* -- iterative node type -- */
2062
2063                   /* test pattern */
2064                   case XC( OC_TEST ):
2065                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2066                                 /* it's range pattern */
2067                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2068                                         op->info |= OF_CHECKED;
2069                                         if (ptest(op1->r.n))
2070                                                 op->info &= ~OF_CHECKED;
2071
2072                                         op = op->a.n;
2073                                 } else {
2074                                         op = op->r.n;
2075                                 }
2076                         } else {
2077                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2078                         }
2079                         break;
2080
2081                   /* just evaluate an expression, also used as unconditional jump */
2082                   case XC( OC_EXEC ):
2083                         break;
2084
2085                   /* branch, used in if-else and various loops */
2086                   case XC( OC_BR ):
2087                         op = istrue(L.v) ? op->a.n : op->r.n;
2088                         break;
2089
2090                   /* initialize for-in loop */
2091                   case XC( OC_WALKINIT ):
2092                         hashwalk_init(L.v, iamarray(R.v));
2093                         break;
2094
2095                   /* get next array item */
2096                   case XC( OC_WALKNEXT ):
2097                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2098                         break;
2099
2100                   case XC( OC_PRINT ):
2101                   case XC( OC_PRINTF ):
2102                         X.F = stdout;
2103                         if (op->r.n) {
2104                                 X.rsm = newfile(R.s);
2105                                 if (! X.rsm->F) {
2106                                         if (opn == '|') {
2107                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2108                                                         bb_perror_msg_and_die("popen");
2109                                                 X.rsm->is_pipe = 1;
2110                                         } else {
2111                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2112                                         }
2113                                 }
2114                                 X.F = X.rsm->F;
2115                         }
2116
2117                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2118                                 if (! op1) {
2119                                         fputs(getvar_s(V[F0]), X.F);
2120                                 } else {
2121                                         while (op1) {
2122                                                 L.v = evaluate(nextarg(&op1), v1);
2123                                                 if (L.v->type & VF_NUMBER) {
2124                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2125                                                                                                                 getvar_i(L.v), TRUE);
2126                                                         fputs(buf, X.F);
2127                                                 } else {
2128                                                         fputs(getvar_s(L.v), X.F);
2129                                                 }
2130
2131                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2132                                         }
2133                                 }
2134                                 fputs(getvar_s(V[ORS]), X.F);
2135
2136                         } else {        /* OC_PRINTF */
2137                                 L.s = awk_printf(op1);
2138                                 fputs(L.s, X.F);
2139                                 free(L.s);
2140                         }
2141                         fflush(X.F);
2142                         break;
2143
2144                   case XC( OC_DELETE ):
2145                         X.info = op1->info & OPCLSMASK;
2146                         if (X.info == OC_VAR) {
2147                                 R.v = op1->l.v;
2148                         } else if (X.info == OC_FNARG) {
2149                                 R.v = &fnargs[op1->l.i];
2150                         } else {
2151                                 runtime_error(EMSG_NOT_ARRAY);
2152                         }
2153
2154                         if (op1->r.n) {
2155                                 clrvar(L.v);
2156                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2157                                 hash_remove(iamarray(R.v), L.s);
2158                         } else {
2159                                 clear_array(iamarray(R.v));
2160                         }
2161                         break;
2162
2163                   case XC( OC_NEWSOURCE ):
2164                         programname = op->l.s;
2165                         break;
2166
2167                   case XC( OC_RETURN ):
2168                         copyvar(res, L.v);
2169                         break;
2170
2171                   case XC( OC_NEXTFILE ):
2172                         nextfile = TRUE;
2173                   case XC( OC_NEXT ):
2174                         nextrec = TRUE;
2175                   case XC( OC_DONE ):
2176                         clrvar(res);
2177                         break;
2178
2179                   case XC( OC_EXIT ):
2180                         awk_exit(L.d);
2181
2182                   /* -- recursive node type -- */
2183
2184                   case XC( OC_VAR ):
2185                         L.v = op->l.v;
2186                         if (L.v == V[NF])
2187                                 split_f0();
2188                         goto v_cont;
2189
2190                   case XC( OC_FNARG ):
2191                         L.v = &fnargs[op->l.i];
2192
2193 v_cont:
2194                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2195                         break;
2196
2197                   case XC( OC_IN ):
2198                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2199                         break;
2200
2201                   case XC( OC_REGEXP ):
2202                         op1 = op;
2203                         L.s = getvar_s(V[F0]);
2204                         goto re_cont;
2205
2206                   case XC( OC_MATCH ):
2207                         op1 = op->r.n;
2208 re_cont:
2209                         X.re = as_regex(op1, &sreg);
2210                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2211                         if (X.re == &sreg) regfree(X.re);
2212                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2213                         break;
2214
2215                   case XC( OC_MOVE ):
2216                         /* if source is a temporary string, jusk relink it to dest */
2217                         if (R.v == v1+1 && R.v->string) {
2218                                 res = setvar_p(L.v, R.v->string);
2219                                 R.v->string = NULL;
2220                         } else {
2221                                 res = copyvar(L.v, R.v);
2222                         }
2223                         break;
2224
2225                   case XC( OC_TERNARY ):
2226                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2227                                 runtime_error(EMSG_POSSIBLE_ERROR);
2228                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2229                         break;
2230
2231                   case XC( OC_FUNC ):
2232                         if (! op->r.f->body.first)
2233                                 runtime_error(EMSG_UNDEF_FUNC);
2234
2235                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2236                         while (op1) {
2237                                 L.v = evaluate(nextarg(&op1), v1);
2238                                 copyvar(R.v, L.v);
2239                                 R.v->type |= VF_CHILD;
2240                                 R.v->x.parent = L.v;
2241                                 if (++R.v - X.v >= op->r.f->nargs)
2242                                         break;
2243                         }
2244
2245                         R.v = fnargs;
2246                         fnargs = X.v;
2247
2248                         L.s = programname;
2249                         res = evaluate(op->r.f->body.first, res);
2250                         programname = L.s;
2251
2252                         nvfree(fnargs);
2253                         fnargs = R.v;
2254                         break;
2255
2256                   case XC( OC_GETLINE ):
2257                   case XC( OC_PGETLINE ):
2258                         if (op1) {
2259                                 X.rsm = newfile(L.s);
2260                                 if (! X.rsm->F) {
2261                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2262                                                 X.rsm->F = popen(L.s, "r");
2263                                                 X.rsm->is_pipe = TRUE;
2264                                         } else {
2265                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2266                                         }
2267                                 }
2268                         } else {
2269                                 if (! iF) iF = next_input_file();
2270                                 X.rsm = iF;
2271                         }
2272
2273                         if (! X.rsm->F) {
2274                                 setvar_i(V[ERRNO], errno);
2275                                 setvar_i(res, -1);
2276                                 break;
2277                         }
2278
2279                         if (! op->r.n)
2280                                 R.v = V[F0];
2281
2282                         L.i = awk_getline(X.rsm, R.v);
2283                         if (L.i > 0) {
2284                                 if (! op1) {
2285                                         incvar(V[FNR]);
2286                                         incvar(V[NR]);
2287                                 }
2288                         }
2289                         setvar_i(res, L.i);
2290                         break;
2291
2292                   /* simple builtins */
2293                   case XC( OC_FBLTIN ):
2294                         switch (opn) {
2295
2296                           case F_in:
2297                                 R.d = (int)L.d;
2298                                 break;
2299
2300                           case F_rn:
2301                                 R.d =  (double)rand() / (double)RAND_MAX;
2302                                 break;
2303
2304 #ifdef CONFIG_FEATURE_AWK_MATH
2305                           case F_co:
2306                                 R.d = cos(L.d);
2307                                 break;
2308
2309                           case F_ex:
2310                                 R.d = exp(L.d);
2311                                 break;
2312
2313                           case F_lg:
2314                                 R.d = log(L.d);
2315                                 break;
2316
2317                           case F_si:
2318                                 R.d = sin(L.d);
2319                                 break;
2320
2321                           case F_sq:
2322                                 R.d = sqrt(L.d);
2323                                 break;
2324 #else
2325                           case F_co:
2326                           case F_ex:
2327                           case F_lg:
2328                           case F_si:
2329                           case F_sq:
2330                                 runtime_error(EMSG_NO_MATH);
2331                                 break;
2332 #endif
2333
2334                           case F_sr:
2335                                 R.d = (double)seed;
2336                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2337                                 srand(seed);
2338                                 break;
2339
2340                           case F_ti:
2341                                 R.d = time(NULL);
2342                                 break;
2343
2344                           case F_le:
2345                                 if (! op1)
2346                                         L.s = getvar_s(V[F0]);
2347                                 R.d = strlen(L.s);
2348                                 break;
2349
2350                           case F_sy:
2351                                 fflush(NULL);
2352                                 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2353                                 break;
2354
2355                           case F_ff:
2356                                 if (! op1)
2357                                         fflush(stdout);
2358                                 else {
2359                                         if (L.s && *L.s) {
2360                                                 X.rsm = newfile(L.s);
2361                                                 fflush(X.rsm->F);
2362                                         } else {
2363                                                 fflush(NULL);
2364                                         }
2365                                 }
2366                                 break;
2367
2368                           case F_cl:
2369                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2370                                 if (X.rsm) {
2371                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2372                                         free(X.rsm->buffer);
2373                                         hash_remove(fdhash, L.s);
2374                                 }
2375                                 if (R.i != 0)
2376                                         setvar_i(V[ERRNO], errno);
2377                                 R.d = (double)R.i;
2378                                 break;
2379                         }
2380                         setvar_i(res, R.d);
2381                         break;
2382
2383                   case XC( OC_BUILTIN ):
2384                         res = exec_builtin(op, res);
2385                         break;
2386
2387                   case XC( OC_SPRINTF ):
2388                         setvar_p(res, awk_printf(op1));
2389                         break;
2390
2391                   case XC( OC_UNARY ):
2392                         X.v = R.v;
2393                         L.d = R.d = getvar_i(R.v);
2394                         switch (opn) {
2395                           case 'P':
2396                                 L.d = ++R.d;
2397                                 goto r_op_change;
2398                           case 'p':
2399                                 R.d++;
2400                                 goto r_op_change;
2401                           case 'M':
2402                                 L.d = --R.d;
2403                                 goto r_op_change;
2404                           case 'm':
2405                                 R.d--;
2406                                 goto r_op_change;
2407                           case '!':
2408                             L.d = istrue(X.v) ? 0 : 1;
2409                                 break;
2410                           case '-':
2411                                 L.d = -R.d;
2412                                 break;
2413                         r_op_change:
2414                                 setvar_i(X.v, R.d);
2415                         }
2416                         setvar_i(res, L.d);
2417                         break;
2418
2419                   case XC( OC_FIELD ):
2420                         R.i = (int)getvar_i(R.v);
2421                         if (R.i == 0) {
2422                                 res = V[F0];
2423                         } else {
2424                                 split_f0();
2425                                 if (R.i > nfields)
2426                                         fsrealloc(R.i);
2427
2428                                 res = &Fields[R.i-1];
2429                         }
2430                         break;
2431
2432                   /* concatenation (" ") and index joining (",") */
2433                   case XC( OC_CONCAT ):
2434                   case XC( OC_COMMA ):
2435                         opn = strlen(L.s) + strlen(R.s) + 2;
2436                         X.s = (char *)xmalloc(opn);
2437                         strcpy(X.s, L.s);
2438                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2439                                 L.s = getvar_s(V[SUBSEP]);
2440                                 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
2441                                 strcat(X.s, L.s);
2442                         }
2443                         strcat(X.s, R.s);
2444                         setvar_p(res, X.s);
2445                         break;
2446
2447                   case XC( OC_LAND ):
2448                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2449                         break;
2450
2451                   case XC( OC_LOR ):
2452                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2453                         break;
2454
2455                   case XC( OC_BINARY ):
2456                   case XC( OC_REPLACE ):
2457                         R.d = getvar_i(R.v);
2458                         switch (opn) {
2459                           case '+':
2460                                 L.d += R.d;
2461                                 break;
2462                           case '-':
2463                                 L.d -= R.d;
2464                                 break;
2465                           case '*':
2466                                 L.d *= R.d;
2467                                 break;
2468                           case '/':
2469                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2470                                 L.d /= R.d;
2471                                 break;
2472                           case '&':
2473 #ifdef CONFIG_FEATURE_AWK_MATH
2474                                 L.d = pow(L.d, R.d);
2475 #else
2476                                 runtime_error(EMSG_NO_MATH);
2477 #endif
2478                                 break;
2479                           case '%':
2480                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2481                                 L.d -= (int)(L.d / R.d) * R.d;
2482                                 break;
2483                         }
2484                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2485                         break;
2486
2487                   case XC( OC_COMPARE ):
2488                         if (is_numeric(L.v) && is_numeric(R.v)) {
2489                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2490                         } else {
2491                                 L.s = getvar_s(L.v);
2492                                 R.s = getvar_s(R.v);
2493                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2494                         }
2495                         switch (opn & 0xfe) {
2496                           case 0:
2497                                 R.i = (L.d > 0);
2498                                 break;
2499                           case 2:
2500                                 R.i = (L.d >= 0);
2501                                 break;
2502                           case 4:
2503                                 R.i = (L.d == 0);
2504                                 break;
2505                         }
2506                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2507                         break;
2508
2509                   default:
2510                         runtime_error(EMSG_POSSIBLE_ERROR);
2511                 }
2512                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2513                         op = op->a.n;
2514                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2515                         break;
2516                 if (nextrec)
2517                         break;
2518         }
2519         nvfree(v1);
2520         return res;
2521 }
2522
2523
2524 /* -------- main & co. -------- */
2525
2526 static int awk_exit(int r)
2527 {
2528         unsigned int i;
2529         hash_item *hi;
2530         static var tv;
2531
2532         if (! exiting) {
2533                 exiting = TRUE;
2534                 nextrec = FALSE;
2535                 evaluate(endseq.first, &tv);
2536         }
2537
2538         /* waiting for children */
2539         for (i=0; i<fdhash->csize; i++) {
2540                 hi = fdhash->items[i];
2541                 while(hi) {
2542                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2543                                 pclose(hi->data.rs.F);
2544                         hi = hi->next;
2545                 }
2546         }
2547
2548         exit(r);
2549 }
2550
2551 /* if expr looks like "var=value", perform assignment and return 1,
2552  * otherwise return 0 */
2553 static int is_assignment(const char *expr)
2554 {
2555         char *exprc, *s, *s0, *s1;
2556
2557         exprc = xstrdup(expr);
2558         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2559                 free(exprc);
2560                 return FALSE;
2561         }
2562
2563         *(s++) = '\0';
2564         s0 = s1 = s;
2565         while (*s)
2566                 *(s1++) = nextchar(&s);
2567
2568         *s1 = '\0';
2569         setvar_u(newvar(exprc), s0);
2570         free(exprc);
2571         return TRUE;
2572 }
2573
2574 /* switch to next input file */
2575 static rstream *next_input_file(void)
2576 {
2577         static rstream rsm;
2578         FILE *F = NULL;
2579         char *fname, *ind;
2580         static int files_happen = FALSE;
2581
2582         if (rsm.F) fclose(rsm.F);
2583         rsm.F = NULL;
2584         rsm.pos = rsm.adv = 0;
2585
2586         do {
2587                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2588                         if (files_happen)
2589                                 return NULL;
2590                         fname = "-";
2591                         F = stdin;
2592                 } else {
2593                         ind = getvar_s(incvar(V[ARGIND]));
2594                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2595                         if (fname && *fname && !is_assignment(fname))
2596                                 F = afopen(fname, "r");
2597                 }
2598         } while (!F);
2599
2600         files_happen = TRUE;
2601         setvar_s(V[FILENAME], fname);
2602         rsm.F = F;
2603         return &rsm;
2604 }
2605
2606 int awk_main(int argc, char **argv)
2607 {
2608         char *s, *s1;
2609         int i, j, c, flen;
2610         var *v;
2611         static var tv;
2612         char **envp;
2613         static int from_file = FALSE;
2614         rstream *rsm;
2615         FILE *F, *stdfiles[3];
2616         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2617
2618         /* allocate global buffer */
2619         buf = xmalloc(MAXVARFMT+1);
2620
2621         vhash = hash_init();
2622         ahash = hash_init();
2623         fdhash = hash_init();
2624         fnhash = hash_init();
2625
2626         /* initialize variables */
2627         for (i=0;  *vNames;  i++) {
2628                 V[i] = v = newvar(nextword(&vNames));
2629                 if (*vValues != '\377')
2630                         setvar_s(v, nextword(&vValues));
2631                 else
2632                         setvar_i(v, 0);
2633
2634                 if (*vNames == '*') {
2635                         v->type |= VF_SPECIAL;
2636                         vNames++;
2637                 }
2638         }
2639
2640         handle_special(V[FS]);
2641         handle_special(V[RS]);
2642
2643         stdfiles[0] = stdin;
2644         stdfiles[1] = stdout;
2645         stdfiles[2] = stderr;
2646         for (i=0; i<3; i++) {
2647                 rsm = newfile(nextword(&stdnames));
2648                 rsm->F = stdfiles[i];
2649         }
2650
2651         for (envp=environ; *envp; envp++) {
2652                 s = xstrdup(*envp);
2653                 s1 = strchr(s, '=');
2654                 if (!s1) {
2655                         goto keep_going;
2656                 }
2657                 *(s1++) = '\0';
2658                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2659 keep_going:
2660                 free(s);
2661         }
2662
2663         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2664                 switch (c) {
2665                         case 'F':
2666                                 setvar_s(V[FS], optarg);
2667                                 break;
2668                         case 'v':
2669                                 if (! is_assignment(optarg))
2670                                         bb_show_usage();
2671                                 break;
2672                         case 'f':
2673                                 from_file = TRUE;
2674                                 F = afopen(programname = optarg, "r");
2675                                 s = NULL;
2676                                 /* one byte is reserved for some trick in next_token */
2677                                 if (fseek(F, 0, SEEK_END) == 0) {
2678                                         flen = ftell(F);
2679                                         s = (char *)xmalloc(flen+4);
2680                                         fseek(F, 0, SEEK_SET);
2681                                         i = 1 + fread(s+1, 1, flen, F);
2682                                 } else {
2683                                         for (i=j=1; j>0; i+=j) {
2684                                                 s = (char *)xrealloc(s, i+4096);
2685                                                 j = fread(s+i, 1, 4094, F);
2686                                         }
2687                                 }
2688                                 s[i] = '\0';
2689                                 fclose(F);
2690                                 parse_program(s+1);
2691                                 free(s);
2692                                 break;
2693                         case 'W':
2694                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2695                                 break;
2696
2697                         default:
2698                                 bb_show_usage();
2699                 }
2700         }
2701
2702         if (!from_file) {
2703                 if (argc == optind)
2704                         bb_show_usage();
2705                 programname="cmd. line";
2706                 parse_program(argv[optind++]);
2707
2708         }
2709
2710         /* fill in ARGV array */
2711         setvar_i(V[ARGC], argc - optind + 1);
2712         setari_u(V[ARGV], 0, "awk");
2713         for(i=optind; i < argc; i++)
2714                 setari_u(V[ARGV], i+1-optind, argv[i]);
2715
2716         evaluate(beginseq.first, &tv);
2717         if (! mainseq.first && ! endseq.first)
2718                 awk_exit(EXIT_SUCCESS);
2719
2720         /* input file could already be opened in BEGIN block */
2721         if (! iF) iF = next_input_file();
2722
2723         /* passing through input files */
2724         while (iF) {
2725
2726                 nextfile = FALSE;
2727                 setvar_i(V[FNR], 0);
2728
2729                 while ((c = awk_getline(iF, V[F0])) > 0) {
2730
2731                         nextrec = FALSE;
2732                         incvar(V[NR]);
2733                         incvar(V[FNR]);
2734                         evaluate(mainseq.first, &tv);
2735
2736                         if (nextfile)
2737                                 break;
2738                 }
2739
2740                 if (c < 0)
2741                         runtime_error(strerror(errno));
2742
2743                 iF = next_input_file();
2744
2745         }
2746
2747         awk_exit(EXIT_SUCCESS);
2748
2749         return 0;
2750 }
2751