getopt_ulflags -> getopt32.
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include "busybox.h"
11 #include "xregex.h"
12 #include <math.h>
13
14
15 #define MAXVARFMT       240
16 #define MINNVBLOCK      64
17
18 /* variable flags */
19 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
20 #define VF_ARRAY        0x0002  /* 1 = it's an array */
21
22 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
23 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
24 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
25 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
26 #define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
27 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
28 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
29
30 /* these flags are static, don't change them when value is changed */
31 #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
32
33 /* Variable */
34 typedef struct var_s {
35         unsigned short type;            /* flags */
36         double number;
37         char *string;
38         union {
39                 int aidx;                               /* func arg idx (for compilation stage) */
40                 struct xhash_s *array;  /* array ptr */
41                 struct var_s *parent;   /* for func args, ptr to actual parameter */
42                 char **walker;                  /* list of array elements (for..in) */
43         } x;
44 } var;
45
46 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
47 typedef struct chain_s {
48         struct node_s *first;
49         struct node_s *last;
50         char *programname;
51 } chain;
52
53 /* Function */
54 typedef struct func_s {
55         unsigned short nargs;
56         struct chain_s body;
57 } func;
58
59 /* I/O stream */
60 typedef struct rstream_s {
61         FILE *F;
62         char *buffer;
63         int adv;
64         int size;
65         int pos;
66         unsigned short is_pipe;
67 } rstream;
68
69 typedef struct hash_item_s {
70         union {
71                 struct var_s v;                 /* variable/array hash */
72                 struct rstream_s rs;    /* redirect streams hash */
73                 struct func_s f;                /* functions hash */
74         } data;
75         struct hash_item_s *next;       /* next in chain */
76         char name[1];                           /* really it's longer */
77 } hash_item;
78
79 typedef struct xhash_s {
80         unsigned int nel;                                       /* num of elements */
81         unsigned int csize;                                     /* current hash size */
82         unsigned int nprime;                            /* next hash size in PRIMES[] */
83         unsigned int glen;                                      /* summary length of item names */
84         struct hash_item_s **items;
85 } xhash;
86
87 /* Tree node */
88 typedef struct node_s {
89         uint32_t info;
90         unsigned short lineno;
91         union {
92                 struct node_s *n;
93                 var *v;
94                 int i;
95                 char *s;
96                 regex_t *re;
97         } l;
98         union {
99                 struct node_s *n;
100                 regex_t *ire;
101                 func *f;
102                 int argno;
103         } r;
104         union {
105                 struct node_s *n;
106         } a;
107 } node;
108
109 /* Block of temporary variables */
110 typedef struct nvblock_s {
111         int size;
112         var *pos;
113         struct nvblock_s *prev;
114         struct nvblock_s *next;
115         var nv[0];
116 } nvblock;
117
118 typedef struct tsplitter_s {
119         node n;
120         regex_t re[2];
121 } tsplitter;
122
123 /* simple token classes */
124 /* Order and hex values are very important!!!  See next_token() */
125 #define TC_SEQSTART      1                              /* ( */
126 #define TC_SEQTERM      (1 << 1)                /* ) */
127 #define TC_REGEXP       (1 << 2)                /* /.../ */
128 #define TC_OUTRDR       (1 << 3)                /* | > >> */
129 #define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
130 #define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
131 #define TC_BINOPX       (1 << 6)                /* two-opnd operator */
132 #define TC_IN           (1 << 7)
133 #define TC_COMMA        (1 << 8)
134 #define TC_PIPE         (1 << 9)                /* input redirection pipe */
135 #define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
136 #define TC_ARRTERM      (1 << 11)               /* ] */
137 #define TC_GRPSTART     (1 << 12)               /* { */
138 #define TC_GRPTERM      (1 << 13)               /* } */
139 #define TC_SEMICOL      (1 << 14)
140 #define TC_NEWLINE      (1 << 15)
141 #define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
142 #define TC_WHILE        (1 << 17)
143 #define TC_ELSE         (1 << 18)
144 #define TC_BUILTIN      (1 << 19)
145 #define TC_GETLINE      (1 << 20)
146 #define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
147 #define TC_BEGIN        (1 << 22)
148 #define TC_END          (1 << 23)
149 #define TC_EOF          (1 << 24)
150 #define TC_VARIABLE     (1 << 25)
151 #define TC_ARRAY        (1 << 26)
152 #define TC_FUNCTION     (1 << 27)
153 #define TC_STRING       (1 << 28)
154 #define TC_NUMBER       (1 << 29)
155
156 #define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)
157
158 /* combined token classes */
159 #define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
160 #define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
161 #define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
162         TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)
163
164 #define TC_STATEMNT     (TC_STATX | TC_WHILE)
165 #define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)
166
167 /* word tokens, cannot mean something else if not expected */
168 #define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
169         TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)
170
171 /* discard newlines after these */
172 #define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
173         TC_BINOP | TC_OPTERM)
174
175 /* what can expression begin with */
176 #define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
177 /* what can group begin with */
178 #define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
179
180 /* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
181 /* operator is inserted between them */
182 #define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
183         TC_STRING | TC_NUMBER | TC_UOPPOST)
184 #define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
185
186 #define OF_RES1         0x010000
187 #define OF_RES2         0x020000
188 #define OF_STR1         0x040000
189 #define OF_STR2         0x080000
190 #define OF_NUM1         0x100000
191 #define OF_CHECKED      0x200000
192
193 /* combined operator flags */
194 #define xx      0
195 #define xV      OF_RES2
196 #define xS      (OF_RES2 | OF_STR2)
197 #define Vx      OF_RES1
198 #define VV      (OF_RES1 | OF_RES2)
199 #define Nx      (OF_RES1 | OF_NUM1)
200 #define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
201 #define Sx      (OF_RES1 | OF_STR1)
202 #define SV      (OF_RES1 | OF_STR1 | OF_RES2)
203 #define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
204
205 #define OPCLSMASK       0xFF00
206 #define OPNMASK         0x007F
207
208 /* operator priority is a highest byte (even: r->l, odd: l->r grouping)
209  * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
210  * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
211  */
212 #define P(x)    (x << 24)
213 #define PRIMASK         0x7F000000
214 #define PRIMASK2        0x7E000000
215
216 /* Operation classes */
217
218 #define SHIFT_TIL_THIS  0x0600
219 #define RECUR_FROM_THIS 0x1000
220
221 enum {
222         OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
223         OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,
224
225         OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
226         OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
227         OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,
228
229         OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
230         OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
231         OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
232         OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
233         OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
234         OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
235         OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
236         OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
237         OC_DONE=0x2800,
238
239         ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
240         ST_WHILE=0x3300
241 };
242
243 /* simple builtins */
244 enum {
245         F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
246         F_ti,   F_le,   F_sy,   F_ff,   F_cl
247 };
248
249 /* builtins */
250 enum {
251         B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
252         B_ge,   B_gs,   B_su,
253         B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
254 };
255
256 /* tokens and their corresponding info values */
257
258 #define NTC             "\377"          /* switch to next token class (tc<<1) */
259 #define NTCC    '\377'
260
261 #define OC_B    OC_BUILTIN
262
263 static char * const tokenlist =
264         "\1("           NTC
265         "\1)"           NTC
266         "\1/"           NTC                                                                     /* REGEXP */
267         "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
268         "\2++"          "\2--"          NTC                                             /* UOPPOST */
269         "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
270         "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
271         "\2*="          "\2/="          "\2%="          "\2^="
272         "\1+"           "\1-"           "\3**="         "\2**"
273         "\1/"           "\1%"           "\1^"           "\1*"
274         "\2!="          "\2>="          "\2<="          "\1>"
275         "\1<"           "\2!~"          "\1~"           "\2&&"
276         "\2||"          "\1?"           "\1:"           NTC
277         "\2in"          NTC
278         "\1,"           NTC
279         "\1|"           NTC
280         "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
281         "\1]"           NTC
282         "\1{"           NTC
283         "\1}"           NTC
284         "\1;"           NTC
285         "\1\n"          NTC
286         "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
287         "\10continue"                   "\6delete"      "\5print"
288         "\6printf"      "\4next"        "\10nextfile"
289         "\6return"      "\4exit"        NTC
290         "\5while"       NTC
291         "\4else"        NTC
292
293         "\3and"         "\5compl"       "\6lshift"      "\2or"
294         "\6rshift"      "\3xor"
295         "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
296         "\3cos"         "\3exp"         "\3int"         "\3log"
297         "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
298         "\6gensub"      "\4gsub"        "\5index"       "\6length"
299         "\5match"       "\5split"       "\7sprintf"     "\3sub"
300         "\6substr"      "\7systime"     "\10strftime"
301         "\7tolower"     "\7toupper"     NTC
302         "\7getline"     NTC
303         "\4func"        "\10function"   NTC
304         "\5BEGIN"       NTC
305         "\3END"         "\0"
306         ;
307
308 static const uint32_t tokeninfo[] = {
309
310         0,
311         0,
312         OC_REGEXP,
313         xS|'a',         xS|'w',         xS|'|',
314         OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
315         OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
316                 OC_FIELD|xV|P(5),
317         OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
318                 OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
319         OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
320                 OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
321         OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
322                 OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
323         OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
324                 OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
325         OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
326                 OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
327         OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
328                 OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
329         OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
330                 OC_COLON|xx|P(67)|':',
331         OC_IN|SV|P(49),
332         OC_COMMA|SS|P(80),
333         OC_PGETLINE|SV|P(37),
334         OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
335                 OC_UNARY|xV|P(19)|'!',
336         0,
337         0,
338         0,
339         0,
340         0,
341         ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
342         OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
343         OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
344         OC_RETURN|Vx,   OC_EXIT|Nx,
345         ST_WHILE,
346         0,
347
348         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
349         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
350         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
351         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
352         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
353         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
354         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
355         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
356         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
357         OC_GETLINE|SV|P(0),
358         0,      0,
359         0,
360         0
361 };
362
363 /* internal variable names and their initial values       */
364 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
365 enum {
366         CONVFMT=0,      OFMT,           FS,                     OFS,
367         ORS,            RS,                     RT,                     FILENAME,
368         SUBSEP,         ARGIND,         ARGC,           ARGV,
369         ERRNO,          FNR,
370         NR,                     NF,                     IGNORECASE,
371         ENVIRON,        F0,                     _intvarcount_
372 };
373
374 static char * vNames =
375         "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
376         "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
377         "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
378         "ERRNO\0"       "FNR\0"
379         "NR\0"          "NF\0*"         "IGNORECASE\0*"
380         "ENVIRON\0"     "$\0*"          "\0";
381
382 static char * vValues =
383         "%.6g\0"        "%.6g\0"        " \0"           " \0"
384         "\n\0"          "\n\0"          "\0"            "\0"
385         "\034\0"
386         "\377";
387
388 /* hash size may grow to these values */
389 #define FIRST_PRIME 61;
390 static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
391 enum { NPRIMES = sizeof(PRIMES) / sizeof(unsigned int) };
392
393 /* globals */
394
395 extern char **environ;
396
397 static var * V[_intvarcount_];
398 static chain beginseq, mainseq, endseq, *seq;
399 static int nextrec, nextfile;
400 static node *break_ptr, *continue_ptr;
401 static rstream *iF;
402 static xhash *vhash, *ahash, *fdhash, *fnhash;
403 static char *programname;
404 static short lineno;
405 static int is_f0_split;
406 static int nfields;
407 static var *Fields;
408 static tsplitter fsplitter, rsplitter;
409 static nvblock *cb;
410 static char *pos;
411 static char *buf;
412 static int icase;
413 static int exiting;
414
415 static struct {
416         uint32_t tclass;
417         uint32_t info;
418         char *string;
419         double number;
420         short lineno;
421         int rollback;
422 } t;
423
424 /* function prototypes */
425 static void handle_special(var *);
426 static node *parse_expr(uint32_t);
427 static void chain_group(void);
428 static var *evaluate(node *, var *);
429 static rstream *next_input_file(void);
430 static int fmt_num(char *, int, const char *, double, int);
431 static int awk_exit(int) ATTRIBUTE_NORETURN;
432
433 /* ---- error handling ---- */
434
435 static const char EMSG_INTERNAL_ERROR[] = "Internal error";
436 static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
437 static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
438 static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
439 static const char EMSG_INV_FMT[] = "Invalid format specifier";
440 static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
441 static const char EMSG_NOT_ARRAY[] = "Not an array";
442 static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
443 static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
444 #ifndef CONFIG_FEATURE_AWK_MATH
445 static const char EMSG_NO_MATH[] = "Math support is not compiled in";
446 #endif
447
448 static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
449 static void syntax_error(const char * const message)
450 {
451         bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
452 }
453
454 #define runtime_error(x) syntax_error(x)
455
456
457 /* ---- hash stuff ---- */
458
459 static unsigned int hashidx(const char *name)
460 {
461         unsigned int idx=0;
462
463         while (*name)  idx = *name++ + (idx << 6) - idx;
464         return idx;
465 }
466
467 /* create new hash */
468 static xhash *hash_init(void)
469 {
470         xhash *newhash;
471
472         newhash = (xhash *)xzalloc(sizeof(xhash));
473         newhash->csize = FIRST_PRIME;
474         newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
475
476         return newhash;
477 }
478
479 /* find item in hash, return ptr to data, NULL if not found */
480 static void *hash_search(xhash *hash, const char *name)
481 {
482         hash_item *hi;
483
484         hi = hash->items [ hashidx(name) % hash->csize ];
485         while (hi) {
486                 if (strcmp(hi->name, name) == 0)
487                         return &(hi->data);
488                 hi = hi->next;
489         }
490         return NULL;
491 }
492
493 /* grow hash if it becomes too big */
494 static void hash_rebuild(xhash *hash)
495 {
496         unsigned int newsize, i, idx;
497         hash_item **newitems, *hi, *thi;
498
499         if (hash->nprime == NPRIMES)
500                 return;
501
502         newsize = PRIMES[hash->nprime++];
503         newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
504
505         for (i=0; i<hash->csize; i++) {
506                 hi = hash->items[i];
507                 while (hi) {
508                         thi = hi;
509                         hi = thi->next;
510                         idx = hashidx(thi->name) % newsize;
511                         thi->next = newitems[idx];
512                         newitems[idx] = thi;
513                 }
514         }
515
516         free(hash->items);
517         hash->csize = newsize;
518         hash->items = newitems;
519 }
520
521 /* find item in hash, add it if necessary. Return ptr to data */
522 static void *hash_find(xhash *hash, const char *name)
523 {
524         hash_item *hi;
525         unsigned int idx;
526         int l;
527
528         hi = hash_search(hash, name);
529         if (! hi) {
530                 if (++hash->nel / hash->csize > 10)
531                         hash_rebuild(hash);
532
533                 l = strlen(name) + 1;
534                 hi = xzalloc(sizeof(hash_item) + l);
535                 memcpy(hi->name, name, l);
536
537                 idx = hashidx(name) % hash->csize;
538                 hi->next = hash->items[idx];
539                 hash->items[idx] = hi;
540                 hash->glen += l;
541         }
542         return &(hi->data);
543 }
544
545 #define findvar(hash, name) (var *) hash_find ( (hash) , (name) )
546 #define newvar(name) (var *) hash_find ( vhash , (name) )
547 #define newfile(name) (rstream *) hash_find ( fdhash , (name) )
548 #define newfunc(name) (func *) hash_find ( fnhash , (name) )
549
550 static void hash_remove(xhash *hash, const char *name)
551 {
552         hash_item *hi, **phi;
553
554         phi = &(hash->items[ hashidx(name) % hash->csize ]);
555         while (*phi) {
556                 hi = *phi;
557                 if (strcmp(hi->name, name) == 0) {
558                         hash->glen -= (strlen(name) + 1);
559                         hash->nel--;
560                         *phi = hi->next;
561                         free(hi);
562                         break;
563                 }
564                 phi = &(hi->next);
565         }
566 }
567
568 /* ------ some useful functions ------ */
569
570 static void skip_spaces(char **s)
571 {
572         char *p = *s;
573
574         while(*p == ' ' || *p == '\t' ||
575                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
576                 p++;
577         }
578         *s = p;
579 }
580
581 static char *nextword(char **s)
582 {
583         char *p = *s;
584
585         while (*(*s)++) ;
586
587         return p;
588 }
589
590 static char nextchar(char **s)
591 {
592         char c, *pps;
593
594         c = *((*s)++);
595         pps = *s;
596         if (c == '\\') c = bb_process_escape_sequence((const char**)s);
597         if (c == '\\' && *s == pps) c = *((*s)++);
598         return c;
599 }
600
601 static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
602 {
603         return (isalnum(c) || c == '_');
604 }
605
606 static FILE *afopen(const char *path, const char *mode)
607 {
608         return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
609 }
610
611 /* -------- working with variables (set/get/copy/etc) -------- */
612
613 static xhash *iamarray(var *v)
614 {
615         var *a = v;
616
617         while (a->type & VF_CHILD)
618                 a = a->x.parent;
619
620         if (! (a->type & VF_ARRAY)) {
621                 a->type |= VF_ARRAY;
622                 a->x.array = hash_init();
623         }
624         return a->x.array;
625 }
626
627 static void clear_array(xhash *array)
628 {
629         unsigned int i;
630         hash_item *hi, *thi;
631
632         for (i=0; i<array->csize; i++) {
633                 hi = array->items[i];
634                 while (hi) {
635                         thi = hi;
636                         hi = hi->next;
637                         free(thi->data.v.string);
638                         free(thi);
639                 }
640                 array->items[i] = NULL;
641         }
642         array->glen = array->nel = 0;
643 }
644
645 /* clear a variable */
646 static var *clrvar(var *v)
647 {
648         if (!(v->type & VF_FSTR))
649                 free(v->string);
650
651         v->type &= VF_DONTTOUCH;
652         v->type |= VF_DIRTY;
653         v->string = NULL;
654         return v;
655 }
656
657 /* assign string value to variable */
658 static var *setvar_p(var *v, char *value)
659 {
660         clrvar(v);
661         v->string = value;
662         handle_special(v);
663
664         return v;
665 }
666
667 /* same as setvar_p but make a copy of string */
668 static var *setvar_s(var *v, const char *value)
669 {
670         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
671 }
672
673 /* same as setvar_s but set USER flag */
674 static var *setvar_u(var *v, const char *value)
675 {
676         setvar_s(v, value);
677         v->type |= VF_USER;
678         return v;
679 }
680
681 /* set array element to user string */
682 static void setari_u(var *a, int idx, const char *s)
683 {
684         var *v;
685         static char sidx[12];
686
687         sprintf(sidx, "%d", idx);
688         v = findvar(iamarray(a), sidx);
689         setvar_u(v, s);
690 }
691
692 /* assign numeric value to variable */
693 static var *setvar_i(var *v, double value)
694 {
695         clrvar(v);
696         v->type |= VF_NUMBER;
697         v->number = value;
698         handle_special(v);
699         return v;
700 }
701
702 static char *getvar_s(var *v)
703 {
704         /* if v is numeric and has no cached string, convert it to string */
705         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
706                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
707                 v->string = xstrdup(buf);
708                 v->type |= VF_CACHED;
709         }
710         return (v->string == NULL) ? "" : v->string;
711 }
712
713 static double getvar_i(var *v)
714 {
715         char *s;
716
717         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
718                 v->number = 0;
719                 s = v->string;
720                 if (s && *s) {
721                         v->number = strtod(s, &s);
722                         if (v->type & VF_USER) {
723                                 skip_spaces(&s);
724                                 if (*s != '\0')
725                                         v->type &= ~VF_USER;
726                         }
727                 } else {
728                         v->type &= ~VF_USER;
729                 }
730                 v->type |= VF_CACHED;
731         }
732         return v->number;
733 }
734
735 static var *copyvar(var *dest, const var *src)
736 {
737         if (dest != src) {
738                 clrvar(dest);
739                 dest->type |= (src->type & ~VF_DONTTOUCH);
740                 dest->number = src->number;
741                 if (src->string)
742                         dest->string = xstrdup(src->string);
743         }
744         handle_special(dest);
745         return dest;
746 }
747
748 static var *incvar(var *v)
749 {
750         return setvar_i(v, getvar_i(v)+1.);
751 }
752
753 /* return true if v is number or numeric string */
754 static int is_numeric(var *v)
755 {
756         getvar_i(v);
757         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
758 }
759
760 /* return 1 when value of v corresponds to true, 0 otherwise */
761 static int istrue(var *v)
762 {
763         if (is_numeric(v))
764                 return (v->number == 0) ? 0 : 1;
765         else
766                 return (v->string && *(v->string)) ? 1 : 0;
767 }
768
769 /* temporary variables allocator. Last allocated should be first freed */
770 static var *nvalloc(int n)
771 {
772         nvblock *pb = NULL;
773         var *v, *r;
774         int size;
775
776         while (cb) {
777                 pb = cb;
778                 if ((cb->pos - cb->nv) + n <= cb->size) break;
779                 cb = cb->next;
780         }
781
782         if (! cb) {
783                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
784                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
785                 cb->size = size;
786                 cb->pos = cb->nv;
787                 cb->prev = pb;
788                 cb->next = NULL;
789                 if (pb) pb->next = cb;
790         }
791
792         v = r = cb->pos;
793         cb->pos += n;
794
795         while (v < cb->pos) {
796                 v->type = 0;
797                 v->string = NULL;
798                 v++;
799         }
800
801         return r;
802 }
803
804 static void nvfree(var *v)
805 {
806         var *p;
807
808         if (v < cb->nv || v >= cb->pos)
809                 runtime_error(EMSG_INTERNAL_ERROR);
810
811         for (p=v; p<cb->pos; p++) {
812                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
813                         clear_array(iamarray(p));
814                         free(p->x.array->items);
815                         free(p->x.array);
816                 }
817                 if (p->type & VF_WALK)
818                         free(p->x.walker);
819
820                 clrvar(p);
821         }
822
823         cb->pos = v;
824         while (cb->prev && cb->pos == cb->nv) {
825                 cb = cb->prev;
826         }
827 }
828
829 /* ------- awk program text parsing ------- */
830
831 /* Parse next token pointed by global pos, place results into global t.
832  * If token isn't expected, give away. Return token class
833  */
834 static uint32_t next_token(uint32_t expected)
835 {
836         char *p, *pp, *s;
837         char *tl;
838         uint32_t tc;
839         const uint32_t *ti;
840         int l;
841         static int concat_inserted;
842         static uint32_t save_tclass, save_info;
843         static uint32_t ltclass = TC_OPTERM;
844
845         if (t.rollback) {
846
847                 t.rollback = FALSE;
848
849         } else if (concat_inserted) {
850
851                 concat_inserted = FALSE;
852                 t.tclass = save_tclass;
853                 t.info = save_info;
854
855         } else {
856
857                 p = pos;
858
859         readnext:
860                 skip_spaces(&p);
861                 lineno = t.lineno;
862                 if (*p == '#')
863                         while (*p != '\n' && *p != '\0') p++;
864
865                 if (*p == '\n')
866                         t.lineno++;
867
868                 if (*p == '\0') {
869                         tc = TC_EOF;
870
871                 } else if (*p == '\"') {
872                         /* it's a string */
873                         t.string = s = ++p;
874                         while (*p != '\"') {
875                                 if (*p == '\0' || *p == '\n')
876                                         syntax_error(EMSG_UNEXP_EOS);
877                                 *(s++) = nextchar(&p);
878                         }
879                         p++;
880                         *s = '\0';
881                         tc = TC_STRING;
882
883                 } else if ((expected & TC_REGEXP) && *p == '/') {
884                         /* it's regexp */
885                         t.string = s = ++p;
886                         while (*p != '/') {
887                                 if (*p == '\0' || *p == '\n')
888                                         syntax_error(EMSG_UNEXP_EOS);
889                                 if ((*s++ = *p++) == '\\') {
890                                         pp = p;
891                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
892                                         if (*pp == '\\') *s++ = '\\';
893                                         if (p == pp) *s++ = *p++;
894                                 }
895                         }
896                         p++;
897                         *s = '\0';
898                         tc = TC_REGEXP;
899
900                 } else if (*p == '.' || isdigit(*p)) {
901                         /* it's a number */
902                         t.number = strtod(p, &p);
903                         if (*p == '.')
904                                 syntax_error(EMSG_UNEXP_TOKEN);
905                         tc = TC_NUMBER;
906
907                 } else {
908                         /* search for something known */
909                         tl = tokenlist;
910                         tc = 0x00000001;
911                         ti = tokeninfo;
912                         while (*tl) {
913                                 l = *(tl++);
914                                 if (l == NTCC) {
915                                         tc <<= 1;
916                                         continue;
917                                 }
918                                 /* if token class is expected, token
919                                  * matches and it's not a longer word,
920                                  * then this is what we are looking for
921                                  */
922                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
923                                 *tl == *p && strncmp(p, tl, l) == 0 &&
924                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
925                                         t.info = *ti;
926                                         p += l;
927                                         break;
928                                 }
929                                 ti++;
930                                 tl += l;
931                         }
932
933                         if (! *tl) {
934                                 /* it's a name (var/array/function),
935                                  * otherwise it's something wrong
936                                  */
937                                 if (! isalnum_(*p))
938                                         syntax_error(EMSG_UNEXP_TOKEN);
939
940                                 t.string = --p;
941                                 while(isalnum_(*(++p))) {
942                                         *(p-1) = *p;
943                                 }
944                                 *(p-1) = '\0';
945                                 tc = TC_VARIABLE;
946                                 /* also consume whitespace between functionname and bracket */
947                                 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
948                                 if (*p == '(') {
949                                         tc = TC_FUNCTION;
950                                 } else {
951                                         if (*p == '[') {
952                                                 p++;
953                                                 tc = TC_ARRAY;
954                                         }
955                                 }
956                         }
957                 }
958                 pos = p;
959
960                 /* skipping newlines in some cases */
961                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
962                         goto readnext;
963
964                 /* insert concatenation operator when needed */
965                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
966                         concat_inserted = TRUE;
967                         save_tclass = tc;
968                         save_info = t.info;
969                         tc = TC_BINOP;
970                         t.info = OC_CONCAT | SS | P(35);
971                 }
972
973                 t.tclass = tc;
974         }
975         ltclass = t.tclass;
976
977         /* Are we ready for this? */
978         if (! (ltclass & expected))
979                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
980                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
981
982         return ltclass;
983 }
984
985 static void rollback_token(void) { t.rollback = TRUE; }
986
987 static node *new_node(uint32_t info)
988 {
989         node *n;
990
991         n = (node *)xzalloc(sizeof(node));
992         n->info = info;
993         n->lineno = lineno;
994         return n;
995 }
996
997 static node *mk_re_node(char *s, node *n, regex_t *re)
998 {
999         n->info = OC_REGEXP;
1000         n->l.re = re;
1001         n->r.ire = re + 1;
1002         xregcomp(re, s, REG_EXTENDED);
1003         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1004
1005         return n;
1006 }
1007
1008 static node *condition(void)
1009 {
1010         next_token(TC_SEQSTART);
1011         return parse_expr(TC_SEQTERM);
1012 }
1013
1014 /* parse expression terminated by given argument, return ptr
1015  * to built subtree. Terminator is eaten by parse_expr */
1016 static node *parse_expr(uint32_t iexp)
1017 {
1018         node sn;
1019         node *cn = &sn;
1020         node *vn, *glptr;
1021         uint32_t tc, xtc;
1022         var *v;
1023
1024         sn.info = PRIMASK;
1025         sn.r.n = glptr = NULL;
1026         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1027
1028         while (! ((tc = next_token(xtc)) & iexp)) {
1029                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1030                         /* input redirection (<) attached to glptr node */
1031                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1032                         cn->a.n = glptr;
1033                         xtc = TC_OPERAND | TC_UOPPRE;
1034                         glptr = NULL;
1035
1036                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1037                         /* for binary and postfix-unary operators, jump back over
1038                          * previous operators with higher priority */
1039                         vn = cn;
1040                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1041                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1042                                 vn = vn->a.n;
1043                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1044                                 t.info += P(6);
1045                         cn = vn->a.n->r.n = new_node(t.info);
1046                         cn->a.n = vn->a.n;
1047                         if (tc & TC_BINOP) {
1048                                 cn->l.n = vn;
1049                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1050                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1051                                         /* it's a pipe */
1052                                         next_token(TC_GETLINE);
1053                                         /* give maximum priority to this pipe */
1054                                         cn->info &= ~PRIMASK;
1055                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1056                                 }
1057                         } else {
1058                                 cn->r.n = vn;
1059                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1060                         }
1061                         vn->a.n = cn;
1062
1063                 } else {
1064                         /* for operands and prefix-unary operators, attach them
1065                          * to last node */
1066                         vn = cn;
1067                         cn = vn->r.n = new_node(t.info);
1068                         cn->a.n = vn;
1069                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1070                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1071                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1072                                 /* one should be very careful with switch on tclass -
1073                                  * only simple tclasses should be used! */
1074                                 switch (tc) {
1075                                   case TC_VARIABLE:
1076                                   case TC_ARRAY:
1077                                         cn->info = OC_VAR;
1078                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1079                                                 cn->info = OC_FNARG;
1080                                                 cn->l.i = v->x.aidx;
1081                                         } else {
1082                                                 cn->l.v = newvar(t.string);
1083                                         }
1084                                         if (tc & TC_ARRAY) {
1085                                                 cn->info |= xS;
1086                                                 cn->r.n = parse_expr(TC_ARRTERM);
1087                                         }
1088                                         break;
1089
1090                                   case TC_NUMBER:
1091                                   case TC_STRING:
1092                                         cn->info = OC_VAR;
1093                                         v = cn->l.v = xzalloc(sizeof(var));
1094                                         if (tc & TC_NUMBER)
1095                                                 setvar_i(v, t.number);
1096                                         else
1097                                                 setvar_s(v, t.string);
1098                                         break;
1099
1100                                   case TC_REGEXP:
1101                                         mk_re_node(t.string, cn,
1102                                                                         (regex_t *)xzalloc(sizeof(regex_t)*2));
1103                                         break;
1104
1105                                   case TC_FUNCTION:
1106                                         cn->info = OC_FUNC;
1107                                         cn->r.f = newfunc(t.string);
1108                                         cn->l.n = condition();
1109                                         break;
1110
1111                                   case TC_SEQSTART:
1112                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1113                                         cn->a.n = vn;
1114                                         break;
1115
1116                                   case TC_GETLINE:
1117                                         glptr = cn;
1118                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1119                                         break;
1120
1121                                   case TC_BUILTIN:
1122                                         cn->l.n = condition();
1123                                         break;
1124                                 }
1125                         }
1126                 }
1127         }
1128         return sn.r.n;
1129 }
1130
1131 /* add node to chain. Return ptr to alloc'd node */
1132 static node *chain_node(uint32_t info)
1133 {
1134         node *n;
1135
1136         if (! seq->first)
1137                 seq->first = seq->last = new_node(0);
1138
1139         if (seq->programname != programname) {
1140                 seq->programname = programname;
1141                 n = chain_node(OC_NEWSOURCE);
1142                 n->l.s = xstrdup(programname);
1143         }
1144
1145         n = seq->last;
1146         n->info = info;
1147         seq->last = n->a.n = new_node(OC_DONE);
1148
1149         return n;
1150 }
1151
1152 static void chain_expr(uint32_t info)
1153 {
1154         node *n;
1155
1156         n = chain_node(info);
1157         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1158         if (t.tclass & TC_GRPTERM)
1159                 rollback_token();
1160 }
1161
1162 static node *chain_loop(node *nn)
1163 {
1164         node *n, *n2, *save_brk, *save_cont;
1165
1166         save_brk = break_ptr;
1167         save_cont = continue_ptr;
1168
1169         n = chain_node(OC_BR | Vx);
1170         continue_ptr = new_node(OC_EXEC);
1171         break_ptr = new_node(OC_EXEC);
1172         chain_group();
1173         n2 = chain_node(OC_EXEC | Vx);
1174         n2->l.n = nn;
1175         n2->a.n = n;
1176         continue_ptr->a.n = n2;
1177         break_ptr->a.n = n->r.n = seq->last;
1178
1179         continue_ptr = save_cont;
1180         break_ptr = save_brk;
1181
1182         return n;
1183 }
1184
1185 /* parse group and attach it to chain */
1186 static void chain_group(void)
1187 {
1188         uint32_t c;
1189         node *n, *n2, *n3;
1190
1191         do {
1192                 c = next_token(TC_GRPSEQ);
1193         } while (c & TC_NEWLINE);
1194
1195         if (c & TC_GRPSTART) {
1196                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1197                         if (t.tclass & TC_NEWLINE) continue;
1198                         rollback_token();
1199                         chain_group();
1200                 }
1201         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1202                 rollback_token();
1203                 chain_expr(OC_EXEC | Vx);
1204         } else {                                                /* TC_STATEMNT */
1205                 switch (t.info & OPCLSMASK) {
1206                         case ST_IF:
1207                                 n = chain_node(OC_BR | Vx);
1208                                 n->l.n = condition();
1209                                 chain_group();
1210                                 n2 = chain_node(OC_EXEC);
1211                                 n->r.n = seq->last;
1212                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1213                                         chain_group();
1214                                         n2->a.n = seq->last;
1215                                 } else {
1216                                         rollback_token();
1217                                 }
1218                                 break;
1219
1220                         case ST_WHILE:
1221                                 n2 = condition();
1222                                 n = chain_loop(NULL);
1223                                 n->l.n = n2;
1224                                 break;
1225
1226                         case ST_DO:
1227                                 n2 = chain_node(OC_EXEC);
1228                                 n = chain_loop(NULL);
1229                                 n2->a.n = n->a.n;
1230                                 next_token(TC_WHILE);
1231                                 n->l.n = condition();
1232                                 break;
1233
1234                         case ST_FOR:
1235                                 next_token(TC_SEQSTART);
1236                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1237                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
1238                                         if ((n2->info & OPCLSMASK) != OC_IN)
1239                                                 syntax_error(EMSG_UNEXP_TOKEN);
1240                                         n = chain_node(OC_WALKINIT | VV);
1241                                         n->l.n = n2->l.n;
1242                                         n->r.n = n2->r.n;
1243                                         n = chain_loop(NULL);
1244                                         n->info = OC_WALKNEXT | Vx;
1245                                         n->l.n = n2->l.n;
1246                                 } else {                                                                        /* for(;;) */
1247                                         n = chain_node(OC_EXEC | Vx);
1248                                         n->l.n = n2;
1249                                         n2 = parse_expr(TC_SEMICOL);
1250                                         n3 = parse_expr(TC_SEQTERM);
1251                                         n = chain_loop(n3);
1252                                         n->l.n = n2;
1253                                         if (! n2)
1254                                                 n->info = OC_EXEC;
1255                                 }
1256                                 break;
1257
1258                         case OC_PRINT:
1259                         case OC_PRINTF:
1260                                 n = chain_node(t.info);
1261                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1262                                 if (t.tclass & TC_OUTRDR) {
1263                                         n->info |= t.info;
1264                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1265                                 }
1266                                 if (t.tclass & TC_GRPTERM)
1267                                         rollback_token();
1268                                 break;
1269
1270                         case OC_BREAK:
1271                                 n = chain_node(OC_EXEC);
1272                                 n->a.n = break_ptr;
1273                                 break;
1274
1275                         case OC_CONTINUE:
1276                                 n = chain_node(OC_EXEC);
1277                                 n->a.n = continue_ptr;
1278                                 break;
1279
1280                         /* delete, next, nextfile, return, exit */
1281                         default:
1282                                 chain_expr(t.info);
1283
1284                 }
1285         }
1286 }
1287
1288 static void parse_program(char *p)
1289 {
1290         uint32_t tclass;
1291         node *cn;
1292         func *f;
1293         var *v;
1294
1295         pos = p;
1296         t.lineno = 1;
1297         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1298                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1299
1300                 if (tclass & TC_OPTERM)
1301                         continue;
1302
1303                 seq = &mainseq;
1304                 if (tclass & TC_BEGIN) {
1305                         seq = &beginseq;
1306                         chain_group();
1307
1308                 } else if (tclass & TC_END) {
1309                         seq = &endseq;
1310                         chain_group();
1311
1312                 } else if (tclass & TC_FUNCDECL) {
1313                         next_token(TC_FUNCTION);
1314                         pos++;
1315                         f = newfunc(t.string);
1316                         f->body.first = NULL;
1317                         f->nargs = 0;
1318                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1319                                 v = findvar(ahash, t.string);
1320                                 v->x.aidx = (f->nargs)++;
1321
1322                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1323                                         break;
1324                         }
1325                         seq = &(f->body);
1326                         chain_group();
1327                         clear_array(ahash);
1328
1329                 } else if (tclass & TC_OPSEQ) {
1330                         rollback_token();
1331                         cn = chain_node(OC_TEST);
1332                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1333                         if (t.tclass & TC_GRPSTART) {
1334                                 rollback_token();
1335                                 chain_group();
1336                         } else {
1337                                 chain_node(OC_PRINT);
1338                         }
1339                         cn->r.n = mainseq.last;
1340
1341                 } else /* if (tclass & TC_GRPSTART) */ {
1342                         rollback_token();
1343                         chain_group();
1344                 }
1345         }
1346 }
1347
1348
1349 /* -------- program execution part -------- */
1350
1351 static node *mk_splitter(char *s, tsplitter *spl)
1352 {
1353         regex_t *re, *ire;
1354         node *n;
1355
1356         re = &spl->re[0];
1357         ire = &spl->re[1];
1358         n = &spl->n;
1359         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1360                 regfree(re);
1361                 regfree(ire);
1362         }
1363         if (strlen(s) > 1) {
1364                 mk_re_node(s, n, re);
1365         } else {
1366                 n->info = (uint32_t) *s;
1367         }
1368
1369         return n;
1370 }
1371
1372 /* use node as a regular expression. Supplied with node ptr and regex_t
1373  * storage space. Return ptr to regex (if result points to preg, it should
1374  * be later regfree'd manually
1375  */
1376 static regex_t *as_regex(node *op, regex_t *preg)
1377 {
1378         var *v;
1379         char *s;
1380
1381         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1382                 return icase ? op->r.ire : op->l.re;
1383         } else {
1384                 v = nvalloc(1);
1385                 s = getvar_s(evaluate(op, v));
1386                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1387                 nvfree(v);
1388                 return preg;
1389         }
1390 }
1391
1392 /* gradually increasing buffer */
1393 static void qrealloc(char **b, int n, int *size)
1394 {
1395         if (! *b || n >= *size)
1396                 *b = xrealloc(*b, *size = n + (n>>1) + 80);
1397 }
1398
1399 /* resize field storage space */
1400 static void fsrealloc(int size)
1401 {
1402         static int maxfields = 0;
1403         int i;
1404
1405         if (size >= maxfields) {
1406                 i = maxfields;
1407                 maxfields = size + 16;
1408                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1409                 for (; i<maxfields; i++) {
1410                         Fields[i].type = VF_SPECIAL;
1411                         Fields[i].string = NULL;
1412                 }
1413         }
1414
1415         if (size < nfields) {
1416                 for (i=size; i<nfields; i++) {
1417                         clrvar(Fields+i);
1418                 }
1419         }
1420         nfields = size;
1421 }
1422
1423 static int awk_split(char *s, node *spl, char **slist)
1424 {
1425         int l, n=0;
1426         char c[4];
1427         char *s1;
1428         regmatch_t pmatch[2];
1429
1430         /* in worst case, each char would be a separate field */
1431         *slist = s1 = xstrndup(s, strlen(s) * 2 + 3);
1432
1433         c[0] = c[1] = (char)spl->info;
1434         c[2] = c[3] = '\0';
1435         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1436
1437         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1438                 while (*s) {
1439                         l = strcspn(s, c+2);
1440                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1441                         pmatch[0].rm_so <= l) {
1442                                 l = pmatch[0].rm_so;
1443                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1444                         } else {
1445                                 pmatch[0].rm_eo = l;
1446                                 if (*(s+l)) pmatch[0].rm_eo++;
1447                         }
1448
1449                         memcpy(s1, s, l);
1450                         *(s1+l) = '\0';
1451                         nextword(&s1);
1452                         s += pmatch[0].rm_eo;
1453                         n++;
1454                 }
1455         } else if (c[0] == '\0') {              /* null split */
1456                 while(*s) {
1457                         *(s1++) = *(s++);
1458                         *(s1++) = '\0';
1459                         n++;
1460                 }
1461         } else if (c[0] != ' ') {               /* single-character split */
1462                 if (icase) {
1463                         c[0] = toupper(c[0]);
1464                         c[1] = tolower(c[1]);
1465                 }
1466                 if (*s1) n++;
1467                 while ((s1 = strpbrk(s1, c))) {
1468                         *(s1++) = '\0';
1469                         n++;
1470                 }
1471         } else {                                /* space split */
1472                 while (*s) {
1473                         while (isspace(*s)) s++;
1474                         if (! *s) break;
1475                         n++;
1476                         while (*s && !isspace(*s))
1477                                 *(s1++) = *(s++);
1478                         *(s1++) = '\0';
1479                 }
1480         }
1481         return n;
1482 }
1483
1484 static void split_f0(void)
1485 {
1486         static char *fstrings = NULL;
1487         int i, n;
1488         char *s;
1489
1490         if (is_f0_split)
1491                 return;
1492
1493         is_f0_split = TRUE;
1494         free(fstrings);
1495         fsrealloc(0);
1496         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1497         fsrealloc(n);
1498         s = fstrings;
1499         for (i=0; i<n; i++) {
1500                 Fields[i].string = nextword(&s);
1501                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1502         }
1503
1504         /* set NF manually to avoid side effects */
1505         clrvar(V[NF]);
1506         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1507         V[NF]->number = nfields;
1508 }
1509
1510 /* perform additional actions when some internal variables changed */
1511 static void handle_special(var *v)
1512 {
1513         int n;
1514         char *b, *sep, *s;
1515         int sl, l, len, i, bsize;
1516
1517         if (! (v->type & VF_SPECIAL))
1518                 return;
1519
1520         if (v == V[NF]) {
1521                 n = (int)getvar_i(v);
1522                 fsrealloc(n);
1523
1524                 /* recalculate $0 */
1525                 sep = getvar_s(V[OFS]);
1526                 sl = strlen(sep);
1527                 b = NULL;
1528                 len = 0;
1529                 for (i=0; i<n; i++) {
1530                         s = getvar_s(&Fields[i]);
1531                         l = strlen(s);
1532                         if (b) {
1533                                 memcpy(b+len, sep, sl);
1534                                 len += sl;
1535                         }
1536                         qrealloc(&b, len+l+sl, &bsize);
1537                         memcpy(b+len, s, l);
1538                         len += l;
1539                 }
1540                 if (b) b[len] = '\0';
1541                 setvar_p(V[F0], b);
1542                 is_f0_split = TRUE;
1543
1544         } else if (v == V[F0]) {
1545                 is_f0_split = FALSE;
1546
1547         } else if (v == V[FS]) {
1548                 mk_splitter(getvar_s(v), &fsplitter);
1549
1550         } else if (v == V[RS]) {
1551                 mk_splitter(getvar_s(v), &rsplitter);
1552
1553         } else if (v == V[IGNORECASE]) {
1554                 icase = istrue(v);
1555
1556         } else {                                                /* $n */
1557                 n = getvar_i(V[NF]);
1558                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1559                 /* right here v is invalid. Just to note... */
1560         }
1561 }
1562
1563 /* step through func/builtin/etc arguments */
1564 static node *nextarg(node **pn)
1565 {
1566         node *n;
1567
1568         n = *pn;
1569         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1570                 *pn = n->r.n;
1571                 n = n->l.n;
1572         } else {
1573                 *pn = NULL;
1574         }
1575         return n;
1576 }
1577
1578 static void hashwalk_init(var *v, xhash *array)
1579 {
1580         char **w;
1581         hash_item *hi;
1582         int i;
1583
1584         if (v->type & VF_WALK)
1585                 free(v->x.walker);
1586
1587         v->type |= VF_WALK;
1588         w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
1589         *w = *(w+1) = (char *)(w + 2);
1590         for (i=0; i<array->csize; i++) {
1591                 hi = array->items[i];
1592                 while(hi) {
1593                         strcpy(*w, hi->name);
1594                         nextword(w);
1595                         hi = hi->next;
1596                 }
1597         }
1598 }
1599
1600 static int hashwalk_next(var *v)
1601 {
1602         char **w;
1603
1604         w = v->x.walker;
1605         if (*(w+1) == *w)
1606                 return FALSE;
1607
1608         setvar_s(v, nextword(w+1));
1609         return TRUE;
1610 }
1611
1612 /* evaluate node, return 1 when result is true, 0 otherwise */
1613 static int ptest(node *pattern)
1614 {
1615         static var v;
1616         return istrue(evaluate(pattern, &v));
1617 }
1618
1619 /* read next record from stream rsm into a variable v */
1620 static int awk_getline(rstream *rsm, var *v)
1621 {
1622         char *b;
1623         regmatch_t pmatch[2];
1624         int a, p, pp=0, size;
1625         int fd, so, eo, r, rp;
1626         char c, *m, *s;
1627
1628         /* we're using our own buffer since we need access to accumulating
1629          * characters
1630          */
1631         fd = fileno(rsm->F);
1632         m = rsm->buffer;
1633         a = rsm->adv;
1634         p = rsm->pos;
1635         size = rsm->size;
1636         c = (char) rsplitter.n.info;
1637         rp = 0;
1638
1639         if (! m) qrealloc(&m, 256, &size);
1640         do {
1641                 b = m + a;
1642                 so = eo = p;
1643                 r = 1;
1644                 if (p > 0) {
1645                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1646                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1647                                                                                                 b, 1, pmatch, 0) == 0) {
1648                                         so = pmatch[0].rm_so;
1649                                         eo = pmatch[0].rm_eo;
1650                                         if (b[eo] != '\0')
1651                                                 break;
1652                                 }
1653                         } else if (c != '\0') {
1654                                 s = strchr(b+pp, c);
1655                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1656                                 if (s) {
1657                                         so = eo = s-b;
1658                                         eo++;
1659                                         break;
1660                                 }
1661                         } else {
1662                                 while (b[rp] == '\n')
1663                                         rp++;
1664                                 s = strstr(b+rp, "\n\n");
1665                                 if (s) {
1666                                         so = eo = s-b;
1667                                         while (b[eo] == '\n') eo++;
1668                                         if (b[eo] != '\0')
1669                                                 break;
1670                                 }
1671                         }
1672                 }
1673
1674                 if (a > 0) {
1675                         memmove(m, (const void *)(m+a), p+1);
1676                         b = m;
1677                         a = 0;
1678                 }
1679
1680                 qrealloc(&m, a+p+128, &size);
1681                 b = m + a;
1682                 pp = p;
1683                 p += safe_read(fd, b+p, size-p-1);
1684                 if (p < pp) {
1685                         p = 0;
1686                         r = 0;
1687                         setvar_i(V[ERRNO], errno);
1688                 }
1689                 b[p] = '\0';
1690
1691         } while (p > pp);
1692
1693         if (p == 0) {
1694                 r--;
1695         } else {
1696                 c = b[so]; b[so] = '\0';
1697                 setvar_s(v, b+rp);
1698                 v->type |= VF_USER;
1699                 b[so] = c;
1700                 c = b[eo]; b[eo] = '\0';
1701                 setvar_s(V[RT], b+so);
1702                 b[eo] = c;
1703         }
1704
1705         rsm->buffer = m;
1706         rsm->adv = a + eo;
1707         rsm->pos = p - eo;
1708         rsm->size = size;
1709
1710         return r;
1711 }
1712
1713 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1714 {
1715         int r=0;
1716         char c;
1717         const char *s=format;
1718
1719         if (int_as_int && n == (int)n) {
1720                 r = snprintf(b, size, "%d", (int)n);
1721         } else {
1722                 do { c = *s; } while (*s && *++s);
1723                 if (strchr("diouxX", c)) {
1724                         r = snprintf(b, size, format, (int)n);
1725                 } else if (strchr("eEfgG", c)) {
1726                         r = snprintf(b, size, format, n);
1727                 } else {
1728                         runtime_error(EMSG_INV_FMT);
1729                 }
1730         }
1731         return r;
1732 }
1733
1734
1735 /* formatted output into an allocated buffer, return ptr to buffer */
1736 static char *awk_printf(node *n)
1737 {
1738         char *b = NULL;
1739         char *fmt, *s, *s1, *f;
1740         int i, j, incr, bsize;
1741         char c, c1;
1742         var *v, *arg;
1743
1744         v = nvalloc(1);
1745         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1746
1747         i = 0;
1748         while (*f) {
1749                 s = f;
1750                 while (*f && (*f != '%' || *(++f) == '%'))
1751                         f++;
1752                 while (*f && !isalpha(*f))
1753                         f++;
1754
1755                 incr = (f - s) + MAXVARFMT;
1756                 qrealloc(&b, incr+i, &bsize);
1757                 c = *f; if (c != '\0') f++;
1758                 c1 = *f ; *f = '\0';
1759                 arg = evaluate(nextarg(&n), v);
1760
1761                 j = i;
1762                 if (c == 'c' || !c) {
1763                         i += sprintf(b+i, s,
1764                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1765
1766                 } else if (c == 's') {
1767                         s1 = getvar_s(arg);
1768                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1769                         i += sprintf(b+i, s, s1);
1770
1771                 } else {
1772                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1773                 }
1774                 *f = c1;
1775
1776                 /* if there was an error while sprintf, return value is negative */
1777                 if (i < j) i = j;
1778
1779         }
1780
1781         b = xrealloc(b, i+1);
1782         free(fmt);
1783         nvfree(v);
1784         b[i] = '\0';
1785         return b;
1786 }
1787
1788 /* common substitution routine
1789  * replace (nm) substring of (src) that match (n) with (repl), store
1790  * result into (dest), return number of substitutions. If nm=0, replace
1791  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1792  * subexpression matching (\1-\9)
1793  */
1794 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1795 {
1796         char *ds = NULL;
1797         char *sp, *s;
1798         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1799         regmatch_t pmatch[10];
1800         regex_t sreg, *re;
1801
1802         re = as_regex(rn, &sreg);
1803         if (! src) src = V[F0];
1804         if (! dest) dest = V[F0];
1805
1806         i = di = 0;
1807         sp = getvar_s(src);
1808         rl = strlen(repl);
1809         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1810                 so = pmatch[0].rm_so;
1811                 eo = pmatch[0].rm_eo;
1812
1813                 qrealloc(&ds, di + eo + rl, &dssize);
1814                 memcpy(ds + di, sp, eo);
1815                 di += eo;
1816                 if (++i >= nm) {
1817                         /* replace */
1818                         di -= (eo - so);
1819                         nbs = 0;
1820                         for (s = repl; *s; s++) {
1821                                 ds[di++] = c = *s;
1822                                 if (c == '\\') {
1823                                         nbs++;
1824                                         continue;
1825                                 }
1826                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1827                                         di -= ((nbs + 3) >> 1);
1828                                         j = 0;
1829                                         if (c != '&') {
1830                                                 j = c - '0';
1831                                                 nbs++;
1832                                         }
1833                                         if (nbs % 2) {
1834                                                 ds[di++] = c;
1835                                         } else {
1836                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1837                                                 qrealloc(&ds, di + rl + n, &dssize);
1838                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1839                                                 di += n;
1840                                         }
1841                                 }
1842                                 nbs = 0;
1843                         }
1844                 }
1845
1846                 sp += eo;
1847                 if (i == nm) break;
1848                 if (eo == so) {
1849                         if (! (ds[di++] = *sp++)) break;
1850                 }
1851         }
1852
1853         qrealloc(&ds, di + strlen(sp), &dssize);
1854         strcpy(ds + di, sp);
1855         setvar_p(dest, ds);
1856         if (re == &sreg) regfree(re);
1857         return i;
1858 }
1859
1860 static var *exec_builtin(node *op, var *res)
1861 {
1862         int (*to_xxx)(int);
1863         var *tv;
1864         node *an[4];
1865         var  *av[4];
1866         char *as[4];
1867         regmatch_t pmatch[2];
1868         regex_t sreg, *re;
1869         static tsplitter tspl;
1870         node *spl;
1871         uint32_t isr, info;
1872         int nargs;
1873         time_t tt;
1874         char *s, *s1;
1875         int i, l, ll, n;
1876
1877         tv = nvalloc(4);
1878         isr = info = op->info;
1879         op = op->l.n;
1880
1881         av[2] = av[3] = NULL;
1882         for (i=0 ; i<4 && op ; i++) {
1883                 an[i] = nextarg(&op);
1884                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1885                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1886                 isr >>= 1;
1887         }
1888
1889         nargs = i;
1890         if (nargs < (info >> 30))
1891                 runtime_error(EMSG_TOO_FEW_ARGS);
1892
1893         switch (info & OPNMASK) {
1894
1895           case B_a2:
1896 #ifdef CONFIG_FEATURE_AWK_MATH
1897                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1898 #else
1899                 runtime_error(EMSG_NO_MATH);
1900 #endif
1901                 break;
1902
1903           case B_sp:
1904                 if (nargs > 2) {
1905                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1906                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1907                 } else {
1908                         spl = &fsplitter.n;
1909                 }
1910
1911                 n = awk_split(as[0], spl, &s);
1912                 s1 = s;
1913                 clear_array(iamarray(av[1]));
1914                 for (i=1; i<=n; i++)
1915                         setari_u(av[1], i, nextword(&s1));
1916                 free(s);
1917                 setvar_i(res, n);
1918                 break;
1919
1920           case B_ss:
1921                 l = strlen(as[0]);
1922                 i = getvar_i(av[1]) - 1;
1923                 if (i>l) i=l; if (i<0) i=0;
1924                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1925                 if (n<0) n=0;
1926                 s = xmalloc(n+1);
1927                 strncpy(s, as[0]+i, n);
1928                 s[n] = '\0';
1929                 setvar_p(res, s);
1930                 break;
1931                 
1932          case B_an:
1933                 setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
1934                 break;
1935                 
1936          case B_co:
1937                 setvar_i(res, ~(long)getvar_i(av[0]));
1938                 break;
1939
1940          case B_ls:
1941                 setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
1942                 break;
1943
1944          case B_or:
1945                 setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
1946                 break;
1947
1948          case B_rs:
1949                 setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
1950                 break;
1951
1952          case B_xo:
1953                 setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
1954                 break;
1955
1956           case B_lo:
1957                 to_xxx = tolower;
1958                 goto lo_cont;
1959
1960           case B_up:
1961                 to_xxx = toupper;
1962 lo_cont:
1963                 s1 = s = xstrdup(as[0]);
1964                 while (*s1) {
1965                         *s1 = (*to_xxx)(*s1);
1966                         s1++;
1967                 }
1968                 setvar_p(res, s);
1969                 break;
1970
1971           case B_ix:
1972                 n = 0;
1973                 ll = strlen(as[1]);
1974                 l = strlen(as[0]) - ll;
1975                 if (ll > 0 && l >= 0) {
1976                         if (! icase) {
1977                                 s = strstr(as[0], as[1]);
1978                                 if (s) n = (s - as[0]) + 1;
1979                         } else {
1980                                 /* this piece of code is terribly slow and
1981                                  * really should be rewritten
1982                                  */
1983                                 for (i=0; i<=l; i++) {
1984                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1985                                                 n = i+1;
1986                                                 break;
1987                                         }
1988                                 }
1989                         }
1990                 }
1991                 setvar_i(res, n);
1992                 break;
1993
1994           case B_ti:
1995                 if (nargs > 1)
1996                         tt = getvar_i(av[1]);
1997                 else
1998                         time(&tt);
1999                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2000                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
2001                 buf[i] = '\0';
2002                 setvar_s(res, buf);
2003                 break;
2004
2005           case B_ma:
2006                 re = as_regex(an[1], &sreg);
2007                 n = regexec(re, as[0], 1, pmatch, 0);
2008                 if (n == 0) {
2009                         pmatch[0].rm_so++;
2010                         pmatch[0].rm_eo++;
2011                 } else {
2012                         pmatch[0].rm_so = 0;
2013                         pmatch[0].rm_eo = -1;
2014                 }
2015                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2016                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2017                 setvar_i(res, pmatch[0].rm_so);
2018                 if (re == &sreg) regfree(re);
2019                 break;
2020
2021           case B_ge:
2022                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2023                 break;
2024
2025           case B_gs:
2026                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2027                 break;
2028
2029           case B_su:
2030                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2031                 break;
2032         }
2033
2034         nvfree(tv);
2035         return res;
2036 }
2037
2038 /*
2039  * Evaluate node - the heart of the program. Supplied with subtree
2040  * and place where to store result. returns ptr to result.
2041  */
2042 #define XC(n) ((n) >> 8)
2043
2044 static var *evaluate(node *op, var *res)
2045 {
2046         /* This procedure is recursive so we should count every byte */
2047         static var *fnargs = NULL;
2048         static unsigned int seed = 1;
2049         static regex_t sreg;
2050         node *op1;
2051         var *v1;
2052         union {
2053                 var *v;
2054                 char *s;
2055                 double d;
2056                 int i;
2057         } L, R;
2058         uint32_t opinfo;
2059         short opn;
2060         union {
2061                 char *s;
2062                 rstream *rsm;
2063                 FILE *F;
2064                 var *v;
2065                 regex_t *re;
2066                 uint32_t info;
2067         } X;
2068
2069         if (! op)
2070                 return setvar_s(res, NULL);
2071
2072         v1 = nvalloc(2);
2073
2074         while (op) {
2075
2076                 opinfo = op->info;
2077                 opn = (short)(opinfo & OPNMASK);
2078                 lineno = op->lineno;
2079
2080                 /* execute inevitable things */
2081                 op1 = op->l.n;
2082                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2083                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2084                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2085                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2086                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2087
2088                 switch (XC(opinfo & OPCLSMASK)) {
2089
2090                   /* -- iterative node type -- */
2091
2092                   /* test pattern */
2093                   case XC( OC_TEST ):
2094                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2095                                 /* it's range pattern */
2096                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2097                                         op->info |= OF_CHECKED;
2098                                         if (ptest(op1->r.n))
2099                                                 op->info &= ~OF_CHECKED;
2100
2101                                         op = op->a.n;
2102                                 } else {
2103                                         op = op->r.n;
2104                                 }
2105                         } else {
2106                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2107                         }
2108                         break;
2109
2110                   /* just evaluate an expression, also used as unconditional jump */
2111                   case XC( OC_EXEC ):
2112                         break;
2113
2114                   /* branch, used in if-else and various loops */
2115                   case XC( OC_BR ):
2116                         op = istrue(L.v) ? op->a.n : op->r.n;
2117                         break;
2118
2119                   /* initialize for-in loop */
2120                   case XC( OC_WALKINIT ):
2121                         hashwalk_init(L.v, iamarray(R.v));
2122                         break;
2123
2124                   /* get next array item */
2125                   case XC( OC_WALKNEXT ):
2126                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2127                         break;
2128
2129                   case XC( OC_PRINT ):
2130                   case XC( OC_PRINTF ):
2131                         X.F = stdout;
2132                         if (op->r.n) {
2133                                 X.rsm = newfile(R.s);
2134                                 if (! X.rsm->F) {
2135                                         if (opn == '|') {
2136                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2137                                                         bb_perror_msg_and_die("popen");
2138                                                 X.rsm->is_pipe = 1;
2139                                         } else {
2140                                                 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2141                                         }
2142                                 }
2143                                 X.F = X.rsm->F;
2144                         }
2145
2146                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2147                                 if (! op1) {
2148                                         fputs(getvar_s(V[F0]), X.F);
2149                                 } else {
2150                                         while (op1) {
2151                                                 L.v = evaluate(nextarg(&op1), v1);
2152                                                 if (L.v->type & VF_NUMBER) {
2153                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2154                                                                                                                 getvar_i(L.v), TRUE);
2155                                                         fputs(buf, X.F);
2156                                                 } else {
2157                                                         fputs(getvar_s(L.v), X.F);
2158                                                 }
2159
2160                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2161                                         }
2162                                 }
2163                                 fputs(getvar_s(V[ORS]), X.F);
2164
2165                         } else {        /* OC_PRINTF */
2166                                 L.s = awk_printf(op1);
2167                                 fputs(L.s, X.F);
2168                                 free(L.s);
2169                         }
2170                         fflush(X.F);
2171                         break;
2172
2173                   case XC( OC_DELETE ):
2174                         X.info = op1->info & OPCLSMASK;
2175                         if (X.info == OC_VAR) {
2176                                 R.v = op1->l.v;
2177                         } else if (X.info == OC_FNARG) {
2178                                 R.v = &fnargs[op1->l.i];
2179                         } else {
2180                                 runtime_error(EMSG_NOT_ARRAY);
2181                         }
2182
2183                         if (op1->r.n) {
2184                                 clrvar(L.v);
2185                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2186                                 hash_remove(iamarray(R.v), L.s);
2187                         } else {
2188                                 clear_array(iamarray(R.v));
2189                         }
2190                         break;
2191
2192                   case XC( OC_NEWSOURCE ):
2193                         programname = op->l.s;
2194                         break;
2195
2196                   case XC( OC_RETURN ):
2197                         copyvar(res, L.v);
2198                         break;
2199
2200                   case XC( OC_NEXTFILE ):
2201                         nextfile = TRUE;
2202                   case XC( OC_NEXT ):
2203                         nextrec = TRUE;
2204                   case XC( OC_DONE ):
2205                         clrvar(res);
2206                         break;
2207
2208                   case XC( OC_EXIT ):
2209                         awk_exit(L.d);
2210
2211                   /* -- recursive node type -- */
2212
2213                   case XC( OC_VAR ):
2214                         L.v = op->l.v;
2215                         if (L.v == V[NF])
2216                                 split_f0();
2217                         goto v_cont;
2218
2219                   case XC( OC_FNARG ):
2220                         L.v = &fnargs[op->l.i];
2221
2222 v_cont:
2223                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2224                         break;
2225
2226                   case XC( OC_IN ):
2227                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2228                         break;
2229
2230                   case XC( OC_REGEXP ):
2231                         op1 = op;
2232                         L.s = getvar_s(V[F0]);
2233                         goto re_cont;
2234
2235                   case XC( OC_MATCH ):
2236                         op1 = op->r.n;
2237 re_cont:
2238                         X.re = as_regex(op1, &sreg);
2239                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2240                         if (X.re == &sreg) regfree(X.re);
2241                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2242                         break;
2243
2244                   case XC( OC_MOVE ):
2245                         /* if source is a temporary string, jusk relink it to dest */
2246                         if (R.v == v1+1 && R.v->string) {
2247                                 res = setvar_p(L.v, R.v->string);
2248                                 R.v->string = NULL;
2249                         } else {
2250                                 res = copyvar(L.v, R.v);
2251                         }
2252                         break;
2253
2254                   case XC( OC_TERNARY ):
2255                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2256                                 runtime_error(EMSG_POSSIBLE_ERROR);
2257                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2258                         break;
2259
2260                   case XC( OC_FUNC ):
2261                         if (! op->r.f->body.first)
2262                                 runtime_error(EMSG_UNDEF_FUNC);
2263
2264                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2265                         while (op1) {
2266                                 L.v = evaluate(nextarg(&op1), v1);
2267                                 copyvar(R.v, L.v);
2268                                 R.v->type |= VF_CHILD;
2269                                 R.v->x.parent = L.v;
2270                                 if (++R.v - X.v >= op->r.f->nargs)
2271                                         break;
2272                         }
2273
2274                         R.v = fnargs;
2275                         fnargs = X.v;
2276
2277                         L.s = programname;
2278                         res = evaluate(op->r.f->body.first, res);
2279                         programname = L.s;
2280
2281                         nvfree(fnargs);
2282                         fnargs = R.v;
2283                         break;
2284
2285                   case XC( OC_GETLINE ):
2286                   case XC( OC_PGETLINE ):
2287                         if (op1) {
2288                                 X.rsm = newfile(L.s);
2289                                 if (! X.rsm->F) {
2290                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2291                                                 X.rsm->F = popen(L.s, "r");
2292                                                 X.rsm->is_pipe = TRUE;
2293                                         } else {
2294                                                 X.rsm->F = fopen(L.s, "r");             /* not xfopen! */
2295                                         }
2296                                 }
2297                         } else {
2298                                 if (! iF) iF = next_input_file();
2299                                 X.rsm = iF;
2300                         }
2301
2302                         if (! X.rsm->F) {
2303                                 setvar_i(V[ERRNO], errno);
2304                                 setvar_i(res, -1);
2305                                 break;
2306                         }
2307
2308                         if (! op->r.n)
2309                                 R.v = V[F0];
2310
2311                         L.i = awk_getline(X.rsm, R.v);
2312                         if (L.i > 0) {
2313                                 if (! op1) {
2314                                         incvar(V[FNR]);
2315                                         incvar(V[NR]);
2316                                 }
2317                         }
2318                         setvar_i(res, L.i);
2319                         break;
2320
2321                   /* simple builtins */
2322                   case XC( OC_FBLTIN ):
2323                         switch (opn) {
2324
2325                           case F_in:
2326                                 R.d = (int)L.d;
2327                                 break;
2328
2329                           case F_rn:
2330                                 R.d =  (double)rand() / (double)RAND_MAX;
2331                                 break;
2332
2333 #ifdef CONFIG_FEATURE_AWK_MATH
2334                           case F_co:
2335                                 R.d = cos(L.d);
2336                                 break;
2337
2338                           case F_ex:
2339                                 R.d = exp(L.d);
2340                                 break;
2341
2342                           case F_lg:
2343                                 R.d = log(L.d);
2344                                 break;
2345
2346                           case F_si:
2347                                 R.d = sin(L.d);
2348                                 break;
2349
2350                           case F_sq:
2351                                 R.d = sqrt(L.d);
2352                                 break;
2353 #else
2354                           case F_co:
2355                           case F_ex:
2356                           case F_lg:
2357                           case F_si:
2358                           case F_sq:
2359                                 runtime_error(EMSG_NO_MATH);
2360                                 break;
2361 #endif
2362
2363                           case F_sr:
2364                                 R.d = (double)seed;
2365                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2366                                 srand(seed);
2367                                 break;
2368
2369                           case F_ti:
2370                                 R.d = time(NULL);
2371                                 break;
2372
2373                           case F_le:
2374                                 if (! op1)
2375                                         L.s = getvar_s(V[F0]);
2376                                 R.d = strlen(L.s);
2377                                 break;
2378
2379                           case F_sy:
2380                                 fflush(NULL);
2381                                 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2382                                 break;
2383
2384                           case F_ff:
2385                                 if (! op1)
2386                                         fflush(stdout);
2387                                 else {
2388                                         if (L.s && *L.s) {
2389                                                 X.rsm = newfile(L.s);
2390                                                 fflush(X.rsm->F);
2391                                         } else {
2392                                                 fflush(NULL);
2393                                         }
2394                                 }
2395                                 break;
2396
2397                           case F_cl:
2398                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2399                                 if (X.rsm) {
2400                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2401                                         free(X.rsm->buffer);
2402                                         hash_remove(fdhash, L.s);
2403                                 }
2404                                 if (R.i != 0)
2405                                         setvar_i(V[ERRNO], errno);
2406                                 R.d = (double)R.i;
2407                                 break;
2408                         }
2409                         setvar_i(res, R.d);
2410                         break;
2411
2412                   case XC( OC_BUILTIN ):
2413                         res = exec_builtin(op, res);
2414                         break;
2415
2416                   case XC( OC_SPRINTF ):
2417                         setvar_p(res, awk_printf(op1));
2418                         break;
2419
2420                   case XC( OC_UNARY ):
2421                         X.v = R.v;
2422                         L.d = R.d = getvar_i(R.v);
2423                         switch (opn) {
2424                           case 'P':
2425                                 L.d = ++R.d;
2426                                 goto r_op_change;
2427                           case 'p':
2428                                 R.d++;
2429                                 goto r_op_change;
2430                           case 'M':
2431                                 L.d = --R.d;
2432                                 goto r_op_change;
2433                           case 'm':
2434                                 R.d--;
2435                                 goto r_op_change;
2436                           case '!':
2437                                 L.d = istrue(X.v) ? 0 : 1;
2438                                 break;
2439                           case '-':
2440                                 L.d = -R.d;
2441                                 break;
2442                         r_op_change:
2443                                 setvar_i(X.v, R.d);
2444                         }
2445                         setvar_i(res, L.d);
2446                         break;
2447
2448                   case XC( OC_FIELD ):
2449                         R.i = (int)getvar_i(R.v);
2450                         if (R.i == 0) {
2451                                 res = V[F0];
2452                         } else {
2453                                 split_f0();
2454                                 if (R.i > nfields)
2455                                         fsrealloc(R.i);
2456
2457                                 res = &Fields[R.i-1];
2458                         }
2459                         break;
2460
2461                   /* concatenation (" ") and index joining (",") */
2462                   case XC( OC_CONCAT ):
2463                   case XC( OC_COMMA ):
2464                         opn = strlen(L.s) + strlen(R.s) + 2;
2465                         X.s = (char *)xmalloc(opn);
2466                         strcpy(X.s, L.s);
2467                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2468                                 L.s = getvar_s(V[SUBSEP]);
2469                                 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
2470                                 strcat(X.s, L.s);
2471                         }
2472                         strcat(X.s, R.s);
2473                         setvar_p(res, X.s);
2474                         break;
2475
2476                   case XC( OC_LAND ):
2477                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2478                         break;
2479
2480                   case XC( OC_LOR ):
2481                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2482                         break;
2483
2484                   case XC( OC_BINARY ):
2485                   case XC( OC_REPLACE ):
2486                         R.d = getvar_i(R.v);
2487                         switch (opn) {
2488                           case '+':
2489                                 L.d += R.d;
2490                                 break;
2491                           case '-':
2492                                 L.d -= R.d;
2493                                 break;
2494                           case '*':
2495                                 L.d *= R.d;
2496                                 break;
2497                           case '/':
2498                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2499                                 L.d /= R.d;
2500                                 break;
2501                           case '&':
2502 #ifdef CONFIG_FEATURE_AWK_MATH
2503                                 L.d = pow(L.d, R.d);
2504 #else
2505                                 runtime_error(EMSG_NO_MATH);
2506 #endif
2507                                 break;
2508                           case '%':
2509                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2510                                 L.d -= (int)(L.d / R.d) * R.d;
2511                                 break;
2512                         }
2513                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2514                         break;
2515
2516                   case XC( OC_COMPARE ):
2517                         if (is_numeric(L.v) && is_numeric(R.v)) {
2518                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2519                         } else {
2520                                 L.s = getvar_s(L.v);
2521                                 R.s = getvar_s(R.v);
2522                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2523                         }
2524                         switch (opn & 0xfe) {
2525                           case 0:
2526                                 R.i = (L.d > 0);
2527                                 break;
2528                           case 2:
2529                                 R.i = (L.d >= 0);
2530                                 break;
2531                           case 4:
2532                                 R.i = (L.d == 0);
2533                                 break;
2534                         }
2535                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2536                         break;
2537
2538                   default:
2539                         runtime_error(EMSG_POSSIBLE_ERROR);
2540                 }
2541                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2542                         op = op->a.n;
2543                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2544                         break;
2545                 if (nextrec)
2546                         break;
2547         }
2548         nvfree(v1);
2549         return res;
2550 }
2551
2552
2553 /* -------- main & co. -------- */
2554
2555 static int awk_exit(int r)
2556 {
2557         unsigned int i;
2558         hash_item *hi;
2559         static var tv;
2560
2561         if (! exiting) {
2562                 exiting = TRUE;
2563                 nextrec = FALSE;
2564                 evaluate(endseq.first, &tv);
2565         }
2566
2567         /* waiting for children */
2568         for (i=0; i<fdhash->csize; i++) {
2569                 hi = fdhash->items[i];
2570                 while(hi) {
2571                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2572                                 pclose(hi->data.rs.F);
2573                         hi = hi->next;
2574                 }
2575         }
2576
2577         exit(r);
2578 }
2579
2580 /* if expr looks like "var=value", perform assignment and return 1,
2581  * otherwise return 0 */
2582 static int is_assignment(const char *expr)
2583 {
2584         char *exprc, *s, *s0, *s1;
2585
2586         exprc = xstrdup(expr);
2587         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2588                 free(exprc);
2589                 return FALSE;
2590         }
2591
2592         *(s++) = '\0';
2593         s0 = s1 = s;
2594         while (*s)
2595                 *(s1++) = nextchar(&s);
2596
2597         *s1 = '\0';
2598         setvar_u(newvar(exprc), s0);
2599         free(exprc);
2600         return TRUE;
2601 }
2602
2603 /* switch to next input file */
2604 static rstream *next_input_file(void)
2605 {
2606         static rstream rsm;
2607         FILE *F = NULL;
2608         char *fname, *ind;
2609         static int files_happen = FALSE;
2610
2611         if (rsm.F) fclose(rsm.F);
2612         rsm.F = NULL;
2613         rsm.pos = rsm.adv = 0;
2614
2615         do {
2616                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2617                         if (files_happen)
2618                                 return NULL;
2619                         fname = "-";
2620                         F = stdin;
2621                 } else {
2622                         ind = getvar_s(incvar(V[ARGIND]));
2623                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2624                         if (fname && *fname && !is_assignment(fname))
2625                                 F = afopen(fname, "r");
2626                 }
2627         } while (!F);
2628
2629         files_happen = TRUE;
2630         setvar_s(V[FILENAME], fname);
2631         rsm.F = F;
2632         return &rsm;
2633 }
2634
2635 int awk_main(int argc, char **argv)
2636 {
2637         unsigned opt;
2638         char *opt_F, *opt_v, *opt_W;
2639         char *s, *s1;
2640         int i, j, c, flen;
2641         var *v;
2642         static var tv;
2643         char **envp;
2644         static int from_file = FALSE;
2645         rstream *rsm;
2646         FILE *F, *stdfiles[3];
2647         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2648
2649         /* allocate global buffer */
2650         buf = xmalloc(MAXVARFMT+1);
2651
2652         vhash = hash_init();
2653         ahash = hash_init();
2654         fdhash = hash_init();
2655         fnhash = hash_init();
2656
2657         /* initialize variables */
2658         for (i=0;  *vNames;  i++) {
2659                 V[i] = v = newvar(nextword(&vNames));
2660                 if (*vValues != '\377')
2661                         setvar_s(v, nextword(&vValues));
2662                 else
2663                         setvar_i(v, 0);
2664
2665                 if (*vNames == '*') {
2666                         v->type |= VF_SPECIAL;
2667                         vNames++;
2668                 }
2669         }
2670
2671         handle_special(V[FS]);
2672         handle_special(V[RS]);
2673
2674         stdfiles[0] = stdin;
2675         stdfiles[1] = stdout;
2676         stdfiles[2] = stderr;
2677         for (i=0; i<3; i++) {
2678                 rsm = newfile(nextword(&stdnames));
2679                 rsm->F = stdfiles[i];
2680         }
2681
2682         for (envp=environ; *envp; envp++) {
2683                 s = xstrdup(*envp);
2684                 s1 = strchr(s, '=');
2685                 if (!s1) {
2686                         goto keep_going;
2687                 }
2688                 *(s1++) = '\0';
2689                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2690 keep_going:
2691                 free(s);
2692         }
2693
2694         opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W);
2695         if (opt & 0x1) setvar_s(V[FS], opt_F); // -F
2696         if (opt & 0x2) if (!is_assignment(opt_v)) bb_show_usage(); // -v
2697         if (opt & 0x4) { // -f
2698                 from_file = TRUE;
2699                 F = afopen(programname, "r");
2700                 s = NULL;
2701                 /* one byte is reserved for some trick in next_token */
2702                 if (fseek(F, 0, SEEK_END) == 0) {
2703                         flen = ftell(F);
2704                         s = (char *)xmalloc(flen+4);
2705                         fseek(F, 0, SEEK_SET);
2706                         i = 1 + fread(s+1, 1, flen, F);
2707                 } else {
2708                         for (i=j=1; j>0; i+=j) {
2709                                 s = (char *)xrealloc(s, i+4096);
2710                                 j = fread(s+i, 1, 4094, F);
2711                         }
2712                 }
2713                 s[i] = '\0';
2714                 fclose(F);
2715                 parse_program(s+1);
2716                 free(s);
2717         }
2718         if (opt & 0x8) // -W
2719                 bb_error_msg("Warning: unrecognized option '-W %s' ignored", opt_W);
2720
2721         if (!from_file) {
2722                 if (argc == optind)
2723                         bb_show_usage();
2724                 programname = "cmd. line";
2725                 parse_program(argv[optind++]);
2726
2727         }
2728
2729         /* fill in ARGV array */
2730         setvar_i(V[ARGC], argc - optind + 1);
2731         setari_u(V[ARGV], 0, "awk");
2732         for(i=optind; i < argc; i++)
2733                 setari_u(V[ARGV], i+1-optind, argv[i]);
2734
2735         evaluate(beginseq.first, &tv);
2736         if (! mainseq.first && ! endseq.first)
2737                 awk_exit(EXIT_SUCCESS);
2738
2739         /* input file could already be opened in BEGIN block */
2740         if (! iF) iF = next_input_file();
2741
2742         /* passing through input files */
2743         while (iF) {
2744
2745                 nextfile = FALSE;
2746                 setvar_i(V[FNR], 0);
2747
2748                 while ((c = awk_getline(iF, V[F0])) > 0) {
2749
2750                         nextrec = FALSE;
2751                         incvar(V[NR]);
2752                         incvar(V[FNR]);
2753                         evaluate(mainseq.first, &tv);
2754
2755                         if (nextfile)
2756                                 break;
2757                 }
2758
2759                 if (c < 0)
2760                         runtime_error(strerror(errno));
2761
2762                 iF = next_input_file();
2763
2764         }
2765
2766         awk_exit(EXIT_SUCCESS);
2767
2768         return 0;
2769 }
2770