Replace current verbose GPL stuff in libbb/*.c with one-line GPL boilerplate.
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
8  */
9
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <unistd.h>
13 #include <errno.h>
14 #include <string.h>
15 #include <strings.h>
16 #include <time.h>
17 #include <math.h>
18 #include <ctype.h>
19 #include <getopt.h>
20
21 #include "xregex.h"
22 #include "busybox.h"
23
24
/* size limit handed to fmt_num() when getvar_s() formats a number into buf */
#define MAXVARFMT       240
/* smallest number of vars nvalloc() puts into a newly allocated block */
#define MINNVBLOCK      64

/* variable flags (bits of var.type) */
#define VF_NUMBER       0x0001  /* 1 = primary type is number */
#define VF_ARRAY        0x0002  /* 1 = it's an array */

#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
#define VF_FSTR         0x1000  /* 1 = string points to fstring buffer */
#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */

/* these flags are static, don't change them when value is changed */
/* (clrvar() keeps exactly these bits and drops all the others) */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
42
/* Variable: one awk value together with its VF_* state bits */
typedef struct var_s {
        unsigned short type;            /* flags */
        double number;                  /* numeric value; filled lazily by getvar_i() */
        char *string;                   /* string value or NULL; clrvar() frees it unless VF_FSTR */
        union {
                int aidx;                               /* func arg idx (for compilation stage) */
                struct xhash_s *array;  /* array ptr */
                struct var_s *parent;   /* for func args, ptr to actual parameter */
                char **walker;                  /* list of array elements (for..in) */
        } x;
} var;
55
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
        struct node_s *first;           /* head of the chain */
        struct node_s *last;            /* tail of the chain */
        char *programname;              /* NOTE(review): presumably the source name used in diagnostics - confirm */
} chain;
62
/* Function: stored in a hash_item's data union (see newfunc()) */
typedef struct func_s {
        unsigned short nargs;           /* argument count */
        struct chain_s body;            /* node chain of the function body */
} func;
68
/* I/O stream: stored in a hash_item's data union (see newfile()) */
/* NOTE(review): the buffer bookkeeping fields are only used outside this part of the file */
typedef struct rstream_s {
        FILE *F;
        char *buffer;
        int adv;
        int size;
        int pos;
        unsigned short is_pipe;
} rstream;
78
/* One hash table entry.
 * data must stay the first member: hash_search() returns &data and
 * hash_find() stores that pointer in a hash_item pointer, so both
 * addresses have to coincide.
 */
typedef struct hash_item_s {
        union {
                struct var_s v;                 /* variable/array hash */
                struct rstream_s rs;    /* redirect streams hash */
                struct func_s f;                /* functions hash */
        } data;
        struct hash_item_s *next;       /* next in chain */
        char name[1];                           /* really it's longer */
} hash_item;
88
/* Chained hash table keyed by NUL-terminated names */
typedef struct xhash_s {
        unsigned int nel;                                       /* num of elements */
        unsigned int csize;                                     /* current hash size */
        unsigned int nprime;                            /* next hash size in PRIMES[] */
        unsigned int glen;                                      /* summary length of item names */
        struct hash_item_s **items;                     /* csize bucket heads */
} xhash;
96
/* Tree node */
/* NOTE(review): info presumably carries a tokeninfo[]-style word (OC_ class,
 * OF_ flags, priority) - confirm against the parser, which is not in view here */
typedef struct node_s {
        uint32_t info;
        unsigned short lineno;
        /* left operand, interpreted according to the operation */
        union {
                struct node_s *n;
                var *v;
                int i;
                char *s;
                regex_t *re;
        } l;
        /* right operand, interpreted according to the operation */
        union {
                struct node_s *n;
                regex_t *ire;
                func *f;
                int argno;
        } r;
        union {
                struct node_s *n;
        } a;
} node;
118
119 /* Block of temporary variables */
120 typedef struct nvblock_s {
121         int size;
122         var *pos;
123         struct nvblock_s *prev;
124         struct nvblock_s *next;
125         var nv[0];
126 } nvblock;
127
/* Splitter state: a node plus storage for two compiled regexes.
 * NOTE(review): used for the fsplitter/rsplitter globals; presumably the two
 * regex slots back the node's l.re / r.ire pointers - confirm.
 */
typedef struct tsplitter_s {
        node n;
        regex_t re[2];
} tsplitter;
132
/* simple token classes */
/* Order and hex values are very important!!!  See next_token() */
/* next_token() starts with class bit 1 and shifts it left once for every
 * NTC separator met in tokenlist, so bit N here must describe the N-th
 * group of tokenlist.
 */
#define TC_SEQSTART      1                              /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

#define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP      (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
        TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT     (TC_STATX | TC_WHILE)
#define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
/* (next_token() always tries these classes and requires a non-word char after the match) */
#define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
        TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
        TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
        TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
195
/* operand handling flags stored in an info word next to the OC_ class.
 * NOTE(review): the names suggest RESn = resolve operand n, STRn/NUM1 =
 * convert it to string/number - confirm against evaluate().
 */
#define OF_RES1         0x010000
#define OF_RES2         0x020000
#define OF_STR1         0x040000
#define OF_STR2         0x080000
#define OF_NUM1         0x100000
#define OF_CHECKED      0x200000

/* combined operator flags */
/* two letters: handling of the first and of the second operand (x = none) */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_RES2 | OF_STR2)
#define Vx      OF_RES1
#define VV      (OF_RES1 | OF_RES2)
#define Nx      (OF_RES1 | OF_NUM1)
#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx      (OF_RES1 | OF_STR1)
#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* OPCLSMASK covers the byte holding the OC_/ST_ class values, OPNMASK the low 7 bits */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F
217
/* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument into that byte.  The argument is parenthesized
 * and widened to uint32_t before shifting: values with the top bit set
 * (e.g. P(0x83)) would otherwise shift into the sign bit of an int,
 * which is undefined behaviour.
 */
#define P(x)    ((uint32_t)(x) << 24)
#define PRIMASK         0x7F000000
#define PRIMASK2        0x7E000000
225
/* Operation classes */
/* Every OC_ and ST_ value occupies the byte selected by OPCLSMASK. */

/* NOTE(review): thresholds into the enum below (0x0600 == OC_WALKINIT,
 * 0x1000 == OC_BINARY); their exact use is in the evaluator - confirm */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
        OC_DELETE=0x0100,       OC_EXEC=0x0200,         OC_NEWSOURCE=0x0300,
        OC_PRINT=0x0400,        OC_PRINTF=0x0500,       OC_WALKINIT=0x0600,

        OC_BR=0x0700,           OC_BREAK=0x0800,        OC_CONTINUE=0x0900,
        OC_EXIT=0x0a00,         OC_NEXT=0x0b00,         OC_NEXTFILE=0x0c00,
        OC_TEST=0x0d00,         OC_WALKNEXT=0x0e00,

        OC_BINARY=0x1000,       OC_BUILTIN=0x1100,      OC_COLON=0x1200,
        OC_COMMA=0x1300,        OC_COMPARE=0x1400,      OC_CONCAT=0x1500,
        OC_FBLTIN=0x1600,       OC_FIELD=0x1700,        OC_FNARG=0x1800,
        OC_FUNC=0x1900,         OC_GETLINE=0x1a00,      OC_IN=0x1b00,
        OC_LAND=0x1c00,         OC_LOR=0x1d00,          OC_MATCH=0x1e00,
        OC_MOVE=0x1f00,         OC_PGETLINE=0x2000,     OC_REGEXP=0x2100,
        OC_REPLACE=0x2200,      OC_RETURN=0x2300,       OC_SPRINTF=0x2400,
        OC_TERNARY=0x2500,      OC_UNARY=0x2600,        OC_VAR=0x2700,
        OC_DONE=0x2800,

        ST_IF=0x3000,           ST_DO=0x3100,           ST_FOR=0x3200,
        ST_WHILE=0x3300
};
252
/* simple builtins */
/* sub-codes OR-ed with OC_FBLTIN in tokeninfo[] */
enum {
        F_in=0, F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
        F_ti,   F_le,   F_sy,   F_ff,   F_cl
};

/* builtins */
/* sub-codes OR-ed with OC_BUILTIN (OC_B) in tokeninfo[] */
enum {
        B_a2=0, B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_lo,   B_up,
        B_ge,   B_gs,   B_su
};
264
/* tokens and their corresponding info values */

#define NTC             "\377"          /* switch to next token class (tc<<1) */
#define NTCC    '\377'

#define OC_B    OC_BUILTIN

/* Table of fixed tokens scanned by next_token().
 * Every entry is one length byte (written as an octal escape) followed by
 * that many characters of token text.  An NTC byte ends the current token
 * class and moves on to the next TC_ bit; a zero length byte ends the list.
 * tokeninfo[] holds one info word per entry, in the same order.
 */
static char * const tokenlist =
        "\1("           NTC
        "\1)"           NTC
        "\1/"           NTC                                                                     /* REGEXP */
        "\2>>"          "\1>"           "\1|"           NTC                     /* OUTRDR */
        "\2++"          "\2--"          NTC                                             /* UOPPOST */
        "\2++"          "\2--"          "\1$"           NTC                     /* UOPPRE1 */
        "\2=="          "\1="           "\2+="          "\2-="          /* BINOPX */
        "\2*="          "\2/="          "\2%="          "\2^="
        "\1+"           "\1-"           "\3**="         "\2**"
        "\1/"           "\1%"           "\1^"           "\1*"
        "\2!="          "\2>="          "\2<="          "\1>"
        "\1<"           "\2!~"          "\1~"           "\2&&"
        "\2||"          "\1?"           "\1:"           NTC
        "\2in"          NTC
        "\1,"           NTC
        "\1|"           NTC
        "\1+"           "\1-"           "\1!"           NTC                     /* UOPPRE2 */
        "\1]"           NTC
        "\1{"           NTC
        "\1}"           NTC
        "\1;"           NTC
        "\1\n"          NTC
        "\2if"          "\2do"          "\3for"         "\5break"       /* STATX */
        "\10continue"                   "\6delete"      "\5print"
        "\6printf"      "\4next"        "\10nextfile"
        "\6return"      "\4exit"        NTC
        "\5while"       NTC
        "\4else"        NTC

        "\5close"       "\6system"      "\6fflush"      "\5atan2"       /* BUILTIN */
        "\3cos"         "\3exp"         "\3int"         "\3log"
        "\4rand"        "\3sin"         "\4sqrt"        "\5srand"
        "\6gensub"      "\4gsub"        "\5index"       "\6length"
        "\5match"       "\5split"       "\7sprintf"     "\3sub"
        "\6substr"      "\7systime"     "\10strftime"
        "\7tolower"     "\7toupper"     NTC
        "\7getline"     NTC
        "\4func"        "\10function"   NTC
        "\5BEGIN"       NTC
        "\3END"         "\0"
        ;
314
/* Info words for the entries of tokenlist, in the same order.
 * next_token() advances through this array once per token entry and not
 * for NTC separators, so there must be exactly one element per token.
 * A word combines an OC_/ST_ class, OF_ operand flags (xV, Sx, ...),
 * a P() byte and a low sub-code.
 */
static const uint32_t tokeninfo[] = {

        0,
        0,
        OC_REGEXP,
        xS|'a',         xS|'w',         xS|'|',
        OC_UNARY|xV|P(9)|'p',           OC_UNARY|xV|P(9)|'m',
        OC_UNARY|xV|P(9)|'P',           OC_UNARY|xV|P(9)|'M',
                OC_FIELD|xV|P(5),
        OC_COMPARE|VV|P(39)|5,          OC_MOVE|VV|P(74),
                OC_REPLACE|NV|P(74)|'+',        OC_REPLACE|NV|P(74)|'-',
        OC_REPLACE|NV|P(74)|'*',        OC_REPLACE|NV|P(74)|'/',
                OC_REPLACE|NV|P(74)|'%',        OC_REPLACE|NV|P(74)|'&',
        OC_BINARY|NV|P(29)|'+',         OC_BINARY|NV|P(29)|'-',
                OC_REPLACE|NV|P(74)|'&',        OC_BINARY|NV|P(15)|'&',
        OC_BINARY|NV|P(25)|'/',         OC_BINARY|NV|P(25)|'%',
                OC_BINARY|NV|P(15)|'&',         OC_BINARY|NV|P(25)|'*',
        OC_COMPARE|VV|P(39)|4,          OC_COMPARE|VV|P(39)|3,
                OC_COMPARE|VV|P(39)|0,          OC_COMPARE|VV|P(39)|1,
        OC_COMPARE|VV|P(39)|2,          OC_MATCH|Sx|P(45)|'!',
                OC_MATCH|Sx|P(45)|'~',          OC_LAND|Vx|P(55),
        OC_LOR|Vx|P(59),                        OC_TERNARY|Vx|P(64)|'?',
                OC_COLON|xx|P(67)|':',
        OC_IN|SV|P(49),
        OC_COMMA|SS|P(80),
        OC_PGETLINE|SV|P(37),
        OC_UNARY|xV|P(19)|'+',          OC_UNARY|xV|P(19)|'-',
                OC_UNARY|xV|P(19)|'!',
        0,
        0,
        0,
        0,
        0,
        ST_IF,                  ST_DO,                  ST_FOR,                 OC_BREAK,
        OC_CONTINUE,                                    OC_DELETE|Vx,   OC_PRINT,
        OC_PRINTF,              OC_NEXT,                OC_NEXTFILE,
        OC_RETURN|Vx,   OC_EXIT|Nx,
        ST_WHILE,
        0,

        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b),
        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
        OC_GETLINE|SV|P(0),
        0,      0,
        0,
        0
};
367
/* internal variable names and their initial values       */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* The enum gives the index of each variable in V[]; its order matches vNames. */
enum {
        CONVFMT=0,      OFMT,           FS,                     OFS,
        ORS,            RS,                     RT,                     FILENAME,
        SUBSEP,         ARGIND,         ARGC,           ARGV,
        ERRNO,          FNR,
        NR,                     NF,                     IGNORECASE,
        ENVIRON,        F0,                     _intvarcount_
};

/* NUL-separated names; in the string the asterisk is written right after the
 * "\0" of the preceding name, i.e. it leads the marked name.  An empty name
 * ends the list. */
static char * vNames =
        "CONVFMT\0"     "OFMT\0"        "FS\0*"         "OFS\0"
        "ORS\0"         "RS\0*"         "RT\0"          "FILENAME\0"
        "SUBSEP\0"      "ARGIND\0"      "ARGC\0"        "ARGV\0"
        "ERRNO\0"       "FNR\0"
        "NR\0"          "NF\0*"         "IGNORECASE\0*"
        "ENVIRON\0"     "$\0*"          "\0";

/* NUL-separated initial string values for the leading variables, closed by a
 * \377 byte.  NOTE(review): the code consuming these tables is not in view -
 * confirm how variables past the \377 marker are initialized. */
static char * vValues =
        "%.6g\0"        "%.6g\0"        " \0"           " \0"
        "\n\0"          "\n\0"          "\0"            "\0"
        "\034\0"
        "\377";
392
/* hash size may grow to these values */
/* FIRST_PRIME is the initial bucket count used by hash_init(); PRIMES[] lists
 * the sizes hash_rebuild() steps through.  The macro must not carry its own
 * semicolon, otherwise it cannot be used inside an expression. */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
397
/* globals */

extern char **environ;

static var * V[_intvarcount_];          /* internal variables, indexed by the CONVFMT..F0 enum */
static chain beginseq, mainseq, endseq, *seq;
static int nextrec, nextfile;
static node *break_ptr, *continue_ptr;
static rstream *iF;
static xhash *vhash, *ahash, *fdhash, *fnhash;  /* vhash: newvar(), fdhash: newfile(), fnhash: newfunc() */
static char *programname;               /* printed by syntax_error() */
static short lineno;                    /* printed by syntax_error(); next_token() copies t.lineno here */
static int is_f0_split;
static int nfields;
static var *Fields;
static tsplitter fsplitter, rsplitter;
static nvblock *cb;                     /* block nvalloc()/nvfree() currently work in */
static char *pos;                       /* where next_token() resumes reading */
static char *buf;                       /* scratch buffer; getvar_s() formats numbers into it */
static int icase;
static int exiting;

/* current token, filled in by next_token() */
static struct {
        uint32_t tclass;
        uint32_t info;
        char *string;
        double number;
        short lineno;
        int rollback;                   /* when set, next_token() clears it and reads nothing new */
} t;

/* function prototypes */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) ATTRIBUTE_NORETURN;
437
/* ---- error handling ---- */

/* message texts handed to syntax_error() / runtime_error() */
static const char EMSG_INTERNAL_ERROR[] = "Internal error";
static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
static const char EMSG_INV_FMT[] = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[] = "Math support is not compiled in";
#endif

/* Report message as "<programname>:<lineno>: <message>" through
 * bb_error_msg_and_die(); declared ATTRIBUTE_NORETURN, so callers do not
 * expect control to come back. */
static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
static void syntax_error(const char * const message)
{
        bb_error_msg_and_die("%s:%i: %s", programname, lineno, message);
}

/* run-time errors are reported through the same routine */
#define runtime_error(x) syntax_error(x)
460
461
462 /* ---- hash stuff ---- */
463
/* Hash a NUL-terminated name: for every character the running value is
 * multiplied by 63 and the character is added (unsigned wrap-around).
 * The caller reduces the result modulo the table size.
 */
static unsigned int hashidx(const char *name)
{
        unsigned int hash = 0;
        const char *cp;

        for (cp = name; *cp != '\0'; cp++)
                hash = hash * 63u + *cp;

        return hash;
}
471
472 /* create new hash */
473 static xhash *hash_init(void)
474 {
475         xhash *newhash;
476
477         newhash = (xhash *)xzalloc(sizeof(xhash));
478         newhash->csize = FIRST_PRIME;
479         newhash->items = (hash_item **)xzalloc(newhash->csize * sizeof(hash_item *));
480
481         return newhash;
482 }
483
484 /* find item in hash, return ptr to data, NULL if not found */
485 static void *hash_search(xhash *hash, const char *name)
486 {
487         hash_item *hi;
488
489         hi = hash->items [ hashidx(name) % hash->csize ];
490         while (hi) {
491                 if (strcmp(hi->name, name) == 0)
492                         return &(hi->data);
493                 hi = hi->next;
494         }
495         return NULL;
496 }
497
498 /* grow hash if it becomes too big */
499 static void hash_rebuild(xhash *hash)
500 {
501         unsigned int newsize, i, idx;
502         hash_item **newitems, *hi, *thi;
503
504         if (hash->nprime == NPRIMES)
505                 return;
506
507         newsize = PRIMES[hash->nprime++];
508         newitems = (hash_item **)xzalloc(newsize * sizeof(hash_item *));
509
510         for (i=0; i<hash->csize; i++) {
511                 hi = hash->items[i];
512                 while (hi) {
513                         thi = hi;
514                         hi = thi->next;
515                         idx = hashidx(thi->name) % newsize;
516                         thi->next = newitems[idx];
517                         newitems[idx] = thi;
518                 }
519         }
520
521         free(hash->items);
522         hash->csize = newsize;
523         hash->items = newitems;
524 }
525
526 /* find item in hash, add it if necessary. Return ptr to data */
527 static void *hash_find(xhash *hash, const char *name)
528 {
529         hash_item *hi;
530         unsigned int idx;
531         int l;
532
533         hi = hash_search(hash, name);
534         if (! hi) {
535                 if (++hash->nel / hash->csize > 10)
536                         hash_rebuild(hash);
537
538                 l = strlen(name) + 1;
539                 hi = xzalloc(sizeof(hash_item) + l);
540                 memcpy(hi->name, name, l);
541
542                 idx = hashidx(name) % hash->csize;
543                 hi->next = hash->items[idx];
544                 hash->items[idx] = hi;
545                 hash->glen += l;
546         }
547         return &(hi->data);
548 }
549
/* Typed front ends to hash_find(): each returns the (possibly freshly
 * created) entry cast to the payload type kept in that hash.
 * The whole expansion is parenthesized so that the cast also applies when
 * the macro is followed by a postfix operator, e.g. newvar(n)->type.
 */
#define findvar(hash, name) ((var *) hash_find ( (hash) , (name) ))
#define newvar(name) ((var *) hash_find ( vhash , (name) ))
#define newfile(name) ((rstream *) hash_find ( fdhash , (name) ))
#define newfunc(name) ((func *) hash_find ( fnhash , (name) ))
554
555 static void hash_remove(xhash *hash, const char *name)
556 {
557         hash_item *hi, **phi;
558
559         phi = &(hash->items[ hashidx(name) % hash->csize ]);
560         while (*phi) {
561                 hi = *phi;
562                 if (strcmp(hi->name, name) == 0) {
563                         hash->glen -= (strlen(name) + 1);
564                         hash->nel--;
565                         *phi = hi->next;
566                         free(hi);
567                         break;
568                 }
569                 phi = &(hi->next);
570         }
571 }
572
573 /* ------ some useful functions ------ */
574
575 static void skip_spaces(char **s)
576 {
577         char *p = *s;
578
579         while(*p == ' ' || *p == '\t' ||
580                         (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
581                 p++;
582         }
583         *s = p;
584 }
585
/* Return the NUL-terminated word *s points at and move *s just past its
 * terminating NUL, i.e. to the start of the following word.
 */
static char *nextword(char **s)
{
        char *word = *s;
        char *end = word;

        while (*end != '\0')
                end++;

        *s = end + 1;
        return word;
}
594
/* Fetch one character from *s and advance *s.
 * A backslash is handed to bb_process_escape_sequence(); if that call
 * returns a backslash without consuming anything, the character after
 * the backslash is returned (and consumed) instead.
 */
static char nextchar(char **s)
{
        char ch;
        char *after;

        ch = **s;
        (*s)++;
        after = *s;

        if (ch == '\\') {
                ch = bb_process_escape_sequence((const char**)s);
                if (ch == '\\' && *s == after) {
                        ch = **s;
                        (*s)++;
                }
        }
        return ch;
}
605
/* Return 1 if c is alphanumeric or an underscore, 0 otherwise */
static inline int isalnum_(int c)
{
        if (c == '_')
                return 1;
        return isalnum(c) ? 1 : 0;
}
610
/* Open path with bb_xfopen(); the single-character name "-" yields stdin
 * instead, whatever mode was requested.
 */
static FILE *afopen(const char *path, const char *mode)
{
        if (path[0] == '-' && path[1] == '\0')
                return stdin;

        return bb_xfopen(path, mode);
}
615
616 /* -------- working with variables (set/get/copy/etc) -------- */
617
618 static xhash *iamarray(var *v)
619 {
620         var *a = v;
621
622         while (a->type & VF_CHILD)
623                 a = a->x.parent;
624
625         if (! (a->type & VF_ARRAY)) {
626                 a->type |= VF_ARRAY;
627                 a->x.array = hash_init();
628         }
629         return a->x.array;
630 }
631
632 static void clear_array(xhash *array)
633 {
634         unsigned int i;
635         hash_item *hi, *thi;
636
637         for (i=0; i<array->csize; i++) {
638                 hi = array->items[i];
639                 while (hi) {
640                         thi = hi;
641                         hi = hi->next;
642                         free(thi->data.v.string);
643                         free(thi);
644                 }
645                 array->items[i] = NULL;
646         }
647         array->glen = array->nel = 0;
648 }
649
650 /* clear a variable */
651 static var *clrvar(var *v)
652 {
653         if (!(v->type & VF_FSTR))
654                 free(v->string);
655
656         v->type &= VF_DONTTOUCH;
657         v->type |= VF_DIRTY;
658         v->string = NULL;
659         return v;
660 }
661
662 /* assign string value to variable */
663 static var *setvar_p(var *v, char *value)
664 {
665         clrvar(v);
666         v->string = value;
667         handle_special(v);
668
669         return v;
670 }
671
672 /* same as setvar_p but make a copy of string */
673 static var *setvar_s(var *v, const char *value)
674 {
675         return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
676 }
677
678 /* same as setvar_s but set USER flag */
679 static var *setvar_u(var *v, const char *value)
680 {
681         setvar_s(v, value);
682         v->type |= VF_USER;
683         return v;
684 }
685
686 /* set array element to user string */
687 static void setari_u(var *a, int idx, const char *s)
688 {
689         var *v;
690         static char sidx[12];
691
692         sprintf(sidx, "%d", idx);
693         v = findvar(iamarray(a), sidx);
694         setvar_u(v, s);
695 }
696
697 /* assign numeric value to variable */
698 static var *setvar_i(var *v, double value)
699 {
700         clrvar(v);
701         v->type |= VF_NUMBER;
702         v->number = value;
703         handle_special(v);
704         return v;
705 }
706
707 static char *getvar_s(var *v)
708 {
709         /* if v is numeric and has no cached string, convert it to string */
710         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
711                 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
712                 v->string = bb_xstrdup(buf);
713                 v->type |= VF_CACHED;
714         }
715         return (v->string == NULL) ? "" : v->string;
716 }
717
718 static double getvar_i(var *v)
719 {
720         char *s;
721
722         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
723                 v->number = 0;
724                 s = v->string;
725                 if (s && *s) {
726                         v->number = strtod(s, &s);
727                         if (v->type & VF_USER) {
728                                 skip_spaces(&s);
729                                 if (*s != '\0')
730                                         v->type &= ~VF_USER;
731                         }
732                 } else {
733                         v->type &= ~VF_USER;
734                 }
735                 v->type |= VF_CACHED;
736         }
737         return v->number;
738 }
739
740 static var *copyvar(var *dest, const var *src)
741 {
742         if (dest != src) {
743                 clrvar(dest);
744                 dest->type |= (src->type & ~VF_DONTTOUCH);
745                 dest->number = src->number;
746                 if (src->string)
747                         dest->string = bb_xstrdup(src->string);
748         }
749         handle_special(dest);
750         return dest;
751 }
752
753 static var *incvar(var *v)
754 {
755         return setvar_i(v, getvar_i(v)+1.);
756 }
757
758 /* return true if v is number or numeric string */
759 static int is_numeric(var *v)
760 {
761         getvar_i(v);
762         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
763 }
764
765 /* return 1 when value of v corresponds to true, 0 otherwise */
766 static int istrue(var *v)
767 {
768         if (is_numeric(v))
769                 return (v->number == 0) ? 0 : 1;
770         else
771                 return (v->string && *(v->string)) ? 1 : 0;
772 }
773
774 /* temporary variables allocator. Last allocated should be first freed */
775 static var *nvalloc(int n)
776 {
777         nvblock *pb = NULL;
778         var *v, *r;
779         int size;
780
781         while (cb) {
782                 pb = cb;
783                 if ((cb->pos - cb->nv) + n <= cb->size) break;
784                 cb = cb->next;
785         }
786
787         if (! cb) {
788                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
789                 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
790                 cb->size = size;
791                 cb->pos = cb->nv;
792                 cb->prev = pb;
793                 cb->next = NULL;
794                 if (pb) pb->next = cb;
795         }
796
797         v = r = cb->pos;
798         cb->pos += n;
799
800         while (v < cb->pos) {
801                 v->type = 0;
802                 v->string = NULL;
803                 v++;
804         }
805
806         return r;
807 }
808
809 static void nvfree(var *v)
810 {
811         var *p;
812
813         if (v < cb->nv || v >= cb->pos)
814                 runtime_error(EMSG_INTERNAL_ERROR);
815
816         for (p=v; p<cb->pos; p++) {
817                 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
818                         clear_array(iamarray(p));
819                         free(p->x.array->items);
820                         free(p->x.array);
821                 }
822                 if (p->type & VF_WALK)
823                         free(p->x.walker);
824
825                 clrvar(p);
826         }
827
828         cb->pos = v;
829         while (cb->prev && cb->pos == cb->nv) {
830                 cb = cb->prev;
831         }
832 }
833
834 /* ------- awk program text parsing ------- */
835
/* Parse the next token pointed to by global pos, place results into global t.
 * If the token isn't expected, give up. Return the token class.
 */
839 static uint32_t next_token(uint32_t expected)
840 {
841         char *p, *pp, *s;
842         char *tl;
843         uint32_t tc;
844         const uint32_t *ti;
845         int l;
846         static int concat_inserted;
847         static uint32_t save_tclass, save_info;
848         static uint32_t ltclass = TC_OPTERM;
849
850         if (t.rollback) {
851
852                 t.rollback = FALSE;
853
854         } else if (concat_inserted) {
855
856                 concat_inserted = FALSE;
857                 t.tclass = save_tclass;
858                 t.info = save_info;
859
860         } else {
861
862                 p = pos;
863
864         readnext:
865                 skip_spaces(&p);
866                 lineno = t.lineno;
867                 if (*p == '#')
868                         while (*p != '\n' && *p != '\0') p++;
869
870                 if (*p == '\n')
871                         t.lineno++;
872
873                 if (*p == '\0') {
874                         tc = TC_EOF;
875
876                 } else if (*p == '\"') {
877                         /* it's a string */
878                         t.string = s = ++p;
879                         while (*p != '\"') {
880                                 if (*p == '\0' || *p == '\n')
881                                         syntax_error(EMSG_UNEXP_EOS);
882                                 *(s++) = nextchar(&p);
883                         }
884                         p++;
885                         *s = '\0';
886                         tc = TC_STRING;
887
888                 } else if ((expected & TC_REGEXP) && *p == '/') {
889                         /* it's regexp */
890                         t.string = s = ++p;
891                         while (*p != '/') {
892                                 if (*p == '\0' || *p == '\n')
893                                         syntax_error(EMSG_UNEXP_EOS);
894                                 if ((*s++ = *p++) == '\\') {
895                                         pp = p;
896                                         *(s-1) = bb_process_escape_sequence((const char **)&p);
897                                         if (*pp == '\\') *s++ = '\\';
898                                         if (p == pp) *s++ = *p++;
899                                 }
900                         }
901                         p++;
902                         *s = '\0';
903                         tc = TC_REGEXP;
904
905                 } else if (*p == '.' || isdigit(*p)) {
906                         /* it's a number */
907                         t.number = strtod(p, &p);
908                         if (*p == '.')
909                                 syntax_error(EMSG_UNEXP_TOKEN);
910                         tc = TC_NUMBER;
911
912                 } else {
913                         /* search for something known */
914                         tl = tokenlist;
915                         tc = 0x00000001;
916                         ti = tokeninfo;
917                         while (*tl) {
918                                 l = *(tl++);
919                                 if (l == NTCC) {
920                                         tc <<= 1;
921                                         continue;
922                                 }
923                                 /* if token class is expected, token
924                                  * matches and it's not a longer word,
925                                  * then this is what we are looking for
926                                  */
927                                 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
928                                 *tl == *p && strncmp(p, tl, l) == 0 &&
929                                 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
930                                         t.info = *ti;
931                                         p += l;
932                                         break;
933                                 }
934                                 ti++;
935                                 tl += l;
936                         }
937
938                         if (! *tl) {
939                                 /* it's a name (var/array/function),
940                                  * otherwise it's something wrong
941                                  */
942                                 if (! isalnum_(*p))
943                                         syntax_error(EMSG_UNEXP_TOKEN);
944
945                                 t.string = --p;
946                                 while(isalnum_(*(++p))) {
947                                         *(p-1) = *p;
948                                 }
949                                 *(p-1) = '\0';
950                                 tc = TC_VARIABLE;
951                                 /* also consume whitespace between functionname and bracket */
952                                 if (! (expected & TC_VARIABLE)) skip_spaces(&p);
953                                 if (*p == '(') {
954                                         tc = TC_FUNCTION;
955                                 } else {
956                                         if (*p == '[') {
957                                                 p++;
958                                                 tc = TC_ARRAY;
959                                         }
960                                 }
961                         }
962                 }
963                 pos = p;
964
965                 /* skipping newlines in some cases */
966                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
967                         goto readnext;
968
969                 /* insert concatenation operator when needed */
970                 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
971                         concat_inserted = TRUE;
972                         save_tclass = tc;
973                         save_info = t.info;
974                         tc = TC_BINOP;
975                         t.info = OC_CONCAT | SS | P(35);
976                 }
977
978                 t.tclass = tc;
979         }
980         ltclass = t.tclass;
981
982         /* Are we ready for this? */
983         if (! (ltclass & expected))
984                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
985                                                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
986
987         return ltclass;
988 }
989
990 static void rollback_token(void) { t.rollback = TRUE; }
991
992 static node *new_node(uint32_t info)
993 {
994         node *n;
995
996         n = (node *)xzalloc(sizeof(node));
997         n->info = info;
998         n->lineno = lineno;
999         return n;
1000 }
1001
1002 static node *mk_re_node(char *s, node *n, regex_t *re)
1003 {
1004         n->info = OC_REGEXP;
1005         n->l.re = re;
1006         n->r.ire = re + 1;
1007         xregcomp(re, s, REG_EXTENDED);
1008         xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1009
1010         return n;
1011 }
1012
1013 static node *condition(void)
1014 {
1015         next_token(TC_SEQSTART);
1016         return parse_expr(TC_SEQTERM);
1017 }
1018
1019 /* parse expression terminated by given argument, return ptr
1020  * to built subtree. Terminator is eaten by parse_expr.
      *
      * iexp: mask of token classes; the first token whose class
      *       intersects it ends the expression.
      * Returns sn.r.n, i.e. whatever got attached below the local dummy
      * root (NULL when the terminator is the very first token).
      *
      * Every node created here is linked through a.n to the node it was
      * attached to. That upward link is what lets a binary or postfix
      * operator walk back over already-built operators of higher priority
      * before it inserts itself into the tree. */
1021 static node *parse_expr(uint32_t iexp)
1022 {
1023         node sn;        /* dummy root on the stack; the result hangs off sn.r.n */
1024         node *cn = &sn; /* node attached most recently */
1025         node *vn, *glptr;       /* glptr: latest getline node that has not received a '<' source */
1026         uint32_t tc, xtc;       /* tc: class of the current token; xtc: classes accepted next */
1027         var *v;
1028
1029         sn.info = PRIMASK;      /* NOTE(review): presumably stops the priority walk at the root - confirm */
1030         sn.r.n = glptr = NULL;
1031         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1032
1033         while (! ((tc = next_token(xtc)) & iexp)) {
1034                 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1035                         /* input redirection (<) attached to glptr node */
1036                         cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1037                         cn->a.n = glptr;
1038                         xtc = TC_OPERAND | TC_UOPPRE;
1039                         glptr = NULL;
1040
1041                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1042                         /* for binary and postfix-unary operators, jump back over
1043                          * previous operators with higher priority */
1044                         vn = cn;
1045                         while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1046                           ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1047                                 vn = vn->a.n;
1048                         if ((t.info & OPCLSMASK) == OC_TERNARY)
1049                                 t.info += P(6);
                             /* splice the new operator node between vn and vn's parent */
1050                         cn = vn->a.n->r.n = new_node(t.info);
1051                         cn->a.n = vn->a.n;
1052                         if (tc & TC_BINOP) {
                                     /* binary: vn becomes the left operand, a right operand must follow */
1053                                 cn->l.n = vn;
1054                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1055                                 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1056                                         /* it's a pipe */
1057                                         next_token(TC_GETLINE);
1058                                         /* give maximum priority to this pipe */
1059                                         cn->info &= ~PRIMASK;
1060                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1061                                 }
1062                         } else {
                                     /* postfix: vn is the sole operand, stored on the right */
1063                                 cn->r.n = vn;
1064                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1065                         }
1066                         vn->a.n = cn;
1067
1068                 } else {
1069                         /* for operands and prefix-unary operators, attach them
1070                          * to last node */
1071                         vn = cn;
1072                         cn = vn->r.n = new_node(t.info);
1073                         cn->a.n = vn;
1074                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1075                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1076                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1077                                 /* one should be very careful with switch on tclass -
1078                                  * only simple tclasses should be used! */
1079                                 switch (tc) {
1080                                   case TC_VARIABLE:
1081                                   case TC_ARRAY:
                                             /* a name found in ahash becomes OC_FNARG carrying the
                                              * stored aidx; any other name is an OC_VAR from newvar() */
1082                                         cn->info = OC_VAR;
1083                                         if ((v = hash_search(ahash, t.string)) != NULL) {
1084                                                 cn->info = OC_FNARG;
1085                                                 cn->l.i = v->x.aidx;
1086                                         } else {
1087                                                 cn->l.v = newvar(t.string);
1088                                         }
1089                                         if (tc & TC_ARRAY) {
                                                     /* subscript expression, parsed up to TC_ARRTERM */
1090                                                 cn->info |= xS;
1091                                                 cn->r.n = parse_expr(TC_ARRTERM);
1092                                         }
1093                                         break;
1094
1095                                   case TC_NUMBER:
1096                                   case TC_STRING:
                                             /* literal: kept in a freshly allocated var of its own */
1097                                         cn->info = OC_VAR;
1098                                         v = cn->l.v = xzalloc(sizeof(var));
1099                                         if (tc & TC_NUMBER)
1100                                                 setvar_i(v, t.number);
1101                                         else
1102                                                 setvar_s(v, t.string);
1103                                         break;
1104
1105                                   case TC_REGEXP:
                                             /* storage for both the case-sensitive and the icase regex */
1106                                         mk_re_node(t.string, cn,
1107                                                                         (regex_t *)xzalloc(sizeof(regex_t)*2));
1108                                         break;
1109
1110                                   case TC_FUNCTION:
                                             /* call: r.f is the function, l.n the parenthesised argument expression */
1111                                         cn->info = OC_FUNC;
1112                                         cn->r.f = newfunc(t.string);
1113                                         cn->l.n = condition();
1114                                         break;
1115
1116                                   case TC_SEQSTART:
                                             /* parenthesised sub-expression: its tree takes the place
                                              * of the node allocated above */
1117                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1118                                         cn->a.n = vn;
1119                                         break;
1120
1121                                   case TC_GETLINE:
                                             /* remember the node so that a following '<' can attach to it */
1122                                         glptr = cn;
1123                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1124                                         break;
1125
1126                                   case TC_BUILTIN:
                                             /* builtin: l.n is the parenthesised argument expression */
1127                                         cn->l.n = condition();
1128                                         break;
1129                                 }
1130                         }
1131                 }
1132         }
1133         return sn.r.n;
1134 }
1135
1136 /* add node to chain. Return ptr to alloc'd node */
1137 static node *chain_node(uint32_t info)
1138 {
1139         node *n;
1140
1141         if (! seq->first)
1142                 seq->first = seq->last = new_node(0);
1143
1144         if (seq->programname != programname) {
1145                 seq->programname = programname;
1146                 n = chain_node(OC_NEWSOURCE);
1147                 n->l.s = bb_xstrdup(programname);
1148         }
1149
1150         n = seq->last;
1151         n->info = info;
1152         seq->last = n->a.n = new_node(OC_DONE);
1153
1154         return n;
1155 }
1156
1157 static void chain_expr(uint32_t info)
1158 {
1159         node *n;
1160
1161         n = chain_node(info);
1162         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1163         if (t.tclass & TC_GRPTERM)
1164                 rollback_token();
1165 }
1166
1167 static node *chain_loop(node *nn)
1168 {
1169         node *n, *n2, *save_brk, *save_cont;
1170
1171         save_brk = break_ptr;
1172         save_cont = continue_ptr;
1173
1174         n = chain_node(OC_BR | Vx);
1175         continue_ptr = new_node(OC_EXEC);
1176         break_ptr = new_node(OC_EXEC);
1177         chain_group();
1178         n2 = chain_node(OC_EXEC | Vx);
1179         n2->l.n = nn;
1180         n2->a.n = n;
1181         continue_ptr->a.n = n2;
1182         break_ptr->a.n = n->r.n = seq->last;
1183
1184         continue_ptr = save_cont;
1185         break_ptr = save_brk;
1186
1187         return n;
1188 }
1189
1190 /* parse group and attach it to chain.
      *
      * A "group" is either a { ... } block (handled by recursing once per
      * contained item), a plain expression statement, or one of the
      * statements dispatched in the switch below. Everything is appended
      * to the current sequence (seq) through chain_node()/chain_expr(). */
1191 static void chain_group(void)
1192 {
1193         uint32_t c;
1194         node *n, *n2, *n3;
1195
         /* skip newline tokens in front of the group */
1196         do {
1197                 c = next_token(TC_GRPSEQ);
1198         } while (c & TC_NEWLINE);
1199
1200         if (c & TC_GRPSTART) {
                 /* block: chain every contained group until the block terminator */
1201                 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1202                         if (t.tclass & TC_NEWLINE) continue;
1203                         rollback_token();
1204                         chain_group();
1205                 }
1206         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
                 /* expression statement: hand the token back and let chain_expr parse it */
1207                 rollback_token();
1208                 chain_expr(OC_EXEC | Vx);
1209         } else {                                                /* TC_STATEMNT */
1210                 switch (t.info & OPCLSMASK) {
1211                         case ST_IF:
                                 /* n: OC_BR node holding the condition; n2: OC_EXEC node chained
                                  * right after the "then" part. n->r.n is set to the node that
                                  * follows n2 (NOTE(review): presumably the target taken when the
                                  * condition is false - confirm in evaluate()) */
1212                                 n = chain_node(OC_BR | Vx);
1213                                 n->l.n = condition();
1214                                 chain_group();
1215                                 n2 = chain_node(OC_EXEC);
1216                                 n->r.n = seq->last;
1217                                 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
                                         /* with an else part, n2 is re-linked to the node after it */
1218                                         chain_group();
1219                                         n2->a.n = seq->last;
1220                                 } else {
1221                                         rollback_token();
1222                                 }
1223                                 break;
1224
1225                         case ST_WHILE:
                                 /* the condition is parsed first and then stored in the loop head */
1226                                 n2 = condition();
1227                                 n = chain_loop(NULL);
1228                                 n->l.n = n2;
1229                                 break;
1230
1231                         case ST_DO:
                                 /* n2 is chained before the loop and re-linked to the node that
                                  * follows the loop head, i.e. the first pass does not go
                                  * through the head; the condition follows the body in the source */
1232                                 n2 = chain_node(OC_EXEC);
1233                                 n = chain_loop(NULL);
1234                                 n2->a.n = n->a.n;
1235                                 next_token(TC_WHILE);
1236                                 n->l.n = condition();
1237                                 break;
1238
1239                         case ST_FOR:
1240                                 next_token(TC_SEQSTART);
1241                                 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1242                                 if (t.tclass & TC_SEQTERM) {                            /* for-in */
                                         /* the parenthesised expression must be an OC_IN node; its
                                          * operands feed OC_WALKINIT, and the loop head becomes
                                          * OC_WALKNEXT on the left operand */
1243                                         if ((n2->info & OPCLSMASK) != OC_IN)
1244                                                 syntax_error(EMSG_UNEXP_TOKEN);
1245                                         n = chain_node(OC_WALKINIT | VV);
1246                                         n->l.n = n2->l.n;
1247                                         n->r.n = n2->r.n;
1248                                         n = chain_loop(NULL);
1249                                         n->info = OC_WALKNEXT | Vx;
1250                                         n->l.n = n2->l.n;
1251                                 } else {                                                                        /* for(;;) */
                                         /* init expression is chained as a statement; n2 becomes the
                                          * condition, n3 the increment handed to chain_loop() */
1252                                         n = chain_node(OC_EXEC | Vx);
1253                                         n->l.n = n2;
1254                                         n2 = parse_expr(TC_SEMICOL);
1255                                         n3 = parse_expr(TC_SEQTERM);
1256                                         n = chain_loop(n3);
1257                                         n->l.n = n2;
                                         /* empty condition: the head is turned into a plain OC_EXEC */
1258                                         if (! n2)
1259                                                 n->info = OC_EXEC;
1260                                 }
1261                                 break;
1262
1263                         case OC_PRINT:
1264                         case OC_PRINTF:
1265                                 n = chain_node(t.info);
1266                                 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1267                                 if (t.tclass & TC_OUTRDR) {
                                         /* output redirection: merge the redirection token's info
                                          * into the node and parse the target expression into r.n */
1268                                         n->info |= t.info;
1269                                         n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1270                                 }
1271                                 if (t.tclass & TC_GRPTERM)
1272                                         rollback_token();
1273                                 break;
1274
1275                         case OC_BREAK:
                                 /* link to the break node of the loop being parsed (set up by chain_loop) */
1276                                 n = chain_node(OC_EXEC);
1277                                 n->a.n = break_ptr;
1278                                 break;
1279
1280                         case OC_CONTINUE:
                                 /* link to the continue node of the loop being parsed */
1281                                 n = chain_node(OC_EXEC);
1282                                 n->a.n = continue_ptr;
1283                                 break;
1284
1285                         /* delete, next, nextfile, return, exit */
1286                         default:
1287                                 chain_expr(t.info);
1288
1289                 }
1290         }
1291 }
1292
1293 static void parse_program(char *p)
1294 {
1295         uint32_t tclass;
1296         node *cn;
1297         func *f;
1298         var *v;
1299
1300         pos = p;
1301         t.lineno = 1;
1302         while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1303                                 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1304
1305                 if (tclass & TC_OPTERM)
1306                         continue;
1307
1308                 seq = &mainseq;
1309                 if (tclass & TC_BEGIN) {
1310                         seq = &beginseq;
1311                         chain_group();
1312
1313                 } else if (tclass & TC_END) {
1314                         seq = &endseq;
1315                         chain_group();
1316
1317                 } else if (tclass & TC_FUNCDECL) {
1318                         next_token(TC_FUNCTION);
1319                         pos++;
1320                         f = newfunc(t.string);
1321                         f->body.first = NULL;
1322                         f->nargs = 0;
1323                         while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1324                                 v = findvar(ahash, t.string);
1325                                 v->x.aidx = (f->nargs)++;
1326
1327                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1328                                         break;
1329                         }
1330                         seq = &(f->body);
1331                         chain_group();
1332                         clear_array(ahash);
1333
1334                 } else if (tclass & TC_OPSEQ) {
1335                         rollback_token();
1336                         cn = chain_node(OC_TEST);
1337                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1338                         if (t.tclass & TC_GRPSTART) {
1339                                 rollback_token();
1340                                 chain_group();
1341                         } else {
1342                                 chain_node(OC_PRINT);
1343                         }
1344                         cn->r.n = mainseq.last;
1345
1346                 } else /* if (tclass & TC_GRPSTART) */ {
1347                         rollback_token();
1348                         chain_group();
1349                 }
1350         }
1351 }
1352
1353
1354 /* -------- program execution part -------- */
1355
1356 static node *mk_splitter(char *s, tsplitter *spl)
1357 {
1358         regex_t *re, *ire;
1359         node *n;
1360
1361         re = &spl->re[0];
1362         ire = &spl->re[1];
1363         n = &spl->n;
1364         if ((n->info && OPCLSMASK) == OC_REGEXP) {
1365                 regfree(re);
1366                 regfree(ire);
1367         }
1368         if (strlen(s) > 1) {
1369                 mk_re_node(s, n, re);
1370         } else {
1371                 n->info = (uint32_t) *s;
1372         }
1373
1374         return n;
1375 }
1376
1377 /* use node as a regular expression. Supplied with node ptr and regex_t
1378  * storage space. Return ptr to regex (if result points to preg, it should
1379  * be later regfree'd manually
1380  */
1381 static regex_t *as_regex(node *op, regex_t *preg)
1382 {
1383         var *v;
1384         char *s;
1385
1386         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1387                 return icase ? op->r.ire : op->l.re;
1388         } else {
1389                 v = nvalloc(1);
1390                 s = getvar_s(evaluate(op, v));
1391                 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1392                 nvfree(v);
1393                 return preg;
1394         }
1395 }
1396
/* Gradually growing buffer.
 * Makes sure *b is allocated and that index n is inside it: when *b is
 * NULL or n is not below *size, the buffer is reallocated to
 * n + n/2 + 80 bytes and *size is updated. *size is not read while *b
 * is NULL. */
static void qrealloc(char **b, int n, int *size)
{
        if (*b != NULL && n < *size)
                return;

        *size = n + (n>>1) + 80;
        *b = xrealloc(*b, *size);
}
1403
1404 /* resize field storage space */
1405 static void fsrealloc(int size)
1406 {
1407         static int maxfields = 0;
1408         int i;
1409
1410         if (size >= maxfields) {
1411                 i = maxfields;
1412                 maxfields = size + 16;
1413                 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1414                 for (; i<maxfields; i++) {
1415                         Fields[i].type = VF_SPECIAL;
1416                         Fields[i].string = NULL;
1417                 }
1418         }
1419
1420         if (size < nfields) {
1421                 for (i=size; i<nfields; i++) {
1422                         clrvar(Fields+i);
1423                 }
1424         }
1425         nfields = size;
1426 }
1427
      /* Split string s into fields according to splitter node spl.
       *
       * *slist receives a newly allocated buffer that holds the fields as
       * consecutive NUL-terminated strings (callers step through it with
       * nextword()); split_f0() in this file frees that buffer itself.
       * Returns the number of fields.
       *
       * spl->info selects the mode: an OC_REGEXP node splits on regex
       * matches, otherwise its low byte is the separator character
       * ('\0': every character is a field, ' ': split on whitespace runs,
       * anything else: split on that single character). */
1428 static int awk_split(char *s, node *spl, char **slist)
1429 {
1430         int l, n=0;
1431         char c[4];      /* c: separator set for strpbrk(); c+2: the extra newline set only */
1432         char *s1;
1433         regmatch_t pmatch[2];
1434
1435         /* in worst case, each char would be a separate field */
1436         *slist = s1 = bb_xstrndup(s, strlen(s) * 2 + 3);
1437
1438         c[0] = c[1] = (char)spl->info;
1439         c[2] = c[3] = '\0';
             /* with an empty RS, newline acts as an additional field separator */
1440         if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1441
1442         if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
1443                 while (*s) {
                         /* l: length up to the next newline separator (whole rest if none) */
1444                         l = strcspn(s, c+2);
1445                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1446                         pmatch[0].rm_so <= l) {
                                 /* regex matched first: the field ends where the match starts */
1447                                 l = pmatch[0].rm_so;
                                 /* empty match at offset 0: take one character so the loop advances */
1448                                 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1449                         } else {
                                 /* no usable match: the field ends at l; step over one
                                  * separator character unless this is the end of the string */
1450                                 pmatch[0].rm_eo = l;
1451                                 if (*(s+l)) pmatch[0].rm_eo++;
1452                         }
1453
1454                         memcpy(s1, s, l);
1455                         *(s1+l) = '\0';
1456                         nextword(&s1);
1457                         s += pmatch[0].rm_eo;
1458                         n++;
1459                 }
1460         } else if (c[0] == '\0') {              /* null split */
1461                 while(*s) {
1462                         *(s1++) = *(s++);
1463                         *(s1++) = '\0';
1464                         n++;
1465                 }
1466         } else if (c[0] != ' ') {               /* single-character split */
                     /* works in place on the buffer allocated above
                      * (NOTE(review): relies on bb_xstrndup having copied s into it - confirm) */
1467                 if (icase) {
1468                         c[0] = toupper(c[0]);
1469                         c[1] = tolower(c[1]);
1470                 }
                     /* a non-empty string has at least one field; each separator adds one more */
1471                 if (*s1) n++;
1472                 while ((s1 = strpbrk(s1, c))) {
1473                         *(s1++) = '\0';
1474                         n++;
1475                 }
1476         } else {                                /* space split */
1477                 while (*s) {
1478                         while (isspace(*s)) s++;
1479                         if (! *s) break;
1480                         n++;
1481                         while (*s && !isspace(*s))
1482                                 *(s1++) = *(s++);
1483                         *(s1++) = '\0';
1484                 }
1485         }
1486         return n;
1487 }
1488
1489 static void split_f0(void)
1490 {
1491         static char *fstrings = NULL;
1492         int i, n;
1493         char *s;
1494
1495         if (is_f0_split)
1496                 return;
1497
1498         is_f0_split = TRUE;
1499         free(fstrings);
1500         fsrealloc(0);
1501         n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1502         fsrealloc(n);
1503         s = fstrings;
1504         for (i=0; i<n; i++) {
1505                 Fields[i].string = nextword(&s);
1506                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1507         }
1508
1509         /* set NF manually to avoid side effects */
1510         clrvar(V[NF]);
1511         V[NF]->type = VF_NUMBER | VF_SPECIAL;
1512         V[NF]->number = nfields;
1513 }
1514
1515 /* perform additional actions when some internal variables changed */
1516 static void handle_special(var *v)
1517 {
1518         int n;
1519         char *b, *sep, *s;
1520         int sl, l, len, i, bsize;
1521
1522         if (! (v->type & VF_SPECIAL))
1523                 return;
1524
1525         if (v == V[NF]) {
1526                 n = (int)getvar_i(v);
1527                 fsrealloc(n);
1528
1529                 /* recalculate $0 */
1530                 sep = getvar_s(V[OFS]);
1531                 sl = strlen(sep);
1532                 b = NULL;
1533                 len = 0;
1534                 for (i=0; i<n; i++) {
1535                         s = getvar_s(&Fields[i]);
1536                         l = strlen(s);
1537                         if (b) {
1538                                 memcpy(b+len, sep, sl);
1539                                 len += sl;
1540                         }
1541                         qrealloc(&b, len+l+sl, &bsize);
1542                         memcpy(b+len, s, l);
1543                         len += l;
1544                 }
1545                 if (b) b[len] = '\0';
1546                 setvar_p(V[F0], b);
1547                 is_f0_split = TRUE;
1548
1549         } else if (v == V[F0]) {
1550                 is_f0_split = FALSE;
1551
1552         } else if (v == V[FS]) {
1553                 mk_splitter(getvar_s(v), &fsplitter);
1554
1555         } else if (v == V[RS]) {
1556                 mk_splitter(getvar_s(v), &rsplitter);
1557
1558         } else if (v == V[IGNORECASE]) {
1559                 icase = istrue(v);
1560
1561         } else {                                                /* $n */
1562                 n = getvar_i(V[NF]);
1563                 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1564                 /* right here v is invalid. Just to note... */
1565         }
1566 }
1567
1568 /* step through func/builtin/etc arguments */
1569 static node *nextarg(node **pn)
1570 {
1571         node *n;
1572
1573         n = *pn;
1574         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1575                 *pn = n->r.n;
1576                 n = n->l.n;
1577         } else {
1578                 *pn = NULL;
1579         }
1580         return n;
1581 }
1582
1583 static void hashwalk_init(var *v, xhash *array)
1584 {
1585         char **w;
1586         hash_item *hi;
1587         int i;
1588
1589         if (v->type & VF_WALK)
1590                 free(v->x.walker);
1591
1592         v->type |= VF_WALK;
1593         w = v->x.walker = (char **)xzalloc(2 + 2*sizeof(char *) + array->glen);
1594         *w = *(w+1) = (char *)(w + 2);
1595         for (i=0; i<array->csize; i++) {
1596                 hi = array->items[i];
1597                 while(hi) {
1598                         strcpy(*w, hi->name);
1599                         nextword(w);
1600                         hi = hi->next;
1601                 }
1602         }
1603 }
1604
1605 static int hashwalk_next(var *v)
1606 {
1607         char **w;
1608
1609         w = v->x.walker;
1610         if (*(w+1) == *w)
1611                 return FALSE;
1612
1613         setvar_s(v, nextword(w+1));
1614         return TRUE;
1615 }
1616
1617 /* evaluate node, return 1 when result is true, 0 otherwise */
1618 static int ptest(node *pattern)
1619 {
1620         static var v;
1621         return istrue(evaluate(pattern, &v));
1622 }
1623
1624 /* read next record from stream rsm into a variable v.
      *
      * The record text (without its terminator) is stored in v, which is
      * also flagged VF_USER; the terminator text is stored in V[RT].
      * The read position is kept in rsm (buffer, adv, pos, size) between
      * calls.
      *
      * Returns 1 when a record was read, 0 when no data is left, and -1
      * when reading failed (V[ERRNO] is then set from errno). */
1625 static int awk_getline(rstream *rsm, var *v)
1626 {
1627         char *b;        /* start of the unconsumed data: m + a */
1628         regmatch_t pmatch[2];
1629         int a, p, pp=0, size;   /* a: offset of b in m; p: bytes available at b; pp: p before the last read */
1630         int fd, so, eo, r, rp;  /* so/eo: start/end of the record terminator in b; rp: leading newlines skipped */
1631         char c, *m, *s;
1632
1633         /* we're using our own buffer since we need access to accumulating
1634          * characters
1635          */
1636         fd = fileno(rsm->F);
1637         m = rsm->buffer;
1638         a = rsm->adv;
1639         p = rsm->pos;
1640         size = rsm->size;
1641         c = (char) rsplitter.n.info;
1642         rp = 0;
1643
1644         if (! m) qrealloc(&m, 256, &size);
1645         do {
1646                 b = m + a;
                 /* default: the record is everything buffered, with an empty terminator */
1647                 so = eo = p;
1648                 r = 1;
1649                 if (p > 0) {
1650                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1651                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1652                                                                                                 b, 1, pmatch, 0) == 0) {
1653                                         so = pmatch[0].rm_so;
1654                                         eo = pmatch[0].rm_eo;
                                         /* a match that reaches the end of the buffered data
                                          * is not accepted yet; more input is read first */
1655                                         if (b[eo] != '\0')
1656                                                 break;
1657                                 }
1658                         } else if (c != '\0') {
                                 /* single-character separator; only the part added by the
                                  * last read is searched. A NUL byte inside the data also
                                  * ends the record. */
1659                                 s = strchr(b+pp, c);
1660                                 if (! s) s = memchr(b+pp, '\0', p - pp);
1661                                 if (s) {
1662                                         so = eo = s-b;
1663                                         eo++;
1664                                         break;
1665                                 }
1666                         } else {
                                 /* separator character is NUL (mk_splitter stores that for an
                                  * empty separator string): skip leading newlines, then the
                                  * record ends at the next run of two or more newlines */
1667                                 while (b[rp] == '\n')
1668                                         rp++;
1669                                 s = strstr(b+rp, "\n\n");
1670                                 if (s) {
1671                                         so = eo = s-b;
1672                                         while (b[eo] == '\n') eo++;
1673                                         if (b[eo] != '\0')
1674                                                 break;
1675                                 }
1676                         }
1677                 }
1678
                 /* no complete record yet: move the pending data (and its
                  * terminating NUL) to the start of the buffer before reading more */
1679                 if (a > 0) {
1680                         memmove(m, (const void *)(m+a), p+1);
1681                         b = m;
1682                         a = 0;
1683                 }
1684
1685                 qrealloc(&m, a+p+128, &size);
1686                 b = m + a;
1687                 pp = p;
1688                 p += safe_read(fd, b+p, size-p-1);
                 /* p went down: safe_read() returned a negative value, i.e. it failed */
1689                 if (p < pp) {
1690                         p = 0;
1691                         r = 0;
1692                         setvar_i(V[ERRNO], errno);
1693                 }
1694                 b[p] = '\0';
1695
1696         } while (p > pp);       /* stop when the read added nothing */
1697
1698         if (p == 0) {
                 /* nothing buffered: r becomes 0 at end of input, -1 after a read error */
1699                 r--;
1700         } else {
                 /* temporarily NUL-terminate the record, then the terminator,
                  * to copy them out, restoring the overwritten bytes each time */
1701                 c = b[so]; b[so] = '\0';
1702                 setvar_s(v, b+rp);
1703                 v->type |= VF_USER;
1704                 b[so] = c;
1705                 c = b[eo]; b[eo] = '\0';
1706                 setvar_s(V[RT], b+so);
1707                 b[eo] = c;
1708         }
1709
         /* remember where the next record starts and how much data is left */
1710         rsm->buffer = m;
1711         rsm->adv = a + eo;
1712         rsm->pos = p - eo;
1713         rsm->size = size;
1714
1715         return r;
1716 }
1717
1718 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1719 {
1720         int r=0;
1721         char c;
1722         const char *s=format;
1723
1724         if (int_as_int && n == (int)n) {
1725                 r = snprintf(b, size, "%d", (int)n);
1726         } else {
1727                 do { c = *s; } while (*s && *++s);
1728                 if (strchr("diouxX", c)) {
1729                         r = snprintf(b, size, format, (int)n);
1730                 } else if (strchr("eEfgG", c)) {
1731                         r = snprintf(b, size, format, n);
1732                 } else {
1733                         runtime_error(EMSG_INV_FMT);
1734                 }
1735         }
1736         return r;
1737 }
1738
1739
1740 /* formatted output into an allocated buffer, return ptr to buffer */
1741 static char *awk_printf(node *n)
1742 {
1743         char *b = NULL;
1744         char *fmt, *s, *s1, *f;
1745         int i, j, incr, bsize;
1746         char c, c1;
1747         var *v, *arg;
1748
1749         v = nvalloc(1);
1750         fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1751
1752         i = 0;
1753         while (*f) {
1754                 s = f;
1755                 while (*f && (*f != '%' || *(++f) == '%'))
1756                         f++;
1757                 while (*f && !isalpha(*f))
1758                         f++;
1759
1760                 incr = (f - s) + MAXVARFMT;
1761                 qrealloc(&b, incr+i, &bsize);
1762                 c = *f; if (c != '\0') f++;
1763                 c1 = *f ; *f = '\0';
1764                 arg = evaluate(nextarg(&n), v);
1765
1766                 j = i;
1767                 if (c == 'c' || !c) {
1768                         i += sprintf(b+i, s,
1769                                         is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1770
1771                 } else if (c == 's') {
1772                     s1 = getvar_s(arg);
1773                         qrealloc(&b, incr+i+strlen(s1), &bsize);
1774                         i += sprintf(b+i, s, s1);
1775
1776                 } else {
1777                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1778                 }
1779                 *f = c1;
1780
1781                 /* if there was an error while sprintf, return value is negative */
1782                 if (i < j) i = j;
1783
1784         }
1785
1786         b = xrealloc(b, i+1);
1787         free(fmt);
1788         nvfree(v);
1789         b[i] = '\0';
1790         return b;
1791 }
1792
1793 /* common substitution routine
1794  * replace (nm) substring of (src) that match (n) with (repl), store
1795  * result into (dest), return number of substitutions. If nm=0, replace
1796  * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1797  * subexpression matching (\1-\9)
1798  */
1799 static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex)
1800 {
1801         char *ds = NULL;
1802         char *sp, *s;
1803         int c, i, j, di, rl, so, eo, nbs, n, dssize;
1804         regmatch_t pmatch[10];
1805         regex_t sreg, *re;
1806
1807         re = as_regex(rn, &sreg);
1808         if (! src) src = V[F0];
1809         if (! dest) dest = V[F0];
1810
1811         i = di = 0;
1812         sp = getvar_s(src);
1813         rl = strlen(repl);
1814         while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1815                 so = pmatch[0].rm_so;
1816                 eo = pmatch[0].rm_eo;
1817
1818                 qrealloc(&ds, di + eo + rl, &dssize);
1819                 memcpy(ds + di, sp, eo);
1820                 di += eo;
1821                 if (++i >= nm) {
1822                         /* replace */
1823                         di -= (eo - so);
1824                         nbs = 0;
1825                         for (s = repl; *s; s++) {
1826                                 ds[di++] = c = *s;
1827                                 if (c == '\\') {
1828                                         nbs++;
1829                                         continue;
1830                                 }
1831                                 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1832                                         di -= ((nbs + 3) >> 1);
1833                                         j = 0;
1834                                         if (c != '&') {
1835                                                 j = c - '0';
1836                                                 nbs++;
1837                                         }
1838                                         if (nbs % 2) {
1839                                                 ds[di++] = c;
1840                                         } else {
1841                                                 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1842                                                 qrealloc(&ds, di + rl + n, &dssize);
1843                                                 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1844                                                 di += n;
1845                                         }
1846                                 }
1847                                 nbs = 0;
1848                         }
1849                 }
1850
1851                 sp += eo;
1852                 if (i == nm) break;
1853                 if (eo == so) {
1854                         if (! (ds[di++] = *sp++)) break;
1855                 }
1856         }
1857
1858         qrealloc(&ds, di + strlen(sp), &dssize);
1859         strcpy(ds + di, sp);
1860         setvar_p(dest, ds);
1861         if (re == &sreg) regfree(re);
1862         return i;
1863 }
1864
1865 static var *exec_builtin(node *op, var *res)
1866 {
1867         int (*to_xxx)(int);
1868         var *tv;
1869         node *an[4];
1870         var  *av[4];
1871         char *as[4];
1872         regmatch_t pmatch[2];
1873         regex_t sreg, *re;
1874         static tsplitter tspl;
1875         node *spl;
1876         uint32_t isr, info;
1877         int nargs;
1878         time_t tt;
1879         char *s, *s1;
1880         int i, l, ll, n;
1881
1882         tv = nvalloc(4);
1883         isr = info = op->info;
1884         op = op->l.n;
1885
1886         av[2] = av[3] = NULL;
1887         for (i=0 ; i<4 && op ; i++) {
1888                 an[i] = nextarg(&op);
1889                 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1890                 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1891                 isr >>= 1;
1892         }
1893
1894         nargs = i;
1895         if (nargs < (info >> 30))
1896                 runtime_error(EMSG_TOO_FEW_ARGS);
1897
1898         switch (info & OPNMASK) {
1899
1900           case B_a2:
1901 #ifdef CONFIG_FEATURE_AWK_MATH
1902                 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1903 #else
1904                 runtime_error(EMSG_NO_MATH);
1905 #endif
1906                 break;
1907
1908           case B_sp:
1909                 if (nargs > 2) {
1910                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1911                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1912                 } else {
1913                         spl = &fsplitter.n;
1914                 }
1915
1916                 n = awk_split(as[0], spl, &s);
1917                 s1 = s;
1918                 clear_array(iamarray(av[1]));
1919                 for (i=1; i<=n; i++)
1920                         setari_u(av[1], i, nextword(&s1));
1921                 free(s);
1922                 setvar_i(res, n);
1923                 break;
1924
1925           case B_ss:
1926                 l = strlen(as[0]);
1927                 i = getvar_i(av[1]) - 1;
1928                 if (i>l) i=l; if (i<0) i=0;
1929                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1930                 if (n<0) n=0;
1931                 s = xmalloc(n+1);
1932                 strncpy(s, as[0]+i, n);
1933                 s[n] = '\0';
1934                 setvar_p(res, s);
1935                 break;
1936
1937           case B_lo:
1938                 to_xxx = tolower;
1939                 goto lo_cont;
1940
1941           case B_up:
1942                 to_xxx = toupper;
1943 lo_cont:
1944                 s1 = s = bb_xstrdup(as[0]);
1945                 while (*s1) {
1946                         *s1 = (*to_xxx)(*s1);
1947                         s1++;
1948                 }
1949                 setvar_p(res, s);
1950                 break;
1951
1952           case B_ix:
1953                 n = 0;
1954                 ll = strlen(as[1]);
1955                 l = strlen(as[0]) - ll;
1956                 if (ll > 0 && l >= 0) {
1957                         if (! icase) {
1958                                 s = strstr(as[0], as[1]);
1959                                 if (s) n = (s - as[0]) + 1;
1960                         } else {
1961                                 /* this piece of code is terribly slow and
1962                                  * really should be rewritten
1963                                  */
1964                                 for (i=0; i<=l; i++) {
1965                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1966                                                 n = i+1;
1967                                                 break;
1968                                         }
1969                                 }
1970                         }
1971                 }
1972                 setvar_i(res, n);
1973                 break;
1974
1975           case B_ti:
1976                 if (nargs > 1)
1977                         tt = getvar_i(av[1]);
1978                 else
1979                         time(&tt);
1980                 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1981                 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1982                 buf[i] = '\0';
1983                 setvar_s(res, buf);
1984                 break;
1985
1986           case B_ma:
1987                 re = as_regex(an[1], &sreg);
1988                 n = regexec(re, as[0], 1, pmatch, 0);
1989                 if (n == 0) {
1990                         pmatch[0].rm_so++;
1991                         pmatch[0].rm_eo++;
1992                 } else {
1993                         pmatch[0].rm_so = 0;
1994                         pmatch[0].rm_eo = -1;
1995                 }
1996                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1997                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1998                 setvar_i(res, pmatch[0].rm_so);
1999                 if (re == &sreg) regfree(re);
2000                 break;
2001
2002           case B_ge:
2003                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2004                 break;
2005
2006           case B_gs:
2007                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2008                 break;
2009
2010           case B_su:
2011                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2012                 break;
2013         }
2014
2015         nvfree(tv);
2016         return res;
2017 }
2018
2019 /*
2020  * Evaluate node - the heart of the program. Supplied with subtree
2021  * and place where to store result. returns ptr to result.
2022  */
2023 #define XC(n) ((n) >> 8)
2024
2025 static var *evaluate(node *op, var *res)
2026 {
2027         /* This procedure is recursive so we should count every byte */
2028         static var *fnargs = NULL;
2029         static unsigned int seed = 1;
2030         static regex_t sreg;
2031         node *op1;
2032         var *v1;
2033         union {
2034                 var *v;
2035                 char *s;
2036                 double d;
2037                 int i;
2038         } L, R;
2039         uint32_t opinfo;
2040         short opn;
2041         union {
2042                 char *s;
2043                 rstream *rsm;
2044                 FILE *F;
2045                 var *v;
2046                 regex_t *re;
2047                 uint32_t info;
2048         } X;
2049
2050         if (! op)
2051                 return setvar_s(res, NULL);
2052
2053         v1 = nvalloc(2);
2054
2055         while (op) {
2056
2057                 opinfo = op->info;
2058                 opn = (short)(opinfo & OPNMASK);
2059                 lineno = op->lineno;
2060
2061                 /* execute inevitable things */
2062                 op1 = op->l.n;
2063                 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2064                 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2065                 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2066                 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2067                 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2068
2069                 switch (XC(opinfo & OPCLSMASK)) {
2070
2071                   /* -- iterative node type -- */
2072
2073                   /* test pattern */
2074                   case XC( OC_TEST ):
2075                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2076                                 /* it's range pattern */
2077                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2078                                         op->info |= OF_CHECKED;
2079                                         if (ptest(op1->r.n))
2080                                                 op->info &= ~OF_CHECKED;
2081
2082                                         op = op->a.n;
2083                                 } else {
2084                                         op = op->r.n;
2085                                 }
2086                         } else {
2087                                 op = (ptest(op1)) ? op->a.n : op->r.n;
2088                         }
2089                         break;
2090
2091                   /* just evaluate an expression, also used as unconditional jump */
2092                   case XC( OC_EXEC ):
2093                         break;
2094
2095                   /* branch, used in if-else and various loops */
2096                   case XC( OC_BR ):
2097                         op = istrue(L.v) ? op->a.n : op->r.n;
2098                         break;
2099
2100                   /* initialize for-in loop */
2101                   case XC( OC_WALKINIT ):
2102                         hashwalk_init(L.v, iamarray(R.v));
2103                         break;
2104
2105                   /* get next array item */
2106                   case XC( OC_WALKNEXT ):
2107                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2108                         break;
2109
2110                   case XC( OC_PRINT ):
2111                   case XC( OC_PRINTF ):
2112                         X.F = stdout;
2113                         if (op->r.n) {
2114                                 X.rsm = newfile(R.s);
2115                                 if (! X.rsm->F) {
2116                                         if (opn == '|') {
2117                                                 if((X.rsm->F = popen(R.s, "w")) == NULL)
2118                                                         bb_perror_msg_and_die("popen");
2119                                                 X.rsm->is_pipe = 1;
2120                                         } else {
2121                                                 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
2122                                         }
2123                                 }
2124                                 X.F = X.rsm->F;
2125                         }
2126
2127                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2128                                 if (! op1) {
2129                                         fputs(getvar_s(V[F0]), X.F);
2130                                 } else {
2131                                         while (op1) {
2132                                                 L.v = evaluate(nextarg(&op1), v1);
2133                                                 if (L.v->type & VF_NUMBER) {
2134                                                         fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2135                                                                                                                 getvar_i(L.v), TRUE);
2136                                                         fputs(buf, X.F);
2137                                                 } else {
2138                                                         fputs(getvar_s(L.v), X.F);
2139                                                 }
2140
2141                                                 if (op1) fputs(getvar_s(V[OFS]), X.F);
2142                                         }
2143                                 }
2144                                 fputs(getvar_s(V[ORS]), X.F);
2145
2146                         } else {        /* OC_PRINTF */
2147                                 L.s = awk_printf(op1);
2148                                 fputs(L.s, X.F);
2149                                 free(L.s);
2150                         }
2151                         fflush(X.F);
2152                         break;
2153
2154                   case XC( OC_DELETE ):
2155                         X.info = op1->info & OPCLSMASK;
2156                         if (X.info == OC_VAR) {
2157                                 R.v = op1->l.v;
2158                         } else if (X.info == OC_FNARG) {
2159                                 R.v = &fnargs[op1->l.i];
2160                         } else {
2161                                 runtime_error(EMSG_NOT_ARRAY);
2162                         }
2163
2164                         if (op1->r.n) {
2165                                 clrvar(L.v);
2166                                 L.s = getvar_s(evaluate(op1->r.n, v1));
2167                                 hash_remove(iamarray(R.v), L.s);
2168                         } else {
2169                                 clear_array(iamarray(R.v));
2170                         }
2171                         break;
2172
2173                   case XC( OC_NEWSOURCE ):
2174                         programname = op->l.s;
2175                         break;
2176
2177                   case XC( OC_RETURN ):
2178                         copyvar(res, L.v);
2179                         break;
2180
2181                   case XC( OC_NEXTFILE ):
2182                         nextfile = TRUE;
2183                   case XC( OC_NEXT ):
2184                         nextrec = TRUE;
2185                   case XC( OC_DONE ):
2186                         clrvar(res);
2187                         break;
2188
2189                   case XC( OC_EXIT ):
2190                         awk_exit(L.d);
2191
2192                   /* -- recursive node type -- */
2193
2194                   case XC( OC_VAR ):
2195                         L.v = op->l.v;
2196                         if (L.v == V[NF])
2197                                 split_f0();
2198                         goto v_cont;
2199
2200                   case XC( OC_FNARG ):
2201                         L.v = &fnargs[op->l.i];
2202
2203 v_cont:
2204                         res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2205                         break;
2206
2207                   case XC( OC_IN ):
2208                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2209                         break;
2210
2211                   case XC( OC_REGEXP ):
2212                         op1 = op;
2213                         L.s = getvar_s(V[F0]);
2214                         goto re_cont;
2215
2216                   case XC( OC_MATCH ):
2217                         op1 = op->r.n;
2218 re_cont:
2219                         X.re = as_regex(op1, &sreg);
2220                         R.i = regexec(X.re, L.s, 0, NULL, 0);
2221                         if (X.re == &sreg) regfree(X.re);
2222                         setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2223                         break;
2224
2225                   case XC( OC_MOVE ):
2226                         /* if source is a temporary string, jusk relink it to dest */
2227                         if (R.v == v1+1 && R.v->string) {
2228                                 res = setvar_p(L.v, R.v->string);
2229                                 R.v->string = NULL;
2230                         } else {
2231                                 res = copyvar(L.v, R.v);
2232                         }
2233                         break;
2234
2235                   case XC( OC_TERNARY ):
2236                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2237                                 runtime_error(EMSG_POSSIBLE_ERROR);
2238                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2239                         break;
2240
2241                   case XC( OC_FUNC ):
2242                         if (! op->r.f->body.first)
2243                                 runtime_error(EMSG_UNDEF_FUNC);
2244
2245                         X.v = R.v = nvalloc(op->r.f->nargs+1);
2246                         while (op1) {
2247                                 L.v = evaluate(nextarg(&op1), v1);
2248                                 copyvar(R.v, L.v);
2249                                 R.v->type |= VF_CHILD;
2250                                 R.v->x.parent = L.v;
2251                                 if (++R.v - X.v >= op->r.f->nargs)
2252                                         break;
2253                         }
2254
2255                         R.v = fnargs;
2256                         fnargs = X.v;
2257
2258                         L.s = programname;
2259                         res = evaluate(op->r.f->body.first, res);
2260                         programname = L.s;
2261
2262                         nvfree(fnargs);
2263                         fnargs = R.v;
2264                         break;
2265
2266                   case XC( OC_GETLINE ):
2267                   case XC( OC_PGETLINE ):
2268                         if (op1) {
2269                                 X.rsm = newfile(L.s);
2270                                 if (! X.rsm->F) {
2271                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2272                                                 X.rsm->F = popen(L.s, "r");
2273                                                 X.rsm->is_pipe = TRUE;
2274                                         } else {
2275                                                 X.rsm->F = fopen(L.s, "r");             /* not bb_xfopen! */
2276                                         }
2277                                 }
2278                         } else {
2279                                 if (! iF) iF = next_input_file();
2280                                 X.rsm = iF;
2281                         }
2282
2283                         if (! X.rsm->F) {
2284                                 setvar_i(V[ERRNO], errno);
2285                                 setvar_i(res, -1);
2286                                 break;
2287                         }
2288
2289                         if (! op->r.n)
2290                                 R.v = V[F0];
2291
2292                         L.i = awk_getline(X.rsm, R.v);
2293                         if (L.i > 0) {
2294                                 if (! op1) {
2295                                         incvar(V[FNR]);
2296                                         incvar(V[NR]);
2297                                 }
2298                         }
2299                         setvar_i(res, L.i);
2300                         break;
2301
2302                   /* simple builtins */
2303                   case XC( OC_FBLTIN ):
2304                         switch (opn) {
2305
2306                           case F_in:
2307                                 R.d = (int)L.d;
2308                                 break;
2309
2310                           case F_rn:
2311                                 R.d =  (double)rand() / (double)RAND_MAX;
2312                                 break;
2313
2314 #ifdef CONFIG_FEATURE_AWK_MATH
2315                           case F_co:
2316                                 R.d = cos(L.d);
2317                                 break;
2318
2319                           case F_ex:
2320                                 R.d = exp(L.d);
2321                                 break;
2322
2323                           case F_lg:
2324                                 R.d = log(L.d);
2325                                 break;
2326
2327                           case F_si:
2328                                 R.d = sin(L.d);
2329                                 break;
2330
2331                           case F_sq:
2332                                 R.d = sqrt(L.d);
2333                                 break;
2334 #else
2335                           case F_co:
2336                           case F_ex:
2337                           case F_lg:
2338                           case F_si:
2339                           case F_sq:
2340                                 runtime_error(EMSG_NO_MATH);
2341                                 break;
2342 #endif
2343
2344                           case F_sr:
2345                                 R.d = (double)seed;
2346                                 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2347                                 srand(seed);
2348                                 break;
2349
2350                           case F_ti:
2351                                 R.d = time(NULL);
2352                                 break;
2353
2354                           case F_le:
2355                                 if (! op1)
2356                                         L.s = getvar_s(V[F0]);
2357                                 R.d = strlen(L.s);
2358                                 break;
2359
2360                           case F_sy:
2361                                 fflush(NULL);
2362                                 R.d = (L.s && *L.s) ? (system(L.s) >> 8) : 0;
2363                                 break;
2364
2365                           case F_ff:
2366                                 if (! op1)
2367                                         fflush(stdout);
2368                                 else {
2369                                         if (L.s && *L.s) {
2370                                                 X.rsm = newfile(L.s);
2371                                                 fflush(X.rsm->F);
2372                                         } else {
2373                                                 fflush(NULL);
2374                                         }
2375                                 }
2376                                 break;
2377
2378                           case F_cl:
2379                                 X.rsm = (rstream *)hash_search(fdhash, L.s);
2380                                 if (X.rsm) {
2381                                         R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2382                                         free(X.rsm->buffer);
2383                                         hash_remove(fdhash, L.s);
2384                                 }
2385                                 if (R.i != 0)
2386                                         setvar_i(V[ERRNO], errno);
2387                                 R.d = (double)R.i;
2388                                 break;
2389                         }
2390                         setvar_i(res, R.d);
2391                         break;
2392
2393                   case XC( OC_BUILTIN ):
2394                         res = exec_builtin(op, res);
2395                         break;
2396
2397                   case XC( OC_SPRINTF ):
2398                         setvar_p(res, awk_printf(op1));
2399                         break;
2400
2401                   case XC( OC_UNARY ):
2402                         X.v = R.v;
2403                         L.d = R.d = getvar_i(R.v);
2404                         switch (opn) {
2405                           case 'P':
2406                                 L.d = ++R.d;
2407                                 goto r_op_change;
2408                           case 'p':
2409                                 R.d++;
2410                                 goto r_op_change;
2411                           case 'M':
2412                                 L.d = --R.d;
2413                                 goto r_op_change;
2414                           case 'm':
2415                                 R.d--;
2416                                 goto r_op_change;
2417                           case '!':
2418                             L.d = istrue(X.v) ? 0 : 1;
2419                                 break;
2420                           case '-':
2421                                 L.d = -R.d;
2422                                 break;
2423                         r_op_change:
2424                                 setvar_i(X.v, R.d);
2425                         }
2426                         setvar_i(res, L.d);
2427                         break;
2428
2429                   case XC( OC_FIELD ):
2430                         R.i = (int)getvar_i(R.v);
2431                         if (R.i == 0) {
2432                                 res = V[F0];
2433                         } else {
2434                                 split_f0();
2435                                 if (R.i > nfields)
2436                                         fsrealloc(R.i);
2437
2438                                 res = &Fields[R.i-1];
2439                         }
2440                         break;
2441
2442                   /* concatenation (" ") and index joining (",") */
2443                   case XC( OC_CONCAT ):
2444                   case XC( OC_COMMA ):
2445                         opn = strlen(L.s) + strlen(R.s) + 2;
2446                         X.s = (char *)xmalloc(opn);
2447                         strcpy(X.s, L.s);
2448                         if ((opinfo & OPCLSMASK) == OC_COMMA) {
2449                                 L.s = getvar_s(V[SUBSEP]);
2450                                 X.s = (char *)xrealloc(X.s, opn + strlen(L.s));
2451                                 strcat(X.s, L.s);
2452                         }
2453                         strcat(X.s, R.s);
2454                         setvar_p(res, X.s);
2455                         break;
2456
2457                   case XC( OC_LAND ):
2458                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2459                         break;
2460
2461                   case XC( OC_LOR ):
2462                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2463                         break;
2464
2465                   case XC( OC_BINARY ):
2466                   case XC( OC_REPLACE ):
2467                         R.d = getvar_i(R.v);
2468                         switch (opn) {
2469                           case '+':
2470                                 L.d += R.d;
2471                                 break;
2472                           case '-':
2473                                 L.d -= R.d;
2474                                 break;
2475                           case '*':
2476                                 L.d *= R.d;
2477                                 break;
2478                           case '/':
2479                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2480                                 L.d /= R.d;
2481                                 break;
2482                           case '&':
2483 #ifdef CONFIG_FEATURE_AWK_MATH
2484                                 L.d = pow(L.d, R.d);
2485 #else
2486                                 runtime_error(EMSG_NO_MATH);
2487 #endif
2488                                 break;
2489                           case '%':
2490                                 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2491                                 L.d -= (int)(L.d / R.d) * R.d;
2492                                 break;
2493                         }
2494                         res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2495                         break;
2496
2497                   case XC( OC_COMPARE ):
2498                         if (is_numeric(L.v) && is_numeric(R.v)) {
2499                                 L.d = getvar_i(L.v) - getvar_i(R.v);
2500                         } else {
2501                                 L.s = getvar_s(L.v);
2502                                 R.s = getvar_s(R.v);
2503                                 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2504                         }
2505                         switch (opn & 0xfe) {
2506                           case 0:
2507                                 R.i = (L.d > 0);
2508                                 break;
2509                           case 2:
2510                                 R.i = (L.d >= 0);
2511                                 break;
2512                           case 4:
2513                                 R.i = (L.d == 0);
2514                                 break;
2515                         }
2516                         setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2517                         break;
2518
2519                   default:
2520                         runtime_error(EMSG_POSSIBLE_ERROR);
2521                 }
2522                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2523                         op = op->a.n;
2524                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2525                         break;
2526                 if (nextrec)
2527                         break;
2528         }
2529         nvfree(v1);
2530         return res;
2531 }
2532
2533
2534 /* -------- main & co. -------- */
2535
2536 static int awk_exit(int r)
2537 {
2538         unsigned int i;
2539         hash_item *hi;
2540         static var tv;
2541
2542         if (! exiting) {
2543                 exiting = TRUE;
2544                 nextrec = FALSE;
2545                 evaluate(endseq.first, &tv);
2546         }
2547
2548         /* waiting for children */
2549         for (i=0; i<fdhash->csize; i++) {
2550                 hi = fdhash->items[i];
2551                 while(hi) {
2552                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2553                                 pclose(hi->data.rs.F);
2554                         hi = hi->next;
2555                 }
2556         }
2557
2558         exit(r);
2559 }
2560
2561 /* if expr looks like "var=value", perform assignment and return 1,
2562  * otherwise return 0 */
2563 static int is_assignment(const char *expr)
2564 {
2565         char *exprc, *s, *s0, *s1;
2566
2567         exprc = bb_xstrdup(expr);
2568         if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2569                 free(exprc);
2570                 return FALSE;
2571         }
2572
2573         *(s++) = '\0';
2574         s0 = s1 = s;
2575         while (*s)
2576                 *(s1++) = nextchar(&s);
2577
2578         *s1 = '\0';
2579         setvar_u(newvar(exprc), s0);
2580         free(exprc);
2581         return TRUE;
2582 }
2583
2584 /* switch to next input file */
2585 static rstream *next_input_file(void)
2586 {
2587         static rstream rsm;
2588         FILE *F = NULL;
2589         char *fname, *ind;
2590         static int files_happen = FALSE;
2591
2592         if (rsm.F) fclose(rsm.F);
2593         rsm.F = NULL;
2594         rsm.pos = rsm.adv = 0;
2595
2596         do {
2597                 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2598                         if (files_happen)
2599                                 return NULL;
2600                         fname = "-";
2601                         F = stdin;
2602                 } else {
2603                         ind = getvar_s(incvar(V[ARGIND]));
2604                         fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2605                         if (fname && *fname && !is_assignment(fname))
2606                                 F = afopen(fname, "r");
2607                 }
2608         } while (!F);
2609
2610         files_happen = TRUE;
2611         setvar_s(V[FILENAME], fname);
2612         rsm.F = F;
2613         return &rsm;
2614 }
2615
2616 int awk_main(int argc, char **argv)
2617 {
2618         char *s, *s1;
2619         int i, j, c, flen;
2620         var *v;
2621         static var tv;
2622         char **envp;
2623         static int from_file = FALSE;
2624         rstream *rsm;
2625         FILE *F, *stdfiles[3];
2626         static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2627
2628         /* allocate global buffer */
2629         buf = xmalloc(MAXVARFMT+1);
2630
2631         vhash = hash_init();
2632         ahash = hash_init();
2633         fdhash = hash_init();
2634         fnhash = hash_init();
2635
2636         /* initialize variables */
2637         for (i=0;  *vNames;  i++) {
2638                 V[i] = v = newvar(nextword(&vNames));
2639                 if (*vValues != '\377')
2640                         setvar_s(v, nextword(&vValues));
2641                 else
2642                         setvar_i(v, 0);
2643
2644                 if (*vNames == '*') {
2645                         v->type |= VF_SPECIAL;
2646                         vNames++;
2647                 }
2648         }
2649
2650         handle_special(V[FS]);
2651         handle_special(V[RS]);
2652
2653         stdfiles[0] = stdin;
2654         stdfiles[1] = stdout;
2655         stdfiles[2] = stderr;
2656         for (i=0; i<3; i++) {
2657                 rsm = newfile(nextword(&stdnames));
2658                 rsm->F = stdfiles[i];
2659         }
2660
2661         for (envp=environ; *envp; envp++) {
2662                 s = bb_xstrdup(*envp);
2663                 s1 = strchr(s, '=');
2664                 if (!s1) {
2665                         goto keep_going;
2666                 }
2667                 *(s1++) = '\0';
2668                 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2669 keep_going:
2670                 free(s);
2671         }
2672
2673         while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2674                 switch (c) {
2675                         case 'F':
2676                                 setvar_s(V[FS], optarg);
2677                                 break;
2678                         case 'v':
2679                                 if (! is_assignment(optarg))
2680                                         bb_show_usage();
2681                                 break;
2682                         case 'f':
2683                                 from_file = TRUE;
2684                                 F = afopen(programname = optarg, "r");
2685                                 s = NULL;
2686                                 /* one byte is reserved for some trick in next_token */
2687                                 if (fseek(F, 0, SEEK_END) == 0) {
2688                                         flen = ftell(F);
2689                                         s = (char *)xmalloc(flen+4);
2690                                         fseek(F, 0, SEEK_SET);
2691                                         i = 1 + fread(s+1, 1, flen, F);
2692                                 } else {
2693                                         for (i=j=1; j>0; i+=j) {
2694                                                 s = (char *)xrealloc(s, i+4096);
2695                                                 j = fread(s+i, 1, 4094, F);
2696                                         }
2697                                 }
2698                                 s[i] = '\0';
2699                                 fclose(F);
2700                                 parse_program(s+1);
2701                                 free(s);
2702                                 break;
2703                         case 'W':
2704                                 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
2705                                 break;
2706
2707                         default:
2708                                 bb_show_usage();
2709                 }
2710         }
2711
2712         if (!from_file) {
2713                 if (argc == optind)
2714                         bb_show_usage();
2715                 programname="cmd. line";
2716                 parse_program(argv[optind++]);
2717
2718         }
2719
2720         /* fill in ARGV array */
2721         setvar_i(V[ARGC], argc - optind + 1);
2722         setari_u(V[ARGV], 0, "awk");
2723         for(i=optind; i < argc; i++)
2724                 setari_u(V[ARGV], i+1-optind, argv[i]);
2725
2726         evaluate(beginseq.first, &tv);
2727         if (! mainseq.first && ! endseq.first)
2728                 awk_exit(EXIT_SUCCESS);
2729
2730         /* input file could already be opened in BEGIN block */
2731         if (! iF) iF = next_input_file();
2732
2733         /* passing through input files */
2734         while (iF) {
2735
2736                 nextfile = FALSE;
2737                 setvar_i(V[FNR], 0);
2738
2739                 while ((c = awk_getline(iF, V[F0])) > 0) {
2740
2741                         nextrec = FALSE;
2742                         incvar(V[NR]);
2743                         incvar(V[FNR]);
2744                         evaluate(mainseq.first, &tv);
2745
2746                         if (nextfile)
2747                                 break;
2748                 }
2749
2750                 if (c < 0)
2751                         runtime_error(strerror(errno));
2752
2753                 iF = next_input_file();
2754
2755         }
2756
2757         awk_exit(EXIT_SUCCESS);
2758
2759         return 0;
2760 }
2761