awk: fix breakage in last commit
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 #include "libbb.h"
11 #include "xregex.h"
12 #include <math.h>
13
14 /* This is a NOEXEC applet. Be very careful! */
15
16
17 /* If you comment out one of these below, it will be #defined later
18  * to perform debug printfs to stderr: */
19 #define debug_printf_walker(...)  do {} while (0)
20 #define debug_printf_eval(...)  do {} while (0)
21
/* Fallback definitions: each #ifndef only fires when the matching no-op
 * definition above has been commented out; the macro then forwards all of
 * its arguments to fprintf(stderr, ...). */
22 #ifndef debug_printf_walker
23 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
24 #endif
25 #ifndef debug_printf_eval
26 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
27 #endif
28
29
30
31 #define MAXVARFMT       240
32 #define MINNVBLOCK      64
33
34 /* variable flags */
35 #define VF_NUMBER       0x0001  /* 1 = primary type is number */
36 #define VF_ARRAY        0x0002  /* 1 = it's an array */
37
38 #define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
39 #define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
40 #define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
41 #define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
42 #define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
43 #define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
44 #define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
45
46 /* these flags are static, don't change them when value is changed */
47 #define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
48
/* One node of the list that var.x.walker points to (see VF_WALK).
 * Nodes are chained through 'prev'. */
typedef struct walker_list {
	char *end, *cur;
	struct walker_list *prev;
	/* NOTE(review): declared with one element; presumably over-allocated
	 * by whoever creates the node - confirm at the allocation site. */
	char wbuf[1];
} walker_list;
55
56 /* Variable */
57 typedef struct var_s {
58         unsigned type;            /* flags */
59         double number;
60         char *string;
61         union {
62                 int aidx;               /* func arg idx (for compilation stage) */
63                 struct xhash_s *array;  /* array ptr */
64                 struct var_s *parent;   /* for func args, ptr to actual parameter */
65                 walker_list *walker;    /* list of array elements (for..in) */
66         } x;
67 } var;
68
/* A chain of nodes (pattern-action chain, BEGIN, END, function bodies),
 * tracked by its first and last node. */
typedef struct chain_s {
	struct node_s *first, *last;
	const char *programname;
} chain;
75
76 /* Function */
77 typedef struct func_s {
78         unsigned nargs;
79         struct chain_s body;
80 } func;
81
82 /* I/O stream */
83 typedef struct rstream_s {
84         FILE *F;
85         char *buffer;
86         int adv;
87         int size;
88         int pos;
89         smallint is_pipe;
90 } rstream;
91
92 typedef struct hash_item_s {
93         union {
94                 struct var_s v;         /* variable/array hash */
95                 struct rstream_s rs;    /* redirect streams hash */
96                 struct func_s f;        /* functions hash */
97         } data;
98         struct hash_item_s *next;       /* next in chain */
99         char name[1];                   /* really it's longer */
100 } hash_item;
101
/* Chained hash table */
typedef struct xhash_s {
	unsigned nel;                   /* number of stored elements */
	unsigned csize;                 /* current bucket count */
	unsigned nprime;                /* index into PRIMES[] of the next size */
	unsigned glen;                  /* total of (strlen(name) + 1) over all items */
	struct hash_item_s **items;     /* bucket array, csize entries */
} xhash;
109
110 /* Tree node */
111 typedef struct node_s {
112         uint32_t info;
113         unsigned lineno;
114         union {
115                 struct node_s *n;
116                 var *v;
117                 int aidx;
118                 char *new_progname;
119                 regex_t *re;
120         } l;
121         union {
122                 struct node_s *n;
123                 regex_t *ire;
124                 func *f;
125         } r;
126         union {
127                 struct node_s *n;
128         } a;
129 } node;
130
131 /* Block of temporary variables */
132 typedef struct nvblock_s {
133         int size;
134         var *pos;
135         struct nvblock_s *prev;
136         struct nvblock_s *next;
137         var nv[];
138 } nvblock;
139
140 typedef struct tsplitter_s {
141         node n;
142         regex_t re[2];
143 } tsplitter;
144
/* Simple token classes: exactly one bit each.
 * Order and hex values are very important!!!  See next_token() */
#define TC_SEQSTART     0x00000001      /* ( */
#define TC_SEQTERM      0x00000002      /* ) */
#define TC_REGEXP       0x00000004      /* /.../ */
#define TC_OUTRDR       0x00000008      /* | > >> */
#define TC_UOPPOST      0x00000010      /* unary postfix operator */
#define TC_UOPPRE1      0x00000020      /* unary prefix operator */
#define TC_BINOPX       0x00000040      /* two-opnd operator */
#define TC_IN           0x00000080
#define TC_COMMA        0x00000100
#define TC_PIPE         0x00000200      /* input redirection pipe */
#define TC_UOPPRE2      0x00000400      /* unary prefix operator */
#define TC_ARRTERM      0x00000800      /* ] */
#define TC_GRPSTART     0x00001000      /* { */
#define TC_GRPTERM      0x00002000      /* } */
#define TC_SEMICOL      0x00004000
#define TC_NEWLINE      0x00008000
#define TC_STATX        0x00010000      /* ctl statement (for, next...) */
#define TC_WHILE        0x00020000
#define TC_ELSE         0x00040000
#define TC_BUILTIN      0x00080000
#define TC_GETLINE      0x00100000
#define TC_FUNCDECL     0x00200000      /* `function' `func' */
#define TC_BEGIN        0x00400000
#define TC_END          0x00800000
#define TC_EOF          0x01000000
#define TC_VARIABLE     0x02000000
#define TC_ARRAY        0x04000000
#define TC_FUNCTION     0x08000000
#define TC_STRING       0x10000000
#define TC_NUMBER       0x20000000

/* either kind of unary prefix operator */
#define TC_UOPPRE  (TC_UOPPRE2 | TC_UOPPRE1)
179
/* Combined token classes: unions of the single-bit classes */
#define TC_BINOP   (TC_IN | TC_PIPE | TC_COMMA | TC_BINOPX)
#define TC_UNARYOP (TC_UOPPOST | TC_UOPPRE)
#define TC_OPERAND (TC_NUMBER | TC_STRING | TC_SEQSTART | TC_GETLINE \
                   | TC_BUILTIN | TC_FUNCTION | TC_ARRAY | TC_VARIABLE)

#define TC_STATEMNT (TC_WHILE | TC_STATX)
#define TC_OPTERM  (TC_NEWLINE | TC_SEMICOL)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD    (TC_END | TC_BEGIN | TC_FUNCDECL | TC_GETLINE \
                   | TC_BUILTIN | TC_ELSE | TC_STATEMNT | TC_IN)

/* discard newlines after these */
#define TC_NOTERM  (TC_OPTERM | TC_BINOP \
                   | TC_GRPTERM | TC_GRPSTART | TC_COMMA)

/* what can expression begin with */
#define TC_OPSEQ   (TC_REGEXP | TC_UOPPRE | TC_OPERAND)
/* what can group begin with */
#define TC_GRPSEQ  (TC_GRPSTART | TC_STATEMNT | TC_OPTERM | TC_OPSEQ)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1 (TC_UOPPOST | TC_NUMBER | TC_STRING \
                   | TC_SEQTERM | TC_ARRTERM | TC_VARIABLE)
#define TC_CONCAT2 (TC_UOPPRE | TC_OPERAND)
207
/* Operator flags: single bits above the operation-class byte */
#define OF_RES1    (1 << 16)
#define OF_RES2    (1 << 17)
#define OF_STR1    (1 << 18)
#define OF_STR2    (1 << 19)
#define OF_NUM1    (1 << 20)
#define OF_CHECKED (1 << 21)

/* Combined operator flags. The two-letter names read left operand, right
 * operand: x = no flag, V = OF_RESn, S = OF_RESn|OF_STRn, N = OF_RES1|OF_NUM1 */
#define xx      0
#define xV      OF_RES2
#define xS      (OF_STR2 | OF_RES2)
#define Vx      OF_RES1
#define VV      (OF_RES2 | OF_RES1)
#define Nx      (OF_NUM1 | OF_RES1)
#define NV      (OF_RES2 | OF_NUM1 | OF_RES1)
#define Sx      (OF_STR1 | OF_RES1)
#define SV      (OF_RES2 | OF_STR1 | OF_RES1)
#define SS      (OF_STR2 | OF_RES2 | OF_STR1 | OF_RES1)

/* masks for the operation class (OC_*, ST_*) and the low 7 bits of info */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F
229
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() places its argument in that highest byte. The argument is
 * parenthesized so that expressions such as P(a|b) shift the whole value,
 * and the shift is done in uint32_t: builtin codes like 0x83 or 0xd6 set
 * bit 31, which a signed int shift cannot represent (undefined behavior).
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
237
/* Operation classes: stored in the byte selected by OPCLSMASK */

/* Two boundary values inside the OC_* range:
 * SHIFT_TIL_THIS has the value of OC_WALKINIT,
 * RECUR_FROM_THIS has the value of OC_BINARY. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000

enum {
	OC_DELETE    = 0x0100,
	OC_EXEC      = 0x0200,
	OC_NEWSOURCE = 0x0300,
	OC_PRINT     = 0x0400,
	OC_PRINTF    = 0x0500,
	OC_WALKINIT  = 0x0600,

	OC_BR        = 0x0700,
	OC_BREAK     = 0x0800,
	OC_CONTINUE  = 0x0900,
	OC_EXIT      = 0x0a00,
	OC_NEXT      = 0x0b00,
	OC_NEXTFILE  = 0x0c00,
	OC_TEST      = 0x0d00,
	OC_WALKNEXT  = 0x0e00,

	OC_BINARY    = 0x1000,
	OC_BUILTIN   = 0x1100,
	OC_COLON     = 0x1200,
	OC_COMMA     = 0x1300,
	OC_COMPARE   = 0x1400,
	OC_CONCAT    = 0x1500,
	OC_FBLTIN    = 0x1600,
	OC_FIELD     = 0x1700,
	OC_FNARG     = 0x1800,
	OC_FUNC      = 0x1900,
	OC_GETLINE   = 0x1a00,
	OC_IN        = 0x1b00,
	OC_LAND      = 0x1c00,
	OC_LOR       = 0x1d00,
	OC_MATCH     = 0x1e00,
	OC_MOVE      = 0x1f00,
	OC_PGETLINE  = 0x2000,
	OC_REGEXP    = 0x2100,
	OC_REPLACE   = 0x2200,
	OC_RETURN    = 0x2300,
	OC_SPRINTF   = 0x2400,
	OC_TERNARY   = 0x2500,
	OC_UNARY     = 0x2600,
	OC_VAR       = 0x2700,
	OC_DONE      = 0x2800,

	ST_IF        = 0x3000,
	ST_DO        = 0x3100,
	ST_FOR       = 0x3200,
	ST_WHILE     = 0x3300
};
264
/* simple builtins: codes combined with OC_FBLTIN in tokeninfo[] */
enum {
	F_in,
	F_rn,
	F_co,
	F_ex,
	F_lg,
	F_si,
	F_sq,
	F_sr,
	F_ti,
	F_le,
	F_sy,
	F_ff,
	F_cl
};
270
/* builtins: codes combined with OC_BUILTIN (OC_B) in tokeninfo[] */
enum {
	B_a2,
	B_ix,
	B_ma,
	B_sp,
	B_ss,
	B_ti,
	B_mt,
	B_lo,
	B_up,
	B_ge,
	B_gs,
	B_su,
	B_an,
	B_co,
	B_ls,
	B_or,
	B_rs,
	B_xo,
};
277
278 /* tokens and their corresponding info values */
279
280 #define NTC     "\377"  /* switch to next token class (tc<<1) */
281 #define NTCC    '\377'
282
283 #define OC_B    OC_BUILTIN
284
/* Token table. Each token is stored as one length byte (written as an octal
 * escape, e.g. "\10continue" = 8 characters) followed by the token text.
 * An NTC byte ends the tokens of one class and moves on to the next class;
 * the classes appear in the same order as the TC_* bits (TC_SEQSTART first,
 * TC_END last). Tokens between "\4else" NTC and the next NTC, including the
 * bitwise functions "and" ... "xor", all belong to the BUILTIN class.
 * tokeninfo[] below is laid out in the same order. */
285 static const char tokenlist[] ALIGN1 =
286         "\1("         NTC
287         "\1)"         NTC
288         "\1/"         NTC                                   /* REGEXP */
289         "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
290         "\2++"        "\2--"        NTC                     /* UOPPOST */
291         "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
292         "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
293         "\2*="        "\2/="        "\2%="      "\2^="
294         "\1+"         "\1-"         "\3**="     "\2**"
295         "\1/"         "\1%"         "\1^"       "\1*"
296         "\2!="        "\2>="        "\2<="      "\1>"
297         "\1<"         "\2!~"        "\1~"       "\2&&"
298         "\2||"        "\1?"         "\1:"       NTC
299         "\2in"        NTC
300         "\1,"         NTC
301         "\1|"         NTC
302         "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
303         "\1]"         NTC
304         "\1{"         NTC
305         "\1}"         NTC
306         "\1;"         NTC
307         "\1\n"        NTC
308         "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
309         "\10continue" "\6delete"    "\5print"
310         "\6printf"    "\4next"      "\10nextfile"
311         "\6return"    "\4exit"      NTC
312         "\5while"     NTC
313         "\4else"      NTC
314
315         "\3and"       "\5compl"     "\6lshift"  "\2or"
316         "\6rshift"    "\3xor"
317         "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
318         "\3cos"       "\3exp"       "\3int"     "\3log"
319         "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
320         "\6gensub"    "\4gsub"      "\5index"   "\6length"
321         "\5match"     "\5split"     "\7sprintf" "\3sub"
322         "\6substr"    "\7systime"   "\10strftime" "\6mktime"
323         "\7tolower"   "\7toupper"   NTC
324         "\7getline"   NTC
325         "\4func"      "\10function" NTC
326         "\5BEGIN"     NTC
327         "\3END"
328         /* compiler adds trailing "\0" */
329         ;
330
/* Info word for every token in tokenlist[], in the same order (the inline
 * comments mark the entries for `in`, `]`, `\n`, `else` and `END`).
 * A word is built by OR-ing an operation class (OC_* / ST_*), operand flags
 * (xV, NV, Sx, ...), a P() priority and, for some operators, a character or
 * small number in the low bits. For OC_B (builtin) entries the P() byte
 * encodes argument handling instead of priority, as described at P(). */
331 static const uint32_t tokeninfo[] = {
332         0,
333         0,
334         OC_REGEXP,
335         xS|'a',                  xS|'w',                  xS|'|',
336         OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
337         OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
338         OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
339         OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
340         OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
341         OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
342         OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
343         OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
344         OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
345         OC_IN|SV|P(49), /* in */
346         OC_COMMA|SS|P(80),
347         OC_PGETLINE|SV|P(37),
348         OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
349         0, /* ] */
350         0,
351         0,
352         0,
353         0, /* \n */
354         ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
355         OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
356         OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
357         OC_RETURN|Vx, OC_EXIT|Nx,
358         ST_WHILE,
359         0, /* else */
360
361         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
362         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
363         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
364         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
365         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
366         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
367         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
368         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
369         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
370         OC_GETLINE|SV|P(0),
371         0,                 0,
372         0,
373         0 /* END */
374 };
375
376 /* internal variable names and their initial values       */
377 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
/* Indices into intvar[], in the same order as the names in vNames[] */
enum {
	CONVFMT,
	OFMT,
	FS,
	OFS,
	ORS,
	RS,
	RT,
	FILENAME,
	SUBSEP,
	F0,
	ARGIND,
	ARGC,
	ARGV,
	ERRNO,
	FNR,
	NR,
	NF,
	IGNORECASE,
	ENVIRON,
	NUM_INTERNAL_VARS
};
385
386 static const char vNames[] ALIGN1 =
387         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
388         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
389         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
390         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
391         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
392
393 static const char vValues[] ALIGN1 =
394         "%.6g\0"    "%.6g\0"    " \0"       " \0"
395         "\n\0"      "\n\0"      "\0"        "\0"
396         "\034\0"    "\0"        "\377";
397
398 /* hash size may grow to these values */
399 #define FIRST_PRIME 61
400 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
401
402
403 /* Globals. Split in two parts so that first one is addressed
404  * with (mostly short) negative offsets.
405  * NB: it's unsafe to put members of type "double"
406  * into globals2 (gcc may fail to align them).
407  */
408 struct globals {
409         double t_double;
410         chain beginseq, mainseq, endseq;
411         chain *seq;
412         node *break_ptr, *continue_ptr;
413         rstream *iF;
414         xhash *vhash, *ahash, *fdhash, *fnhash;
415         const char *g_progname;
416         int g_lineno;
417         int nfields;
418         int maxfields; /* used in fsrealloc() only */
419         var *Fields;
420         nvblock *g_cb;
421         char *g_pos;
422         char *g_buf;
423         smallint icase;
424         smallint exiting;
425         smallint nextrec;
426         smallint nextfile;
427         smallint is_f0_split;
428 };
429 struct globals2 {
430         uint32_t t_info; /* often used */
431         uint32_t t_tclass;
432         char *t_string;
433         int t_lineno;
434         int t_rollback;
435
436         var *intvar[NUM_INTERNAL_VARS]; /* often used */
437
438         /* former statics from various functions */
439         char *split_f0__fstrings;
440
441         uint32_t next_token__save_tclass;
442         uint32_t next_token__save_info;
443         uint32_t next_token__ltclass;
444         smallint next_token__concat_inserted;
445
446         smallint next_input_file__files_happen;
447         rstream next_input_file__rsm;
448
449         var *evaluate__fnargs;
450         unsigned evaluate__seed;
451         regex_t evaluate__sreg;
452
453         var ptest__v;
454
455         tsplitter exec_builtin__tspl;
456
457         /* biggest and least used members go last */
458         tsplitter fsplitter, rsplitter;
459 };
/* Access to the two global structs. INIT_G() below allocates them back to
 * back and points ptr_to_globals just past the first one, so G1 (struct
 * globals) is the element at index -1 and G (struct globals2) starts at
 * ptr_to_globals itself.
 * NOTE(review): ptr_to_globals is declared outside this file; the [-1]
 * indexing presumes it is a `struct globals *` - confirm. */
460 #define G1 (ptr_to_globals[-1])
461 #define G (*(struct globals2 *)ptr_to_globals)
462 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
463 /*char G1size[sizeof(G1)]; - 0x74 */
464 /*char Gsize[sizeof(G)]; - 0x1c4 */
465 /* Trying to keep most of members accessible with short offsets: */
466 /*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
/* Shorthand names: the rest of the file refers to these members as if they
 * were plain global variables. */
467 #define t_double     (G1.t_double    )
468 #define beginseq     (G1.beginseq    )
469 #define mainseq      (G1.mainseq     )
470 #define endseq       (G1.endseq      )
471 #define seq          (G1.seq         )
472 #define break_ptr    (G1.break_ptr   )
473 #define continue_ptr (G1.continue_ptr)
474 #define iF           (G1.iF          )
475 #define vhash        (G1.vhash       )
476 #define ahash        (G1.ahash       )
477 #define fdhash       (G1.fdhash      )
478 #define fnhash       (G1.fnhash      )
479 #define g_progname   (G1.g_progname  )
480 #define g_lineno     (G1.g_lineno    )
481 #define nfields      (G1.nfields     )
482 #define maxfields    (G1.maxfields   )
483 #define Fields       (G1.Fields      )
484 #define g_cb         (G1.g_cb        )
485 #define g_pos        (G1.g_pos       )
486 #define g_buf        (G1.g_buf       )
487 #define icase        (G1.icase       )
488 #define exiting      (G1.exiting     )
489 #define nextrec      (G1.nextrec     )
490 #define nextfile     (G1.nextfile    )
491 #define is_f0_split  (G1.is_f0_split )
492 #define t_info       (G.t_info      )
493 #define t_tclass     (G.t_tclass    )
494 #define t_string     (G.t_string    )
495 #define t_lineno     (G.t_lineno    )
496 #define t_rollback   (G.t_rollback  )
497 #define intvar       (G.intvar      )
498 #define fsplitter    (G.fsplitter   )
499 #define rsplitter    (G.rsplitter   )
/* Allocate both global structs in one zero-filled chunk, point
 * ptr_to_globals at the second one, and set the two members whose initial
 * value is not zero: the "last token class" starts as TC_OPTERM and the
 * random seed starts at 1. */
500 #define INIT_G() do { \
501         SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
502         G.next_token__ltclass = TC_OPTERM; \
503         G.evaluate__seed = 1; \
504 } while (0)
505
506
507 /* function prototypes */
/* Forward declarations for functions that are used in this file before
 * their definitions appear (e.g. handle_special() and fmt_num() are called
 * by the variable helpers below). */
508 static void handle_special(var *);
509 static node *parse_expr(uint32_t);
510 static void chain_group(void);
511 static var *evaluate(node *, var *);
512 static rstream *next_input_file(void);
513 static int fmt_num(char *, int, const char *, double, int);
514 static int awk_exit(int) NORETURN;
515
516 /* ---- error handling ---- */
517
/* Message texts; syntax_error() prints one of these after the program name
 * and line number. */
518 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
519 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
520 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
521 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
522 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
523 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
524 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
525 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
526 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* Only defined when math support is configured out */
527 #if !ENABLE_FEATURE_AWK_LIBM
528 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
529 #endif
530
531 static void zero_out_var(var *vp)
532 {
533         memset(vp, 0, sizeof(*vp));
534 }
535
536 static void syntax_error(const char *message) NORETURN;
537 static void syntax_error(const char *message)
538 {
539         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
540 }
541
542 /* ---- hash stuff ---- */
543
/* Hash a NUL-terminated name. Each character updates the hash as
 * h = h * 63 + ch in wrapping unsigned arithmetic; the empty string
 * hashes to 0. Callers reduce the result modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63u + (unsigned)*p;
	return h;
}
552
553 /* create new hash */
554 static xhash *hash_init(void)
555 {
556         xhash *newhash;
557
558         newhash = xzalloc(sizeof(*newhash));
559         newhash->csize = FIRST_PRIME;
560         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
561
562         return newhash;
563 }
564
565 /* find item in hash, return ptr to data, NULL if not found */
566 static void *hash_search(xhash *hash, const char *name)
567 {
568         hash_item *hi;
569
570         hi = hash->items[hashidx(name) % hash->csize];
571         while (hi) {
572                 if (strcmp(hi->name, name) == 0)
573                         return &hi->data;
574                 hi = hi->next;
575         }
576         return NULL;
577 }
578
579 /* grow hash if it becomes too big */
580 static void hash_rebuild(xhash *hash)
581 {
582         unsigned newsize, i, idx;
583         hash_item **newitems, *hi, *thi;
584
585         if (hash->nprime == ARRAY_SIZE(PRIMES))
586                 return;
587
588         newsize = PRIMES[hash->nprime++];
589         newitems = xzalloc(newsize * sizeof(newitems[0]));
590
591         for (i = 0; i < hash->csize; i++) {
592                 hi = hash->items[i];
593                 while (hi) {
594                         thi = hi;
595                         hi = thi->next;
596                         idx = hashidx(thi->name) % newsize;
597                         thi->next = newitems[idx];
598                         newitems[idx] = thi;
599                 }
600         }
601
602         free(hash->items);
603         hash->csize = newsize;
604         hash->items = newitems;
605 }
606
607 /* find item in hash, add it if necessary. Return ptr to data */
608 static void *hash_find(xhash *hash, const char *name)
609 {
610         hash_item *hi;
611         unsigned idx;
612         int l;
613
614         hi = hash_search(hash, name);
615         if (!hi) {
616                 if (++hash->nel / hash->csize > 10)
617                         hash_rebuild(hash);
618
619                 l = strlen(name) + 1;
620                 hi = xzalloc(sizeof(*hi) + l);
621                 strcpy(hi->name, name);
622
623                 idx = hashidx(name) % hash->csize;
624                 hi->next = hash->items[idx];
625                 hash->items[idx] = hi;
626                 hash->glen += l;
627         }
628         return &hi->data;
629 }
630
/* Typed wrappers around hash_find() (find-or-create): findvar takes the
 * hash explicitly, the others use the global variable, file-descriptor and
 * function hashes respectively. */
631 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
632 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
633 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
634 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
635
636 static void hash_remove(xhash *hash, const char *name)
637 {
638         hash_item *hi, **phi;
639
640         phi = &hash->items[hashidx(name) % hash->csize];
641         while (*phi) {
642                 hi = *phi;
643                 if (strcmp(hi->name, name) == 0) {
644                         hash->glen -= (strlen(name) + 1);
645                         hash->nel--;
646                         *phi = hi->next;
647                         free(hi);
648                         break;
649                 }
650                 phi = &hi->next;
651         }
652 }
653
654 /* ------ some useful functions ------ */
655
656 static char *skip_spaces(char *p)
657 {
658         while (1) {
659                 if (*p == '\\' && p[1] == '\n') {
660                         p++;
661                         t_lineno++;
662                 } else if (*p != ' ' && *p != '\t') {
663                         break;
664                 }
665                 p++;
666         }
667         return p;
668 }
669
/* Return the word *s currently points to, and advance *s to the byte just
 * after that word's terminating NUL. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
678
/* Fetch the next character from *s and advance *s past it.
 * A backslash is handed to bb_process_escape_sequence(). If that still
 * yields a backslash without having consumed anything (unrecognized \z),
 * the character after the backslash is returned literally and skipped,
 * unless it is the terminating NUL. */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	++*s;
	after = *s;

	if (ch != '\\')
		return ch;

	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		if (ch != '\0')
			++*s;
	}
	return ch;
}
694
695 static ALWAYS_INLINE int isalnum_(int c)
696 {
697         return (isalnum(c) || c == '_');
698 }
699
/* Parse a number at *pp and advance *pp past what was consumed.
 * With ENABLE_DESKTOP, input starting with '0' followed by 'x'/'X' or a
 * digit is first tried as a hex/octal integer via strtoull(); otherwise,
 * and in all remaining cases, strtod() does the conversion. */
static double my_strtod(char **pp)
{
	char *start = *pp;
#if ENABLE_DESKTOP
	if (start[0] == '0') {
		/* Might be hex or octal integer: 0x123abc or 07777 */
		char second = (start[1] | 0x20);
		int is_hex = (second == 'x');

		if (is_hex || isdigit(start[1])) {
			unsigned long long ull = strtoull(start, pp, 0);
			char stop;

			if (is_hex)
				return ull;
			stop = **pp;
			if (stop != '.' && !isdigit(stop))
				return ull;
			/* Parsing stopped at a digit or '.', so this may be a
			 * floating number after all. Examples:
			 * 009.123 (*pp points to '9')
			 * 000.123 (*pp points to '.')
			 * Re-parse from the start with strtod.
			 */
		}
	}
#endif
	return strtod(start, pp);
}
724
725 /* -------- working with variables (set/get/copy/etc) -------- */
726
727 static xhash *iamarray(var *v)
728 {
729         var *a = v;
730
731         while (a->type & VF_CHILD)
732                 a = a->x.parent;
733
734         if (!(a->type & VF_ARRAY)) {
735                 a->type |= VF_ARRAY;
736                 a->x.array = hash_init();
737         }
738         return a->x.array;
739 }
740
741 static void clear_array(xhash *array)
742 {
743         unsigned i;
744         hash_item *hi, *thi;
745
746         for (i = 0; i < array->csize; i++) {
747                 hi = array->items[i];
748                 while (hi) {
749                         thi = hi;
750                         hi = hi->next;
751                         free(thi->data.v.string);
752                         free(thi);
753                 }
754                 array->items[i] = NULL;
755         }
756         array->glen = array->nel = 0;
757 }
758
759 /* clear a variable */
760 static var *clrvar(var *v)
761 {
762         if (!(v->type & VF_FSTR))
763                 free(v->string);
764
765         v->type &= VF_DONTTOUCH;
766         v->type |= VF_DIRTY;
767         v->string = NULL;
768         return v;
769 }
770
771 /* assign string value to variable */
772 static var *setvar_p(var *v, char *value)
773 {
774         clrvar(v);
775         v->string = value;
776         handle_special(v);
777         return v;
778 }
779
780 /* same as setvar_p but make a copy of string */
781 static var *setvar_s(var *v, const char *value)
782 {
783         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
784 }
785
786 /* same as setvar_s but sets USER flag */
787 static var *setvar_u(var *v, const char *value)
788 {
789         v = setvar_s(v, value);
790         v->type |= VF_USER;
791         return v;
792 }
793
794 /* set array element to user string */
795 static void setari_u(var *a, int idx, const char *s)
796 {
797         var *v;
798
799         v = findvar(iamarray(a), itoa(idx));
800         setvar_u(v, s);
801 }
802
803 /* assign numeric value to variable */
804 static var *setvar_i(var *v, double value)
805 {
806         clrvar(v);
807         v->type |= VF_NUMBER;
808         v->number = value;
809         handle_special(v);
810         return v;
811 }
812
813 static const char *getvar_s(var *v)
814 {
815         /* if v is numeric and has no cached string, convert it to string */
816         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
817                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
818                 v->string = xstrdup(g_buf);
819                 v->type |= VF_CACHED;
820         }
821         return (v->string == NULL) ? "" : v->string;
822 }
823
824 static double getvar_i(var *v)
825 {
826         char *s;
827
828         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
829                 v->number = 0;
830                 s = v->string;
831                 if (s && *s) {
832                         debug_printf_eval("getvar_i: '%s'->", s);
833                         v->number = my_strtod(&s);
834                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
835                         if (v->type & VF_USER) {
836                                 s = skip_spaces(s);
837                                 if (*s != '\0')
838                                         v->type &= ~VF_USER;
839                         }
840                 } else {
841                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
842                         v->type &= ~VF_USER;
843                 }
844                 v->type |= VF_CACHED;
845         }
846         debug_printf_eval("getvar_i: %f\n", v->number);
847         return v->number;
848 }
849
850 /* Used for operands of bitwise ops */
851 static unsigned long getvar_i_int(var *v)
852 {
853         double d = getvar_i(v);
854
855         /* Casting doubles to longs is undefined for values outside
856          * of target type range. Try to widen it as much as possible */
857         if (d >= 0)
858                 return (unsigned long)d;
859         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
860         return - (long) (unsigned long) (-d);
861 }
862
863 static var *copyvar(var *dest, const var *src)
864 {
865         if (dest != src) {
866                 clrvar(dest);
867                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
868                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
869                 dest->number = src->number;
870                 if (src->string)
871                         dest->string = xstrdup(src->string);
872         }
873         handle_special(dest);
874         return dest;
875 }
876
877 static var *incvar(var *v)
878 {
879         return setvar_i(v, getvar_i(v) + 1.0);
880 }
881
882 /* return true if v is number or numeric string */
883 static int is_numeric(var *v)
884 {
885         getvar_i(v);
886         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
887 }
888
889 /* return 1 when value of v corresponds to true, 0 otherwise */
890 static int istrue(var *v)
891 {
892         if (is_numeric(v))
893                 return (v->number != 0);
894         return (v->string && v->string[0]);
895 }
896
897 /* temporary variables allocator. Last allocated should be first freed */
898 static var *nvalloc(int n)
899 {
900         nvblock *pb = NULL;
901         var *v, *r;
902         int size;
903
904         while (g_cb) {
905                 pb = g_cb;
906                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
907                         break;
908                 g_cb = g_cb->next;
909         }
910
911         if (!g_cb) {
912                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
913                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
914                 g_cb->size = size;
915                 g_cb->pos = g_cb->nv;
916                 g_cb->prev = pb;
917                 /*g_cb->next = NULL; - xzalloc did it */
918                 if (pb)
919                         pb->next = g_cb;
920         }
921
922         v = r = g_cb->pos;
923         g_cb->pos += n;
924
925         while (v < g_cb->pos) {
926                 v->type = 0;
927                 v->string = NULL;
928                 v++;
929         }
930
931         return r;
932 }
933
934 static void nvfree(var *v)
935 {
936         var *p;
937
938         if (v < g_cb->nv || v >= g_cb->pos)
939                 syntax_error(EMSG_INTERNAL_ERROR);
940
941         for (p = v; p < g_cb->pos; p++) {
942                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
943                         clear_array(iamarray(p));
944                         free(p->x.array->items);
945                         free(p->x.array);
946                 }
947                 if (p->type & VF_WALK) {
948                         walker_list *n;
949                         walker_list *w = p->x.walker;
950                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
951                         p->x.walker = NULL;
952                         while (w) {
953                                 n = w->prev;
954                                 debug_printf_walker(" free(%p)\n", w);
955                                 free(w);
956                                 w = n;
957                         }
958                 }
959                 clrvar(p);
960         }
961
962         g_cb->pos = v;
963         while (g_cb->prev && g_cb->pos == g_cb->nv) {
964                 g_cb = g_cb->prev;
965         }
966 }
967
968 /* ------- awk program text parsing ------- */
969
970 /* Parse the next token of the awk program text and return its token class.
 *
 * expected: bitmask of TC_* classes the caller is prepared to accept.
 *           It also steers the tokenizer: '/' starts a regexp only when
 *           TC_REGEXP is expected, and entries of the keyword/operator table
 *           are considered only when their class is expected (entries of
 *           class TC_WORD and TC_NEWLINE are always considered).
 *
 * The token is published through globals: t_tclass (class), t_info (info
 * word taken from tokeninfo), t_string (text of a string/regexp/name) and
 * t_double (value of a number). Scanning starts at g_pos, which is moved
 * past the token. The program text is modified in place while scanning.
 * If the class of the token is not in 'expected', syntax_error() is called.
 */
973 static uint32_t next_token(uint32_t expected)
974 {
975 #define concat_inserted (G.next_token__concat_inserted)
976 #define save_tclass     (G.next_token__save_tclass)
977 #define save_info       (G.next_token__save_info)
978 /* Initialized to TC_OPTERM: */
979 #define ltclass         (G.next_token__ltclass)
980
981         char *p, *s;
982         const char *tl;
983         uint32_t tc;
984         const uint32_t *ti;
985
986         if (t_rollback) {
                /* rollback_token() was called: the previous token is handed
                 * out again, t_tclass and friends are left as they are */
987                 t_rollback = FALSE;
988
989         } else if (concat_inserted) {
                /* the previous call returned a synthesized concatenation
                 * operator; now deliver the real token stashed back then */
990                 concat_inserted = FALSE;
991                 t_tclass = save_tclass;
992                 t_info = save_info;
993
994         } else {
995                 p = g_pos;
996  readnext:
997                 p = skip_spaces(p);
998                 g_lineno = t_lineno;
                /* a '#' comment runs up to, but not including, the newline */
999                 if (*p == '#')
1000                         while (*p != '\n' && *p != '\0')
1001                                 p++;
1002
1003                 if (*p == '\n')
1004                         t_lineno++;
1005
1006                 if (*p == '\0') {
1007                         tc = TC_EOF;
1008
1009                 } else if (*p == '\"') {
1010                         /* it's a string */
                        /* the literal is rewritten in place: p reads, s writes.
                         * NOTE(review): nextchar() presumably decodes escape
                         * sequences - confirm against its definition */
1011                         t_string = s = ++p;
1012                         while (*p != '\"') {
1013                                 char *pp;
1014                                 if (*p == '\0' || *p == '\n')
1015                                         syntax_error(EMSG_UNEXP_EOS);
1016                                 pp = p;
1017                                 *s++ = nextchar(&pp);
1018                                 p = pp;
1019                         }
1020                         p++;
1021                         *s = '\0';
1022                         tc = TC_STRING;
1023
1024                 } else if ((expected & TC_REGEXP) && *p == '/') {
1025                         /* it's regexp */
                        /* copied in place as well; after a backslash the
                         * following text is run through
                         * bb_process_escape_sequence() */
1026                         t_string = s = ++p;
1027                         while (*p != '/') {
1028                                 if (*p == '\0' || *p == '\n')
1029                                         syntax_error(EMSG_UNEXP_EOS);
1030                                 *s = *p++;
1031                                 if (*s++ == '\\') {
1032                                         char *pp = p;
1033                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1034                                         if (*p == '\\')
1035                                                 *s++ = '\\';
                                        /* pp == p: nothing was consumed, so the
                                         * character is copied through verbatim */
1036                                         if (pp == p)
1037                                                 *s++ = *p++;
1038                                         else
1039                                                 p = pp;
1040                                 }
1041                         }
1042                         p++;
1043                         *s = '\0';
1044                         tc = TC_REGEXP;
1045
1046                 } else if (*p == '.' || isdigit(*p)) {
1047                         /* it's a number */
1048                         char *pp = p;
1049                         t_double = my_strtod(&pp);
1050                         p = pp;
                        /* a '.' directly after the number is rejected */
1051                         if (*p == '.')
1052                                 syntax_error(EMSG_UNEXP_TOKEN);
1053                         tc = TC_NUMBER;
1054
1055                 } else {
1056                         /* search for something known */
                        /* tokenlist is a packed table: every entry is a length
                         * byte followed by that many characters. An NTCC byte in
                         * place of a length switches to the next token class
                         * (tc is shifted left by one). tokeninfo is walked in
                         * step with the entries. */
1057                         tl = tokenlist;
1058                         tc = 0x00000001;
1059                         ti = tokeninfo;
1060                         while (*tl) {
1061                                 int l = (unsigned char) *tl++;
1062                                 if (l == (unsigned char) NTCC) {
1063                                         tc <<= 1;
1064                                         continue;
1065                                 }
1066                                 /* if token class is expected,
1067                                  * token matches,
1068                                  * and it's not a longer word,
1069                                  */
1070                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1071                                  && strncmp(p, tl, l) == 0
1072                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1073                                 ) {
1074                                         /* then this is what we are looking for */
1075                                         t_info = *ti;
1076                                         p += l;
1077                                         goto token_found;
1078                                 }
1079                                 ti++;
1080                                 tl += l;
1081                         }
1082                         /* not a known token */
1083
1084                         /* is it a name? (var/array/function) */
1085                         if (!isalnum_(*p))
1086                                 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1087                         /* yes */
                        /* the name is moved one byte to the left (overwriting
                         * the character in front of it), which leaves room for
                         * the terminating NUL without clobbering the character
                         * that follows the name */
1088                         t_string = --p;
1089                         while (isalnum_(*++p)) {
1090                                 p[-1] = *p;
1091                         }
1092                         p[-1] = '\0';
1093                         tc = TC_VARIABLE;
1094                         /* also consume whitespace between functionname and bracket */
1095                         if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1096                                 p = skip_spaces(p);
                        /* '(' makes it a function (the '(' is not consumed
                         * here), '[' makes it an array (the '[' is consumed) */
1097                         if (*p == '(') {
1098                                 tc = TC_FUNCTION;
1099                         } else {
1100                                 if (*p == '[') {
1101                                         p++;
1102                                         tc = TC_ARRAY;
1103                                 }
1104                         }
1105  token_found: ;
1106                 }
1107                 g_pos = p;
1108
1109                 /* skipping newlines in some cases */
                /* namely when the class of the previously returned token has
                 * TC_NOTERM set */
1110                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1111                         goto readnext;
1112
1113                 /* insert concatenation operator when needed */
                /* the real token is stashed in save_tclass/save_info and is
                 * returned by the next call */
1114                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1115                         concat_inserted = TRUE;
1116                         save_tclass = tc;
1117                         save_info = t_info;
1118                         tc = TC_BINOP;
1119                         t_info = OC_CONCAT | SS | P(35);
1120                 }
1121
1122                 t_tclass = tc;
1123         }
        /* remember the class of the token being returned for the next call */
1124         ltclass = t_tclass;
1125
1126         /* Are we ready for this? */
1127         if (!(ltclass & expected))
1128                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1129                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1130
1131         return ltclass;
1132 #undef concat_inserted
1133 #undef save_tclass
1134 #undef save_info
1135 #undef ltclass
1136 }
1137
1138 static void rollback_token(void)
1139 {
1140         t_rollback = TRUE;
1141 }
1142
1143 static node *new_node(uint32_t info)
1144 {
1145         node *n;
1146
1147         n = xzalloc(sizeof(node));
1148         n->info = info;
1149         n->lineno = g_lineno;
1150         return n;
1151 }
1152
1153 static void mk_re_node(const char *s, node *n, regex_t *re)
1154 {
1155         n->info = OC_REGEXP;
1156         n->l.re = re;
1157         n->r.ire = re + 1;
1158         xregcomp(re, s, REG_EXTENDED);
1159         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1160 }
1161
1162 static node *condition(void)
1163 {
1164         next_token(TC_SEQSTART);
1165         return parse_expr(TC_SEQTERM);
1166 }
1167
1168 /* Parse an expression and return a pointer to the subtree built for it.
 *
 * iexp: bitmask of token classes which terminate the expression. The
 *       terminating token is consumed by parse_expr.
 *
 * The tree is grown underneath a local dummy root (sn); the result is
 * whatever ends up in sn.r.n (NULL for an empty expression). While parsing,
 * cn is the node added last and xtc is the set of token classes that may
 * legally come next. The a.n member is used here as the link from a node
 * back to the node it is attached to.
 */
1170 static node *parse_expr(uint32_t iexp)
1171 {
1172         node sn;
1173         node *cn = &sn;
1174         node *vn, *glptr;
1175         uint32_t tc, xtc;
1176         var *v;
1177
1178         sn.info = PRIMASK;
1179         sn.r.n = glptr = NULL;
1180         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1181
1182         while (!((tc = next_token(xtc)) & iexp)) {
1183
                /* glptr is set only right after a getline token was seen */
1184                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1185                         /* input redirection (<) attached to glptr node */
1186                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1187                         cn->a.n = glptr;
1188                         xtc = TC_OPERAND | TC_UOPPRE;
1189                         glptr = NULL;
1190
1191                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1192                         /* for binary and postfix-unary operators, jump back over
1193                          * previous operators with higher priority */
1194                         vn = cn;
1195                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1196                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1197                         ) {
1198                                 vn = vn->a.n;
1199                         }
1200                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1201                                 t_info += P(6);
                        /* splice the new operator node in between vn and the
                         * node vn was attached to */
1202                         cn = vn->a.n->r.n = new_node(t_info);
1203                         cn->a.n = vn->a.n;
1204                         if (tc & TC_BINOP) {
                                /* binary: vn becomes the left operand */
1205                                 cn->l.n = vn;
1206                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1207                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1208                                         /* it's a pipe */
1209                                         next_token(TC_GETLINE);
1210                                         /* give maximum priority to this pipe */
1211                                         cn->info &= ~PRIMASK;
1212                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1213                                 }
1214                         } else {
                                /* postfix-unary: vn is stored in r.n */
1215                                 cn->r.n = vn;
1216                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1217                         }
1218                         vn->a.n = cn;
1219
1220                 } else {
1221                         /* for operands and prefix-unary operators, attach them
1222                          * to last node */
1223                         vn = cn;
1224                         cn = vn->r.n = new_node(t_info);
1225                         cn->a.n = vn;
1226                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1227                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1228                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1229                                 /* one should be very careful with switch on tclass -
1230                                  * only simple tclasses should be used! */
1231                                 switch (tc) {
1232                                 case TC_VARIABLE:
1233                                 case TC_ARRAY:
                                        /* a name found in ahash becomes OC_FNARG
                                         * and is referenced by its index (aidx);
                                         * any other name gets its var from
                                         * newvar() */
1234                                         cn->info = OC_VAR;
1235                                         v = hash_search(ahash, t_string);
1236                                         if (v != NULL) {
1237                                                 cn->info = OC_FNARG;
1238                                                 cn->l.aidx = v->x.aidx;
1239                                         } else {
1240                                                 cn->l.v = newvar(t_string);
1241                                         }
1242                                         if (tc & TC_ARRAY) {
                                                /* the subscript expression goes into r.n */
1243                                                 cn->info |= xS;
1244                                                 cn->r.n = parse_expr(TC_ARRTERM);
1245                                         }
1246                                         break;
1247
1248                                 case TC_NUMBER:
1249                                 case TC_STRING:
                                        /* constants live in a freshly allocated var */
1250                                         cn->info = OC_VAR;
1251                                         v = cn->l.v = xzalloc(sizeof(var));
1252                                         if (tc & TC_NUMBER)
1253                                                 setvar_i(v, t_double);
1254                                         else
1255                                                 setvar_s(v, t_string);
1256                                         break;
1257
1258                                 case TC_REGEXP:
                                        /* room for two regex_t: mk_re_node() uses
                                         * re and re + 1 */
1259                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1260                                         break;
1261
1262                                 case TC_FUNCTION:
                                        /* the argument list is parsed by condition() */
1263                                         cn->info = OC_FUNC;
1264                                         cn->r.f = newfunc(t_string);
1265                                         cn->l.n = condition();
1266                                         break;
1267
1268                                 case TC_SEQSTART:
                                        /* parenthesized subexpression: its subtree
                                         * replaces the node created above */
1269                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1270                                         cn->a.n = vn;
1271                                         break;
1272
1273                                 case TC_GETLINE:
                                        /* remember the node: a '<' coming next is
                                         * an input redirection, see top of loop */
1274                                         glptr = cn;
1275                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1276                                         break;
1277
1278                                 case TC_BUILTIN:
1279                                         cn->l.n = condition();
1280                                         break;
1281                                 }
1282                         }
1283                 }
1284         }
1285         return sn.r.n;
1286 }
1287
1288 /* add node to chain. Return ptr to alloc'd node */
1289 static node *chain_node(uint32_t info)
1290 {
1291         node *n;
1292
1293         if (!seq->first)
1294                 seq->first = seq->last = new_node(0);
1295
1296         if (seq->programname != g_progname) {
1297                 seq->programname = g_progname;
1298                 n = chain_node(OC_NEWSOURCE);
1299                 n->l.new_progname = xstrdup(g_progname);
1300         }
1301
1302         n = seq->last;
1303         n->info = info;
1304         seq->last = n->a.n = new_node(OC_DONE);
1305
1306         return n;
1307 }
1308
1309 static void chain_expr(uint32_t info)
1310 {
1311         node *n;
1312
1313         n = chain_node(info);
1314         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1315         if (t_tclass & TC_GRPTERM)
1316                 rollback_token();
1317 }
1318
1319 static node *chain_loop(node *nn)
1320 {
1321         node *n, *n2, *save_brk, *save_cont;
1322
1323         save_brk = break_ptr;
1324         save_cont = continue_ptr;
1325
1326         n = chain_node(OC_BR | Vx);
1327         continue_ptr = new_node(OC_EXEC);
1328         break_ptr = new_node(OC_EXEC);
1329         chain_group();
1330         n2 = chain_node(OC_EXEC | Vx);
1331         n2->l.n = nn;
1332         n2->a.n = n;
1333         continue_ptr->a.n = n2;
1334         break_ptr->a.n = n->r.n = seq->last;
1335
1336         continue_ptr = save_cont;
1337         break_ptr = save_brk;
1338
1339         return n;
1340 }
1341
1342 /* Parse one group and attach it to the current chain (seq).
 *
 * Leading newlines are skipped. What follows is one of:
 *  - a TC_GRPSTART token: nested groups are parsed until TC_GRPTERM;
 *  - an expression (TC_OPSEQ/TC_OPTERM): chained as OC_EXEC | Vx;
 *  - a statement keyword, dispatched on the opcode class of t_info.
 */
1343 static void chain_group(void)
1344 {
1345         uint32_t c;
1346         node *n, *n2, *n3;
1347
1348         do {
1349                 c = next_token(TC_GRPSEQ);
1350         } while (c & TC_NEWLINE);
1351
1352         if (c & TC_GRPSTART) {
1353                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1354                         if (t_tclass & TC_NEWLINE)
1355                                 continue;
                        /* the token starts a nested group: give it back and
                         * let the recursive call read it again */
1356                         rollback_token();
1357                         chain_group();
1358                 }
1359         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1360                 rollback_token();
1361                 chain_expr(OC_EXEC | Vx);
1362         } else {                                                /* TC_STATEMNT */
1363                 switch (t_info & OPCLSMASK) {
1364                 case ST_IF:
                        /* n: OC_BR node with the condition in l.n; n->r.n is
                         * set to the node following the "then" part.
                         * n2: OC_EXEC node chained after the "then" part; with
                         * an "else" present, n2->a.n is redirected to the node
                         * following the "else" part */
1365                         n = chain_node(OC_BR | Vx);
1366                         n->l.n = condition();
1367                         chain_group();
1368                         n2 = chain_node(OC_EXEC);
1369                         n->r.n = seq->last;
1370                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1371                                 chain_group();
1372                                 n2->a.n = seq->last;
1373                         } else {
1374                                 rollback_token();
1375                         }
1376                         break;
1377
1378                 case ST_WHILE:
                        /* the condition is parsed first and stored into the
                         * loop head returned by chain_loop() */
1379                         n2 = condition();
1380                         n = chain_loop(NULL);
1381                         n->l.n = n2;
1382                         break;
1383
1384                 case ST_DO:
                        /* n2 is chained in front of the loop and linked to
                         * where the loop head links (n->a.n); the condition
                         * is parsed after the body */
1385                         n2 = chain_node(OC_EXEC);
1386                         n = chain_loop(NULL);
1387                         n2->a.n = n->a.n;
1388                         next_token(TC_WHILE);
1389                         n->l.n = condition();
1390                         break;
1391
1392                 case ST_FOR:
1393                         next_token(TC_SEQSTART);
1394                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1395                         if (t_tclass & TC_SEQTERM) {    /* for-in */
                                /* the only expression must be an OC_IN node;
                                 * its operands feed an OC_WALKINIT node and
                                 * the loop head is turned into OC_WALKNEXT */
1396                                 if ((n2->info & OPCLSMASK) != OC_IN)
1397                                         syntax_error(EMSG_UNEXP_TOKEN);
1398                                 n = chain_node(OC_WALKINIT | VV);
1399                                 n->l.n = n2->l.n;
1400                                 n->r.n = n2->r.n;
1401                                 n = chain_loop(NULL);
1402                                 n->info = OC_WALKNEXT | Vx;
1403                                 n->l.n = n2->l.n;
1404                         } else {                        /* for (;;) */
                                /* first expression: chained in front of the
                                 * loop; second: stored in the loop head; third:
                                 * passed to chain_loop(). Without a second
                                 * expression the head becomes a plain OC_EXEC */
1405                                 n = chain_node(OC_EXEC | Vx);
1406                                 n->l.n = n2;
1407                                 n2 = parse_expr(TC_SEMICOL);
1408                                 n3 = parse_expr(TC_SEQTERM);
1409                                 n = chain_loop(n3);
1410                                 n->l.n = n2;
1411                                 if (!n2)
1412                                         n->info = OC_EXEC;
1413                         }
1414                         break;
1415
1416                 case OC_PRINT:
1417                 case OC_PRINTF:
                        /* l.n: the expression to print; when a TC_OUTRDR token
                         * follows, its t_info is OR-ed into the node info and
                         * the expression after it goes into r.n */
1418                         n = chain_node(t_info);
1419                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1420                         if (t_tclass & TC_OUTRDR) {
1421                                 n->info |= t_info;
1422                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1423                         }
1424                         if (t_tclass & TC_GRPTERM)
1425                                 rollback_token();
1426                         break;
1427
1428                 case OC_BREAK:
                        /* linked to the break node of the enclosing loop */
1429                         n = chain_node(OC_EXEC);
1430                         n->a.n = break_ptr;
1431                         break;
1432
1433                 case OC_CONTINUE:
                        /* linked to the continue node of the enclosing loop */
1434                         n = chain_node(OC_EXEC);
1435                         n->a.n = continue_ptr;
1436                         break;
1437
1438                 /* delete, next, nextfile, return, exit */
1439                 default:
1440                         chain_expr(t_info);
1441                 }
1442         }
1443 }
1444
1445 static void parse_program(char *p)
1446 {
1447         uint32_t tclass;
1448         node *cn;
1449         func *f;
1450         var *v;
1451
1452         g_pos = p;
1453         t_lineno = 1;
1454         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1455                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1456
1457                 if (tclass & TC_OPTERM)
1458                         continue;
1459
1460                 seq = &mainseq;
1461                 if (tclass & TC_BEGIN) {
1462                         seq = &beginseq;
1463                         chain_group();
1464
1465                 } else if (tclass & TC_END) {
1466                         seq = &endseq;
1467                         chain_group();
1468
1469                 } else if (tclass & TC_FUNCDECL) {
1470                         next_token(TC_FUNCTION);
1471                         g_pos++;
1472                         f = newfunc(t_string);
1473                         f->body.first = NULL;
1474                         f->nargs = 0;
1475                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1476                                 v = findvar(ahash, t_string);
1477                                 v->x.aidx = f->nargs++;
1478
1479                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1480                                         break;
1481                         }
1482                         seq = &f->body;
1483                         chain_group();
1484                         clear_array(ahash);
1485
1486                 } else if (tclass & TC_OPSEQ) {
1487                         rollback_token();
1488                         cn = chain_node(OC_TEST);
1489                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1490                         if (t_tclass & TC_GRPSTART) {
1491                                 rollback_token();
1492                                 chain_group();
1493                         } else {
1494                                 chain_node(OC_PRINT);
1495                         }
1496                         cn->r.n = mainseq.last;
1497
1498                 } else /* if (tclass & TC_GRPSTART) */ {
1499                         rollback_token();
1500                         chain_group();
1501                 }
1502         }
1503 }
1504
1505
1506 /* -------- program execution part -------- */
1507
1508 static node *mk_splitter(const char *s, tsplitter *spl)
1509 {
1510         regex_t *re, *ire;
1511         node *n;
1512
1513         re = &spl->re[0];
1514         ire = &spl->re[1];
1515         n = &spl->n;
1516         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1517                 regfree(re);
1518                 regfree(ire); // TODO: nuke ire, use re+1?
1519         }
1520         if (s[0] && s[1]) { /* strlen(s) > 1 */
1521                 mk_re_node(s, n, re);
1522         } else {
1523                 n->info = (uint32_t) s[0];
1524         }
1525
1526         return n;
1527 }
1528
1529 /* use node as a regular expression. Supplied with node ptr and regex_t
1530  * storage space. Return ptr to regex (if result points to preg, it should
1531  * be later regfree'd manually
1532  */
1533 static regex_t *as_regex(node *op, regex_t *preg)
1534 {
1535         int cflags;
1536         var *v;
1537         const char *s;
1538
1539         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1540                 return icase ? op->r.ire : op->l.re;
1541         }
1542         v = nvalloc(1);
1543         s = getvar_s(evaluate(op, v));
1544
1545         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1546         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1547          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1548          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1549          * (maybe gsub is not supposed to use REG_EXTENDED?).
1550          */
1551         if (regcomp(preg, s, cflags)) {
1552                 cflags &= ~REG_EXTENDED;
1553                 xregcomp(preg, s, cflags);
1554         }
1555         nvfree(v);
1556         return preg;
1557 }
1558
/* Gradually growing buffer.
 *
 * b:    current buffer (may be NULL).
 * n:    number of bytes required.
 * size: in/out, current capacity of the buffer.
 *
 * The buffer is reallocated even if n == *size, thus there is always
 * at least one extra allocated byte. Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1571
1572 /* resize field storage space */
1573 static void fsrealloc(int size)
1574 {
1575         int i;
1576
1577         if (size >= maxfields) {
1578                 i = maxfields;
1579                 maxfields = size + 16;
1580                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1581                 for (; i < maxfields; i++) {
1582                         Fields[i].type = VF_SPECIAL;
1583                         Fields[i].string = NULL;
1584                 }
1585         }
1586         /* if size < nfields, clear extra field variables */
1587         for (i = size; i < nfields; i++) {
1588                 clrvar(Fields + i);
1589         }
1590         nfields = size;
1591 }
1592
/* Split string s into fields and return the number of fields.
 *
 * s:     string to split (not modified).
 * spl:   splitter node. If its opcode class is OC_REGEXP, the compiled
 *        regexp is used as separator; otherwise the low byte of spl->info
 *        is the separator character: NUL makes every character a field,
 *        ' ' splits on runs of whitespace, anything else is a
 *        single-character separator.
 * slist: receives a newly allocated buffer holding the fields as
 *        consecutive NUL-terminated strings (split_f0() frees the buffer
 *        of the previous call before calling again).
 *
 * When RS is an empty string, a newline acts as a separator as well in the
 * regexp and single-character modes.
 */
1593 static int awk_split(const char *s, node *spl, char **slist)
1594 {
1595         int l, n;
1596         char c[4];
1597         char *s1;
1598         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1599
1600         /* in worst case, each char would be a separate field */
1601         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1602         strcpy(s1, s);
1603
        /* c[0..1]: the separator char (twice, so that both cases fit in the
         * icase variant below); c[2]: '\n' if RS is empty; c+2 on its own
         * is thus either "" or "\n" */
1604         c[0] = c[1] = (char)spl->info;
1605         c[2] = c[3] = '\0';
1606         if (*getvar_s(intvar[RS]) == '\0')
1607                 c[2] = '\n';
1608
1609         n = 0;
1610         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1611                 if (!*s)
1612                         return n; /* "": zero fields */
1613                 n++; /* at least one field will be there */
1614                 do {
1615                         l = strcspn(s, c+2); /* len till next NUL or \n */
                        /* a match counts only if it starts no later than l */
1616                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1617                          && pmatch[0].rm_so <= l
1618                         ) {
1619                                 l = pmatch[0].rm_so;
                                /* empty match at the very start: take one
                                 * character, so that the loop makes progress */
1620                                 if (pmatch[0].rm_eo == 0) {
1621                                         l++;
1622                                         pmatch[0].rm_eo++;
1623                                 }
1624                                 n++; /* we saw yet another delimiter */
1625                         } else {
                                /* no usable match: the field runs up to l; if
                                 * it stopped at a '\n' rather than at the end
                                 * of the string, the '\n' is consumed too */
1626                                 pmatch[0].rm_eo = l;
1627                                 if (s[l])
1628                                         pmatch[0].rm_eo++;
1629                         }
1630                         memcpy(s1, s, l);
1631                         /* make sure we remove *all* of the separator chars */
1632                         do {
1633                                 s1[l] = '\0';
1634                         } while (++l < pmatch[0].rm_eo);
                        /* NOTE(review): nextword() presumably advances s1 past
                         * the word just stored - confirm against its definition */
1635                         nextword(&s1);
1636                         s += pmatch[0].rm_eo;
1637                 } while (*s);
1638                 return n;
1639         }
1640         if (c[0] == '\0') {  /* null split */
                /* every character becomes a field of its own */
1641                 while (*s) {
1642                         *s1++ = *s++;
1643                         *s1++ = '\0';
1644                         n++;
1645                 }
1646                 return n;
1647         }
1648         if (c[0] != ' ') {  /* single-character split */
                /* works on the copy of s made above: every separator found
                 * by strpbrk() is overwritten with NUL */
1649                 if (icase) {
1650                         c[0] = toupper(c[0]);
1651                         c[1] = tolower(c[1]);
1652                 }
1653                 if (*s1)
1654                         n++;
1655                 while ((s1 = strpbrk(s1, c)) != NULL) {
1656                         *s1++ = '\0';
1657                         n++;
1658                 }
1659                 return n;
1660         }
1661         /* space split */
        /* leading, trailing and repeated whitespace produce no fields */
1662         while (*s) {
1663                 s = skip_whitespace(s);
1664                 if (!*s)
1665                         break;
1666                 n++;
1667                 while (*s && !isspace(*s))
1668                         *s1++ = *s++;
1669                 *s1++ = '\0';
1670         }
1671         return n;
1672 }
1673
1674 static void split_f0(void)
1675 {
1676 /* static char *fstrings; */
1677 #define fstrings (G.split_f0__fstrings)
1678
1679         int i, n;
1680         char *s;
1681
1682         if (is_f0_split)
1683                 return;
1684
1685         is_f0_split = TRUE;
1686         free(fstrings);
1687         fsrealloc(0);
1688         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1689         fsrealloc(n);
1690         s = fstrings;
1691         for (i = 0; i < n; i++) {
1692                 Fields[i].string = nextword(&s);
1693                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1694         }
1695
1696         /* set NF manually to avoid side effects */
1697         clrvar(intvar[NF]);
1698         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1699         intvar[NF]->number = nfields;
1700 #undef fstrings
1701 }
1702
1703 /* perform additional actions when some internal variables changed */
1704 static void handle_special(var *v)
1705 {
1706         int n;
1707         char *b;
1708         const char *sep, *s;
1709         int sl, l, len, i, bsize;
1710
1711         if (!(v->type & VF_SPECIAL))
1712                 return;
1713
1714         if (v == intvar[NF]) {
1715                 n = (int)getvar_i(v);
1716                 fsrealloc(n);
1717
1718                 /* recalculate $0 */
1719                 sep = getvar_s(intvar[OFS]);
1720                 sl = strlen(sep);
1721                 b = NULL;
1722                 len = 0;
1723                 for (i = 0; i < n; i++) {
1724                         s = getvar_s(&Fields[i]);
1725                         l = strlen(s);
1726                         if (b) {
1727                                 memcpy(b+len, sep, sl);
1728                                 len += sl;
1729                         }
1730                         b = qrealloc(b, len+l+sl, &bsize);
1731                         memcpy(b+len, s, l);
1732                         len += l;
1733                 }
1734                 if (b)
1735                         b[len] = '\0';
1736                 setvar_p(intvar[F0], b);
1737                 is_f0_split = TRUE;
1738
1739         } else if (v == intvar[F0]) {
1740                 is_f0_split = FALSE;
1741
1742         } else if (v == intvar[FS]) {
1743                 mk_splitter(getvar_s(v), &fsplitter);
1744
1745         } else if (v == intvar[RS]) {
1746                 mk_splitter(getvar_s(v), &rsplitter);
1747
1748         } else if (v == intvar[IGNORECASE]) {
1749                 icase = istrue(v);
1750
1751         } else {                                /* $n */
1752                 n = getvar_i(intvar[NF]);
1753                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1754                 /* right here v is invalid. Just to note... */
1755         }
1756 }
1757
1758 /* step through func/builtin/etc arguments */
1759 static node *nextarg(node **pn)
1760 {
1761         node *n;
1762
1763         n = *pn;
1764         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1765                 *pn = n->r.n;
1766                 n = n->l.n;
1767         } else {
1768                 *pn = NULL;
1769         }
1770         return n;
1771 }
1772
1773 static void hashwalk_init(var *v, xhash *array)
1774 {
1775         hash_item *hi;
1776         unsigned i;
1777         walker_list *w;
1778         walker_list *prev_walker;
1779
1780         if (v->type & VF_WALK) {
1781                 prev_walker = v->x.walker;
1782         } else {
1783                 v->type |= VF_WALK;
1784                 prev_walker = NULL;
1785         }
1786         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1787
1788         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1789         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1790         w->cur = w->end = w->wbuf;
1791         w->prev = prev_walker;
1792         for (i = 0; i < array->csize; i++) {
1793                 hi = array->items[i];
1794                 while (hi) {
1795                         strcpy(w->end, hi->name);
1796                         nextword(&w->end);
1797                         hi = hi->next;
1798                 }
1799         }
1800 }
1801
1802 static int hashwalk_next(var *v)
1803 {
1804         walker_list *w = v->x.walker;
1805
1806         if (w->cur >= w->end) {
1807                 walker_list *prev_walker = w->prev;
1808
1809                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1810                 free(w);
1811                 v->x.walker = prev_walker;
1812                 return FALSE;
1813         }
1814
1815         setvar_s(v, nextword(&w->cur));
1816         return TRUE;
1817 }
1818
1819 /* evaluate node, return 1 when result is true, 0 otherwise */
1820 static int ptest(node *pattern)
1821 {
1822         /* ptest__v is "static": to save stack space? */
1823         return istrue(evaluate(pattern, &G.ptest__v));
1824 }
1825
1826 /* Read the next record from stream rsm into variable v.
      *
      * Input is buffered in rsm itself: rsm->buffer holds the data, rsm->adv
      * is the offset of the first unconsumed byte, rsm->pos the number of
      * unconsumed bytes and rsm->size the allocated size. All four are read
      * on entry and written back before returning.
      *
      * The record terminator is looked for according to rsplitter:
      *  - OC_REGEXP: first match of the (case-sensitive or -insensitive) regex
      *  - single character c != 0: that character, or an embedded NUL byte
      *  - c == 0: a run of newlines starting with "\n\n"; leading newlines
      *    are skipped
      * On success v receives the record text (flagged VF_USER) and RT
      * receives the terminator text that was found.
      *
      * Returns 1 if a record was stored, 0 if no data was left,
      * -1 if reading failed (ERRNO is set from errno in that case).
      */
1827 static int awk_getline(rstream *rsm, var *v)
1828 {
1829         char *b;
1830         regmatch_t pmatch[2];
1831         int size, a, p, pp = 0;
1832         int fd, so, eo, r, rp;
1833         char c, *m, *s;
1834
1835         debug_printf_eval("entered %s()\n", __func__);
1836
1837         /* we're using our own buffer since we need access to accumulating
1838          * characters
1839          */
1840         fd = fileno(rsm->F);
1841         m = rsm->buffer;
1842         a = rsm->adv;
1843         p = rsm->pos;
1844         size = rsm->size;
             /* low byte of the splitter info; compared against '\0' and
              * passed to strchr() below as the separator character */
1845         c = (char) rsplitter.n.info;
             /* rp: offset of the record text within b (only advanced in the
              * c == 0 mode, to skip leading newlines) */
1846         rp = 0;
1847
1848         if (!m)
1849                 m = qrealloc(m, 256, &size);
1850
1851         do {
1852                 b = m + a;
                     /* so/eo: start/end offset of the terminator in b.
                      * Default: an empty terminator at the end of the data. */
1853                 so = eo = p;
1854                 r = 1;
1855                 if (p > 0) {
1856                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1857                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1858                                                         b, 1, pmatch, 0) == 0) {
1859                                         so = pmatch[0].rm_so;
1860                                         eo = pmatch[0].rm_eo;
                                             /* a match touching the end of the
                                              * buffered data is not accepted
                                              * yet: read more first */
1861                                         if (b[eo] != '\0')
1862                                                 break;
1863                                 }
1864                         } else if (c != '\0') {
                                     /* only the data added by the last read
                                      * (from offset pp on) is searched */
1865                                 s = strchr(b+pp, c);
1866                                 if (!s)
1867                                         s = memchr(b+pp, '\0', p - pp);
1868                                 if (s) {
1869                                         so = eo = s-b;
1870                                         eo++;
1871                                         break;
1872                                 }
1873                         } else {
1874                                 while (b[rp] == '\n')
1875                                         rp++;
1876                                 s = strstr(b+rp, "\n\n");
1877                                 if (s) {
1878                                         so = eo = s-b;
                                             /* the terminator is the whole
                                              * run of newlines */
1879                                         while (b[eo] == '\n')
1880                                                 eo++;
1881                                         if (b[eo] != '\0')
1882                                                 break;
1883                                 }
1884                         }
1885                 }
1886
                     /* No complete record yet: move the unconsumed data (and
                      * its terminating NUL) to the start of the buffer */
1887                 if (a > 0) {
1888                         memmove(m, m+a, p+1);
1889                         b = m;
1890                         a = 0;
1891                 }
1892
                     /* make room for more input and append it */
1893                 m = qrealloc(m, a+p+128, &size);
1894                 b = m + a;
1895                 pp = p;
1896                 p += safe_read(fd, b+p, size-p-1);
                     /* p shrank: safe_read() returned a negative value */
1897                 if (p < pp) {
1898                         p = 0;
1899                         r = 0;
1900                         setvar_i(intvar[ERRNO], errno);
1901                 }
1902                 b[p] = '\0';
1903
             /* loop again only if the read produced new data */
1904         } while (p > pp);
1905
1906         if (p == 0) {
                     /* nothing buffered: r becomes 0 (no data left) or,
                      * after a read error, -1 */
1907                 r--;
1908         } else {
                     /* temporarily NUL-terminate the record, then the
                      * terminator, to copy them out; the overwritten bytes
                      * are restored afterwards */
1909                 c = b[so]; b[so] = '\0';
1910                 setvar_s(v, b+rp);
1911                 v->type |= VF_USER;
1912                 b[so] = c;
1913                 c = b[eo]; b[eo] = '\0';
1914                 setvar_s(intvar[RT], b+so);
1915                 b[eo] = c;
1916         }
1917
             /* save the buffer state, consuming record and terminator */
1918         rsm->buffer = m;
1919         rsm->adv = a + eo;
1920         rsm->pos = p - eo;
1921         rsm->size = size;
1922
1923         debug_printf_eval("returning from %s(): %d\n", __func__, r);
1924
1925         return r;
1926 }
1927
1928 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1929 {
1930         int r = 0;
1931         char c;
1932         const char *s = format;
1933
1934         if (int_as_int && n == (int)n) {
1935                 r = snprintf(b, size, "%d", (int)n);
1936         } else {
1937                 do { c = *s; } while (c && *++s);
1938                 if (strchr("diouxX", c)) {
1939                         r = snprintf(b, size, format, (int)n);
1940                 } else if (strchr("eEfgG", c)) {
1941                         r = snprintf(b, size, format, n);
1942                 } else {
1943                         syntax_error(EMSG_INV_FMT);
1944                 }
1945         }
1946         return r;
1947 }
1948
1949 /* formatted output into an allocated buffer, return ptr to buffer */
1950 static char *awk_printf(node *n)
1951 {
1952         char *b = NULL;
1953         char *fmt, *s, *f;
1954         const char *s1;
1955         int i, j, incr, bsize;
1956         char c, c1;
1957         var *v, *arg;
1958
1959         v = nvalloc(1);
1960         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1961
1962         i = 0;
1963         while (*f) {
1964                 s = f;
1965                 while (*f && (*f != '%' || *++f == '%'))
1966                         f++;
1967                 while (*f && !isalpha(*f)) {
1968                         if (*f == '*')
1969                                 syntax_error("%*x formats are not supported");
1970                         f++;
1971                 }
1972
1973                 incr = (f - s) + MAXVARFMT;
1974                 b = qrealloc(b, incr + i, &bsize);
1975                 c = *f;
1976                 if (c != '\0')
1977                         f++;
1978                 c1 = *f;
1979                 *f = '\0';
1980                 arg = evaluate(nextarg(&n), v);
1981
1982                 j = i;
1983                 if (c == 'c' || !c) {
1984                         i += sprintf(b+i, s, is_numeric(arg) ?
1985                                         (char)getvar_i(arg) : *getvar_s(arg));
1986                 } else if (c == 's') {
1987                         s1 = getvar_s(arg);
1988                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
1989                         i += sprintf(b+i, s, s1);
1990                 } else {
1991                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1992                 }
1993                 *f = c1;
1994
1995                 /* if there was an error while sprintf, return value is negative */
1996                 if (i < j)
1997                         i = j;
1998         }
1999
2000         free(fmt);
2001         nvfree(v);
2002         b = xrealloc(b, i + 1);
2003         b[i] = '\0';
2004         return b;
2005 }
2006
2007 /* Common substitution routine.
2008  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2009  * store result into (dest), return number of substitutions.
2010  * If nm = 0, replace all matches.
2011  * If src or dst is NULL, use $0.
2012  * If subexp != 0, enable subexpression matching (\1-\9).
      *
      * Notes on the implementation below:
      * - The value returned is match_no, the number of matches processed.
      *   Matches before the nm'th one are counted but copied unchanged.
      * - In (repl), '&' stands for the whole match unless it is preceded by
      *   an odd number of backslashes. With subexp set, a digit preceded by
      *   an odd number of backslashes stands for that subexpression (digit 0
      *   being the whole match). In both cases half of the preceding
      *   backslashes are kept in the output.
      * - The result is built in a buffer grown with qrealloc() and handed
      *   over to the destination variable with setvar_p().
2013  */
2014 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2015 {
2016         char *resbuf;
2017         const char *sp;
2018         int match_no, residx, replen, resbufsize;
2019         int regexec_flags;
2020         regmatch_t pmatch[10];
2021         regex_t sreg, *regex;
2022
2023         resbuf = NULL;
2024         residx = 0;
2025         match_no = 0;
2026         regexec_flags = 0;
             /* sreg is scratch space for as_regex(); when the returned
              * pointer is &sreg it is released with regfree() at the end */
2027         regex = as_regex(rn, &sreg);
2028         sp = getvar_s(src ? src : intvar[F0]);
2029         replen = strlen(repl);
2030         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2031                 int so = pmatch[0].rm_so;
2032                 int eo = pmatch[0].rm_eo;
2033
2034                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                     /* copy everything up to the end of the match; the
                      * matched text is removed again below if replaced */
2035                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2036                 memcpy(resbuf + residx, sp, eo);
2037                 residx += eo;
2038                 if (++match_no >= nm) {
2039                         const char *s;
2040                         int nbs;
2041
2042                         /* replace */
2043                         residx -= (eo - so);
                             /* nbs: number of consecutive backslashes seen
                              * just before the current character */
2044                         nbs = 0;
2045                         for (s = repl; *s; s++) {
2046                                 char c = resbuf[residx++] = *s;
2047                                 if (c == '\\') {
2048                                         nbs++;
2049                                         continue;
2050                                 }
2051                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2052                                         int j;
                                             /* drop the character just copied
                                              * plus half (rounded up) of the
                                              * preceding backslashes */
2053                                         residx -= ((nbs + 3) >> 1);
2054                                         j = 0;
2055                                         if (c != '&') {
2056                                                 j = c - '0';
                                                     /* flips the parity: a digit
                                                      * needs an odd backslash
                                                      * count to be special */
2057                                                 nbs++;
2058                                         }
2059                                         if (nbs % 2) {
                                                     /* escaped: literal char */
2060                                                 resbuf[residx++] = c;
2061                                         } else {
                                                     /* insert matched text j */
2062                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2063                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2064                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2065                                                 residx += n;
2066                                         }
2067                                 }
2068                                 nbs = 0;
2069                         }
2070                 }
2071
                     /* later searches no longer start at beginning of line */
2072                 regexec_flags = REG_NOTBOL;
2073                 sp += eo;
                     /* the requested match was replaced: done
                      * (never true for nm = 0, so all matches are replaced) */
2074                 if (match_no == nm)
2075                         break;
2076                 if (eo == so) {
2077                         /* Empty match (e.g. "b*" will match anywhere).
2078                          * Advance by one char. */
2079 //BUG (bug 1333):
2080 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2081 //... and will erroneously match "b" even though it is NOT at the word start.
2082 //we need REG_NOTBOW but it does not exist...
2083 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2084 //it should be able to do it correctly.
2085                         /* Subtle: this is safe only because
2086                          * qrealloc allocated at least one extra byte */
2087                         resbuf[residx] = *sp;
                             /* end of input reached: the byte just stored is
                              * the terminating NUL, the result is complete */
2088                         if (*sp == '\0')
2089                                 goto ret;
2090                         sp++;
2091                         residx++;
2092                 }
2093         }
2094
             /* append the unmatched remainder of the input */
2095         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2096         strcpy(resbuf + residx, sp);
2097  ret:
2098         //bb_error_msg("end sp:'%s'%p", sp,sp);
2099         setvar_p(dest ? dest : intvar[F0], resbuf);
2100         if (regex == &sreg)
2101                 regfree(regex);
2102         return match_no;
2103 }
2104
2105 static NOINLINE int do_mktime(const char *ds)
2106 {
2107         struct tm then;
2108         int count;
2109
2110         /*memset(&then, 0, sizeof(then)); - not needed */
2111         then.tm_isdst = -1; /* default is unknown */
2112
2113         /* manpage of mktime says these fields are ints,
2114          * so we can sscanf stuff directly into them */
2115         count = sscanf(ds, "%u %u %u %u %u %u %d",
2116                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2117                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2118                 &then.tm_isdst);
2119
2120         if (count < 6
2121          || (unsigned)then.tm_mon < 1
2122          || (unsigned)then.tm_year < 1900
2123         ) {
2124                 return -1;
2125         }
2126
2127         then.tm_mon -= 1;
2128         then.tm_year -= 1900;
2129
2130         return mktime(&then);
2131 }
2132
     /*
      * Execute a builtin function node and store its value into res.
      *
      * op->info selects the builtin (bits under OPNMASK) and describes its
      * arguments: the top two bits give the minimum argument count, and the
      * bits tested with 0x09000000 / 0x08000000 tell, per argument, whether
      * it is evaluated into av[] and whether its string value is fetched
      * into as[]. Up to four arguments are collected:
      *   an[i] - the argument's node
      *   av[i] - its evaluated value (when requested)
      *   as[i] - its string value (when requested)
      * Returns res.
      */
2133 static NOINLINE var *exec_builtin(node *op, var *res)
2134 {
2135 #define tspl (G.exec_builtin__tspl)
2136
2137         var *tv;
2138         node *an[4];
2139         var *av[4];
2140         const char *as[4];
2141         regmatch_t pmatch[2];
2142         regex_t sreg, *re;
2143         node *spl;
2144         uint32_t isr, info;
2145         int nargs;
2146         time_t tt;
2147         int i, l, ll, n;
2148
             /* temporaries that receive the evaluated arguments */
2149         tv = nvalloc(4);
2150         isr = info = op->info;
2151         op = op->l.n;
2152
             /* only av[2] and av[3] are preset: some cases below read them
              * even when fewer arguments were supplied */
2153         av[2] = av[3] = NULL;
2154         for (i = 0; i < 4 && op; i++) {
2155                 an[i] = nextarg(&op);
2156                 if (isr & 0x09000000)
2157                         av[i] = evaluate(an[i], &tv[i]);
2158                 if (isr & 0x08000000)
2159                         as[i] = getvar_s(av[i]);
                     /* bring the next argument's flag bits into position */
2160                 isr >>= 1;
2161         }
2162
2163         nargs = i;
2164         if ((uint32_t)nargs < (info >> 30))
2165                 syntax_error(EMSG_TOO_FEW_ARGS);
2166
2167         info &= OPNMASK;
2168         switch (info) {
2169
             /* atan2 of the first two arguments (needs libm support) */
2170         case B_a2:
2171 #if ENABLE_FEATURE_AWK_LIBM
2172                 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2173 #else
2174                 syntax_error(EMSG_NO_MATH);
2175 #endif
2176                 break;
2177
             /* split string as[0] into array av[1] (indices 1..n); the
              * splitter is the third argument, a regexp node or a string,
              * or else the field splitter. Result: number of pieces. */
2178         case B_sp: {
2179                 char *s, *s1;
2180
2181                 if (nargs > 2) {
2182                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2183                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2184                 } else {
2185                         spl = &fsplitter.n;
2186                 }
2187
2188                 n = awk_split(as[0], spl, &s);
                     /* s is advanced by nextword(); keep the start for free() */
2189                 s1 = s;
2190                 clear_array(iamarray(av[1]));
2191                 for (i = 1; i <= n; i++)
2192                         setari_u(av[1], i, nextword(&s));
2193                 free(s1);
2194                 setvar_i(res, n);
2195                 break;
2196         }
2197
             /* substring of as[0]: av[1] is the 1-based start (clamped to
              * the string), av[2] the length (default: rest of the string,
              * negative treated as 0) */
2198         case B_ss: {
2199                 char *s;
2200
2201                 l = strlen(as[0]);
2202                 i = getvar_i(av[1]) - 1;
2203                 if (i > l)
2204                         i = l;
2205                 if (i < 0)
2206                         i = 0;
2207                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2208                 if (n < 0)
2209                         n = 0;
2210                 s = xstrndup(as[0]+i, n);
2211                 setvar_p(res, s);
2212                 break;
2213         }
2214
2215         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2216          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
             /* bitwise AND */
2217         case B_an:
2218                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2219                 break;
2220
             /* bitwise complement */
2221         case B_co:
2222                 setvar_i(res, ~getvar_i_int(av[0]));
2223                 break;
2224
             /* left shift */
2225         case B_ls:
2226                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2227                 break;
2228
             /* bitwise OR */
2229         case B_or:
2230                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2231                 break;
2232
             /* right shift */
2233         case B_rs:
2234                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2235                 break;
2236
             /* bitwise XOR */
2237         case B_xo:
2238                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2239                 break;
2240
             /* copy of as[0] with ASCII letters converted to lower case
              * (B_lo) or upper case (B_up); other bytes are left alone */
2241         case B_lo:
2242         case B_up: {
2243                 char *s, *s1;
2244                 s1 = s = xstrdup(as[0]);
2245                 while (*s1) {
2246                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
                             /* true only for 'A'-'Z' and 'a'-'z' */
2247                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2248                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2249                         s1++;
2250                 }
2251                 setvar_p(res, s);
2252                 break;
2253         }
2254
             /* 1-based position of the first occurrence of as[1] in as[0];
              * 0 if there is none or as[1] is empty. The comparison ignores
              * case when icase is set. */
2255         case B_ix:
2256                 n = 0;
2257                 ll = strlen(as[1]);
                     /* l: last start offset at which as[1] could still fit */
2258                 l = strlen(as[0]) - ll;
2259                 if (ll > 0 && l >= 0) {
2260                         if (!icase) {
2261                                 char *s = strstr(as[0], as[1]);
2262                                 if (s)
2263                                         n = (s - as[0]) + 1;
2264                         } else {
2265                                 /* this piece of code is terribly slow and
2266                                  * really should be rewritten
2267                                  */
2268                                 for (i = 0; i <= l; i++) {
2269                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2270                                                 n = i+1;
2271                                                 break;
2272                                         }
2273                                 }
2274                         }
2275                 }
2276                 setvar_i(res, n);
2277                 break;
2278
             /* format a time with strftime(): as[0] is the format (a default
              * is used without arguments), av[1] the time value (default:
              * current time), interpreted as local time */
2279         case B_ti:
2280                 if (nargs > 1)
2281                         tt = getvar_i(av[1]);
2282                 else
2283                         time(&tt);
2284                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2285                 i = strftime(g_buf, MAXVARFMT,
2286                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2287                         localtime(&tt));
2288                 g_buf[i] = '\0';
2289                 setvar_s(res, g_buf);
2290                 break;
2291
             /* convert the date specification in as[0] via do_mktime() */
2292         case B_mt:
2293                 setvar_i(res, do_mktime(as[0]));
2294                 break;
2295
             /* match as[0] against the regexp given by an[1]. Sets the
              * variables RSTART (1-based start of the match, 0 if none) and
              * RLENGTH (length of the match, -1 if none); result: RSTART */
2296         case B_ma:
2297                 re = as_regex(an[1], &sreg);
2298                 n = regexec(re, as[0], 1, pmatch, 0);
2299                 if (n == 0) {
                             /* convert the 0-based offsets to 1-based */
2300                         pmatch[0].rm_so++;
2301                         pmatch[0].rm_eo++;
2302                 } else {
2303                         pmatch[0].rm_so = 0;
2304                         pmatch[0].rm_eo = -1;
2305                 }
2306                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2307                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2308                 setvar_i(res, pmatch[0].rm_so);
2309                 if (re == &sreg)
2310                         regfree(re);
2311                 break;
2312
             /* substitution with subexpression references enabled: av[2]
              * selects which match to replace, av[3] is the source (NULL
              * means $0); the new string is stored in res */
2313         case B_ge:
2314                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2315                 break;
2316
             /* replace all matches in av[2] (NULL means $0) in place;
              * result: the count returned by awk_sub() */
2317         case B_gs:
2318                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2319                 break;
2320
             /* as B_gs, but only the first match is replaced */
2321         case B_su:
2322                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2323                 break;
2324         }
2325
2326         nvfree(tv);
2327         return res;
2328 #undef tspl
2329 }
2330
2331 /*
2332  * Evaluate node - the heart of the program. Supplied with subtree
2333  * and place where to store result. returns ptr to result.
2334  */
     /* XC(n): n shifted right by 8 bits. evaluate() applies it both to the
      * operand of its switch (the opcode class bits of opinfo) and to every
      * case label, so the two always stay comparable. */
2335 #define XC(n) ((n) >> 8)
2336
2337 static var *evaluate(node *op, var *res)
2338 {
2339 /* This procedure is recursive so we should count every byte */
2340 #define fnargs (G.evaluate__fnargs)
2341 /* seed is initialized to 1 */
2342 #define seed   (G.evaluate__seed)
2343 #define sreg   (G.evaluate__sreg)
2344
2345         var *v1;
2346
2347         if (!op)
2348                 return setvar_s(res, NULL);
2349
2350         debug_printf_eval("entered %s()\n", __func__);
2351
2352         v1 = nvalloc(2);
2353
2354         while (op) {
2355                 struct {
2356                         var *v;
2357                         const char *s;
2358                 } L = L; /* for compiler */
2359                 struct {
2360                         var *v;
2361                         const char *s;
2362                 } R = R;
2363                 double L_d = L_d;
2364                 uint32_t opinfo;
2365                 int opn;
2366                 node *op1;
2367
2368                 opinfo = op->info;
2369                 opn = (opinfo & OPNMASK);
2370                 g_lineno = op->lineno;
2371                 op1 = op->l.n;
2372                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2373
2374                 /* execute inevitable things */
2375                 if (opinfo & OF_RES1)
2376                         L.v = evaluate(op1, v1);
2377                 if (opinfo & OF_RES2)
2378                         R.v = evaluate(op->r.n, v1+1);
2379                 if (opinfo & OF_STR1) {
2380                         L.s = getvar_s(L.v);
2381                         debug_printf_eval("L.s:'%s'\n", L.s);
2382                 }
2383                 if (opinfo & OF_STR2) {
2384                         R.s = getvar_s(R.v);
2385                         debug_printf_eval("R.s:'%s'\n", R.s);
2386                 }
2387                 if (opinfo & OF_NUM1) {
2388                         L_d = getvar_i(L.v);
2389                         debug_printf_eval("L_d:%f\n", L_d);
2390                 }
2391
2392                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2393                 switch (XC(opinfo & OPCLSMASK)) {
2394
2395                 /* -- iterative node type -- */
2396
2397                 /* test pattern */
2398                 case XC( OC_TEST ):
2399                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2400                                 /* it's range pattern */
2401                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2402                                         op->info |= OF_CHECKED;
2403                                         if (ptest(op1->r.n))
2404                                                 op->info &= ~OF_CHECKED;
2405                                         op = op->a.n;
2406                                 } else {
2407                                         op = op->r.n;
2408                                 }
2409                         } else {
2410                                 op = ptest(op1) ? op->a.n : op->r.n;
2411                         }
2412                         break;
2413
2414                 /* just evaluate an expression, also used as unconditional jump */
2415                 case XC( OC_EXEC ):
2416                         break;
2417
2418                 /* branch, used in if-else and various loops */
2419                 case XC( OC_BR ):
2420                         op = istrue(L.v) ? op->a.n : op->r.n;
2421                         break;
2422
2423                 /* initialize for-in loop */
2424                 case XC( OC_WALKINIT ):
2425                         hashwalk_init(L.v, iamarray(R.v));
2426                         break;
2427
2428                 /* get next array item */
2429                 case XC( OC_WALKNEXT ):
2430                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2431                         break;
2432
2433                 case XC( OC_PRINT ):
2434                 case XC( OC_PRINTF ): {
2435                         FILE *F = stdout;
2436
2437                         if (op->r.n) {
2438                                 rstream *rsm = newfile(R.s);
2439                                 if (!rsm->F) {
2440                                         if (opn == '|') {
2441                                                 rsm->F = popen(R.s, "w");
2442                                                 if (rsm->F == NULL)
2443                                                         bb_perror_msg_and_die("popen");
2444                                                 rsm->is_pipe = 1;
2445                                         } else {
2446                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2447                                         }
2448                                 }
2449                                 F = rsm->F;
2450                         }
2451
2452                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2453                                 if (!op1) {
2454                                         fputs(getvar_s(intvar[F0]), F);
2455                                 } else {
2456                                         while (op1) {
2457                                                 var *v = evaluate(nextarg(&op1), v1);
2458                                                 if (v->type & VF_NUMBER) {
2459                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2460                                                                         getvar_i(v), TRUE);
2461                                                         fputs(g_buf, F);
2462                                                 } else {
2463                                                         fputs(getvar_s(v), F);
2464                                                 }
2465
2466                                                 if (op1)
2467                                                         fputs(getvar_s(intvar[OFS]), F);
2468                                         }
2469                                 }
2470                                 fputs(getvar_s(intvar[ORS]), F);
2471
2472                         } else {        /* OC_PRINTF */
2473                                 char *s = awk_printf(op1);
2474                                 fputs(s, F);
2475                                 free(s);
2476                         }
2477                         fflush(F);
2478                         break;
2479                 }
2480
2481                 case XC( OC_DELETE ): {
2482                         uint32_t info = op1->info & OPCLSMASK;
2483                         var *v;
2484
2485                         if (info == OC_VAR) {
2486                                 v = op1->l.v;
2487                         } else if (info == OC_FNARG) {
2488                                 v = &fnargs[op1->l.aidx];
2489                         } else {
2490                                 syntax_error(EMSG_NOT_ARRAY);
2491                         }
2492
2493                         if (op1->r.n) {
2494                                 const char *s;
2495                                 clrvar(L.v);
2496                                 s = getvar_s(evaluate(op1->r.n, v1));
2497                                 hash_remove(iamarray(v), s);
2498                         } else {
2499                                 clear_array(iamarray(v));
2500                         }
2501                         break;
2502                 }
2503
2504                 case XC( OC_NEWSOURCE ):
2505                         g_progname = op->l.new_progname;
2506                         break;
2507
2508                 case XC( OC_RETURN ):
2509                         copyvar(res, L.v);
2510                         break;
2511
2512                 case XC( OC_NEXTFILE ):
2513                         nextfile = TRUE;
2514                 case XC( OC_NEXT ):
2515                         nextrec = TRUE;
2516                 case XC( OC_DONE ):
2517                         clrvar(res);
2518                         break;
2519
2520                 case XC( OC_EXIT ):
2521                         awk_exit(L_d);
2522
2523                 /* -- recursive node type -- */
2524
2525                 case XC( OC_VAR ):
2526                         L.v = op->l.v;
2527                         if (L.v == intvar[NF])
2528                                 split_f0();
2529                         goto v_cont;
2530
2531                 case XC( OC_FNARG ):
2532                         L.v = &fnargs[op->l.aidx];
2533  v_cont:
2534                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2535                         break;
2536
2537                 case XC( OC_IN ):
2538                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2539                         break;
2540
2541                 case XC( OC_REGEXP ):
2542                         op1 = op;
2543                         L.s = getvar_s(intvar[F0]);
2544                         goto re_cont;
2545
2546                 case XC( OC_MATCH ):
2547                         op1 = op->r.n;
2548  re_cont:
2549                         {
2550                                 regex_t *re = as_regex(op1, &sreg);
2551                                 int i = regexec(re, L.s, 0, NULL, 0);
2552                                 if (re == &sreg)
2553                                         regfree(re);
2554                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2555                         }
2556                         break;
2557
2558                 case XC( OC_MOVE ):
2559                         debug_printf_eval("MOVE\n");
2560                         /* if source is a temporary string, just relink it to dest */
2561 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2562 //then L.v ends up being a string, which is wrong
2563 //                      if (R.v == v1+1 && R.v->string) {
2564 //                              res = setvar_p(L.v, R.v->string);
2565 //                              R.v->string = NULL;
2566 //                      } else {
2567                                 res = copyvar(L.v, R.v);
2568 //                      }
2569                         break;
2570
2571                 case XC( OC_TERNARY ):
2572                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2573                                 syntax_error(EMSG_POSSIBLE_ERROR);
2574                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2575                         break;
2576
2577                 case XC( OC_FUNC ): {
2578                         var *vbeg, *v;
2579                         const char *sv_progname;
2580
2581                         if (!op->r.f->body.first)
2582                                 syntax_error(EMSG_UNDEF_FUNC);
2583
2584                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2585                         while (op1) {
2586                                 var *arg = evaluate(nextarg(&op1), v1);
2587                                 copyvar(v, arg);
2588                                 v->type |= VF_CHILD;
2589                                 v->x.parent = arg;
2590                                 if (++v - vbeg >= op->r.f->nargs)
2591                                         break;
2592                         }
2593
2594                         v = fnargs;
2595                         fnargs = vbeg;
2596                         sv_progname = g_progname;
2597
2598                         res = evaluate(op->r.f->body.first, res);
2599
2600                         g_progname = sv_progname;
2601                         nvfree(fnargs);
2602                         fnargs = v;
2603
2604                         break;
2605                 }
2606
2607                 case XC( OC_GETLINE ):
2608                 case XC( OC_PGETLINE ): {
2609                         rstream *rsm;
2610                         int i;
2611
2612                         if (op1) {
2613                                 rsm = newfile(L.s);
2614                                 if (!rsm->F) {
2615                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2616                                                 rsm->F = popen(L.s, "r");
2617                                                 rsm->is_pipe = TRUE;
2618                                         } else {
2619                                                 rsm->F = fopen_for_read(L.s);           /* not xfopen! */
2620                                         }
2621                                 }
2622                         } else {
2623                                 if (!iF)
2624                                         iF = next_input_file();
2625                                 rsm = iF;
2626                         }
2627
2628                         if (!rsm->F) {
2629                                 setvar_i(intvar[ERRNO], errno);
2630                                 setvar_i(res, -1);
2631                                 break;
2632                         }
2633
2634                         if (!op->r.n)
2635                                 R.v = intvar[F0];
2636
2637                         i = awk_getline(rsm, R.v);
2638                         if (i > 0 && !op1) {
2639                                 incvar(intvar[FNR]);
2640                                 incvar(intvar[NR]);
2641                         }
2642                         setvar_i(res, i);
2643                         break;
2644                 }
2645
2646                 /* simple builtins */
2647                 case XC( OC_FBLTIN ): {
2648                         double R_d = R_d; /* for compiler */
2649
2650                         switch (opn) {
2651                         case F_in:
2652                                 R_d = (int)L_d;
2653                                 break;
2654
2655                         case F_rn:
2656                                 R_d = (double)rand() / (double)RAND_MAX;
2657                                 break;
2658 #if ENABLE_FEATURE_AWK_LIBM
2659                         case F_co:
2660                                 R_d = cos(L_d);
2661                                 break;
2662
2663                         case F_ex:
2664                                 R_d = exp(L_d);
2665                                 break;
2666
2667                         case F_lg:
2668                                 R_d = log(L_d);
2669                                 break;
2670
2671                         case F_si:
2672                                 R_d = sin(L_d);
2673                                 break;
2674
2675                         case F_sq:
2676                                 R_d = sqrt(L_d);
2677                                 break;
2678 #else
2679                         case F_co:
2680                         case F_ex:
2681                         case F_lg:
2682                         case F_si:
2683                         case F_sq:
2684                                 syntax_error(EMSG_NO_MATH);
2685                                 break;
2686 #endif
2687                         case F_sr:
2688                                 R_d = (double)seed;
2689                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2690                                 srand(seed);
2691                                 break;
2692
2693                         case F_ti:
2694                                 R_d = time(NULL);
2695                                 break;
2696
2697                         case F_le:
2698                                 if (!op1)
2699                                         L.s = getvar_s(intvar[F0]);
2700                                 R_d = strlen(L.s);
2701                                 break;
2702
2703                         case F_sy:
2704                                 fflush_all();
2705                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2706                                                 ? (system(L.s) >> 8) : 0;
2707                                 break;
2708
2709                         case F_ff:
2710                                 if (!op1) {
2711                                         fflush(stdout);
2712                                 } else if (L.s && *L.s) {
2713                                         rstream *rsm = newfile(L.s);
2714                                         fflush(rsm->F);
2715                                 } else {
2716                                         fflush_all();
2717                                 }
2718                                 break;
2719
2720                         case F_cl: {
2721                                 rstream *rsm;
2722                                 int err = 0;
2723                                 rsm = (rstream *)hash_search(fdhash, L.s);
2724                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2725                                 if (rsm) {
2726                                         debug_printf_eval("OC_FBLTIN F_cl "
2727                                                 "rsm->is_pipe:%d, ->F:%p\n",
2728                                                 rsm->is_pipe, rsm->F);
2729                                         /* Can be NULL if open failed. Example:
2730                                          * getline line <"doesnt_exist";
2731                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2732                                          */
2733                                         if (rsm->F)
2734                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2735                                         free(rsm->buffer);
2736                                         hash_remove(fdhash, L.s);
2737                                 }
2738                                 if (err)
2739                                         setvar_i(intvar[ERRNO], errno);
2740                                 R_d = (double)err;
2741                                 break;
2742                         }
2743                         } /* switch */
2744                         setvar_i(res, R_d);
2745                         break;
2746                 }
2747
2748                 case XC( OC_BUILTIN ):
2749                         res = exec_builtin(op, res);
2750                         break;
2751
2752                 case XC( OC_SPRINTF ):
2753                         setvar_p(res, awk_printf(op1));
2754                         break;
2755
2756                 case XC( OC_UNARY ): {
2757                         double Ld, R_d;
2758
2759                         Ld = R_d = getvar_i(R.v);
2760                         switch (opn) {
2761                         case 'P':
2762                                 Ld = ++R_d;
2763                                 goto r_op_change;
2764                         case 'p':
2765                                 R_d++;
2766                                 goto r_op_change;
2767                         case 'M':
2768                                 Ld = --R_d;
2769                                 goto r_op_change;
2770                         case 'm':
2771                                 R_d--;
2772  r_op_change:
2773                                 setvar_i(R.v, R_d);
2774                                 break;
2775                         case '!':
2776                                 Ld = !istrue(R.v);
2777                                 break;
2778                         case '-':
2779                                 Ld = -R_d;
2780                                 break;
2781                         }
2782                         setvar_i(res, Ld);
2783                         break;
2784                 }
2785
2786                 case XC( OC_FIELD ): {
2787                         int i = (int)getvar_i(R.v);
2788                         if (i == 0) {
2789                                 res = intvar[F0];
2790                         } else {
2791                                 split_f0();
2792                                 if (i > nfields)
2793                                         fsrealloc(i);
2794                                 res = &Fields[i - 1];
2795                         }
2796                         break;
2797                 }
2798
2799                 /* concatenation (" ") and index joining (",") */
2800                 case XC( OC_CONCAT ):
2801                 case XC( OC_COMMA ): {
2802                         const char *sep = "";
2803                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2804                                 sep = getvar_s(intvar[SUBSEP]);
2805                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2806                         break;
2807                 }
2808
2809                 case XC( OC_LAND ):
2810                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2811                         break;
2812
2813                 case XC( OC_LOR ):
2814                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2815                         break;
2816
2817                 case XC( OC_BINARY ):
2818                 case XC( OC_REPLACE ): {
2819                         double R_d = getvar_i(R.v);
2820                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2821                         switch (opn) {
2822                         case '+':
2823                                 L_d += R_d;
2824                                 break;
2825                         case '-':
2826                                 L_d -= R_d;
2827                                 break;
2828                         case '*':
2829                                 L_d *= R_d;
2830                                 break;
2831                         case '/':
2832                                 if (R_d == 0)
2833                                         syntax_error(EMSG_DIV_BY_ZERO);
2834                                 L_d /= R_d;
2835                                 break;
2836                         case '&':
2837 #if ENABLE_FEATURE_AWK_LIBM
2838                                 L_d = pow(L_d, R_d);
2839 #else
2840                                 syntax_error(EMSG_NO_MATH);
2841 #endif
2842                                 break;
2843                         case '%':
2844                                 if (R_d == 0)
2845                                         syntax_error(EMSG_DIV_BY_ZERO);
2846                                 L_d -= (int)(L_d / R_d) * R_d;
2847                                 break;
2848                         }
2849                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2850                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2851                         break;
2852                 }
2853
2854                 case XC( OC_COMPARE ): {
2855                         int i = i; /* for compiler */
2856                         double Ld;
2857
2858                         if (is_numeric(L.v) && is_numeric(R.v)) {
2859                                 Ld = getvar_i(L.v) - getvar_i(R.v);
2860                         } else {
2861                                 const char *l = getvar_s(L.v);
2862                                 const char *r = getvar_s(R.v);
2863                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2864                         }
2865                         switch (opn & 0xfe) {
2866                         case 0:
2867                                 i = (Ld > 0);
2868                                 break;
2869                         case 2:
2870                                 i = (Ld >= 0);
2871                                 break;
2872                         case 4:
2873                                 i = (Ld == 0);
2874                                 break;
2875                         }
2876                         setvar_i(res, (i == 0) ^ (opn & 1));
2877                         break;
2878                 }
2879
2880                 default:
2881                         syntax_error(EMSG_POSSIBLE_ERROR);
2882                 }
2883                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2884                         op = op->a.n;
2885                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2886                         break;
2887                 if (nextrec)
2888                         break;
2889         } /* while (op) */
2890
2891         nvfree(v1);
2892         debug_printf_eval("returning from %s(): %p\n", __func__, res);
2893         return res;
2894 #undef fnargs
2895 #undef seed
2896 #undef sreg
2897 }
2898
2899
2900 /* -------- main & co. -------- */
2901
2902 static int awk_exit(int r)
2903 {
2904         var tv;
2905         unsigned i;
2906         hash_item *hi;
2907
2908         zero_out_var(&tv);
2909
2910         if (!exiting) {
2911                 exiting = TRUE;
2912                 nextrec = FALSE;
2913                 evaluate(endseq.first, &tv);
2914         }
2915
2916         /* waiting for children */
2917         for (i = 0; i < fdhash->csize; i++) {
2918                 hi = fdhash->items[i];
2919                 while (hi) {
2920                         if (hi->data.rs.F && hi->data.rs.is_pipe)
2921                                 pclose(hi->data.rs.F);
2922                         hi = hi->next;
2923                 }
2924         }
2925
2926         exit(r);
2927 }
2928
2929 /* if expr looks like "var=value", perform assignment and return 1,
2930  * otherwise return 0 */
2931 static int is_assignment(const char *expr)
2932 {
2933         char *exprc, *val, *s, *s1;
2934
2935         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2936                 return FALSE;
2937         }
2938
2939         exprc = xstrdup(expr);
2940         val = exprc + (val - expr);
2941         *val++ = '\0';
2942
2943         s = s1 = val;
2944         while ((*s1 = nextchar(&s)) != '\0')
2945                 s1++;
2946
2947         setvar_u(newvar(exprc), val);
2948         free(exprc);
2949         return TRUE;
2950 }
2951
2952 /* switch to next input file */
2953 static rstream *next_input_file(void)
2954 {
2955 #define rsm          (G.next_input_file__rsm)
2956 #define files_happen (G.next_input_file__files_happen)
2957
2958         FILE *F = NULL;
2959         const char *fname, *ind;
2960
2961         if (rsm.F)
2962                 fclose(rsm.F);
2963         rsm.F = NULL;
2964         rsm.pos = rsm.adv = 0;
2965
2966         do {
2967                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2968                         if (files_happen)
2969                                 return NULL;
2970                         fname = "-";
2971                         F = stdin;
2972                 } else {
2973                         ind = getvar_s(incvar(intvar[ARGIND]));
2974                         fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2975                         if (fname && *fname && !is_assignment(fname))
2976                                 F = xfopen_stdin(fname);
2977                 }
2978         } while (!F);
2979
2980         files_happen = TRUE;
2981         setvar_s(intvar[FILENAME], fname);
2982         rsm.F = F;
2983         return &rsm;
2984 #undef rsm
2985 #undef files_happen
2986 }
2987
2988 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2989 int awk_main(int argc, char **argv)
2990 {
2991         unsigned opt;
2992         char *opt_F, *opt_W;
2993         llist_t *list_v = NULL;
2994         llist_t *list_f = NULL;
2995         int i, j;
2996         var *v;
2997         var tv;
2998         char **envp;
2999         char *vnames = (char *)vNames; /* cheat */
3000         char *vvalues = (char *)vValues;
3001
3002         INIT_G();
3003
3004         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3005          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3006         if (ENABLE_LOCALE_SUPPORT)
3007                 setlocale(LC_NUMERIC, "C");
3008
3009         zero_out_var(&tv);
3010
3011         /* allocate global buffer */
3012         g_buf = xmalloc(MAXVARFMT + 1);
3013
3014         vhash = hash_init();
3015         ahash = hash_init();
3016         fdhash = hash_init();
3017         fnhash = hash_init();
3018
3019         /* initialize variables */
3020         for (i = 0; *vnames; i++) {
3021                 intvar[i] = v = newvar(nextword(&vnames));
3022                 if (*vvalues != '\377')
3023                         setvar_s(v, nextword(&vvalues));
3024                 else
3025                         setvar_i(v, 0);
3026
3027                 if (*vnames == '*') {
3028                         v->type |= VF_SPECIAL;
3029                         vnames++;
3030                 }
3031         }
3032
3033         handle_special(intvar[FS]);
3034         handle_special(intvar[RS]);
3035
3036         newfile("/dev/stdin")->F = stdin;
3037         newfile("/dev/stdout")->F = stdout;
3038         newfile("/dev/stderr")->F = stderr;
3039
3040         /* Huh, people report that sometimes environ is NULL. Oh well. */
3041         if (environ) for (envp = environ; *envp; envp++) {
3042                 /* environ is writable, thus we don't strdup it needlessly */
3043                 char *s = *envp;
3044                 char *s1 = strchr(s, '=');
3045                 if (s1) {
3046                         *s1 = '\0';
3047                         /* Both findvar and setvar_u take const char*
3048                          * as 2nd arg -> environment is not trashed */
3049                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3050                         *s1 = '=';
3051                 }
3052         }
3053         opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
3054         opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3055         argv += optind;
3056         argc -= optind;
3057         if (opt & 0x1)
3058                 setvar_s(intvar[FS], opt_F); // -F
3059         while (list_v) { /* -v */
3060                 if (!is_assignment(llist_pop(&list_v)))
3061                         bb_show_usage();
3062         }
3063         if (list_f) { /* -f */
3064                 do {
3065                         char *s = NULL;
3066                         FILE *from_file;
3067
3068                         g_progname = llist_pop(&list_f);
3069                         from_file = xfopen_stdin(g_progname);
3070                         /* one byte is reserved for some trick in next_token */
3071                         for (i = j = 1; j > 0; i += j) {
3072                                 s = xrealloc(s, i + 4096);
3073                                 j = fread(s + i, 1, 4094, from_file);
3074                         }
3075                         s[i] = '\0';
3076                         fclose(from_file);
3077                         parse_program(s + 1);
3078                         free(s);
3079                 } while (list_f);
3080                 argc++;
3081         } else { // no -f: take program from 1st parameter
3082                 if (!argc)
3083                         bb_show_usage();
3084                 g_progname = "cmd. line";
3085                 parse_program(*argv++);
3086         }
3087         if (opt & 0x8) // -W
3088                 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3089
3090         /* fill in ARGV array */
3091         setvar_i(intvar[ARGC], argc);
3092         setari_u(intvar[ARGV], 0, "awk");
3093         i = 0;
3094         while (*argv)
3095                 setari_u(intvar[ARGV], ++i, *argv++);
3096
3097         evaluate(beginseq.first, &tv);
3098         if (!mainseq.first && !endseq.first)
3099                 awk_exit(EXIT_SUCCESS);
3100
3101         /* input file could already be opened in BEGIN block */
3102         if (!iF)
3103                 iF = next_input_file();
3104
3105         /* passing through input files */
3106         while (iF) {
3107                 nextfile = FALSE;
3108                 setvar_i(intvar[FNR], 0);
3109
3110                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3111                         nextrec = FALSE;
3112                         incvar(intvar[NR]);
3113                         incvar(intvar[FNR]);
3114                         evaluate(mainseq.first, &tv);
3115
3116                         if (nextfile)
3117                                 break;
3118                 }
3119
3120                 if (i < 0)
3121                         syntax_error(strerror(errno));
3122
3123                 iF = next_input_file();
3124         }
3125
3126         awk_exit(EXIT_SUCCESS);
3127         /*return 0;*/
3128 }