vi: fix reading of file after last line
[oweals/busybox.git] / editors / awk.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 //config:config AWK
11 //config:       bool "awk"
12 //config:       default y
13 //config:       help
14 //config:         Awk is used as a pattern scanning and processing language. This is
15 //config:         the BusyBox implementation of that programming language.
16 //config:
17 //config:config FEATURE_AWK_LIBM
18 //config:       bool "Enable math functions (requires libm)"
19 //config:       default y
20 //config:       depends on AWK
21 //config:       help
22 //config:         Enable math functions of the Awk programming language.
23 //config:         NOTE: This will require libm to be present for linking.
24 //config:
25 //config:config FEATURE_AWK_GNU_EXTENSIONS
26 //config:       bool "Enable a few GNU extensions"
27 //config:       default y
28 //config:       depends on AWK
29 //config:       help
30 //config:         Enable a few features from gawk:
31 //config:         * command line option -e AWK_PROGRAM
32 //config:         * simultaneous use of -f and -e on the command line.
33 //config:           This enables the use of awk library files.
34 //config:           Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
35
36 //applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
37
38 //kbuild:lib-$(CONFIG_AWK) += awk.o
39
40 //usage:#define awk_trivial_usage
41 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42 //usage:#define awk_full_usage "\n\n"
43 //usage:       "        -v VAR=VAL      Set variable"
44 //usage:     "\n        -F SEP          Use SEP as field separator"
45 //usage:     "\n        -f FILE         Read program from FILE"
46 //usage:        IF_FEATURE_AWK_GNU_EXTENSIONS(
47 //usage:     "\n        -e AWK_PROGRAM"
48 //usage:        )
49
50 #include "libbb.h"
51 #include "xregex.h"
52 #include <math.h>
53
54 /* This is a NOEXEC applet. Be very careful! */
55
56
/*
 * Debug tracing hooks.  Each hook is a no-op by default.  To enable one,
 * comment out its empty definition below: the #ifndef fallback that follows
 * it then defines the hook as an fprintf to stderr.
 */
#define debug_printf_walker(...) do {} while (0)
#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif

#define debug_printf_eval(...) do {} while (0)
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif

#define debug_printf_parse(...) do {} while (0)
#ifndef debug_printf_parse
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
#endif
72
73
74 #define OPTSTR_AWK \
75         "F:v:f:" \
76         IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
77         "W:"
78 #define OPTCOMPLSTR_AWK \
79         "v::f::" \
80         IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
81 enum {
82         OPTBIT_F,       /* define field separator */
83         OPTBIT_v,       /* define variable */
84         OPTBIT_f,       /* pull in awk program from file */
85         IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
86         OPTBIT_W,       /* -W ignored */
87         OPT_F = 1 << OPTBIT_F,
88         OPT_v = 1 << OPTBIT_v,
89         OPT_f = 1 << OPTBIT_f,
90         OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
91         OPT_W = 1 << OPTBIT_W
92 };
93
#define MAXVARFMT    240
#define MINNVBLOCK   64

/*
 * Variable flags (stored in var::type).
 */
#define VF_NUMBER    0x0001     /* primary type is number */
#define VF_ARRAY     0x0002     /* variable is an array */

#define VF_CACHED    0x0100     /* num/str value has a cached str/num equivalent */
#define VF_USER      0x0200     /* user input (may be a numeric string) */
#define VF_SPECIAL   0x0400     /* requires extra handling when changed */
#define VF_WALK      0x0800     /* variable has an alloc'd x.walker list */
#define VF_FSTR      0x1000     /* var::string points into the fstring buffer */
#define VF_CHILD     0x2000     /* function arg; x.parent points to the source */
#define VF_DIRTY     0x4000     /* variable was set explicitly */

/* These flags are static: they are kept when the value is changed */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
111
/*
 * One entry in a list of array-walk states (see var::x.walker, used by
 * for..in).  Entries are linked through 'prev'.
 * NOTE(review): the one-element 'wbuf' looks like a trailing buffer that is
 * over-allocated by the creator -- confirm at the allocation site.
 */
typedef struct walker_list walker_list;
struct walker_list {
        char *end;
        char *cur;
        walker_list *prev;
        char wbuf[1];
};
118
119 /* Variable */
120 typedef struct var_s {
121         unsigned type;            /* flags */
122         double number;
123         char *string;
124         union {
125                 int aidx;               /* func arg idx (for compilation stage) */
126                 struct xhash_s *array;  /* array ptr */
127                 struct var_s *parent;   /* for func args, ptr to actual parameter */
128                 walker_list *walker;    /* list of array elements (for..in) */
129         } x;
130 } var;
131
/*
 * Node chain: used for the pattern-action chain, BEGIN, END and
 * function bodies.  Tracks the first and last node of the chain.
 */
struct chain_s {
        struct node_s *first;
        struct node_s *last;
        const char *programname;
};
typedef struct chain_s chain;
138
139 /* Function */
140 typedef struct func_s {
141         unsigned nargs;
142         struct chain_s body;
143 } func;
144
145 /* I/O stream */
146 typedef struct rstream_s {
147         FILE *F;
148         char *buffer;
149         int adv;
150         int size;
151         int pos;
152         smallint is_pipe;
153 } rstream;
154
155 typedef struct hash_item_s {
156         union {
157                 struct var_s v;         /* variable/array hash */
158                 struct rstream_s rs;    /* redirect streams hash */
159                 struct func_s f;        /* functions hash */
160         } data;
161         struct hash_item_s *next;       /* next in chain */
162         char name[1];                   /* really it's longer */
163 } hash_item;
164
/* Chained hash table keyed by name */
struct xhash_s {
        unsigned nel;                   /* num of elements */
        unsigned csize;                 /* current hash size */
        unsigned nprime;                /* next hash size in PRIMES[] */
        unsigned glen;                  /* summary length of item names */
        struct hash_item_s **items;     /* csize bucket heads */
};
typedef struct xhash_s xhash;
172
173 /* Tree node */
174 typedef struct node_s {
175         uint32_t info;
176         unsigned lineno;
177         union {
178                 struct node_s *n;
179                 var *v;
180                 int aidx;
181                 char *new_progname;
182                 regex_t *re;
183         } l;
184         union {
185                 struct node_s *n;
186                 regex_t *ire;
187                 func *f;
188         } r;
189         union {
190                 struct node_s *n;
191         } a;
192 } node;
193
194 /* Block of temporary variables */
195 typedef struct nvblock_s {
196         int size;
197         var *pos;
198         struct nvblock_s *prev;
199         struct nvblock_s *next;
200         var nv[];
201 } nvblock;
202
203 typedef struct tsplitter_s {
204         node n;
205         regex_t re[2];
206 } tsplitter;
207
/*
 * Simple token classes: one bit each.
 * Order and hex values are very important!!!  See next_token()
 */
#define TC_SEQSTART     (1 << 0)                /* ( */
#define TC_SEQTERM      (1 << 1)                /* ) */
#define TC_REGEXP       (1 << 2)                /* /.../ */
#define TC_OUTRDR       (1 << 3)                /* | > >> */
#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)                /* input redirection pipe */
#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
#define TC_ARRTERM      (1 << 11)               /* ] */
#define TC_GRPSTART     (1 << 12)               /* { */
#define TC_GRPTERM      (1 << 13)               /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)               /* `function' `func' */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

/* either flavour of unary prefix operator */
#define TC_UOPPRE       (TC_UOPPRE1 | TC_UOPPRE2)

/*
 * Combined token classes
 */
#define TC_BINOP        (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
//#define       TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND      (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                        | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT     (TC_STATX | TC_WHILE)
#define TC_OPTERM       (TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD         (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                        | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM       (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                        | TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ        (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ       (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/*
 * If the previous token class is in CONCAT1 and the next one is in CONCAT2,
 * a concatenation operator is inserted between them.
 */
#define TC_CONCAT1      (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                        | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2      (TC_OPERAND | TC_UOPPRE)
270
/*
 * Per-operator flags stored in the info word.
 * NOTE(review): presumably RESn = evaluate operand n, STRn/NUM1 = fetch it
 * as string/number -- confirm against evaluate().
 */
#define OF_RES1         0x010000
#define OF_RES2         0x020000
#define OF_STR1         0x040000
#define OF_STR2         0x080000
#define OF_NUM1         0x100000
#define OF_CHECKED      0x200000

/*
 * Combined operator flags.  Two-letter shorthands: the first letter
 * describes operand 1, the second operand 2.
 */
#define xx              0
#define xV              OF_RES2
#define xS              (OF_RES2 | OF_STR2)
#define Vx              OF_RES1
#define VV              (OF_RES1 | OF_RES2)
#define Nx              (OF_RES1 | OF_NUM1)
#define NV              (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx              (OF_RES1 | OF_STR1)
#define SV              (OF_RES1 | OF_STR1 | OF_RES2)
#define SS              (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* masks selecting the operation class and the low opcode bits of an info word */
#define OPCLSMASK       0xFF00
#define OPNMASK         0x007F
292
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P(x) places x in that highest byte.  The argument is parenthesized so
 * that compound expressions shift as a whole, and the shift is done in
 * uint32_t so that values with bit 7 set (e.g. P(0x83)) do not overflow a
 * signed int, which would be undefined behaviour.
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
303
/*
 * Operation classes: the second-lowest byte of a node's info word
 * (selected by OPCLSMASK).
 */

#define SHIFT_TIL_THIS  0x0600  /* same value as OC_WALKINIT */
#define RECUR_FROM_THIS 0x1000  /* same value as OC_BINARY */

enum {
        OC_DELETE    = 0x0100,
        OC_EXEC      = 0x0200,
        OC_NEWSOURCE = 0x0300,
        OC_PRINT     = 0x0400,
        OC_PRINTF    = 0x0500,
        OC_WALKINIT  = 0x0600,

        OC_BR        = 0x0700,
        OC_BREAK     = 0x0800,
        OC_CONTINUE  = 0x0900,
        OC_EXIT      = 0x0a00,
        OC_NEXT      = 0x0b00,
        OC_NEXTFILE  = 0x0c00,
        OC_TEST      = 0x0d00,
        OC_WALKNEXT  = 0x0e00,

        OC_BINARY    = 0x1000,
        OC_BUILTIN   = 0x1100,
        OC_COLON     = 0x1200,
        OC_COMMA     = 0x1300,
        OC_COMPARE   = 0x1400,
        OC_CONCAT    = 0x1500,
        OC_FBLTIN    = 0x1600,
        OC_FIELD     = 0x1700,
        OC_FNARG     = 0x1800,
        OC_FUNC      = 0x1900,
        OC_GETLINE   = 0x1a00,
        OC_IN        = 0x1b00,
        OC_LAND      = 0x1c00,
        OC_LOR       = 0x1d00,
        OC_MATCH     = 0x1e00,
        OC_MOVE      = 0x1f00,
        OC_PGETLINE  = 0x2000,
        OC_REGEXP    = 0x2100,
        OC_REPLACE   = 0x2200,
        OC_RETURN    = 0x2300,
        OC_SPRINTF   = 0x2400,
        OC_TERNARY   = 0x2500,
        OC_UNARY     = 0x2600,
        OC_VAR       = 0x2700,
        OC_DONE      = 0x2800,

        ST_IF        = 0x3000,
        ST_DO        = 0x3100,
        ST_FOR       = 0x3200,
        ST_WHILE     = 0x3300
};
330
/* simple builtins: low bits of an OC_FBLTIN info word */
enum {
        F_in,   F_rn,   F_co,   F_ex,
        F_lg,   F_si,   F_sq,   F_sr,
        F_ti,   F_le,   F_sy,   F_ff,
        F_cl
};

/* builtins: low bits of an OC_BUILTIN info word */
enum {
        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,
        B_mt,   B_lo,   B_up,   B_ge,   B_gs,   B_su,
        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
};
343
/*
 * Tokens and their corresponding info values.
 *
 * Within tokenlist, NTC (a single '\377' byte) switches to the next token
 * class (tc<<1); NTCC is the same byte as a character constant.
 */
#define NTC     "\377"
#define NTCC    '\377'

/* shorthand that keeps the tokeninfo table narrow */
#define OC_B    OC_BUILTIN
350
351 static const char tokenlist[] ALIGN1 =
352         "\1("         NTC
353         "\1)"         NTC
354         "\1/"         NTC                                   /* REGEXP */
355         "\2>>"        "\1>"         "\1|"       NTC         /* OUTRDR */
356         "\2++"        "\2--"        NTC                     /* UOPPOST */
357         "\2++"        "\2--"        "\1$"       NTC         /* UOPPRE1 */
358         "\2=="        "\1="         "\2+="      "\2-="      /* BINOPX */
359         "\2*="        "\2/="        "\2%="      "\2^="
360         "\1+"         "\1-"         "\3**="     "\2**"
361         "\1/"         "\1%"         "\1^"       "\1*"
362         "\2!="        "\2>="        "\2<="      "\1>"
363         "\1<"         "\2!~"        "\1~"       "\2&&"
364         "\2||"        "\1?"         "\1:"       NTC
365         "\2in"        NTC
366         "\1,"         NTC
367         "\1|"         NTC
368         "\1+"         "\1-"         "\1!"       NTC         /* UOPPRE2 */
369         "\1]"         NTC
370         "\1{"         NTC
371         "\1}"         NTC
372         "\1;"         NTC
373         "\1\n"        NTC
374         "\2if"        "\2do"        "\3for"     "\5break"   /* STATX */
375         "\10continue" "\6delete"    "\5print"
376         "\6printf"    "\4next"      "\10nextfile"
377         "\6return"    "\4exit"      NTC
378         "\5while"     NTC
379         "\4else"      NTC
380
381         "\3and"       "\5compl"     "\6lshift"  "\2or"
382         "\6rshift"    "\3xor"
383         "\5close"     "\6system"    "\6fflush"  "\5atan2"   /* BUILTIN */
384         "\3cos"       "\3exp"       "\3int"     "\3log"
385         "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
386         "\6gensub"    "\4gsub"      "\5index"   "\6length"
387         "\5match"     "\5split"     "\7sprintf" "\3sub"
388         "\6substr"    "\7systime"   "\10strftime" "\6mktime"
389         "\7tolower"   "\7toupper"   NTC
390         "\7getline"   NTC
391         "\4func"      "\10function" NTC
392         "\5BEGIN"     NTC
393         "\3END"
394         /* compiler adds trailing "\0" */
395         ;
396
397 static const uint32_t tokeninfo[] = {
398         0,
399         0,
400         OC_REGEXP,
401         xS|'a',                  xS|'w',                  xS|'|',
402         OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
403         OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
404         OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
405         OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
406         OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
407         OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
408         OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
409         OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
410         OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
411         OC_IN|SV|P(49), /* in */
412         OC_COMMA|SS|P(80),
413         OC_PGETLINE|SV|P(37),
414         OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
415         0, /* ] */
416         0,
417         0,
418         0,
419         0, /* \n */
420         ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
421         OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
422         OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
423         OC_RETURN|Vx, OC_EXIT|Nx,
424         ST_WHILE,
425         0, /* else */
426
427         OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
428         OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
429         OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
430         OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
431         OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
432         OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
433         OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
434         OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
435         OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
436         OC_GETLINE|SV|P(0),
437         0,                 0,
438         0,
439         0 /* END */
440 };
441
/*
 * Internal variable names and their initial values.
 * An asterisk marks SPECIAL vars; $ is just no-named Field0.
 *
 * This enum indexes intvar[]; its order must match vNames below.
 */
enum {
        CONVFMT,
        OFMT,
        FS,
        OFS,
        ORS,
        RS,
        RT,
        FILENAME,
        SUBSEP,
        F0,
        ARGIND,
        ARGC,
        ARGV,
        ERRNO,
        FNR,
        NR,
        NF,
        IGNORECASE,
        ENVIRON,
        NUM_INTERNAL_VARS
};
451
452 static const char vNames[] ALIGN1 =
453         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
454         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
455         "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
456         "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
457         "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
458
459 static const char vValues[] ALIGN1 =
460         "%.6g\0"    "%.6g\0"    " \0"       " \0"
461         "\n\0"      "\n\0"      "\0"        "\0"
462         "\034\0"    "\0"        "\377";
463
464 /* hash size may grow to these values */
465 #define FIRST_PRIME 61
466 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
467
468
469 /* Globals. Split in two parts so that first one is addressed
470  * with (mostly short) negative offsets.
471  * NB: it's unsafe to put members of type "double"
472  * into globals2 (gcc may fail to align them).
473  */
474 struct globals {
475         double t_double;
476         chain beginseq, mainseq, endseq;
477         chain *seq;
478         node *break_ptr, *continue_ptr;
479         rstream *iF;
480         xhash *vhash, *ahash, *fdhash, *fnhash;
481         const char *g_progname;
482         int g_lineno;
483         int nfields;
484         int maxfields; /* used in fsrealloc() only */
485         var *Fields;
486         nvblock *g_cb;
487         char *g_pos;
488         char *g_buf;
489         smallint icase;
490         smallint exiting;
491         smallint nextrec;
492         smallint nextfile;
493         smallint is_f0_split;
494         smallint t_rollback;
495 };
496 struct globals2 {
497         uint32_t t_info; /* often used */
498         uint32_t t_tclass;
499         char *t_string;
500         int t_lineno;
501
502         var *intvar[NUM_INTERNAL_VARS]; /* often used */
503
504         /* former statics from various functions */
505         char *split_f0__fstrings;
506
507         uint32_t next_token__save_tclass;
508         uint32_t next_token__save_info;
509         uint32_t next_token__ltclass;
510         smallint next_token__concat_inserted;
511
512         smallint next_input_file__files_happen;
513         rstream next_input_file__rsm;
514
515         var *evaluate__fnargs;
516         unsigned evaluate__seed;
517         regex_t evaluate__sreg;
518
519         var ptest__v;
520
521         tsplitter exec_builtin__tspl;
522
523         /* biggest and least used members go last */
524         tsplitter fsplitter, rsplitter;
525 };
/*
 * Accessors for the two global blocks: G1 is the struct globals placed just
 * before ptr_to_globals, G is the struct globals2 placed at it.
 */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
/*char G1size[sizeof(G1)]; - 0x74 */
/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */

/* members of struct globals, usable as if they were plain globals */
#define t_double        (G1.t_double)
#define beginseq        (G1.beginseq)
#define mainseq         (G1.mainseq)
#define endseq          (G1.endseq)
#define seq             (G1.seq)
#define break_ptr       (G1.break_ptr)
#define continue_ptr    (G1.continue_ptr)
#define iF              (G1.iF)
#define vhash           (G1.vhash)
#define ahash           (G1.ahash)
#define fdhash          (G1.fdhash)
#define fnhash          (G1.fnhash)
#define g_progname      (G1.g_progname)
#define g_lineno        (G1.g_lineno)
#define nfields         (G1.nfields)
#define maxfields       (G1.maxfields)
#define Fields          (G1.Fields)
#define g_cb            (G1.g_cb)
#define g_pos           (G1.g_pos)
#define g_buf           (G1.g_buf)
#define icase           (G1.icase)
#define exiting         (G1.exiting)
#define nextrec         (G1.nextrec)
#define nextfile        (G1.nextfile)
#define is_f0_split     (G1.is_f0_split)
#define t_rollback      (G1.t_rollback)

/* members of struct globals2 */
#define t_info          (G.t_info)
#define t_tclass        (G.t_tclass)
#define t_string        (G.t_string)
#define t_lineno        (G.t_lineno)
#define intvar          (G.intvar)
#define fsplitter       (G.fsplitter)
#define rsplitter       (G.rsplitter)
/*
 * Allocate both global blocks as one zeroed chunk, point ptr_to_globals at
 * the boundary between them (G1 below it, G at it), then set the two
 * members whose initial value is not zero.
 */
#define INIT_G() do { \
        char *awk_globals_mem__ = xzalloc(sizeof(G1) + sizeof(G)); \
        SET_PTR_TO_GLOBALS(awk_globals_mem__ + sizeof(G1)); \
        G.next_token__ltclass = TC_OPTERM; \
        G.evaluate__seed = 1; \
} while (0)
571
572
573 /* function prototypes */
574 static void handle_special(var *);
575 static node *parse_expr(uint32_t);
576 static void chain_group(void);
577 static var *evaluate(node *, var *);
578 static rstream *next_input_file(void);
579 static int fmt_num(char *, int, const char *, double, int);
580 static int awk_exit(int) NORETURN;
581
582 /* ---- error handling ---- */
583
584 static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
585 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
586 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
587 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
588 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
589 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
590 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
591 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
592 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
593 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
594
595 static void zero_out_var(var *vp)
596 {
597         memset(vp, 0, sizeof(*vp));
598 }
599
600 static void syntax_error(const char *message) NORETURN;
601 static void syntax_error(const char *message)
602 {
603         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
604 }
605
606 /* ---- hash stuff ---- */
607
/*
 * Hash a NUL-terminated name: h = h * 63 + c for every character, with
 * unsigned wrap-around.  The caller reduces the result modulo table size.
 */
static unsigned hashidx(const char *name)
{
        unsigned h;

        for (h = 0; *name != '\0'; name++)
                h = h * 63 + *name;
        return h;
}
616
617 /* create new hash */
618 static xhash *hash_init(void)
619 {
620         xhash *newhash;
621
622         newhash = xzalloc(sizeof(*newhash));
623         newhash->csize = FIRST_PRIME;
624         newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
625
626         return newhash;
627 }
628
629 /* find item in hash, return ptr to data, NULL if not found */
630 static void *hash_search(xhash *hash, const char *name)
631 {
632         hash_item *hi;
633
634         hi = hash->items[hashidx(name) % hash->csize];
635         while (hi) {
636                 if (strcmp(hi->name, name) == 0)
637                         return &hi->data;
638                 hi = hi->next;
639         }
640         return NULL;
641 }
642
643 /* grow hash if it becomes too big */
644 static void hash_rebuild(xhash *hash)
645 {
646         unsigned newsize, i, idx;
647         hash_item **newitems, *hi, *thi;
648
649         if (hash->nprime == ARRAY_SIZE(PRIMES))
650                 return;
651
652         newsize = PRIMES[hash->nprime++];
653         newitems = xzalloc(newsize * sizeof(newitems[0]));
654
655         for (i = 0; i < hash->csize; i++) {
656                 hi = hash->items[i];
657                 while (hi) {
658                         thi = hi;
659                         hi = thi->next;
660                         idx = hashidx(thi->name) % newsize;
661                         thi->next = newitems[idx];
662                         newitems[idx] = thi;
663                 }
664         }
665
666         free(hash->items);
667         hash->csize = newsize;
668         hash->items = newitems;
669 }
670
671 /* find item in hash, add it if necessary. Return ptr to data */
672 static void *hash_find(xhash *hash, const char *name)
673 {
674         hash_item *hi;
675         unsigned idx;
676         int l;
677
678         hi = hash_search(hash, name);
679         if (!hi) {
680                 if (++hash->nel / hash->csize > 10)
681                         hash_rebuild(hash);
682
683                 l = strlen(name) + 1;
684                 hi = xzalloc(sizeof(*hi) + l);
685                 strcpy(hi->name, name);
686
687                 idx = hashidx(name) % hash->csize;
688                 hi->next = hash->items[idx];
689                 hash->items[idx] = hi;
690                 hash->glen += l;
691         }
692         return &hi->data;
693 }
694
/* Typed find-or-create wrappers around hash_find() */
#define findvar(hash, name)  ((var *)hash_find((hash), (name)))
#define newvar(name)         ((var *)hash_find(vhash, (name)))
#define newfile(name)        ((rstream *)hash_find(fdhash, (name)))
#define newfunc(name)        ((func *)hash_find(fnhash, (name)))
699
700 static void hash_remove(xhash *hash, const char *name)
701 {
702         hash_item *hi, **phi;
703
704         phi = &hash->items[hashidx(name) % hash->csize];
705         while (*phi) {
706                 hi = *phi;
707                 if (strcmp(hi->name, name) == 0) {
708                         hash->glen -= (strlen(name) + 1);
709                         hash->nel--;
710                         *phi = hi->next;
711                         free(hi);
712                         break;
713                 }
714                 phi = &hi->next;
715         }
716 }
717
718 /* ------ some useful functions ------ */
719
720 static char *skip_spaces(char *p)
721 {
722         while (1) {
723                 if (*p == '\\' && p[1] == '\n') {
724                         p++;
725                         t_lineno++;
726                 } else if (*p != ' ' && *p != '\t') {
727                         break;
728                 }
729                 p++;
730         }
731         return p;
732 }
733
/*
 * Return the word *s currently points at, and move *s just past that
 * word's terminating NUL.
 */
static char *nextword(char **s)
{
        char *word = *s;

        *s = word + strlen(word) + 1;
        return word;
}
742
/*
 * Fetch the next, possibly backslash-escaped, character from *s and advance
 * *s past whatever was consumed.
 */
static char nextchar(char **s)
{
        char ch;
        char *after;

        ch = **s;
        *s += 1;
        if (ch != '\\')
                return ch;

        after = *s;
        ch = bb_process_escape_sequence((const char **)s);
        /* Example awk statement:
         * s = "abc\"def"
         * we must treat \" as "
         */
        if (ch == '\\' && *s == after) {
                /* unrecognized \z: the result is z itself */
                ch = *after;
                if (ch != '\0')
                        *s = after + 1; /* advance unless z = NUL */
        }
        return ch;
}
762
/*
 * Rewrite s1 in place, replacing each escape sequence with the character
 * that nextchar() yields for it.
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
        char *src = s1;
        char *dst = s1;

        for (;;) {
                char c = nextchar(&src);

                *dst = c;
                if (c == '\0')
                        break;
                dst++;
        }
}
771
772 static ALWAYS_INLINE int isalnum_(int c)
773 {
774         return (isalnum(c) || c == '_');
775 }
776
777 static double my_strtod(char **pp)
778 {
779         char *cp = *pp;
780         if (ENABLE_DESKTOP && cp[0] == '0') {
781                 /* Might be hex or octal integer: 0x123abc or 07777 */
782                 char c = (cp[1] | 0x20);
783                 if (c == 'x' || isdigit(cp[1])) {
784                         unsigned long long ull = strtoull(cp, pp, 0);
785                         if (c == 'x')
786                                 return ull;
787                         c = **pp;
788                         if (!isdigit(c) && c != '.')
789                                 return ull;
790                         /* else: it may be a floating number. Examples:
791                          * 009.123 (*pp points to '9')
792                          * 000.123 (*pp points to '.')
793                          * fall through to strtod.
794                          */
795                 }
796         }
797         return strtod(cp, pp);
798 }
799
800 /* -------- working with variables (set/get/copy/etc) -------- */
801
802 static xhash *iamarray(var *v)
803 {
804         var *a = v;
805
806         while (a->type & VF_CHILD)
807                 a = a->x.parent;
808
809         if (!(a->type & VF_ARRAY)) {
810                 a->type |= VF_ARRAY;
811                 a->x.array = hash_init();
812         }
813         return a->x.array;
814 }
815
816 static void clear_array(xhash *array)
817 {
818         unsigned i;
819         hash_item *hi, *thi;
820
821         for (i = 0; i < array->csize; i++) {
822                 hi = array->items[i];
823                 while (hi) {
824                         thi = hi;
825                         hi = hi->next;
826                         free(thi->data.v.string);
827                         free(thi);
828                 }
829                 array->items[i] = NULL;
830         }
831         array->glen = array->nel = 0;
832 }
833
834 /* clear a variable */
835 static var *clrvar(var *v)
836 {
837         if (!(v->type & VF_FSTR))
838                 free(v->string);
839
840         v->type &= VF_DONTTOUCH;
841         v->type |= VF_DIRTY;
842         v->string = NULL;
843         return v;
844 }
845
846 /* assign string value to variable */
847 static var *setvar_p(var *v, char *value)
848 {
849         clrvar(v);
850         v->string = value;
851         handle_special(v);
852         return v;
853 }
854
855 /* same as setvar_p but make a copy of string */
856 static var *setvar_s(var *v, const char *value)
857 {
858         return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
859 }
860
861 /* same as setvar_s but sets USER flag */
862 static var *setvar_u(var *v, const char *value)
863 {
864         v = setvar_s(v, value);
865         v->type |= VF_USER;
866         return v;
867 }
868
869 /* set array element to user string */
870 static void setari_u(var *a, int idx, const char *s)
871 {
872         var *v;
873
874         v = findvar(iamarray(a), itoa(idx));
875         setvar_u(v, s);
876 }
877
878 /* assign numeric value to variable */
879 static var *setvar_i(var *v, double value)
880 {
881         clrvar(v);
882         v->type |= VF_NUMBER;
883         v->number = value;
884         handle_special(v);
885         return v;
886 }
887
888 static const char *getvar_s(var *v)
889 {
890         /* if v is numeric and has no cached string, convert it to string */
891         if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
892                 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
893                 v->string = xstrdup(g_buf);
894                 v->type |= VF_CACHED;
895         }
896         return (v->string == NULL) ? "" : v->string;
897 }
898
899 static double getvar_i(var *v)
900 {
901         char *s;
902
903         if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
904                 v->number = 0;
905                 s = v->string;
906                 if (s && *s) {
907                         debug_printf_eval("getvar_i: '%s'->", s);
908                         v->number = my_strtod(&s);
909                         debug_printf_eval("%f (s:'%s')\n", v->number, s);
910                         if (v->type & VF_USER) {
911                                 s = skip_spaces(s);
912                                 if (*s != '\0')
913                                         v->type &= ~VF_USER;
914                         }
915                 } else {
916                         debug_printf_eval("getvar_i: '%s'->zero\n", s);
917                         v->type &= ~VF_USER;
918                 }
919                 v->type |= VF_CACHED;
920         }
921         debug_printf_eval("getvar_i: %f\n", v->number);
922         return v->number;
923 }
924
925 /* Used for operands of bitwise ops */
926 static unsigned long getvar_i_int(var *v)
927 {
928         double d = getvar_i(v);
929
930         /* Casting doubles to longs is undefined for values outside
931          * of target type range. Try to widen it as much as possible */
932         if (d >= 0)
933                 return (unsigned long)d;
934         /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
935         return - (long) (unsigned long) (-d);
936 }
937
938 static var *copyvar(var *dest, const var *src)
939 {
940         if (dest != src) {
941                 clrvar(dest);
942                 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
943                 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
944                 dest->number = src->number;
945                 if (src->string)
946                         dest->string = xstrdup(src->string);
947         }
948         handle_special(dest);
949         return dest;
950 }
951
952 static var *incvar(var *v)
953 {
954         return setvar_i(v, getvar_i(v) + 1.0);
955 }
956
957 /* return true if v is number or numeric string */
958 static int is_numeric(var *v)
959 {
960         getvar_i(v);
961         return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
962 }
963
964 /* return 1 when value of v corresponds to true, 0 otherwise */
965 static int istrue(var *v)
966 {
967         if (is_numeric(v))
968                 return (v->number != 0);
969         return (v->string && v->string[0]);
970 }
971
972 /* temporary variables allocator. Last allocated should be first freed */
973 static var *nvalloc(int n)
974 {
975         nvblock *pb = NULL;
976         var *v, *r;
977         int size;
978
979         while (g_cb) {
980                 pb = g_cb;
981                 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
982                         break;
983                 g_cb = g_cb->next;
984         }
985
986         if (!g_cb) {
987                 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
988                 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
989                 g_cb->size = size;
990                 g_cb->pos = g_cb->nv;
991                 g_cb->prev = pb;
992                 /*g_cb->next = NULL; - xzalloc did it */
993                 if (pb)
994                         pb->next = g_cb;
995         }
996
997         v = r = g_cb->pos;
998         g_cb->pos += n;
999
1000         while (v < g_cb->pos) {
1001                 v->type = 0;
1002                 v->string = NULL;
1003                 v++;
1004         }
1005
1006         return r;
1007 }
1008
1009 static void nvfree(var *v)
1010 {
1011         var *p;
1012
1013         if (v < g_cb->nv || v >= g_cb->pos)
1014                 syntax_error(EMSG_INTERNAL_ERROR);
1015
1016         for (p = v; p < g_cb->pos; p++) {
1017                 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1018                         clear_array(iamarray(p));
1019                         free(p->x.array->items);
1020                         free(p->x.array);
1021                 }
1022                 if (p->type & VF_WALK) {
1023                         walker_list *n;
1024                         walker_list *w = p->x.walker;
1025                         debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1026                         p->x.walker = NULL;
1027                         while (w) {
1028                                 n = w->prev;
1029                                 debug_printf_walker(" free(%p)\n", w);
1030                                 free(w);
1031                                 w = n;
1032                         }
1033                 }
1034                 clrvar(p);
1035         }
1036
1037         g_cb->pos = v;
1038         while (g_cb->prev && g_cb->pos == g_cb->nv) {
1039                 g_cb = g_cb->prev;
1040         }
1041 }
1042
1043 /* ------- awk program text parsing ------- */
1044
1045 /* Parse the next token found at the global position g_pos and publish it
1046  * through the t_* globals (t_tclass, t_info and, depending on the token,
1047  * t_string or t_double).
 *
 * A token pushed back with rollback_token(), or one saved while a
 * concatenation operator was being inserted, is handed out first without
 * consuming any input.
 *
 * 'expected' is a mask of acceptable token classes; if the token's class is
 * not in it, syntax_error() is called. Returns the class of the token.
 */
1048 static uint32_t next_token(uint32_t expected)
1049 {
             /* State kept between calls lives in G; short local aliases: */
1050 #define concat_inserted (G.next_token__concat_inserted)
1051 #define save_tclass     (G.next_token__save_tclass)
1052 #define save_info       (G.next_token__save_info)
1053 /* Initialized to TC_OPTERM: */
1054 #define ltclass         (G.next_token__ltclass)
1055
1056         char *p, *s;
1057         const char *tl;
1058         uint32_t tc;
1059         const uint32_t *ti;
1060
1061         if (t_rollback) {
                     /* pushed-back token: t_tclass/t_info are still in place */
1062                 t_rollback = FALSE;
1063
1064         } else if (concat_inserted) {
                     /* last call returned an inserted concatenation operator;
                      * now deliver the token that was put aside for it */
1065                 concat_inserted = FALSE;
1066                 t_tclass = save_tclass;
1067                 t_info = save_info;
1068
1069         } else {
1070                 p = g_pos;
1071  readnext:
1072                 p = skip_spaces(p);
1073                 g_lineno = t_lineno;
                     /* a comment runs up to (not including) newline or end of text */
1074                 if (*p == '#')
1075                         while (*p != '\n' && *p != '\0')
1076                                 p++;
1077
1078                 if (*p == '\n')
1079                         t_lineno++;
1080
1081                 if (*p == '\0') {
1082                         tc = TC_EOF;
1083                         debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1084
1085                 } else if (*p == '\"') {
1086                         /* it's a string */
                             /* decoded in place: nextchar() output is written
                              * back into the program text starting right after
                              * the opening quote */
1087                         t_string = s = ++p;
1088                         while (*p != '\"') {
1089                                 char *pp;
1090                                 if (*p == '\0' || *p == '\n')
1091                                         syntax_error(EMSG_UNEXP_EOS);
1092                                 pp = p;
1093                                 *s++ = nextchar(&pp);
1094                                 p = pp;
1095                         }
1096                         p++;
1097                         *s = '\0';
1098                         tc = TC_STRING;
1099                         debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1100
1101                 } else if ((expected & TC_REGEXP) && *p == '/') {
1102                         /* it's regexp */
                             /* '/' starts a regexp only when the caller allows
                              * TC_REGEXP; the text is rewritten in place, with
                              * backslash sequences passed through
                              * bb_process_escape_sequence() */
1103                         t_string = s = ++p;
1104                         while (*p != '/') {
1105                                 if (*p == '\0' || *p == '\n')
1106                                         syntax_error(EMSG_UNEXP_EOS);
1107                                 *s = *p++;
1108                                 if (*s++ == '\\') {
1109                                         char *pp = p;
1110                                         s[-1] = bb_process_escape_sequence((const char **)&pp);
1111                                         if (*p == '\\')
1112                                                 *s++ = '\\';
                                             /* pp == p: nothing was consumed by the
                                              * escape processing, copy the char as is */
1113                                         if (pp == p)
1114                                                 *s++ = *p++;
1115                                         else
1116                                                 p = pp;
1117                                 }
1118                         }
1119                         p++;
1120                         *s = '\0';
1121                         tc = TC_REGEXP;
1122                         debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1123
1124                 } else if (*p == '.' || isdigit(*p)) {
1125                         /* it's a number */
1126                         char *pp = p;
1127                         t_double = my_strtod(&pp);
1128                         p = pp;
                             /* a '.' right after the parsed number is rejected */
1129                         if (*p == '.')
1130                                 syntax_error(EMSG_UNEXP_TOKEN);
1131                         tc = TC_NUMBER;
1132                         debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1133
1134                 } else {
1135                         /* search for something known */
                             /* tokenlist is walked as a series of length-prefixed
                              * strings; an NTCC byte in place of a length moves on
                              * to the next token class bit. tokeninfo advances in
                              * step with the strings. */
1136                         tl = tokenlist;
1137                         tc = 0x00000001;
1138                         ti = tokeninfo;
1139                         while (*tl) {
1140                                 int l = (unsigned char) *tl++;
1141                                 if (l == (unsigned char) NTCC) {
1142                                         tc <<= 1;
1143                                         continue;
1144                                 }
1145                                 /* if token class is expected,
1146                                  * token matches,
1147                                  * and it's not a longer word,
1148                                  */
1149                                 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1150                                  && strncmp(p, tl, l) == 0
1151                                  && !((tc & TC_WORD) && isalnum_(p[l]))
1152                                 ) {
1153                                         /* then this is what we are looking for */
1154                                         t_info = *ti;
1155                                         debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1156                                         p += l;
1157                                         goto token_found;
1158                                 }
1159                                 ti++;
1160                                 tl += l;
1161                         }
1162                         /* not a known token */
1163
1164                         /* is it a name? (var/array/function) */
1165                         if (!isalnum_(*p))
1166                                 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1167                         /* yes */
                             /* The name is moved one byte to the left (overwriting
                              * the byte in front of it) so that a terminating NUL
                              * fits without touching the character that follows
                              * the name. */
1168                         t_string = --p;
1169                         while (isalnum_(*++p)) {
1170                                 p[-1] = *p;
1171                         }
1172                         p[-1] = '\0';
1173                         tc = TC_VARIABLE;
1174                         /* also consume whitespace between functionname and bracket */
1175                         if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1176                                 p = skip_spaces(p);
                             /* '(' is left unconsumed for a function name,
                              * '[' is consumed for an array name */
1177                         if (*p == '(') {
1178                                 tc = TC_FUNCTION;
1179                                 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1180                         } else {
1181                                 if (*p == '[') {
1182                                         p++;
1183                                         tc = TC_ARRAY;
1184                                         debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1185                                 } else
1186                                         debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1187                         }
1188                 }
1189  token_found:
1190                 g_pos = p;
1191
1192                 /* skipping newlines in some cases */
                     /* (when the previously returned token is of a TC_NOTERM class) */
1193                 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1194                         goto readnext;
1195
1196                 /* insert concatenation operator when needed */
                     /* The token just read is put aside and an OC_CONCAT binary
                      * operator is returned instead; the saved token is
                      * delivered by the next call. */
1197                 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1198                         concat_inserted = TRUE;
1199                         save_tclass = tc;
1200                         save_info = t_info;
1201                         tc = TC_BINOP;
1202                         t_info = OC_CONCAT | SS | P(35);
1203                 }
1204
1205                 t_tclass = tc;
1206         }
             /* remember the class of the token being returned for the next call */
1207         ltclass = t_tclass;
1208
1209         /* Are we ready for this? */
1210         if (!(ltclass & expected))
1211                 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1212                                 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1213
1214         return ltclass;
1215 #undef concat_inserted
1216 #undef save_tclass
1217 #undef save_info
1218 #undef ltclass
1219 }
1220
1221 static void rollback_token(void)
1222 {
1223         t_rollback = TRUE;
1224 }
1225
1226 static node *new_node(uint32_t info)
1227 {
1228         node *n;
1229
1230         n = xzalloc(sizeof(node));
1231         n->info = info;
1232         n->lineno = g_lineno;
1233         return n;
1234 }
1235
1236 static void mk_re_node(const char *s, node *n, regex_t *re)
1237 {
1238         n->info = OC_REGEXP;
1239         n->l.re = re;
1240         n->r.ire = re + 1;
1241         xregcomp(re, s, REG_EXTENDED);
1242         xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1243 }
1244
1245 static node *condition(void)
1246 {
1247         next_token(TC_SEQSTART);
1248         return parse_expr(TC_SEQTERM);
1249 }
1250
1251 /* Parse an expression that ends at a token of class 'iexp' and return a
1252  * pointer to the built subtree (NULL if the terminator came first, i.e. the
 * expression is empty). The terminator is eaten by parse_expr.
 *
 * The tree is grown under a dummy root node kept on the stack (sn);
 * 'cn' is the node added last and 'xtc' the set of token classes that
 * may legally come next. */
1253 static node *parse_expr(uint32_t iexp)
1254 {
1255         node sn;
1256         node *cn = &sn;
1257         node *vn, *glptr;
1258         uint32_t tc, xtc;
1259         var *v;
1260
1261         debug_printf_parse("%s(%x)\n", __func__, iexp);
1262
1263         sn.info = PRIMASK;
1264         sn.r.n = glptr = NULL;
1265         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1266
1267         while (!((tc = next_token(xtc)) & iexp)) {
1268
                     /* glptr is non-NULL only right after a getline operand
                      * (see the TC_GETLINE case below) */
1269                 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1270                         /* input redirection (<) attached to glptr node */
1271                         debug_printf_parse("%s: input redir\n", __func__);
1272                         cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1273                         cn->a.n = glptr;
1274                         xtc = TC_OPERAND | TC_UOPPRE;
1275                         glptr = NULL;
1276
1277                 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1278                         debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1279                         /* for binary and postfix-unary operators, jump back over
1280                          * previous operators with higher priority */
1281                         vn = cn;
1282                         while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1283                             || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1284                         ) {
1285                                 vn = vn->a.n;
1286                         }
1287                         if ((t_info & OPCLSMASK) == OC_TERNARY)
1288                                 t_info += P(6);
                             /* splice the new operator node in between vn and
                              * the node vn was hanging from */
1289                         cn = vn->a.n->r.n = new_node(t_info);
1290                         cn->a.n = vn->a.n;
1291                         if (tc & TC_BINOP) {
                                     /* binary: vn becomes the left operand */
1292                                 cn->l.n = vn;
1293                                 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1294                                 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1295                                         /* it's a pipe */
1296                                         next_token(TC_GETLINE);
1297                                         /* give maximum priority to this pipe */
1298                                         cn->info &= ~PRIMASK;
1299                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1300                                 }
1301                         } else {
                                     /* postfix unary: vn becomes the (right) operand */
1302                                 cn->r.n = vn;
1303                                 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1304                         }
1305                         vn->a.n = cn;
1306
1307                 } else {
1308                         debug_printf_parse("%s: other\n", __func__);
1309                         /* for operands and prefix-unary operators, attach them
1310                          * to last node */
1311                         vn = cn;
1312                         cn = vn->r.n = new_node(t_info);
1313                         cn->a.n = vn;
1314                         xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1315                         if (tc & (TC_OPERAND | TC_REGEXP)) {
1316                                 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1317                                 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1318                                 /* one should be very careful with switch on tclass -
1319                                  * only simple tclasses should be used! */
1320                                 switch (tc) {
1321                                 case TC_VARIABLE:
1322                                 case TC_ARRAY:
1323                                         debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1324                                         cn->info = OC_VAR;
                                             /* a name found in ahash is referenced by its
                                              * index (OC_FNARG); any other name gets a
                                              * variable via newvar() */
1325                                         v = hash_search(ahash, t_string);
1326                                         if (v != NULL) {
1327                                                 cn->info = OC_FNARG;
1328                                                 cn->l.aidx = v->x.aidx;
1329                                         } else {
1330                                                 cn->l.v = newvar(t_string);
1331                                         }
1332                                         if (tc & TC_ARRAY) {
                                                     /* the subscript runs up to TC_ARRTERM */
1333                                                 cn->info |= xS;
1334                                                 cn->r.n = parse_expr(TC_ARRTERM);
1335                                         }
1336                                         break;
1337
1338                                 case TC_NUMBER:
1339                                 case TC_STRING:
1340                                         debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
                                             /* constants are stored as freshly
                                              * allocated variables */
1341                                         cn->info = OC_VAR;
1342                                         v = cn->l.v = xzalloc(sizeof(var));
1343                                         if (tc & TC_NUMBER)
1344                                                 setvar_i(v, t_double);
1345                                         else
1346                                                 setvar_s(v, t_string);
1347                                         break;
1348
1349                                 case TC_REGEXP:
1350                                         debug_printf_parse("%s: TC_REGEXP\n", __func__);
                                             /* room for two regex_t, as mk_re_node() fills re[0] and re[1] */
1351                                         mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1352                                         break;
1353
1354                                 case TC_FUNCTION:
1355                                         debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1356                                         cn->info = OC_FUNC;
1357                                         cn->r.f = newfunc(t_string);
                                             /* the parenthesized list following the name */
1358                                         cn->l.n = condition();
1359                                         break;
1360
1361                                 case TC_SEQSTART:
1362                                         debug_printf_parse("%s: TC_SEQSTART\n", __func__);
                                             /* parenthesized subexpression: its tree
                                              * replaces the node created above */
1363                                         cn = vn->r.n = parse_expr(TC_SEQTERM);
1364                                         if (!cn)
1365                                                 syntax_error("Empty sequence");
1366                                         cn->a.n = vn;
1367                                         break;
1368
1369                                 case TC_GETLINE:
1370                                         debug_printf_parse("%s: TC_GETLINE\n", __func__);
                                             /* remember the node so that a following
                                              * '<' can be attached to it */
1371                                         glptr = cn;
1372                                         xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1373                                         break;
1374
1375                                 case TC_BUILTIN:
1376                                         debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1377                                         cn->l.n = condition();
1378                                         break;
1379                                 }
1380                         }
1381                 }
1382         }
1383
1384         debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1385         return sn.r.n;
1386 }
1387
1388 /* add node to chain. Return ptr to alloc'd node */
1389 static node *chain_node(uint32_t info)
1390 {
1391         node *n;
1392
1393         if (!seq->first)
1394                 seq->first = seq->last = new_node(0);
1395
1396         if (seq->programname != g_progname) {
1397                 seq->programname = g_progname;
1398                 n = chain_node(OC_NEWSOURCE);
1399                 n->l.new_progname = xstrdup(g_progname);
1400         }
1401
1402         n = seq->last;
1403         n->info = info;
1404         seq->last = n->a.n = new_node(OC_DONE);
1405
1406         return n;
1407 }
1408
1409 static void chain_expr(uint32_t info)
1410 {
1411         node *n;
1412
1413         n = chain_node(info);
1414         n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1415         if (t_tclass & TC_GRPTERM)
1416                 rollback_token();
1417 }
1418
1419 static node *chain_loop(node *nn)
1420 {
1421         node *n, *n2, *save_brk, *save_cont;
1422
1423         save_brk = break_ptr;
1424         save_cont = continue_ptr;
1425
1426         n = chain_node(OC_BR | Vx);
1427         continue_ptr = new_node(OC_EXEC);
1428         break_ptr = new_node(OC_EXEC);
1429         chain_group();
1430         n2 = chain_node(OC_EXEC | Vx);
1431         n2->l.n = nn;
1432         n2->a.n = n;
1433         continue_ptr->a.n = n2;
1434         break_ptr->a.n = n->r.n = seq->last;
1435
1436         continue_ptr = save_cont;
1437         break_ptr = save_brk;
1438
1439         return n;
1440 }
1441
1442 /* parse group and attach it to chain */
1443 static void chain_group(void)
1444 {
1445         uint32_t c;
1446         node *n, *n2, *n3;
1447
1448         do {
1449                 c = next_token(TC_GRPSEQ);
1450         } while (c & TC_NEWLINE);
1451
1452         if (c & TC_GRPSTART) {
1453                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1454                 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1455                         debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1456                         if (t_tclass & TC_NEWLINE)
1457                                 continue;
1458                         rollback_token();
1459                         chain_group();
1460                 }
1461                 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1462         } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1463                 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1464                 rollback_token();
1465                 chain_expr(OC_EXEC | Vx);
1466         } else {
1467                 /* TC_STATEMNT */
1468                 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1469                 switch (t_info & OPCLSMASK) {
1470                 case ST_IF:
1471                         debug_printf_parse("%s: ST_IF\n", __func__);
1472                         n = chain_node(OC_BR | Vx);
1473                         n->l.n = condition();
1474                         chain_group();
1475                         n2 = chain_node(OC_EXEC);
1476                         n->r.n = seq->last;
1477                         if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1478                                 chain_group();
1479                                 n2->a.n = seq->last;
1480                         } else {
1481                                 rollback_token();
1482                         }
1483                         break;
1484
1485                 case ST_WHILE:
1486                         debug_printf_parse("%s: ST_WHILE\n", __func__);
1487                         n2 = condition();
1488                         n = chain_loop(NULL);
1489                         n->l.n = n2;
1490                         break;
1491
1492                 case ST_DO:
1493                         debug_printf_parse("%s: ST_DO\n", __func__);
1494                         n2 = chain_node(OC_EXEC);
1495                         n = chain_loop(NULL);
1496                         n2->a.n = n->a.n;
1497                         next_token(TC_WHILE);
1498                         n->l.n = condition();
1499                         break;
1500
1501                 case ST_FOR:
1502                         debug_printf_parse("%s: ST_FOR\n", __func__);
1503                         next_token(TC_SEQSTART);
1504                         n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1505                         if (t_tclass & TC_SEQTERM) {    /* for-in */
1506                                 if ((n2->info & OPCLSMASK) != OC_IN)
1507                                         syntax_error(EMSG_UNEXP_TOKEN);
1508                                 n = chain_node(OC_WALKINIT | VV);
1509                                 n->l.n = n2->l.n;
1510                                 n->r.n = n2->r.n;
1511                                 n = chain_loop(NULL);
1512                                 n->info = OC_WALKNEXT | Vx;
1513                                 n->l.n = n2->l.n;
1514                         } else {                        /* for (;;) */
1515                                 n = chain_node(OC_EXEC | Vx);
1516                                 n->l.n = n2;
1517                                 n2 = parse_expr(TC_SEMICOL);
1518                                 n3 = parse_expr(TC_SEQTERM);
1519                                 n = chain_loop(n3);
1520                                 n->l.n = n2;
1521                                 if (!n2)
1522                                         n->info = OC_EXEC;
1523                         }
1524                         break;
1525
1526                 case OC_PRINT:
1527                 case OC_PRINTF:
1528                         debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1529                         n = chain_node(t_info);
1530                         n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1531                         if (t_tclass & TC_OUTRDR) {
1532                                 n->info |= t_info;
1533                                 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1534                         }
1535                         if (t_tclass & TC_GRPTERM)
1536                                 rollback_token();
1537                         break;
1538
1539                 case OC_BREAK:
1540                         debug_printf_parse("%s: OC_BREAK\n", __func__);
1541                         n = chain_node(OC_EXEC);
1542                         n->a.n = break_ptr;
1543                         chain_expr(t_info);
1544                         break;
1545
1546                 case OC_CONTINUE:
1547                         debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1548                         n = chain_node(OC_EXEC);
1549                         n->a.n = continue_ptr;
1550                         chain_expr(t_info);
1551                         break;
1552
1553                 /* delete, next, nextfile, return, exit */
1554                 default:
1555                         debug_printf_parse("%s: default\n", __func__);
1556                         chain_expr(t_info);
1557                 }
1558         }
1559 }
1560
1561 static void parse_program(char *p)
1562 {
1563         uint32_t tclass;
1564         node *cn;
1565         func *f;
1566         var *v;
1567
1568         g_pos = p;
1569         t_lineno = 1;
1570         while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1571                         TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1572
1573                 if (tclass & TC_OPTERM) {
1574                         debug_printf_parse("%s: TC_OPTERM\n", __func__);
1575                         continue;
1576                 }
1577
1578                 seq = &mainseq;
1579                 if (tclass & TC_BEGIN) {
1580                         debug_printf_parse("%s: TC_BEGIN\n", __func__);
1581                         seq = &beginseq;
1582                         chain_group();
1583
1584                 } else if (tclass & TC_END) {
1585                         debug_printf_parse("%s: TC_END\n", __func__);
1586                         seq = &endseq;
1587                         chain_group();
1588
1589                 } else if (tclass & TC_FUNCDECL) {
1590                         debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1591                         next_token(TC_FUNCTION);
1592                         g_pos++;
1593                         f = newfunc(t_string);
1594                         f->body.first = NULL;
1595                         f->nargs = 0;
1596                         while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1597                                 v = findvar(ahash, t_string);
1598                                 v->x.aidx = f->nargs++;
1599
1600                                 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1601                                         break;
1602                         }
1603                         seq = &f->body;
1604                         chain_group();
1605                         clear_array(ahash);
1606
1607                 } else if (tclass & TC_OPSEQ) {
1608                         debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1609                         rollback_token();
1610                         cn = chain_node(OC_TEST);
1611                         cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1612                         if (t_tclass & TC_GRPSTART) {
1613                                 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1614                                 rollback_token();
1615                                 chain_group();
1616                         } else {
1617                                 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1618                                 chain_node(OC_PRINT);
1619                         }
1620                         cn->r.n = mainseq.last;
1621
1622                 } else /* if (tclass & TC_GRPSTART) */ {
1623                         debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1624                         rollback_token();
1625                         chain_group();
1626                 }
1627         }
1628         debug_printf_parse("%s: TC_EOF\n", __func__);
1629 }
1630
1631
1632 /* -------- program execution part -------- */
1633
1634 static node *mk_splitter(const char *s, tsplitter *spl)
1635 {
1636         regex_t *re, *ire;
1637         node *n;
1638
1639         re = &spl->re[0];
1640         ire = &spl->re[1];
1641         n = &spl->n;
1642         if ((n->info & OPCLSMASK) == OC_REGEXP) {
1643                 regfree(re);
1644                 regfree(ire); // TODO: nuke ire, use re+1?
1645         }
1646         if (s[0] && s[1]) { /* strlen(s) > 1 */
1647                 mk_re_node(s, n, re);
1648         } else {
1649                 n->info = (uint32_t) s[0];
1650         }
1651
1652         return n;
1653 }
1654
1655 /* use node as a regular expression. Supplied with node ptr and regex_t
1656  * storage space. Return ptr to regex (if result points to preg, it should
1657  * be later regfree'd manually
1658  */
1659 static regex_t *as_regex(node *op, regex_t *preg)
1660 {
1661         int cflags;
1662         var *v;
1663         const char *s;
1664
1665         if ((op->info & OPCLSMASK) == OC_REGEXP) {
1666                 return icase ? op->r.ire : op->l.re;
1667         }
1668         v = nvalloc(1);
1669         s = getvar_s(evaluate(op, v));
1670
1671         cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1672         /* Testcase where REG_EXTENDED fails (unpaired '{'):
1673          * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1674          * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1675          * (maybe gsub is not supposed to use REG_EXTENDED?).
1676          */
1677         if (regcomp(preg, s, cflags)) {
1678                 cflags &= ~REG_EXTENDED;
1679                 xregcomp(preg, s, cflags);
1680         }
1681         nvfree(v);
1682         return preg;
1683 }
1684
/* Gradually growing buffer.
 *
 * b    - current buffer (may be NULL)
 * n    - number of bytes the caller is about to need
 * size - in/out: current allocated size; only read when b is non-NULL
 *
 * The buffer is reallocated whenever n >= *size (so also when n equals the
 * old size); therefore after return there is always at least one byte
 * beyond the n requested ones. Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;	/* still fits, with room to spare */

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1697
1698 /* resize field storage space */
1699 static void fsrealloc(int size)
1700 {
1701         int i;
1702
1703         if (size >= maxfields) {
1704                 i = maxfields;
1705                 maxfields = size + 16;
1706                 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1707                 for (; i < maxfields; i++) {
1708                         Fields[i].type = VF_SPECIAL;
1709                         Fields[i].string = NULL;
1710                 }
1711         }
1712         /* if size < nfields, clear extra field variables */
1713         for (i = size; i < nfields; i++) {
1714                 clrvar(Fields + i);
1715         }
1716         nfields = size;
1717 }
1718
/* Split string s into fields using splitter node spl.
 *
 * A new zeroed buffer is allocated and stored in *slist; the fields are
 * written into it as consecutive NUL-terminated strings. Callers in this
 * file walk them with nextword() and free() the buffer afterwards.
 * Returns the number of fields.
 *
 * The split mode is chosen from spl->info:
 *  - class OC_REGEXP: split on regex matches
 *  - low byte '\0':   every character is its own field
 *  - low byte ' ':    split on runs of whitespace
 *  - any other byte:  split on that single character (either case when
 *                     icase is set)
 * When RS is the empty string, '\n' also terminates a field in the regex
 * and single-character modes.
 */
1719 static int awk_split(const char *s, node *spl, char **slist)
1720 {
1721         int l, n;
1722         char c[4];
1723         char *s1;
1724         regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1725
1726         /* in worst case, each char would be a separate field */
1727         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1728         strcpy(s1, s);
1729
         /* c holds the separator set: c[0..1] are the separator char (two
          * slots so that icase can store upper and lower case variants),
          * c[2] is '\n' only when RS is empty, c[3] terminates the set.
          * c+2 is therefore either "" or "\n". */
1730         c[0] = c[1] = (char)spl->info;
1731         c[2] = c[3] = '\0';
1732         if (*getvar_s(intvar[RS]) == '\0')
1733                 c[2] = '\n';
1734
1735         n = 0;
1736         if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1737                 if (!*s)
1738                         return n; /* "": zero fields */
1739                 n++; /* at least one field will be there */
1740                 do {
1741                         l = strcspn(s, c+2); /* len till next NUL or \n */
                         /* accept the regex match only if it starts no later
                          * than the next newline/end found above */
1742                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1743                          && pmatch[0].rm_so <= l
1744                         ) {
1745                                 l = pmatch[0].rm_so;
                                 /* empty match at the very start: consume one
                                  * character so that the loop makes progress */
1746                                 if (pmatch[0].rm_eo == 0) {
1747                                         l++;
1748                                         pmatch[0].rm_eo++;
1749                                 }
1750                                 n++; /* we saw yet another delimiter */
1751                         } else {
                                 /* no usable match: the field runs up to the
                                  * newline/end; step over the newline if any */
1752                                 pmatch[0].rm_eo = l;
1753                                 if (s[l])
1754                                         pmatch[0].rm_eo++;
1755                         }
1756                         memcpy(s1, s, l);
1757                         /* make sure we remove *all* of the separator chars */
1758                         do {
1759                                 s1[l] = '\0';
1760                         } while (++l < pmatch[0].rm_eo);
1761                         nextword(&s1);
1762                         s += pmatch[0].rm_eo;
1763                 } while (*s);
1764                 return n;
1765         }
1766         if (c[0] == '\0') {  /* null split */
                 /* emit every input character as a one-character field */
1767                 while (*s) {
1768                         *s1++ = *s++;
1769                         *s1++ = '\0';
1770                         n++;
1771                 }
1772                 return n;
1773         }
1774         if (c[0] != ' ') {  /* single-character split */
1775                 if (icase) {
1776                         c[0] = toupper(c[0]);
1777                         c[1] = tolower(c[1]);
1778                 }
                 /* s1 still holds the strcpy'd input: cut it in place at
                  * every character that belongs to the set c */
1779                 if (*s1)
1780                         n++;
1781                 while ((s1 = strpbrk(s1, c)) != NULL) {
1782                         *s1++ = '\0';
1783                         n++;
1784                 }
1785                 return n;
1786         }
1787         /* space split */
1788         while (*s) {
1789                 s = skip_whitespace(s);
1790                 if (!*s)
1791                         break;
1792                 n++;
                 /* copy one whitespace-free word and terminate it */
1793                 while (*s && !isspace(*s))
1794                         *s1++ = *s++;
1795                 *s1++ = '\0';
1796         }
1797         return n;
1798 }
1799
1800 static void split_f0(void)
1801 {
1802 /* static char *fstrings; */
1803 #define fstrings (G.split_f0__fstrings)
1804
1805         int i, n;
1806         char *s;
1807
1808         if (is_f0_split)
1809                 return;
1810
1811         is_f0_split = TRUE;
1812         free(fstrings);
1813         fsrealloc(0);
1814         n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1815         fsrealloc(n);
1816         s = fstrings;
1817         for (i = 0; i < n; i++) {
1818                 Fields[i].string = nextword(&s);
1819                 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1820         }
1821
1822         /* set NF manually to avoid side effects */
1823         clrvar(intvar[NF]);
1824         intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1825         intvar[NF]->number = nfields;
1826 #undef fstrings
1827 }
1828
1829 /* perform additional actions when some internal variables changed */
1830 static void handle_special(var *v)
1831 {
1832         int n;
1833         char *b;
1834         const char *sep, *s;
1835         int sl, l, len, i, bsize;
1836
1837         if (!(v->type & VF_SPECIAL))
1838                 return;
1839
1840         if (v == intvar[NF]) {
1841                 n = (int)getvar_i(v);
1842                 fsrealloc(n);
1843
1844                 /* recalculate $0 */
1845                 sep = getvar_s(intvar[OFS]);
1846                 sl = strlen(sep);
1847                 b = NULL;
1848                 len = 0;
1849                 for (i = 0; i < n; i++) {
1850                         s = getvar_s(&Fields[i]);
1851                         l = strlen(s);
1852                         if (b) {
1853                                 memcpy(b+len, sep, sl);
1854                                 len += sl;
1855                         }
1856                         b = qrealloc(b, len+l+sl, &bsize);
1857                         memcpy(b+len, s, l);
1858                         len += l;
1859                 }
1860                 if (b)
1861                         b[len] = '\0';
1862                 setvar_p(intvar[F0], b);
1863                 is_f0_split = TRUE;
1864
1865         } else if (v == intvar[F0]) {
1866                 is_f0_split = FALSE;
1867
1868         } else if (v == intvar[FS]) {
1869                 /*
1870                  * The POSIX-2008 standard says that changing FS should have no effect on the
1871                  * current input line, but only on the next one. The language is:
1872                  *
1873                  * > Before the first reference to a field in the record is evaluated, the record
1874                  * > shall be split into fields, according to the rules in Regular Expressions,
1875                  * > using the value of FS that was current at the time the record was read.
1876                  *
1877                  * So, split up current line before assignment to FS:
1878                  */
1879                 split_f0();
1880
1881                 mk_splitter(getvar_s(v), &fsplitter);
1882
1883         } else if (v == intvar[RS]) {
1884                 mk_splitter(getvar_s(v), &rsplitter);
1885
1886         } else if (v == intvar[IGNORECASE]) {
1887                 icase = istrue(v);
1888
1889         } else {                                /* $n */
1890                 n = getvar_i(intvar[NF]);
1891                 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1892                 /* right here v is invalid. Just to note... */
1893         }
1894 }
1895
1896 /* step through func/builtin/etc arguments */
1897 static node *nextarg(node **pn)
1898 {
1899         node *n;
1900
1901         n = *pn;
1902         if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1903                 *pn = n->r.n;
1904                 n = n->l.n;
1905         } else {
1906                 *pn = NULL;
1907         }
1908         return n;
1909 }
1910
1911 static void hashwalk_init(var *v, xhash *array)
1912 {
1913         hash_item *hi;
1914         unsigned i;
1915         walker_list *w;
1916         walker_list *prev_walker;
1917
1918         if (v->type & VF_WALK) {
1919                 prev_walker = v->x.walker;
1920         } else {
1921                 v->type |= VF_WALK;
1922                 prev_walker = NULL;
1923         }
1924         debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1925
1926         w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1927         debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1928         w->cur = w->end = w->wbuf;
1929         w->prev = prev_walker;
1930         for (i = 0; i < array->csize; i++) {
1931                 hi = array->items[i];
1932                 while (hi) {
1933                         strcpy(w->end, hi->name);
1934                         nextword(&w->end);
1935                         hi = hi->next;
1936                 }
1937         }
1938 }
1939
1940 static int hashwalk_next(var *v)
1941 {
1942         walker_list *w = v->x.walker;
1943
1944         if (w->cur >= w->end) {
1945                 walker_list *prev_walker = w->prev;
1946
1947                 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1948                 free(w);
1949                 v->x.walker = prev_walker;
1950                 return FALSE;
1951         }
1952
1953         setvar_s(v, nextword(&w->cur));
1954         return TRUE;
1955 }
1956
1957 /* evaluate node, return 1 when result is true, 0 otherwise */
1958 static int ptest(node *pattern)
1959 {
1960         /* ptest__v is "static": to save stack space? */
1961         return istrue(evaluate(pattern, &G.ptest__v));
1962 }
1963
1964 /* read next record from stream rsm into a variable v */
/* Parameters:
 *  rsm - input stream state: FILE (only its fd is used here), buffer,
 *        and the adv/pos/size bookkeeping, all written back on return
 *  v   - variable that receives the record text
 *
 * Returns 1 when a record was stored into v (RT is set to the matched
 * terminator text), 0 when no data is left, -1 when reading failed
 * (ERRNO is set from errno in that case).
 *
 * The record terminator is taken from rsplitter: a regex, a single
 * character, or - when that character is NUL - a run of two or more
 * newlines, with leading newlines skipped.
 */
1965 static int awk_getline(rstream *rsm, var *v)
1966 {
1967         char *b;
1968         regmatch_t pmatch[2];
1969         int size, a, p, pp = 0;
1970         int fd, so, eo, r, rp;
1971         char c, *m, *s;
1972
1973         debug_printf_eval("entered %s()\n", __func__);
1974
1975         /* we're using our own buffer since we need access to accumulating
1976          * characters
1977          */
         /* Local state:
          *  m    - start of the allocated buffer
          *  a    - offset in m of the first unconsumed byte (b = m + a)
          *  p    - number of unconsumed bytes at b
          *  pp   - value of p before the most recent read
          *  so/eo- offsets in b where the terminator starts/ends
          *  rp   - offset in b where the record text starts
          */
1978         fd = fileno(rsm->F);
1979         m = rsm->buffer;
1980         a = rsm->adv;
1981         p = rsm->pos;
1982         size = rsm->size;
1983         c = (char) rsplitter.n.info;
1984         rp = 0;
1985
1986         if (!m)
1987                 m = qrealloc(m, 256, &size);
1988
         /* Look for a terminator in the buffered data; if there is none,
          * read more and retry. The loop ends either by 'break' (terminator
          * found) or when a read adds no new bytes. */
1989         do {
1990                 b = m + a;
                 /* default: no terminator, record is all buffered data */
1991                 so = eo = p;
1992                 r = 1;
1993                 if (p > 0) {
1994                         if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1995                                 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1996                                                         b, 1, pmatch, 0) == 0) {
1997                                         so = pmatch[0].rm_so;
1998                                         eo = pmatch[0].rm_eo;
                                         /* a match ending exactly at the end of
                                          * the buffered data is not accepted yet:
                                          * presumably more input could extend it */
1999                                         if (b[eo] != '\0')
2000                                                 break;
2001                                 }
2002                         } else if (c != '\0') {
                                 /* single-char terminator: only the bytes added
                                  * by the last read (from pp on) are searched;
                                  * an embedded NUL also ends the record */
2003                                 s = strchr(b+pp, c);
2004                                 if (!s)
2005                                         s = memchr(b+pp, '\0', p - pp);
2006                                 if (s) {
2007                                         so = eo = s-b;
2008                                         eo++;
2009                                         break;
2010                                 }
2011                         } else {
                                 /* terminator char is NUL: skip leading
                                  * newlines, then the record ends at the next
                                  * "\n\n"; all following newlines belong to
                                  * the terminator */
2012                                 while (b[rp] == '\n')
2013                                         rp++;
2014                                 s = strstr(b+rp, "\n\n");
2015                                 if (s) {
2016                                         so = eo = s-b;
2017                                         while (b[eo] == '\n')
2018                                                 eo++;
2019                                         if (b[eo] != '\0')
2020                                                 break;
2021                                 }
2022                         }
2023                 }
2024
                 /* move the unconsumed bytes (plus their NUL) to the start of
                  * the buffer before appending more data */
2025                 if (a > 0) {
2026                         memmove(m, m+a, p+1);
2027                         b = m;
2028                         a = 0;
2029                 }
2030
2031                 m = qrealloc(m, a+p+128, &size);
2032                 b = m + a;
2033                 pp = p;
2034                 p += safe_read(fd, b+p, size-p-1);
                 /* p shrank: safe_read returned a negative value (error) */
2035                 if (p < pp) {
2036                         p = 0;
2037                         r = 0;
2038                         setvar_i(intvar[ERRNO], errno);
2039                 }
2040                 b[p] = '\0';
2041
2042         } while (p > pp);
2043
2044         if (p == 0) {
                 /* nothing buffered: r becomes 0 at end of input,
                  * -1 after a read error (r was already 0) */
2045                 r--;
2046         } else {
                 /* temporarily NUL-terminate to copy out the record
                  * b[rp..so) and then the terminator text b[so..eo) */
2047                 c = b[so]; b[so] = '\0';
2048                 setvar_s(v, b+rp);
2049                 v->type |= VF_USER;
2050                 b[so] = c;
2051                 c = b[eo]; b[eo] = '\0';
2052                 setvar_s(intvar[RT], b+so);
2053                 b[eo] = c;
2054         }
2055
         /* remember where the next call has to continue */
2056         rsm->buffer = m;
2057         rsm->adv = a + eo;
2058         rsm->pos = p - eo;
2059         rsm->size = size;
2060
2061         debug_printf_eval("returning from %s(): %d\n", __func__, r);
2062
2063         return r;
2064 }
2065
2066 static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2067 {
2068         int r = 0;
2069         char c;
2070         const char *s = format;
2071
2072         if (int_as_int && n == (long long)n) {
2073                 r = snprintf(b, size, "%lld", (long long)n);
2074         } else {
2075                 do { c = *s; } while (c && *++s);
2076                 if (strchr("diouxX", c)) {
2077                         r = snprintf(b, size, format, (int)n);
2078                 } else if (strchr("eEfgG", c)) {
2079                         r = snprintf(b, size, format, n);
2080                 } else {
2081                         syntax_error(EMSG_INV_FMT);
2082                 }
2083         }
2084         return r;
2085 }
2086
2087 /* formatted output into an allocated buffer, return ptr to buffer */
2088 static char *awk_printf(node *n)
2089 {
2090         char *b = NULL;
2091         char *fmt, *s, *f;
2092         const char *s1;
2093         int i, j, incr, bsize;
2094         char c, c1;
2095         var *v, *arg;
2096
2097         v = nvalloc(1);
2098         fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2099
2100         i = 0;
2101         while (*f) {
2102                 s = f;
2103                 while (*f && (*f != '%' || *++f == '%'))
2104                         f++;
2105                 while (*f && !isalpha(*f)) {
2106                         if (*f == '*')
2107                                 syntax_error("%*x formats are not supported");
2108                         f++;
2109                 }
2110
2111                 incr = (f - s) + MAXVARFMT;
2112                 b = qrealloc(b, incr + i, &bsize);
2113                 c = *f;
2114                 if (c != '\0')
2115                         f++;
2116                 c1 = *f;
2117                 *f = '\0';
2118                 arg = evaluate(nextarg(&n), v);
2119
2120                 j = i;
2121                 if (c == 'c' || !c) {
2122                         i += sprintf(b+i, s, is_numeric(arg) ?
2123                                         (char)getvar_i(arg) : *getvar_s(arg));
2124                 } else if (c == 's') {
2125                         s1 = getvar_s(arg);
2126                         b = qrealloc(b, incr+i+strlen(s1), &bsize);
2127                         i += sprintf(b+i, s, s1);
2128                 } else {
2129                         i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2130                 }
2131                 *f = c1;
2132
2133                 /* if there was an error while sprintf, return value is negative */
2134                 if (i < j)
2135                         i = j;
2136         }
2137
2138         free(fmt);
2139         nvfree(v);
2140         b = xrealloc(b, i + 1);
2141         b[i] = '\0';
2142         return b;
2143 }
2144
2145 /* Common substitution routine.
2146  * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2147  * store result into (dest), return number of substitutions.
2148  * If nm = 0, replace all matches.
2149  * If src or dst is NULL, use $0.
2150  * If subexp != 0, enable subexpression matching (\1-\9).
2151  */
/* Implementation notes:
 *  - sp walks the source string; each regexec() is run on the not yet
 *    consumed remainder, with REG_NOTBOL after the first match.
 *  - resbuf/residx are the output buffer and its fill level. Text up to
 *    the end of each match is copied first; if this match is to be
 *    replaced, the matched part is then rewound and overwritten.
 *  - In the replacement text, '&' (and a digit when subexp is set) inserts
 *    the matched text / subexpression unless it is escaped by a preceding
 *    run of backslashes.
 *  - The regex is obtained through as_regex() and freed here only when it
 *    was compiled into the local sreg.
 */
2152 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2153 {
2154         char *resbuf;
2155         const char *sp;
2156         int match_no, residx, replen, resbufsize;
2157         int regexec_flags;
2158         regmatch_t pmatch[10];
2159         regex_t sreg, *regex;
2160
2161         resbuf = NULL;
2162         residx = 0;
2163         match_no = 0;
2164         regexec_flags = 0;
2165         regex = as_regex(rn, &sreg);
2166         sp = getvar_s(src ? src : intvar[F0]);
2167         replen = strlen(repl);
2168         while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2169                 int so = pmatch[0].rm_so;
2170                 int eo = pmatch[0].rm_eo;
2171
2172                 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                 /* copy everything up to and including the match; room for
                  * one copy of the replacement text is reserved as well */
2173                 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2174                 memcpy(resbuf + residx, sp, eo);
2175                 residx += eo;
                 /* with nm == 0 this holds for every match,
                  * otherwise from the nm'th match on */
2176                 if (++match_no >= nm) {
2177                         const char *s;
2178                         int nbs;
2179
2180                         /* replace */
2181                         residx -= (eo - so);
                         /* nbs counts the backslashes directly preceding the
                          * current replacement character */
2182                         nbs = 0;
2183                         for (s = repl; *s; s++) {
2184                                 char c = resbuf[residx++] = *s;
2185                                 if (c == '\\') {
2186                                         nbs++;
2187                                         continue;
2188                                 }
2189                                 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2190                                         int j;
                                         /* take back the special char and part
                                          * of the backslashes copied before it */
2191                                         residx -= ((nbs + 3) >> 1);
2192                                         j = 0;
2193                                         if (c != '&') {
2194                                                 j = c - '0';
2195                                                 nbs++;
2196                                         }
                                         /* odd count: emit the char literally;
                                          * even count: insert (sub)match j */
2197                                         if (nbs % 2) {
2198                                                 resbuf[residx++] = c;
2199                                         } else {
2200                                                 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2201                                                 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2202                                                 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2203                                                 residx += n;
2204                                         }
2205                                 }
2206                                 nbs = 0;
2207                         }
2208                 }
2209
2210                 regexec_flags = REG_NOTBOL;
2211                 sp += eo;
                 /* only one specific match was requested and it is done */
2212                 if (match_no == nm)
2213                         break;
2214                 if (eo == so) {
2215                         /* Empty match (e.g. "b*" will match anywhere).
2216                          * Advance by one char. */
2217 //BUG (bug 1333):
2218 //gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2219 //... and will erroneously match "b" even though it is NOT at the word start.
2220 //we need REG_NOTBOW but it does not exist...
2221 //TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2222 //it should be able to do it correctly.
2223                         /* Subtle: this is safe only because
2224                          * qrealloc allocated at least one extra byte */
2225                         resbuf[residx] = *sp;
                         /* end of source reached: the NUL just stored
                          * terminates the result */
2226                         if (*sp == '\0')
2227                                 goto ret;
2228                         sp++;
2229                         residx++;
2230                 }
2231         }
2232
         /* append the unmatched tail of the source, including its NUL */
2233         resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2234         strcpy(resbuf + residx, sp);
2235  ret:
2236         //bb_error_msg("end sp:'%s'%p", sp,sp);
2237         setvar_p(dest ? dest : intvar[F0], resbuf);
2238         if (regex == &sreg)
2239                 regfree(regex);
2240         return match_no;
2241 }
2242
2243 static NOINLINE int do_mktime(const char *ds)
2244 {
2245         struct tm then;
2246         int count;
2247
2248         /*memset(&then, 0, sizeof(then)); - not needed */
2249         then.tm_isdst = -1; /* default is unknown */
2250
2251         /* manpage of mktime says these fields are ints,
2252          * so we can sscanf stuff directly into them */
2253         count = sscanf(ds, "%u %u %u %u %u %u %d",
2254                 &then.tm_year, &then.tm_mon, &then.tm_mday,
2255                 &then.tm_hour, &then.tm_min, &then.tm_sec,
2256                 &then.tm_isdst);
2257
2258         if (count < 6
2259          || (unsigned)then.tm_mon < 1
2260          || (unsigned)then.tm_year < 1900
2261         ) {
2262                 return -1;
2263         }
2264
2265         then.tm_mon -= 1;
2266         then.tm_year -= 1900;
2267
2268         return mktime(&then);
2269 }
2270
/* Execute the builtin function described by node op and store its result
 * in res, which is also the return value.
 *
 * op->info encodes the function number (bits under OPNMASK), the minimum
 * argument count (top two bits) and per-argument flags telling whether an
 * argument has to be evaluated and/or converted to a string. Up to four
 * arguments are collected from the list at op->l.n:
 *   an[i] - argument node, av[i] - evaluated value, as[i] - string value
 * av[] and as[] entries are only filled in when the flags ask for it.
 */
2271 static NOINLINE var *exec_builtin(node *op, var *res)
2272 {
2273 #define tspl (G.exec_builtin__tspl)
2274
2275         var *tv;
2276         node *an[4];
2277         var *av[4];
2278         const char *as[4];
2279         regmatch_t pmatch[2];
2280         regex_t sreg, *re;
2281         node *spl;
2282         uint32_t isr, info;
2283         int nargs;
2284         time_t tt;
2285         int i, l, ll, n;
2286
         /* tv: temporaries that receive the evaluated arguments */
2287         tv = nvalloc(4);
2288         isr = info = op->info;
2289         op = op->l.n;
2290
         /* optional 3rd/4th arguments read as NULL when absent */
2291         av[2] = av[3] = NULL;
2292         for (i = 0; i < 4 && op; i++) {
2293                 an[i] = nextarg(&op);
                 /* isr is shifted right once per argument, so these two
                  * masks test the flag bits belonging to argument i */
2294                 if (isr & 0x09000000)
2295                         av[i] = evaluate(an[i], &tv[i]);
2296                 if (isr & 0x08000000)
2297                         as[i] = getvar_s(av[i]);
2298                 isr >>= 1;
2299         }
2300
2301         nargs = i;
         /* top two bits of info: minimum number of arguments */
2302         if ((uint32_t)nargs < (info >> 30))
2303                 syntax_error(EMSG_TOO_FEW_ARGS);
2304
2305         info &= OPNMASK;
2306         switch (info) {
2307
         /* atan2 of the first two arguments (needs libm support) */
2308         case B_a2:
2309                 if (ENABLE_FEATURE_AWK_LIBM)
2310                         setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2311                 else
2312                         syntax_error(EMSG_NO_MATH);
2313                 break;
2314
         /* split string as[0] into array av[1]; result is the piece count */
2315         case B_sp: {
2316                 char *s, *s1;
2317
                 /* separator: 3rd argument (regex node used as is, anything
                  * else turned into a temporary splitter), or the current
                  * field splitter when there is no 3rd argument */
2318                 if (nargs > 2) {
2319                         spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2320                                 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2321                 } else {
2322                         spl = &fsplitter.n;
2323                 }
2324
2325                 n = awk_split(as[0], spl, &s);
2326                 s1 = s;
2327                 clear_array(iamarray(av[1]));
                 /* array elements are numbered from 1 */
2328                 for (i = 1; i <= n; i++)
2329                         setari_u(av[1], i, nextword(&s));
2330                 free(s1);
2331                 setvar_i(res, n);
2332                 break;
2333         }
2334
         /* substring of as[0]: start position av[1] (counted from 1),
          * optional length av[2] */
2335         case B_ss: {
2336                 char *s;
2337
2338                 l = strlen(as[0]);
2339                 i = getvar_i(av[1]) - 1;
                 /* clamp the start offset into [0, l] */
2340                 if (i > l)
2341                         i = l;
2342                 if (i < 0)
2343                         i = 0;
2344                 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2345                 if (n < 0)
2346                         n = 0;
2347                 s = xstrndup(as[0]+i, n);
2348                 setvar_p(res, s);
2349                 break;
2350         }
2351
2352         /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2353          * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2354         case B_an:
2355                 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2356                 break;
2357
2358         case B_co:
2359                 setvar_i(res, ~getvar_i_int(av[0]));
2360                 break;
2361
2362         case B_ls:
2363                 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2364                 break;
2365
2366         case B_or:
2367                 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2368                 break;
2369
2370         case B_rs:
2371                 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2372                 break;
2373
2374         case B_xo:
2375                 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2376                 break;
2377
         /* case conversion of a copy of as[0]; only ASCII letters change */
2378         case B_lo:
2379         case B_up: {
2380                 char *s, *s1;
2381                 s1 = s = xstrdup(as[0]);
2382                 while (*s1) {
2383                         //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
                         /* letter test: fold to lower case via |0x20, then
                          * check the 'a'..'z' range */
2384                         if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2385                                 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2386                         s1++;
2387                 }
2388                 setvar_p(res, s);
2389                 break;
2390         }
2391
         /* position (counted from 1) of as[1] inside as[0], 0 if absent */
2392         case B_ix:
2393                 n = 0;
2394                 ll = strlen(as[1]);
2395                 l = strlen(as[0]) - ll;
                 /* l is the last offset at which as[1] could still fit */
2396                 if (ll > 0 && l >= 0) {
2397                         if (!icase) {
2398                                 char *s = strstr(as[0], as[1]);
2399                                 if (s)
2400                                         n = (s - as[0]) + 1;
2401                         } else {
2402                                 /* this piece of code is terribly slow and
2403                                  * really should be rewritten
2404                                  */
2405                                 for (i = 0; i <= l; i++) {
2406                                         if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2407                                                 n = i+1;
2408                                                 break;
2409                                         }
2410                                 }
2411                         }
2412                 }
2413                 setvar_i(res, n);
2414                 break;
2415
         /* strftime of timestamp av[1] (default: now) using format as[0]
          * (default given below); output is limited to MAXVARFMT bytes */
2416         case B_ti:
2417                 if (nargs > 1)
2418                         tt = getvar_i(av[1]);
2419                 else
2420                         time(&tt);
2421                 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2422                 i = strftime(g_buf, MAXVARFMT,
2423                         ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2424                         localtime(&tt));
2425                 g_buf[i] = '\0';
2426                 setvar_s(res, g_buf);
2427                 break;
2428
2429         case B_mt:
2430                 setvar_i(res, do_mktime(as[0]));
2431                 break;
2432
         /* regex match of an[1] against as[0]; sets RSTART and RLENGTH */
2433         case B_ma:
2434                 re = as_regex(an[1], &sreg);
2435                 n = regexec(re, as[0], 1, pmatch, 0);
2436                 if (n == 0) {
                         /* convert 0-based offsets to 1-based positions */
2437                         pmatch[0].rm_so++;
2438                         pmatch[0].rm_eo++;
2439                 } else {
                         /* no match: RSTART = 0, RLENGTH = -1 */
2440                         pmatch[0].rm_so = 0;
2441                         pmatch[0].rm_eo = -1;
2442                 }
2443                 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2444                 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2445                 setvar_i(res, pmatch[0].rm_so);
2446                 if (re == &sreg)
2447                         regfree(re);
2448                 break;
2449
         /* The three cases below are substitutions done by awk_sub():
          *  B_ge - replace match number av[2] in av[3], subexpression
          *         references enabled, new string stored in res
          *  B_gs - replace all matches in av[2] in place, result is count
          *  B_su - replace the first match in av[2] in place, result is count
          * A NULL av[] entry makes awk_sub() work on $0. */
2450         case B_ge:
2451                 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2452                 break;
2453
2454         case B_gs:
2455                 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2456                 break;
2457
2458         case B_su:
2459                 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2460                 break;
2461         }
2462
2463         nvfree(tv);
2464         return res;
2465 #undef tspl
2466 }
2467
2468 /*
2469  * Evaluate node - the heart of the program. Supplied with subtree
2470  * and place where to store result. returns ptr to result.
2471  */
2472 #define XC(n) ((n) >> 8)
2473
2474 static var *evaluate(node *op, var *res)
2475 {
2476 /* This procedure is recursive so we should count every byte */
2477 #define fnargs (G.evaluate__fnargs)
2478 /* seed is initialized to 1 */
2479 #define seed   (G.evaluate__seed)
2480 #define sreg   (G.evaluate__sreg)
2481
2482         var *v1;
2483
2484         if (!op)
2485                 return setvar_s(res, NULL);
2486
2487         debug_printf_eval("entered %s()\n", __func__);
2488
2489         v1 = nvalloc(2);
2490
2491         while (op) {
2492                 struct {
2493                         var *v;
2494                         const char *s;
2495                 } L = L; /* for compiler */
2496                 struct {
2497                         var *v;
2498                         const char *s;
2499                 } R = R;
2500                 double L_d = L_d;
2501                 uint32_t opinfo;
2502                 int opn;
2503                 node *op1;
2504
2505                 opinfo = op->info;
2506                 opn = (opinfo & OPNMASK);
2507                 g_lineno = op->lineno;
2508                 op1 = op->l.n;
2509                 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2510
2511                 /* execute inevitable things */
2512                 if (opinfo & OF_RES1)
2513                         L.v = evaluate(op1, v1);
2514                 if (opinfo & OF_RES2)
2515                         R.v = evaluate(op->r.n, v1+1);
2516                 if (opinfo & OF_STR1) {
2517                         L.s = getvar_s(L.v);
2518                         debug_printf_eval("L.s:'%s'\n", L.s);
2519                 }
2520                 if (opinfo & OF_STR2) {
2521                         R.s = getvar_s(R.v);
2522                         debug_printf_eval("R.s:'%s'\n", R.s);
2523                 }
2524                 if (opinfo & OF_NUM1) {
2525                         L_d = getvar_i(L.v);
2526                         debug_printf_eval("L_d:%f\n", L_d);
2527                 }
2528
2529                 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2530                 switch (XC(opinfo & OPCLSMASK)) {
2531
2532                 /* -- iterative node type -- */
2533
2534                 /* test pattern */
2535                 case XC( OC_TEST ):
2536                         if ((op1->info & OPCLSMASK) == OC_COMMA) {
2537                                 /* it's range pattern */
2538                                 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2539                                         op->info |= OF_CHECKED;
2540                                         if (ptest(op1->r.n))
2541                                                 op->info &= ~OF_CHECKED;
2542                                         op = op->a.n;
2543                                 } else {
2544                                         op = op->r.n;
2545                                 }
2546                         } else {
2547                                 op = ptest(op1) ? op->a.n : op->r.n;
2548                         }
2549                         break;
2550
2551                 /* just evaluate an expression, also used as unconditional jump */
2552                 case XC( OC_EXEC ):
2553                         break;
2554
2555                 /* branch, used in if-else and various loops */
2556                 case XC( OC_BR ):
2557                         op = istrue(L.v) ? op->a.n : op->r.n;
2558                         break;
2559
2560                 /* initialize for-in loop */
2561                 case XC( OC_WALKINIT ):
2562                         hashwalk_init(L.v, iamarray(R.v));
2563                         break;
2564
2565                 /* get next array item */
2566                 case XC( OC_WALKNEXT ):
2567                         op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2568                         break;
2569
2570                 case XC( OC_PRINT ):
2571                 case XC( OC_PRINTF ): {
2572                         FILE *F = stdout;
2573
2574                         if (op->r.n) {
2575                                 rstream *rsm = newfile(R.s);
2576                                 if (!rsm->F) {
2577                                         if (opn == '|') {
2578                                                 rsm->F = popen(R.s, "w");
2579                                                 if (rsm->F == NULL)
2580                                                         bb_perror_msg_and_die("popen");
2581                                                 rsm->is_pipe = 1;
2582                                         } else {
2583                                                 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2584                                         }
2585                                 }
2586                                 F = rsm->F;
2587                         }
2588
2589                         if ((opinfo & OPCLSMASK) == OC_PRINT) {
2590                                 if (!op1) {
2591                                         fputs(getvar_s(intvar[F0]), F);
2592                                 } else {
2593                                         while (op1) {
2594                                                 var *v = evaluate(nextarg(&op1), v1);
2595                                                 if (v->type & VF_NUMBER) {
2596                                                         fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2597                                                                         getvar_i(v), TRUE);
2598                                                         fputs(g_buf, F);
2599                                                 } else {
2600                                                         fputs(getvar_s(v), F);
2601                                                 }
2602
2603                                                 if (op1)
2604                                                         fputs(getvar_s(intvar[OFS]), F);
2605                                         }
2606                                 }
2607                                 fputs(getvar_s(intvar[ORS]), F);
2608
2609                         } else {        /* OC_PRINTF */
2610                                 char *s = awk_printf(op1);
2611                                 fputs(s, F);
2612                                 free(s);
2613                         }
2614                         fflush(F);
2615                         break;
2616                 }
2617
2618                 case XC( OC_DELETE ): {
2619                         uint32_t info = op1->info & OPCLSMASK;
2620                         var *v;
2621
2622                         if (info == OC_VAR) {
2623                                 v = op1->l.v;
2624                         } else if (info == OC_FNARG) {
2625                                 v = &fnargs[op1->l.aidx];
2626                         } else {
2627                                 syntax_error(EMSG_NOT_ARRAY);
2628                         }
2629
2630                         if (op1->r.n) {
2631                                 const char *s;
2632                                 clrvar(L.v);
2633                                 s = getvar_s(evaluate(op1->r.n, v1));
2634                                 hash_remove(iamarray(v), s);
2635                         } else {
2636                                 clear_array(iamarray(v));
2637                         }
2638                         break;
2639                 }
2640
2641                 case XC( OC_NEWSOURCE ):
2642                         g_progname = op->l.new_progname;
2643                         break;
2644
2645                 case XC( OC_RETURN ):
2646                         copyvar(res, L.v);
2647                         break;
2648
2649                 case XC( OC_NEXTFILE ):
2650                         nextfile = TRUE;
2651                 case XC( OC_NEXT ):
2652                         nextrec = TRUE;
2653                 case XC( OC_DONE ):
2654                         clrvar(res);
2655                         break;
2656
2657                 case XC( OC_EXIT ):
2658                         awk_exit(L_d);
2659
2660                 /* -- recursive node type -- */
2661
2662                 case XC( OC_VAR ):
2663                         L.v = op->l.v;
2664                         if (L.v == intvar[NF])
2665                                 split_f0();
2666                         goto v_cont;
2667
2668                 case XC( OC_FNARG ):
2669                         L.v = &fnargs[op->l.aidx];
2670  v_cont:
2671                         res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2672                         break;
2673
2674                 case XC( OC_IN ):
2675                         setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2676                         break;
2677
2678                 case XC( OC_REGEXP ):
2679                         op1 = op;
2680                         L.s = getvar_s(intvar[F0]);
2681                         goto re_cont;
2682
2683                 case XC( OC_MATCH ):
2684                         op1 = op->r.n;
2685  re_cont:
2686                         {
2687                                 regex_t *re = as_regex(op1, &sreg);
2688                                 int i = regexec(re, L.s, 0, NULL, 0);
2689                                 if (re == &sreg)
2690                                         regfree(re);
2691                                 setvar_i(res, (i == 0) ^ (opn == '!'));
2692                         }
2693                         break;
2694
2695                 case XC( OC_MOVE ):
2696                         debug_printf_eval("MOVE\n");
2697                         /* if source is a temporary string, jusk relink it to dest */
2698 //Disabled: if R.v is numeric but happens to have cached R.v->string,
2699 //then L.v ends up being a string, which is wrong
2700 //                      if (R.v == v1+1 && R.v->string) {
2701 //                              res = setvar_p(L.v, R.v->string);
2702 //                              R.v->string = NULL;
2703 //                      } else {
2704                                 res = copyvar(L.v, R.v);
2705 //                      }
2706                         break;
2707
2708                 case XC( OC_TERNARY ):
2709                         if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2710                                 syntax_error(EMSG_POSSIBLE_ERROR);
2711                         res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2712                         break;
2713
2714                 case XC( OC_FUNC ): {
2715                         var *vbeg, *v;
2716                         const char *sv_progname;
2717
2718                         /* The body might be empty, still has to eval the args */
2719                         if (!op->r.n->info && !op->r.f->body.first)
2720                                 syntax_error(EMSG_UNDEF_FUNC);
2721
2722                         vbeg = v = nvalloc(op->r.f->nargs + 1);
2723                         while (op1) {
2724                                 var *arg = evaluate(nextarg(&op1), v1);
2725                                 copyvar(v, arg);
2726                                 v->type |= VF_CHILD;
2727                                 v->x.parent = arg;
2728                                 if (++v - vbeg >= op->r.f->nargs)
2729                                         break;
2730                         }
2731
2732                         v = fnargs;
2733                         fnargs = vbeg;
2734                         sv_progname = g_progname;
2735
2736                         res = evaluate(op->r.f->body.first, res);
2737
2738                         g_progname = sv_progname;
2739                         nvfree(fnargs);
2740                         fnargs = v;
2741
2742                         break;
2743                 }
2744
2745                 case XC( OC_GETLINE ):
2746                 case XC( OC_PGETLINE ): {
2747                         rstream *rsm;
2748                         int i;
2749
2750                         if (op1) {
2751                                 rsm = newfile(L.s);
2752                                 if (!rsm->F) {
2753                                         if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2754                                                 rsm->F = popen(L.s, "r");
2755                                                 rsm->is_pipe = TRUE;
2756                                         } else {
2757                                                 rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2758                                         }
2759                                 }
2760                         } else {
2761                                 if (!iF)
2762                                         iF = next_input_file();
2763                                 rsm = iF;
2764                         }
2765
2766                         if (!rsm || !rsm->F) {
2767                                 setvar_i(intvar[ERRNO], errno);
2768                                 setvar_i(res, -1);
2769                                 break;
2770                         }
2771
2772                         if (!op->r.n)
2773                                 R.v = intvar[F0];
2774
2775                         i = awk_getline(rsm, R.v);
2776                         if (i > 0 && !op1) {
2777                                 incvar(intvar[FNR]);
2778                                 incvar(intvar[NR]);
2779                         }
2780                         setvar_i(res, i);
2781                         break;
2782                 }
2783
2784                 /* simple builtins */
2785                 case XC( OC_FBLTIN ): {
2786                         double R_d = R_d; /* for compiler */
2787
2788                         switch (opn) {
2789                         case F_in:
2790                                 R_d = (long long)L_d;
2791                                 break;
2792
2793                         case F_rn:
2794                                 R_d = (double)rand() / (double)RAND_MAX;
2795                                 break;
2796
2797                         case F_co:
2798                                 if (ENABLE_FEATURE_AWK_LIBM) {
2799                                         R_d = cos(L_d);
2800                                         break;
2801                                 }
2802
2803                         case F_ex:
2804                                 if (ENABLE_FEATURE_AWK_LIBM) {
2805                                         R_d = exp(L_d);
2806                                         break;
2807                                 }
2808
2809                         case F_lg:
2810                                 if (ENABLE_FEATURE_AWK_LIBM) {
2811                                         R_d = log(L_d);
2812                                         break;
2813                                 }
2814
2815                         case F_si:
2816                                 if (ENABLE_FEATURE_AWK_LIBM) {
2817                                         R_d = sin(L_d);
2818                                         break;
2819                                 }
2820
2821                         case F_sq:
2822                                 if (ENABLE_FEATURE_AWK_LIBM) {
2823                                         R_d = sqrt(L_d);
2824                                         break;
2825                                 }
2826
2827                                 syntax_error(EMSG_NO_MATH);
2828                                 break;
2829
2830                         case F_sr:
2831                                 R_d = (double)seed;
2832                                 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2833                                 srand(seed);
2834                                 break;
2835
2836                         case F_ti:
2837                                 R_d = time(NULL);
2838                                 break;
2839
2840                         case F_le:
2841                                 debug_printf_eval("length: L.s:'%s'\n", L.s);
2842                                 if (!op1) {
2843                                         L.s = getvar_s(intvar[F0]);
2844                                         debug_printf_eval("length: L.s='%s'\n", L.s);
2845                                 }
2846                                 else if (L.v->type & VF_ARRAY) {
2847                                         R_d = L.v->x.array->nel;
2848                                         debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2849                                         break;
2850                                 }
2851                                 R_d = strlen(L.s);
2852                                 break;
2853
2854                         case F_sy:
2855                                 fflush_all();
2856                                 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2857                                                 ? (system(L.s) >> 8) : 0;
2858                                 break;
2859
2860                         case F_ff:
2861                                 if (!op1) {
2862                                         fflush(stdout);
2863                                 } else if (L.s && *L.s) {
2864                                         rstream *rsm = newfile(L.s);
2865                                         fflush(rsm->F);
2866                                 } else {
2867                                         fflush_all();
2868                                 }
2869                                 break;
2870
2871                         case F_cl: {
2872                                 rstream *rsm;
2873                                 int err = 0;
2874                                 rsm = (rstream *)hash_search(fdhash, L.s);
2875                                 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2876                                 if (rsm) {
2877                                         debug_printf_eval("OC_FBLTIN F_cl "
2878                                                 "rsm->is_pipe:%d, ->F:%p\n",
2879                                                 rsm->is_pipe, rsm->F);
2880                                         /* Can be NULL if open failed. Example:
2881                                          * getline line <"doesnt_exist";
2882                                          * close("doesnt_exist"); <--- here rsm->F is NULL
2883                                          */
2884                                         if (rsm->F)
2885                                                 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2886                                         free(rsm->buffer);
2887                                         hash_remove(fdhash, L.s);
2888                                 }
2889                                 if (err)
2890                                         setvar_i(intvar[ERRNO], errno);
2891                                 R_d = (double)err;
2892                                 break;
2893                         }
2894                         } /* switch */
2895                         setvar_i(res, R_d);
2896                         break;
2897                 }
2898
2899                 case XC( OC_BUILTIN ):
2900                         res = exec_builtin(op, res);
2901                         break;
2902
2903                 case XC( OC_SPRINTF ):
2904                         setvar_p(res, awk_printf(op1));
2905                         break;
2906
2907                 case XC( OC_UNARY ): {
2908                         double Ld, R_d;
2909
2910                         Ld = R_d = getvar_i(R.v);
2911                         switch (opn) {
2912                         case 'P':
2913                                 Ld = ++R_d;
2914                                 goto r_op_change;
2915                         case 'p':
2916                                 R_d++;
2917                                 goto r_op_change;
2918                         case 'M':
2919                                 Ld = --R_d;
2920                                 goto r_op_change;
2921                         case 'm':
2922                                 R_d--;
2923  r_op_change:
2924                                 setvar_i(R.v, R_d);
2925                                 break;
2926                         case '!':
2927                                 Ld = !istrue(R.v);
2928                                 break;
2929                         case '-':
2930                                 Ld = -R_d;
2931                                 break;
2932                         }
2933                         setvar_i(res, Ld);
2934                         break;
2935                 }
2936
2937                 case XC( OC_FIELD ): {
2938                         int i = (int)getvar_i(R.v);
2939                         if (i == 0) {
2940                                 res = intvar[F0];
2941                         } else {
2942                                 split_f0();
2943                                 if (i > nfields)
2944                                         fsrealloc(i);
2945                                 res = &Fields[i - 1];
2946                         }
2947                         break;
2948                 }
2949
2950                 /* concatenation (" ") and index joining (",") */
2951                 case XC( OC_CONCAT ):
2952                 case XC( OC_COMMA ): {
2953                         const char *sep = "";
2954                         if ((opinfo & OPCLSMASK) == OC_COMMA)
2955                                 sep = getvar_s(intvar[SUBSEP]);
2956                         setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2957                         break;
2958                 }
2959
2960                 case XC( OC_LAND ):
2961                         setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2962                         break;
2963
2964                 case XC( OC_LOR ):
2965                         setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2966                         break;
2967
2968                 case XC( OC_BINARY ):
2969                 case XC( OC_REPLACE ): {
2970                         double R_d = getvar_i(R.v);
2971                         debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2972                         switch (opn) {
2973                         case '+':
2974                                 L_d += R_d;
2975                                 break;
2976                         case '-':
2977                                 L_d -= R_d;
2978                                 break;
2979                         case '*':
2980                                 L_d *= R_d;
2981                                 break;
2982                         case '/':
2983                                 if (R_d == 0)
2984                                         syntax_error(EMSG_DIV_BY_ZERO);
2985                                 L_d /= R_d;
2986                                 break;
2987                         case '&':
2988                                 if (ENABLE_FEATURE_AWK_LIBM)
2989                                         L_d = pow(L_d, R_d);
2990                                 else
2991                                         syntax_error(EMSG_NO_MATH);
2992                                 break;
2993                         case '%':
2994                                 if (R_d == 0)
2995                                         syntax_error(EMSG_DIV_BY_ZERO);
2996                                 L_d -= (long long)(L_d / R_d) * R_d;
2997                                 break;
2998                         }
2999                         debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3000                         res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3001                         break;
3002                 }
3003
3004                 case XC( OC_COMPARE ): {
3005                         int i = i; /* for compiler */
3006                         double Ld;
3007
3008                         if (is_numeric(L.v) && is_numeric(R.v)) {
3009                                 Ld = getvar_i(L.v) - getvar_i(R.v);
3010                         } else {
3011                                 const char *l = getvar_s(L.v);
3012                                 const char *r = getvar_s(R.v);
3013                                 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3014                         }
3015                         switch (opn & 0xfe) {
3016                         case 0:
3017                                 i = (Ld > 0);
3018                                 break;
3019                         case 2:
3020                                 i = (Ld >= 0);
3021                                 break;
3022                         case 4:
3023                                 i = (Ld == 0);
3024                                 break;
3025                         }
3026                         setvar_i(res, (i == 0) ^ (opn & 1));
3027                         break;
3028                 }
3029
3030                 default:
3031                         syntax_error(EMSG_POSSIBLE_ERROR);
3032                 }
3033                 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3034                         op = op->a.n;
3035                 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3036                         break;
3037                 if (nextrec)
3038                         break;
3039         } /* while (op) */
3040
3041         nvfree(v1);
3042         debug_printf_eval("returning from %s(): %p\n", __func__, res);
3043         return res;
3044 #undef fnargs
3045 #undef seed
3046 #undef sreg
3047 }
3048
3049
3050 /* -------- main & co. -------- */
3051
3052 static int awk_exit(int r)
3053 {
3054         var tv;
3055         unsigned i;
3056         hash_item *hi;
3057
3058         zero_out_var(&tv);
3059
3060         if (!exiting) {
3061                 exiting = TRUE;
3062                 nextrec = FALSE;
3063                 evaluate(endseq.first, &tv);
3064         }
3065
3066         /* waiting for children */
3067         for (i = 0; i < fdhash->csize; i++) {
3068                 hi = fdhash->items[i];
3069                 while (hi) {
3070                         if (hi->data.rs.F && hi->data.rs.is_pipe)
3071                                 pclose(hi->data.rs.F);
3072                         hi = hi->next;
3073                 }
3074         }
3075
3076         exit(r);
3077 }
3078
3079 /* if expr looks like "var=value", perform assignment and return 1,
3080  * otherwise return 0 */
3081 static int is_assignment(const char *expr)
3082 {
3083         char *exprc, *val;
3084
3085         if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3086                 return FALSE;
3087         }
3088
3089         exprc = xstrdup(expr);
3090         val = exprc + (val - expr);
3091         *val++ = '\0';
3092
3093         unescape_string_in_place(val);
3094         setvar_u(newvar(exprc), val);
3095         free(exprc);
3096         return TRUE;
3097 }
3098
3099 /* switch to next input file */
3100 static rstream *next_input_file(void)
3101 {
3102 #define rsm          (G.next_input_file__rsm)
3103 #define files_happen (G.next_input_file__files_happen)
3104
3105         FILE *F;
3106         const char *fname, *ind;
3107
3108         if (rsm.F)
3109                 fclose(rsm.F);
3110         rsm.F = NULL;
3111         rsm.pos = rsm.adv = 0;
3112
3113         for (;;) {
3114                 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3115                         if (files_happen)
3116                                 return NULL;
3117                         fname = "-";
3118                         F = stdin;
3119                         break;
3120                 }
3121                 ind = getvar_s(incvar(intvar[ARGIND]));
3122                 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3123                 if (fname && *fname && !is_assignment(fname)) {
3124                         F = xfopen_stdin(fname);
3125                         break;
3126                 }
3127         }
3128
3129         files_happen = TRUE;
3130         setvar_s(intvar[FILENAME], fname);
3131         rsm.F = F;
3132         return &rsm;
3133 #undef rsm
3134 #undef files_happen
3135 }
3136
3137 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3138 int awk_main(int argc, char **argv)
3139 {
3140         unsigned opt;
3141         char *opt_F;
3142         llist_t *list_v = NULL;
3143         llist_t *list_f = NULL;
3144 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3145         llist_t *list_e = NULL;
3146 #endif
3147         int i, j;
3148         var *v;
3149         var tv;
3150         char **envp;
3151         char *vnames = (char *)vNames; /* cheat */
3152         char *vvalues = (char *)vValues;
3153
3154         INIT_G();
3155
3156         /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3157          * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3158         if (ENABLE_LOCALE_SUPPORT)
3159                 setlocale(LC_NUMERIC, "C");
3160
3161         zero_out_var(&tv);
3162
3163         /* allocate global buffer */
3164         g_buf = xmalloc(MAXVARFMT + 1);
3165
3166         vhash = hash_init();
3167         ahash = hash_init();
3168         fdhash = hash_init();
3169         fnhash = hash_init();
3170
3171         /* initialize variables */
3172         for (i = 0; *vnames; i++) {
3173                 intvar[i] = v = newvar(nextword(&vnames));
3174                 if (*vvalues != '\377')
3175                         setvar_s(v, nextword(&vvalues));
3176                 else
3177                         setvar_i(v, 0);
3178
3179                 if (*vnames == '*') {
3180                         v->type |= VF_SPECIAL;
3181                         vnames++;
3182                 }
3183         }
3184
3185         handle_special(intvar[FS]);
3186         handle_special(intvar[RS]);
3187
3188         newfile("/dev/stdin")->F = stdin;
3189         newfile("/dev/stdout")->F = stdout;
3190         newfile("/dev/stderr")->F = stderr;
3191
3192         /* Huh, people report that sometimes environ is NULL. Oh well. */
3193         if (environ) for (envp = environ; *envp; envp++) {
3194                 /* environ is writable, thus we don't strdup it needlessly */
3195                 char *s = *envp;
3196                 char *s1 = strchr(s, '=');
3197                 if (s1) {
3198                         *s1 = '\0';
3199                         /* Both findvar and setvar_u take const char*
3200                          * as 2nd arg -> environment is not trashed */
3201                         setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3202                         *s1 = '=';
3203                 }
3204         }
3205         opt_complementary = OPTCOMPLSTR_AWK;
3206         opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3207         argv += optind;
3208         argc -= optind;
3209         if (opt & OPT_W)
3210                 bb_error_msg("warning: option -W is ignored");
3211         if (opt & OPT_F) {
3212                 unescape_string_in_place(opt_F);
3213                 setvar_s(intvar[FS], opt_F);
3214         }
3215         while (list_v) {
3216                 if (!is_assignment(llist_pop(&list_v)))
3217                         bb_show_usage();
3218         }
3219         while (list_f) {
3220                 char *s = NULL;
3221                 FILE *from_file;
3222
3223                 g_progname = llist_pop(&list_f);
3224                 from_file = xfopen_stdin(g_progname);
3225                 /* one byte is reserved for some trick in next_token */
3226                 for (i = j = 1; j > 0; i += j) {
3227                         s = xrealloc(s, i + 4096);
3228                         j = fread(s + i, 1, 4094, from_file);
3229                 }
3230                 s[i] = '\0';
3231                 fclose(from_file);
3232                 parse_program(s + 1);
3233                 free(s);
3234         }
3235         g_progname = "cmd. line";
3236 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3237         while (list_e) {
3238                 parse_program(llist_pop(&list_e));
3239         }
3240 #endif
3241         if (!(opt & (OPT_f | OPT_e))) {
3242                 if (!*argv)
3243                         bb_show_usage();
3244                 parse_program(*argv++);
3245                 argc--;
3246         }
3247
3248         /* fill in ARGV array */
3249         setvar_i(intvar[ARGC], argc + 1);
3250         setari_u(intvar[ARGV], 0, "awk");
3251         i = 0;
3252         while (*argv)
3253                 setari_u(intvar[ARGV], ++i, *argv++);
3254
3255         evaluate(beginseq.first, &tv);
3256         if (!mainseq.first && !endseq.first)
3257                 awk_exit(EXIT_SUCCESS);
3258
3259         /* input file could already be opened in BEGIN block */
3260         if (!iF)
3261                 iF = next_input_file();
3262
3263         /* passing through input files */
3264         while (iF) {
3265                 nextfile = FALSE;
3266                 setvar_i(intvar[FNR], 0);
3267
3268                 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3269                         nextrec = FALSE;
3270                         incvar(intvar[NR]);
3271                         incvar(intvar[FNR]);
3272                         evaluate(mainseq.first, &tv);
3273
3274                         if (nextfile)
3275                                 break;
3276                 }
3277
3278                 if (i < 0)
3279                         syntax_error(strerror(errno));
3280
3281                 iF = next_input_file();
3282         }
3283
3284         awk_exit(EXIT_SUCCESS);
3285         /*return 0;*/
3286 }