X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=editors%2Fawk.c;h=685e8bed83fab87ffa603f2e740aec7ce6e8dde9;hb=b130f9f758b6404c6d0911a1c120937ae6ab47f8;hp=2d6773b65172c563112e5978b1de028ac720e433;hpb=d9b5ab868c284fdb38806867ff4faddd6f29eb36;p=oweals%2Fbusybox.git diff --git a/editors/awk.c b/editors/awk.c index 2d6773b65..685e8bed8 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -4,17 +4,90 @@ * * Copyright (C) 2002 by Dmitry Zakharov * - * Licensed under the GPL v2 or later, see the file LICENSE in this tarball. + * Licensed under GPLv2 or later, see file LICENSE in this source tree. */ -#include "busybox.h" +//config:config AWK +//config: bool "awk" +//config: default y +//config: help +//config: Awk is used as a pattern scanning and processing language. This is +//config: the BusyBox implementation of that programming language. +//config: +//config:config FEATURE_AWK_LIBM +//config: bool "Enable math functions (requires libm)" +//config: default y +//config: depends on AWK +//config: help +//config: Enable math functions of the Awk programming language. +//config: NOTE: This will require libm to be present for linking. +//config: +//config:config FEATURE_AWK_GNU_EXTENSIONS +//config: bool "Enable a few GNU extensions" +//config: default y +//config: depends on AWK +//config: help +//config: Enable a few features from gawk: +//config: * command line option -e AWK_PROGRAM +//config: * simultaneous use of -f and -e on the command line. +//config: This enables the use of awk library files. +//config: Ex: awk -f mylib.awk -e '{print myfunction($1);}' ... + +//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk)) + +//kbuild:lib-$(CONFIG_AWK) += awk.o + +//usage:#define awk_trivial_usage +//usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..." +//usage:#define awk_full_usage "\n\n" +//usage: " -v VAR=VAL Set variable" +//usage: "\n -F SEP Use SEP as field separator" +//usage: "\n -f FILE Read program from FILE" +//usage: IF_FEATURE_AWK_GNU_EXTENSIONS( +//usage: "\n -e AWK_PROGRAM" +//usage: ) + +#include "libbb.h" #include "xregex.h" #include -extern char **environ; /* This is a NOEXEC applet. Be very careful! */ +/* If you comment out one of these below, it will be #defined later + * to perform debug printfs to stderr: */ +#define debug_printf_walker(...) do {} while (0) +#define debug_printf_eval(...) do {} while (0) +#define debug_printf_parse(...) do {} while (0) + +#ifndef debug_printf_walker +# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__)) +#endif +#ifndef debug_printf_eval +# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__)) +#endif +#ifndef debug_printf_parse +# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__)) +#endif + + +#define OPTSTR_AWK \ + "F:v:*f:*" \ + IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \ + "W:" +enum { + OPTBIT_F, /* define field separator */ + OPTBIT_v, /* define variable */ + OPTBIT_f, /* pull in awk program from file */ + IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */ + OPTBIT_W, /* -W ignored */ + OPT_F = 1 << OPTBIT_F, + OPT_v = 1 << OPTBIT_v, + OPT_f = 1 << OPTBIT_f, + OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0, + OPT_W = 1 << OPTBIT_W +}; + #define MAXVARFMT 240 #define MINNVBLOCK 64 @@ -33,16 +106,23 @@ extern char **environ; /* these flags are static, don't change them when value is changed */ #define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY) +typedef struct walker_list { + char *end; + char *cur; + struct walker_list *prev; + char wbuf[1]; +} walker_list; + /* Variable */ typedef struct var_s { - unsigned short type; /* flags */ + unsigned type; /* flags */ double number; char *string; union { int aidx; /* func arg idx (for compilation stage) */ struct xhash_s *array; /* array ptr */ struct var_s *parent; /* for func args, ptr to actual parameter */ - char **walker; /* list of array elements (for..in) */ + walker_list *walker; /* list of array elements (for..in) */ } x; } var; @@ -55,7 +135,7 @@ typedef struct chain_s { /* Function */ typedef struct func_s { - unsigned short nargs; + unsigned nargs; struct chain_s body; } func; @@ -66,7 +146,7 @@ typedef struct rstream_s { int adv; int size; int pos; - unsigned short is_pipe; + smallint is_pipe; } rstream; typedef struct hash_item_s { @@ -94,15 +174,14 @@ typedef struct node_s { union { struct node_s *n; var *v; - int i; - char *s; + int aidx; + char *new_progname; regex_t *re; } l; union { struct node_s *n; regex_t *ire; func *f; - int argno; } r; union { struct node_s *n; @@ -115,7 +194,7 @@ typedef struct nvblock_s { var *pos; struct nvblock_s *prev; struct nvblock_s *next; - var nv[0]; + var nv[]; } nvblock; typedef struct tsplitter_s { @@ -125,7 +204,7 @@ typedef struct tsplitter_s { /* simple token classes */ /* Order and hex values are very important!!! See next_token() */ -#define TC_SEQSTART 1 /* ( */ +#define TC_SEQSTART (1 << 0) /* ( */ #define TC_SEQTERM (1 << 1) /* ) */ #define TC_REGEXP (1 << 2) /* /.../ */ #define TC_OUTRDR (1 << 3) /* | > >> */ @@ -145,31 +224,39 @@ typedef struct tsplitter_s { #define TC_WHILE (1 << 17) #define TC_ELSE (1 << 18) #define TC_BUILTIN (1 << 19) -#define TC_GETLINE (1 << 20) -#define TC_FUNCDECL (1 << 21) /* `function' `func' */ -#define TC_BEGIN (1 << 22) -#define TC_END (1 << 23) -#define TC_EOF (1 << 24) -#define TC_VARIABLE (1 << 25) -#define TC_ARRAY (1 << 26) -#define TC_FUNCTION (1 << 27) -#define TC_STRING (1 << 28) -#define TC_NUMBER (1 << 29) +/* This costs ~50 bytes of code. + * A separate class to support deprecated "length" form. If we don't need that + * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH + * can be merged with TC_BUILTIN: + */ +#define TC_LENGTH (1 << 20) +#define TC_GETLINE (1 << 21) +#define TC_FUNCDECL (1 << 22) /* `function' `func' */ +#define TC_BEGIN (1 << 23) +#define TC_END (1 << 24) +#define TC_EOF (1 << 25) +#define TC_VARIABLE (1 << 26) +#define TC_ARRAY (1 << 27) +#define TC_FUNCTION (1 << 28) +#define TC_STRING (1 << 29) +#define TC_NUMBER (1 << 30) #define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2) /* combined token classes */ #define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN) -#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) +//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST) #define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \ - | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER) + | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ + | TC_SEQSTART | TC_STRING | TC_NUMBER) #define TC_STATEMNT (TC_STATX | TC_WHILE) #define TC_OPTERM (TC_SEMICOL | TC_NEWLINE) /* word tokens, cannot mean something else if not expected */ -#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \ - | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END) +#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \ + | TC_BUILTIN | TC_LENGTH | TC_GETLINE \ + | TC_FUNCDECL | TC_BEGIN | TC_END) /* discard newlines after these */ #define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \ @@ -212,6 +299,9 @@ typedef struct tsplitter_s { * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string */ +#undef P +#undef PRIMASK +#undef PRIMASK2 #define P(x) (x << 24) #define PRIMASK 0x7F000000 #define PRIMASK2 0x7E000000 @@ -251,115 +341,107 @@ enum { /* builtins */ enum { - B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up, + B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up, B_ge, B_gs, B_su, B_an, B_co, B_ls, B_or, B_rs, B_xo, }; /* tokens and their corresponding info values */ -#define NTC "\377" /* switch to next token class (tc<<1) */ -#define NTCC '\377' - -#define OC_B OC_BUILTIN - -static const char tokenlist[] = - "\1(" NTC - "\1)" NTC - "\1/" NTC /* REGEXP */ - "\2>>" "\1>" "\1|" NTC /* OUTRDR */ - "\2++" "\2--" NTC /* UOPPOST */ - "\2++" "\2--" "\1$" NTC /* UOPPRE1 */ - "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */ - "\2*=" "\2/=" "\2%=" "\2^=" - "\1+" "\1-" "\3**=" "\2**" - "\1/" "\1%" "\1^" "\1*" - "\2!=" "\2>=" "\2<=" "\1>" - "\1<" "\2!~" "\1~" "\2&&" - "\2||" "\1?" "\1:" NTC - "\2in" NTC - "\1," NTC - "\1|" NTC - "\1+" "\1-" "\1!" NTC /* UOPPRE2 */ - "\1]" NTC - "\1{" NTC - "\1}" NTC - "\1;" NTC - "\1\n" NTC - "\2if" "\2do" "\3for" "\5break" /* STATX */ - "\10continue" "\6delete" "\5print" - "\6printf" "\4next" "\10nextfile" - "\6return" "\4exit" NTC - "\5while" NTC - "\4else" NTC - - "\3and" "\5compl" "\6lshift" "\2or" - "\6rshift" "\3xor" - "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */ - "\3cos" "\3exp" "\3int" "\3log" - "\4rand" "\3sin" "\4sqrt" "\5srand" - "\6gensub" "\4gsub" "\5index" "\6length" - "\5match" "\5split" "\7sprintf" "\3sub" - "\6substr" "\7systime" "\10strftime" - "\7tolower" "\7toupper" NTC - "\7getline" NTC - "\4func" "\10function" NTC - "\5BEGIN" NTC - "\3END" "\0" +#define NTC "\377" /* switch to next token class (tc<<1) */ +#define NTCC '\377' + +static const char tokenlist[] ALIGN1 = + "\1(" NTC /* TC_SEQSTART */ + "\1)" NTC /* TC_SEQTERM */ + "\1/" NTC /* TC_REGEXP */ + "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */ + "\2++" "\2--" NTC /* TC_UOPPOST */ + "\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */ + "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */ + "\2*=" "\2/=" "\2%=" "\2^=" + "\1+" "\1-" "\3**=" "\2**" + "\1/" "\1%" "\1^" "\1*" + "\2!=" "\2>=" "\2<=" "\1>" + "\1<" "\2!~" "\1~" "\2&&" + "\2||" "\1?" "\1:" NTC + "\2in" NTC /* TC_IN */ + "\1," NTC /* TC_COMMA */ + "\1|" NTC /* TC_PIPE */ + "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */ + "\1]" NTC /* TC_ARRTERM */ + "\1{" NTC /* TC_GRPSTART */ + "\1}" NTC /* TC_GRPTERM */ + "\1;" NTC /* TC_SEMICOL */ + "\1\n" NTC /* TC_NEWLINE */ + "\2if" "\2do" "\3for" "\5break" /* TC_STATX */ + "\10continue" "\6delete" "\5print" + "\6printf" "\4next" "\10nextfile" + "\6return" "\4exit" NTC + "\5while" NTC /* TC_WHILE */ + "\4else" NTC /* TC_ELSE */ + "\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */ + "\6rshift" "\3xor" + "\5close" "\6system" "\6fflush" "\5atan2" + "\3cos" "\3exp" "\3int" "\3log" + "\4rand" "\3sin" "\4sqrt" "\5srand" + "\6gensub" "\4gsub" "\5index" /* "\6length" was here */ + "\5match" "\5split" "\7sprintf" "\3sub" + "\6substr" "\7systime" "\10strftime" "\6mktime" + "\7tolower" "\7toupper" NTC + "\6length" NTC /* TC_LENGTH */ + "\7getline" NTC /* TC_GETLINE */ + "\4func" "\10function" NTC /* TC_FUNCDECL */ + "\5BEGIN" NTC /* TC_BEGIN */ + "\3END" /* TC_END */ + /* compiler adds trailing "\0" */ ; +#define OC_B OC_BUILTIN + static const uint32_t tokeninfo[] = { 0, 0, OC_REGEXP, - xS|'a', xS|'w', xS|'|', - OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', - OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', - OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), - OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', - OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', - OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', - OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', - OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', - OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', - OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, - OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, - OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', - OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), - OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', - OC_COLON|xx|P(67)|':', - OC_IN|SV|P(49), + xS|'a', xS|'w', xS|'|', + OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', + OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5), + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', + OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', + OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, + OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), + OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', + OC_IN|SV|P(49), /* TC_IN */ OC_COMMA|SS|P(80), OC_PGETLINE|SV|P(37), - OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', - OC_UNARY|xV|P(19)|'!', + OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', + 0, /* ] */ 0, 0, 0, - 0, - 0, - ST_IF, ST_DO, ST_FOR, OC_BREAK, - OC_CONTINUE, OC_DELETE|Vx, OC_PRINT, - OC_PRINTF, OC_NEXT, OC_NEXTFILE, - OC_RETURN|Vx, OC_EXIT|Nx, + 0, /* \n */ + ST_IF, ST_DO, ST_FOR, OC_BREAK, + OC_CONTINUE, OC_DELETE|Vx, OC_PRINT, + OC_PRINTF, OC_NEXT, OC_NEXTFILE, + OC_RETURN|Vx, OC_EXIT|Nx, ST_WHILE, - 0, - + 0, /* else */ OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83), OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, - OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le, + OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */ OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6), - OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), + OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), + OC_FBLTIN|Sx|F_le, /* TC_LENGTH */ OC_GETLINE|SV|P(0), - 0, 0, + 0, 0, 0, - 0 + 0 /* TC_END */ }; /* internal variable names and their initial values */ @@ -367,61 +449,131 @@ static const uint32_t tokeninfo[] = { enum { CONVFMT, OFMT, FS, OFS, ORS, RS, RT, FILENAME, - SUBSEP, ARGIND, ARGC, ARGV, - ERRNO, FNR, - NR, NF, IGNORECASE, - ENVIRON, F0, NUM_INTERNAL_VARS + SUBSEP, F0, ARGIND, ARGC, + ARGV, ERRNO, FNR, NR, + NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS }; -static const char vNames[] = +static const char vNames[] ALIGN1 = "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0" "ORS\0" "RS\0*" "RT\0" "FILENAME\0" - "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0" - "ERRNO\0" "FNR\0" - "NR\0" "NF\0*" "IGNORECASE\0*" - "ENVIRON\0" "$\0*" "\0"; + "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0" + "ARGV\0" "ERRNO\0" "FNR\0" "NR\0" + "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0"; -static const char vValues[] = +static const char vValues[] ALIGN1 = "%.6g\0" "%.6g\0" " \0" " \0" "\n\0" "\n\0" "\0" "\0" - "\034\0" - "\377"; + "\034\0" "\0" "\377"; /* hash size may grow to these values */ -#define FIRST_PRIME 61; -static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 }; -enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) }; - -/* globals */ - -static var *intvar[NUM_INTERNAL_VARS]; -static chain beginseq, mainseq, endseq, *seq; -static int nextrec, nextfile; -static node *break_ptr, *continue_ptr; -static rstream *iF; -static xhash *vhash, *ahash, *fdhash, *fnhash; -static const char *programname; -static int lineno; -static int is_f0_split; -static int nfields; -static var *Fields; -static tsplitter fsplitter, rsplitter; -static nvblock *cb; -static char *pos; -static char *buf; -static int icase; -static int exiting; - -static struct { - uint32_t tclass; - uint32_t info; - char *string; - double number; - int lineno; - int rollback; -} ttt; -/* It had even better name: 't'. Whoever knows what is it, please rename! */ -/* (actually it looks like unrelated stuff lumped together...) */ +#define FIRST_PRIME 61 +static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 }; + + +/* Globals. Split in two parts so that first one is addressed + * with (mostly short) negative offsets. + * NB: it's unsafe to put members of type "double" + * into globals2 (gcc may fail to align them). + */ +struct globals { + double t_double; + chain beginseq, mainseq, endseq; + chain *seq; + node *break_ptr, *continue_ptr; + rstream *iF; + xhash *vhash, *ahash, *fdhash, *fnhash; + const char *g_progname; + int g_lineno; + int nfields; + int maxfields; /* used in fsrealloc() only */ + var *Fields; + nvblock *g_cb; + char *g_pos; + char *g_buf; + smallint icase; + smallint exiting; + smallint nextrec; + smallint nextfile; + smallint is_f0_split; + smallint t_rollback; +}; +struct globals2 { + uint32_t t_info; /* often used */ + uint32_t t_tclass; + char *t_string; + int t_lineno; + + var *intvar[NUM_INTERNAL_VARS]; /* often used */ + + /* former statics from various functions */ + char *split_f0__fstrings; + + uint32_t next_token__save_tclass; + uint32_t next_token__save_info; + uint32_t next_token__ltclass; + smallint next_token__concat_inserted; + + smallint next_input_file__files_happen; + rstream next_input_file__rsm; + + var *evaluate__fnargs; + unsigned evaluate__seed; + regex_t evaluate__sreg; + + var ptest__v; + + tsplitter exec_builtin__tspl; + + /* biggest and least used members go last */ + tsplitter fsplitter, rsplitter; +}; +#define G1 (ptr_to_globals[-1]) +#define G (*(struct globals2 *)ptr_to_globals) +/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */ +/*char G1size[sizeof(G1)]; - 0x74 */ +/*char Gsize[sizeof(G)]; - 0x1c4 */ +/* Trying to keep most of members accessible with short offsets: */ +/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ +#define t_double (G1.t_double ) +#define beginseq (G1.beginseq ) +#define mainseq (G1.mainseq ) +#define endseq (G1.endseq ) +#define seq (G1.seq ) +#define break_ptr (G1.break_ptr ) +#define continue_ptr (G1.continue_ptr) +#define iF (G1.iF ) +#define vhash (G1.vhash ) +#define ahash (G1.ahash ) +#define fdhash (G1.fdhash ) +#define fnhash (G1.fnhash ) +#define g_progname (G1.g_progname ) +#define g_lineno (G1.g_lineno ) +#define nfields (G1.nfields ) +#define maxfields (G1.maxfields ) +#define Fields (G1.Fields ) +#define g_cb (G1.g_cb ) +#define g_pos (G1.g_pos ) +#define g_buf (G1.g_buf ) +#define icase (G1.icase ) +#define exiting (G1.exiting ) +#define nextrec (G1.nextrec ) +#define nextfile (G1.nextfile ) +#define is_f0_split (G1.is_f0_split ) +#define t_rollback (G1.t_rollback ) +#define t_info (G.t_info ) +#define t_tclass (G.t_tclass ) +#define t_string (G.t_string ) +#define t_lineno (G.t_lineno ) +#define intvar (G.intvar ) +#define fsplitter (G.fsplitter ) +#define rsplitter (G.rsplitter ) +#define INIT_G() do { \ + SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \ + G.next_token__ltclass = TC_OPTERM; \ + G.evaluate__seed = 1; \ +} while (0) + /* function prototypes */ static void handle_special(var *); @@ -430,44 +582,40 @@ static void chain_group(void); static var *evaluate(node *, var *); static rstream *next_input_file(void); static int fmt_num(char *, int, const char *, double, int); -static int awk_exit(int) ATTRIBUTE_NORETURN; +static int awk_exit(int) NORETURN; /* ---- error handling ---- */ -static const char EMSG_INTERNAL_ERROR[] = "Internal error"; -static const char EMSG_UNEXP_EOS[] = "Unexpected end of string"; -static const char EMSG_UNEXP_TOKEN[] = "Unexpected token"; -static const char EMSG_DIV_BY_ZERO[] = "Division by zero"; -static const char EMSG_INV_FMT[] = "Invalid format specifier"; -static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin"; -static const char EMSG_NOT_ARRAY[] = "Not an array"; -static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error"; -static const char EMSG_UNDEF_FUNC[] = "Call to undefined function"; -#if !ENABLE_FEATURE_AWK_MATH -static const char EMSG_NO_MATH[] = "Math support is not compiled in"; -#endif - -static void zero_out_var(var * vp) +static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error"; +static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; +static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; +static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; +static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier"; +static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin"; +static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array"; +static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error"; +static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; +static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; + +static void zero_out_var(var *vp) { memset(vp, 0, sizeof(*vp)); } -static void syntax_error(const char * const message) ATTRIBUTE_NORETURN; -static void syntax_error(const char * const message) +static void syntax_error(const char *message) NORETURN; +static void syntax_error(const char *message) { - bb_error_msg_and_die("%s:%i: %s", programname, lineno, message); + bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message); } -#define runtime_error(x) syntax_error(x) - - /* ---- hash stuff ---- */ static unsigned hashidx(const char *name) { unsigned idx = 0; - while (*name) idx = *name++ + (idx << 6) - idx; + while (*name) + idx = *name++ + (idx << 6) - idx; return idx; } @@ -476,9 +624,9 @@ static xhash *hash_init(void) { xhash *newhash; - newhash = xzalloc(sizeof(xhash)); + newhash = xzalloc(sizeof(*newhash)); newhash->csize = FIRST_PRIME; - newhash->items = xzalloc(newhash->csize * sizeof(hash_item *)); + newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0])); return newhash; } @@ -488,10 +636,10 @@ static void *hash_search(xhash *hash, const char *name) { hash_item *hi; - hi = hash->items [ hashidx(name) % hash->csize ]; + hi = hash->items[hashidx(name) % hash->csize]; while (hi) { if (strcmp(hi->name, name) == 0) - return &(hi->data); + return &hi->data; hi = hi->next; } return NULL; @@ -503,11 +651,11 @@ static void hash_rebuild(xhash *hash) unsigned newsize, i, idx; hash_item **newitems, *hi, *thi; - if (hash->nprime == NPRIMES) + if (hash->nprime == ARRAY_SIZE(PRIMES)) return; newsize = PRIMES[hash->nprime++]; - newitems = xzalloc(newsize * sizeof(hash_item *)); + newitems = xzalloc(newsize * sizeof(newitems[0])); for (i = 0; i < hash->csize; i++) { hi = hash->items[i]; @@ -533,20 +681,20 @@ static void *hash_find(xhash *hash, const char *name) int l; hi = hash_search(hash, name); - if (! hi) { + if (!hi) { if (++hash->nel / hash->csize > 10) hash_rebuild(hash); l = strlen(name) + 1; - hi = xzalloc(sizeof(hash_item) + l); - memcpy(hi->name, name, l); + hi = xzalloc(sizeof(*hi) + l); + strcpy(hi->name, name); idx = hashidx(name) % hash->csize; hi->next = hash->items[idx]; hash->items[idx] = hi; hash->glen += l; } - return &(hi->data); + return &hi->data; } #define findvar(hash, name) ((var*) hash_find((hash), (name))) @@ -558,7 +706,7 @@ static void hash_remove(xhash *hash, const char *name) { hash_item *hi, **phi; - phi = &(hash->items[hashidx(name) % hash->csize]); + phi = &hash->items[hashidx(name) % hash->csize]; while (*phi) { hi = *phi; if (strcmp(hi->name, name) == 0) { @@ -568,34 +716,32 @@ static void hash_remove(xhash *hash, const char *name) free(hi); break; } - phi = &(hi->next); + phi = &hi->next; } } /* ------ some useful functions ------ */ -static void skip_spaces(char **s) +static char *skip_spaces(char *p) { - char *p = *s; - while (1) { if (*p == '\\' && p[1] == '\n') { p++; - ttt.lineno++; + t_lineno++; } else if (*p != ' ' && *p != '\t') { break; } p++; } - *s = p; + return p; } +/* returns old *s, advances *s past word and terminating NUL */ static char *nextword(char **s) { char *p = *s; - - while (*(*s)++) /* */; - + while (*(*s)++ != '\0') + continue; return p; } @@ -603,21 +749,57 @@ static char nextchar(char **s) { char c, *pps; - c = *((*s)++); + c = *(*s)++; pps = *s; - if (c == '\\') c = bb_process_escape_sequence((const char**)s); - if (c == '\\' && *s == pps) c = *((*s)++); + if (c == '\\') + c = bb_process_escape_sequence((const char**)s); + /* Example awk statement: + * s = "abc\"def" + * we must treat \" as " + */ + if (c == '\\' && *s == pps) { /* unrecognized \z? */ + c = *(*s); /* yes, fetch z */ + if (c) + (*s)++; /* advance unless z = NUL */ + } return c; } -static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c) +/* TODO: merge with strcpy_and_process_escape_sequences()? + */ +static void unescape_string_in_place(char *s1) +{ + char *s = s1; + while ((*s1 = nextchar(&s)) != '\0') + s1++; +} + +static ALWAYS_INLINE int isalnum_(int c) { return (isalnum(c) || c == '_'); } -static FILE *afopen(const char *path, const char *mode) +static double my_strtod(char **pp) { - return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode); + char *cp = *pp; + if (ENABLE_DESKTOP && cp[0] == '0') { + /* Might be hex or octal integer: 0x123abc or 07777 */ + char c = (cp[1] | 0x20); + if (c == 'x' || isdigit(cp[1])) { + unsigned long long ull = strtoull(cp, pp, 0); + if (c == 'x') + return ull; + c = **pp; + if (!isdigit(c) && c != '.') + return ull; + /* else: it may be a floating number. Examples: + * 009.123 (*pp points to '9') + * 000.123 (*pp points to '.') + * fall through to strtod. + */ + } + } + return strtod(cp, pp); } /* -------- working with variables (set/get/copy/etc) -------- */ @@ -681,10 +863,10 @@ static var *setvar_s(var *v, const char *value) return setvar_p(v, (value && *value) ? xstrdup(value) : NULL); } -/* same as setvar_s but set USER flag */ +/* same as setvar_s but sets USER flag */ static var *setvar_u(var *v, const char *value) { - setvar_s(v, value); + v = setvar_s(v, value); v->type |= VF_USER; return v; } @@ -692,11 +874,9 @@ static var *setvar_u(var *v, const char *value) /* set array element to user string */ static void setari_u(var *a, int idx, const char *s) { - char sidx[sizeof(int)*3 + 1]; var *v; - sprintf(sidx, "%d", idx); - v = findvar(iamarray(a), sidx); + v = findvar(iamarray(a), itoa(idx)); setvar_u(v, s); } @@ -714,8 +894,8 @@ static const char *getvar_s(var *v) { /* if v is numeric and has no cached string, convert it to string */ if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) { - fmt_num(buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE); - v->string = xstrdup(buf); + fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE); + v->string = xstrdup(g_buf); v->type |= VF_CACHED; } return (v->string == NULL) ? "" : v->string; @@ -729,25 +909,43 @@ static double getvar_i(var *v) v->number = 0; s = v->string; if (s && *s) { - v->number = strtod(s, &s); + debug_printf_eval("getvar_i: '%s'->", s); + v->number = my_strtod(&s); + debug_printf_eval("%f (s:'%s')\n", v->number, s); if (v->type & VF_USER) { - skip_spaces(&s); + s = skip_spaces(s); if (*s != '\0') v->type &= ~VF_USER; } } else { + debug_printf_eval("getvar_i: '%s'->zero\n", s); v->type &= ~VF_USER; } v->type |= VF_CACHED; } + debug_printf_eval("getvar_i: %f\n", v->number); return v->number; } +/* Used for operands of bitwise ops */ +static unsigned long getvar_i_int(var *v) +{ + double d = getvar_i(v); + + /* Casting doubles to longs is undefined for values outside + * of target type range. Try to widen it as much as possible */ + if (d >= 0) + return (unsigned long)d; + /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */ + return - (long) (unsigned long) (-d); +} + static var *copyvar(var *dest, const var *src) { if (dest != src) { clrvar(dest); dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR)); + debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string); dest->number = src->number; if (src->string) dest->string = xstrdup(src->string); @@ -758,7 +956,7 @@ static var *copyvar(var *dest, const var *src) static var *incvar(var *v) { - return setvar_i(v, getvar_i(v)+1.); + return setvar_i(v, getvar_i(v) + 1.0); } /* return true if v is number or numeric string */ @@ -772,9 +970,8 @@ static int is_numeric(var *v) static int istrue(var *v) { if (is_numeric(v)) - return (v->number == 0) ? 0 : 1; - else - return (v->string && *(v->string)) ? 1 : 0; + return (v->number != 0); + return (v->string && v->string[0]); } /* temporary variables allocator. Last allocated should be first freed */ @@ -784,26 +981,28 @@ static var *nvalloc(int n) var *v, *r; int size; - while (cb) { - pb = cb; - if ((cb->pos - cb->nv) + n <= cb->size) break; - cb = cb->next; + while (g_cb) { + pb = g_cb; + if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) + break; + g_cb = g_cb->next; } - if (! cb) { + if (!g_cb) { size = (n <= MINNVBLOCK) ? MINNVBLOCK : n; - cb = xmalloc(sizeof(nvblock) + size * sizeof(var)); - cb->size = size; - cb->pos = cb->nv; - cb->prev = pb; - cb->next = NULL; - if (pb) pb->next = cb; + g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var)); + g_cb->size = size; + g_cb->pos = g_cb->nv; + g_cb->prev = pb; + /*g_cb->next = NULL; - xzalloc did it */ + if (pb) + pb->next = g_cb; } - v = r = cb->pos; - cb->pos += n; + v = r = g_cb->pos; + g_cb->pos += n; - while (v < cb->pos) { + while (v < g_cb->pos) { v->type = 0; v->string = NULL; v++; @@ -816,24 +1015,33 @@ static void nvfree(var *v) { var *p; - if (v < cb->nv || v >= cb->pos) - runtime_error(EMSG_INTERNAL_ERROR); + if (v < g_cb->nv || v >= g_cb->pos) + syntax_error(EMSG_INTERNAL_ERROR); - for (p = v; p < cb->pos; p++) { + for (p = v; p < g_cb->pos; p++) { if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) { clear_array(iamarray(p)); free(p->x.array->items); free(p->x.array); } - if (p->type & VF_WALK) - free(p->x.walker); - + if (p->type & VF_WALK) { + walker_list *n; + walker_list *w = p->x.walker; + debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker); + p->x.walker = NULL; + while (w) { + n = w->prev; + debug_printf_walker(" free(%p)\n", w); + free(w); + w = n; + } + } clrvar(p); } - cb->pos = v; - while (cb->prev && cb->pos == cb->nv) { - cb = cb->prev; + g_cb->pos = v; + while (g_cb->prev && g_cb->pos == g_cb->nv) { + g_cb = g_cb->prev; } } @@ -844,132 +1052,142 @@ static void nvfree(var *v) */ static uint32_t next_token(uint32_t expected) { - static int concat_inserted; - static uint32_t save_tclass, save_info; - static uint32_t ltclass = TC_OPTERM; +#define concat_inserted (G.next_token__concat_inserted) +#define save_tclass (G.next_token__save_tclass) +#define save_info (G.next_token__save_info) +/* Initialized to TC_OPTERM: */ +#define ltclass (G.next_token__ltclass) - char *p, *pp, *s; + char *p, *s; const char *tl; uint32_t tc; const uint32_t *ti; - int l; - - if (ttt.rollback) { - ttt.rollback = FALSE; + if (t_rollback) { + t_rollback = FALSE; } else if (concat_inserted) { concat_inserted = FALSE; - ttt.tclass = save_tclass; - ttt.info = save_info; - + t_tclass = save_tclass; + t_info = save_info; } else { - p = pos; + p = g_pos; readnext: - skip_spaces(&p); - lineno = ttt.lineno; + p = skip_spaces(p); + g_lineno = t_lineno; if (*p == '#') while (*p != '\n' && *p != '\0') p++; if (*p == '\n') - ttt.lineno++; + t_lineno++; if (*p == '\0') { tc = TC_EOF; - + debug_printf_parse("%s: token found: TC_EOF\n", __func__); } else if (*p == '\"') { /* it's a string */ - ttt.string = s = ++p; + t_string = s = ++p; while (*p != '\"') { + char *pp; if (*p == '\0' || *p == '\n') syntax_error(EMSG_UNEXP_EOS); - *(s++) = nextchar(&p); + pp = p; + *s++ = nextchar(&pp); + p = pp; } p++; *s = '\0'; tc = TC_STRING; - + debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); } else if ((expected & TC_REGEXP) && *p == '/') { /* it's regexp */ - ttt.string = s = ++p; + t_string = s = ++p; while (*p != '/') { if (*p == '\0' || *p == '\n') syntax_error(EMSG_UNEXP_EOS); *s = *p++; if (*s++ == '\\') { - pp = p; - *(s-1) = bb_process_escape_sequence((const char **)&p); - if (*pp == '\\') + char *pp = p; + s[-1] = bb_process_escape_sequence((const char **)&pp); + if (*p == '\\') *s++ = '\\'; - if (p == pp) + if (pp == p) *s++ = *p++; + else + p = pp; } } p++; *s = '\0'; tc = TC_REGEXP; + debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string); } else if (*p == '.' || isdigit(*p)) { /* it's a number */ - ttt.number = strtod(p, &p); + char *pp = p; + t_double = my_strtod(&pp); + p = pp; if (*p == '.') syntax_error(EMSG_UNEXP_TOKEN); tc = TC_NUMBER; - + debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); } else { /* search for something known */ tl = tokenlist; tc = 0x00000001; ti = tokeninfo; while (*tl) { - l = *(tl++); - if (l == NTCC) { + int l = (unsigned char) *tl++; + if (l == (unsigned char) NTCC) { tc <<= 1; continue; } - /* if token class is expected, token - * matches and it's not a longer word, - * then this is what we are looking for + /* if token class is expected, + * token matches, + * and it's not a longer word, */ if ((tc & (expected | TC_WORD | TC_NEWLINE)) - && *tl == *p && strncmp(p, tl, l) == 0 + && strncmp(p, tl, l) == 0 && !((tc & TC_WORD) && isalnum_(p[l])) ) { - ttt.info = *ti; + /* then this is what we are looking for */ + t_info = *ti; + debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info); p += l; - break; + goto token_found; } ti++; tl += l; } - - if (!*tl) { - /* it's a name (var/array/function), - * otherwise it's something wrong - */ - if (!isalnum_(*p)) - syntax_error(EMSG_UNEXP_TOKEN); - - ttt.string = --p; - while (isalnum_(*(++p))) { - *(p-1) = *p; - } - *(p-1) = '\0'; - tc = TC_VARIABLE; - /* also consume whitespace between functionname and bracket */ - if (!(expected & TC_VARIABLE)) - skip_spaces(&p); - if (*p == '(') { - tc = TC_FUNCTION; - } else { - if (*p == '[') { - p++; - tc = TC_ARRAY; - } - } + /* not a known token */ + + /* is it a name? (var/array/function) */ + if (!isalnum_(*p)) + syntax_error(EMSG_UNEXP_TOKEN); /* no */ + /* yes */ + t_string = --p; + while (isalnum_(*++p)) { + p[-1] = *p; + } + p[-1] = '\0'; + tc = TC_VARIABLE; + /* also consume whitespace between functionname and bracket */ + if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) + p = skip_spaces(p); + if (*p == '(') { + tc = TC_FUNCTION; + debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); + } else { + if (*p == '[') { + p++; + tc = TC_ARRAY; + debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); + } else + debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); } } - pos = p; + token_found: + g_pos = p; /* skipping newlines in some cases */ if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE)) @@ -979,26 +1197,31 @@ static uint32_t next_token(uint32_t expected) if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) { concat_inserted = TRUE; save_tclass = tc; - save_info = ttt.info; + save_info = t_info; tc = TC_BINOP; - ttt.info = OC_CONCAT | SS | P(35); + t_info = OC_CONCAT | SS | P(35); } - ttt.tclass = tc; + t_tclass = tc; } - ltclass = ttt.tclass; + ltclass = t_tclass; /* Are we ready for this? */ - if (!(ltclass & expected)) + if (!(ltclass & expected)) { syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ? EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN); + } return ltclass; +#undef concat_inserted +#undef save_tclass +#undef save_info +#undef ltclass } static void rollback_token(void) { - ttt.rollback = TRUE; + t_rollback = TRUE; } static node *new_node(uint32_t info) @@ -1007,19 +1230,17 @@ static node *new_node(uint32_t info) n = xzalloc(sizeof(node)); n->info = info; - n->lineno = lineno; + n->lineno = g_lineno; return n; } -static node *mk_re_node(const char *s, node *n, regex_t *re) +static void mk_re_node(const char *s, node *n, regex_t *re) { n->info = OC_REGEXP; n->l.re = re; n->r.ire = re + 1; xregcomp(re, s, REG_EXTENDED); xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); - - return n; } static node *condition(void) @@ -1038,33 +1259,40 @@ static node *parse_expr(uint32_t iexp) uint32_t tc, xtc; var *v; + debug_printf_parse("%s(%x)\n", __func__, iexp); + sn.info = PRIMASK; sn.r.n = glptr = NULL; xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp; while (!((tc = next_token(xtc)) & iexp)) { - if (glptr && (ttt.info == (OC_COMPARE | VV | P(39) | 2))) { + + if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { /* input redirection (<) attached to glptr node */ + debug_printf_parse("%s: input redir\n", __func__); cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); cn->a.n = glptr; xtc = TC_OPERAND | TC_UOPPRE; glptr = NULL; } else if (tc & (TC_BINOP | TC_UOPPOST)) { + debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__); /* for binary and postfix-unary operators, jump back over * previous operators with higher priority */ vn = cn; - while ( ((ttt.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) - || ((ttt.info == vn->info) && ((ttt.info & OPCLSMASK) == OC_COLON)) ) + while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) + || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) + ) { vn = vn->a.n; - if ((ttt.info & OPCLSMASK) == OC_TERNARY) - ttt.info += P(6); - cn = vn->a.n->r.n = new_node(ttt.info); + } + if ((t_info & OPCLSMASK) == OC_TERNARY) + t_info += P(6); + cn = vn->a.n->r.n = new_node(t_info); cn->a.n = vn->a.n; if (tc & TC_BINOP) { cn->l.n = vn; xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; - if ((ttt.info & OPCLSMASK) == OC_PGETLINE) { + if ((t_info & OPCLSMASK) == OC_PGETLINE) { /* it's a pipe */ next_token(TC_GETLINE); /* give maximum priority to this pipe */ @@ -1078,26 +1306,29 @@ static node *parse_expr(uint32_t iexp) vn->a.n = cn; } else { + debug_printf_parse("%s: other\n", __func__); /* for operands and prefix-unary operators, attach them * to last node */ vn = cn; - cn = vn->r.n = new_node(ttt.info); + cn = vn->r.n = new_node(t_info); cn->a.n = vn; xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; if (tc & (TC_OPERAND | TC_REGEXP)) { + debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__); xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp; /* one should be very careful with switch on tclass - * only simple tclasses should be used! */ switch (tc) { case TC_VARIABLE: case TC_ARRAY: + debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); cn->info = OC_VAR; - v = hash_search(ahash, ttt.string); + v = hash_search(ahash, t_string); if (v != NULL) { cn->info = OC_FNARG; - cn->l.i = v->x.aidx; + cn->l.aidx = v->x.aidx; } else { - cn->l.v = newvar(ttt.string); + cn->l.v = newvar(t_string); } if (tc & TC_ARRAY) { cn->info |= xS; @@ -1107,41 +1338,61 @@ static node *parse_expr(uint32_t iexp) case TC_NUMBER: case TC_STRING: + debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); cn->info = OC_VAR; v = cn->l.v = xzalloc(sizeof(var)); if (tc & TC_NUMBER) - setvar_i(v, ttt.number); + setvar_i(v, t_double); else - setvar_s(v, ttt.string); + setvar_s(v, t_string); break; case TC_REGEXP: - mk_re_node(ttt.string, cn, xzalloc(sizeof(regex_t)*2)); + debug_printf_parse("%s: TC_REGEXP\n", __func__); + mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); break; case TC_FUNCTION: + debug_printf_parse("%s: TC_FUNCTION\n", __func__); cn->info = OC_FUNC; - cn->r.f = newfunc(ttt.string); + cn->r.f = newfunc(t_string); cn->l.n = condition(); break; case TC_SEQSTART: + debug_printf_parse("%s: TC_SEQSTART\n", __func__); cn = vn->r.n = parse_expr(TC_SEQTERM); + if (!cn) + syntax_error("Empty sequence"); cn->a.n = vn; break; case TC_GETLINE: + debug_printf_parse("%s: TC_GETLINE\n", __func__); glptr = cn; xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; break; case TC_BUILTIN: + debug_printf_parse("%s: TC_BUILTIN\n", __func__); cn->l.n = condition(); break; + + case TC_LENGTH: + debug_printf_parse("%s: TC_LENGTH\n", __func__); + next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM); + rollback_token(); + if (t_tclass & TC_SEQSTART) { + /* It was a "(" token. Handle just like TC_BUILTIN */ + cn->l.n = condition(); + } + break; } } } } + + debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); return sn.r.n; } @@ -1153,10 +1404,10 @@ static node *chain_node(uint32_t info) if (!seq->first) seq->first = seq->last = new_node(0); - if (seq->programname != programname) { - seq->programname = programname; + if (seq->programname != g_progname) { + seq->programname = g_progname; n = chain_node(OC_NEWSOURCE); - n->l.s = xstrdup(programname); + n->l.new_progname = xstrdup(g_progname); } n = seq->last; @@ -1172,7 +1423,7 @@ static void chain_expr(uint32_t info) n = chain_node(info); n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM); - if (ttt.tclass & TC_GRPTERM) + if (t_tclass & TC_GRPTERM) rollback_token(); } @@ -1210,17 +1461,25 @@ static void chain_group(void) } while (c & TC_NEWLINE); if (c & TC_GRPSTART) { + debug_printf_parse("%s: TC_GRPSTART\n", __func__); while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { - if (ttt.tclass & TC_NEWLINE) continue; + debug_printf_parse("%s: !TC_GRPTERM\n", __func__); + if (t_tclass & TC_NEWLINE) + continue; rollback_token(); chain_group(); } + debug_printf_parse("%s: TC_GRPTERM\n", __func__); } else if (c & (TC_OPSEQ | TC_OPTERM)) { + debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__); rollback_token(); chain_expr(OC_EXEC | Vx); - } else { /* TC_STATEMNT */ - switch (ttt.info & OPCLSMASK) { + } else { + /* TC_STATEMNT */ + debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__); + switch (t_info & OPCLSMASK) { case ST_IF: + debug_printf_parse("%s: ST_IF\n", __func__); n = chain_node(OC_BR | Vx); n->l.n = condition(); chain_group(); @@ -1235,12 +1494,14 @@ static void chain_group(void) break; case ST_WHILE: + debug_printf_parse("%s: ST_WHILE\n", __func__); n2 = condition(); n = chain_loop(NULL); n->l.n = n2; break; case ST_DO: + debug_printf_parse("%s: ST_DO\n", __func__); n2 = chain_node(OC_EXEC); n = chain_loop(NULL); n2->a.n = n->a.n; @@ -1249,10 +1510,11 @@ static void chain_group(void) break; case ST_FOR: + debug_printf_parse("%s: ST_FOR\n", __func__); next_token(TC_SEQSTART); n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); - if (ttt.tclass & TC_SEQTERM) { /* for-in */ - if ((n2->info & OPCLSMASK) != OC_IN) + if (t_tclass & TC_SEQTERM) { /* for-in */ + if (!n2 || (n2->info & OPCLSMASK) != OC_IN) syntax_error(EMSG_UNEXP_TOKEN); n = chain_node(OC_WALKINIT | VV); n->l.n = n2->l.n; @@ -1267,36 +1529,42 @@ static void chain_group(void) n3 = parse_expr(TC_SEQTERM); n = chain_loop(n3); n->l.n = n2; - if (! n2) + if (!n2) n->info = OC_EXEC; } break; case OC_PRINT: case OC_PRINTF: - n = chain_node(ttt.info); + debug_printf_parse("%s: OC_PRINT[F]\n", __func__); + n = chain_node(t_info); n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); - if (ttt.tclass & TC_OUTRDR) { - n->info |= ttt.info; + if (t_tclass & TC_OUTRDR) { + n->info |= t_info; n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM); } - if (ttt.tclass & TC_GRPTERM) + if (t_tclass & TC_GRPTERM) rollback_token(); break; case OC_BREAK: + debug_printf_parse("%s: OC_BREAK\n", __func__); n = chain_node(OC_EXEC); n->a.n = break_ptr; + chain_expr(t_info); break; case OC_CONTINUE: + debug_printf_parse("%s: OC_CONTINUE\n", __func__); n = chain_node(OC_EXEC); n->a.n = continue_ptr; + chain_expr(t_info); break; /* delete, next, nextfile, return, exit */ default: - chain_expr(ttt.info); + debug_printf_parse("%s: default\n", __func__); + chain_expr(t_info); } } } @@ -1308,57 +1576,63 @@ static void parse_program(char *p) func *f; var *v; - pos = p; - ttt.lineno = 1; + g_pos = p; + t_lineno = 1; while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { - if (tclass & TC_OPTERM) + if (tclass & TC_OPTERM) { + debug_printf_parse("%s: TC_OPTERM\n", __func__); continue; + } seq = &mainseq; if (tclass & TC_BEGIN) { + debug_printf_parse("%s: TC_BEGIN\n", __func__); seq = &beginseq; chain_group(); - } else if (tclass & TC_END) { + debug_printf_parse("%s: TC_END\n", __func__); seq = &endseq; chain_group(); - } else if (tclass & TC_FUNCDECL) { + debug_printf_parse("%s: TC_FUNCDECL\n", __func__); next_token(TC_FUNCTION); - pos++; - f = newfunc(ttt.string); + g_pos++; + f = newfunc(t_string); f->body.first = NULL; f->nargs = 0; while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) { - v = findvar(ahash, ttt.string); - v->x.aidx = (f->nargs)++; + v = findvar(ahash, t_string); + v->x.aidx = f->nargs++; if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM) break; } - seq = &(f->body); + seq = &f->body; chain_group(); clear_array(ahash); - } else if (tclass & TC_OPSEQ) { + debug_printf_parse("%s: TC_OPSEQ\n", __func__); rollback_token(); cn = chain_node(OC_TEST); cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); - if (ttt.tclass & TC_GRPSTART) { + if (t_tclass & TC_GRPSTART) { + debug_printf_parse("%s: TC_GRPSTART\n", __func__); rollback_token(); chain_group(); } else { + debug_printf_parse("%s: !TC_GRPSTART\n", __func__); chain_node(OC_PRINT); } cn->r.n = mainseq.last; - } else /* if (tclass & TC_GRPSTART) */ { + debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__); rollback_token(); chain_group(); } } + debug_printf_parse("%s: TC_EOF\n", __func__); } @@ -1374,12 +1648,12 @@ static node *mk_splitter(const char *s, tsplitter *spl) n = &spl->n; if ((n->info & OPCLSMASK) == OC_REGEXP) { regfree(re); - regfree(ire); + regfree(ire); // TODO: nuke ire, use re+1? } - if (strlen(s) > 1) { + if (s[0] && s[1]) { /* strlen(s) > 1 */ mk_re_node(s, n, re); } else { - n->info = (uint32_t) *s; + n->info = (uint32_t) s[0]; } return n; @@ -1391,6 +1665,7 @@ static node *mk_splitter(const char *s, tsplitter *spl) */ static regex_t *as_regex(node *op, regex_t *preg) { + int cflags; var *v; const char *s; @@ -1399,48 +1674,61 @@ static regex_t *as_regex(node *op, regex_t *preg) } v = nvalloc(1); s = getvar_s(evaluate(op, v)); - xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED); + + cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED; + /* Testcase where REG_EXTENDED fails (unpaired '{'): + * echo Hi | awk 'gsub("@(samp|code|file)\{","");' + * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED + * (maybe gsub is not supposed to use REG_EXTENDED?). + */ + if (regcomp(preg, s, cflags)) { + cflags &= ~REG_EXTENDED; + xregcomp(preg, s, cflags); + } nvfree(v); return preg; } -/* gradually increasing buffer */ -static void qrealloc(char **b, int n, int *size) +/* gradually increasing buffer. + * note that we reallocate even if n == old_size, + * and thus there is at least one extra allocated byte. + */ +static char* qrealloc(char *b, int n, int *size) { - if (!*b || n >= *size) - *b = xrealloc(*b, *size = n + (n>>1) + 80); + if (!b || n >= *size) { + *size = n + (n>>1) + 80; + b = xrealloc(b, *size); + } + return b; } /* resize field storage space */ static void fsrealloc(int size) { - static int maxfields; /* = 0;*/ int i; if (size >= maxfields) { i = maxfields; maxfields = size + 16; - Fields = xrealloc(Fields, maxfields * sizeof(var)); + Fields = xrealloc(Fields, maxfields * sizeof(Fields[0])); for (; i < maxfields; i++) { Fields[i].type = VF_SPECIAL; Fields[i].string = NULL; } } - - if (size < nfields) { - for (i = size; i < nfields; i++) { - clrvar(Fields + i); - } + /* if size < nfields, clear extra field variables */ + for (i = size; i < nfields; i++) { + clrvar(Fields + i); } nfields = size; } static int awk_split(const char *s, node *spl, char **slist) { - int l, n = 0; + int l, n; char c[4]; char *s1; - regmatch_t pmatch[2]; + regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... /* in worst case, each char would be a separate field */ *slist = s1 = xzalloc(strlen(s) * 2 + 3); @@ -1451,9 +1739,13 @@ static int awk_split(const char *s, node *spl, char **slist) if (*getvar_s(intvar[RS]) == '\0') c[2] = '\n'; - if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ - while (*s) { - l = strcspn(s, c+2); + n = 0; + if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ + if (!*s) + return n; /* "": zero fields */ + n++; /* at least one field will be there */ + do { + l = strcspn(s, c+2); /* len till next NUL or \n */ if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 && pmatch[0].rm_so <= l ) { @@ -1462,49 +1754,60 @@ static int awk_split(const char *s, node *spl, char **slist) l++; pmatch[0].rm_eo++; } + n++; /* we saw yet another delimiter */ } else { pmatch[0].rm_eo = l; - if (s[l]) pmatch[0].rm_eo++; + if (s[l]) + pmatch[0].rm_eo++; } - memcpy(s1, s, l); - s1[l] = '\0'; + /* make sure we remove *all* of the separator chars */ + do { + s1[l] = '\0'; + } while (++l < pmatch[0].rm_eo); nextword(&s1); s += pmatch[0].rm_eo; - n++; - } - } else if (c[0] == '\0') { /* null split */ + } while (*s); + return n; + } + if (c[0] == '\0') { /* null split */ while (*s) { *s1++ = *s++; *s1++ = '\0'; n++; } - } else if (c[0] != ' ') { /* single-character split */ + return n; + } + if (c[0] != ' ') { /* single-character split */ if (icase) { c[0] = toupper(c[0]); c[1] = tolower(c[1]); } - if (*s1) n++; - while ((s1 = strpbrk(s1, c))) { - *s1++ = '\0'; - n++; - } - } else { /* space split */ - while (*s) { - s = skip_whitespace(s); - if (!*s) break; + if (*s1) n++; - while (*s && !isspace(*s)) - *s1++ = *s++; + while ((s1 = strpbrk(s1, c)) != NULL) { *s1++ = '\0'; + n++; } + return n; + } + /* space split */ + while (*s) { + s = skip_whitespace(s); + if (!*s) + break; + n++; + while (*s && !isspace(*s)) + *s1++ = *s++; + *s1++ = '\0'; } return n; } static void split_f0(void) { - static char *fstrings = NULL; +/* static char *fstrings; */ +#define fstrings (G.split_f0__fstrings) int i, n; char *s; @@ -1527,6 +1830,7 @@ static void split_f0(void) clrvar(intvar[NF]); intvar[NF]->type = VF_NUMBER | VF_SPECIAL; intvar[NF]->number = nfields; +#undef fstrings } /* perform additional actions when some internal variables changed */ @@ -1556,7 +1860,7 @@ static void handle_special(var *v) memcpy(b+len, sep, sl); len += sl; } - qrealloc(&b, len+l+sl, &bsize); + b = qrealloc(b, len+l+sl, &bsize); memcpy(b+len, s, l); len += l; } @@ -1569,14 +1873,23 @@ static void handle_special(var *v) is_f0_split = FALSE; } else if (v == intvar[FS]) { - mk_splitter(getvar_s(v), &fsplitter); + /* + * The POSIX-2008 standard says that changing FS should have no effect on the + * current input line, but only on the next one. The language is: + * + * > Before the first reference to a field in the record is evaluated, the record + * > shall be split into fields, according to the rules in Regular Expressions, + * > using the value of FS that was current at the time the record was read. + * + * So, split up current line before assignment to FS: + */ + split_f0(); + mk_splitter(getvar_s(v), &fsplitter); } else if (v == intvar[RS]) { mk_splitter(getvar_s(v), &rsplitter); - } else if (v == intvar[IGNORECASE]) { icase = istrue(v); - } else { /* $n */ n = getvar_i(intvar[NF]); setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1); @@ -1601,21 +1914,28 @@ static node *nextarg(node **pn) static void hashwalk_init(var *v, xhash *array) { - char **w; hash_item *hi; - int i; + unsigned i; + walker_list *w; + walker_list *prev_walker; - if (v->type & VF_WALK) - free(v->x.walker); + if (v->type & VF_WALK) { + prev_walker = v->x.walker; + } else { + v->type |= VF_WALK; + prev_walker = NULL; + } + debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker); - v->type |= VF_WALK; - w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen); - w[0] = w[1] = (char *)(w + 2); + w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */ + debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w); + w->cur = w->end = w->wbuf; + w->prev = prev_walker; for (i = 0; i < array->csize; i++) { hi = array->items[i]; while (hi) { - strcpy(*w, hi->name); - nextword(w); + strcpy(w->end, hi->name); + nextword(&w->end); hi = hi->next; } } @@ -1623,22 +1943,26 @@ static void hashwalk_init(var *v, xhash *array) static int hashwalk_next(var *v) { - char **w; + walker_list *w = v->x.walker; - w = v->x.walker; - if (w[1] == w[0]) + if (w->cur >= w->end) { + walker_list *prev_walker = w->prev; + + debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker); + free(w); + v->x.walker = prev_walker; return FALSE; + } - setvar_s(v, nextword(w+1)); + setvar_s(v, nextword(&w->cur)); return TRUE; } /* evaluate node, return 1 when result is true, 0 otherwise */ static int ptest(node *pattern) { - static var v; /* static: to save stack space? */ - - return istrue(evaluate(pattern, &v)); + /* ptest__v is "static": to save stack space? */ + return istrue(evaluate(pattern, &G.ptest__v)); } /* read next record from stream rsm into a variable v */ @@ -1646,10 +1970,12 @@ static int awk_getline(rstream *rsm, var *v) { char *b; regmatch_t pmatch[2]; - int a, p, pp=0, size; + int size, a, p, pp = 0; int fd, so, eo, r, rp; char c, *m, *s; + debug_printf_eval("entered %s()\n", __func__); + /* we're using our own buffer since we need access to accumulating * characters */ @@ -1661,7 +1987,9 @@ static int awk_getline(rstream *rsm, var *v) c = (char) rsplitter.n.info; rp = 0; - if (! m) qrealloc(&m, 256, &size); + if (!m) + m = qrealloc(m, 256, &size); + do { b = m + a; so = eo = p; @@ -1677,7 +2005,8 @@ static int awk_getline(rstream *rsm, var *v) } } else if (c != '\0') { s = strchr(b+pp, c); - if (! s) s = memchr(b+pp, '\0', p - pp); + if (!s) + s = memchr(b+pp, '\0', p - pp); if (s) { so = eo = s-b; eo++; @@ -1689,7 +2018,8 @@ static int awk_getline(rstream *rsm, var *v) s = strstr(b+rp, "\n\n"); if (s) { so = eo = s-b; - while (b[eo] == '\n') eo++; + while (b[eo] == '\n') + eo++; if (b[eo] != '\0') break; } @@ -1697,12 +2027,12 @@ static int awk_getline(rstream *rsm, var *v) } if (a > 0) { - memmove(m, (const void *)(m+a), p+1); + memmove(m, m+a, p+1); b = m; a = 0; } - qrealloc(&m, a+p+128, &size); + m = qrealloc(m, a+p+128, &size); b = m + a; pp = p; p += safe_read(fd, b+p, size-p-1); @@ -1732,6 +2062,8 @@ static int awk_getline(rstream *rsm, var *v) rsm->pos = p - eo; rsm->size = size; + debug_printf_eval("returning from %s(): %d\n", __func__, r); + return r; } @@ -1741,8 +2073,8 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i char c; const char *s = format; - if (int_as_int && n == (int)n) { - r = snprintf(b, size, "%d", (int)n); + if (int_as_int && n == (long long)n) { + r = snprintf(b, size, "%lld", (long long)n); } else { do { c = *s; } while (c && *++s); if (strchr("diouxX", c)) { @@ -1750,13 +2082,12 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i } else if (strchr("eEfgG", c)) { r = snprintf(b, size, format, n); } else { - runtime_error(EMSG_INV_FMT); + syntax_error(EMSG_INV_FMT); } } return r; } - /* formatted output into an allocated buffer, return ptr to buffer */ static char *awk_printf(node *n) { @@ -1773,7 +2104,7 @@ static char *awk_printf(node *n) i = 0; while (*f) { s = f; - while (*f && (*f != '%' || *(++f) == '%')) + while (*f && (*f != '%' || *++f == '%')) f++; while (*f && !isalpha(*f)) { if (*f == '*') @@ -1782,9 +2113,10 @@ static char *awk_printf(node *n) } incr = (f - s) + MAXVARFMT; - qrealloc(&b, incr + i, &bsize); + b = qrealloc(b, incr + i, &bsize); c = *f; - if (c != '\0') f++; + if (c != '\0') + f++; c1 = *f; *f = '\0'; arg = evaluate(nextarg(&n), v); @@ -1795,7 +2127,7 @@ static char *awk_printf(node *n) (char)getvar_i(arg) : *getvar_s(arg)); } else if (c == 's') { s1 = getvar_s(arg); - qrealloc(&b, incr+i+strlen(s1), &bsize); + b = qrealloc(b, incr+i+strlen(s1), &bsize); i += sprintf(b+i, s, s1); } else { i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE); @@ -1803,104 +2135,157 @@ static char *awk_printf(node *n) *f = c1; /* if there was an error while sprintf, return value is negative */ - if (i < j) i = j; + if (i < j) + i = j; } - b = xrealloc(b, i + 1); free(fmt); nvfree(v); + b = xrealloc(b, i + 1); b[i] = '\0'; return b; } -/* common substitution routine - * replace (nm) substring of (src) that match (n) with (repl), store - * result into (dest), return number of substitutions. If nm=0, replace - * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable - * subexpression matching (\1-\9) +/* Common substitution routine. + * Replace (nm)'th substring of (src) that matches (rn) with (repl), + * store result into (dest), return number of substitutions. + * If nm = 0, replace all matches. + * If src or dst is NULL, use $0. + * If subexp != 0, enable subexpression matching (\1-\9). */ -static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex) +static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) { - char *ds = NULL; - const char *s; + char *resbuf; const char *sp; - int c, i, j, di, rl, so, eo, nbs, n, dssize; + int match_no, residx, replen, resbufsize; + int regexec_flags; regmatch_t pmatch[10]; - regex_t sreg, *re; + regex_t sreg, *regex; + + resbuf = NULL; + residx = 0; + match_no = 0; + regexec_flags = 0; + regex = as_regex(rn, &sreg); + sp = getvar_s(src ? src : intvar[F0]); + replen = strlen(repl); + while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) { + int so = pmatch[0].rm_so; + int eo = pmatch[0].rm_eo; + + //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp); + resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize); + memcpy(resbuf + residx, sp, eo); + residx += eo; + if (++match_no >= nm) { + const char *s; + int nbs; - re = as_regex(rn, &sreg); - if (! src) src = intvar[F0]; - if (! dest) dest = intvar[F0]; - - i = di = 0; - sp = getvar_s(src); - rl = strlen(repl); - while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) { - so = pmatch[0].rm_so; - eo = pmatch[0].rm_eo; - - qrealloc(&ds, di + eo + rl, &dssize); - memcpy(ds + di, sp, eo); - di += eo; - if (++i >= nm) { /* replace */ - di -= (eo - so); + residx -= (eo - so); nbs = 0; for (s = repl; *s; s++) { - ds[di++] = c = *s; + char c = resbuf[residx++] = *s; if (c == '\\') { nbs++; continue; } - if (c == '&' || (ex && c >= '0' && c <= '9')) { - di -= ((nbs + 3) >> 1); + if (c == '&' || (subexp && c >= '0' && c <= '9')) { + int j; + residx -= ((nbs + 3) >> 1); j = 0; if (c != '&') { j = c - '0'; nbs++; } if (nbs % 2) { - ds[di++] = c; + resbuf[residx++] = c; } else { - n = pmatch[j].rm_eo - pmatch[j].rm_so; - qrealloc(&ds, di + rl + n, &dssize); - memcpy(ds + di, sp + pmatch[j].rm_so, n); - di += n; + int n = pmatch[j].rm_eo - pmatch[j].rm_so; + resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); + memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); + residx += n; } } nbs = 0; } } + regexec_flags = REG_NOTBOL; sp += eo; - if (i == nm) break; + if (match_no == nm) + break; if (eo == so) { - if (! (ds[di++] = *sp++)) break; + /* Empty match (e.g. "b*" will match anywhere). + * Advance by one char. */ +//BUG (bug 1333): +//gsub(/\>= 1; } nargs = i; - if (nargs < (info >> 30)) - runtime_error(EMSG_TOO_FEW_ARGS); + if ((uint32_t)nargs < (info >> 30)) + syntax_error(EMSG_TOO_FEW_ARGS); - switch (info & OPNMASK) { + info &= OPNMASK; + switch (info) { case B_a2: -#if ENABLE_FEATURE_AWK_MATH - setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1]))); -#else - runtime_error(EMSG_NO_MATH); -#endif + if (ENABLE_FEATURE_AWK_LIBM) + setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1]))); + else + syntax_error(EMSG_NO_MATH); break; - case B_sp: + case B_sp: { + char *s, *s1; + if (nargs > 2) { spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ? an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl); @@ -1940,77 +2329,84 @@ static var *exec_builtin(node *op, var *res) n = awk_split(as[0], spl, &s); s1 = s; clear_array(iamarray(av[1])); - for (i=1; i<=n; i++) - setari_u(av[1], i, nextword(&s1)); - free(s); + for (i = 1; i <= n; i++) + setari_u(av[1], i, nextword(&s)); + free(s1); setvar_i(res, n); break; + } + + case B_ss: { + char *s; - case B_ss: l = strlen(as[0]); i = getvar_i(av[1]) - 1; - if (i > l) i = l; - if (i < 0) i = 0; + if (i > l) + i = l; + if (i < 0) + i = 0; n = (nargs > 2) ? getvar_i(av[2]) : l-i; - if (n < 0) n = 0; - s = xmalloc(n+1); - strncpy(s, as[0]+i, n); - s[n] = '\0'; + if (n < 0) + n = 0; + s = xstrndup(as[0]+i, n); setvar_p(res, s); break; + } + /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5: + * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */ case B_an: - setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1])); + setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1])); break; case B_co: - setvar_i(res, ~(long)getvar_i(av[0])); + setvar_i(res, ~getvar_i_int(av[0])); break; case B_ls: - setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1])); + setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1])); break; case B_or: - setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1])); + setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1])); break; case B_rs: - setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1]))); + setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1])); break; case B_xo: - setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1])); + setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1])); break; case B_lo: - to_xxx = tolower; - goto lo_cont; - - case B_up: - to_xxx = toupper; - lo_cont: + case B_up: { + char *s, *s1; s1 = s = xstrdup(as[0]); while (*s1) { - *s1 = (*to_xxx)(*s1); + //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1); + if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a')) + *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20); s1++; } setvar_p(res, s); break; + } case B_ix: n = 0; ll = strlen(as[1]); l = strlen(as[0]) - ll; if (ll > 0 && l >= 0) { - if (! icase) { - s = strstr(as[0], as[1]); - if (s) n = (s - as[0]) + 1; + if (!icase) { + char *s = strstr(as[0], as[1]); + if (s) + n = (s - as[0]) + 1; } else { /* this piece of code is terribly slow and * really should be rewritten */ - for (i=0; i<=l; i++) { + for (i = 0; i <= l; i++) { if (strncasecmp(as[0]+i, as[1], ll) == 0) { n = i+1; break; @@ -2027,11 +2423,15 @@ static var *exec_builtin(node *op, var *res) else time(&tt); //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"; - i = strftime(buf, MAXVARFMT, + i = strftime(g_buf, MAXVARFMT, ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"), localtime(&tt)); - buf[i] = '\0'; - setvar_s(res, buf); + g_buf[i] = '\0'; + setvar_s(res, g_buf); + break; + + case B_mt: + setvar_i(res, do_mktime(as[0])); break; case B_ma: @@ -2047,7 +2447,8 @@ static var *exec_builtin(node *op, var *res) setvar_i(newvar("RSTART"), pmatch[0].rm_so); setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so); setvar_i(res, pmatch[0].rm_so); - if (re == &sreg) regfree(re); + if (re == &sreg) + regfree(re); break; case B_ge: @@ -2065,6 +2466,7 @@ static var *exec_builtin(node *op, var *res) nvfree(tv); return res; +#undef tspl } /* @@ -2075,48 +2477,60 @@ static var *exec_builtin(node *op, var *res) static var *evaluate(node *op, var *res) { - /* This procedure is recursive so we should count every byte */ - static var *fnargs = NULL; - static unsigned seed = 1; - static regex_t sreg; +/* This procedure is recursive so we should count every byte */ +#define fnargs (G.evaluate__fnargs) +/* seed is initialized to 1 */ +#define seed (G.evaluate__seed) +#define sreg (G.evaluate__sreg) - node *op1; var *v1; - union { - var *v; - const char *s; - double d; - int i; - } L, R; - uint32_t opinfo; - short opn; - union { - char *s; - rstream *rsm; - FILE *F; - var *v; - regex_t *re; - uint32_t info; - } X; if (!op) return setvar_s(res, NULL); + debug_printf_eval("entered %s()\n", __func__); + v1 = nvalloc(2); while (op) { + struct { + var *v; + const char *s; + } L = L; /* for compiler */ + struct { + var *v; + const char *s; + } R = R; + double L_d = L_d; + uint32_t opinfo; + int opn; + node *op1; + opinfo = op->info; - opn = (short)(opinfo & OPNMASK); - lineno = op->lineno; + opn = (opinfo & OPNMASK); + g_lineno = op->lineno; + op1 = op->l.n; + debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); /* execute inevitable things */ - op1 = op->l.n; - if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1); - if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1); - if (opinfo & OF_STR1) L.s = getvar_s(L.v); - if (opinfo & OF_STR2) R.s = getvar_s(R.v); - if (opinfo & OF_NUM1) L.d = getvar_i(L.v); + if (opinfo & OF_RES1) + L.v = evaluate(op1, v1); + if (opinfo & OF_RES2) + R.v = evaluate(op->r.n, v1+1); + if (opinfo & OF_STR1) { + L.s = getvar_s(L.v); + debug_printf_eval("L.s:'%s'\n", L.s); + } + if (opinfo & OF_STR2) { + R.s = getvar_s(R.v); + debug_printf_eval("R.s:'%s'\n", R.s); + } + if (opinfo & OF_NUM1) { + L_d = getvar_i(L.v); + debug_printf_eval("L_d:%f\n", L_d); + } + debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); switch (XC(opinfo & OPCLSMASK)) { /* -- iterative node type -- */ @@ -2129,13 +2543,12 @@ static var *evaluate(node *op, var *res) op->info |= OF_CHECKED; if (ptest(op1->r.n)) op->info &= ~OF_CHECKED; - op = op->a.n; } else { op = op->r.n; } } else { - op = (ptest(op1)) ? op->a.n : op->r.n; + op = ptest(op1) ? op->a.n : op->r.n; } break; @@ -2159,71 +2572,78 @@ static var *evaluate(node *op, var *res) break; case XC( OC_PRINT ): - case XC( OC_PRINTF ): - X.F = stdout; + case XC( OC_PRINTF ): { + FILE *F = stdout; + if (op->r.n) { - X.rsm = newfile(R.s); - if (! X.rsm->F) { + rstream *rsm = newfile(R.s); + if (!rsm->F) { if (opn == '|') { - X.rsm->F = popen(R.s, "w"); - if (X.rsm->F == NULL) + rsm->F = popen(R.s, "w"); + if (rsm->F == NULL) bb_perror_msg_and_die("popen"); - X.rsm->is_pipe = 1; + rsm->is_pipe = 1; } else { - X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a"); + rsm->F = xfopen(R.s, opn=='w' ? "w" : "a"); } } - X.F = X.rsm->F; + F = rsm->F; } if ((opinfo & OPCLSMASK) == OC_PRINT) { - if (! op1) { - fputs(getvar_s(intvar[F0]), X.F); + if (!op1) { + fputs(getvar_s(intvar[F0]), F); } else { while (op1) { - L.v = evaluate(nextarg(&op1), v1); - if (L.v->type & VF_NUMBER) { - fmt_num(buf, MAXVARFMT, getvar_s(intvar[OFMT]), - getvar_i(L.v), TRUE); - fputs(buf, X.F); + var *v = evaluate(nextarg(&op1), v1); + if (v->type & VF_NUMBER) { + fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]), + getvar_i(v), TRUE); + fputs(g_buf, F); } else { - fputs(getvar_s(L.v), X.F); + fputs(getvar_s(v), F); } - if (op1) fputs(getvar_s(intvar[OFS]), X.F); + if (op1) + fputs(getvar_s(intvar[OFS]), F); } } - fputs(getvar_s(intvar[ORS]), X.F); + fputs(getvar_s(intvar[ORS]), F); } else { /* OC_PRINTF */ - L.s = awk_printf(op1); - fputs(L.s, X.F); - free((char*)L.s); + char *s = awk_printf(op1); + fputs(s, F); + free(s); } - fflush(X.F); + fflush(F); break; + } + + case XC( OC_DELETE ): { + uint32_t info = op1->info & OPCLSMASK; + var *v; - case XC( OC_DELETE ): - X.info = op1->info & OPCLSMASK; - if (X.info == OC_VAR) { - R.v = op1->l.v; - } else if (X.info == OC_FNARG) { - R.v = &fnargs[op1->l.i]; + if (info == OC_VAR) { + v = op1->l.v; + } else if (info == OC_FNARG) { + v = &fnargs[op1->l.aidx]; } else { - runtime_error(EMSG_NOT_ARRAY); + syntax_error(EMSG_NOT_ARRAY); } if (op1->r.n) { + const char *s; clrvar(L.v); - L.s = getvar_s(evaluate(op1->r.n, v1)); - hash_remove(iamarray(R.v), L.s); + s = getvar_s(evaluate(op1->r.n, v1)); + hash_remove(iamarray(v), s); } else { - clear_array(iamarray(R.v)); + clear_array(iamarray(v)); } break; + } case XC( OC_NEWSOURCE ): - programname = op->l.s; + g_progname = op->l.new_progname; break; case XC( OC_RETURN ): @@ -2239,7 +2659,7 @@ static var *evaluate(node *op, var *res) break; case XC( OC_EXIT ): - awk_exit(L.d); + awk_exit(L_d); /* -- recursive node type -- */ @@ -2250,7 +2670,7 @@ static var *evaluate(node *op, var *res) goto v_cont; case XC( OC_FNARG ): - L.v = &fnargs[op->l.i]; + L.v = &fnargs[op->l.aidx]; v_cont: res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v; break; @@ -2267,71 +2687,87 @@ static var *evaluate(node *op, var *res) case XC( OC_MATCH ): op1 = op->r.n; re_cont: - X.re = as_regex(op1, &sreg); - R.i = regexec(X.re, L.s, 0, NULL, 0); - if (X.re == &sreg) regfree(X.re); - setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0)); + { + regex_t *re = as_regex(op1, &sreg); + int i = regexec(re, L.s, 0, NULL, 0); + if (re == &sreg) + regfree(re); + setvar_i(res, (i == 0) ^ (opn == '!')); + } break; case XC( OC_MOVE ): + debug_printf_eval("MOVE\n"); /* if source is a temporary string, jusk relink it to dest */ - if (R.v == v1+1 && R.v->string) { - res = setvar_p(L.v, R.v->string); - R.v->string = NULL; - } else { +//Disabled: if R.v is numeric but happens to have cached R.v->string, +//then L.v ends up being a string, which is wrong +// if (R.v == v1+1 && R.v->string) { +// res = setvar_p(L.v, R.v->string); +// R.v->string = NULL; +// } else { res = copyvar(L.v, R.v); - } +// } break; case XC( OC_TERNARY ): if ((op->r.n->info & OPCLSMASK) != OC_COLON) - runtime_error(EMSG_POSSIBLE_ERROR); + syntax_error(EMSG_POSSIBLE_ERROR); res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res); break; - case XC( OC_FUNC ): - if (!op->r.f->body.first) - runtime_error(EMSG_UNDEF_FUNC); + case XC( OC_FUNC ): { + var *vbeg, *v; + const char *sv_progname; - X.v = R.v = nvalloc(op->r.f->nargs+1); + /* The body might be empty, still has to eval the args */ + if (!op->r.n->info && !op->r.f->body.first) + syntax_error(EMSG_UNDEF_FUNC); + + vbeg = v = nvalloc(op->r.f->nargs + 1); while (op1) { - L.v = evaluate(nextarg(&op1), v1); - copyvar(R.v, L.v); - R.v->type |= VF_CHILD; - R.v->x.parent = L.v; - if (++R.v - X.v >= op->r.f->nargs) + var *arg = evaluate(nextarg(&op1), v1); + copyvar(v, arg); + v->type |= VF_CHILD; + v->x.parent = arg; + if (++v - vbeg >= op->r.f->nargs) break; } - R.v = fnargs; - fnargs = X.v; + v = fnargs; + fnargs = vbeg; + sv_progname = g_progname; - L.s = programname; res = evaluate(op->r.f->body.first, res); - programname = L.s; + g_progname = sv_progname; nvfree(fnargs); - fnargs = R.v; + fnargs = v; + break; + } case XC( OC_GETLINE ): - case XC( OC_PGETLINE ): + case XC( OC_PGETLINE ): { + rstream *rsm; + int i; + if (op1) { - X.rsm = newfile(L.s); - if (!X.rsm->F) { + rsm = newfile(L.s); + if (!rsm->F) { if ((opinfo & OPCLSMASK) == OC_PGETLINE) { - X.rsm->F = popen(L.s, "r"); - X.rsm->is_pipe = TRUE; + rsm->F = popen(L.s, "r"); + rsm->is_pipe = TRUE; } else { - X.rsm->F = fopen(L.s, "r"); /* not xfopen! */ + rsm->F = fopen_for_read(L.s); /* not xfopen! */ } } } else { - if (!iF) iF = next_input_file(); - X.rsm = iF; + if (!iF) + iF = next_input_file(); + rsm = iF; } - if (!X.rsm->F) { + if (!rsm || !rsm->F) { setvar_i(intvar[ERRNO], errno); setvar_i(res, -1); break; @@ -2340,105 +2776,129 @@ static var *evaluate(node *op, var *res) if (!op->r.n) R.v = intvar[F0]; - L.i = awk_getline(X.rsm, R.v); - if (L.i > 0) { - if (!op1) { - incvar(intvar[FNR]); - incvar(intvar[NR]); - } + i = awk_getline(rsm, R.v); + if (i > 0 && !op1) { + incvar(intvar[FNR]); + incvar(intvar[NR]); } - setvar_i(res, L.i); + setvar_i(res, i); break; + } /* simple builtins */ - case XC( OC_FBLTIN ): - switch (opn) { + case XC( OC_FBLTIN ): { + double R_d = R_d; /* for compiler */ + switch (opn) { case F_in: - R.d = (int)L.d; + R_d = (long long)L_d; break; case F_rn: - R.d = (double)rand() / (double)RAND_MAX; + R_d = (double)rand() / (double)RAND_MAX; break; -#if ENABLE_FEATURE_AWK_MATH + case F_co: - R.d = cos(L.d); - break; + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = cos(L_d); + break; + } case F_ex: - R.d = exp(L.d); - break; + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = exp(L_d); + break; + } case F_lg: - R.d = log(L.d); - break; + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = log(L_d); + break; + } case F_si: - R.d = sin(L.d); - break; + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = sin(L_d); + break; + } case F_sq: - R.d = sqrt(L.d); - break; -#else - case F_co: - case F_ex: - case F_lg: - case F_si: - case F_sq: - runtime_error(EMSG_NO_MATH); + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = sqrt(L_d); + break; + } + + syntax_error(EMSG_NO_MATH); break; -#endif + case F_sr: - R.d = (double)seed; - seed = op1 ? (unsigned)L.d : (unsigned)time(NULL); + R_d = (double)seed; + seed = op1 ? (unsigned)L_d : (unsigned)time(NULL); srand(seed); break; case F_ti: - R.d = time(NULL); + R_d = time(NULL); break; case F_le: - if (!op1) + debug_printf_eval("length: L.s:'%s'\n", L.s); + if (!op1) { L.s = getvar_s(intvar[F0]); - R.d = strlen(L.s); + debug_printf_eval("length: L.s='%s'\n", L.s); + } + else if (L.v->type & VF_ARRAY) { + R_d = L.v->x.array->nel; + debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel); + break; + } + R_d = strlen(L.s); break; case F_sy: - fflush(NULL); - R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s) + fflush_all(); + R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s) ? (system(L.s) >> 8) : 0; break; case F_ff: - if (!op1) + if (!op1) { fflush(stdout); - else { - if (L.s && *L.s) { - X.rsm = newfile(L.s); - fflush(X.rsm->F); - } else { - fflush(NULL); - } + } else if (L.s && *L.s) { + rstream *rsm = newfile(L.s); + fflush(rsm->F); + } else { + fflush_all(); } break; - case F_cl: - X.rsm = (rstream *)hash_search(fdhash, L.s); - if (X.rsm) { - R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F); - free(X.rsm->buffer); + case F_cl: { + rstream *rsm; + int err = 0; + rsm = (rstream *)hash_search(fdhash, L.s); + debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm); + if (rsm) { + debug_printf_eval("OC_FBLTIN F_cl " + "rsm->is_pipe:%d, ->F:%p\n", + rsm->is_pipe, rsm->F); + /* Can be NULL if open failed. Example: + * getline line <"doesnt_exist"; + * close("doesnt_exist"); <--- here rsm->F is NULL + */ + if (rsm->F) + err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); + free(rsm->buffer); hash_remove(fdhash, L.s); } - if (R.i != 0) + if (err) setvar_i(intvar[ERRNO], errno); - R.d = (double)R.i; + R_d = (double)err; break; } - setvar_i(res, R.d); + } /* switch */ + setvar_i(res, R_d); break; + } case XC( OC_BUILTIN ): res = exec_builtin(op, res); @@ -2448,60 +2908,58 @@ static var *evaluate(node *op, var *res) setvar_p(res, awk_printf(op1)); break; - case XC( OC_UNARY ): - X.v = R.v; - L.d = R.d = getvar_i(R.v); + case XC( OC_UNARY ): { + double Ld, R_d; + + Ld = R_d = getvar_i(R.v); switch (opn) { case 'P': - L.d = ++R.d; + Ld = ++R_d; goto r_op_change; case 'p': - R.d++; + R_d++; goto r_op_change; case 'M': - L.d = --R.d; + Ld = --R_d; goto r_op_change; case 'm': - R.d--; - goto r_op_change; + R_d--; + r_op_change: + setvar_i(R.v, R_d); + break; case '!': - L.d = istrue(X.v) ? 0 : 1; + Ld = !istrue(R.v); break; case '-': - L.d = -R.d; + Ld = -R_d; break; - r_op_change: - setvar_i(X.v, R.d); } - setvar_i(res, L.d); + setvar_i(res, Ld); break; + } - case XC( OC_FIELD ): - R.i = (int)getvar_i(R.v); - if (R.i == 0) { + case XC( OC_FIELD ): { + int i = (int)getvar_i(R.v); + if (i == 0) { res = intvar[F0]; } else { split_f0(); - if (R.i > nfields) - fsrealloc(R.i); - res = &Fields[R.i - 1]; + if (i > nfields) + fsrealloc(i); + res = &Fields[i - 1]; } break; + } /* concatenation (" ") and index joining (",") */ case XC( OC_CONCAT ): - case XC( OC_COMMA ): - opn = strlen(L.s) + strlen(R.s) + 2; - X.s = xmalloc(opn); - strcpy(X.s, L.s); - if ((opinfo & OPCLSMASK) == OC_COMMA) { - L.s = getvar_s(intvar[SUBSEP]); - X.s = xrealloc(X.s, opn + strlen(L.s)); - strcat(X.s, L.s); - } - strcat(X.s, R.s); - setvar_p(res, X.s); + case XC( OC_COMMA ): { + const char *sep = ""; + if ((opinfo & OPCLSMASK) == OC_COMMA) + sep = getvar_s(intvar[SUBSEP]); + setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s)); break; + } case XC( OC_LAND ): setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0); @@ -2512,61 +2970,69 @@ static var *evaluate(node *op, var *res) break; case XC( OC_BINARY ): - case XC( OC_REPLACE ): - R.d = getvar_i(R.v); + case XC( OC_REPLACE ): { + double R_d = getvar_i(R.v); + debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn); switch (opn) { case '+': - L.d += R.d; + L_d += R_d; break; case '-': - L.d -= R.d; + L_d -= R_d; break; case '*': - L.d *= R.d; + L_d *= R_d; break; case '/': - if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO); - L.d /= R.d; + if (R_d == 0) + syntax_error(EMSG_DIV_BY_ZERO); + L_d /= R_d; break; case '&': -#if ENABLE_FEATURE_AWK_MATH - L.d = pow(L.d, R.d); -#else - runtime_error(EMSG_NO_MATH); -#endif + if (ENABLE_FEATURE_AWK_LIBM) + L_d = pow(L_d, R_d); + else + syntax_error(EMSG_NO_MATH); break; case '%': - if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO); - L.d -= (int)(L.d / R.d) * R.d; + if (R_d == 0) + syntax_error(EMSG_DIV_BY_ZERO); + L_d -= (long long)(L_d / R_d) * R_d; break; } - res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d); + debug_printf_eval("BINARY/REPLACE result:%f\n", L_d); + res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d); break; + } + + case XC( OC_COMPARE ): { + int i = i; /* for compiler */ + double Ld; - case XC( OC_COMPARE ): if (is_numeric(L.v) && is_numeric(R.v)) { - L.d = getvar_i(L.v) - getvar_i(R.v); + Ld = getvar_i(L.v) - getvar_i(R.v); } else { - L.s = getvar_s(L.v); - R.s = getvar_s(R.v); - L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s); + const char *l = getvar_s(L.v); + const char *r = getvar_s(R.v); + Ld = icase ? strcasecmp(l, r) : strcmp(l, r); } switch (opn & 0xfe) { case 0: - R.i = (L.d > 0); + i = (Ld > 0); break; case 2: - R.i = (L.d >= 0); + i = (Ld >= 0); break; case 4: - R.i = (L.d == 0); + i = (Ld == 0); break; } - setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0); + setvar_i(res, (i == 0) ^ (opn & 1)); break; + } default: - runtime_error(EMSG_POSSIBLE_ERROR); + syntax_error(EMSG_POSSIBLE_ERROR); } if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS) op = op->a.n; @@ -2574,9 +3040,14 @@ static var *evaluate(node *op, var *res) break; if (nextrec) break; - } + } /* while (op) */ + nvfree(v1); + debug_printf_eval("returning from %s(): %p\n", __func__, res); return res; +#undef fnargs +#undef seed +#undef sreg } @@ -2613,21 +3084,18 @@ static int awk_exit(int r) * otherwise return 0 */ static int is_assignment(const char *expr) { - char *exprc, *s, *s0, *s1; + char *exprc, *val; - exprc = xstrdup(expr); - if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) { - free(exprc); + if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { return FALSE; } - *(s++) = '\0'; - s0 = s1 = s; - while (*s) - *(s1++) = nextchar(&s); + exprc = xstrdup(expr); + val = exprc + (val - expr); + *val++ = '\0'; - *s1 = '\0'; - setvar_u(newvar(exprc), s0); + unescape_string_in_place(val); + setvar_u(newvar(exprc), val); free(exprc); return TRUE; } @@ -2635,49 +3103,60 @@ static int is_assignment(const char *expr) /* switch to next input file */ static rstream *next_input_file(void) { - static rstream rsm; - static int files_happen = FALSE; +#define rsm (G.next_input_file__rsm) +#define files_happen (G.next_input_file__files_happen) - FILE *F = NULL; + FILE *F; const char *fname, *ind; - if (rsm.F) fclose(rsm.F); + if (rsm.F) + fclose(rsm.F); rsm.F = NULL; rsm.pos = rsm.adv = 0; - do { + for (;;) { if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { if (files_happen) return NULL; fname = "-"; F = stdin; - } else { - ind = getvar_s(incvar(intvar[ARGIND])); - fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); - if (fname && *fname && !is_assignment(fname)) - F = afopen(fname, "r"); + break; + } + ind = getvar_s(incvar(intvar[ARGIND])); + fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); + if (fname && *fname && !is_assignment(fname)) { + F = xfopen_stdin(fname); + break; } - } while (!F); + } files_happen = TRUE; setvar_s(intvar[FILENAME], fname); rsm.F = F; return &rsm; +#undef rsm +#undef files_happen } -int awk_main(int argc, char **argv); +int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int awk_main(int argc, char **argv) { unsigned opt; - char *opt_F, *opt_W; - llist_t *opt_v = NULL; - int i, j, flen; + char *opt_F; + llist_t *list_v = NULL; + llist_t *list_f = NULL; +#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS + llist_t *list_e = NULL; +#endif + int i, j; var *v; var tv; char **envp; char *vnames = (char *)vNames; /* cheat */ char *vvalues = (char *)vValues; + INIT_G(); + /* Undo busybox.c, or else strtod may eat ','! This breaks parsing: * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */ if (ENABLE_LOCALE_SUPPORT) @@ -2686,7 +3165,7 @@ int awk_main(int argc, char **argv) zero_out_var(&tv); /* allocate global buffer */ - buf = xmalloc(MAXVARFMT + 1); + g_buf = xmalloc(MAXVARFMT + 1); vhash = hash_init(); ahash = hash_init(); @@ -2716,52 +3195,58 @@ int awk_main(int argc, char **argv) /* Huh, people report that sometimes environ is NULL. Oh well. */ if (environ) for (envp = environ; *envp; envp++) { - char *s = xstrdup(*envp); + /* environ is writable, thus we don't strdup it needlessly */ + char *s = *envp; char *s1 = strchr(s, '='); if (s1) { - *s1++ = '\0'; - setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1); + *s1 = '\0'; + /* Both findvar and setvar_u take const char* + * as 2nd arg -> environment is not trashed */ + setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1); + *s1 = '='; } - free(s); } - opt_complementary = "v::"; - opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &programname, &opt_W); + opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL); argv += optind; argc -= optind; - if (opt & 0x1) - setvar_s(intvar[FS], opt_F); // -F - while (opt_v) { /* -v */ - if (!is_assignment(llist_pop(&opt_v))) + if (opt & OPT_W) + bb_error_msg("warning: option -W is ignored"); + if (opt & OPT_F) { + unescape_string_in_place(opt_F); + setvar_s(intvar[FS], opt_F); + } + while (list_v) { + if (!is_assignment(llist_pop(&list_v))) bb_show_usage(); } - if (opt & 0x4) { // -f - char *s = s; /* die, gcc, die */ - FILE *from_file = afopen(programname, "r"); + while (list_f) { + char *s = NULL; + FILE *from_file; + + g_progname = llist_pop(&list_f); + from_file = xfopen_stdin(g_progname); /* one byte is reserved for some trick in next_token */ - if (fseek(from_file, 0, SEEK_END) == 0) { - flen = ftell(from_file); - s = xmalloc(flen + 4); - fseek(from_file, 0, SEEK_SET); - i = 1 + fread(s + 1, 1, flen, from_file); - } else { - for (i = j = 1; j > 0; i += j) { - s = xrealloc(s, i + 4096); - j = fread(s + i, 1, 4094, from_file); - } + for (i = j = 1; j > 0; i += j) { + s = xrealloc(s, i + 4096); + j = fread(s + i, 1, 4094, from_file); } s[i] = '\0'; fclose(from_file); parse_program(s + 1); free(s); - } else { // no -f: take program from 1st parameter - if (!argc) + } + g_progname = "cmd. line"; +#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS + while (list_e) { + parse_program(llist_pop(&list_e)); + } +#endif + if (!(opt & (OPT_f | OPT_e))) { + if (!*argv) bb_show_usage(); - programname = "cmd. line"; parse_program(*argv++); argc--; } - if (opt & 0x8) // -W - bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W); /* fill in ARGV array */ setvar_i(intvar[ARGC], argc + 1); @@ -2775,7 +3260,8 @@ int awk_main(int argc, char **argv) awk_exit(EXIT_SUCCESS); /* input file could already be opened in BEGIN block */ - if (!iF) iF = next_input_file(); + if (!iF) + iF = next_input_file(); /* passing through input files */ while (iF) { @@ -2793,7 +3279,7 @@ int awk_main(int argc, char **argv) } if (i < 0) - runtime_error(strerror(errno)); + syntax_error(strerror(errno)); iF = next_input_file(); }