X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=editors%2Fawk.c;h=0b573a065c18bfd237886ee563f661c5199acc78;hb=b79a0fef99627c457548e804fcd6e162b116cbe8;hp=30c6b88ef7827c32c3238d7ffc294c4fba90783a;hpb=f9782ffc451a99fcd7ab54b05500dbf94e687066;p=oweals%2Fbusybox.git diff --git a/editors/awk.c b/editors/awk.c index 30c6b88ef..0b573a065 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -4,9 +4,16 @@ * * Copyright (C) 2002 by Dmitry Zakharov * - * Licensed under the GPL v2 or later, see the file LICENSE in this tarball. + * Licensed under GPLv2 or later, see file LICENSE in this source tree. */ +//usage:#define awk_trivial_usage +//usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..." +//usage:#define awk_full_usage "\n\n" +//usage: " -v VAR=VAL Set variable" +//usage: "\n -F SEP Use SEP as field separator" +//usage: "\n -f FILE Read program from FILE" + #include "libbb.h" #include "xregex.h" #include @@ -17,10 +24,18 @@ /* If you comment out one of these below, it will be #defined later * to perform debug printfs to stderr: */ #define debug_printf_walker(...) do {} while (0) +#define debug_printf_eval(...) do {} while (0) +#define debug_printf_parse(...) do {} while (0) #ifndef debug_printf_walker # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__)) #endif +#ifndef debug_printf_eval +# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__)) +#endif +#ifndef debug_printf_parse +# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__)) +#endif @@ -140,7 +155,7 @@ typedef struct tsplitter_s { /* simple token classes */ /* Order and hex values are very important!!! See next_token() */ -#define TC_SEQSTART 1 /* ( */ +#define TC_SEQSTART 1 /* ( */ #define TC_SEQTERM (1 << 1) /* ) */ #define TC_REGEXP (1 << 2) /* /.../ */ #define TC_OUTRDR (1 << 3) /* | > >> */ @@ -227,6 +242,9 @@ typedef struct tsplitter_s { * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1, * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string */ +#undef P +#undef PRIMASK +#undef PRIMASK2 #define P(x) (x << 24) #define PRIMASK 0x7F000000 #define PRIMASK2 0x7E000000 @@ -273,94 +291,86 @@ enum { /* tokens and their corresponding info values */ -#define NTC "\377" /* switch to next token class (tc<<1) */ -#define NTCC '\377' +#define NTC "\377" /* switch to next token class (tc<<1) */ +#define NTCC '\377' -#define OC_B OC_BUILTIN +#define OC_B OC_BUILTIN static const char tokenlist[] ALIGN1 = - "\1(" NTC - "\1)" NTC - "\1/" NTC /* REGEXP */ - "\2>>" "\1>" "\1|" NTC /* OUTRDR */ - "\2++" "\2--" NTC /* UOPPOST */ - "\2++" "\2--" "\1$" NTC /* UOPPRE1 */ - "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */ - "\2*=" "\2/=" "\2%=" "\2^=" - "\1+" "\1-" "\3**=" "\2**" - "\1/" "\1%" "\1^" "\1*" - "\2!=" "\2>=" "\2<=" "\1>" - "\1<" "\2!~" "\1~" "\2&&" - "\2||" "\1?" "\1:" NTC - "\2in" NTC - "\1," NTC - "\1|" NTC - "\1+" "\1-" "\1!" NTC /* UOPPRE2 */ - "\1]" NTC - "\1{" NTC - "\1}" NTC - "\1;" NTC - "\1\n" NTC - "\2if" "\2do" "\3for" "\5break" /* STATX */ - "\10continue" "\6delete" "\5print" - "\6printf" "\4next" "\10nextfile" - "\6return" "\4exit" NTC - "\5while" NTC - "\4else" NTC - - "\3and" "\5compl" "\6lshift" "\2or" - "\6rshift" "\3xor" - "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */ - "\3cos" "\3exp" "\3int" "\3log" - "\4rand" "\3sin" "\4sqrt" "\5srand" - "\6gensub" "\4gsub" "\5index" "\6length" - "\5match" "\5split" "\7sprintf" "\3sub" - "\6substr" "\7systime" "\10strftime" "\6mktime" - "\7tolower" "\7toupper" NTC - "\7getline" NTC - "\4func" "\10function" NTC - "\5BEGIN" NTC - "\3END" "\0" + "\1(" NTC + "\1)" NTC + "\1/" NTC /* REGEXP */ + "\2>>" "\1>" "\1|" NTC /* OUTRDR */ + "\2++" "\2--" NTC /* UOPPOST */ + "\2++" "\2--" "\1$" NTC /* UOPPRE1 */ + "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */ + "\2*=" "\2/=" "\2%=" "\2^=" + "\1+" "\1-" "\3**=" "\2**" + "\1/" "\1%" "\1^" "\1*" + "\2!=" "\2>=" "\2<=" "\1>" + "\1<" "\2!~" "\1~" "\2&&" + "\2||" "\1?" "\1:" NTC + "\2in" NTC + "\1," NTC + "\1|" NTC + "\1+" "\1-" "\1!" NTC /* UOPPRE2 */ + "\1]" NTC + "\1{" NTC + "\1}" NTC + "\1;" NTC + "\1\n" NTC + "\2if" "\2do" "\3for" "\5break" /* STATX */ + "\10continue" "\6delete" "\5print" + "\6printf" "\4next" "\10nextfile" + "\6return" "\4exit" NTC + "\5while" NTC + "\4else" NTC + + "\3and" "\5compl" "\6lshift" "\2or" + "\6rshift" "\3xor" + "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */ + "\3cos" "\3exp" "\3int" "\3log" + "\4rand" "\3sin" "\4sqrt" "\5srand" + "\6gensub" "\4gsub" "\5index" "\6length" + "\5match" "\5split" "\7sprintf" "\3sub" + "\6substr" "\7systime" "\10strftime" "\6mktime" + "\7tolower" "\7toupper" NTC + "\7getline" NTC + "\4func" "\10function" NTC + "\5BEGIN" NTC + "\3END" + /* compiler adds trailing "\0" */ ; static const uint32_t tokeninfo[] = { 0, 0, OC_REGEXP, - xS|'a', xS|'w', xS|'|', - OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', - OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', - OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), - OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', - OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', - OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', - OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', - OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', - OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', - OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, - OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, - OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', - OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), - OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', - OC_COLON|xx|P(67)|':', - OC_IN|SV|P(49), + xS|'a', xS|'w', xS|'|', + OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', + OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5), + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', + OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', + OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, + OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55), + OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':', + OC_IN|SV|P(49), /* in */ OC_COMMA|SS|P(80), OC_PGETLINE|SV|P(37), - OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', - OC_UNARY|xV|P(19)|'!', + OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', + 0, /* ] */ 0, 0, 0, - 0, - 0, - ST_IF, ST_DO, ST_FOR, OC_BREAK, - OC_CONTINUE, OC_DELETE|Vx, OC_PRINT, - OC_PRINTF, OC_NEXT, OC_NEXTFILE, - OC_RETURN|Vx, OC_EXIT|Nx, + 0, /* \n */ + ST_IF, ST_DO, ST_FOR, OC_BREAK, + OC_CONTINUE, OC_DELETE|Vx, OC_PRINT, + OC_PRINTF, OC_NEXT, OC_NEXTFILE, + OC_RETURN|Vx, OC_EXIT|Nx, ST_WHILE, - 0, + 0, /* else */ OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83), OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83), @@ -372,9 +382,9 @@ static const uint32_t tokeninfo[] = { OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b), OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49), OC_GETLINE|SV|P(0), - 0, 0, + 0, 0, 0, - 0 + 0 /* END */ }; /* internal variable names and their initial values */ @@ -384,7 +394,7 @@ enum { ORS, RS, RT, FILENAME, SUBSEP, F0, ARGIND, ARGC, ARGV, ERRNO, FNR, NR, - NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS + NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS }; static const char vNames[] ALIGN1 = @@ -429,13 +439,13 @@ struct globals { smallint nextrec; smallint nextfile; smallint is_f0_split; + smallint t_rollback; }; struct globals2 { uint32_t t_info; /* often used */ uint32_t t_tclass; char *t_string; int t_lineno; - int t_rollback; var *intvar[NUM_INTERNAL_VARS]; /* often used */ @@ -493,11 +503,11 @@ struct globals2 { #define nextrec (G1.nextrec ) #define nextfile (G1.nextfile ) #define is_f0_split (G1.is_f0_split ) +#define t_rollback (G1.t_rollback ) #define t_info (G.t_info ) #define t_tclass (G.t_tclass ) #define t_string (G.t_string ) #define t_lineno (G.t_lineno ) -#define t_rollback (G.t_rollback ) #define intvar (G.intvar ) #define fsplitter (G.fsplitter ) #define rsplitter (G.rsplitter ) @@ -528,9 +538,7 @@ static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin"; static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array"; static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error"; static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; -#if !ENABLE_FEATURE_AWK_LIBM static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; -#endif static void zero_out_var(var *vp) { @@ -688,11 +696,27 @@ static char nextchar(char **s) pps = *s; if (c == '\\') c = bb_process_escape_sequence((const char**)s); - if (c == '\\' && *s == pps) - c = *(*s)++; + /* Example awk statement: + * s = "abc\"def" + * we must treat \" as " + */ + if (c == '\\' && *s == pps) { /* unrecognized \z? */ + c = *(*s); /* yes, fetch z */ + if (c) + (*s)++; /* advance unless z = NUL */ + } return c; } +/* TODO: merge with strcpy_and_process_escape_sequences()? + */ +static void unescape_string_in_place(char *s1) +{ + char *s = s1; + while ((*s1 = nextchar(&s)) != '\0') + s1++; +} + static ALWAYS_INLINE int isalnum_(int c) { return (isalnum(c) || c == '_'); @@ -700,14 +724,25 @@ static ALWAYS_INLINE int isalnum_(int c) static double my_strtod(char **pp) { -#if ENABLE_DESKTOP - if ((*pp)[0] == '0' - && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1])) - ) { - return strtoull(*pp, pp, 0); + char *cp = *pp; + if (ENABLE_DESKTOP && cp[0] == '0') { + /* Might be hex or octal integer: 0x123abc or 07777 */ + char c = (cp[1] | 0x20); + if (c == 'x' || isdigit(cp[1])) { + unsigned long long ull = strtoull(cp, pp, 0); + if (c == 'x') + return ull; + c = **pp; + if (!isdigit(c) && c != '.') + return ull; + /* else: it may be a floating number. Examples: + * 009.123 (*pp points to '9') + * 000.123 (*pp points to '.') + * fall through to strtod. + */ + } } -#endif - return strtod(*pp, pp); + return strtod(cp, pp); } /* -------- working with variables (set/get/copy/etc) -------- */ @@ -817,17 +852,21 @@ static double getvar_i(var *v) v->number = 0; s = v->string; if (s && *s) { + debug_printf_eval("getvar_i: '%s'->", s); v->number = my_strtod(&s); + debug_printf_eval("%f (s:'%s')\n", v->number, s); if (v->type & VF_USER) { s = skip_spaces(s); if (*s != '\0') v->type &= ~VF_USER; } } else { + debug_printf_eval("getvar_i: '%s'->zero\n", s); v->type &= ~VF_USER; } v->type |= VF_CACHED; } + debug_printf_eval("getvar_i: %f\n", v->number); return v->number; } @@ -849,6 +888,7 @@ static var *copyvar(var *dest, const var *src) if (dest != src) { clrvar(dest); dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR)); + debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string); dest->number = src->number; if (src->string) dest->string = xstrdup(src->string); @@ -965,7 +1005,6 @@ static uint32_t next_token(uint32_t expected) const char *tl; uint32_t tc; const uint32_t *ti; - int l; if (t_rollback) { t_rollback = FALSE; @@ -989,20 +1028,23 @@ static uint32_t next_token(uint32_t expected) if (*p == '\0') { tc = TC_EOF; + debug_printf_parse("%s: token found: TC_EOF\n", __func__); } else if (*p == '\"') { /* it's a string */ t_string = s = ++p; while (*p != '\"') { - char *pp = p; + char *pp; if (*p == '\0' || *p == '\n') syntax_error(EMSG_UNEXP_EOS); + pp = p; *s++ = nextchar(&pp); p = pp; } p++; *s = '\0'; tc = TC_STRING; + debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string); } else if ((expected & TC_REGEXP) && *p == '/') { /* it's regexp */ @@ -1025,15 +1067,17 @@ static uint32_t next_token(uint32_t expected) p++; *s = '\0'; tc = TC_REGEXP; + debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string); } else if (*p == '.' || isdigit(*p)) { /* it's a number */ char *pp = p; t_double = my_strtod(&pp); p = pp; - if (*pp == '.') + if (*p == '.') syntax_error(EMSG_UNEXP_TOKEN); tc = TC_NUMBER; + debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double); } else { /* search for something known */ @@ -1041,53 +1085,56 @@ static uint32_t next_token(uint32_t expected) tc = 0x00000001; ti = tokeninfo; while (*tl) { - l = *tl++; - if (l == NTCC) { + int l = (unsigned char) *tl++; + if (l == (unsigned char) NTCC) { tc <<= 1; continue; } - /* if token class is expected, token - * matches and it's not a longer word, - * then this is what we are looking for + /* if token class is expected, + * token matches, + * and it's not a longer word, */ if ((tc & (expected | TC_WORD | TC_NEWLINE)) - && *tl == *p && strncmp(p, tl, l) == 0 + && strncmp(p, tl, l) == 0 && !((tc & TC_WORD) && isalnum_(p[l])) ) { + /* then this is what we are looking for */ t_info = *ti; + debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info); p += l; - break; + goto token_found; } ti++; tl += l; } - - if (!*tl) { - /* it's a name (var/array/function), - * otherwise it's something wrong - */ - if (!isalnum_(*p)) - syntax_error(EMSG_UNEXP_TOKEN); - - t_string = --p; - while (isalnum_(*++p)) { - p[-1] = *p; - } - p[-1] = '\0'; - tc = TC_VARIABLE; - /* also consume whitespace between functionname and bracket */ - if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) - p = skip_spaces(p); - if (*p == '(') { - tc = TC_FUNCTION; - } else { - if (*p == '[') { - p++; - tc = TC_ARRAY; - } - } + /* not a known token */ + + /* is it a name? (var/array/function) */ + if (!isalnum_(*p)) + syntax_error(EMSG_UNEXP_TOKEN); /* no */ + /* yes */ + t_string = --p; + while (isalnum_(*++p)) { + p[-1] = *p; + } + p[-1] = '\0'; + tc = TC_VARIABLE; + /* also consume whitespace between functionname and bracket */ + if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY)) + p = skip_spaces(p); + if (*p == '(') { + tc = TC_FUNCTION; + debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string); + } else { + if (*p == '[') { + p++; + tc = TC_ARRAY; + debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string); + } else + debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string); } } + token_found: g_pos = p; /* skipping newlines in some cases */ @@ -1134,15 +1181,13 @@ static node *new_node(uint32_t info) return n; } -static node *mk_re_node(const char *s, node *n, regex_t *re) +static void mk_re_node(const char *s, node *n, regex_t *re) { n->info = OC_REGEXP; n->l.re = re; n->r.ire = re + 1; xregcomp(re, s, REG_EXTENDED); xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); - - return n; } static node *condition(void) @@ -1161,19 +1206,24 @@ static node *parse_expr(uint32_t iexp) uint32_t tc, xtc; var *v; + debug_printf_parse("%s(%x)\n", __func__, iexp); + sn.info = PRIMASK; sn.r.n = glptr = NULL; xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp; while (!((tc = next_token(xtc)) & iexp)) { + if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) { /* input redirection (<) attached to glptr node */ + debug_printf_parse("%s: input redir\n", __func__); cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); cn->a.n = glptr; xtc = TC_OPERAND | TC_UOPPRE; glptr = NULL; } else if (tc & (TC_BINOP | TC_UOPPOST)) { + debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__); /* for binary and postfix-unary operators, jump back over * previous operators with higher priority */ vn = cn; @@ -1203,6 +1253,7 @@ static node *parse_expr(uint32_t iexp) vn->a.n = cn; } else { + debug_printf_parse("%s: other\n", __func__); /* for operands and prefix-unary operators, attach them * to last node */ vn = cn; @@ -1210,12 +1261,14 @@ static node *parse_expr(uint32_t iexp) cn->a.n = vn; xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP; if (tc & (TC_OPERAND | TC_REGEXP)) { + debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__); xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp; /* one should be very careful with switch on tclass - * only simple tclasses should be used! */ switch (tc) { case TC_VARIABLE: case TC_ARRAY: + debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); cn->info = OC_VAR; v = hash_search(ahash, t_string); if (v != NULL) { @@ -1232,6 +1285,7 @@ static node *parse_expr(uint32_t iexp) case TC_NUMBER: case TC_STRING: + debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); cn->info = OC_VAR; v = cn->l.v = xzalloc(sizeof(var)); if (tc & TC_NUMBER) @@ -1241,32 +1295,41 @@ static node *parse_expr(uint32_t iexp) break; case TC_REGEXP: + debug_printf_parse("%s: TC_REGEXP\n", __func__); mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2)); break; case TC_FUNCTION: + debug_printf_parse("%s: TC_FUNCTION\n", __func__); cn->info = OC_FUNC; cn->r.f = newfunc(t_string); cn->l.n = condition(); break; case TC_SEQSTART: + debug_printf_parse("%s: TC_SEQSTART\n", __func__); cn = vn->r.n = parse_expr(TC_SEQTERM); + if (!cn) + syntax_error("Empty sequence"); cn->a.n = vn; break; case TC_GETLINE: + debug_printf_parse("%s: TC_GETLINE\n", __func__); glptr = cn; xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp; break; case TC_BUILTIN: + debug_printf_parse("%s: TC_BUILTIN\n", __func__); cn->l.n = condition(); break; } } } } + + debug_printf_parse("%s() returns %p\n", __func__, sn.r.n); return sn.r.n; } @@ -1335,18 +1398,25 @@ static void chain_group(void) } while (c & TC_NEWLINE); if (c & TC_GRPSTART) { + debug_printf_parse("%s: TC_GRPSTART\n", __func__); while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) { + debug_printf_parse("%s: !TC_GRPTERM\n", __func__); if (t_tclass & TC_NEWLINE) continue; rollback_token(); chain_group(); } + debug_printf_parse("%s: TC_GRPTERM\n", __func__); } else if (c & (TC_OPSEQ | TC_OPTERM)) { + debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__); rollback_token(); chain_expr(OC_EXEC | Vx); - } else { /* TC_STATEMNT */ + } else { + /* TC_STATEMNT */ + debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__); switch (t_info & OPCLSMASK) { case ST_IF: + debug_printf_parse("%s: ST_IF\n", __func__); n = chain_node(OC_BR | Vx); n->l.n = condition(); chain_group(); @@ -1361,12 +1431,14 @@ static void chain_group(void) break; case ST_WHILE: + debug_printf_parse("%s: ST_WHILE\n", __func__); n2 = condition(); n = chain_loop(NULL); n->l.n = n2; break; case ST_DO: + debug_printf_parse("%s: ST_DO\n", __func__); n2 = chain_node(OC_EXEC); n = chain_loop(NULL); n2->a.n = n->a.n; @@ -1375,6 +1447,7 @@ static void chain_group(void) break; case ST_FOR: + debug_printf_parse("%s: ST_FOR\n", __func__); next_token(TC_SEQSTART); n2 = parse_expr(TC_SEMICOL | TC_SEQTERM); if (t_tclass & TC_SEQTERM) { /* for-in */ @@ -1400,6 +1473,7 @@ static void chain_group(void) case OC_PRINT: case OC_PRINTF: + debug_printf_parse("%s: OC_PRINT[F]\n", __func__); n = chain_node(t_info); n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM); if (t_tclass & TC_OUTRDR) { @@ -1411,17 +1485,20 @@ static void chain_group(void) break; case OC_BREAK: + debug_printf_parse("%s: OC_BREAK\n", __func__); n = chain_node(OC_EXEC); n->a.n = break_ptr; break; case OC_CONTINUE: + debug_printf_parse("%s: OC_CONTINUE\n", __func__); n = chain_node(OC_EXEC); n->a.n = continue_ptr; break; /* delete, next, nextfile, return, exit */ default: + debug_printf_parse("%s: default\n", __func__); chain_expr(t_info); } } @@ -1439,19 +1516,24 @@ static void parse_program(char *p) while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART | TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { - if (tclass & TC_OPTERM) + if (tclass & TC_OPTERM) { + debug_printf_parse("%s: TC_OPTERM\n", __func__); continue; + } seq = &mainseq; if (tclass & TC_BEGIN) { + debug_printf_parse("%s: TC_BEGIN\n", __func__); seq = &beginseq; chain_group(); } else if (tclass & TC_END) { + debug_printf_parse("%s: TC_END\n", __func__); seq = &endseq; chain_group(); } else if (tclass & TC_FUNCDECL) { + debug_printf_parse("%s: TC_FUNCDECL\n", __func__); next_token(TC_FUNCTION); g_pos++; f = newfunc(t_string); @@ -1469,22 +1551,27 @@ static void parse_program(char *p) clear_array(ahash); } else if (tclass & TC_OPSEQ) { + debug_printf_parse("%s: TC_OPSEQ\n", __func__); rollback_token(); cn = chain_node(OC_TEST); cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART); if (t_tclass & TC_GRPSTART) { + debug_printf_parse("%s: TC_GRPSTART\n", __func__); rollback_token(); chain_group(); } else { + debug_printf_parse("%s: !TC_GRPSTART\n", __func__); chain_node(OC_PRINT); } cn->r.n = mainseq.last; } else /* if (tclass & TC_GRPSTART) */ { + debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__); rollback_token(); chain_group(); } } + debug_printf_parse("%s: TC_EOF\n", __func__); } @@ -1502,10 +1589,10 @@ static node *mk_splitter(const char *s, tsplitter *spl) regfree(re); regfree(ire); // TODO: nuke ire, use re+1? } - if (strlen(s) > 1) { + if (s[0] && s[1]) { /* strlen(s) > 1 */ mk_re_node(s, n, re); } else { - n->info = (uint32_t) *s; + n->info = (uint32_t) s[0]; } return n; @@ -1541,7 +1628,10 @@ static regex_t *as_regex(node *op, regex_t *preg) return preg; } -/* gradually increasing buffer */ +/* gradually increasing buffer. + * note that we reallocate even if n == old_size, + * and thus there is at least one extra allocated byte. + */ static char* qrealloc(char *b, int n, int *size) { if (!b || n >= *size) { @@ -1559,24 +1649,22 @@ static void fsrealloc(int size) if (size >= maxfields) { i = maxfields; maxfields = size + 16; - Fields = xrealloc(Fields, maxfields * sizeof(var)); + Fields = xrealloc(Fields, maxfields * sizeof(Fields[0])); for (; i < maxfields; i++) { Fields[i].type = VF_SPECIAL; Fields[i].string = NULL; } } - - if (size < nfields) { - for (i = size; i < nfields; i++) { - clrvar(Fields + i); - } + /* if size < nfields, clear extra field variables */ + for (i = size; i < nfields; i++) { + clrvar(Fields + i); } nfields = size; } static int awk_split(const char *s, node *spl, char **slist) { - int l, n = 0; + int l, n; char c[4]; char *s1; regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... @@ -1590,6 +1678,7 @@ static int awk_split(const char *s, node *spl, char **slist) if (*getvar_s(intvar[RS]) == '\0') c[2] = '\n'; + n = 0; if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ if (!*s) return n; /* "": zero fields */ @@ -1635,7 +1724,7 @@ static int awk_split(const char *s, node *spl, char **slist) } if (*s1) n++; - while ((s1 = strpbrk(s1, c))) { + while ((s1 = strpbrk(s1, c)) != NULL) { *s1++ = '\0'; n++; } @@ -1723,6 +1812,18 @@ static void handle_special(var *v) is_f0_split = FALSE; } else if (v == intvar[FS]) { + /* + * The POSIX-2008 standard says that changing FS should have no effect on the + * current input line, but only on the next one. The language is: + * + * > Before the first reference to a field in the record is evaluated, the record + * > shall be split into fields, according to the rules in Regular Expressions, + * > using the value of FS that was current at the time the record was read. + * + * So, split up current line before assignment to FS: + */ + split_f0(); + mk_splitter(getvar_s(v), &fsplitter); } else if (v == intvar[RS]) { @@ -1815,6 +1916,8 @@ static int awk_getline(rstream *rsm, var *v) int fd, so, eo, r, rp; char c, *m, *s; + debug_printf_eval("entered %s()\n", __func__); + /* we're using our own buffer since we need access to accumulating * characters */ @@ -1901,6 +2004,8 @@ static int awk_getline(rstream *rsm, var *v) rsm->pos = p - eo; rsm->size = size; + debug_printf_eval("returning from %s(): %d\n", __func__, r); + return r; } @@ -1983,83 +2088,102 @@ static char *awk_printf(node *n) return b; } -/* common substitution routine - * replace (nm) substring of (src) that match (n) with (repl), store - * result into (dest), return number of substitutions. If nm=0, replace - * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable - * subexpression matching (\1-\9) +/* Common substitution routine. + * Replace (nm)'th substring of (src) that matches (rn) with (repl), + * store result into (dest), return number of substitutions. + * If nm = 0, replace all matches. + * If src or dst is NULL, use $0. + * If subexp != 0, enable subexpression matching (\1-\9). */ -static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex) +static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) { - char *ds = NULL; - const char *s; + char *resbuf; const char *sp; - int c, i, j, di, rl, so, eo, nbs, n, dssize; + int match_no, residx, replen, resbufsize; + int regexec_flags; regmatch_t pmatch[10]; - regex_t sreg, *re; + regex_t sreg, *regex; + + resbuf = NULL; + residx = 0; + match_no = 0; + regexec_flags = 0; + regex = as_regex(rn, &sreg); + sp = getvar_s(src ? src : intvar[F0]); + replen = strlen(repl); + while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) { + int so = pmatch[0].rm_so; + int eo = pmatch[0].rm_eo; + + //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp); + resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize); + memcpy(resbuf + residx, sp, eo); + residx += eo; + if (++match_no >= nm) { + const char *s; + int nbs; - re = as_regex(rn, &sreg); - if (!src) - src = intvar[F0]; - if (!dest) - dest = intvar[F0]; - - i = di = 0; - sp = getvar_s(src); - rl = strlen(repl); - while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) { - so = pmatch[0].rm_so; - eo = pmatch[0].rm_eo; - - ds = qrealloc(ds, di + eo + rl, &dssize); - memcpy(ds + di, sp, eo); - di += eo; - if (++i >= nm) { /* replace */ - di -= (eo - so); + residx -= (eo - so); nbs = 0; for (s = repl; *s; s++) { - ds[di++] = c = *s; + char c = resbuf[residx++] = *s; if (c == '\\') { nbs++; continue; } - if (c == '&' || (ex && c >= '0' && c <= '9')) { - di -= ((nbs + 3) >> 1); + if (c == '&' || (subexp && c >= '0' && c <= '9')) { + int j; + residx -= ((nbs + 3) >> 1); j = 0; if (c != '&') { j = c - '0'; nbs++; } if (nbs % 2) { - ds[di++] = c; + resbuf[residx++] = c; } else { - n = pmatch[j].rm_eo - pmatch[j].rm_so; - ds = qrealloc(ds, di + rl + n, &dssize); - memcpy(ds + di, sp + pmatch[j].rm_so, n); - di += n; + int n = pmatch[j].rm_eo - pmatch[j].rm_so; + resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); + memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); + residx += n; } } nbs = 0; } } + regexec_flags = REG_NOTBOL; sp += eo; - if (i == nm) + if (match_no == nm) break; if (eo == so) { - ds[di] = *sp++; - if (!ds[di++]) - break; + /* Empty match (e.g. "b*" will match anywhere). + * Advance by one char. */ +//BUG (bug 1333): +//gsub(/\lineno; op1 = op->l.n; + debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn); /* execute inevitable things */ if (opinfo & OF_RES1) L.v = evaluate(op1, v1); if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1); - if (opinfo & OF_STR1) + if (opinfo & OF_STR1) { L.s = getvar_s(L.v); - if (opinfo & OF_STR2) + debug_printf_eval("L.s:'%s'\n", L.s); + } + if (opinfo & OF_STR2) { R.s = getvar_s(R.v); - if (opinfo & OF_NUM1) + debug_printf_eval("R.s:'%s'\n", R.s); + } + if (opinfo & OF_NUM1) { L_d = getvar_i(L.v); + debug_printf_eval("L_d:%f\n", L_d); + } + debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK)); switch (XC(opinfo & OPCLSMASK)) { /* -- iterative node type -- */ @@ -2506,6 +2639,7 @@ static var *evaluate(node *op, var *res) break; case XC( OC_MOVE ): + debug_printf_eval("MOVE\n"); /* if source is a temporary string, jusk relink it to dest */ //Disabled: if R.v is numeric but happens to have cached R.v->string, //then L.v ends up being a string, which is wrong @@ -2527,7 +2661,8 @@ static var *evaluate(node *op, var *res) var *vbeg, *v; const char *sv_progname; - if (!op->r.f->body.first) + /* The body might be empty, still has to eval the args */ + if (!op->r.n->info) syntax_error(EMSG_UNDEF_FUNC); vbeg = v = nvalloc(op->r.f->nargs + 1); @@ -2565,7 +2700,7 @@ static var *evaluate(node *op, var *res) rsm->F = popen(L.s, "r"); rsm->is_pipe = TRUE; } else { - rsm->F = fopen_for_read(L.s); /* not xfopen! */ + rsm->F = fopen_for_read(L.s); /* not xfopen! */ } } } else { @@ -2574,7 +2709,7 @@ static var *evaluate(node *op, var *res) rsm = iF; } - if (!rsm->F) { + if (!rsm || !rsm->F) { setvar_i(intvar[ERRNO], errno); setvar_i(res, -1); break; @@ -2594,8 +2729,6 @@ static var *evaluate(node *op, var *res) /* simple builtins */ case XC( OC_FBLTIN ): { - int i; - rstream *rsm; double R_d = R_d; /* for compiler */ switch (opn) { @@ -2606,35 +2739,40 @@ static var *evaluate(node *op, var *res) case F_rn: R_d = (double)rand() / (double)RAND_MAX; break; -#if ENABLE_FEATURE_AWK_LIBM + case F_co: - R_d = cos(L_d); - break; + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = cos(L_d); + break; + } case F_ex: - R_d = exp(L_d); - break; + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = exp(L_d); + break; + } case F_lg: - R_d = log(L_d); - break; + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = log(L_d); + break; + } case F_si: - R_d = sin(L_d); - break; + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = sin(L_d); + break; + } case F_sq: - R_d = sqrt(L_d); - break; -#else - case F_co: - case F_ex: - case F_lg: - case F_si: - case F_sq: + if (ENABLE_FEATURE_AWK_LIBM) { + R_d = sqrt(L_d); + break; + } + syntax_error(EMSG_NO_MATH); break; -#endif + case F_sr: R_d = (double)seed; seed = op1 ? (unsigned)L_d : (unsigned)time(NULL); @@ -2661,26 +2799,37 @@ static var *evaluate(node *op, var *res) if (!op1) { fflush(stdout); } else if (L.s && *L.s) { - rsm = newfile(L.s); + rstream *rsm = newfile(L.s); fflush(rsm->F); } else { fflush_all(); } break; - case F_cl: - i = 0; + case F_cl: { + rstream *rsm; + int err = 0; rsm = (rstream *)hash_search(fdhash, L.s); + debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm); if (rsm) { - i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); + debug_printf_eval("OC_FBLTIN F_cl " + "rsm->is_pipe:%d, ->F:%p\n", + rsm->is_pipe, rsm->F); + /* Can be NULL if open failed. Example: + * getline line <"doesnt_exist"; + * close("doesnt_exist"); <--- here rsm->F is NULL + */ + if (rsm->F) + err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F); free(rsm->buffer); hash_remove(fdhash, L.s); } - if (i != 0) + if (err) setvar_i(intvar[ERRNO], errno); - R_d = (double)i; + R_d = (double)err; break; } + } /* switch */ setvar_i(res, R_d); break; } @@ -2757,6 +2906,7 @@ static var *evaluate(node *op, var *res) case XC( OC_BINARY ): case XC( OC_REPLACE ): { double R_d = getvar_i(R.v); + debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn); switch (opn) { case '+': L_d += R_d; @@ -2773,11 +2923,10 @@ static var *evaluate(node *op, var *res) L_d /= R_d; break; case '&': -#if ENABLE_FEATURE_AWK_LIBM - L_d = pow(L_d, R_d); -#else - syntax_error(EMSG_NO_MATH); -#endif + if (ENABLE_FEATURE_AWK_LIBM) + L_d = pow(L_d, R_d); + else + syntax_error(EMSG_NO_MATH); break; case '%': if (R_d == 0) @@ -2785,6 +2934,7 @@ static var *evaluate(node *op, var *res) L_d -= (int)(L_d / R_d) * R_d; break; } + debug_printf_eval("BINARY/REPLACE result:%f\n", L_d); res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d); break; } @@ -2827,6 +2977,7 @@ static var *evaluate(node *op, var *res) } /* while (op) */ nvfree(v1); + debug_printf_eval("returning from %s(): %p\n", __func__, res); return res; #undef fnargs #undef seed @@ -2867,21 +3018,18 @@ static int awk_exit(int r) * otherwise return 0 */ static int is_assignment(const char *expr) { - char *exprc, *s, *s0, *s1; + char *exprc, *val; - exprc = xstrdup(expr); - if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) { - free(exprc); + if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { return FALSE; } - *s++ = '\0'; - s0 = s1 = s; - while (*s) - *s1++ = nextchar(&s); + exprc = xstrdup(expr); + val = exprc + (val - expr); + *val++ = '\0'; - *s1 = '\0'; - setvar_u(newvar(exprc), s0); + unescape_string_in_place(val); + setvar_u(newvar(exprc), val); free(exprc); return TRUE; } @@ -2892,7 +3040,7 @@ static rstream *next_input_file(void) #define rsm (G.next_input_file__rsm) #define files_happen (G.next_input_file__files_happen) - FILE *F = NULL; + FILE *F; const char *fname, *ind; if (rsm.F) @@ -2900,19 +3048,21 @@ static rstream *next_input_file(void) rsm.F = NULL; rsm.pos = rsm.adv = 0; - do { + for (;;) { if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) { if (files_happen) return NULL; fname = "-"; F = stdin; - } else { - ind = getvar_s(incvar(intvar[ARGIND])); - fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); - if (fname && *fname && !is_assignment(fname)) - F = xfopen_stdin(fname); + break; + } + ind = getvar_s(incvar(intvar[ARGIND])); + fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind)); + if (fname && *fname && !is_assignment(fname)) { + F = xfopen_stdin(fname); + break; } - } while (!F); + } files_happen = TRUE; setvar_s(intvar[FILENAME], fname); @@ -2926,7 +3076,7 @@ int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int awk_main(int argc, char **argv) { unsigned opt; - char *opt_F, *opt_W; + char *opt_F; llist_t *list_v = NULL; llist_t *list_f = NULL; int i, j; @@ -2988,11 +3138,13 @@ int awk_main(int argc, char **argv) } } opt_complementary = "v::f::"; /* -v and -f can occur multiple times */ - opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W); + opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL); argv += optind; argc -= optind; - if (opt & 0x1) - setvar_s(intvar[FS], opt_F); // -F + if (opt & 0x1) { /* -F */ + unescape_string_in_place(opt_F); + setvar_s(intvar[FS], opt_F); + } while (list_v) { /* -v */ if (!is_assignment(llist_pop(&list_v))) bb_show_usage(); @@ -3022,7 +3174,7 @@ int awk_main(int argc, char **argv) parse_program(*argv++); } if (opt & 0x8) // -W - bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W); + bb_error_msg("warning: option -W is ignored"); /* fill in ARGV array */ setvar_i(intvar[ARGC], argc);