* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
+//usage:#define awk_trivial_usage
+//usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
+//usage:#define awk_full_usage "\n\n"
+//usage: " -v VAR=VAL Set variable"
+//usage: "\n -F SEP Use SEP as field separator"
+//usage: "\n -f FILE Read program from FILE"
+
#include "libbb.h"
#include "xregex.h"
#include <math.h>
* to perform debug printfs to stderr: */
#define debug_printf_walker(...) do {} while (0)
#define debug_printf_eval(...) do {} while (0)
+#define debug_printf_parse(...) do {} while (0)
#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
+#ifndef debug_printf_parse
+# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
+#endif
/* simple token classes */
/* Order and hex values are very important!!! See next_token() */
-#define TC_SEQSTART 1 /* ( */
+#define TC_SEQSTART 1 /* ( */
#define TC_SEQTERM (1 << 1) /* ) */
#define TC_REGEXP (1 << 2) /* /.../ */
#define TC_OUTRDR (1 << 3) /* | > >> */
* For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
* n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
*/
+#undef P
+#undef PRIMASK
+#undef PRIMASK2
#define P(x) (x << 24)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000
/* tokens and their corresponding info values */
-#define NTC "\377" /* switch to next token class (tc<<1) */
-#define NTCC '\377'
+#define NTC "\377" /* switch to next token class (tc<<1) */
+#define NTCC '\377'
-#define OC_B OC_BUILTIN
+#define OC_B OC_BUILTIN
static const char tokenlist[] ALIGN1 =
- "\1(" NTC
- "\1)" NTC
- "\1/" NTC /* REGEXP */
- "\2>>" "\1>" "\1|" NTC /* OUTRDR */
- "\2++" "\2--" NTC /* UOPPOST */
- "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
- "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
- "\2*=" "\2/=" "\2%=" "\2^="
- "\1+" "\1-" "\3**=" "\2**"
- "\1/" "\1%" "\1^" "\1*"
- "\2!=" "\2>=" "\2<=" "\1>"
- "\1<" "\2!~" "\1~" "\2&&"
- "\2||" "\1?" "\1:" NTC
- "\2in" NTC
- "\1," NTC
- "\1|" NTC
- "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
- "\1]" NTC
- "\1{" NTC
- "\1}" NTC
- "\1;" NTC
- "\1\n" NTC
- "\2if" "\2do" "\3for" "\5break" /* STATX */
- "\10continue" "\6delete" "\5print"
- "\6printf" "\4next" "\10nextfile"
- "\6return" "\4exit" NTC
- "\5while" NTC
- "\4else" NTC
-
- "\3and" "\5compl" "\6lshift" "\2or"
- "\6rshift" "\3xor"
- "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
- "\3cos" "\3exp" "\3int" "\3log"
- "\4rand" "\3sin" "\4sqrt" "\5srand"
- "\6gensub" "\4gsub" "\5index" "\6length"
- "\5match" "\5split" "\7sprintf" "\3sub"
- "\6substr" "\7systime" "\10strftime" "\6mktime"
- "\7tolower" "\7toupper" NTC
- "\7getline" NTC
- "\4func" "\10function" NTC
- "\5BEGIN" NTC
- "\3END" "\0"
+ "\1(" NTC
+ "\1)" NTC
+ "\1/" NTC /* REGEXP */
+ "\2>>" "\1>" "\1|" NTC /* OUTRDR */
+ "\2++" "\2--" NTC /* UOPPOST */
+ "\2++" "\2--" "\1$" NTC /* UOPPRE1 */
+ "\2==" "\1=" "\2+=" "\2-=" /* BINOPX */
+ "\2*=" "\2/=" "\2%=" "\2^="
+ "\1+" "\1-" "\3**=" "\2**"
+ "\1/" "\1%" "\1^" "\1*"
+ "\2!=" "\2>=" "\2<=" "\1>"
+ "\1<" "\2!~" "\1~" "\2&&"
+ "\2||" "\1?" "\1:" NTC
+ "\2in" NTC
+ "\1," NTC
+ "\1|" NTC
+ "\1+" "\1-" "\1!" NTC /* UOPPRE2 */
+ "\1]" NTC
+ "\1{" NTC
+ "\1}" NTC
+ "\1;" NTC
+ "\1\n" NTC
+ "\2if" "\2do" "\3for" "\5break" /* STATX */
+ "\10continue" "\6delete" "\5print"
+ "\6printf" "\4next" "\10nextfile"
+ "\6return" "\4exit" NTC
+ "\5while" NTC
+ "\4else" NTC
+
+ "\3and" "\5compl" "\6lshift" "\2or"
+ "\6rshift" "\3xor"
+ "\5close" "\6system" "\6fflush" "\5atan2" /* BUILTIN */
+ "\3cos" "\3exp" "\3int" "\3log"
+ "\4rand" "\3sin" "\4sqrt" "\5srand"
+ "\6gensub" "\4gsub" "\5index" "\6length"
+ "\5match" "\5split" "\7sprintf" "\3sub"
+ "\6substr" "\7systime" "\10strftime" "\6mktime"
+ "\7tolower" "\7toupper" NTC
+ "\7getline" NTC
+ "\4func" "\10function" NTC
+ "\5BEGIN" NTC
+ "\3END"
+ /* compiler adds trailing "\0"; that NUL ends the token list scan (while (*tl)) */
;
static const uint32_t tokeninfo[] = {
0,
0,
OC_REGEXP,
- xS|'a', xS|'w', xS|'|',
- OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
- OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
- OC_FIELD|xV|P(5),
- OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
- OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
- OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
- OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
- OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
- OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
- OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
- OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
- OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
- OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
- OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
- OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
- OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
- OC_COLON|xx|P(67)|':',
- OC_IN|SV|P(49),
+ xS|'a', xS|'w', xS|'|',
+ OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
+ OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
+ OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
+ OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
+ OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
+ OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
+ OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
+ OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
+ OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
+ OC_IN|SV|P(49), /* in */
OC_COMMA|SS|P(80),
OC_PGETLINE|SV|P(37),
- OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
- OC_UNARY|xV|P(19)|'!',
- 0,
+ OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
+ 0, /* ] */
0,
0,
0,
- 0,
- ST_IF, ST_DO, ST_FOR, OC_BREAK,
- OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
- OC_PRINTF, OC_NEXT, OC_NEXTFILE,
- OC_RETURN|Vx, OC_EXIT|Nx,
+ 0, /* \n */
+ ST_IF, ST_DO, ST_FOR, OC_BREAK,
+ OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
+ OC_PRINTF, OC_NEXT, OC_NEXTFILE,
+ OC_RETURN|Vx, OC_EXIT|Nx,
ST_WHILE,
- 0,
+ 0, /* else */
OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
OC_GETLINE|SV|P(0),
- 0, 0,
+ 0, 0,
0,
- 0
+ 0 /* END */
};
/* internal variable names and their initial values */
ORS, RS, RT, FILENAME,
SUBSEP, F0, ARGIND, ARGC,
ARGV, ERRNO, FNR, NR,
- NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
+ NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
};
static const char vNames[] ALIGN1 =
smallint nextrec;
smallint nextfile;
smallint is_f0_split;
+ smallint t_rollback;
};
struct globals2 {
uint32_t t_info; /* often used */
uint32_t t_tclass;
char *t_string;
int t_lineno;
- int t_rollback;
var *intvar[NUM_INTERNAL_VARS]; /* often used */
#define nextrec (G1.nextrec )
#define nextfile (G1.nextfile )
#define is_f0_split (G1.is_f0_split )
+#define t_rollback (G1.t_rollback )
#define t_info (G.t_info )
#define t_tclass (G.t_tclass )
#define t_string (G.t_string )
#define t_lineno (G.t_lineno )
-#define t_rollback (G.t_rollback )
#define intvar (G.intvar )
#define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter )
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
-#if !ENABLE_FEATURE_AWK_LIBM
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
-#endif
static void zero_out_var(var *vp)
{
pps = *s;
if (c == '\\')
c = bb_process_escape_sequence((const char**)s);
- if (c == '\\' && *s == pps)
- c = *(*s)++;
+ /* Example awk statement:
+ * s = "abc\"def"
+ * we must treat \" as "
+ */
+ if (c == '\\' && *s == pps) { /* unrecognized \z? */
+ c = *(*s); /* yes, fetch z */
+ if (c)
+ (*s)++; /* advance unless z = NUL */
+ }
return c;
}
+/* Rewrite the NUL-terminated string s1 in place, one nextchar() result
+ * at a time: s is the read cursor advanced by nextchar(), s1 is the write
+ * cursor. The loop stops right after the terminating NUL has been stored,
+ * so the result is always NUL-terminated.
+ * NOTE(review): nextchar() appears to decode backslash escapes, and the
+ * write cursor presumably never overtakes the read cursor - confirm.
+ * TODO: merge with strcpy_and_process_escape_sequences()?
+ */
+static void unescape_string_in_place(char *s1)
+{
+ char *s = s1;
+ while ((*s1 = nextchar(&s)) != '\0')
+ s1++;
+}
+
static ALWAYS_INLINE int isalnum_(int c)
{
return (isalnum(c) || c == '_');
static double my_strtod(char **pp)
{
char *cp = *pp;
-#if ENABLE_DESKTOP
- if (cp[0] == '0') {
+ if (ENABLE_DESKTOP && cp[0] == '0') {
/* Might be hex or octal integer: 0x123abc or 07777 */
char c = (cp[1] | 0x20);
if (c == 'x' || isdigit(cp[1])) {
*/
}
}
-#endif
return strtod(cp, pp);
}
const char *tl;
uint32_t tc;
const uint32_t *ti;
- int l;
if (t_rollback) {
t_rollback = FALSE;
if (*p == '\0') {
tc = TC_EOF;
+ debug_printf_parse("%s: token found: TC_EOF\n", __func__);
} else if (*p == '\"') {
/* it's a string */
t_string = s = ++p;
while (*p != '\"') {
- char *pp = p;
+ char *pp;
if (*p == '\0' || *p == '\n')
syntax_error(EMSG_UNEXP_EOS);
+ pp = p;
*s++ = nextchar(&pp);
p = pp;
}
p++;
*s = '\0';
tc = TC_STRING;
+ debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
} else if ((expected & TC_REGEXP) && *p == '/') {
/* it's regexp */
p++;
*s = '\0';
tc = TC_REGEXP;
+ debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
} else if (*p == '.' || isdigit(*p)) {
/* it's a number */
char *pp = p;
t_double = my_strtod(&pp);
p = pp;
- if (*pp == '.')
+ if (*p == '.')
syntax_error(EMSG_UNEXP_TOKEN);
tc = TC_NUMBER;
+ debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
} else {
/* search for something known */
tc = 0x00000001;
ti = tokeninfo;
while (*tl) {
- l = *tl++;
- if (l == NTCC) {
+ int l = (unsigned char) *tl++;
+ if (l == (unsigned char) NTCC) {
tc <<= 1;
continue;
}
- /* if token class is expected, token
- * matches and it's not a longer word,
- * then this is what we are looking for
+ /* if token class is expected,
+ * token matches,
+ * and it's not a longer word,
*/
if ((tc & (expected | TC_WORD | TC_NEWLINE))
- && *tl == *p && strncmp(p, tl, l) == 0
+ && strncmp(p, tl, l) == 0
&& !((tc & TC_WORD) && isalnum_(p[l]))
) {
+ /* then this is what we are looking for */
t_info = *ti;
+ debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
p += l;
- break;
+ goto token_found;
}
ti++;
tl += l;
}
-
- if (!*tl) {
- /* it's a name (var/array/function),
- * otherwise it's something wrong
- */
- if (!isalnum_(*p))
- syntax_error(EMSG_UNEXP_TOKEN);
-
- t_string = --p;
- while (isalnum_(*++p)) {
- p[-1] = *p;
- }
- p[-1] = '\0';
- tc = TC_VARIABLE;
- /* also consume whitespace between functionname and bracket */
- if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
- p = skip_spaces(p);
- if (*p == '(') {
- tc = TC_FUNCTION;
- } else {
- if (*p == '[') {
- p++;
- tc = TC_ARRAY;
- }
- }
+ /* not a known token */
+
+ /* is it a name? (var/array/function) */
+ if (!isalnum_(*p))
+ syntax_error(EMSG_UNEXP_TOKEN); /* no */
+ /* yes */
+ t_string = --p;
+ while (isalnum_(*++p)) {
+ p[-1] = *p;
+ }
+ p[-1] = '\0';
+ tc = TC_VARIABLE;
+ /* also consume whitespace between functionname and bracket */
+ if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
+ p = skip_spaces(p);
+ if (*p == '(') {
+ tc = TC_FUNCTION;
+ debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
+ } else {
+ if (*p == '[') {
+ p++;
+ tc = TC_ARRAY;
+ debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
+ } else
+ debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
}
}
+ token_found:
g_pos = p;
/* skipping newlines in some cases */
uint32_t tc, xtc;
var *v;
+ debug_printf_parse("%s(%x)\n", __func__, iexp);
+
sn.info = PRIMASK;
sn.r.n = glptr = NULL;
xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
while (!((tc = next_token(xtc)) & iexp)) {
+
if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
/* input redirection (<) attached to glptr node */
+ debug_printf_parse("%s: input redir\n", __func__);
cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
cn->a.n = glptr;
xtc = TC_OPERAND | TC_UOPPRE;
glptr = NULL;
} else if (tc & (TC_BINOP | TC_UOPPOST)) {
+ debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
/* for binary and postfix-unary operators, jump back over
* previous operators with higher priority */
vn = cn;
vn->a.n = cn;
} else {
+ debug_printf_parse("%s: other\n", __func__);
/* for operands and prefix-unary operators, attach them
* to last node */
vn = cn;
cn->a.n = vn;
xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
if (tc & (TC_OPERAND | TC_REGEXP)) {
+ debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
/* one should be very careful with switch on tclass -
* only simple tclasses should be used! */
switch (tc) {
case TC_VARIABLE:
case TC_ARRAY:
+ debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
cn->info = OC_VAR;
v = hash_search(ahash, t_string);
if (v != NULL) {
case TC_NUMBER:
case TC_STRING:
+ debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
cn->info = OC_VAR;
v = cn->l.v = xzalloc(sizeof(var));
if (tc & TC_NUMBER)
break;
case TC_REGEXP:
+ debug_printf_parse("%s: TC_REGEXP\n", __func__);
mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
break;
case TC_FUNCTION:
+ debug_printf_parse("%s: TC_FUNCTION\n", __func__);
cn->info = OC_FUNC;
cn->r.f = newfunc(t_string);
cn->l.n = condition();
break;
case TC_SEQSTART:
+ debug_printf_parse("%s: TC_SEQSTART\n", __func__);
cn = vn->r.n = parse_expr(TC_SEQTERM);
+ if (!cn)
+ syntax_error("Empty sequence");
cn->a.n = vn;
break;
case TC_GETLINE:
+ debug_printf_parse("%s: TC_GETLINE\n", __func__);
glptr = cn;
xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
break;
case TC_BUILTIN:
+ debug_printf_parse("%s: TC_BUILTIN\n", __func__);
cn->l.n = condition();
break;
}
}
}
}
+
+ debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
return sn.r.n;
}
} while (c & TC_NEWLINE);
if (c & TC_GRPSTART) {
+ debug_printf_parse("%s: TC_GRPSTART\n", __func__);
while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
+ debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
if (t_tclass & TC_NEWLINE)
continue;
rollback_token();
chain_group();
}
+ debug_printf_parse("%s: TC_GRPTERM\n", __func__);
} else if (c & (TC_OPSEQ | TC_OPTERM)) {
+ debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
rollback_token();
chain_expr(OC_EXEC | Vx);
- } else { /* TC_STATEMNT */
+ } else {
+ /* TC_STATEMNT */
+ debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
switch (t_info & OPCLSMASK) {
case ST_IF:
+ debug_printf_parse("%s: ST_IF\n", __func__);
n = chain_node(OC_BR | Vx);
n->l.n = condition();
chain_group();
break;
case ST_WHILE:
+ debug_printf_parse("%s: ST_WHILE\n", __func__);
n2 = condition();
n = chain_loop(NULL);
n->l.n = n2;
break;
case ST_DO:
+ debug_printf_parse("%s: ST_DO\n", __func__);
n2 = chain_node(OC_EXEC);
n = chain_loop(NULL);
n2->a.n = n->a.n;
break;
case ST_FOR:
+ debug_printf_parse("%s: ST_FOR\n", __func__);
next_token(TC_SEQSTART);
n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
if (t_tclass & TC_SEQTERM) { /* for-in */
case OC_PRINT:
case OC_PRINTF:
+ debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
n = chain_node(t_info);
n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
if (t_tclass & TC_OUTRDR) {
break;
case OC_BREAK:
+ debug_printf_parse("%s: OC_BREAK\n", __func__);
n = chain_node(OC_EXEC);
n->a.n = break_ptr;
break;
case OC_CONTINUE:
+ debug_printf_parse("%s: OC_CONTINUE\n", __func__);
n = chain_node(OC_EXEC);
n->a.n = continue_ptr;
break;
/* delete, next, nextfile, return, exit */
default:
+ debug_printf_parse("%s: default\n", __func__);
chain_expr(t_info);
}
}
while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
- if (tclass & TC_OPTERM)
+ if (tclass & TC_OPTERM) {
+ debug_printf_parse("%s: TC_OPTERM\n", __func__);
continue;
+ }
seq = &mainseq;
if (tclass & TC_BEGIN) {
+ debug_printf_parse("%s: TC_BEGIN\n", __func__);
seq = &beginseq;
chain_group();
} else if (tclass & TC_END) {
+ debug_printf_parse("%s: TC_END\n", __func__);
seq = &endseq;
chain_group();
} else if (tclass & TC_FUNCDECL) {
+ debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
next_token(TC_FUNCTION);
g_pos++;
f = newfunc(t_string);
clear_array(ahash);
} else if (tclass & TC_OPSEQ) {
+ debug_printf_parse("%s: TC_OPSEQ\n", __func__);
rollback_token();
cn = chain_node(OC_TEST);
cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
if (t_tclass & TC_GRPSTART) {
+ debug_printf_parse("%s: TC_GRPSTART\n", __func__);
rollback_token();
chain_group();
} else {
+ debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
chain_node(OC_PRINT);
}
cn->r.n = mainseq.last;
} else /* if (tclass & TC_GRPSTART) */ {
+ debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
rollback_token();
chain_group();
}
}
+ debug_printf_parse("%s: TC_EOF\n", __func__);
}
regfree(re);
regfree(ire); // TODO: nuke ire, use re+1?
}
- if (strlen(s) > 1) {
+ if (s[0] && s[1]) { /* strlen(s) > 1 */
mk_re_node(s, n, re);
} else {
- n->info = (uint32_t) *s;
+ n->info = (uint32_t) s[0];
}
return n;
if (size >= maxfields) {
i = maxfields;
maxfields = size + 16;
- Fields = xrealloc(Fields, maxfields * sizeof(var));
+ Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
for (; i < maxfields; i++) {
Fields[i].type = VF_SPECIAL;
Fields[i].string = NULL;
}
}
-
- if (size < nfields) {
- for (i = size; i < nfields; i++) {
- clrvar(Fields + i);
- }
+ /* if size < nfields, clear extra field variables */
+ for (i = size; i < nfields; i++) {
+ clrvar(Fields + i);
}
nfields = size;
}
static int awk_split(const char *s, node *spl, char **slist)
{
- int l, n = 0;
+ int l, n;
char c[4];
char *s1;
regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
if (*getvar_s(intvar[RS]) == '\0')
c[2] = '\n';
+ n = 0;
if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
if (!*s)
return n; /* "": zero fields */
}
if (*s1)
n++;
- while ((s1 = strpbrk(s1, c))) {
+ while ((s1 = strpbrk(s1, c)) != NULL) {
*s1++ = '\0';
n++;
}
is_f0_split = FALSE;
} else if (v == intvar[FS]) {
+ /*
+ * The POSIX-2008 standard says that changing FS should have no effect on the
+ * current input line, but only on the next one. The language is:
+ *
+ * > Before the first reference to a field in the record is evaluated, the record
+ * > shall be split into fields, according to the rules in Regular Expressions,
+ * > using the value of FS that was current at the time the record was read.
+ *
+ * So, split up current line before assignment to FS:
+ */
+ split_f0();
+
mk_splitter(getvar_s(v), &fsplitter);
} else if (v == intvar[RS]) {
int fd, so, eo, r, rp;
char c, *m, *s;
+ debug_printf_eval("entered %s()\n", __func__);
+
/* we're using our own buffer since we need access to accumulating
* characters
*/
rsm->pos = p - eo;
rsm->size = size;
+ debug_printf_eval("returning from %s(): %d\n", __func__, r);
+
return r;
}
switch (info) {
case B_a2:
-#if ENABLE_FEATURE_AWK_LIBM
- setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
-#else
- syntax_error(EMSG_NO_MATH);
-#endif
+ if (ENABLE_FEATURE_AWK_LIBM)
+ setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
+ else
+ syntax_error(EMSG_NO_MATH);
break;
case B_sp: {
#define fnargs (G.evaluate__fnargs)
/* seed is initialized to 1 */
#define seed (G.evaluate__seed)
-#define sreg (G.evaluate__sreg)
+#define sreg (G.evaluate__sreg)
var *v1;
if (!op)
return setvar_s(res, NULL);
+ debug_printf_eval("entered %s()\n", __func__);
+
v1 = nvalloc(2);
while (op) {
opn = (opinfo & OPNMASK);
g_lineno = op->lineno;
op1 = op->l.n;
- debug_printf_eval("opinfo:%08x opn:%08x XC:%x\n", opinfo, opn, XC(opinfo & OPCLSMASK));
+ debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
/* execute inevitable things */
if (opinfo & OF_RES1)
debug_printf_eval("L_d:%f\n", L_d);
}
+ debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
switch (XC(opinfo & OPCLSMASK)) {
/* -- iterative node type -- */
var *vbeg, *v;
const char *sv_progname;
- if (!op->r.f->body.first)
+ /* The body might be empty; we still have to evaluate the args */
+ if (!op->r.n->info)
syntax_error(EMSG_UNDEF_FUNC);
vbeg = v = nvalloc(op->r.f->nargs + 1);
rsm->F = popen(L.s, "r");
rsm->is_pipe = TRUE;
} else {
- rsm->F = fopen_for_read(L.s); /* not xfopen! */
+ rsm->F = fopen_for_read(L.s); /* not xfopen! */
}
}
} else {
rsm = iF;
}
- if (!rsm->F) {
+ if (!rsm || !rsm->F) {
setvar_i(intvar[ERRNO], errno);
setvar_i(res, -1);
break;
/* simple builtins */
case XC( OC_FBLTIN ): {
- int i;
- rstream *rsm;
double R_d = R_d; /* for compiler */
switch (opn) {
case F_rn:
R_d = (double)rand() / (double)RAND_MAX;
break;
-#if ENABLE_FEATURE_AWK_LIBM
+
case F_co:
- R_d = cos(L_d);
- break;
+ if (ENABLE_FEATURE_AWK_LIBM) {
+ R_d = cos(L_d);
+ break;
+ }
case F_ex:
- R_d = exp(L_d);
- break;
+ if (ENABLE_FEATURE_AWK_LIBM) {
+ R_d = exp(L_d);
+ break;
+ }
case F_lg:
- R_d = log(L_d);
- break;
+ if (ENABLE_FEATURE_AWK_LIBM) {
+ R_d = log(L_d);
+ break;
+ }
case F_si:
- R_d = sin(L_d);
- break;
+ if (ENABLE_FEATURE_AWK_LIBM) {
+ R_d = sin(L_d);
+ break;
+ }
case F_sq:
- R_d = sqrt(L_d);
- break;
-#else
- case F_co:
- case F_ex:
- case F_lg:
- case F_si:
- case F_sq:
+ if (ENABLE_FEATURE_AWK_LIBM) {
+ R_d = sqrt(L_d);
+ break;
+ }
+
syntax_error(EMSG_NO_MATH);
break;
-#endif
+
case F_sr:
R_d = (double)seed;
seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
if (!op1) {
fflush(stdout);
} else if (L.s && *L.s) {
- rsm = newfile(L.s);
+ rstream *rsm = newfile(L.s);
fflush(rsm->F);
} else {
fflush_all();
}
break;
- case F_cl:
- i = 0;
+ case F_cl: {
+ rstream *rsm;
+ int err = 0;
rsm = (rstream *)hash_search(fdhash, L.s);
+ debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
if (rsm) {
- i = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
+ debug_printf_eval("OC_FBLTIN F_cl "
+ "rsm->is_pipe:%d, ->F:%p\n",
+ rsm->is_pipe, rsm->F);
+ /* Can be NULL if open failed. Example:
+ * getline line <"doesnt_exist";
+ * close("doesnt_exist"); <--- here rsm->F is NULL
+ */
+ if (rsm->F)
+ err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
free(rsm->buffer);
hash_remove(fdhash, L.s);
}
- if (i != 0)
+ if (err)
setvar_i(intvar[ERRNO], errno);
- R_d = (double)i;
+ R_d = (double)err;
break;
}
+ } /* switch */
setvar_i(res, R_d);
break;
}
L_d /= R_d;
break;
case '&':
-#if ENABLE_FEATURE_AWK_LIBM
- L_d = pow(L_d, R_d);
-#else
- syntax_error(EMSG_NO_MATH);
-#endif
+ if (ENABLE_FEATURE_AWK_LIBM)
+ L_d = pow(L_d, R_d);
+ else
+ syntax_error(EMSG_NO_MATH);
break;
case '%':
if (R_d == 0)
} /* while (op) */
nvfree(v1);
+ debug_printf_eval("returning from %s(): %p\n", __func__, res);
return res;
#undef fnargs
#undef seed
* otherwise return 0 */
static int is_assignment(const char *expr)
{
- char *exprc, *s, *s0, *s1;
+ char *exprc, *val;
- exprc = xstrdup(expr);
- if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
- free(exprc);
+ if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) { /* need a leading name char (alnum or '_') and an '=' */
return FALSE; /* rejected before any allocation, so nothing to free */
}
- *s++ = '\0';
- s0 = s1 = s;
- while (*s)
- *s1++ = nextchar(&s);
+ exprc = xstrdup(expr); /* writable copy: expr is const and we split it below */
+ val = exprc + (val - expr); /* same '=' position, but inside the copy */
+ *val++ = '\0'; /* cut off the name; val now points at the value text */
- *s1 = '\0';
- setvar_u(newvar(exprc), s0);
+ unescape_string_in_place(val); /* process the value text in place */
+ setvar_u(newvar(exprc), val); /* exprc now holds only the variable name */
free(exprc); /* NOTE(review): presumably newvar()/setvar_u() keep their own copies - confirm */
return TRUE;
}
#define rsm (G.next_input_file__rsm)
#define files_happen (G.next_input_file__files_happen)
- FILE *F = NULL;
+ FILE *F;
const char *fname, *ind;
if (rsm.F)
rsm.F = NULL;
rsm.pos = rsm.adv = 0;
- do {
+ for (;;) {
if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
if (files_happen)
return NULL;
fname = "-";
F = stdin;
- } else {
- ind = getvar_s(incvar(intvar[ARGIND]));
- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
- if (fname && *fname && !is_assignment(fname))
- F = xfopen_stdin(fname);
+ break;
}
- } while (!F);
+ ind = getvar_s(incvar(intvar[ARGIND]));
+ fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
+ if (fname && *fname && !is_assignment(fname)) {
+ F = xfopen_stdin(fname);
+ break;
+ }
+ }
files_happen = TRUE;
setvar_s(intvar[FILENAME], fname);
int awk_main(int argc, char **argv)
{
unsigned opt;
- char *opt_F, *opt_W;
+ char *opt_F;
llist_t *list_v = NULL;
llist_t *list_f = NULL;
int i, j;
}
}
opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
- opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
+ opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
argv += optind;
argc -= optind;
- if (opt & 0x1)
- setvar_s(intvar[FS], opt_F); // -F
+ if (opt & 0x1) { /* -F */
+ unescape_string_in_place(opt_F);
+ setvar_s(intvar[FS], opt_F);
+ }
while (list_v) { /* -v */
if (!is_assignment(llist_pop(&list_v)))
bb_show_usage();
parse_program(*argv++);
}
if (opt & 0x8) // -W
- bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
+ bb_error_msg("warning: option -W is ignored");
/* fill in ARGV array */
setvar_i(intvar[ARGC], argc);