#include "libbb.h"
#include "xregex.h"
#include <math.h>
-extern char **environ;
/* This is a NOEXEC applet. Be very careful! */
var *pos;
struct nvblock_s *prev;
struct nvblock_s *next;
- var nv[0];
+ var nv[];
} nvblock;
typedef struct tsplitter_s {
/* builtins */
enum {
- B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
+ B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
B_ge, B_gs, B_su,
B_an, B_co, B_ls, B_or, B_rs, B_xo,
};
"\4rand" "\3sin" "\4sqrt" "\5srand"
"\6gensub" "\4gsub" "\5index" "\6length"
"\5match" "\5split" "\7sprintf" "\3sub"
- "\6substr" "\7systime" "\10strftime"
+ "\6substr" "\7systime" "\10strftime" "\6mktime"
"\7tolower" "\7toupper" NTC
"\7getline" NTC
"\4func" "\10function" NTC
OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
- OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
+ OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
OC_GETLINE|SV|P(0),
0, 0,
enum {
CONVFMT, OFMT, FS, OFS,
ORS, RS, RT, FILENAME,
- SUBSEP, ARGIND, ARGC, ARGV,
- ERRNO, FNR,
- NR, NF, IGNORECASE,
- ENVIRON, F0, NUM_INTERNAL_VARS
+ SUBSEP, F0, ARGIND, ARGC,
+ ARGV, ERRNO, FNR, NR,
+ NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
};
static const char vNames[] ALIGN1 =
"CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
"ORS\0" "RS\0*" "RT\0" "FILENAME\0"
- "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
- "ERRNO\0" "FNR\0"
- "NR\0" "NF\0*" "IGNORECASE\0*"
- "ENVIRON\0" "$\0*" "\0";
+ "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
+ "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
+ "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
static const char vValues[] ALIGN1 =
"%.6g\0" "%.6g\0" " \0" " \0"
"\n\0" "\n\0" "\0" "\0"
- "\034\0"
- "\377";
+ "\034\0" "\0" "\377";
/* hash size may grow to these values */
#define FIRST_PRIME 61
/* Globals. Split in two parts so that first one is addressed
- * with (mostly short) negative offsets */
+ * with (mostly short) negative offsets.
+ * NB: it's unsafe to put members of type "double"
+ * into globals2 (gcc may fail to align them).
+ */
struct globals {
- chain beginseq, mainseq, endseq, *seq;
+ double t_double;
+ chain beginseq, mainseq, endseq;
+ chain *seq;
node *break_ptr, *continue_ptr;
rstream *iF;
xhash *vhash, *ahash, *fdhash, *fnhash;
tsplitter exec_builtin__tspl;
/* biggest and least used members go last */
- double t_double;
tsplitter fsplitter, rsplitter;
};
#define G1 (ptr_to_globals[-1])
-#define G (*(struct globals2 *const)ptr_to_globals)
+#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
-/* char G1size[sizeof(G1)]; - 0x6c */
-/* char Gsize[sizeof(G)]; - 0x1cc */
+/*char G1size[sizeof(G1)]; - 0x74 */
+/*char Gsize[sizeof(G)]; - 0x1c4 */
/* Trying to keep most of members accessible with short offsets: */
-/* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
+/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
+#define t_double (G1.t_double )
#define beginseq (G1.beginseq )
#define mainseq (G1.mainseq )
#define endseq (G1.endseq )
#define t_info (G.t_info )
#define t_tclass (G.t_tclass )
#define t_string (G.t_string )
-#define t_double (G.t_double )
#define t_lineno (G.t_lineno )
#define t_rollback (G.t_rollback )
#define intvar (G.intvar )
#define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter )
#define INIT_G() do { \
- PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \
+ SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
G.next_token__ltclass = TC_OPTERM; \
G.evaluate__seed = 1; \
} while (0)
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
-static int awk_exit(int) ATTRIBUTE_NORETURN;
+static int awk_exit(int) NORETURN;
/* ---- error handling ---- */
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
-#if !ENABLE_FEATURE_AWK_MATH
+#if !ENABLE_FEATURE_AWK_LIBM
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
#endif
memset(vp, 0, sizeof(*vp));
}
-static void syntax_error(const char *const message) ATTRIBUTE_NORETURN;
-static void syntax_error(const char *const message)
+static void syntax_error(const char *message) NORETURN;
+static void syntax_error(const char *message)
{
bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
}
hash_rebuild(hash);
l = strlen(name) + 1;
- hi = xzalloc(sizeof(hash_item) + l);
- memcpy(hi->name, name, l);
+ hi = xzalloc(sizeof(*hi) + l);
+ strcpy(hi->name, name);
idx = hashidx(name) % hash->csize;
hi->next = hash->items[idx];
return c;
}
-static int ALWAYS_INLINE isalnum_(int c)
+static ALWAYS_INLINE int isalnum_(int c)
{
return (isalnum(c) || c == '_');
}
-static FILE *afopen(const char *path, const char *mode)
+static double my_strtod(char **pp) /* parse the number at *pp and leave *pp just past it */
{
- return (*path == '-' && *(path+1) == '\0') ? stdin : xfopen(path, mode);
+#if ENABLE_DESKTOP /* additionally accept C-style hex/octal integer literals */
+ if ((*pp)[0] == '0' /* leading '0' followed by 'x'/'X' (| 0x20 folds case) or a digit */
+ && ((((*pp)[1] | 0x20) == 'x') || isdigit((*pp)[1]))
+ ) {
+ return strtoull(*pp, pp, 0); /* base 0: strtoull chooses 16 or 8 from the prefix */
+ }
+#endif
+ return strtod(*pp, pp); /* everything else uses ordinary floating-point syntax */
}
/* -------- working with variables (set/get/copy/etc) -------- */
/* set array element to user string */
static void setari_u(var *a, int idx, const char *s)
{
- char sidx[sizeof(int)*3 + 1];
var *v;
- sprintf(sidx, "%d", idx);
- v = findvar(iamarray(a), sidx);
+ v = findvar(iamarray(a), itoa(idx));
setvar_u(v, s);
}
v->number = 0;
s = v->string;
if (s && *s) {
- v->number = strtod(s, &s);
+ v->number = my_strtod(&s);
if (v->type & VF_USER) {
skip_spaces(&s);
if (*s != '\0')
return v->number;
}
+/* Used for operands of bitwise ops.
+ * Reads v's numeric value with getvar_i() and converts it to
+ * unsigned long. Non-negative values are cast directly; negative
+ * values are negated, cast, and negated back, so they wrap modulo
+ * 2^N (N = width of unsigned long) in the returned value. */
+static unsigned long getvar_i_int(var *v)
+{
+ double d = getvar_i(v);
+
+ /* Casting doubles to longs is undefined for values outside
+ * of target type range. Try to widen it as much as possible */
+ if (d >= 0)
+ return (unsigned long)d;
+ /* Why? Think about d == -4294967295.0 (assuming 32bit longs):
+ * d itself does not fit in long, but its magnitude -d does fit
+ * in unsigned long, so the cast is applied to -d instead. */
+ return - (long) (unsigned long) (-d);
+}
+
static var *copyvar(var *dest, const var *src)
{
if (dest != src) {
if (!g_cb) {
size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
- g_cb = xmalloc(sizeof(nvblock) + size * sizeof(var));
+ g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
g_cb->size = size;
g_cb->pos = g_cb->nv;
g_cb->prev = pb;
- g_cb->next = NULL;
+ /*g_cb->next = NULL; - xzalloc did it */
if (pb) pb->next = g_cb;
}
} else if (*p == '.' || isdigit(*p)) {
/* it's a number */
- t_double = strtod(p, &p);
+ t_double = my_strtod(&p);
if (*p == '.')
syntax_error(EMSG_UNEXP_TOKEN);
tc = TC_NUMBER;
*/
static regex_t *as_regex(node *op, regex_t *preg)
{
+ int cflags;
var *v;
const char *s;
}
v = nvalloc(1);
s = getvar_s(evaluate(op, v));
- xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
+
+ cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
+ /* Testcase where REG_EXTENDED fails (unpaired '{'):
+ * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
+ * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
+ * (maybe gsub is not supposed to use REG_EXTENDED?).
+ */
+ if (regcomp(preg, s, cflags)) {
+ cflags &= ~REG_EXTENDED;
+ xregcomp(preg, s, cflags);
+ }
nvfree(v);
return preg;
}
/* gradually increasing buffer */
static void qrealloc(char **b, int n, int *size)
{
- if (!*b || n >= *size)
- *b = xrealloc(*b, *size = n + (n>>1) + 80);
+ if (!*b || n >= *size) { /* nothing allocated yet, or n has reached the recorded size */
+ *size = n + (n>>1) + 80; /* new size: n plus half of n plus 80 */
+ *b = xrealloc(*b, *size); /* *size is updated first, then used for the reallocation */
+ }
}
/* resize field storage space */
n++; /* we saw yet another delimiter */
} else {
pmatch[0].rm_eo = l;
- if (s[l]) pmatch[0].rm_eo++;
+ if (s[l])
+ pmatch[0].rm_eo++;
}
memcpy(s1, s, l);
- s1[l] = '\0';
+ /* make sure we remove *all* of the separator chars */
+ do {
+ s1[l] = '\0';
+ } while (++l < pmatch[0].rm_eo);
nextword(&s1);
s += pmatch[0].rm_eo;
} while (*s);
{
char **w;
hash_item *hi;
- int i;
+ unsigned i;
if (v->type & VF_WALK)
free(v->x.walker);
return r;
}
-
/* formatted output into an allocated buffer, return ptr to buffer */
static char *awk_printf(node *n)
{
return i;
}
-static var *exec_builtin(node *op, var *res)
+static NOINLINE int do_mktime(const char *ds)
+{
+ struct tm then;
+ int count; /* number of fields sscanf() converted */
+ /* Convert the date string ds, laid out as
+ * "YYYY MM DD HH MM SS [DST]", to the value mktime() returns
+ * for it. Returns -1 when fewer than six fields parse, or when
+ * the month is 0 or the year is in 0..1899.
+ * NOTE(review): mktime()'s time_t result is narrowed to int. */
+ /*memset(&then, 0, sizeof(then)); - not needed */
+ then.tm_isdst = -1; /* default is unknown */
+
+ /* manpage of mktime says these fields are ints,
+ * so we can sscanf stuff directly into them.
+ * NOTE(review): "%u" formally expects unsigned *, not int *;
+ * confirm this mismatch is acceptable on all targets. */
+ count = sscanf(ds, "%u %u %u %u %u %u %d",
+ &then.tm_year, &then.tm_mon, &then.tm_mday,
+ &then.tm_hour, &then.tm_min, &then.tm_sec,
+ &then.tm_isdst);
+
+ if (count < 6 /* the trailing DST field is optional */
+ || (unsigned)then.tm_mon < 1 /* true only for month == 0; negative values pass */
+ || (unsigned)then.tm_year < 1900 /* true only for years 0..1899; negative values pass */
+ ) {
+ return -1;
+ }
+ /* input uses calendar numbering; struct tm counts months from 0 and years from 1900 */
+ then.tm_mon -= 1;
+ then.tm_year -= 1900;
+
+ return mktime(&then);
+}
+
+static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)
- int (*to_xxx)(int);
var *tv;
node *an[4];
var *av[4];
}
nargs = i;
- if (nargs < (info >> 30))
+ if ((uint32_t)nargs < (info >> 30))
syntax_error(EMSG_TOO_FEW_ARGS);
- switch (info & OPNMASK) {
+ info &= OPNMASK;
+ switch (info) {
case B_a2:
-#if ENABLE_FEATURE_AWK_MATH
- setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
+#if ENABLE_FEATURE_AWK_LIBM
+ setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
#else
syntax_error(EMSG_NO_MATH);
#endif
n = awk_split(as[0], spl, &s);
s1 = s;
clear_array(iamarray(av[1]));
- for (i=1; i<=n; i++)
+ for (i = 1; i <= n; i++)
setari_u(av[1], i, nextword(&s1));
free(s);
setvar_i(res, n);
if (i < 0) i = 0;
n = (nargs > 2) ? getvar_i(av[2]) : l-i;
if (n < 0) n = 0;
- s = xmalloc(n+1);
- strncpy(s, as[0]+i, n);
- s[n] = '\0';
+ s = xstrndup(as[0]+i, n);
setvar_p(res, s);
break;
+ /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
+ * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
case B_an:
- setvar_i(res, (long)getvar_i(av[0]) & (long)getvar_i(av[1]));
+ setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
break;
case B_co:
- setvar_i(res, ~(long)getvar_i(av[0]));
+ setvar_i(res, ~getvar_i_int(av[0]));
break;
case B_ls:
- setvar_i(res, (long)getvar_i(av[0]) << (long)getvar_i(av[1]));
+ setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
break;
case B_or:
- setvar_i(res, (long)getvar_i(av[0]) | (long)getvar_i(av[1]));
+ setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
break;
case B_rs:
- setvar_i(res, (long)((unsigned long)getvar_i(av[0]) >> (unsigned long)getvar_i(av[1])));
+ setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
break;
case B_xo:
- setvar_i(res, (long)getvar_i(av[0]) ^ (long)getvar_i(av[1]));
+ setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
break;
case B_lo:
- to_xxx = tolower;
- goto lo_cont;
-
case B_up:
- to_xxx = toupper;
- lo_cont:
s1 = s = xstrdup(as[0]);
while (*s1) {
- *s1 = (*to_xxx)(*s1);
+ //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
+ if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
+ *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
s1++;
}
setvar_p(res, s);
setvar_s(res, g_buf);
break;
+ case B_mt:
+ setvar_i(res, do_mktime(as[0]));
+ break;
+
case B_ma:
re = as_regex(an[1], &sreg);
n = regexec(re, as[0], 1, pmatch, 0);
X.rsm->F = popen(L.s, "r");
X.rsm->is_pipe = TRUE;
} else {
- X.rsm->F = fopen(L.s, "r"); /* not xfopen! */
+ X.rsm->F = fopen_for_read(L.s); /* not xfopen! */
}
}
} else {
case F_rn:
R.d = (double)rand() / (double)RAND_MAX;
break;
-#if ENABLE_FEATURE_AWK_MATH
+#if ENABLE_FEATURE_AWK_LIBM
case F_co:
R.d = cos(L.d);
break;
break;
case F_sy:
- fflush(NULL);
+ fflush_all();
R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
? (system(L.s) >> 8) : 0;
break;
X.rsm = newfile(L.s);
fflush(X.rsm->F);
} else {
- fflush(NULL);
+ fflush_all();
}
}
break;
L.d /= R.d;
break;
case '&':
-#if ENABLE_FEATURE_AWK_MATH
+#if ENABLE_FEATURE_AWK_LIBM
L.d = pow(L.d, R.d);
#else
syntax_error(EMSG_NO_MATH);
ind = getvar_s(incvar(intvar[ARGIND]));
fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
if (fname && *fname && !is_assignment(fname))
- F = afopen(fname, "r");
+ F = xfopen_stdin(fname);
}
} while (!F);
#undef files_happen
}
-int awk_main(int argc, char **argv);
+int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int awk_main(int argc, char **argv)
{
unsigned opt;
char *opt_F, *opt_W;
- llist_t *opt_v = NULL;
- int i, j, flen;
+ llist_t *list_v = NULL;
+ llist_t *list_f = NULL;
+ int i, j;
var *v;
var tv;
char **envp;
*s1 = '=';
}
}
- opt_complementary = "v::";
- opt = getopt32(argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
+ opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
+ opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
argv += optind;
argc -= optind;
if (opt & 0x1)
setvar_s(intvar[FS], opt_F); // -F
- while (opt_v) { /* -v */
- if (!is_assignment(llist_pop(&opt_v)))
+ while (list_v) { /* -v */
+ if (!is_assignment(llist_pop(&list_v)))
bb_show_usage();
}
- if (opt & 0x4) { // -f
- char *s = s; /* die, gcc, die */
- FILE *from_file = afopen(g_progname, "r");
- /* one byte is reserved for some trick in next_token */
- if (fseek(from_file, 0, SEEK_END) == 0) {
- flen = ftell(from_file);
- s = xmalloc(flen + 4);
- fseek(from_file, 0, SEEK_SET);
- i = 1 + fread(s + 1, 1, flen, from_file);
- } else {
+ if (list_f) { /* -f */
+ do {
+ char *s = NULL;
+ FILE *from_file;
+
+ g_progname = llist_pop(&list_f);
+ from_file = xfopen_stdin(g_progname);
+ /* one byte is reserved for some trick in next_token */
for (i = j = 1; j > 0; i += j) {
s = xrealloc(s, i + 4096);
j = fread(s + i, 1, 4094, from_file);
}
- }
- s[i] = '\0';
- fclose(from_file);
- parse_program(s + 1);
- free(s);
+ s[i] = '\0';
+ fclose(from_file);
+ parse_program(s + 1);
+ free(s);
+ } while (list_f);
+ argc++;
} else { // no -f: take program from 1st parameter
if (!argc)
bb_show_usage();
g_progname = "cmd. line";
parse_program(*argv++);
- argc--;
}
if (opt & 0x8) // -W
bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
/* fill in ARGV array */
- setvar_i(intvar[ARGC], argc + 1);
+ setvar_i(intvar[ARGC], argc);
setari_u(intvar[ARGV], 0, "awk");
i = 0;
while (*argv)