X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=editors%2Fawk.c;h=a820c7a179f946d39775a5a2b947a485161cb33a;hb=d21f596ddb294bdb65623ba1d0e49b17d0829229;hp=b5bab16af292ac0bc2dae2d7c0a4398fdc213e9a;hpb=ae5a8aac26d0fdd77218bb2ce336aa5f5ce82f9d;p=oweals%2Fbusybox.git diff --git a/editors/awk.c b/editors/awk.c index b5bab16af..a820c7a17 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -263,7 +263,7 @@ enum { #define OC_B OC_BUILTIN -static const char tokenlist[] = +static const char tokenlist[] ALIGN1 = "\1(" NTC "\1)" NTC "\1/" NTC /* REGEXP */ @@ -373,7 +373,7 @@ enum { ENVIRON, F0, NUM_INTERNAL_VARS }; -static const char vNames[] = +static const char vNames[] ALIGN1 = "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0" "ORS\0" "RS\0*" "RT\0" "FILENAME\0" "SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0" @@ -381,34 +381,20 @@ static const char vNames[] = "NR\0" "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "$\0*" "\0"; -static const char vValues[] = +static const char vValues[] ALIGN1 = "%.6g\0" "%.6g\0" " \0" " \0" "\n\0" "\n\0" "\0" "\0" "\034\0" "\377"; /* hash size may grow to these values */ -#define FIRST_PRIME 61; -static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 }; -enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) }; +#define FIRST_PRIME 61 +static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 }; -/* globals */ +/* Globals. Split in two parts so that first one is addressed + * with (mostly short) negative offsets */ struct globals { - /* former 'struct t' */ - uint32_t t_info; /* often used */ - uint32_t t_tclass; - char *t_string; - double t_double; - int t_lineno; - int t_rollback; - - /* the rest */ - smallint icase; - smallint exiting; - smallint nextrec; - smallint nextfile; - smallint is_f0_split; chain beginseq, mainseq, endseq, *seq; node *break_ptr, *continue_ptr; rstream *iF; @@ -421,17 +407,31 @@ struct globals { nvblock *g_cb; char *g_pos; char *g_buf; + smallint icase; + smallint exiting; + smallint nextrec; + smallint nextfile; + smallint is_f0_split; +}; +struct globals2 { + uint32_t t_info; /* often used */ + uint32_t t_tclass; + char *t_string; + int t_lineno; + int t_rollback; + + var *intvar[NUM_INTERNAL_VARS]; /* often used */ /* former statics from various functions */ char *split_f0__fstrings; - rstream next_input_file__rsm; - smallint next_input_file__files_happen; - - smallint next_token__concat_inserted; uint32_t next_token__save_tclass; uint32_t next_token__save_info; uint32_t next_token__ltclass; + smallint next_token__concat_inserted; + + smallint next_input_file__files_happen; + rstream next_input_file__rsm; var *evaluate__fnargs; unsigned evaluate__seed; @@ -441,50 +441,52 @@ struct globals { tsplitter exec_builtin__tspl; - /* biggest members go last */ - var *intvar[NUM_INTERNAL_VARS]; + /* biggest and least used members go last */ + double t_double; tsplitter fsplitter, rsplitter; }; -#define G (*ptr_to_globals) -/* for debug */ -/* char Gsize[sizeof(G)]; ~0x240 */ +#define G1 (ptr_to_globals[-1]) +#define G (*(struct globals2 *const)ptr_to_globals) +/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */ +/* char G1size[sizeof(G1)]; - 0x6c */ +/* char Gsize[sizeof(G)]; - 0x1cc */ /* Trying to keep most of members accessible with short offsets: */ -/* char Gofs_seed[offsetof(struct globals, evaluate__seed)]; ~0xc0 */ +/* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */ +#define beginseq (G1.beginseq ) +#define mainseq (G1.mainseq ) +#define endseq (G1.endseq ) +#define seq (G1.seq ) +#define break_ptr (G1.break_ptr ) +#define continue_ptr (G1.continue_ptr) +#define iF (G1.iF ) +#define vhash (G1.vhash ) +#define ahash (G1.ahash ) +#define fdhash (G1.fdhash ) +#define fnhash (G1.fnhash ) +#define g_progname (G1.g_progname ) +#define g_lineno (G1.g_lineno ) +#define nfields (G1.nfields ) +#define maxfields (G1.maxfields ) +#define Fields (G1.Fields ) +#define g_cb (G1.g_cb ) +#define g_pos (G1.g_pos ) +#define g_buf (G1.g_buf ) +#define icase (G1.icase ) +#define exiting (G1.exiting ) +#define nextrec (G1.nextrec ) +#define nextfile (G1.nextfile ) +#define is_f0_split (G1.is_f0_split ) #define t_info (G.t_info ) #define t_tclass (G.t_tclass ) #define t_string (G.t_string ) #define t_double (G.t_double ) #define t_lineno (G.t_lineno ) #define t_rollback (G.t_rollback ) -#define icase (G.icase ) -#define exiting (G.exiting ) -#define nextrec (G.nextrec ) -#define nextfile (G.nextfile ) -#define is_f0_split (G.is_f0_split ) -#define beginseq (G.beginseq ) -#define mainseq (G.mainseq ) -#define endseq (G.endseq ) -#define seq (G.seq ) -#define break_ptr (G.break_ptr ) -#define continue_ptr (G.continue_ptr) -#define iF (G.iF ) -#define vhash (G.vhash ) -#define ahash (G.ahash ) -#define fdhash (G.fdhash ) -#define fnhash (G.fnhash ) -#define g_progname (G.g_progname ) -#define g_lineno (G.g_lineno ) -#define nfields (G.nfields ) -#define maxfields (G.maxfields ) -#define Fields (G.Fields ) -#define g_cb (G.g_cb ) -#define g_pos (G.g_pos ) -#define g_buf (G.g_buf ) #define intvar (G.intvar ) #define fsplitter (G.fsplitter ) #define rsplitter (G.rsplitter ) #define INIT_G() do { \ - PTR_TO_GLOBALS = xzalloc(sizeof(G)); \ + PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \ G.next_token__ltclass = TC_OPTERM; \ G.evaluate__seed = 1; \ } while (0) @@ -501,17 +503,17 @@ static int awk_exit(int) ATTRIBUTE_NORETURN; /* ---- error handling ---- */ -static const char EMSG_INTERNAL_ERROR[] = "Internal error"; -static const char EMSG_UNEXP_EOS[] = "Unexpected end of string"; -static const char EMSG_UNEXP_TOKEN[] = "Unexpected token"; -static const char EMSG_DIV_BY_ZERO[] = "Division by zero"; -static const char EMSG_INV_FMT[] = "Invalid format specifier"; -static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin"; -static const char EMSG_NOT_ARRAY[] = "Not an array"; -static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error"; -static const char EMSG_UNDEF_FUNC[] = "Call to undefined function"; +static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error"; +static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string"; +static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token"; +static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero"; +static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier"; +static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin"; +static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array"; +static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error"; +static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function"; #if !ENABLE_FEATURE_AWK_MATH -static const char EMSG_NO_MATH[] = "Math support is not compiled in"; +static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in"; #endif static void zero_out_var(var * vp) @@ -519,8 +521,8 @@ static void zero_out_var(var * vp) memset(vp, 0, sizeof(*vp)); } -static void syntax_error(const char * const message) ATTRIBUTE_NORETURN; -static void syntax_error(const char * const message) +static void syntax_error(const char *const message) ATTRIBUTE_NORETURN; +static void syntax_error(const char *const message) { bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message); } @@ -567,7 +569,7 @@ static void hash_rebuild(xhash *hash) unsigned newsize, i, idx; hash_item **newitems, *hi, *thi; - if (hash->nprime == NPRIMES) + if (hash->nprime == ARRAY_SIZE(PRIMES)) return; newsize = PRIMES[hash->nprime++]; @@ -597,7 +599,7 @@ static void *hash_find(xhash *hash, const char *name) int l; hi = hash_search(hash, name); - if (! hi) { + if (!hi) { if (++hash->nel / hash->csize > 10) hash_rebuild(hash); @@ -674,7 +676,7 @@ static char nextchar(char **s) return c; } -static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c) +static int ALWAYS_INLINE isalnum_(int c) { return (isalnum(c) || c == '_'); } @@ -822,7 +824,7 @@ static var *copyvar(var *dest, const var *src) static var *incvar(var *v) { - return setvar_i(v, getvar_i(v)+1.); + return setvar_i(v, getvar_i(v) + 1.); } /* return true if v is number or numeric string */ @@ -1336,7 +1338,7 @@ static void chain_group(void) n3 = parse_expr(TC_SEQTERM); n = chain_loop(n3); n->l.n = n2; - if (! n2) + if (!n2) n->info = OC_EXEC; } break; @@ -1443,7 +1445,7 @@ static node *mk_splitter(const char *s, tsplitter *spl) n = &spl->n; if ((n->info & OPCLSMASK) == OC_REGEXP) { regfree(re); - regfree(ire); + regfree(ire); // TODO: nuke ire, use re+1? } if (strlen(s) > 1) { mk_re_node(s, n, re); @@ -1508,7 +1510,7 @@ static int awk_split(const char *s, node *spl, char **slist) int l, n = 0; char c[4]; char *s1; - regmatch_t pmatch[2]; + regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough... /* in worst case, each char would be a separate field */ *slist = s1 = xzalloc(strlen(s) * 2 + 3); @@ -1519,9 +1521,12 @@ static int awk_split(const char *s, node *spl, char **slist) if (*getvar_s(intvar[RS]) == '\0') c[2] = '\n'; - if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ - while (*s) { - l = strcspn(s, c+2); + if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */ + if (!*s) + return n; /* "": zero fields */ + n++; /* at least one field will be there */ + do { + l = strcspn(s, c+2); /* len till next NUL or \n */ if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 && pmatch[0].rm_so <= l ) { @@ -1530,24 +1535,27 @@ static int awk_split(const char *s, node *spl, char **slist) l++; pmatch[0].rm_eo++; } + n++; /* we saw yet another delimiter */ } else { pmatch[0].rm_eo = l; if (s[l]) pmatch[0].rm_eo++; } - memcpy(s1, s, l); s1[l] = '\0'; nextword(&s1); s += pmatch[0].rm_eo; - n++; - } - } else if (c[0] == '\0') { /* null split */ + } while (*s); + return n; + } + if (c[0] == '\0') { /* null split */ while (*s) { *s1++ = *s++; *s1++ = '\0'; n++; } - } else if (c[0] != ' ') { /* single-character split */ + return n; + } + if (c[0] != ' ') { /* single-character split */ if (icase) { c[0] = toupper(c[0]); c[1] = tolower(c[1]); @@ -1557,21 +1565,23 @@ static int awk_split(const char *s, node *spl, char **slist) *s1++ = '\0'; n++; } - } else { /* space split */ - while (*s) { - s = skip_whitespace(s); - if (!*s) break; - n++; - while (*s && !isspace(*s)) - *s1++ = *s++; - *s1++ = '\0'; - } + return n; + } + /* space split */ + while (*s) { + s = skip_whitespace(s); + if (!*s) break; + n++; + while (*s && !isspace(*s)) + *s1++ = *s++; + *s1++ = '\0'; } return n; } static void split_f0(void) { +/* static char *fstrings; */ #define fstrings (G.split_f0__fstrings) int i, n; @@ -1729,7 +1739,7 @@ static int awk_getline(rstream *rsm, var *v) c = (char) rsplitter.n.info; rp = 0; - if (! m) qrealloc(&m, 256, &size); + if (!m) qrealloc(&m, 256, &size); do { b = m + a; so = eo = p; @@ -1745,7 +1755,7 @@ static int awk_getline(rstream *rsm, var *v) } } else if (c != '\0') { s = strchr(b+pp, c); - if (! s) s = memchr(b+pp, '\0', p - pp); + if (!s) s = memchr(b+pp, '\0', p - pp); if (s) { so = eo = s-b; eo++; @@ -1897,8 +1907,8 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int regex_t sreg, *re; re = as_regex(rn, &sreg); - if (! src) src = intvar[F0]; - if (! dest) dest = intvar[F0]; + if (!src) src = intvar[F0]; + if (!dest) dest = intvar[F0]; i = di = 0; sp = getvar_s(src); @@ -1943,7 +1953,8 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int sp += eo; if (i == nm) break; if (eo == so) { - if (! (ds[di++] = *sp++)) break; + ds[di] = *sp++; + if (!ds[di++]) break; } } @@ -2794,16 +2805,19 @@ int awk_main(int argc, char **argv) /* Huh, people report that sometimes environ is NULL. Oh well. */ if (environ) for (envp = environ; *envp; envp++) { - char *s = xstrdup(*envp); + /* environ is writable, thus we don't strdup it needlessly */ + char *s = *envp; char *s1 = strchr(s, '='); if (s1) { - *s1++ = '\0'; - setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1); + *s1 = '\0'; + /* Both findvar and setvar_u take const char* + * as 2nd arg -> environment is not trashed */ + setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1); + *s1 = '='; } - free(s); } opt_complementary = "v::"; - opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W); + opt = getopt32(argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W); argv += optind; argc -= optind; if (opt & 0x1)