add tests for gunzip

[oweals/busybox.git] / editors / awk.c
diff --git a/editors/awk.c b/editors/awk.c

index b5bab16af292ac0bc2dae2d7c0a4398fdc213e9a..a820c7a179f946d39775a5a2b947a485161cb33a 100644 (file)
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -263,7 +263,7 @@ enum {
  
  #define        OC_B    OC_BUILTIN
  
-static const char tokenlist[] =
+static const char tokenlist[] ALIGN1 =
         "\1("       NTC
         "\1)"       NTC
         "\1/"       NTC                                 /* REGEXP */
@@ -373,7 +373,7 @@ enum {
         ENVIRON,    F0,         NUM_INTERNAL_VARS
  };
  
-static const char vNames[] =
+static const char vNames[] ALIGN1 =
         "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
         "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
         "SUBSEP\0"  "ARGIND\0"  "ARGC\0"    "ARGV\0"
@@ -381,34 +381,20 @@ static const char vNames[] =
         "NR\0"      "NF\0*"     "IGNORECASE\0*"
         "ENVIRON\0" "$\0*"      "\0";
  
-static const char vValues[] =
+static const char vValues[] ALIGN1 =
         "%.6g\0"    "%.6g\0"    " \0"       " \0"
         "\n\0"      "\n\0"      "\0"        "\0"
         "\034\0"
         "\377";
  
  /* hash size may grow to these values */
-#define FIRST_PRIME 61;
-static const unsigned PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
-enum { NPRIMES = sizeof(PRIMES) / sizeof(PRIMES[0]) };
+#define FIRST_PRIME 61
+static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
  
-/* globals */
  
+/* Globals. Split in two parts so that the first one (G1) is addressed
+ * with (mostly short) negative offsets from ptr_to_globals */
  struct globals {
-       /* former 'struct t' */
-       uint32_t t_info; /* often used */
-       uint32_t t_tclass;
-       char *t_string;
-       double t_double;
-       int t_lineno;
-       int t_rollback;
-
-       /* the rest */
-       smallint icase;
-       smallint exiting;
-       smallint nextrec;
-       smallint nextfile;
-       smallint is_f0_split;
         chain beginseq, mainseq, endseq, *seq;
         node *break_ptr, *continue_ptr;
         rstream *iF;
@@ -421,17 +407,31 @@ struct globals {
         nvblock *g_cb;
         char *g_pos;
         char *g_buf;
+       smallint icase;
+       smallint exiting;
+       smallint nextrec;
+       smallint nextfile;
+       smallint is_f0_split;
+};
+struct globals2 {
+       uint32_t t_info; /* often used */
+       uint32_t t_tclass;
+       char *t_string;
+       int t_lineno;
+       int t_rollback;
+
+       var *intvar[NUM_INTERNAL_VARS]; /* often used */
  
         /* former statics from various functions */
         char *split_f0__fstrings;
  
-       rstream next_input_file__rsm;
-       smallint next_input_file__files_happen;
-
-        smallint next_token__concat_inserted;
         uint32_t next_token__save_tclass;
         uint32_t next_token__save_info;
         uint32_t next_token__ltclass;
+       smallint next_token__concat_inserted;
+
+       smallint next_input_file__files_happen;
+       rstream next_input_file__rsm;
  
         var *evaluate__fnargs;
         unsigned evaluate__seed;
@@ -441,50 +441,52 @@ struct globals {
  
         tsplitter exec_builtin__tspl;
  
-       /* biggest members go last */
-       var *intvar[NUM_INTERNAL_VARS];
+       /* biggest and least used members go last */
+       double t_double;
         tsplitter fsplitter, rsplitter;
  };
-#define G (*ptr_to_globals)
-/* for debug */
-/* char Gsize[sizeof(G)];  ~0x240 */
+#define G1 (ptr_to_globals[-1])
+#define G (*(struct globals2 *const)ptr_to_globals)
+/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
+/* char G1size[sizeof(G1)]; - 0x6c */
+/* char Gsize[sizeof(G)]; - 0x1cc */
  /* Trying to keep most of members accessible with short offsets: */
-/* char Gofs_seed[offsetof(struct globals, evaluate__seed)];  ~0xc0 */
+/* char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
+#define beginseq     (G1.beginseq    )
+#define mainseq      (G1.mainseq     )
+#define endseq       (G1.endseq      )
+#define seq          (G1.seq         )
+#define break_ptr    (G1.break_ptr   )
+#define continue_ptr (G1.continue_ptr)
+#define iF           (G1.iF          )
+#define vhash        (G1.vhash       )
+#define ahash        (G1.ahash       )
+#define fdhash       (G1.fdhash      )
+#define fnhash       (G1.fnhash      )
+#define g_progname   (G1.g_progname  )
+#define g_lineno     (G1.g_lineno    )
+#define nfields      (G1.nfields     )
+#define maxfields    (G1.maxfields   )
+#define Fields       (G1.Fields      )
+#define g_cb         (G1.g_cb        )
+#define g_pos        (G1.g_pos       )
+#define g_buf        (G1.g_buf       )
+#define icase        (G1.icase       )
+#define exiting      (G1.exiting     )
+#define nextrec      (G1.nextrec     )
+#define nextfile     (G1.nextfile    )
+#define is_f0_split  (G1.is_f0_split )
  #define t_info       (G.t_info      )
  #define t_tclass     (G.t_tclass    )
  #define t_string     (G.t_string    )
  #define t_double     (G.t_double    )
  #define t_lineno     (G.t_lineno    )
  #define t_rollback   (G.t_rollback  )
-#define icase        (G.icase       )
-#define exiting      (G.exiting     )
-#define nextrec      (G.nextrec     )
-#define nextfile     (G.nextfile    )
-#define is_f0_split  (G.is_f0_split )
-#define beginseq     (G.beginseq    )
-#define mainseq      (G.mainseq     )
-#define endseq       (G.endseq      )
-#define seq          (G.seq         )
-#define break_ptr    (G.break_ptr   )
-#define continue_ptr (G.continue_ptr)
-#define iF           (G.iF          )
-#define vhash        (G.vhash       )
-#define ahash        (G.ahash       )
-#define fdhash       (G.fdhash      )
-#define fnhash       (G.fnhash      )
-#define g_progname   (G.g_progname  )
-#define g_lineno     (G.g_lineno    )
-#define nfields      (G.nfields     )
-#define maxfields    (G.maxfields   )
-#define Fields       (G.Fields      )
-#define g_cb         (G.g_cb        )
-#define g_pos        (G.g_pos       )
-#define g_buf        (G.g_buf       )
  #define intvar       (G.intvar      )
  #define fsplitter    (G.fsplitter   )
  #define rsplitter    (G.rsplitter   )
  #define INIT_G() do { \
-       PTR_TO_GLOBALS = xzalloc(sizeof(G)); \
+       PTR_TO_GLOBALS = xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1); \
         G.next_token__ltclass = TC_OPTERM; \
         G.evaluate__seed = 1; \
  } while (0)
@@ -501,17 +503,17 @@ static int awk_exit(int) ATTRIBUTE_NORETURN;
  
  /* ---- error handling ---- */
  
-static const char EMSG_INTERNAL_ERROR[] = "Internal error";
-static const char EMSG_UNEXP_EOS[] = "Unexpected end of string";
-static const char EMSG_UNEXP_TOKEN[] = "Unexpected token";
-static const char EMSG_DIV_BY_ZERO[] = "Division by zero";
-static const char EMSG_INV_FMT[] = "Invalid format specifier";
-static const char EMSG_TOO_FEW_ARGS[] = "Too few arguments for builtin";
-static const char EMSG_NOT_ARRAY[] = "Not an array";
-static const char EMSG_POSSIBLE_ERROR[] = "Possible syntax error";
-static const char EMSG_UNDEF_FUNC[] = "Call to undefined function";
+static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
+static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
+static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
+static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
+static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
+static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
+static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
+static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
+static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
  #if !ENABLE_FEATURE_AWK_MATH
-static const char EMSG_NO_MATH[] = "Math support is not compiled in";
+static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
  #endif
  
  static void zero_out_var(var * vp)
@@ -519,8 +521,8 @@ static void zero_out_var(var * vp)
         memset(vp, 0, sizeof(*vp));
  }
  
-static void syntax_error(const char * const message) ATTRIBUTE_NORETURN;
-static void syntax_error(const char * const message)
+static void syntax_error(const char *const message) ATTRIBUTE_NORETURN;
+static void syntax_error(const char *const message)
  {
         bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
  }
@@ -567,7 +569,7 @@ static void hash_rebuild(xhash *hash)
         unsigned newsize, i, idx;
         hash_item **newitems, *hi, *thi;
  
-       if (hash->nprime == NPRIMES)
+       if (hash->nprime == ARRAY_SIZE(PRIMES))
                 return;
  
         newsize = PRIMES[hash->nprime++];
@@ -597,7 +599,7 @@ static void *hash_find(xhash *hash, const char *name)
         int l;
  
         hi = hash_search(hash, name);
-       if (! hi) {
+       if (!hi) {
                 if (++hash->nel / hash->csize > 10)
                         hash_rebuild(hash);
  
@@ -674,7 +676,7 @@ static char nextchar(char **s)
         return c;
  }
  
-static int ATTRIBUTE_ALWAYS_INLINE isalnum_(int c)
+static int ALWAYS_INLINE isalnum_(int c)
  {
         return (isalnum(c) || c == '_');
  }
@@ -822,7 +824,7 @@ static var *copyvar(var *dest, const var *src)
  
  static var *incvar(var *v)
  {
-       return setvar_i(v, getvar_i(v)+1.);
+       return setvar_i(v, getvar_i(v) + 1.);
  }
  
  /* return true if v is number or numeric string */
@@ -1336,7 +1338,7 @@ static void chain_group(void)
                                 n3 = parse_expr(TC_SEQTERM);
                                 n = chain_loop(n3);
                                 n->l.n = n2;
-                               if (! n2)
+                               if (!n2)
                                         n->info = OC_EXEC;
                         }
                         break;
@@ -1443,7 +1445,7 @@ static node *mk_splitter(const char *s, tsplitter *spl)
         n = &spl->n;
         if ((n->info & OPCLSMASK) == OC_REGEXP) {
                 regfree(re);
-               regfree(ire);
+               regfree(ire); // TODO: nuke ire, use re+1?
         }
         if (strlen(s) > 1) {
                 mk_re_node(s, n, re);
@@ -1508,7 +1510,7 @@ static int awk_split(const char *s, node *spl, char **slist)
         int l, n = 0;
         char c[4];
         char *s1;
-       regmatch_t pmatch[2];
+       regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
  
         /* in worst case, each char would be a separate field */
         *slist = s1 = xzalloc(strlen(s) * 2 + 3);
@@ -1519,9 +1521,12 @@ static int awk_split(const char *s, node *spl, char **slist)
         if (*getvar_s(intvar[RS]) == '\0')
                 c[2] = '\n';
  
-       if ((spl->info & OPCLSMASK) == OC_REGEXP) {             /* regex split */
-               while (*s) {
-                       l = strcspn(s, c+2);
+       if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
+               if (!*s)
+                       return n; /* "": zero fields */
+               n++; /* at least one field will be there */
+               do {
+                       l = strcspn(s, c+2); /* len till next NUL or \n */
                         if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
                          && pmatch[0].rm_so <= l
                         ) {
@@ -1530,24 +1535,27 @@ static int awk_split(const char *s, node *spl, char **slist)
                                         l++;
                                         pmatch[0].rm_eo++;
                                 }
+                               n++; /* we saw yet another delimiter */
                         } else {
                                 pmatch[0].rm_eo = l;
                                 if (s[l]) pmatch[0].rm_eo++;
                         }
-
                         memcpy(s1, s, l);
                         s1[l] = '\0';
                         nextword(&s1);
                         s += pmatch[0].rm_eo;
-                       n++;
-               }
-       } else if (c[0] == '\0') {              /* null split */
+               } while (*s);
+               return n;
+       }
+       if (c[0] == '\0') {  /* null split */
                 while (*s) {
                         *s1++ = *s++;
                         *s1++ = '\0';
                         n++;
                 }
-       } else if (c[0] != ' ') {               /* single-character split */
+               return n;
+       }
+       if (c[0] != ' ') {  /* single-character split */
                 if (icase) {
                         c[0] = toupper(c[0]);
                         c[1] = tolower(c[1]);
@@ -1557,21 +1565,23 @@ static int awk_split(const char *s, node *spl, char **slist)
                         *s1++ = '\0';
                         n++;
                 }
-       } else {                                /* space split */
-               while (*s) {
-                       s = skip_whitespace(s);
-                       if (!*s) break;
-                       n++;
-                       while (*s && !isspace(*s))
-                               *s1++ = *s++;
-                       *s1++ = '\0';
-               }
+               return n;
+       }
+       /* space split */
+       while (*s) {
+               s = skip_whitespace(s);
+               if (!*s) break;
+               n++;
+               while (*s && !isspace(*s))
+                       *s1++ = *s++;
+               *s1++ = '\0';
         }
         return n;
  }
  
  static void split_f0(void)
  {
+/* static char *fstrings; */
  #define fstrings (G.split_f0__fstrings)
  
         int i, n;
@@ -1729,7 +1739,7 @@ static int awk_getline(rstream *rsm, var *v)
         c = (char) rsplitter.n.info;
         rp = 0;
  
-       if (! m) qrealloc(&m, 256, &size);
+       if (!m) qrealloc(&m, 256, &size);
         do {
                 b = m + a;
                 so = eo = p;
@@ -1745,7 +1755,7 @@ static int awk_getline(rstream *rsm, var *v)
                                 }
                         } else if (c != '\0') {
                                 s = strchr(b+pp, c);
-                               if (! s) s = memchr(b+pp, '\0', p - pp);
+                               if (!s) s = memchr(b+pp, '\0', p - pp);
                                 if (s) {
                                         so = eo = s-b;
                                         eo++;
@@ -1897,8 +1907,8 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
         regex_t sreg, *re;
  
         re = as_regex(rn, &sreg);
-       if (! src) src = intvar[F0];
-       if (! dest) dest = intvar[F0];
+       if (!src) src = intvar[F0];
+       if (!dest) dest = intvar[F0];
  
         i = di = 0;
         sp = getvar_s(src);
@@ -1943,7 +1953,8 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
                 sp += eo;
                 if (i == nm) break;
                 if (eo == so) {
-                       if (! (ds[di++] = *sp++)) break;
+                       ds[di] = *sp++;
+                       if (!ds[di++]) break;
                 }
         }
  
@@ -2794,16 +2805,19 @@ int awk_main(int argc, char **argv)
  
         /* Huh, people report that sometimes environ is NULL. Oh well. */
         if (environ) for (envp = environ; *envp; envp++) {
-               char *s = xstrdup(*envp);
+               /* environ is writable, thus we don't strdup it needlessly */
+               char *s = *envp;
                 char *s1 = strchr(s, '=');
                 if (s1) {
-                       *s1++ = '\0';
-                       setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1);
+                       *s1 = '\0';
+                       /* Both findvar and setvar_u take const char*
+                        * as 2nd arg -> environment is not trashed */
+                       setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
+                       *s1 = '=';
                 }
-               free(s);
         }
         opt_complementary = "v::";
-       opt = getopt32(argc, argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
+       opt = getopt32(argv, "F:v:f:W:", &opt_F, &opt_v, &g_progname, &opt_W);
         argv += optind;
         argc -= optind;
         if (opt & 0x1)