/*
* This file is part of GNUnet
* (C) 2012 Christian Grothoff (and other contributing authors)
- *
+ *
* GNUnet is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 3, or (at your
* option) any later version.
- *
+ *
* GNUnet is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with GNUnet; see the file COPYING. If not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* @file src/regex/regex_test_lib.c
* @brief library to read regexes representing IP networks from a file.
* and simplyfinying the into one big regex, in order to run
- * tests (regex performance, mesh profiler).
+ * tests (regex performance, cadet profiler).
* @author Bartlomiej Polot
*/
#include "platform.h"
#include "gnunet_util_lib.h"
+
+/**
+ * Struct to hold the tree formed by prefix-combining the regexes.
+ */
struct RegexCombineCtx {
+
+ /**
+ * Next node with same prefix but different token.
+ */
struct RegexCombineCtx *next;
+
+ /**
+ * Prev node with same prefix but different token.
+ */
struct RegexCombineCtx *prev;
+ /**
+ * First child node with same prefix and token.
+ */
struct RegexCombineCtx *head;
+
+ /**
+ * Last child node.
+ */
struct RegexCombineCtx *tail;
+ /**
+ * Token.
+ */
char *s;
};
+/*
+static void
+space (int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ printf (" ");
+}
+
+static void
+debugctx (struct RegexCombineCtx *ctx, int level)
+{
+ struct RegexCombineCtx *p;
+ space (level);
+ if (NULL != ctx->s)
+ printf ("'%s'\n", ctx->s);
+ else
+ printf ("NULL\n");
+ for (p = ctx->head; NULL != p; p = p->next)
+ {
+ debugctx (p, level + 1);
+ }
+}
+*/
/**
* Extract a string from all prefix-combined regexes.
char *regex;
char *tmp;
char *s;
+ int opt;
- if (NULL != ctx->s)
- GNUNET_asprintf (®ex, "%s(", ctx->s);
- else
- regex = GNUNET_strdup ("(");
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix: %s\n", regex);
-
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
+ regex = GNUNET_strdup ("");
+ opt = GNUNET_NO;
for (p = ctx->head; NULL != p; p = p->next)
{
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding '%s' to innner %s\n", p->s, ctx->s);
s = regex_combine (p);
- GNUNET_asprintf (&tmp, "%s%s|", regex, s);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
+ if (strlen(s) == 0)
+ {
+ opt = GNUNET_YES;
+ }
+ else
+ {
+ GNUNET_asprintf (&tmp, "%s%s|", regex, s);
+ GNUNET_free_non_null (regex);
+ regex = tmp;
+ }
GNUNET_free_non_null (s);
- GNUNET_free_non_null (regex);
- regex = tmp;
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex, ctx->s);
}
+
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex);
len = strlen (regex);
- if (1 == len)
+ if (0 == len)
{
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n");
GNUNET_free (regex);
- return GNUNET_strdup ("");
+ return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s);
}
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "pre-partial: %s\n", regex);
if ('|' == regex[len - 1])
- regex[len - 1] = ')';
- if ('(' == regex[len - 1])
regex[len - 1] = '\0';
+ if (NULL != ctx->s)
+ {
+ if (opt)
+ GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
+ else
+ GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
+ GNUNET_free (regex);
+ regex = s;
+ }
+
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
return regex;
}
+/**
+ * Get the number of matching characters on the prefix of both strings.
+ *
+ * @param s1 String 1.
+ * @param s2 String 2.
+ *
+ * @return Number of characters of matching prefix.
+ */
+static unsigned int
+get_prefix_length (const char *s1, const char *s2)
+{
+ unsigned int l1;
+ unsigned int l2;
+ unsigned int limit;
+ unsigned int i;
+
+ l1 = strlen (s1);
+ l2 = strlen (s2);
+ limit = l1 > l2 ? l2 : l1;
+
+ for (i = 1; i <= limit; i++)
+ {
+ if (0 != strncmp (s1, s2, i))
+ return i - 1;
+ }
+ return limit;
+}
+
+
+/**
+ * Return the child context with the longest prefix match with the regex.
+ * Usually only one child will match, search all just in case.
+ *
+ * @param ctx Context whose children to search.
+ * @param regex String to match.
+ *
+ * @return Child with the longest prefix, NULL if no child matches.
+ */
+static struct RegexCombineCtx *
+get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
+{
+ struct RegexCombineCtx *p;
+ struct RegexCombineCtx *best;
+ unsigned int l;
+ unsigned int best_l;
+
+ best_l = 0;
+ best = NULL;
+ for (p = ctx->head; NULL != p; p = p->next)
+ {
+ l = get_prefix_length (p->s, regex);
+ if (l > best_l)
+ {
+ GNUNET_break (0 == best_l);
+ best = p;
+ best_l = l;
+ }
+ }
+ return best;
+}
+
+
/**
* Add a single regex to a context, combining with exisiting regex by-prefix.
*
regex_add (struct RegexCombineCtx *ctx, const char *regex)
{
struct RegexCombineCtx *p;
- const char *rest;
+ struct RegexCombineCtx *newctx;
+ unsigned int prefix_l;
+ const char *rest_r;
+ const char *rest_s;
+ size_t len;
- rest = ®ex[1];
- for (p = ctx->head; NULL != p; p = p->next)
+ if (0 == strlen (regex))
+ return;
+
+ p = get_longest_prefix (ctx, regex);
+ if (NULL != p) /* There is some prefix match, reduce regex and try again */
{
- if (p->s[0] == regex[0])
+ prefix_l = get_prefix_length (p->s, regex);
+ rest_s = &p->s[prefix_l];
+ rest_r = ®ex[prefix_l];
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
+ len = strlen (p->s);
+ if (prefix_l < len) /* only partial match, split existing state */
{
- if (1 == strlen(p->s))
- {
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "common char %s\n", p->s);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding %s\n", rest);
- regex_add (p, rest);
- }
- else
- {
- struct RegexCombineCtx *new;
- new = GNUNET_malloc (sizeof (struct RegexCombineCtx));
- new->s = GNUNET_strdup (&p->s[1]);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p has now %s\n", p->s);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p will have %.1s\n", p->s);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " regex is %s\n", regex);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new has now %s\n", new->s);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " rest is now %s\n", rest);
- p->s[1] = '\0'; /* dont realloc */
- GNUNET_CONTAINER_DLL_insert (p->head, p->tail, new);
- regex_add (p, rest);
- }
- return;
+ newctx = GNUNET_new (struct RegexCombineCtx);
+ newctx->head = p->head;
+ newctx->tail = p->tail;
+ newctx->s = GNUNET_malloc(len - prefix_l + 1);
+ strncpy (newctx->s, rest_s, len - prefix_l + 1);
+
+ p->head = newctx;
+ p->tail = newctx;
+ p->s[prefix_l] = '\0';
}
+ regex_add (p, rest_r);
+ return;
}
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
+ /* There is no prefix match, add new */
+ if (NULL == ctx->head && NULL != ctx->s)
+ {
+ /* this was the end before, add empty string */
+ newctx = GNUNET_new (struct RegexCombineCtx);
+ newctx->s = GNUNET_strdup ("");
+ GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
+ }
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
- p = GNUNET_malloc (sizeof (struct RegexCombineCtx));
- p->s = GNUNET_strdup (regex);
- GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, p);
+ newctx = GNUNET_new (struct RegexCombineCtx);
+ newctx->s = GNUNET_strdup (regex);
+ GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
}
next = p->next;
regex_ctx_destroy (p);
}
- GNUNET_free (ctx->s);
+ GNUNET_free_non_null (ctx->s); /* 's' on root node is null */
GNUNET_free (ctx);
}
* This function DOES NOT support arbitrary regex combining.
*/
char *
-GNUNET_REGEX_combine (char * const regexes[])
+REGEX_TEST_combine (char * const regexes[])
{
unsigned int i;
char *combined;
const char *current;
struct RegexCombineCtx *ctx;
- ctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+ ctx = GNUNET_new (struct RegexCombineCtx);
for (i = 0; regexes[i]; i++)
{
current = regexes[i];
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
regex_add (ctx, current);
+ /* debugctx (ctx, 0); */
}
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
+ /* debugctx (ctx, 0); */
combined = regex_combine (ctx);
/**
* Read a set of regexes from a file, one per line and return them in an array
- * suitable for GNUNET_REGEX_combine.
- * The array must be free'd using GNUNET_REGEX_free_from_file.
+ * suitable for REGEX_TEST_combine.
+ * The array must be free'd using REGEX_TEST_free_from_file.
*
* @param filename Name of the file containing the regexes.
*
* @return A newly allocated, NULL terminated array of regexes.
*/
char **
-GNUNET_REGEX_read_from_file (const char *filename)
+REGEX_TEST_read_from_file (const char *filename)
{
struct GNUNET_DISK_FileHandle *f;
unsigned int nr;
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
"Can't get size of file %s\n", filename);
+ GNUNET_DISK_file_close (f);
return NULL;
}
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
offset += len + 1;
if (len < 1)
continue;
- if (len < 6 || strncmp (®ex[len - 6], "(0|1)*", 6) != 0)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
- "%s (line %u) does not end in \"(0|1)*\"\n",
- buffer, nr);
- }
- else
- {
- len -= 6;
- buffer[len] = '\0';
- }
+ regex[len] = '\0';
regex = GNUNET_realloc (regex, len + 1);
GNUNET_array_grow (regexes, nr, nr + 1);
+ GNUNET_assert (NULL == regexes[nr - 2]);
regexes[nr - 2] = regex;
regexes[nr - 1] = NULL;
+ regex = NULL;
} while (offset < size);
+ GNUNET_free_non_null (regex);
GNUNET_free (buffer);
return regexes;
* @param regexes NULL-terminated array of regexes.
*/
void
-GNUNET_REGEX_free_from_file (char **regexes)
+REGEX_TEST_free_from_file (char **regexes)
{
unsigned int i;
GNUNET_free (regexes);
}
-/* end of regex_test_lib.c */
\ No newline at end of file
+/* end of regex_test_lib.c */