/*
This file is part of GNUnet.
- Copyright (C) 2011 - 2013 GNUnet e.V.
+ Copyright (C) 2011 - 2017 GNUnet e.V.
GNUnet is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
/*************************** TESTBED PEER SETUP *****************************/
/******************************************************************************/
+/**
+ * Process the text buffer counting the non-empty lines and separating them
+ * with NULL characters, for later ease of copy using (as)printf.
+ *
+ * @param data Memory buffer with strings.
+ * @param data_size Size of the @a data buffer in bytes.
+ * @param str_max Maximum number of strings to return.
+ * @return Positive number of lines found in the buffer,
+ * #GNUNET_SYSERR otherwise.
+ */
+static int
+count_and_separate_strings (char *data,
+ uint64_t data_size,
+ unsigned int str_max)
+{
+ char *buf; // Keep track of last string to skip blank lines
+ unsigned int offset;
+ unsigned int str_cnt;
+
+ buf = data;
+ offset = 0;
+ str_cnt = 0;
+ while ( (offset < (data_size - 1)) && (str_cnt < str_max) )
+ {
+ offset++;
+ if ( ((data[offset] == '\n')) &&
+ (buf != &data[offset]) )
+ {
+ data[offset] = '\0';
+ str_cnt++;
+ buf = &data[offset + 1];
+ }
+ else if ( (data[offset] == '\n') ||
+ (data[offset] == '\0') )
+ buf = &data[offset + 1];
+ }
+ return str_cnt;
+}
+
+
+/**
+ * Allocate a string array and fill it with the prefixed strings
+ * from a pre-processed, NULL-separated memory region.
+ *
+ * @param data Preprocessed memory with strings
+ * @param data_size Size of the @a data buffer in bytes.
+ * @param strings Address of the string array to be created.
+ * Must be freed by caller if function end in success.
+ * @param str_cnt String count. The @a data buffer should contain
+ * at least this many NULL-separated strings.
+ * @return #GNUNET_OK in ase of success, #GNUNET_SYSERR otherwise.
+ * In case of error @a strings must not be freed.
+ */
+static int
+create_string_array (char *data, uint64_t data_size,
+ char ***strings, unsigned int str_cnt)
+{
+ uint64_t offset;
+ uint64_t len;
+ unsigned int i;
+
+ *strings = GNUNET_malloc (sizeof (char *) * str_cnt);
+ offset = 0;
+ for (i = 0; i < str_cnt; i++)
+ {
+ len = strlen (&data[offset]);
+ if (offset + len >= data_size)
+ {
+ GNUNET_free (*strings);
+ *strings = NULL;
+ return GNUNET_SYSERR;
+ }
+ if (0 == len) // empty line
+ {
+ offset++;
+ i--;
+ continue;
+ }
+
+ GNUNET_asprintf (&(*strings)[i],
+ "%s%s",
+ regex_prefix,
+ &data[offset]);
+ offset += len + 1;
+ }
+ return GNUNET_OK;
+}
+
/**
* Load search strings from given filename. One search string per line.
unsigned int limit)
{
char *data;
- char *buf;
uint64_t filesize;
- unsigned int offset;
int str_cnt;
- unsigned int i;
+ /* Sanity checks */
if (NULL == filename)
{
return GNUNET_SYSERR;
}
-
if (GNUNET_YES != GNUNET_DISK_file_test (filename))
{
GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
&filesize,
GNUNET_YES,
GNUNET_YES))
- filesize = 0;
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ "Search strings file %s cannot be read.\n",
+ filename);
+ return GNUNET_SYSERR;
+ }
if (0 == filesize)
{
GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
filename);
return GNUNET_SYSERR;
}
+
+ /* Read data into memory */
data = GNUNET_malloc (filesize + 1);
if (filesize != GNUNET_DISK_fn_read (filename,
data,
filename);
return GNUNET_SYSERR;
}
- buf = data;
- offset = 0;
- str_cnt = 0;
- while ( (offset < (filesize - 1)) && (str_cnt < limit) )
- {
- offset++;
- if ( ((data[offset] == '\n')) &&
- (buf != &data[offset]) )
- {
- data[offset] = '\0';
- str_cnt++;
- buf = &data[offset + 1];
- }
- else if ( (data[offset] == '\n') ||
- (data[offset] == '\0') )
- buf = &data[offset + 1];
- }
- *strings = GNUNET_malloc (sizeof (char *) * str_cnt);
- offset = 0;
- for (i = 0; i < str_cnt; i++)
+
+ /* Process buffer and build array */
+ str_cnt = count_and_separate_strings (data, filesize, limit);
+ if (GNUNET_OK != create_string_array (data, filesize, strings, str_cnt))
{
- GNUNET_asprintf (&(*strings)[i],
- "%s%s",
- regex_prefix,
- &data[offset]);
- offset += strlen (&data[offset]) + 1;
+ str_cnt = GNUNET_SYSERR;
}
GNUNET_free (data);
return str_cnt;
/*
* This file is part of GNUnet
- * Copyright (C) 2012 GNUnet e.V.
+ * Copyright (C) 2012-2017 GNUnet e.V.
*
* GNUnet is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
/**
* Struct to hold the tree formed by prefix-combining the regexes.
*/
-struct RegexCombineCtx {
-
- /**
- * Next node with same prefix but different token.
- */
- struct RegexCombineCtx *next;
-
- /**
- * Prev node with same prefix but different token.
- */
- struct RegexCombineCtx *prev;
-
+struct RegexCombineCtx
+{
/**
- * First child node with same prefix and token.
+ * Child nodes with same prefix and token.
*/
- struct RegexCombineCtx *head;
+ struct RegexCombineCtx **children;
/**
- * Last child node.
+ * Alphabet size (how many @a children there are)
*/
- struct RegexCombineCtx *tail;
+ unsigned int size;
/**
* Token.
char *s;
};
-/*
+
+/**
+ * Char 2 int
+ *
+ * Convert a character into its int value depending on the base used
+ *
+ * @param c Char
+ * @param size base (2, 8 or 16(hex))
+ *
+ * @return Int in range [0, (base-1)]
+ */
+static int
+c2i (char c, int size)
+{
+ switch (size)
+ {
+ case 2:
+ case 8:
+ return c - '0';
+ break;
+ case 16:
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ else if (c >= 'A' && c <= 'F')
+ return c - 'A' + 10;
+ else if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ else
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Cannot convert char %c in base %u\n",
+ c, size);
+ GNUNET_assert (0);
+ }
+ break;
+ default:
+ GNUNET_assert (0);
+ }
+}
+
+
+/**
+ * Printf spaces to indent the regex tree
+ *
+ * @param n Indentation level
+ */
static void
space (int n)
{
int i;
for (i = 0; i < n; i++)
- printf (" ");
+ fprintf (stderr, "| ");
}
+
+/**
+ * Printf the combined regex ctx.
+ *
+ * @param ctx The ctx to printf
+ * @param level Indentation level to start with
+ */
static void
debugctx (struct RegexCombineCtx *ctx, int level)
{
- struct RegexCombineCtx *p;
- space (level);
+ return;
+ unsigned int i;
if (NULL != ctx->s)
- printf ("'%s'\n", ctx->s);
+ {
+ space (level - 1);
+ fprintf (stderr, "%u:'%s'\n", c2i(ctx->s[0], ctx->size), ctx->s);
+ }
else
- printf ("NULL\n");
- for (p = ctx->head; NULL != p; p = p->next)
+ fprintf (stderr, "ROOT (base %u)\n", ctx->size);
+ for (i = 0; i < ctx->size; i++)
+ {
+ if (NULL != ctx->children[i])
+ {
+ space (level);
+ debugctx (ctx->children[i], level + 1);
+ }
+ }
+ fflush(stderr);
+}
+
+
+/**
+ * Add a single regex to a context, combining with exisiting regex by-prefix.
+ *
+ * @param ctx Context with 0 or more regexes.
+ * @param regex Regex to add.
+ */
+static void
+regex_add (struct RegexCombineCtx *ctx, const char *regex);
+
+
+/**
+ * Create and initialize a new RegexCombineCtx.
+ *
+ * @param alphabet_size Size of the alphabet (and the Trie array)
+ */
+static struct RegexCombineCtx *
+new_regex_ctx (unsigned int alphabet_size)
+{
+ struct RegexCombineCtx *ctx;
+ size_t array_size;
+
+ array_size = sizeof(struct RegexCombineCtx *) * alphabet_size;
+ ctx = GNUNET_new (struct RegexCombineCtx);
+ ctx->children = GNUNET_malloc (array_size);
+ ctx->size = alphabet_size;
+
+ return ctx;
+}
+
+static void
+move_children (struct RegexCombineCtx *dst, const struct RegexCombineCtx *src)
+{
+ size_t array_size;
+
+ array_size = sizeof(struct RegexCombineCtx *) * src->size;
+ memcpy (dst->children, src->children, array_size);
+ for (int i = 0; i < src->size; i++)
{
- debugctx (p, level + 1);
+ src->children[i] = NULL;
}
}
-*/
+
/**
* Extract a string from all prefix-combined regexes.
regex_combine (struct RegexCombineCtx *ctx)
{
struct RegexCombineCtx *p;
+ unsigned int i;
size_t len;
char *regex;
char *tmp;
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
regex = GNUNET_strdup ("");
opt = GNUNET_NO;
- for (p = ctx->head; NULL != p; p = p->next)
+ for (i = 0; i < ctx->size; i++)
{
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding '%s' to innner %s\n", p->s, ctx->s);
+ p = ctx->children[i];
+ if (NULL == p)
+ continue;
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "adding '%s' to innner %s\n",
+ p->s, ctx->s);
s = regex_combine (p);
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
if (strlen(s) == 0)
l2 = strlen (s2);
limit = l1 > l2 ? l2 : l1;
- for (i = 1; i <= limit; i++)
+ for (i = 0; i < limit; i++)
{
- if (0 != strncmp (s1, s2, i))
- return i - 1;
+ if (s1[i] != s2[i])
+ return i;
}
return limit;
}
{
struct RegexCombineCtx *p;
struct RegexCombineCtx *best;
+ unsigned int i;
unsigned int l;
unsigned int best_l;
best_l = 0;
best = NULL;
- for (p = ctx->head; NULL != p; p = p->next)
+
+ for (i = 0; i < ctx->size; i++)
{
+ p = ctx->children[i];
+ if (NULL == p)
+ continue;
+
l = get_prefix_length (p->s, regex);
if (l > best_l)
{
return best;
}
+static void
+regex_add_multiple (struct RegexCombineCtx *ctx,
+ const char *regex,
+ struct RegexCombineCtx **children)
+{
+ char tmp[2];
+ long unsigned int i;
+ size_t l;
+ struct RegexCombineCtx *newctx;
+ unsigned int count;
+
+ if ('(' != regex[0])
+ {
+ GNUNET_assert (0);
+ }
+
+ /* Does the regex cover *all* possible children? Then don't add any,
+ * as it will be covered by the post-regex "(a-z)*"
+ */
+ l = strlen (regex);
+ count = 0;
+ for (i = 1UL; i < l; i++)
+ {
+ if (regex[i] != '|' && regex[i] != ')')
+ {
+ count++;
+ }
+ }
+ if (count == ctx->size)
+ {
+ return;
+ }
+
+ /* Add every component as a child node */
+ tmp[1] = '\0';
+ for (i = 1UL; i < l; i++)
+ {
+ if (regex[i] != '|' && regex[i] != ')')
+ {
+ tmp[0] = regex[i];
+ newctx = new_regex_ctx(ctx->size);
+ newctx->s = GNUNET_strdup (tmp);
+ if (children != NULL)
+ memcpy (newctx->children, children, sizeof (*children) * ctx->size);
+ ctx->children[c2i(tmp[0], ctx->size)] = newctx;
+ }
+ }
+}
+
+/**
+ * Add a single regex to a context, splitting the exisiting state.
+ *
+ * We only had a partial match, split existing state, truncate the current node
+ * so it only contains the prefix, add suffix(es) as children.
+ *
+ * @param ctx Context to split.
+ * @param len Lenght of ctx->s
+ * @param prefix_l Lenght of common prefix of the new regex and @a ctx->s
+ */
+static void
+regex_split (struct RegexCombineCtx *ctx,
+ unsigned int len,
+ unsigned int prefix_l)
+{
+ struct RegexCombineCtx *newctx;
+ unsigned int idx;
+ char *suffix;
+
+ suffix = GNUNET_malloc (len - prefix_l + 1);
+ strncpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1);
+
+ /* Suffix saved, truncate current node so it only contains the prefix,
+ * copy any children nodes to put as grandchildren and initialize new empty
+ * children array.
+ */
+ ctx->s[prefix_l] = '\0';
+
+ /* If the suffix is an OR expression, add multiple children */
+ if ('(' == suffix[0])
+ {
+ struct RegexCombineCtx **tmp;
+
+ tmp = ctx->children;
+ ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size);
+ regex_add_multiple (ctx, suffix, tmp);
+ GNUNET_free (tmp);
+ return;
+ }
+
+ /* The suffix is a normal string, add as one node */
+ newctx = new_regex_ctx (ctx->size);
+ newctx->s = suffix;
+ move_children (newctx, ctx);
+ idx = c2i(suffix[0], ctx->size);
+ ctx->children[idx] = newctx;
+}
+
/**
* Add a single regex to a context, combining with exisiting regex by-prefix.
{
struct RegexCombineCtx *p;
struct RegexCombineCtx *newctx;
+ long unsigned int l;
unsigned int prefix_l;
const char *rest_r;
const char *rest_s;
size_t len;
+ int idx;
+
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "regex_add '%s' into '%s'\n",
+ regex, ctx->s);
+ l = strlen (regex);
+ if (0UL == l)
+ return;
- if (0 == strlen (regex))
+ /* If the regex is in the form of (a|b|c), add every character separately */
+ if ('(' == regex[0])
+ {
+ regex_add_multiple (ctx, regex, NULL);
return;
+ }
p = get_longest_prefix (ctx, regex);
- if (NULL != p) /* There is some prefix match, reduce regex and try again */
+ if (NULL != p)
{
+ /* There is some prefix match, reduce regex and try again */
prefix_l = get_prefix_length (p->s, regex);
rest_s = &p->s[prefix_l];
rest_r = ®ex[prefix_l];
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
len = strlen (p->s);
- if (prefix_l < len) /* only partial match, split existing state */
+ if (prefix_l < len)
{
- newctx = GNUNET_new (struct RegexCombineCtx);
- newctx->head = p->head;
- newctx->tail = p->tail;
- newctx->s = GNUNET_malloc(len - prefix_l + 1);
- strncpy (newctx->s, rest_s, len - prefix_l + 1);
-
- p->head = newctx;
- p->tail = newctx;
- p->s[prefix_l] = '\0';
+ regex_split (p, len, prefix_l);
}
regex_add (p, rest_r);
return;
}
+
/* There is no prefix match, add new */
- if (NULL == ctx->head && NULL != ctx->s)
+ idx = c2i(regex[0], ctx->size);
+ if (NULL == ctx->children[idx] && NULL != ctx->s)
{
/* this was the end before, add empty string */
- newctx = GNUNET_new (struct RegexCombineCtx);
+ newctx = new_regex_ctx (ctx->size);
newctx->s = GNUNET_strdup ("");
- GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
+ ctx->children[idx] = newctx;
}
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
- newctx = GNUNET_new (struct RegexCombineCtx);
+ newctx = new_regex_ctx(ctx->size);
newctx->s = GNUNET_strdup (regex);
- GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
+ ctx->children[idx] = newctx;
}
static void
regex_ctx_destroy (struct RegexCombineCtx *ctx)
{
- struct RegexCombineCtx *p;
- struct RegexCombineCtx *next;
+ unsigned int i;
+
+ if (NULL == ctx)
+ return;
- for (p = ctx->head; NULL != p; p = next)
+ for (i = 0; i < ctx->size; i++)
{
- next = p->next;
- regex_ctx_destroy (p);
+ regex_ctx_destroy (ctx->children[i]);
}
GNUNET_free_non_null (ctx->s); /* 's' on root node is null */
+ GNUNET_free (ctx->children);
GNUNET_free (ctx);
}
/**
- * Return a prefix-combine regex that matches the same strings as
+ * Combine an array of regexes into a single prefix-shared regex.
+ * Returns a prefix-combine regex that matches the same strings as
* any of the original regexes.
*
* WARNING: only useful for reading specific regexes for specific applications,
* namely the gnunet-regex-profiler / gnunet-regex-daemon.
* This function DOES NOT support arbitrary regex combining.
+ *
+ * @param regexes A NULL-terminated array of regexes.
+ * @param alphabet_size Size of the alphabet the regex uses.
+ *
+ * @return A string with a single regex that matches any of the original regexes
*/
char *
-REGEX_TEST_combine (char * const regexes[])
+REGEX_TEST_combine (char * const regexes[], unsigned int alphabet_size)
{
unsigned int i;
char *combined;
const char *current;
struct RegexCombineCtx *ctx;
- ctx = GNUNET_new (struct RegexCombineCtx);
+ ctx = new_regex_ctx (alphabet_size);
for (i = 0; regexes[i]; i++)
{
current = regexes[i];
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
regex_add (ctx, current);
- /* debugctx (ctx, 0); */
+ debugctx (ctx, 0);
}
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
- /* debugctx (ctx, 0); */
+ debugctx (ctx, 0);
combined = regex_combine (ctx);