X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fregex%2Fregex_test_lib.c;h=d39db7ef84f9185a7d833b68dc6ac51fd50ed58a;hb=eb420e4b0f23c6ddb079cd40bc76b4f2a35bdbb1;hp=9863d6f4f2de2ee7d38db75d81dbb53cbf5877a6;hpb=39a13b80f71488e0c91f7fb6a78834017f589cbb;p=oweals%2Fgnunet.git diff --git a/src/regex/regex_test_lib.c b/src/regex/regex_test_lib.c index 9863d6f4f..d39db7ef8 100644 --- a/src/regex/regex_test_lib.c +++ b/src/regex/regex_test_lib.c @@ -28,16 +28,62 @@ #include "platform.h" #include "gnunet_util_lib.h" + +/** + * Struct to hold the tree formed by prefix-combining the regexes. + */ struct RegexCombineCtx { + + /** + * Next node with same prefix but different token. + */ struct RegexCombineCtx *next; + + /** + * Prev node with same prefix but different token. + */ struct RegexCombineCtx *prev; + /** + * First child node with same prefix and token. + */ struct RegexCombineCtx *head; + + /** + * Last child node. + */ struct RegexCombineCtx *tail; + /** + * Token. + */ char *s; }; +/* +static void +space (int n) +{ + int i; + for (i = 0; i < n; i++) + printf (" "); +} + +static void +debugctx (struct RegexCombineCtx *ctx, int level) +{ + struct RegexCombineCtx *p; + space (level); + if (NULL != ctx->s) + printf ("'%s'\n", ctx->s); + else + printf ("NULL\n"); + for (p = ctx->head; NULL != p; p = p->next) + { + debugctx (p, level + 1); + } +} +*/ /** * Extract a string from all prefix-combined regexes. @@ -54,36 +100,119 @@ regex_combine (struct RegexCombineCtx *ctx) char *regex; char *tmp; char *s; + int opt; - if (NULL != ctx->s) - GNUNET_asprintf (®ex, "%s(", ctx->s); - else - regex = GNUNET_strdup ("("); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix: %s\n", regex); - + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s); + regex = GNUNET_strdup (""); + opt = GNUNET_NO; for (p = ctx->head; NULL != p; p = p->next) { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding '%s' to innner %s\n", p->s, ctx->s); s = regex_combine (p); - GNUNET_asprintf (&tmp, "%s%s|", regex, s); + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); + if (strlen(s) == 0) + { + opt = GNUNET_YES; + } + else + { + GNUNET_asprintf (&tmp, "%s%s|", regex, s); + GNUNET_free_non_null (regex); + regex = tmp; + } GNUNET_free_non_null (s); - GNUNET_free_non_null (regex); - regex = tmp; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex, ctx->s); } + + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex); len = strlen (regex); - if (1 == len) - return GNUNET_strdup (""); + if (0 == len) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n"); + GNUNET_free (regex); + return GNUNET_strdup (ctx->s); + } - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "pre-partial: %s\n", regex); if ('|' == regex[len - 1]) - regex[len - 1] = ')'; - if ('(' == regex[len - 1]) regex[len - 1] = '\0'; + if (NULL != ctx->s) + { + if (opt) + GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex); + else + GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex); + GNUNET_free (regex); + regex = s; + } + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex); return regex; } +/** + * Get the number of matching characters on the prefix of both strings. + * + * @param s1 String 1. + * @param s2 String 2. + * + * @return Number of characters of matching prefix. + */ +static unsigned int +get_prefix_length (const char *s1, const char *s2) +{ + unsigned int l1; + unsigned int l2; + unsigned int limit; + unsigned int i; + + l1 = strlen (s1); + l2 = strlen (s2); + limit = l1 > l2 ? l2 : l1; + + for (i = 1; i <= limit; i++) + { + if (0 != strncmp (s1, s2, i)) + return i - 1; + } + return limit; +} + + +/** + * Return the child context with the longest prefix match with the regex. + * Usually only one child will match, search all just in case. + * + * @param ctx Context whose children to search. + * @param regex String to match. + * + * @return Child with the longest prefix, NULL if no child matches. + */ +static struct RegexCombineCtx * +get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex) +{ + struct RegexCombineCtx *p; + struct RegexCombineCtx *best; + unsigned int l; + unsigned int best_l; + + best_l = 0; + best = NULL; + for (p = ctx->head; NULL != p; p = p->next) + { + l = get_prefix_length (p->s, regex); + if (l > best_l) + { + GNUNET_break (0 == best_l); + best = p; + best_l = l; + } + } + return best; +} + + /** * Add a single regex to a context, combining with exisiting regex by-prefix. * @@ -94,42 +223,55 @@ static void regex_add (struct RegexCombineCtx *ctx, const char *regex) { struct RegexCombineCtx *p; - const char *rest; + struct RegexCombineCtx *newctx; + unsigned int prefix_l; + const char *rest_r; + const char *rest_s; + size_t len; - rest = ®ex[1]; - for (p = ctx->head; NULL != p; p = p->next) + if (0 == strlen (regex)) + return; + + p = get_longest_prefix (ctx, regex); + if (NULL != p) /* There is some prefix match, reduce regex and try again */ { - if (p->s[0] == regex[0]) + prefix_l = get_prefix_length (p->s, regex); + rest_s = &p->s[prefix_l]; + rest_r = ®ex[prefix_l]; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l); + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s); + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r); + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s); + len = strlen (p->s); + if (prefix_l < len) /* only partial match, split existing state */ { - if (1 == strlen(p->s)) - { - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "common char %s\n", p->s); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding %s\n", rest); - regex_add (p, rest); - } - else - { - struct RegexCombineCtx *new; - new = GNUNET_malloc (sizeof (struct RegexCombineCtx)); - new->s = GNUNET_strdup (&p->s[1]); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p has now %s\n", p->s); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p will have %.1s\n", p->s); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " regex is %s\n", regex); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new has now %s\n", new->s); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " rest is now %s\n", rest); - p->s[1] = '\0'; /* dont realloc */ - GNUNET_CONTAINER_DLL_insert (p->head, p->tail, new); - regex_add (p, rest); - } - return; + newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx)); + newctx->head = p->head; + newctx->tail = p->tail; + newctx->s = GNUNET_malloc(len - prefix_l + 1); + strncpy (newctx->s, rest_s, len - prefix_l + 1); + + p->head = newctx; + p->tail = newctx; + p->s[prefix_l] = '\0'; } + regex_add (p, rest_r); + return; + } + /* There is no prefix match, add new */ + if (NULL == ctx->head && NULL != ctx->s) + { + /* this was the end before, add empty string */ + newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx)); + newctx->s = GNUNET_strdup (""); + GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx); } - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); - p = GNUNET_malloc (sizeof (struct RegexCombineCtx)); - p->s = GNUNET_strdup (regex); - GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, p); + newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx)); + newctx->s = GNUNET_strdup (regex); + GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx); } @@ -149,7 +291,7 @@ regex_ctx_destroy (struct RegexCombineCtx *ctx) next = p->next; regex_ctx_destroy (p); } - GNUNET_free (ctx->s); + GNUNET_free_non_null (ctx->s); /* 's' on root node is null */ GNUNET_free (ctx); } @@ -163,7 +305,7 @@ regex_ctx_destroy (struct RegexCombineCtx *ctx) * This function DOES NOT support arbitrary regex combining. */ char * -GNUNET_REGEX_combine (char * const regexes[]) +REGEX_TEST_combine (char * const regexes[]) { unsigned int i; char *combined; @@ -176,8 +318,10 @@ GNUNET_REGEX_combine (char * const regexes[]) current = regexes[i]; GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); regex_add (ctx, current); + /* debugctx (ctx, 0); */ } GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); + /* debugctx (ctx, 0); */ combined = regex_combine (ctx); @@ -189,20 +333,20 @@ GNUNET_REGEX_combine (char * const regexes[]) /** * Read a set of regexes from a file, one per line and return them in an array - * suitable for GNUNET_REGEX_combine. - * The array must be free'd using GNUNET_REGEX_free_from_file. + * suitable for REGEX_TEST_combine. + * The array must be free'd using REGEX_TEST_free_from_file. * * @param filename Name of the file containing the regexes. * * @return A newly allocated, NULL terminated array of regexes. */ char ** -GNUNET_REGEX_read_from_file (const char *filename) +REGEX_TEST_read_from_file (const char *filename) { struct GNUNET_DISK_FileHandle *f; unsigned int nr; unsigned int offset; - off_t size; + OFF_T size; size_t len; char *buffer; char *regex; @@ -221,6 +365,7 @@ GNUNET_REGEX_read_from_file (const char *filename) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Can't get size of file %s\n", filename); + GNUNET_DISK_file_close (f); return NULL; } GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, @@ -254,13 +399,16 @@ GNUNET_REGEX_read_from_file (const char *filename) else { len -= 6; - buffer[len] = '\0'; + regex[len] = '\0'; } regex = GNUNET_realloc (regex, len + 1); GNUNET_array_grow (regexes, nr, nr + 1); + GNUNET_assert (NULL == regexes[nr - 2]); regexes[nr - 2] = regex; regexes[nr - 1] = NULL; + regex = NULL; } while (offset < size); + GNUNET_free_non_null (regex); GNUNET_free (buffer); return regexes; @@ -273,7 +421,7 @@ GNUNET_REGEX_read_from_file (const char *filename) * @param regexes NULL-terminated array of regexes. */ void -GNUNET_REGEX_free_from_file (char **regexes) +REGEX_TEST_free_from_file (char **regexes) { unsigned int i; @@ -282,4 +430,4 @@ GNUNET_REGEX_free_from_file (char **regexes) GNUNET_free (regexes); } -/* end of regex_test_lib.c */ \ No newline at end of file +/* end of regex_test_lib.c */