From a3825c187b4f6acb08f0e5a0b5b0590dbe8ea9c8 Mon Sep 17 00:00:00 2001 From: Maximilian Szengel Date: Tue, 27 Mar 2012 14:22:16 +0000 Subject: [PATCH] - dfa construction --- src/include/gnunet_container_lib.h | 18 +- src/include/gnunet_regex_lib.h | 4 +- src/regex/regex.c | 819 ++++++++++++++++++++--------- src/regex/test_regex.c | 8 +- src/util/container_slist.c | 28 +- 5 files changed, 618 insertions(+), 259 deletions(-) diff --git a/src/include/gnunet_container_lib.h b/src/include/gnunet_container_lib.h index 75443b6ae..af64daa82 100644 --- a/src/include/gnunet_container_lib.h +++ b/src/include/gnunet_container_lib.h @@ -1152,12 +1152,28 @@ GNUNET_CONTAINER_slist_clear (struct GNUNET_CONTAINER_SList *l); * @param l list * @param buf payload buffer to find * @param len length of the payload (number of bytes in buf) + * + * @return GNUNET_YES if found, GNUNET_NO otherwise */ int GNUNET_CONTAINER_slist_contains (const struct GNUNET_CONTAINER_SList *l, const void *buf, size_t len); - +/** + * Check if a list contains a certain element using 'compare' function + * + * @param l list + * @param buf payload buffer to find + * @param len length of the payload (number of bytes in buf) + * @param compare comparison function + * + * @return NULL if the 'buf' could not be found, pointer to the + * list element, if found + */ +void * +GNUNET_CONTAINER_slist_contains2 (const struct GNUNET_CONTAINER_SList *l, + const void *buf, size_t len, + int (*compare)(const void *, const size_t, const void *, const size_t)); /** * Count the elements of a list * @param l list diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h index 604f09cf0..46ca11ba2 100644 --- a/src/include/gnunet_regex_lib.h +++ b/src/include/gnunet_regex_lib.h @@ -52,7 +52,7 @@ struct GNUNET_REGEX_Automaton; * @return NFA.Needs to be freed using GNUNET_REGEX_destroy_automaton */ struct GNUNET_REGEX_Automaton * -GNUNET_REGEX_construct_nfa(const char *regex, size_t len); +GNUNET_REGEX_construct_nfa(const char *regex, const size_t len); /** * Free the memory allocated by constructing the GNUNET_REGEX_Automaton @@ -83,7 +83,7 @@ GNUNET_REGEX_save_nfa_graph(struct GNUNET_REGEX_Automaton *n, * @return DFA. Needs to be freed using GNUNET_REGEX_destroy_automaton */ struct GNUNET_REGEX_Automaton * -GNUNET_REGEX_construct_dfa (const char *regex, size_t len); +GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); #if 0 /* keep Emacsens' auto-indent happy */ { diff --git a/src/regex/regex.c b/src/regex/regex.c index bf3bbdc32..0300d31d6 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c @@ -23,85 +23,65 @@ * @author Maximilian Szengel */ #include "platform.h" +#include "gnunet_container_lib.h" #include "gnunet_regex_lib.h" #include "regex.h" -struct Stack -{ - void *data; - struct Stack *next; -}; - -static struct Stack *nfa_stack = NULL; - void -push (void *val, struct Stack **stack) +stack_push (struct GNUNET_CONTAINER_SList *stack, const void *buf, size_t len) { - struct Stack *new = GNUNET_malloc (sizeof (struct Stack *)); - - if (NULL == new) - { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not push to stack\n"); - return; - } - new->data = val; - new->next = *stack; - *stack = new; + GNUNET_CONTAINER_slist_add (stack, GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + buf, len); } int -empty (struct Stack **stack) +stack_empty (struct GNUNET_CONTAINER_SList *stack) { - return (NULL == *stack || NULL == stack); + return 0 == GNUNET_CONTAINER_slist_count (stack); } void * -pop (struct Stack **stack) +stack_pop (struct GNUNET_CONTAINER_SList *stack, size_t length) { - struct Stack *top; + struct GNUNET_CONTAINER_SList_Iterator it; void *val; + size_t len; - if (empty (stack)) - return NULL; + it = GNUNET_CONTAINER_slist_begin (stack); + val = GNUNET_CONTAINER_slist_get (&it, &len); + GNUNET_assert (length == len); + GNUNET_CONTAINER_slist_erase (&it); + GNUNET_CONTAINER_slist_iter_destroy (&it); - top = *stack; - val = top->data; - *stack = top->next; - GNUNET_free (top); return val; } void * -top (struct Stack **stack) +stack_top (struct GNUNET_CONTAINER_SList *stack, size_t *len) { - if (empty (stack)) + struct GNUNET_CONTAINER_SList_Iterator it; + + if (stack_empty (stack)) return NULL; - return (*stack)->data; + return GNUNET_CONTAINER_slist_get (&it, len); } struct State { unsigned int id; int accepting; - unsigned int tcnt; - struct Transition *transitions; int marked; char *name; -}; - -struct StateSet -{ - struct State **states; - unsigned int count; + struct GNUNET_CONTAINER_SList *transitions; + struct GNUNET_CONTAINER_SList *nfa_set; }; struct GNUNET_REGEX_Automaton { struct State *start; struct State *end; - - struct StateSet sset; + struct GNUNET_CONTAINER_SList *states; }; struct Transition @@ -115,40 +95,269 @@ struct GNUNET_REGEX_Context { unsigned int state_id; unsigned int transition_id; + struct GNUNET_CONTAINER_SList *stack; }; +void +GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx) +{ + if (NULL == ctx) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!"); + return; + } + ctx->state_id = 0; + ctx->transition_id = 0; + ctx->stack = GNUNET_CONTAINER_slist_create(); +} + +void +GNUNET_REGEX_context_destroy (struct GNUNET_REGEX_Context *ctx) +{ + if (NULL != ctx->stack) + GNUNET_CONTAINER_slist_destroy(ctx->stack); +} + +void +debug_print_state (struct State *s) +{ + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "State %i: %s marked: %i accepting: %i\n", + s->id, s->name, s->marked, s->accepting); +} + +void +debug_print_states (struct GNUNET_CONTAINER_SList *states) +{ + struct GNUNET_CONTAINER_SList_Iterator it; + struct State *s; + + for (it = GNUNET_CONTAINER_slist_begin (states); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&it); + GNUNET_CONTAINER_slist_next (&it)) + { + s = GNUNET_CONTAINER_slist_get (&it, NULL); + debug_print_state (s); + } + GNUNET_CONTAINER_slist_iter_destroy (&it); +} + +void +debug_print_transitions (struct State *s) +{ + struct GNUNET_CONTAINER_SList_Iterator it; + struct Transition *t; + char *state; + char literal; + + for (it = GNUNET_CONTAINER_slist_begin (s->transitions); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&it); + GNUNET_CONTAINER_slist_next (&it)) + { + t = GNUNET_CONTAINER_slist_get(&it, NULL); + + if (0 == t->literal) + literal = '0'; + else + literal = t->literal; + + if (NULL == t->state) + state = "NULL"; + else + state = t->state->name; + + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transition %i: On %c to %s\n", t->id, literal, state); + } + + GNUNET_CONTAINER_slist_iter_destroy (&it); +} + +int +set_compare (const void *buf1, const size_t len1, const void *buf2, const size_t len2) +{ + int c1; + int c2; + struct GNUNET_CONTAINER_SList_Iterator it1; + struct GNUNET_CONTAINER_SList_Iterator it2; + struct State *s1; + struct State *s2; + struct GNUNET_CONTAINER_SList *l1; + struct GNUNET_CONTAINER_SList *l2; + const void *el1; + const void *el2; + size_t length1; + size_t length2; + int rslt; + int contains; + + if (len1 != len2 + && len1 != sizeof (struct State) + && len2 != sizeof (struct State)) + return 1; + + s1 = (struct State *)buf1; + s2 = (struct State *)buf2; + + l1 = s1->nfa_set; + l2 = s2->nfa_set; + + c1 = GNUNET_CONTAINER_slist_count (l1); + c2 = GNUNET_CONTAINER_slist_count (l2); + + if (c1 != c2) + return ((c1 > c2) ? 1 : -1); + + rslt = 0; + + for (it1 = GNUNET_CONTAINER_slist_begin (l1); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&it1); + GNUNET_CONTAINER_slist_next (&it1)) + { + el1 = GNUNET_CONTAINER_slist_get (&it1, &length1); + contains = 0; + + for (it2 = GNUNET_CONTAINER_slist_begin (l2); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&it2); + GNUNET_CONTAINER_slist_next (&it2)) + { + el2 = GNUNET_CONTAINER_slist_get (&it2, &length2); + + if (length1 != length2) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Comparing lists failed, element size mismatch\n"); + break; + } + if (((struct State*)el1)->id == ((struct State*)el2)->id) + { + contains = 1; + break; + } + } + GNUNET_CONTAINER_slist_iter_destroy (&it2); + + if (0 == contains) + { + rslt = 1; + break; + } + } + GNUNET_CONTAINER_slist_iter_destroy (&it1); + + return rslt; +} + +int +transition_literal_compare (const void *buf1, const size_t len1, const void *buf2, const size_t len2) +{ + struct Transition *t1; + struct Transition *t2; + + if (len1 != len2 + && len1 != sizeof (struct Transition) + && len2 != sizeof (struct Transition)) + { + return 1; + } + + t1 = (struct Transition *)buf1; + t2 = (struct Transition *)buf2; + + if (t1->literal == t2->literal) + return 0; + else if (t1->literal > t2->literal) + return 1; + else + return -1; +} + +void +add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state, const char literal, + struct State *to_state) +{ + struct Transition t; + + if (NULL == from_state) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n"); + return; + } + + t.id = ctx->transition_id++; + t.literal = literal; + t.state = to_state; + + GNUNET_CONTAINER_slist_add (from_state->transitions, + GNUNET_CONTAINER_SLIST_DISPOSITION_TRANSIENT, + &t, sizeof t); +} + struct State * -dfa_create_state (struct GNUNET_REGEX_Context *ctx, struct StateSet *sset, int accepting) +dfa_create_state (struct GNUNET_REGEX_Context *ctx, struct GNUNET_CONTAINER_SList *states) { - int i; struct State *s; char *name; + struct GNUNET_CONTAINER_SList_Iterator stateit; + struct GNUNET_CONTAINER_SList_Iterator tranit; + int len = 0; + struct State *cstate; + struct Transition *ctran; s = GNUNET_malloc (sizeof (struct State)); s->id = ctx->state_id++; - s->accepting = accepting; - s->tcnt = 0; - s->transitions = NULL; + s->accepting = 0; + s->transitions = GNUNET_CONTAINER_slist_create(); s->marked = 0; s->name = NULL; - if (0 == sset->count) + if (NULL == states) + return s; + + s->nfa_set = states; + + if (0 == GNUNET_CONTAINER_slist_count (states)) return s; - s->name = GNUNET_malloc ( strlen ("{")); + + // Create a name based on 'sset' + s->name = GNUNET_malloc (sizeof (char) * 2); strcat (s->name, "{"); + name = NULL; - for (i=0; icount; i++) + for (stateit = GNUNET_CONTAINER_slist_begin(states); + GNUNET_NO == GNUNET_CONTAINER_slist_end (&stateit); + GNUNET_CONTAINER_slist_next (&stateit)) { - name = GNUNET_malloc (sizeof (char)); - GNUNET_asprintf (&name, "%i,", sset->states[i]->id); - s->name = GNUNET_realloc (s->name, strlen (s->name) + strlen (name) + 1); - strcat (s->name, name); - GNUNET_free (name); + cstate = GNUNET_CONTAINER_slist_get (&stateit, NULL); + GNUNET_CONTAINER_slist_iter_destroy (&tranit); + GNUNET_asprintf (&name, "%i,", cstate->id); + + if (NULL != name) + { + len = strlen (s->name) + strlen (name) + 1; + s->name = GNUNET_realloc (s->name, len); + strcat (s->name, name); + GNUNET_free (name); + name = NULL; + } + + // Add a transition for each distinct literal to NULL state + for (tranit = GNUNET_CONTAINER_slist_begin(cstate->transitions); + GNUNET_NO == GNUNET_CONTAINER_slist_end (&tranit); + GNUNET_CONTAINER_slist_next (&tranit)) + { + ctran = GNUNET_CONTAINER_slist_get(&tranit, NULL); + if (0 != ctran->literal + && NULL == GNUNET_CONTAINER_slist_contains2 (s->transitions, ctran, sizeof *ctran, &transition_literal_compare)) + { + add_transition (ctx, s, ctran->literal, NULL); + } + } + + if (cstate->accepting) + s->accepting = 1; } - s->name[strlen (s->name)-1] = '}'; + GNUNET_CONTAINER_slist_iter_destroy (&stateit); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Created DFA state with name: %s\n", s->name); + s->name[strlen (s->name)-1] = '}'; return s; } @@ -157,26 +366,25 @@ struct GNUNET_REGEX_Automaton * nfa_create (struct State *start, struct State *end) { struct GNUNET_REGEX_Automaton *n; - struct StateSet sset; n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); - if (NULL == start && NULL == end) - { - sset.states = NULL; - sset.count = 0; - n->sset = sset; - n->start = NULL; - n->end = NULL; + n->start = NULL; + n->end = NULL; + n->states = GNUNET_CONTAINER_slist_create (); + if (NULL == start && NULL == end) return n; - } - sset.states = GNUNET_malloc ((sizeof (struct State *)) * 2); - sset.states[0] = start; - sset.states[1] = end; - sset.count = 2; - n->sset = sset; + GNUNET_CONTAINER_slist_add (n->states, + GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + end, + sizeof *end); + + GNUNET_CONTAINER_slist_add (n->states, + GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + start, + sizeof *start); n->start = start; n->end = end; @@ -184,22 +392,28 @@ nfa_create (struct State *start, struct State *end) return n; } - void -nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct StateSet *sset) +nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct GNUNET_CONTAINER_SList *states) { - unsigned int i; - unsigned int j; + // This isn't very pretty. Would be better to use GNUNET_CONTAINER_slist_append, but + // this function adds to the beginning of dst, which currently breaks "pretty" + // printing of the graph... + struct GNUNET_CONTAINER_SList_Iterator i; + struct State *s; + + for (i = GNUNET_CONTAINER_slist_begin (states); + GNUNET_CONTAINER_slist_end (&i) != GNUNET_YES; + GNUNET_CONTAINER_slist_next (&i)) - i = n->sset.count; - GNUNET_array_grow (n->sset.states, n->sset.count, n->sset.count + sset->count); - for (j = 0; i < n->sset.count && j < sset->count; i++, j++) { - n->sset.states[i] = sset->states[j]; + s = GNUNET_CONTAINER_slist_get (&i, NULL); + GNUNET_CONTAINER_slist_add_end (n->states, + GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + s, sizeof *s); } + GNUNET_CONTAINER_slist_iter_destroy (&i); } - struct State * nfa_create_state (struct GNUNET_REGEX_Context *ctx, int accepting) { @@ -208,8 +422,7 @@ nfa_create_state (struct GNUNET_REGEX_Context *ctx, int accepting) s = GNUNET_malloc (sizeof (struct State)); s->id = ctx->state_id++; s->accepting = accepting; - s->tcnt = 0; - s->transitions = NULL; + s->transitions = GNUNET_CONTAINER_slist_create(); s->marked = 0; s->name = NULL; GNUNET_asprintf (&s->name, "s%i", s->id); @@ -218,91 +431,79 @@ nfa_create_state (struct GNUNET_REGEX_Context *ctx, int accepting) } void -automaton_destroy_state (struct State *s) +automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) { - if (s->tcnt > 0) - GNUNET_free (s->transitions); - if (NULL != s->name) - GNUNET_free (s->name); - GNUNET_free (s); + GNUNET_CONTAINER_slist_destroy (a->states); + a->start = NULL; + a->end = NULL; + GNUNET_free (a); } void -nfa_add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state, const char literal, - struct State *to_state) +automaton_destroy_state (struct State *s) { - struct Transition t; - - if (NULL == from_state || NULL == to_state) - { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n"); - return; - } - - t.id = ctx->transition_id++; - t.literal = literal; - t.state = to_state; - - if (0 == from_state->tcnt) - from_state->transitions = NULL; - - GNUNET_array_append (from_state->transitions, from_state->tcnt, t); + if (NULL != s->transitions) + GNUNET_CONTAINER_slist_destroy (s->transitions); + if (NULL != s->name) + GNUNET_free (s->name); + if (NULL != s->nfa_set) + GNUNET_CONTAINER_slist_destroy(s->nfa_set); + GNUNET_free (s); } void mark_all_states (struct GNUNET_REGEX_Automaton *n, int marked) { - int i; + struct GNUNET_CONTAINER_SList_Iterator it; + struct State *s; - for (i = 0; i < n->sset.count; i++) + for (it = GNUNET_CONTAINER_slist_begin (n->states); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&it); + GNUNET_CONTAINER_slist_next (&it)) { - n->sset.states[i]->marked = marked; + s = GNUNET_CONTAINER_slist_get (&it, NULL); + s->marked = marked; } + + GNUNET_CONTAINER_slist_iter_destroy (&it); } void nfa_add_concatenation (struct GNUNET_REGEX_Context *ctx) { - struct GNUNET_REGEX_Automaton *A; - struct GNUNET_REGEX_Automaton *B; + struct GNUNET_REGEX_Automaton *a; + struct GNUNET_REGEX_Automaton *b; struct GNUNET_REGEX_Automaton *new; - B = pop (&nfa_stack); - A = pop (&nfa_stack); - - if (NULL == A || NULL == B) - { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "nfa_add_concatenationenation failed, because there were not enough elements on the stack"); - return; - } + b = stack_pop (ctx->stack, sizeof (struct GNUNET_REGEX_Automaton)); + a = stack_pop (ctx->stack, sizeof (struct GNUNET_REGEX_Automaton)); - nfa_add_transition (ctx, A->end, 0, B->start); - A->end->accepting = 0; - B->end->accepting = 1; + add_transition (ctx, a->end, 0, b->start); + a->end->accepting = 0; + b->end->accepting = 1; new = nfa_create (NULL, NULL); - nfa_add_states (new, &A->sset); - nfa_add_states (new, &B->sset); - new->start = A->start; - new->end = B->end; - GNUNET_free (A); - GNUNET_free (B); - - push (new, &nfa_stack); + nfa_add_states (new, a->states); + nfa_add_states (new, b->states); + new->start = a->start; + new->end = b->end; + automaton_fragment_clear (a); + automaton_fragment_clear (b); + + stack_push (ctx->stack, new, sizeof *new); } void nfa_add_star_op (struct GNUNET_REGEX_Context *ctx) { - struct GNUNET_REGEX_Automaton *A; + struct GNUNET_REGEX_Automaton *a; struct GNUNET_REGEX_Automaton *new; struct State *start; struct State *end; - A = pop (&nfa_stack); + a = stack_pop (ctx->stack, sizeof (struct GNUNET_REGEX_Automaton)); - if (NULL == A) + if (NULL == a) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "nfa_add_star_op failed, because there was no element on the stack"); @@ -312,78 +513,64 @@ nfa_add_star_op (struct GNUNET_REGEX_Context *ctx) start = nfa_create_state (ctx, 0); end = nfa_create_state (ctx, 1); - nfa_add_transition (ctx, start, 0, A->start); - nfa_add_transition (ctx, start, 0, end); - nfa_add_transition (ctx, A->end, 0, A->start); - nfa_add_transition (ctx, A->end, 0, end); + add_transition (ctx, start, 0, a->start); + add_transition (ctx, start, 0, end); + add_transition (ctx, a->end, 0, a->start); + add_transition (ctx, a->end, 0, end); - A->end->accepting = 0; + a->end->accepting = 0; end->accepting = 1; new = nfa_create (start, end); - nfa_add_states (new, &A->sset); - GNUNET_free (A); + nfa_add_states (new, a->states); + automaton_fragment_clear (a); - push (new, &nfa_stack); + stack_push (ctx->stack, new, sizeof *new); } void nfa_add_plus_op (struct GNUNET_REGEX_Context *ctx) { - struct GNUNET_REGEX_Automaton *A; + struct GNUNET_REGEX_Automaton *a; - A = pop (&nfa_stack); + a = stack_pop (ctx->stack, sizeof (struct GNUNET_REGEX_Automaton)); - if (NULL == A) - { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "nfa_add_plus_op failed, because there was no element on the stack"); - return; - } - - nfa_add_transition (ctx, A->end, 0, A->start); + add_transition (ctx, a->end, 0, a->start); - push (A, &nfa_stack); + stack_push (ctx->stack, a, sizeof *a); } void nfa_add_alternation (struct GNUNET_REGEX_Context *ctx) { - struct GNUNET_REGEX_Automaton *A; - struct GNUNET_REGEX_Automaton *B; + struct GNUNET_REGEX_Automaton *a; + struct GNUNET_REGEX_Automaton *b; struct GNUNET_REGEX_Automaton *new; struct State *start; struct State *end; - B = pop (&nfa_stack); - A = pop (&nfa_stack); - - if (NULL == A || NULL == B) - { - GNUNET_log (GNUNET_ERROR_TYPE_ERROR, - "alternation failed, because there were not enough elements on the stack"); - return; - } + b = stack_pop (ctx->stack, sizeof (struct GNUNET_REGEX_Automaton)); + a = stack_pop (ctx->stack, sizeof (struct GNUNET_REGEX_Automaton)); start = nfa_create_state (ctx, 0); end = nfa_create_state (ctx, 1); - nfa_add_transition (ctx, start, 0, A->start); - nfa_add_transition (ctx, start, 0, B->start); + add_transition (ctx, start, 0, a->start); + add_transition (ctx, start, 0, b->start); - nfa_add_transition (ctx, A->end, 0, end); - nfa_add_transition (ctx, B->end, 0, end); + add_transition (ctx, a->end, 0, end); + add_transition (ctx, b->end, 0, end); - A->end->accepting = 0; - B->end->accepting = 0; + a->end->accepting = 0; + b->end->accepting = 0; end->accepting = 1; new = nfa_create (start, end); - nfa_add_states (new, &A->sset); - nfa_add_states (new, &B->sset); - GNUNET_free (A); - GNUNET_free (B); + nfa_add_states (new, a->states); + nfa_add_states (new, b->states); + automaton_fragment_clear (a); + automaton_fragment_clear (b); - push (new, &nfa_stack); + stack_push (ctx->stack, new, sizeof *new); } void @@ -395,9 +582,9 @@ nfa_add_literal (struct GNUNET_REGEX_Context *ctx, const char lit) start = nfa_create_state (ctx, 0); end = nfa_create_state (ctx, 1); - nfa_add_transition (ctx, start, lit, end); + add_transition (ctx, start, lit, end); n = nfa_create (start, end); - push (n, &nfa_stack); + stack_push (ctx->stack, n, sizeof *n); } /** @@ -410,55 +597,107 @@ nfa_add_literal (struct GNUNET_REGEX_Context *ctx, const char lit) * @return set of states that can be reached from the given 'states' when * using only 'literal' transitions */ -struct StateSet -nfa_closure (struct State **states, unsigned int count, const char literal) +struct GNUNET_CONTAINER_SList * +create_nfa_closure (struct GNUNET_CONTAINER_SList *states, const char literal) { - struct Stack *cls_check; - unsigned int scnt; - unsigned int tcnt; - struct StateSet cls; + struct GNUNET_CONTAINER_SList_Iterator stateit; + struct GNUNET_CONTAINER_SList_Iterator tranit; + struct GNUNET_CONTAINER_SList *cls; + struct GNUNET_CONTAINER_SList *cls_check; struct State *s; struct State *currentstate; + struct Transition *currenttransition; struct State *clsstate; + cls = GNUNET_CONTAINER_slist_create (); - for (scnt=0; scnt < count; scnt++) + for (stateit = GNUNET_CONTAINER_slist_begin (states); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&stateit); + GNUNET_CONTAINER_slist_next (&stateit)) { - s = states[scnt]; - cls_check = NULL; - cls.states = NULL; - cls.count = 0; + s = GNUNET_CONTAINER_slist_get (&stateit, NULL); + cls_check = GNUNET_CONTAINER_slist_create (); + + // Add start state to closure only for epsilon closure + if (0 == literal) + { + GNUNET_CONTAINER_slist_add (cls, + GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + s, sizeof *s); + } - // Add start state to closure - GNUNET_array_append (cls.states, cls.count, s); - push (s, &cls_check); + stack_push (cls_check, s, sizeof *s); - while (!empty(&cls_check)) + while (!stack_empty(cls_check)) { - currentstate = pop(&cls_check); + currentstate = stack_pop(cls_check, sizeof (struct State)); - for (tcnt=0; tcnttcnt; tcnt++) + for (tranit = GNUNET_CONTAINER_slist_begin (currentstate->transitions); + GNUNET_CONTAINER_slist_end (&tranit) != GNUNET_YES; + GNUNET_CONTAINER_slist_next (&tranit)) { - if (NULL != currentstate->transitions[tcnt].state - && literal == currentstate->transitions[tcnt].literal) + currenttransition = GNUNET_CONTAINER_slist_get (&tranit, NULL); + + if (NULL != currenttransition->state + && literal == currenttransition->literal) { - clsstate = currentstate->transitions[tcnt].state; + clsstate = currenttransition->state; if (NULL == clsstate) break; - GNUNET_array_append (cls.states, cls.count, clsstate); - push (clsstate, &cls_check); + if (GNUNET_YES != GNUNET_CONTAINER_slist_contains (cls, clsstate, sizeof *clsstate)) + { + GNUNET_CONTAINER_slist_add (cls, + GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + clsstate, sizeof *clsstate); + stack_push (cls_check, clsstate, sizeof *clsstate); + } } } + GNUNET_CONTAINER_slist_iter_destroy (&tranit); } + + GNUNET_assert (stack_empty (cls_check)); + GNUNET_CONTAINER_slist_destroy (cls_check); } + GNUNET_CONTAINER_slist_iter_destroy (&stateit); return cls; } +struct GNUNET_CONTAINER_SList * +GNUNET_REGEX_move (struct GNUNET_REGEX_Automaton *a, struct State *s, const char literal) +{ + struct GNUNET_CONTAINER_SList *l; + struct GNUNET_CONTAINER_SList_Iterator it; + struct Transition *ctran; + + if (!GNUNET_CONTAINER_slist_contains (a->states, s, sizeof *s)) + { + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "State %s is not part of the given automaton", s->name); + return NULL; + } + + l = GNUNET_CONTAINER_slist_create (); + + for (it = GNUNET_CONTAINER_slist_begin (s->transitions); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&it); + GNUNET_CONTAINER_slist_next (&it)) + { + ctran = GNUNET_CONTAINER_slist_get (&it, NULL); + if (literal == ctran->literal) + GNUNET_CONTAINER_slist_add (l, GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + ctran->state, sizeof *(ctran->state)); + } + GNUNET_CONTAINER_slist_iter_destroy (&it); + + return l; +} + struct GNUNET_REGEX_Automaton * -GNUNET_REGEX_construct_nfa (const char *regex, size_t len) +GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) { struct GNUNET_REGEX_Context ctx; struct GNUNET_REGEX_Automaton *nfa; @@ -473,13 +712,13 @@ GNUNET_REGEX_construct_nfa (const char *regex, size_t len) int atomcount; }*p; + GNUNET_REGEX_context_init (&ctx); + p = NULL; error_msg = NULL; altcount = 0; atomcount = 0; pcount = 0; - ctx.state_id = 0; - ctx.transition_id = 0; for (count = 0; count < len && *regex; count++, regex++) { @@ -573,9 +812,9 @@ GNUNET_REGEX_construct_nfa (const char *regex, size_t len) if (NULL != p) GNUNET_free (p); - nfa = pop (&nfa_stack); + nfa = stack_pop (ctx.stack, sizeof (struct GNUNET_REGEX_Automaton)); - if (!empty (&nfa_stack)) + if (!stack_empty (ctx.stack)) { error_msg = "Creating the NFA failed. NFA stack was not empty!"; goto error; @@ -583,7 +822,10 @@ GNUNET_REGEX_construct_nfa (const char *regex, size_t len) GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Created NFA with %i States and a total of %i Transitions\n", - ctx.state_id, ctx.transition_id); + GNUNET_CONTAINER_slist_count (nfa->states), + ctx.transition_id); + + GNUNET_REGEX_context_destroy (&ctx); return nfa; @@ -592,38 +834,129 @@ error: if (NULL != error_msg) GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg); GNUNET_free (p); - while (!empty (&nfa_stack)) - GNUNET_REGEX_destroy_automaton (pop (&nfa_stack)); + while (!stack_empty (ctx.stack)) + GNUNET_REGEX_destroy_automaton (stack_pop (ctx.stack, sizeof (struct GNUNET_REGEX_Automaton))); + GNUNET_REGEX_context_destroy (&ctx); return NULL; } void GNUNET_REGEX_destroy_automaton (struct GNUNET_REGEX_Automaton *a) { - int i; + struct GNUNET_CONTAINER_SList_Iterator it; if (NULL == a) return; - for (i = 0; i < a->sset.count; i++) + for (it = GNUNET_CONTAINER_slist_begin (a->states); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&it); + GNUNET_CONTAINER_slist_next (&it)) { - automaton_destroy_state (a->sset.states[i]); + automaton_destroy_state (GNUNET_CONTAINER_slist_get (&it, NULL)); } - - if (NULL != a->sset.states) - GNUNET_free (a->sset.states); + GNUNET_CONTAINER_slist_iter_destroy (&it); + GNUNET_CONTAINER_slist_destroy (a->states); GNUNET_free (a); } + +struct GNUNET_REGEX_Automaton * +GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) +{ + struct GNUNET_REGEX_Context ctx; + struct GNUNET_REGEX_Automaton *dfa; + struct GNUNET_REGEX_Automaton *nfa; + struct GNUNET_CONTAINER_SList *tmp; + struct GNUNET_CONTAINER_SList *nfa_set; + struct GNUNET_CONTAINER_SList *sset; + struct GNUNET_CONTAINER_SList *dfa_stack; + struct GNUNET_CONTAINER_SList_Iterator tranit; + struct Transition *currenttransition; + struct State *dfa_state; + struct State *new_dfa_state; + struct State *state_contains; + + GNUNET_REGEX_context_init (&ctx); + + // Create NFA + nfa = GNUNET_REGEX_construct_nfa (regex, len); + + dfa_stack = GNUNET_CONTAINER_slist_create (); + + // Initialize new dfa + dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); + dfa->states = GNUNET_CONTAINER_slist_create (); + + // Create DFA start state from epsilon closure + sset = GNUNET_CONTAINER_slist_create (); + GNUNET_CONTAINER_slist_add (sset, GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + nfa->start, sizeof *(nfa->start)); + nfa_set = create_nfa_closure (sset, 0); + GNUNET_CONTAINER_slist_destroy (sset); + dfa->start = dfa_create_state (&ctx, nfa_set); + GNUNET_CONTAINER_slist_add (dfa->states, GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + dfa->start, sizeof *(dfa->start)); + stack_push (dfa_stack, dfa->start, sizeof *(dfa->start)); + + while (!stack_empty(dfa_stack)) + { + dfa_state = stack_pop (dfa_stack, sizeof (struct State)); + + for (tranit = GNUNET_CONTAINER_slist_begin(dfa_state->transitions); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&tranit); + GNUNET_CONTAINER_slist_next (&tranit)) + { + currenttransition = GNUNET_CONTAINER_slist_get (&tranit, NULL); + + if (0 != currenttransition->literal + && NULL == currenttransition->state) + { + tmp = create_nfa_closure (dfa_state->nfa_set, currenttransition->literal); + nfa_set = create_nfa_closure (tmp, 0); + new_dfa_state = dfa_create_state (&ctx, nfa_set); + GNUNET_CONTAINER_slist_destroy (tmp); + + state_contains = GNUNET_CONTAINER_slist_contains2 (dfa->states, + new_dfa_state, + sizeof *new_dfa_state, + &set_compare); + if (NULL == state_contains) + { + GNUNET_CONTAINER_slist_add_end (dfa->states, GNUNET_CONTAINER_SLIST_DISPOSITION_STATIC, + new_dfa_state, sizeof *new_dfa_state); + stack_push (dfa_stack, new_dfa_state, sizeof *new_dfa_state); + currenttransition->state = new_dfa_state; + } + else + currenttransition->state = state_contains; + } + } + + GNUNET_CONTAINER_slist_iter_destroy (&tranit); + } + GNUNET_CONTAINER_slist_destroy (dfa_stack); + GNUNET_REGEX_destroy_automaton (nfa); + GNUNET_REGEX_context_destroy (&ctx); + + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + "Created DFA with %i States\n", + GNUNET_CONTAINER_slist_count (dfa->states)); + + return dfa; +} + void GNUNET_REGEX_save_nfa_graph (struct GNUNET_REGEX_Automaton *n, const char *filename) { + struct GNUNET_CONTAINER_SList_Iterator stateit; + struct GNUNET_CONTAINER_SList_Iterator tranit; struct State *s; + struct Transition *ctran; + char *s_acc = NULL; + char *s_tran = NULL; char *start; char *end; FILE *p; - int scnt; - int tcnt; if (NULL == n) { @@ -649,75 +982,55 @@ GNUNET_REGEX_save_nfa_graph (struct GNUNET_REGEX_Automaton *n, const char *filen start = "digraph G {\nrankdir=LR\n"; fwrite (start, strlen (start), 1, p); - for (scnt = 0; scnt < n->sset.count; scnt++) + for (stateit = GNUNET_CONTAINER_slist_begin (n->states); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&stateit); + GNUNET_CONTAINER_slist_next (&stateit)) { - struct Transition *ctran; - char *s_acc = NULL; - char *s_tran = NULL; - s = n->sset.states[scnt]; + s = GNUNET_CONTAINER_slist_get (&stateit, NULL); if (s->accepting) { - GNUNET_asprintf (&s_acc, "s%i [shape=doublecircle];\n", s->id); + GNUNET_asprintf (&s_acc, "\"%s\" [shape=doublecircle];\n", s->name); fwrite (s_acc, strlen (s_acc), 1, p); GNUNET_free (s_acc); } - ctran = s->transitions; s->marked = 1; - for (tcnt = 0; tcnt < s->tcnt && NULL != ctran; tcnt++) + for (tranit = GNUNET_CONTAINER_slist_begin(s->transitions); + GNUNET_YES != GNUNET_CONTAINER_slist_end (&tranit); + GNUNET_CONTAINER_slist_next (&tranit)) { + ctran = GNUNET_CONTAINER_slist_get (&tranit, NULL); + if (NULL == ctran->state) { GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Transition from State %i has has no state for transitioning\n", s->id); + continue; } if (ctran->literal == 0) { - GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"epsilon\"];\n", s->id, - ctran->state->id); + GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"epsilon\"];\n", s->name, + ctran->state->name); } else { - GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"%c\"];\n", s->id, - ctran->state->id, ctran->literal); + GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"%c\"];\n", s->name, + ctran->state->name, ctran->literal); } fwrite (s_tran, strlen (s_tran), 1, p); - - ctran++; + GNUNET_free (s_tran); } + GNUNET_CONTAINER_slist_iter_destroy (&tranit); } + GNUNET_CONTAINER_slist_iter_destroy (&stateit); end = "\n}\n"; fwrite (end, strlen (end), 1, p); fclose (p); } - -struct GNUNET_REGEX_Automaton * -GNUNET_REGEX_construct_dfa (const char *regex, size_t len) -{ - struct GNUNET_REGEX_Context ctx; - struct GNUNET_REGEX_Automaton *dfa; - struct GNUNET_REGEX_Automaton *nfa; - struct StateSet dfa_start_set; - struct State *dfa_start; - - ctx.state_id = 0; - ctx.transition_id = 0; - - // Create NFA - nfa = GNUNET_REGEX_construct_nfa (regex, len); - - // Create DFA start state from epsilon closure - dfa_start_set = nfa_closure (&nfa->start, 1, 0); - dfa_start = dfa_create_state (&ctx, &dfa_start_set, 0); - - // ecls (move (dfa_start, lit)) - - return dfa; -} diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c index c27b093b4..18db145ac 100644 --- a/src/regex/test_regex.c +++ b/src/regex/test_regex.c @@ -46,7 +46,9 @@ main (int argc, char *argv[]) dfa = NULL; regex = "a\\*b(c|d)+c*(a(b|c)d)+"; - /*regex = "a(ab)b";*/ + /*regex = "\\*a(a|b)b";*/ + /*regex = "a(a|b)c";*/ + /*regex = "(a|aa)+";*/ nfa = GNUNET_REGEX_construct_nfa (regex, strlen (regex)); if (nfa) @@ -59,7 +61,9 @@ main (int argc, char *argv[]) dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); if (dfa) + { + GNUNET_REGEX_save_nfa_graph (dfa, "dfa_graph.dot"); GNUNET_REGEX_destroy_automaton (dfa); - + } return err; } diff --git a/src/util/container_slist.c b/src/util/container_slist.c index 7b85dc877..950c4245e 100644 --- a/src/util/container_slist.c +++ b/src/util/container_slist.c @@ -250,10 +250,11 @@ GNUNET_CONTAINER_slist_clear (struct GNUNET_CONTAINER_SList *l) /** * Check if a list contains a certain element - * * @param l list * @param buf payload buffer to find * @param len length of the payload (number of bytes in buf) + * + * @return GNUNET_YES if found, GNUNET_NO otherwise */ int GNUNET_CONTAINER_slist_contains (const struct GNUNET_CONTAINER_SList *l, @@ -268,6 +269,31 @@ GNUNET_CONTAINER_slist_contains (const struct GNUNET_CONTAINER_SList *l, } +/** + * Check if a list contains a certain element + * + * @param l list + * @param buf payload buffer to find + * @param len length of the payload (number of bytes in buf) + * @param compare comparison function, should return 0 if compared elements match + * + * @return NULL if the 'buf' could not be found, pointer to the + * list element, if found + */ +void * +GNUNET_CONTAINER_slist_contains2 (const struct GNUNET_CONTAINER_SList *l, + const void *buf, size_t len, + int (*compare)(const void *, const size_t, const void *, const size_t)) +{ + struct GNUNET_CONTAINER_SList_Elem *e; + + for (e = l->head; e != NULL; e = e->next) + if ((e->len == len) && (*compare)(buf, len, e->elem, e->len) == 0) + return e->elem; + return NULL; +} + + /** * Count the elements of a list * @param l list -- 2.25.1