struct State *start;
struct State *end;
+ unsigned int state_count;
struct State *states_head;
struct State *states_tail;
int marked;
char *name;
+ unsigned int transition_count;
struct Transition *transitions_head;
struct Transition *transitions_tail;
*
* @param ctx context
*/
-void
+static void
GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx)
{
if (NULL == ctx)
ctx->stack_tail = NULL;
}
-void
+static void
debug_print_state (struct State *s)
{
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
s->marked, s->accepting);
}
-void
+static void
debug_print_states (struct StateSet *sset)
{
struct State *s;
}
}
-void
+static void
debug_print_transitions (struct State *s)
{
struct Transition *t;
}
}
+static int
+state_compare (const void *a, const void *b)
+{
+ struct State **s1;
+ struct State **s2;
+
+ s1 = (struct State **) a;
+ s2 = (struct State **) b;
+
+ return (*s1)->id - (*s2)->id;
+}
+
/**
* Compare to state sets by comparing the id's of the states that are
- * contained in each set.
+ * contained in each set. Both sets are expected to be sorted by id!
*
* @param sset1 first state set
* @param sset2 second state set
*
* @return 0 if they are equal, non 0 otherwise
*/
-int
+static int
state_set_compare (struct StateSet *sset1, struct StateSet *sset2)
{
- struct State *s1;
- struct State *s2;
- int i1;
- int i2;
- int contains;
- int rslt;
-
- if (sset1->len < 1 || sset2->len < 1)
- return -1;
+ int i;
- rslt = 0;
+ if (sset1->len != sset2->len)
+ return 1;
- for (i1 = 0; i1 < sset1->len; i1++)
+ for (i = 0; i < sset1->len; i++)
{
- s1 = sset1->states[i1];
- contains = 0;
- for (i2 = 0; i2 < sset2->len; i2++)
+ if (sset1->states[i]->id != sset2->states[i]->id)
{
- s2 = sset2->states[i2];
- if (s1->id == s2->id)
- {
- contains = 1;
- break;
- }
- }
-
- if (0 == contains)
- {
- rslt = 1;
- break;
+ return 1;
}
}
- return rslt;
+ return 0;
}
/**
*
* @return GNUNET_YES if 'set' contains 'elem, GNUNET_NO otherwise
*/
-int
+static int
state_set_contains (struct StateSet *set, struct State *elem)
{
struct State *s;
*
* @param set set to be cleared
*/
-void
+static void
state_set_clear (struct StateSet *set)
{
if (NULL != set)
* @param literal transition label
* @param to_state state to where the transition should point to
*/
-void
+static void
add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state,
const char literal, struct State *to_state)
{
*
* @param a automaton to be cleared
*/
-void
+static void
automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
{
a->start = NULL;
*
* @param s state that should be destroyed
*/
-void
+static void
automaton_destroy_state (struct State *s)
{
struct Transition *t;
*
* @return new DFA state
*/
-struct State *
+static struct State *
dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states)
{
struct State *s;
s->name = NULL;
if (NULL == nfa_states)
+ {
+ GNUNET_asprintf (&s->name, "s%i", s->id);
return s;
+ }
s->nfa_set = nfa_states;
return s;
}
-struct State *
+static struct State *
dfa_move (struct State *s, const char literal)
{
struct Transition *t;
return new_s;
}
+static void
+dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
+{
+ /*
+ * struct StateSet *unreachable;
+ * struct State *stack[a->size];
+ * struct State *s;
+ *
+ * unreachable = GNUNET_malloc (sizeof (struct StateSet));
+ *
+ * // 1. add all states to unreachable set
+ * for (s = a->states_head; NULL != s; s = s->next)
+ * {
+ * GNUNET_array_append (unreachable->states, unreachable->len; s);
+ * }
+ *
+ * // 2. traverse dfa from start state and remove every visited state from unreachable set
+ * s = a->start;
+ * // push a->start
+ * while (stack->len > 0)
+ * {
+ * s = stack->states[stack->len - 1];
+ * GNUNET_array_grow (stack->states; stack->len; stack->len-1);
+ * GNUNET_array_
+ * for (t = s->transitions_head; NULL != t; t = t->next)
+ * {
+ *
+ * }
+ * }
+ * // 3. delete all states that are still in the unreachable set
+ */
+}
+
+static void
+dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
+{
+ struct State *s;
+ struct State *s_check;
+ struct Transition *t;
+ struct Transition *t_check;
+ int dead;
+
+ GNUNET_assert (DFA == a->type);
+
+ for (s = a->states_head; NULL != s; s = s->next)
+ {
+ if (s->accepting)
+ continue;
+
+ dead = 1;
+ for (t = s->transitions_head; NULL != t; t = t->next)
+ {
+ if (NULL != t->state && t->state != s)
+ {
+ dead = 0;
+ break;
+ }
+ }
+
+ if (0 == dead)
+ continue;
+
+ // state s is dead, remove it
+ // 1. remove all transitions to this state
+ for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
+ {
+ for (t_check = s_check->transitions_head; NULL != t_check;
+ t_check = t_check->next)
+ {
+ if (t_check->state == s)
+ {
+ GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
+ s_check->transitions_tail, t_check);
+ }
+ }
+ }
+ // 2. remove state
+ GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
+ }
+}
+
+static void
+dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a)
+{
+
+}
+
+static void
+dfa_minimize (struct GNUNET_REGEX_Automaton *a)
+{
+ GNUNET_assert (DFA == a->type);
+
+ // 1. remove unreachable states
+ dfa_remove_unreachable_states (a);
+
+ // 2. remove dead states
+ dfa_remove_dead_states (a);
+
+ // 3. Merge nondistinguishable states
+ dfa_merge_nondistinguishable_states (a);
+}
+
/**
* Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear.
*
*
* @return new NFA fragment
*/
-struct GNUNET_REGEX_Automaton *
+static struct GNUNET_REGEX_Automaton *
nfa_fragment_create (struct State *start, struct State *end)
{
struct GNUNET_REGEX_Automaton *n;
* @param states_head head of the DLL of states
* @param states_tail tail of the DLL of states
*/
-void
+static void
nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head,
struct State *states_tail)
{
*
* @return new NFA state
*/
-struct State *
+static struct State *
nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
{
struct State *s;
return s;
}
+/**
+ * Calculates the NFA closure set for the given state
+ *
+ * @param s starting point state
+ * @param literal transitioning literal on which to base the closure on,
+ * pass 0 for epsilon transition
+ *
+ * @return nfa closure on 'literal' (epsilon closure if 'literal' is 0)
+ */
+static struct StateSet *
+nfa_closure_create (struct State *s, const char literal)
+{
+ struct StateSet *cls;
+ struct StateSet *cls_check;
+ struct State *clsstate;
+ struct State *currentstate;
+ struct Transition *ctran;
+
+ if (NULL == s)
+ return NULL;
+
+ cls = GNUNET_malloc (sizeof (struct StateSet));
+ cls_check = GNUNET_malloc (sizeof (struct StateSet));
+
+ // Add start state to closure only for epsilon closure
+ if (0 == literal)
+ GNUNET_array_append (cls->states, cls->len, s);
+
+ GNUNET_array_append (cls_check->states, cls_check->len, s);
+ while (cls_check->len > 0)
+ {
+ currentstate = cls_check->states[cls_check->len - 1];
+ GNUNET_array_grow (cls_check->states, cls_check->len, cls_check->len - 1);
+
+ for (ctran = currentstate->transitions_head; NULL != ctran;
+ ctran = ctran->next)
+ {
+ if (NULL != ctran->state && literal == ctran->literal)
+ {
+ clsstate = ctran->state;
+
+ if (NULL != clsstate &&
+ GNUNET_YES != state_set_contains (cls, clsstate))
+ {
+ GNUNET_array_append (cls->states, cls->len, clsstate);
+ GNUNET_array_append (cls_check->states, cls_check->len, clsstate);
+ }
+ }
+ }
+ }
+ GNUNET_assert (0 == cls_check->len);
+ GNUNET_free (cls_check);
+
+ if (cls->len > 1)
+ qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
+
+ return cls;
+}
+
+/**
+ * Calculates the closure set for the given set of states.
+ *
+ * @param states list of states on which to base the closure on
+ * @param literal transitioning literal for which to base the closure on,
+ * pass 0 for epsilon transition
+ *
+ * @return nfa closure on 'literal' (epsilon closure if 'literal' is 0)
+ */
+static struct StateSet *
+nfa_closure_set_create (struct StateSet *states, const char literal)
+{
+ struct State *s;
+ struct StateSet *sset;
+ struct StateSet *cls;
+ int i;
+ int j;
+ int k;
+ int contains;
+
+ if (NULL == states)
+ return NULL;
+
+ cls = GNUNET_malloc (sizeof (struct StateSet));
+
+ for (i = 0; i < states->len; i++)
+ {
+ s = states->states[i];
+ sset = nfa_closure_create (s, literal);
+
+ for (j = 0; j < sset->len; j++)
+ {
+ contains = 0;
+ for (k = 0; k < cls->len; k++)
+ {
+ if (sset->states[j]->id == cls->states[k]->id)
+ contains = 1;
+ }
+ if (!contains)
+ GNUNET_array_append (cls->states, cls->len, sset->states[j]);
+ }
+ state_set_clear (sset);
+ }
+
+ if (cls->len > 1)
+ qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
+
+ return cls;
+}
+
/**
* Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
*
* @param ctx context
*/
-void
+static void
nfa_add_concatenation (struct GNUNET_REGEX_Context *ctx)
{
struct GNUNET_REGEX_Automaton *a;
*
* @param ctx context
*/
-void
+static void
nfa_add_star_op (struct GNUNET_REGEX_Context *ctx)
{
struct GNUNET_REGEX_Automaton *a;
*
* @param ctx context
*/
-void
+static void
nfa_add_plus_op (struct GNUNET_REGEX_Context *ctx)
{
struct GNUNET_REGEX_Automaton *a;
*
* @param ctx context
*/
-void
+static void
nfa_add_alternation (struct GNUNET_REGEX_Context *ctx)
{
struct GNUNET_REGEX_Automaton *a;
* @param ctx context
* @param lit literal for nfa transition
*/
-void
+static void
nfa_add_literal (struct GNUNET_REGEX_Context *ctx, const char lit)
{
struct GNUNET_REGEX_Automaton *n;
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
}
-/**
- * Calculates the NFA closure set for the given state
- *
- * @param s starting point state
- * @param literal transitioning literal on which to base the closure on,
- * pass 0 for epsilon transition
- *
- * @return nfa closure on 'literal' (epsilon closure if 'literal' is 0)
- */
-struct StateSet *
-nfa_closure_create (struct State *s, const char literal)
-{
- struct StateSet *cls;
- struct StateSet *cls_check;
- struct State *clsstate;
- struct State *currentstate;
- struct Transition *ctran;
-
- if (NULL == s)
- return NULL;
-
- cls = GNUNET_malloc (sizeof (struct StateSet));
- cls_check = GNUNET_malloc (sizeof (struct StateSet));
-
- // Add start state to closure only for epsilon closure
- if (0 == literal)
- GNUNET_array_append (cls->states, cls->len, s);
-
- GNUNET_array_append (cls_check->states, cls_check->len, s);
- while (cls_check->len > 0)
- {
- currentstate = cls_check->states[cls_check->len - 1];
- GNUNET_array_grow (cls_check->states, cls_check->len, cls_check->len - 1);
-
- for (ctran = currentstate->transitions_head; NULL != ctran;
- ctran = ctran->next)
- {
- if (NULL != ctran->state && literal == ctran->literal)
- {
- clsstate = ctran->state;
-
- if (NULL != clsstate &&
- GNUNET_YES != state_set_contains (cls, clsstate))
- {
- GNUNET_array_append (cls->states, cls->len, clsstate);
- GNUNET_array_append (cls_check->states, cls_check->len, clsstate);
- }
- }
- }
- }
- GNUNET_assert (0 == cls_check->len);
- GNUNET_free (cls_check);
-
- return cls;
-}
-
-/**
- * Calculates the closure set for the given set of states.
- *
- * @param states list of states on which to base the closure on
- * @param literal transitioning literal for which to base the closure on,
- * pass 0 for epsilon transition
- *
- * @return nfa closure on 'literal' (epsilon closure if 'literal' is 0)
- */
-struct StateSet *
-nfa_closure_set_create (struct StateSet *states, const char literal)
-{
- struct State *s;
- struct StateSet *sset;
- struct StateSet *cls;
- int i;
- int j;
-
- if (NULL == states)
- return NULL;
-
- cls = GNUNET_malloc (sizeof (struct StateSet));
-
- for (i = 0; i < states->len; i++)
- {
- s = states->states[i];
- sset = nfa_closure_create (s, literal);
-
- for (j = 0; j < sset->len; j++)
- GNUNET_array_append (cls->states, cls->len, sset->states[j]);
-
- state_set_clear (sset);
- }
-
- return cls;
-}
-
/**
* Construct an NFA by parsing the regex string of length 'len'.
*
GNUNET_free (dfa_stack);
GNUNET_REGEX_automaton_destroy (nfa);
+ dfa_minimize (dfa);
+
GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Created DFA with %i States\n",
ctx.state_id);
*
* @return GNUNET_YES if string matches, GNUNET_NO if not, GNUNET_SYSERR otherwise
*/
-int
+static int
evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
{
const char *strp;
if (DFA != a->type)
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Tried to evaluate NFA, but DFA automaton given");
+ "Tried to evaluate DFA, but NFA automaton given");
return GNUNET_SYSERR;
}
*
* @return GNUNET_YES if string matches, GNUNET_NO if not, GNUNET_SYSERR otherwise
*/
-int
+static int
evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
{
const char *strp;
struct StateSet *sset;
struct StateSet *new_sset;
int i;
- int eval;
+ int result;
if (NFA != a->type)
{
return GNUNET_SYSERR;
}
- eval = GNUNET_NO;
+ result = GNUNET_NO;
strp = string;
sset = GNUNET_malloc (sizeof (struct StateSet));
GNUNET_array_append (sset->states, sset->len, a->start);
s = sset->states[i];
if (NULL != s && s->accepting)
{
- eval = GNUNET_YES;
+ result = GNUNET_YES;
break;
}
}
state_set_clear (sset);
- return eval;
+ return result;
}
int
GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
{
- int eval;
+ int result;
switch (a->type)
{
case DFA:
- eval = evaluate_dfa (a, string);
+ result = evaluate_dfa (a, string);
break;
case NFA:
- eval = evaluate_nfa (a, string);
+ result = evaluate_nfa (a, string);
break;
default:
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
"Evaluating regex failed, automaton has no type!\n");
- eval = GNUNET_SYSERR;
+ result = GNUNET_SYSERR;
break;
}
- return eval;
+ return result;
}