2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex.c
22 * @brief library to create automatons from regular expressions
23 * @author Maximilian Szengel
26 #include "gnunet_container_lib.h"
27 #include "gnunet_regex_lib.h"
31 * Context that contains an id counter for states and transitions
32 * as well as a DLL of automatons used as a stack for NFA construction.
34 struct GNUNET_REGEX_Context
36 unsigned int state_id;
37 unsigned int transition_id;
40 * DLL of GNUNET_REGEX_Automaton's used as a stack
42 struct GNUNET_REGEX_Automaton *stack_head;
43 struct GNUNET_REGEX_Automaton *stack_tail;
46 enum GNUNET_REGEX_automaton_type
53 * Automaton representation
55 struct GNUNET_REGEX_Automaton
57 struct GNUNET_REGEX_Automaton *prev;
58 struct GNUNET_REGEX_Automaton *next;
63 unsigned int state_count;
64 struct State *states_head;
65 struct State *states_tail;
67 enum GNUNET_REGEX_automaton_type type;
71 * A state. Can be used in DFA and NFA automatons.
83 unsigned int transition_count;
84 struct Transition *transitions_head;
85 struct Transition *transitions_tail;
87 struct StateSet *nfa_set;
91 * Transition between two states. Each state can have 0-n transitions.
92 * If literal is 0, this is considered to be an epsilon transition.
96 struct Transition *prev;
97 struct Transition *next;
112 struct State **states;
117 * Initialize a new context
122 GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx)
126 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
130 ctx->transition_id = 0;
131 ctx->stack_head = NULL;
132 ctx->stack_tail = NULL;
136 debug_print_state (struct State *s)
138 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
139 "State %i: %s marked: %i accepting: %i\n", s->id, s->name,
140 s->marked, s->accepting);
144 debug_print_states (struct StateSet *sset)
149 for (i = 0; i < sset->len; i++)
152 debug_print_state (s);
157 debug_print_transitions (struct State *s)
159 struct Transition *t;
163 for (t = s->transitions_head; NULL != t; t = t->next)
168 literal = t->literal;
170 if (NULL == t->state)
173 state = t->state->name;
175 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transition %i: On %c to %s\n", t->id,
181 state_compare (const void *a, const void *b)
186 s1 = (struct State **) a;
187 s2 = (struct State **) b;
189 return (*s1)->id - (*s2)->id;
193 * Compare to state sets by comparing the id's of the states that are
194 * contained in each set. Both sets are expected to be sorted by id!
196 * @param sset1 first state set
197 * @param sset2 second state set
199 * @return 0 if they are equal, non 0 otherwise
202 state_set_compare (struct StateSet *sset1, struct StateSet *sset2)
206 if (sset1->len != sset2->len)
209 for (i = 0; i < sset1->len; i++)
211 if (sset1->states[i]->id != sset2->states[i]->id)
220 * Checks if 'elem' is contained in 'set'
222 * @param set set of states
225 * @return GNUNET_YES if 'set' contains 'elem, GNUNET_NO otherwise
228 state_set_contains (struct StateSet *set, struct State *elem)
233 for (i = 0; i < set->len; i++)
236 if (0 == memcmp (s, elem, sizeof (struct State)))
243 * Clears the given StateSet 'set'
245 * @param set set to be cleared
248 state_set_clear (struct StateSet *set)
252 if (NULL != set->states)
253 GNUNET_free (set->states);
259 * Adds a transition from one state to another on 'literal'
262 * @param from_state starting state for the transition
263 * @param literal transition label
264 * @param to_state state to where the transition should point to
267 add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state,
268 const char literal, struct State *to_state)
270 struct Transition *t;
272 if (NULL == from_state)
274 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
278 t = GNUNET_malloc (sizeof (struct Transition));
280 t->id = ctx->transition_id++;
281 t->literal = literal;
284 GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
285 from_state->transitions_tail, t);
289 * Clears an automaton fragment. Does not destroy the states inside
292 * @param a automaton to be cleared
295 automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
299 a->states_head = NULL;
300 a->states_tail = NULL;
306 * Frees the memory used by State 's'
308 * @param s state that should be destroyed
311 automaton_destroy_state (struct State *s)
313 struct Transition *t;
314 struct Transition *next_t;
317 GNUNET_free (s->name);
319 for (t = s->transitions_head; NULL != t;)
322 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
327 state_set_clear (s->nfa_set);
333 automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
337 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
339 automaton_destroy_state (ss);
343 automaton_add_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
345 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
350 * Creates a new DFA state based on a set of NFA states. Needs to be freed
351 * using automaton_destroy_state.
354 * @param nfa_states set of NFA states on which the DFA should be based on
356 * @return new DFA state
358 static struct State *
359 dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states)
364 struct State *cstate;
365 struct Transition *ctran;
367 struct Transition *t;
370 s = GNUNET_malloc (sizeof (struct State));
371 s->id = ctx->state_id++;
376 if (NULL == nfa_states)
378 GNUNET_asprintf (&s->name, "s%i", s->id);
382 s->nfa_set = nfa_states;
384 if (nfa_states->len < 1)
387 // Create a name based on 'sset'
388 s->name = GNUNET_malloc (sizeof (char) * 2);
389 strcat (s->name, "{");
392 for (i = 0; i < nfa_states->len; i++)
394 cstate = nfa_states->states[i];
395 GNUNET_asprintf (&name, "%i,", cstate->id);
399 len = strlen (s->name) + strlen (name) + 1;
400 s->name = GNUNET_realloc (s->name, len);
401 strcat (s->name, name);
406 // Add a transition for each distinct literal to NULL state
407 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
409 if (0 != ctran->literal)
413 for (t = s->transitions_head; NULL != t; t = t->next)
415 if (t->literal == ctran->literal)
423 add_transition (ctx, s, ctran->literal, NULL);
427 // If the nfa_states contain an accepting state, the new dfa state is also accepting
428 if (cstate->accepting)
432 s->name[strlen (s->name) - 1] = '}';
438 * Move from the given state 's' to the next state on
439 * transition 'literal'
441 * @param s starting state
442 * @param literal edge label to follow
444 * @return new state or NULL, if transition on literal not possible
446 static struct State *
447 dfa_move (struct State *s, const char literal)
449 struct Transition *t;
457 for (t = s->transitions_head; NULL != t; t = t->next)
459 if (literal == t->literal)
470 * Remove all unreachable states from DFA 'a'. Unreachable states
471 * are those states that are not reachable from the starting state.
473 * @param a DFA automaton
476 dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
478 struct State *stack[a->state_count];
481 struct Transition *t;
485 // 1. unmark all states
486 for (s = a->states_head; NULL != s; s = s->next)
491 // 2. traverse dfa from start state and mark all visited states
492 stack[stack_len] = a->start;
494 while (stack_len > 0)
496 s = stack[stack_len-1];
498 s->marked = 1; // mark s as visited
499 for (t = s->transitions_head; NULL != t; t = t->next)
501 if (NULL != t->state && 0 == t->state->marked)
503 // add next states to stack
504 stack[stack_len] = t->state;
510 // 3. delete all states that were not visited
511 for (s = a->states_head; NULL != s; s = s->next)
514 automaton_remove_state (a, s);
519 * Remove all dead states from the DFA 'a'. Dead states are those
520 * states that do not transition to any other state but themselfes.
522 * @param a DFA automaton
525 dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
528 struct State *s_check;
529 struct Transition *t;
530 struct Transition *t_check;
533 GNUNET_assert (DFA == a->type);
535 for (s = a->states_head; NULL != s; s = s->next)
541 for (t = s->transitions_head; NULL != t; t = t->next)
543 if (NULL != t->state && t->state != s)
553 // state s is dead, remove it
554 // 1. remove all transitions to this state
555 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
557 for (t_check = s_check->transitions_head; NULL != t_check;
558 t_check = t_check->next)
560 if (t_check->state == s)
562 GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
563 s_check->transitions_tail, t_check);
568 automaton_remove_state (a, s);
573 * Merge all non distinguishable states in the DFA 'a'
575 * @param a DFA automaton
578 dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a)
584 * Minimize the given DFA 'a' by removing all unreachable states,
585 * removing all dead states and merging all non distinguishable states
587 * @param a DFA automaton
590 dfa_minimize (struct GNUNET_REGEX_Automaton *a)
595 GNUNET_assert (DFA == a->type);
597 // 1. remove unreachable states
598 dfa_remove_unreachable_states (a);
600 // 2. remove dead states
601 dfa_remove_dead_states (a);
603 // 3. Merge nondistinguishable states
604 dfa_merge_nondistinguishable_states (a);
608 * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear.
610 * @param start starting state
611 * @param end end state
613 * @return new NFA fragment
615 static struct GNUNET_REGEX_Automaton *
616 nfa_fragment_create (struct State *start, struct State *end)
618 struct GNUNET_REGEX_Automaton *n;
620 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
626 if (NULL == start && NULL == end)
629 automaton_add_state (n, end);
630 automaton_add_state (n, start);
639 * Adds a list of states to the given automaton 'n'.
641 * @param n automaton to which the states should be added
642 * @param states_head head of the DLL of states
643 * @param states_tail tail of the DLL of states
646 nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head,
647 struct State *states_tail)
651 if (NULL == n || NULL == states_head)
653 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
657 if (NULL == n->states_head)
659 n->states_head = states_head;
660 n->states_tail = states_tail;
664 if (NULL != states_head)
666 n->states_tail->next = states_head;
667 n->states_tail = states_tail;
670 for (s = states_head; NULL != s; s = s->next)
675 * Creates a new NFA state. Needs to be freed using automaton_destroy_state.
678 * @param accepting is it an accepting state or not
680 * @return new NFA state
682 static struct State *
683 nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
687 s = GNUNET_malloc (sizeof (struct State));
688 s->id = ctx->state_id++;
689 s->accepting = accepting;
692 GNUNET_asprintf (&s->name, "s%i", s->id);
698 * Calculates the NFA closure set for the given state
700 * @param s starting point state
701 * @param literal transitioning literal on which to base the closure on,
702 * pass 0 for epsilon transition
704 * @return nfa closure on 'literal' (epsilon closure if 'literal' is 0)
706 static struct StateSet *
707 nfa_closure_create (struct State *s, const char literal)
709 struct StateSet *cls;
710 struct StateSet *cls_check;
711 struct State *clsstate;
712 struct State *currentstate;
713 struct Transition *ctran;
718 cls = GNUNET_malloc (sizeof (struct StateSet));
719 cls_check = GNUNET_malloc (sizeof (struct StateSet));
721 // Add start state to closure only for epsilon closure
723 GNUNET_array_append (cls->states, cls->len, s);
725 GNUNET_array_append (cls_check->states, cls_check->len, s);
726 while (cls_check->len > 0)
728 currentstate = cls_check->states[cls_check->len - 1];
729 GNUNET_array_grow (cls_check->states, cls_check->len, cls_check->len - 1);
731 for (ctran = currentstate->transitions_head; NULL != ctran;
734 if (NULL != ctran->state && literal == ctran->literal)
736 clsstate = ctran->state;
738 if (NULL != clsstate &&
739 GNUNET_YES != state_set_contains (cls, clsstate))
741 GNUNET_array_append (cls->states, cls->len, clsstate);
742 GNUNET_array_append (cls_check->states, cls_check->len, clsstate);
747 GNUNET_assert (0 == cls_check->len);
748 GNUNET_free (cls_check);
751 qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
757 * Calculates the closure set for the given set of states.
759 * @param states list of states on which to base the closure on
760 * @param literal transitioning literal for which to base the closure on,
761 * pass 0 for epsilon transition
763 * @return nfa closure on 'literal' (epsilon closure if 'literal' is 0)
765 static struct StateSet *
766 nfa_closure_set_create (struct StateSet *states, const char literal)
769 struct StateSet *sset;
770 struct StateSet *cls;
779 cls = GNUNET_malloc (sizeof (struct StateSet));
781 for (i = 0; i < states->len; i++)
783 s = states->states[i];
784 sset = nfa_closure_create (s, literal);
786 for (j = 0; j < sset->len; j++)
789 for (k = 0; k < cls->len; k++)
791 if (sset->states[j]->id == cls->states[k]->id)
798 GNUNET_array_append (cls->states, cls->len, sset->states[j]);
800 state_set_clear (sset);
804 qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
810 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
815 nfa_add_concatenation (struct GNUNET_REGEX_Context *ctx)
817 struct GNUNET_REGEX_Automaton *a;
818 struct GNUNET_REGEX_Automaton *b;
819 struct GNUNET_REGEX_Automaton *new;
822 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
824 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
826 add_transition (ctx, a->end, 0, b->start);
827 a->end->accepting = 0;
828 b->end->accepting = 1;
830 new = nfa_fragment_create (NULL, NULL);
831 nfa_add_states (new, a->states_head, a->states_tail);
832 nfa_add_states (new, b->states_head, b->states_tail);
833 new->start = a->start;
835 automaton_fragment_clear (a);
836 automaton_fragment_clear (b);
838 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
842 * Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
847 nfa_add_star_op (struct GNUNET_REGEX_Context *ctx)
849 struct GNUNET_REGEX_Automaton *a;
850 struct GNUNET_REGEX_Automaton *new;
855 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
859 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
860 "nfa_add_star_op failed, because there was no element on the stack");
864 start = nfa_state_create (ctx, 0);
865 end = nfa_state_create (ctx, 1);
867 add_transition (ctx, start, 0, a->start);
868 add_transition (ctx, start, 0, end);
869 add_transition (ctx, a->end, 0, a->start);
870 add_transition (ctx, a->end, 0, end);
872 a->end->accepting = 0;
875 new = nfa_fragment_create (start, end);
876 nfa_add_states (new, a->states_head, a->states_tail);
877 automaton_fragment_clear (a);
879 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
883 * Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
888 nfa_add_plus_op (struct GNUNET_REGEX_Context *ctx)
890 struct GNUNET_REGEX_Automaton *a;
893 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
895 add_transition (ctx, a->end, 0, a->start);
897 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
901 * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment
902 * that alternates between a and b (a|b)
907 nfa_add_alternation (struct GNUNET_REGEX_Context *ctx)
909 struct GNUNET_REGEX_Automaton *a;
910 struct GNUNET_REGEX_Automaton *b;
911 struct GNUNET_REGEX_Automaton *new;
916 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
918 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
920 start = nfa_state_create (ctx, 0);
921 end = nfa_state_create (ctx, 1);
922 add_transition (ctx, start, 0, a->start);
923 add_transition (ctx, start, 0, b->start);
925 add_transition (ctx, a->end, 0, end);
926 add_transition (ctx, b->end, 0, end);
928 a->end->accepting = 0;
929 b->end->accepting = 0;
932 new = nfa_fragment_create (start, end);
933 nfa_add_states (new, a->states_head, a->states_tail);
934 nfa_add_states (new, b->states_head, b->states_tail);
935 automaton_fragment_clear (a);
936 automaton_fragment_clear (b);
938 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
942 * Adds a new nfa fragment to the stack
945 * @param lit literal for nfa transition
948 nfa_add_literal (struct GNUNET_REGEX_Context *ctx, const char lit)
950 struct GNUNET_REGEX_Automaton *n;
954 GNUNET_assert (NULL != ctx);
956 start = nfa_state_create (ctx, 0);
957 end = nfa_state_create (ctx, 1);
958 add_transition (ctx, start, lit, end);
959 n = nfa_fragment_create (start, end);
960 GNUNET_assert (NULL != n);
961 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
965 * Construct an NFA by parsing the regex string of length 'len'.
967 * @param regex regular expression string
968 * @param len length of the string
970 * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton
972 struct GNUNET_REGEX_Automaton *
973 GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
975 struct GNUNET_REGEX_Context ctx;
976 struct GNUNET_REGEX_Automaton *nfa;
980 unsigned int altcount;
981 unsigned int atomcount;
989 GNUNET_REGEX_context_init (&ctx);
998 for (count = 0; count < len && *regexp; count++, regexp++)
1006 nfa_add_concatenation (&ctx);
1008 GNUNET_array_grow (p, pcount, pcount + 1);
1009 p[pcount - 1].altcount = altcount;
1010 p[pcount - 1].atomcount = atomcount;
1017 error_msg = "Cannot append '|' to nothing";
1020 while (--atomcount > 0)
1021 nfa_add_concatenation (&ctx);
1027 error_msg = "Missing opening '('";
1032 // Ignore this: "()"
1034 altcount = p[pcount].altcount;
1035 atomcount = p[pcount].atomcount;
1038 while (--atomcount > 0)
1039 nfa_add_concatenation (&ctx);
1040 for (; altcount > 0; altcount--)
1041 nfa_add_alternation (&ctx);
1043 altcount = p[pcount].altcount;
1044 atomcount = p[pcount].atomcount;
1050 error_msg = "Cannot append '+' to nothing";
1053 nfa_add_star_op (&ctx);
1058 error_msg = "Cannot append '+' to nothing";
1061 nfa_add_plus_op (&ctx);
1063 case 92: /* escape: \ */
1070 nfa_add_concatenation (&ctx);
1072 nfa_add_literal (&ctx, *regexp);
1079 error_msg = "Unbalanced parenthesis";
1082 while (--atomcount > 0)
1083 nfa_add_concatenation (&ctx);
1084 for (; altcount > 0; altcount--)
1085 nfa_add_alternation (&ctx);
1090 nfa = ctx.stack_tail;
1091 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
1094 if (NULL != ctx.stack_head)
1096 error_msg = "Creating the NFA failed. NFA stack was not empty!";
1103 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex\n");
1104 if (NULL != error_msg)
1105 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
1108 while (NULL != ctx.stack_tail)
1110 GNUNET_REGEX_automaton_destroy (ctx.stack_tail);
1111 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail,
1118 * Free the memory allocated by constructing the GNUNET_REGEX_Automaton
1121 * @param a automaton to be destroyed
1124 GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a)
1127 struct State *next_state;
1132 for (s = a->states_head; NULL != s;)
1134 next_state = s->next;
1135 automaton_destroy_state (s);
1143 * Construct DFA for the given 'regex' of length 'len'
1145 * @param regex regular expression string
1146 * @param len length of the regular expression
1148 * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton
1150 struct GNUNET_REGEX_Automaton *
1151 GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
1153 struct GNUNET_REGEX_Context ctx;
1154 struct GNUNET_REGEX_Automaton *dfa;
1155 struct GNUNET_REGEX_Automaton *nfa;
1156 struct StateSet *tmp;
1157 struct StateSet *nfa_set;
1158 struct StateSet *dfa_stack;
1159 struct Transition *ctran;
1160 struct State *dfa_state;
1161 struct State *new_dfa_state;
1162 struct State *state_contains;
1163 struct State *state_iter;
1165 GNUNET_REGEX_context_init (&ctx);
1168 nfa = GNUNET_REGEX_construct_nfa (regex, len);
1172 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1173 "Could not create DFA, because NFA creation failed\n");
1177 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
1180 // Create DFA start state from epsilon closure
1181 dfa_stack = GNUNET_malloc (sizeof (struct StateSet));
1182 nfa_set = nfa_closure_create (nfa->start, 0);
1183 dfa->start = dfa_state_create (&ctx, nfa_set);
1184 automaton_add_state (dfa, dfa->start);
1185 GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start);
1186 while (dfa_stack->len > 0)
1188 dfa_state = dfa_stack->states[dfa_stack->len - 1];
1189 GNUNET_array_grow (dfa_stack->states, dfa_stack->len, dfa_stack->len - 1);
1191 for (ctran = dfa_state->transitions_head; NULL != ctran;
1192 ctran = ctran->next)
1194 if (0 != ctran->literal && NULL == ctran->state)
1196 tmp = nfa_closure_set_create (dfa_state->nfa_set, ctran->literal);
1197 nfa_set = nfa_closure_set_create (tmp, 0);
1198 state_set_clear (tmp);
1199 new_dfa_state = dfa_state_create (&ctx, nfa_set);
1200 state_contains = NULL;
1201 for (state_iter = dfa->states_head; NULL != state_iter;
1202 state_iter = state_iter->next)
1205 state_set_compare (state_iter->nfa_set, new_dfa_state->nfa_set))
1206 state_contains = state_iter;
1209 if (NULL == state_contains)
1211 automaton_add_state (dfa, new_dfa_state);
1212 GNUNET_array_append (dfa_stack->states, dfa_stack->len,
1214 ctran->state = new_dfa_state;
1218 ctran->state = state_contains;
1219 automaton_destroy_state (new_dfa_state);
1225 GNUNET_free (dfa_stack);
1226 GNUNET_REGEX_automaton_destroy (nfa);
1234 * Save the given automaton as a GraphViz dot file
1236 * @param a the automaton to be saved
1237 * @param filename where to save the file
1240 GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
1241 const char *filename)
1244 struct Transition *ctran;
1246 char *s_tran = NULL;
1253 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
1257 if (NULL == filename || strlen (filename) < 1)
1259 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
1263 p = fopen (filename, "w");
1267 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
1272 start = "digraph G {\nrankdir=LR\n";
1273 fwrite (start, strlen (start), 1, p);
1275 for (s = a->states_head; NULL != s; s = s->next)
1279 GNUNET_asprintf (&s_acc, "\"%s\" [shape=doublecircle];\n", s->name);
1280 fwrite (s_acc, strlen (s_acc), 1, p);
1281 GNUNET_free (s_acc);
1286 for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
1288 if (NULL == ctran->state)
1290 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1291 "Transition from State %i has has no state for transitioning\n",
1296 if (ctran->literal == 0)
1298 GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"epsilon\"];\n",
1299 s->name, ctran->state->name);
1303 GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"%c\"];\n",
1304 s->name, ctran->state->name, ctran->literal);
1307 fwrite (s_tran, strlen (s_tran), 1, p);
1308 GNUNET_free (s_tran);
1313 fwrite (end, strlen (end), 1, p);
1318 * Evaluates the given string using the given DFA automaton
1320 * @param a automaton, type must be DFA
1321 * @param string string that should be evaluated
1323 * @return 0 if string matches, non 0 otherwise
1326 evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1333 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1334 "Tried to evaluate DFA, but NFA automaton given");
1340 for (strp = string; NULL != strp && *strp; strp++)
1342 s = dfa_move (s, *strp);
1347 if (NULL != s && s->accepting)
1354 * Evaluates the given string using the given NFA automaton
1356 * @param a automaton, type must be NFA
1357 * @param string string that should be evaluated
1359 * @return 0 if string matches, non 0 otherwise
1362 evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1366 struct StateSet *sset;
1367 struct StateSet *new_sset;
1373 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1374 "Tried to evaluate NFA, but DFA automaton given");
1380 sset = nfa_closure_create (a->start, 0);
1382 for (strp = string; NULL != strp && *strp; strp++)
1384 new_sset = nfa_closure_set_create (sset, *strp);
1385 state_set_clear (sset);
1386 sset = nfa_closure_set_create (new_sset, 0);
1387 state_set_clear (new_sset);
1390 for (i = 0; i < sset->len; i++)
1392 s = sset->states[i];
1393 if (NULL != s && s->accepting)
1400 state_set_clear (sset);
1406 * Evaluates the given 'string' against the given compiled regex
1408 * @param a automaton
1409 * @param string string to check
1411 * @return 0 if string matches, non 0 otherwise
1414 GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
1421 result = evaluate_dfa (a, string);
1424 result = evaluate_nfa (a, string);
1427 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1428 "Evaluating regex failed, automaton has no type!\n");
1429 result = GNUNET_SYSERR;