2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex.c
22 * @brief library to create automatons from regular expressions
23 * @author Maximilian Szengel
26 #include "gnunet_container_lib.h"
27 #include "gnunet_regex_lib.h"
31 * Context that contains an id counter for states and transitions
32 * as well as a DLL of automatons used as a stack for NFA construction.
34 struct GNUNET_REGEX_Context
36 unsigned int state_id;
37 unsigned int transition_id;
40 * DLL of GNUNET_REGEX_Automaton's used as a stack
42 struct GNUNET_REGEX_Automaton *stack_head;
43 struct GNUNET_REGEX_Automaton *stack_tail;
46 enum GNUNET_REGEX_automaton_type
53 * Automaton representation
55 struct GNUNET_REGEX_Automaton
57 struct GNUNET_REGEX_Automaton *prev;
58 struct GNUNET_REGEX_Automaton *next;
63 unsigned int state_count;
64 struct State *states_head;
65 struct State *states_tail;
67 enum GNUNET_REGEX_automaton_type type;
71 * A state. Can be used in DFA and NFA automatons.
83 unsigned int transition_count;
84 struct Transition *transitions_head;
85 struct Transition *transitions_tail;
87 struct StateSet *nfa_set;
91 * Transition between two states. Each state can have 0-n transitions.
92 * If literal is 0, this is considered to be an epsilon transition.
96 struct Transition *prev;
97 struct Transition *next;
112 struct State **states;
117 * Initialize a new context
122 GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx)
126 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
130 ctx->transition_id = 0;
131 ctx->stack_head = NULL;
132 ctx->stack_tail = NULL;
136 debug_print_state (struct State *s)
138 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
139 "State %i: %s marked: %i accepting: %i\n", s->id, s->name,
140 s->marked, s->accepting);
144 debug_print_states (struct StateSet *sset)
149 for (i = 0; i < sset->len; i++)
152 debug_print_state (s);
157 debug_print_transitions (struct State *s)
159 struct Transition *t;
163 for (t = s->transitions_head; NULL != t; t = t->next)
168 literal = t->literal;
170 if (NULL == t->state)
173 state = t->state->name;
175 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transition %i: On %c to %s\n", t->id,
181 * Compare two states. Used for sorting.
183 * @param a first state
184 * @param b second state
186 * @return an integer less than, equal to, or greater than zero
187 * if the first argument is considered to be respectively
188 * less than, equal to, or greater than the second.
191 state_compare (const void *a, const void *b)
196 s1 = (struct State **) a;
197 s2 = (struct State **) b;
199 return (*s1)->id - (*s2)->id;
203 * Compare to state sets by comparing the id's of the states that are
204 * contained in each set. Both sets are expected to be sorted by id!
206 * @param sset1 first state set
207 * @param sset2 second state set
209 * @return 0 if they are equal, non 0 otherwise
212 state_set_compare (struct StateSet *sset1, struct StateSet *sset2)
216 if (sset1->len != sset2->len)
219 for (i = 0; i < sset1->len; i++)
221 if (sset1->states[i]->id != sset2->states[i]->id)
230 * Checks if 'elem' is contained in 'set'
232 * @param set set of states
235 * @return GNUNET_YES if 'set' contains 'elem, GNUNET_NO otherwise
238 state_set_contains (struct StateSet *set, struct State *elem)
243 for (i = 0; i < set->len; i++)
246 if (0 == memcmp (s, elem, sizeof (struct State)))
253 * Clears the given StateSet 'set'
255 * @param set set to be cleared
258 state_set_clear (struct StateSet *set)
262 if (NULL != set->states)
263 GNUNET_free (set->states);
269 * Adds a transition from one state to another on 'literal'
272 * @param from_state starting state for the transition
273 * @param literal transition label
274 * @param to_state state to where the transition should point to
277 add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state,
278 const char literal, struct State *to_state)
280 struct Transition *t;
282 if (NULL == from_state)
284 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
288 t = GNUNET_malloc (sizeof (struct Transition));
290 t->id = ctx->transition_id++;
291 t->literal = literal;
294 GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
295 from_state->transitions_tail, t);
299 * Clears an automaton fragment. Does not destroy the states inside
302 * @param a automaton to be cleared
305 automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
309 a->states_head = NULL;
310 a->states_tail = NULL;
316 * Frees the memory used by State 's'
318 * @param s state that should be destroyed
321 automaton_destroy_state (struct State *s)
323 struct Transition *t;
324 struct Transition *next_t;
327 GNUNET_free (s->name);
329 for (t = s->transitions_head; NULL != t;)
332 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
337 state_set_clear (s->nfa_set);
343 * Remove a state from the given automaton 'a'. Always use this function
344 * when altering the states of an automaton. Will also remove all transitions
345 * leading to this state, before destroying it.
348 * @param s state to remove
351 automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
354 struct State *s_check;
355 struct Transition *t_check;
359 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
362 // remove all transitions leading to this state
363 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
365 for (t_check = s_check->transitions_head; NULL != t_check;
366 t_check = t_check->next)
368 if (t_check->state == ss)
370 GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
371 s_check->transitions_tail, t_check);
372 s_check->transition_count--;
377 automaton_destroy_state (ss);
381 * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy 's2'.
385 * @param s1 first state
386 * @param s2 second state, will be destroyed
389 automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
390 struct GNUNET_REGEX_Automaton *a, struct State *s1,
393 struct State *s_check;
394 struct Transition *t_check;
395 struct Transition *t;
398 GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
400 // 1. Make all transitions pointing to s2 point to s1
401 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
403 for (t_check = s_check->transitions_head; NULL != t_check;
404 t_check = t_check->next)
406 if (s_check != s1 && s2 == t_check->state)
411 // 2. Add all transitions from s2 to sX to s1
412 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
414 for (t = s1->transitions_head; NULL != t; t = t->next)
416 if (t_check->literal != t->literal && NULL != t_check->state &&
417 t_check->state != t->state && t_check->state != s2)
419 add_transition (ctx, s1, t_check->literal, t_check->state);
424 // 3. Rename s1 to {s1,s2}
425 new_name = GNUNET_malloc (strlen (s1->name) + strlen (s2->name) + 1);
426 strncat (new_name, s1->name, strlen (s1->name));
427 strncat (new_name, s2->name, strlen (s2->name));
428 if (NULL != s1->name)
429 GNUNET_free (s1->name);
434 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s_check);
436 automaton_destroy_state (s_check);
440 * Add a state to the automaton 'a', always use this function to
441 * alter the states DLL of the automaton.
443 * @param a automaton to add the state to
444 * @param s state that should be added
447 automaton_add_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
449 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
454 * Creates a new DFA state based on a set of NFA states. Needs to be freed
455 * using automaton_destroy_state.
458 * @param nfa_states set of NFA states on which the DFA should be based on
460 * @return new DFA state
462 static struct State *
463 dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states)
468 struct State *cstate;
469 struct Transition *ctran;
471 struct Transition *t;
474 s = GNUNET_malloc (sizeof (struct State));
475 s->id = ctx->state_id++;
480 if (NULL == nfa_states)
482 GNUNET_asprintf (&s->name, "s%i", s->id);
486 s->nfa_set = nfa_states;
488 if (nfa_states->len < 1)
491 // Create a name based on 'sset'
492 s->name = GNUNET_malloc (sizeof (char) * 2);
493 strcat (s->name, "{");
496 for (i = 0; i < nfa_states->len; i++)
498 cstate = nfa_states->states[i];
499 GNUNET_asprintf (&name, "%i,", cstate->id);
503 len = strlen (s->name) + strlen (name) + 1;
504 s->name = GNUNET_realloc (s->name, len);
505 strcat (s->name, name);
510 // Add a transition for each distinct literal to NULL state
511 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
513 if (0 != ctran->literal)
517 for (t = s->transitions_head; NULL != t; t = t->next)
519 if (t->literal == ctran->literal)
527 add_transition (ctx, s, ctran->literal, NULL);
531 // If the nfa_states contain an accepting state, the new dfa state is also accepting
532 if (cstate->accepting)
536 s->name[strlen (s->name) - 1] = '}';
542 * Move from the given state 's' to the next state on
543 * transition 'literal'
545 * @param s starting state
546 * @param literal edge label to follow
548 * @return new state or NULL, if transition on literal not possible
550 static struct State *
551 dfa_move (struct State *s, const char literal)
553 struct Transition *t;
561 for (t = s->transitions_head; NULL != t; t = t->next)
563 if (literal == t->literal)
574 * Remove all unreachable states from DFA 'a'. Unreachable states
575 * are those states that are not reachable from the starting state.
577 * @param a DFA automaton
580 dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
582 struct State *stack[a->state_count];
585 struct Transition *t;
589 // 1. unmark all states
590 for (s = a->states_head; NULL != s; s = s->next)
595 // 2. traverse dfa from start state and mark all visited states
596 stack[stack_len] = a->start;
598 while (stack_len > 0)
600 s = stack[stack_len - 1];
602 s->marked = 1; // mark s as visited
603 for (t = s->transitions_head; NULL != t; t = t->next)
605 if (NULL != t->state && 0 == t->state->marked)
607 // add next states to stack
608 stack[stack_len] = t->state;
614 // 3. delete all states that were not visited
615 for (s = a->states_head; NULL != s; s = s->next)
618 automaton_remove_state (a, s);
623 * Remove all dead states from the DFA 'a'. Dead states are those
624 * states that do not transition to any other state but themselfes.
626 * @param a DFA automaton
629 dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
632 struct Transition *t;
635 GNUNET_assert (DFA == a->type);
637 for (s = a->states_head; NULL != s; s = s->next)
643 for (t = s->transitions_head; NULL != t; t = t->next)
645 if (NULL != t->state && t->state != s)
655 // state s is dead, remove it
656 automaton_remove_state (a, s);
661 * Merge all non distinguishable states in the DFA 'a'
664 * @param a DFA automaton
667 dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
668 struct GNUNET_REGEX_Automaton *a)
671 int table[a->state_count][a->state_count];
674 struct Transition *t1;
675 struct Transition *t2;
679 for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1;
683 // Mark all pairs of accepting/!accepting states
684 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
686 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
688 if ((s1->accepting && !s2->accepting) ||
689 (!s1->accepting && s2->accepting))
691 table[s1->marked][s2->marked] = 1;
694 table[s1->marked][s2->marked] = 0;
701 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
703 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
705 if (0 != table[s1->marked][s2->marked])
708 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
710 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
712 if (t1->literal == t2->literal && t1->state == t2->state &&
713 (0 != table[t1->state->marked][t2->state->marked] ||
714 0 != table[t2->state->marked][t1->state->marked]))
716 table[s1->marked][s2->marked] = t1->literal;
719 else if (t1->literal != t2->literal && t1->state != t2->state)
721 table[s1->marked][s2->marked] = -1;
730 struct State *s2_next;
732 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
734 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
737 if (s1 != s2 && table[s1->marked][s2->marked] == 0)
738 automaton_merge_states (ctx, a, s1, s2);
744 * Minimize the given DFA 'a' by removing all unreachable states,
745 * removing all dead states and merging all non distinguishable states
748 * @param a DFA automaton
751 dfa_minimize (struct GNUNET_REGEX_Context *ctx,
752 struct GNUNET_REGEX_Automaton *a)
757 GNUNET_assert (DFA == a->type);
759 // 1. remove unreachable states
760 dfa_remove_unreachable_states (a);
762 // 2. remove dead states
763 dfa_remove_dead_states (a);
765 // 3. Merge nondistinguishable states
766 dfa_merge_nondistinguishable_states (ctx, a);
770 * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear.
772 * @param start starting state
773 * @param end end state
775 * @return new NFA fragment
777 static struct GNUNET_REGEX_Automaton *
778 nfa_fragment_create (struct State *start, struct State *end)
780 struct GNUNET_REGEX_Automaton *n;
782 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
788 if (NULL == start && NULL == end)
791 automaton_add_state (n, end);
792 automaton_add_state (n, start);
801 * Adds a list of states to the given automaton 'n'.
803 * @param n automaton to which the states should be added
804 * @param states_head head of the DLL of states
805 * @param states_tail tail of the DLL of states
808 nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head,
809 struct State *states_tail)
813 if (NULL == n || NULL == states_head)
815 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
819 if (NULL == n->states_head)
821 n->states_head = states_head;
822 n->states_tail = states_tail;
826 if (NULL != states_head)
828 n->states_tail->next = states_head;
829 n->states_tail = states_tail;
832 for (s = states_head; NULL != s; s = s->next)
837 * Creates a new NFA state. Needs to be freed using automaton_destroy_state.
840 * @param accepting is it an accepting state or not
842 * @return new NFA state
844 static struct State *
845 nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
849 s = GNUNET_malloc (sizeof (struct State));
850 s->id = ctx->state_id++;
851 s->accepting = accepting;
854 GNUNET_asprintf (&s->name, "s%i", s->id);
860 * Calculates the NFA closure set for the given state
862 * @param s starting point state
863 * @param literal transitioning literal on which to base the closure on,
864 * pass 0 for epsilon transition
866 * @return nfa closure on 'literal' (epsilon closure if 'literal' is 0)
868 static struct StateSet *
869 nfa_closure_create (struct State *s, const char literal)
871 struct StateSet *cls;
872 struct StateSet *cls_check;
873 struct State *clsstate;
874 struct State *currentstate;
875 struct Transition *ctran;
880 cls = GNUNET_malloc (sizeof (struct StateSet));
881 cls_check = GNUNET_malloc (sizeof (struct StateSet));
883 // Add start state to closure only for epsilon closure
885 GNUNET_array_append (cls->states, cls->len, s);
887 GNUNET_array_append (cls_check->states, cls_check->len, s);
888 while (cls_check->len > 0)
890 currentstate = cls_check->states[cls_check->len - 1];
891 GNUNET_array_grow (cls_check->states, cls_check->len, cls_check->len - 1);
893 for (ctran = currentstate->transitions_head; NULL != ctran;
896 if (NULL != ctran->state && literal == ctran->literal)
898 clsstate = ctran->state;
900 if (NULL != clsstate &&
901 GNUNET_YES != state_set_contains (cls, clsstate))
903 GNUNET_array_append (cls->states, cls->len, clsstate);
904 GNUNET_array_append (cls_check->states, cls_check->len, clsstate);
909 GNUNET_assert (0 == cls_check->len);
910 GNUNET_free (cls_check);
913 qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
919 * Calculates the closure set for the given set of states.
921 * @param states list of states on which to base the closure on
922 * @param literal transitioning literal for which to base the closure on,
923 * pass 0 for epsilon transition
925 * @return nfa closure on 'literal' (epsilon closure if 'literal' is 0)
927 static struct StateSet *
928 nfa_closure_set_create (struct StateSet *states, const char literal)
931 struct StateSet *sset;
932 struct StateSet *cls;
941 cls = GNUNET_malloc (sizeof (struct StateSet));
943 for (i = 0; i < states->len; i++)
945 s = states->states[i];
946 sset = nfa_closure_create (s, literal);
948 for (j = 0; j < sset->len; j++)
951 for (k = 0; k < cls->len; k++)
953 if (sset->states[j]->id == cls->states[k]->id)
960 GNUNET_array_append (cls->states, cls->len, sset->states[j]);
962 state_set_clear (sset);
966 qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
972 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
977 nfa_add_concatenation (struct GNUNET_REGEX_Context *ctx)
979 struct GNUNET_REGEX_Automaton *a;
980 struct GNUNET_REGEX_Automaton *b;
981 struct GNUNET_REGEX_Automaton *new;
984 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
986 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
988 add_transition (ctx, a->end, 0, b->start);
989 a->end->accepting = 0;
990 b->end->accepting = 1;
992 new = nfa_fragment_create (NULL, NULL);
993 nfa_add_states (new, a->states_head, a->states_tail);
994 nfa_add_states (new, b->states_head, b->states_tail);
995 new->start = a->start;
997 automaton_fragment_clear (a);
998 automaton_fragment_clear (b);
1000 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1004 * Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
1006 * @param ctx context
1009 nfa_add_star_op (struct GNUNET_REGEX_Context *ctx)
1011 struct GNUNET_REGEX_Automaton *a;
1012 struct GNUNET_REGEX_Automaton *new;
1013 struct State *start;
1016 a = ctx->stack_tail;
1017 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1021 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1022 "nfa_add_star_op failed, because there was no element on the stack");
1026 start = nfa_state_create (ctx, 0);
1027 end = nfa_state_create (ctx, 1);
1029 add_transition (ctx, start, 0, a->start);
1030 add_transition (ctx, start, 0, end);
1031 add_transition (ctx, a->end, 0, a->start);
1032 add_transition (ctx, a->end, 0, end);
1034 a->end->accepting = 0;
1037 new = nfa_fragment_create (start, end);
1038 nfa_add_states (new, a->states_head, a->states_tail);
1039 automaton_fragment_clear (a);
1041 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1045 * Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
1047 * @param ctx context
1050 nfa_add_plus_op (struct GNUNET_REGEX_Context *ctx)
1052 struct GNUNET_REGEX_Automaton *a;
1054 a = ctx->stack_tail;
1055 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1057 add_transition (ctx, a->end, 0, a->start);
1059 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
1063 * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment
1064 * that alternates between a and b (a|b)
1066 * @param ctx context
1069 nfa_add_alternation (struct GNUNET_REGEX_Context *ctx)
1071 struct GNUNET_REGEX_Automaton *a;
1072 struct GNUNET_REGEX_Automaton *b;
1073 struct GNUNET_REGEX_Automaton *new;
1074 struct State *start;
1077 b = ctx->stack_tail;
1078 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
1079 a = ctx->stack_tail;
1080 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1082 start = nfa_state_create (ctx, 0);
1083 end = nfa_state_create (ctx, 1);
1084 add_transition (ctx, start, 0, a->start);
1085 add_transition (ctx, start, 0, b->start);
1087 add_transition (ctx, a->end, 0, end);
1088 add_transition (ctx, b->end, 0, end);
1090 a->end->accepting = 0;
1091 b->end->accepting = 0;
1094 new = nfa_fragment_create (start, end);
1095 nfa_add_states (new, a->states_head, a->states_tail);
1096 nfa_add_states (new, b->states_head, b->states_tail);
1097 automaton_fragment_clear (a);
1098 automaton_fragment_clear (b);
1100 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1104 * Adds a new nfa fragment to the stack
1106 * @param ctx context
1107 * @param lit literal for nfa transition
1110 nfa_add_literal (struct GNUNET_REGEX_Context *ctx, const char lit)
1112 struct GNUNET_REGEX_Automaton *n;
1113 struct State *start;
1116 GNUNET_assert (NULL != ctx);
1118 start = nfa_state_create (ctx, 0);
1119 end = nfa_state_create (ctx, 1);
1120 add_transition (ctx, start, lit, end);
1121 n = nfa_fragment_create (start, end);
1122 GNUNET_assert (NULL != n);
1123 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
1127 * Construct an NFA by parsing the regex string of length 'len'.
1129 * @param regex regular expression string
1130 * @param len length of the string
1132 * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton
1134 struct GNUNET_REGEX_Automaton *
1135 GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
1137 struct GNUNET_REGEX_Context ctx;
1138 struct GNUNET_REGEX_Automaton *nfa;
1142 unsigned int altcount;
1143 unsigned int atomcount;
1144 unsigned int pcount;
1151 GNUNET_REGEX_context_init (&ctx);
1160 for (count = 0; count < len && *regexp; count++, regexp++)
1168 nfa_add_concatenation (&ctx);
1170 GNUNET_array_grow (p, pcount, pcount + 1);
1171 p[pcount - 1].altcount = altcount;
1172 p[pcount - 1].atomcount = atomcount;
1179 error_msg = "Cannot append '|' to nothing";
1182 while (--atomcount > 0)
1183 nfa_add_concatenation (&ctx);
1189 error_msg = "Missing opening '('";
1194 // Ignore this: "()"
1196 altcount = p[pcount].altcount;
1197 atomcount = p[pcount].atomcount;
1200 while (--atomcount > 0)
1201 nfa_add_concatenation (&ctx);
1202 for (; altcount > 0; altcount--)
1203 nfa_add_alternation (&ctx);
1205 altcount = p[pcount].altcount;
1206 atomcount = p[pcount].atomcount;
1212 error_msg = "Cannot append '+' to nothing";
1215 nfa_add_star_op (&ctx);
1220 error_msg = "Cannot append '+' to nothing";
1223 nfa_add_plus_op (&ctx);
1225 case 92: /* escape: \ */
1232 nfa_add_concatenation (&ctx);
1234 nfa_add_literal (&ctx, *regexp);
1241 error_msg = "Unbalanced parenthesis";
1244 while (--atomcount > 0)
1245 nfa_add_concatenation (&ctx);
1246 for (; altcount > 0; altcount--)
1247 nfa_add_alternation (&ctx);
1252 nfa = ctx.stack_tail;
1253 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
1256 if (NULL != ctx.stack_head)
1258 error_msg = "Creating the NFA failed. NFA stack was not empty!";
1265 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex\n");
1266 if (NULL != error_msg)
1267 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
1270 while (NULL != ctx.stack_tail)
1272 GNUNET_REGEX_automaton_destroy (ctx.stack_tail);
1273 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail,
1280 * Free the memory allocated by constructing the GNUNET_REGEX_Automaton
1283 * @param a automaton to be destroyed
1286 GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a)
1289 struct State *next_state;
1294 for (s = a->states_head; NULL != s;)
1296 next_state = s->next;
1297 automaton_destroy_state (s);
1305 * Construct DFA for the given 'regex' of length 'len'
1307 * @param regex regular expression string
1308 * @param len length of the regular expression
1310 * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton
1312 struct GNUNET_REGEX_Automaton *
1313 GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
1315 struct GNUNET_REGEX_Context ctx;
1316 struct GNUNET_REGEX_Automaton *dfa;
1317 struct GNUNET_REGEX_Automaton *nfa;
1318 struct StateSet *tmp;
1319 struct StateSet *nfa_set;
1320 struct StateSet *dfa_stack;
1321 struct Transition *ctran;
1322 struct State *dfa_state;
1323 struct State *new_dfa_state;
1324 struct State *state_contains;
1325 struct State *state_iter;
1327 GNUNET_REGEX_context_init (&ctx);
1330 nfa = GNUNET_REGEX_construct_nfa (regex, len);
1334 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1335 "Could not create DFA, because NFA creation failed\n");
1339 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
1342 // Create DFA start state from epsilon closure
1343 dfa_stack = GNUNET_malloc (sizeof (struct StateSet));
1344 nfa_set = nfa_closure_create (nfa->start, 0);
1345 dfa->start = dfa_state_create (&ctx, nfa_set);
1346 automaton_add_state (dfa, dfa->start);
1347 GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start);
1349 // Create dfa states by combining nfa states
1350 while (dfa_stack->len > 0)
1352 dfa_state = dfa_stack->states[dfa_stack->len - 1];
1353 GNUNET_array_grow (dfa_stack->states, dfa_stack->len, dfa_stack->len - 1);
1355 for (ctran = dfa_state->transitions_head; NULL != ctran;
1356 ctran = ctran->next)
1358 if (0 != ctran->literal && NULL == ctran->state)
1360 tmp = nfa_closure_set_create (dfa_state->nfa_set, ctran->literal);
1361 nfa_set = nfa_closure_set_create (tmp, 0);
1362 state_set_clear (tmp);
1363 new_dfa_state = dfa_state_create (&ctx, nfa_set);
1364 state_contains = NULL;
1365 for (state_iter = dfa->states_head; NULL != state_iter;
1366 state_iter = state_iter->next)
1369 state_set_compare (state_iter->nfa_set, new_dfa_state->nfa_set))
1370 state_contains = state_iter;
1373 if (NULL == state_contains)
1375 automaton_add_state (dfa, new_dfa_state);
1376 GNUNET_array_append (dfa_stack->states, dfa_stack->len,
1378 ctran->state = new_dfa_state;
1382 ctran->state = state_contains;
1383 automaton_destroy_state (new_dfa_state);
1389 GNUNET_free (dfa_stack);
1390 GNUNET_REGEX_automaton_destroy (nfa);
1392 dfa_minimize (&ctx, dfa);
1398 * Save the given automaton as a GraphViz dot file
1400 * @param a the automaton to be saved
1401 * @param filename where to save the file
1404 GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
1405 const char *filename)
1408 struct Transition *ctran;
1410 char *s_tran = NULL;
1417 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
1421 if (NULL == filename || strlen (filename) < 1)
1423 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
1427 p = fopen (filename, "w");
1431 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
1436 start = "digraph G {\nrankdir=LR\n";
1437 fwrite (start, strlen (start), 1, p);
1439 for (s = a->states_head; NULL != s; s = s->next)
1443 GNUNET_asprintf (&s_acc, "\"%s\" [shape=doublecircle];\n", s->name);
1444 fwrite (s_acc, strlen (s_acc), 1, p);
1445 GNUNET_free (s_acc);
1450 for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
1452 if (NULL == ctran->state)
1454 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1455 "Transition from State %i has has no state for transitioning\n",
1460 if (ctran->literal == 0)
1462 GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"epsilon\"];\n",
1463 s->name, ctran->state->name);
1467 GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"%c\"];\n",
1468 s->name, ctran->state->name, ctran->literal);
1471 fwrite (s_tran, strlen (s_tran), 1, p);
1472 GNUNET_free (s_tran);
1477 fwrite (end, strlen (end), 1, p);
1482 * Evaluates the given string using the given DFA automaton
1484 * @param a automaton, type must be DFA
1485 * @param string string that should be evaluated
1487 * @return 0 if string matches, non 0 otherwise
1490 evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1497 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1498 "Tried to evaluate DFA, but NFA automaton given");
1504 for (strp = string; NULL != strp && *strp; strp++)
1506 s = dfa_move (s, *strp);
1511 if (NULL != s && s->accepting)
1518 * Evaluates the given string using the given NFA automaton
1520 * @param a automaton, type must be NFA
1521 * @param string string that should be evaluated
1523 * @return 0 if string matches, non 0 otherwise
1526 evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1530 struct StateSet *sset;
1531 struct StateSet *new_sset;
1537 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1538 "Tried to evaluate NFA, but DFA automaton given");
1544 sset = nfa_closure_create (a->start, 0);
1546 for (strp = string; NULL != strp && *strp; strp++)
1548 new_sset = nfa_closure_set_create (sset, *strp);
1549 state_set_clear (sset);
1550 sset = nfa_closure_set_create (new_sset, 0);
1551 state_set_clear (new_sset);
1554 for (i = 0; i < sset->len; i++)
1556 s = sset->states[i];
1557 if (NULL != s && s->accepting)
1564 state_set_clear (sset);
1569 * Evaluates the given 'string' against the given compiled regex
1571 * @param a automaton
1572 * @param string string to check
1574 * @return 0 if string matches, non 0 otherwise
1577 GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
1584 result = evaluate_dfa (a, string);
1587 result = evaluate_nfa (a, string);
1590 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1591 "Evaluating regex failed, automaton has no type!\n");
1592 result = GNUNET_SYSERR;