2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex.c
22 * @brief library to create automatons from regular expressions
23 * @author Maximilian Szengel
26 #include "gnunet_container_lib.h"
27 #include "gnunet_regex_lib.h"
31 * Context that contains an id counter for states and transitions
32 * as well as a DLL of automatons used as a stack for NFA construction.
34 struct GNUNET_REGEX_Context
39 unsigned int state_id;
42 * Unique transition id.
44 unsigned int transition_id;
47 * Unique SCC (Strongly Connected Component) id.
52 * DLL of GNUNET_REGEX_Automaton's used as a stack.
54 struct GNUNET_REGEX_Automaton *stack_head;
57 * DLL of GNUNET_REGEX_Automaton's used as a stack.
59 struct GNUNET_REGEX_Automaton *stack_tail;
63 * Type of an automaton.
65 enum GNUNET_REGEX_automaton_type
72 * Automaton representation.
74 struct GNUNET_REGEX_Automaton
77 * This is a linked list.
79 struct GNUNET_REGEX_Automaton *prev;
82 * This is a linked list.
84 struct GNUNET_REGEX_Automaton *next;
87 * First state of the automaton. This is mainly
88 * used for constructing an NFA, where each NFA
89 * itself consists of one or more NFAs linked
92 struct GNUNET_REGEX_State *start;
95 * End state of the automaton.
97 struct GNUNET_REGEX_State *end;
100 * Number of states in the automaton.
102 unsigned int state_count;
107 struct GNUNET_REGEX_State *states_head;
112 struct GNUNET_REGEX_State *states_tail;
115 * Type of the automaton.
117 enum GNUNET_REGEX_automaton_type type;
121 * A state. Can be used in DFA and NFA automatons.
123 struct GNUNET_REGEX_State
126 * This is a linked list.
128 struct GNUNET_REGEX_State *prev;
131 * This is a linked list.
133 struct GNUNET_REGEX_State *next;
141 * If this is an accepting state or not.
146 * Marking of the state. This is used for marking all visited
147 * states when traversing all states of an automaton and for
148 * cases where the state id cannot be used (dfa minimization).
153 * Marking the state as contained. This is used for checking,
154 * if the state is contained in a set in constant time
159 * Marking the state as part of an SCC (Strongly Connected Component).
160 * All states with the same scc_id are part of the same SCC.
165 * Used for SCC detection.
170 * Used for SCC detection.
175 * Human readable name of the automaton. Used for debugging
176 * and graph creation.
181 * Number of transitions from this state to other states.
183 unsigned int transition_count;
186 * DLL of transitions.
188 struct Transition *transitions_head;
191 * DLL of transitions.
193 struct Transition *transitions_tail;
196 * Set of states on which this state is based on. Used when
197 * creating a DFA out of several NFA states.
199 struct GNUNET_REGEX_StateSet *nfa_set;
203 * Transition between two states. Each state can have 0-n transitions.
204 * If literal is 0, this is considered to be an epsilon transition.
209 * This is a linked list.
211 struct Transition *prev;
214 * This is a linked list.
216 struct Transition *next;
219 * Unique id of this transition.
224 * Literal for this transition. This is basically the edge label for
230 * State to which this transition leads.
232 struct GNUNET_REGEX_State *to_state;
235 * State from which this transition origins.
237 struct GNUNET_REGEX_State *from_state;
243 struct GNUNET_REGEX_StateSet
248 struct GNUNET_REGEX_State **states;
251 * Length of the 'states' array.
257 debug_print_state (struct GNUNET_REGEX_State *s)
259 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
260 "State %i: %s marked: %i accepting: %i scc_id: %i\n", s->id,
261 s->name, s->marked, s->accepting, s->scc_id);
265 debug_print_states (struct GNUNET_REGEX_StateSet *sset)
267 struct GNUNET_REGEX_State *s;
270 for (i = 0; i < sset->len; i++)
273 debug_print_state (s);
278 debug_print_transitions (struct GNUNET_REGEX_State *s)
280 struct Transition *t;
284 for (t = s->transitions_head; NULL != t; t = t->next)
289 literal = t->literal;
291 if (NULL == t->to_state)
294 state = t->to_state->name;
296 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transition %i: On %c to %s\n", t->id,
302 * Recursive function doing DFS with 'v' as a start, detecting all SCCs
303 * inside the subgraph reachable from 'v'. Used with scc_tarjan function
304 * to detect all SCCs inside an automaton.
307 * @param v start vertex
308 * @param index current index
309 * @param stack stack for saving all SCCs
310 * @param stack_size current size of the stack
313 scc_tarjan_strongconnect (struct GNUNET_REGEX_Context *ctx,
314 struct GNUNET_REGEX_State *v, int *index,
315 struct GNUNET_REGEX_State **stack,
316 unsigned int *stack_size)
318 struct GNUNET_REGEX_State *w;
319 struct Transition *t;
324 stack[(*stack_size)++] = v;
327 for (t = v->transitions_head; NULL != t; t = t->next)
330 if (NULL != w && w->index < 0)
332 scc_tarjan_strongconnect (ctx, w, index, stack, stack_size);
333 v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink;
335 else if (0 != w->contained)
336 v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink;
339 if (v->lowlink == v->index)
341 w = stack[--(*stack_size)];
349 w->scc_id = ctx->scc_id;
350 w = stack[--(*stack_size)];
353 w->scc_id = ctx->scc_id;
359 * Detect all SCCs (Strongly Connected Components) inside the given automaton.
360 * SCCs will be marked using the scc_id on each state.
366 scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a)
370 struct GNUNET_REGEX_State *v;
371 struct GNUNET_REGEX_State *stack[a->state_count];
372 unsigned int stack_size;
374 for (v = a->states_head; NULL != v; v = v->next)
384 for (i = 0, v = a->states_head; NULL != v; v = v->next)
387 scc_tarjan_strongconnect (ctx, v, &index, stack, &stack_size);
392 * Compare two states. Used for sorting.
394 * @param a first state
395 * @param b second state
397 * @return an integer less than, equal to, or greater than zero
398 * if the first argument is considered to be respectively
399 * less than, equal to, or greater than the second.
402 state_compare (const void *a, const void *b)
404 struct GNUNET_REGEX_State **s1;
405 struct GNUNET_REGEX_State **s2;
407 s1 = (struct GNUNET_REGEX_State **) a;
408 s2 = (struct GNUNET_REGEX_State **) b;
410 return (*s1)->id - (*s2)->id;
414 * Compare to state sets by comparing the id's of the states that are
415 * contained in each set. Both sets are expected to be sorted by id!
417 * @param sset1 first state set
418 * @param sset2 second state set
420 * @return an integer less than, equal to, or greater than zero
421 * if the first argument is considered to be respectively
422 * less than, equal to, or greater than the second.
425 state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
426 struct GNUNET_REGEX_StateSet *sset2)
431 if (NULL == sset1 || NULL == sset2)
434 result = sset1->len - sset2->len;
436 for (i = 0; i < sset1->len; i++)
441 result = state_compare (&sset1->states[i], &sset2->states[i]);
447 * Clears the given StateSet 'set'
449 * @param set set to be cleared
452 state_set_clear (struct GNUNET_REGEX_StateSet *set)
456 if (NULL != set->states)
457 GNUNET_free (set->states);
463 * Adds a transition from one state to another on 'literal'
466 * @param from_state starting state for the transition
467 * @param literal transition label
468 * @param to_state state to where the transition should point to
471 state_add_transition (struct GNUNET_REGEX_Context *ctx,
472 struct GNUNET_REGEX_State *from_state, const char literal,
473 struct GNUNET_REGEX_State *to_state)
475 struct Transition *t;
477 if (NULL == from_state)
479 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
483 t = GNUNET_malloc (sizeof (struct Transition));
485 t->id = ctx->transition_id++;
486 t->literal = literal;
487 t->to_state = to_state;
488 t->from_state = from_state;
490 GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
491 from_state->transitions_tail, t);
495 * Clears an automaton fragment. Does not destroy the states inside
498 * @param a automaton to be cleared
501 automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
508 a->states_head = NULL;
509 a->states_tail = NULL;
515 * Frees the memory used by State 's'
517 * @param s state that should be destroyed
520 automaton_destroy_state (struct GNUNET_REGEX_State *s)
522 struct Transition *t;
523 struct Transition *next_t;
529 GNUNET_free (s->name);
531 for (t = s->transitions_head; NULL != t;)
534 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
539 state_set_clear (s->nfa_set);
545 * Remove a state from the given automaton 'a'. Always use this function
546 * when altering the states of an automaton. Will also remove all transitions
547 * leading to this state, before destroying it.
550 * @param s state to remove
553 automaton_remove_state (struct GNUNET_REGEX_Automaton *a,
554 struct GNUNET_REGEX_State *s)
556 struct GNUNET_REGEX_State *ss;
557 struct GNUNET_REGEX_State *s_check;
558 struct Transition *t_check;
560 if (NULL == a || NULL == s)
565 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
568 // remove all transitions leading to this state
569 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
571 for (t_check = s_check->transitions_head; NULL != t_check;
572 t_check = t_check->next)
574 if (t_check->to_state == ss)
576 GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
577 s_check->transitions_tail, t_check);
578 s_check->transition_count--;
583 automaton_destroy_state (ss);
587 * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy 's2'.
591 * @param s1 first state
592 * @param s2 second state, will be destroyed
595 automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
596 struct GNUNET_REGEX_Automaton *a,
597 struct GNUNET_REGEX_State *s1,
598 struct GNUNET_REGEX_State *s2)
600 struct GNUNET_REGEX_State *s_check;
601 struct Transition *t_check;
602 struct Transition *t;
605 GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
607 // 1. Make all transitions pointing to s2 point to s1
608 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
610 for (t_check = s_check->transitions_head; NULL != t_check;
611 t_check = t_check->next)
613 if (s_check != s1 && s2 == t_check->to_state)
614 t_check->to_state = s1;
618 // 2. Add all transitions from s2 to sX to s1
619 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
621 for (t = s1->transitions_head; NULL != t; t = t->next)
623 if (t_check->literal != t->literal && NULL != t_check->to_state &&
624 t_check->to_state != t->to_state && t_check->to_state != s2)
626 state_add_transition (ctx, s1, t_check->literal, t_check->to_state);
631 // 3. Rename s1 to {s1,s2}
632 new_name = GNUNET_malloc (strlen (s1->name) + strlen (s2->name) + 1);
633 strncat (new_name, s1->name, strlen (s1->name));
634 strncat (new_name, s2->name, strlen (s2->name));
635 if (NULL != s1->name)
636 GNUNET_free (s1->name);
641 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s_check);
643 automaton_destroy_state (s_check);
647 * Add a state to the automaton 'a', always use this function to
648 * alter the states DLL of the automaton.
650 * @param a automaton to add the state to
651 * @param s state that should be added
654 automaton_add_state (struct GNUNET_REGEX_Automaton *a,
655 struct GNUNET_REGEX_State *s)
657 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
662 * Creates a new DFA state based on a set of NFA states. Needs to be freed
663 * using automaton_destroy_state.
666 * @param nfa_states set of NFA states on which the DFA should be based on
668 * @return new DFA state
670 static struct GNUNET_REGEX_State *
671 dfa_state_create (struct GNUNET_REGEX_Context *ctx,
672 struct GNUNET_REGEX_StateSet *nfa_states)
674 struct GNUNET_REGEX_State *s;
677 struct GNUNET_REGEX_State *cstate;
678 struct Transition *ctran;
680 struct Transition *t;
683 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
684 s->id = ctx->state_id++;
693 if (NULL == nfa_states)
695 GNUNET_asprintf (&s->name, "s%i", s->id);
699 s->nfa_set = nfa_states;
701 if (nfa_states->len < 1)
704 // Create a name based on 'sset'
705 s->name = GNUNET_malloc (sizeof (char) * 2);
706 strcat (s->name, "{");
709 for (i = 0; i < nfa_states->len; i++)
711 cstate = nfa_states->states[i];
712 GNUNET_asprintf (&name, "%i,", cstate->id);
716 len = strlen (s->name) + strlen (name) + 1;
717 s->name = GNUNET_realloc (s->name, len);
718 strcat (s->name, name);
723 // Add a transition for each distinct literal to NULL state
724 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
726 if (0 != ctran->literal)
730 for (t = s->transitions_head; NULL != t; t = t->next)
732 if (t->literal == ctran->literal)
740 state_add_transition (ctx, s, ctran->literal, NULL);
744 // If the nfa_states contain an accepting state, the new dfa state is also accepting
745 if (cstate->accepting)
749 s->name[strlen (s->name) - 1] = '}';
755 * Move from the given state 's' to the next state on
756 * transition 'literal'
758 * @param s starting state
759 * @param literal edge label to follow
761 * @return new state or NULL, if transition on literal not possible
763 static struct GNUNET_REGEX_State *
764 dfa_move (struct GNUNET_REGEX_State *s, const char literal)
766 struct Transition *t;
767 struct GNUNET_REGEX_State *new_s;
774 for (t = s->transitions_head; NULL != t; t = t->next)
776 if (literal == t->literal)
787 * Remove all unreachable states from DFA 'a'. Unreachable states
788 * are those states that are not reachable from the starting state.
790 * @param a DFA automaton
793 dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
795 struct GNUNET_REGEX_State *stack[a->state_count * a->state_count];
797 struct GNUNET_REGEX_State *s;
798 struct GNUNET_REGEX_State *s_next;
799 struct Transition *t;
803 // 1. unmark all states
804 for (s = a->states_head; NULL != s; s = s->next)
807 // 2. traverse dfa from start state and mark all visited states
808 stack[stack_len++] = a->start;
809 while (stack_len > 0)
811 s = stack[--stack_len];
812 s->marked = 1; // mark s as visited
813 for (t = s->transitions_head; NULL != t; t = t->next)
815 // add next states to stack
816 if (NULL != t->to_state && 0 == t->to_state->marked)
817 stack[stack_len++] = t->to_state;
821 // 3. delete all states that were not visited
822 for (s = a->states_head; NULL != s; s = s_next)
827 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Removed state %s\n", s->name);
828 automaton_remove_state (a, s);
834 * Remove all dead states from the DFA 'a'. Dead states are those
835 * states that do not transition to any other state but themselfes.
837 * @param a DFA automaton
840 dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
842 struct GNUNET_REGEX_State *s;
843 struct Transition *t;
846 GNUNET_assert (DFA == a->type);
848 for (s = a->states_head; NULL != s; s = s->next)
854 for (t = s->transitions_head; NULL != t; t = t->next)
856 if (NULL != t->to_state && t->to_state != s)
866 // state s is dead, remove it
867 automaton_remove_state (a, s);
872 * Merge all non distinguishable states in the DFA 'a'
875 * @param a DFA automaton
878 dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
879 struct GNUNET_REGEX_Automaton *a)
882 int table[a->state_count][a->state_count];
883 struct GNUNET_REGEX_State *s1;
884 struct GNUNET_REGEX_State *s2;
885 struct Transition *t1;
886 struct Transition *t2;
890 for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1;
894 // Mark all pairs of accepting/!accepting states
895 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
897 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
899 if ((s1->accepting && !s2->accepting) ||
900 (!s1->accepting && s2->accepting))
902 table[s1->marked][s2->marked] = 1;
905 table[s1->marked][s2->marked] = 0;
912 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
914 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
916 if (0 != table[s1->marked][s2->marked])
919 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
921 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
923 if (t1->literal == t2->literal && t1->to_state == t2->to_state &&
924 (0 != table[t1->to_state->marked][t2->to_state->marked] ||
925 0 != table[t2->to_state->marked][t1->to_state->marked]))
927 table[s1->marked][s2->marked] = t1->literal;
930 else if (t1->literal != t2->literal && t1->to_state != t2->to_state)
932 table[s1->marked][s2->marked] = -1;
941 struct GNUNET_REGEX_State *s2_next;
943 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
945 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
948 if (s1 != s2 && table[s1->marked][s2->marked] == 0)
949 automaton_merge_states (ctx, a, s1, s2);
955 * Minimize the given DFA 'a' by removing all unreachable states,
956 * removing all dead states and merging all non distinguishable states
959 * @param a DFA automaton
962 dfa_minimize (struct GNUNET_REGEX_Context *ctx,
963 struct GNUNET_REGEX_Automaton *a)
968 GNUNET_assert (DFA == a->type);
970 // 1. remove unreachable states
971 dfa_remove_unreachable_states (a);
973 // 2. remove dead states
974 dfa_remove_dead_states (a);
976 // 3. Merge nondistinguishable states
977 dfa_merge_nondistinguishable_states (ctx, a);
981 * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear.
983 * @param start starting state
984 * @param end end state
986 * @return new NFA fragment
988 static struct GNUNET_REGEX_Automaton *
989 nfa_fragment_create (struct GNUNET_REGEX_State *start,
990 struct GNUNET_REGEX_State *end)
992 struct GNUNET_REGEX_Automaton *n;
994 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
1000 if (NULL == start && NULL == end)
1003 automaton_add_state (n, end);
1004 automaton_add_state (n, start);
1013 * Adds a list of states to the given automaton 'n'.
1015 * @param n automaton to which the states should be added
1016 * @param states_head head of the DLL of states
1017 * @param states_tail tail of the DLL of states
1020 nfa_add_states (struct GNUNET_REGEX_Automaton *n,
1021 struct GNUNET_REGEX_State *states_head,
1022 struct GNUNET_REGEX_State *states_tail)
1024 struct GNUNET_REGEX_State *s;
1026 if (NULL == n || NULL == states_head)
1028 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
1032 if (NULL == n->states_head)
1034 n->states_head = states_head;
1035 n->states_tail = states_tail;
1039 if (NULL != states_head)
1041 n->states_tail->next = states_head;
1042 n->states_tail = states_tail;
1045 for (s = states_head; NULL != s; s = s->next)
1050 * Creates a new NFA state. Needs to be freed using automaton_destroy_state.
1052 * @param ctx context
1053 * @param accepting is it an accepting state or not
1055 * @return new NFA state
1057 static struct GNUNET_REGEX_State *
1058 nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
1060 struct GNUNET_REGEX_State *s;
1062 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
1063 s->id = ctx->state_id++;
1064 s->accepting = accepting;
1071 GNUNET_asprintf (&s->name, "s%i", s->id);
1077 * Calculates the NFA closure set for the given state.
1079 * @param nfa the NFA containing 's'
1080 * @param s starting point state
1081 * @param literal transitioning literal on which to base the closure on,
1082 * pass 0 for epsilon transition
1084 * @return sorted nfa closure on 'literal' (epsilon closure if 'literal' is 0)
1086 static struct GNUNET_REGEX_StateSet *
1087 nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa,
1088 struct GNUNET_REGEX_State *s, const char literal)
1090 struct GNUNET_REGEX_StateSet *cls;
1091 struct GNUNET_REGEX_StateSet *cls_check;
1092 struct GNUNET_REGEX_State *clsstate;
1093 struct GNUNET_REGEX_State *currentstate;
1094 struct Transition *ctran;
1099 cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
1100 cls_check = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
1102 for (clsstate = nfa->states_head; NULL != clsstate; clsstate = clsstate->next)
1103 clsstate->contained = 0;
1105 // Add start state to closure only for epsilon closure
1107 GNUNET_array_append (cls->states, cls->len, s);
1109 GNUNET_array_append (cls_check->states, cls_check->len, s);
1110 while (cls_check->len > 0)
1112 currentstate = cls_check->states[cls_check->len - 1];
1113 GNUNET_array_grow (cls_check->states, cls_check->len, cls_check->len - 1);
1115 for (ctran = currentstate->transitions_head; NULL != ctran;
1116 ctran = ctran->next)
1118 if (NULL != ctran->to_state && literal == ctran->literal)
1120 clsstate = ctran->to_state;
1122 if (NULL != clsstate && 0 == clsstate->contained)
1124 GNUNET_array_append (cls->states, cls->len, clsstate);
1125 GNUNET_array_append (cls_check->states, cls_check->len, clsstate);
1126 clsstate->contained = 1;
1131 GNUNET_assert (0 == cls_check->len);
1132 GNUNET_free (cls_check);
1135 qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
1142 * Calculates the closure set for the given set of states.
1144 * @param nfa the NFA containing 's'
1145 * @param states list of states on which to base the closure on
1146 * @param literal transitioning literal for which to base the closure on,
1147 * pass 0 for epsilon transition
1149 * @return sorted nfa closure on 'literal' (epsilon closure if 'literal' is 0)
1151 static struct GNUNET_REGEX_StateSet *
1152 nfa_closure_set_create (struct GNUNET_REGEX_Automaton *nfa,
1153 struct GNUNET_REGEX_StateSet *states,
1156 struct GNUNET_REGEX_State *s;
1157 struct GNUNET_REGEX_StateSet *sset;
1158 struct GNUNET_REGEX_StateSet *cls;
1167 cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
1169 for (i = 0; i < states->len; i++)
1171 s = states->states[i];
1172 sset = nfa_closure_create (nfa, s, literal);
1174 for (j = 0; j < sset->len; j++)
1177 for (k = 0; k < cls->len; k++)
1179 if (sset->states[j]->id == cls->states[k]->id)
1186 GNUNET_array_append (cls->states, cls->len, sset->states[j]);
1188 state_set_clear (sset);
1192 qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
1199 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
1201 * @param ctx context
1204 nfa_add_concatenation (struct GNUNET_REGEX_Context *ctx)
1206 struct GNUNET_REGEX_Automaton *a;
1207 struct GNUNET_REGEX_Automaton *b;
1208 struct GNUNET_REGEX_Automaton *new;
1210 b = ctx->stack_tail;
1211 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
1212 a = ctx->stack_tail;
1213 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1215 state_add_transition (ctx, a->end, 0, b->start);
1216 a->end->accepting = 0;
1217 b->end->accepting = 1;
1219 new = nfa_fragment_create (NULL, NULL);
1220 nfa_add_states (new, a->states_head, a->states_tail);
1221 nfa_add_states (new, b->states_head, b->states_tail);
1222 new->start = a->start;
1224 automaton_fragment_clear (a);
1225 automaton_fragment_clear (b);
1227 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1231 * Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
1233 * @param ctx context
1236 nfa_add_star_op (struct GNUNET_REGEX_Context *ctx)
1238 struct GNUNET_REGEX_Automaton *a;
1239 struct GNUNET_REGEX_Automaton *new;
1240 struct GNUNET_REGEX_State *start;
1241 struct GNUNET_REGEX_State *end;
1243 a = ctx->stack_tail;
1244 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1248 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1249 "nfa_add_star_op failed, because there was no element on the stack");
1253 start = nfa_state_create (ctx, 0);
1254 end = nfa_state_create (ctx, 1);
1256 state_add_transition (ctx, start, 0, a->start);
1257 state_add_transition (ctx, start, 0, end);
1258 state_add_transition (ctx, a->end, 0, a->start);
1259 state_add_transition (ctx, a->end, 0, end);
1261 a->end->accepting = 0;
1264 new = nfa_fragment_create (start, end);
1265 nfa_add_states (new, a->states_head, a->states_tail);
1266 automaton_fragment_clear (a);
1268 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1272 * Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
1274 * @param ctx context
1277 nfa_add_plus_op (struct GNUNET_REGEX_Context *ctx)
1279 struct GNUNET_REGEX_Automaton *a;
1281 a = ctx->stack_tail;
1282 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1284 state_add_transition (ctx, a->end, 0, a->start);
1286 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
1290 * Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
1292 * @param ctx context
1295 nfa_add_question_op (struct GNUNET_REGEX_Context *ctx)
1297 struct GNUNET_REGEX_Automaton *a;
1298 struct GNUNET_REGEX_Automaton *new;
1299 struct GNUNET_REGEX_State *start;
1300 struct GNUNET_REGEX_State *end;
1302 a = ctx->stack_tail;
1303 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1307 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1308 "nfa_add_question_op failed, because there was no element on the stack");
1312 start = nfa_state_create (ctx, 0);
1313 end = nfa_state_create (ctx, 1);
1315 state_add_transition (ctx, start, 0, a->start);
1316 state_add_transition (ctx, start, 0, end);
1317 state_add_transition (ctx, a->end, 0, end);
1319 a->end->accepting = 0;
1321 new = nfa_fragment_create (start, end);
1322 nfa_add_states (new, a->states_head, a->states_tail);
1323 automaton_fragment_clear (a);
1325 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1329 * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment
1330 * that alternates between a and b (a|b)
1332 * @param ctx context
1335 nfa_add_alternation (struct GNUNET_REGEX_Context *ctx)
1337 struct GNUNET_REGEX_Automaton *a;
1338 struct GNUNET_REGEX_Automaton *b;
1339 struct GNUNET_REGEX_Automaton *new;
1340 struct GNUNET_REGEX_State *start;
1341 struct GNUNET_REGEX_State *end;
1343 b = ctx->stack_tail;
1344 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
1345 a = ctx->stack_tail;
1346 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1348 start = nfa_state_create (ctx, 0);
1349 end = nfa_state_create (ctx, 1);
1350 state_add_transition (ctx, start, 0, a->start);
1351 state_add_transition (ctx, start, 0, b->start);
1353 state_add_transition (ctx, a->end, 0, end);
1354 state_add_transition (ctx, b->end, 0, end);
1356 a->end->accepting = 0;
1357 b->end->accepting = 0;
1360 new = nfa_fragment_create (start, end);
1361 nfa_add_states (new, a->states_head, a->states_tail);
1362 nfa_add_states (new, b->states_head, b->states_tail);
1363 automaton_fragment_clear (a);
1364 automaton_fragment_clear (b);
1366 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1370 * Adds a new nfa fragment to the stack
1372 * @param ctx context
1373 * @param lit literal for nfa transition
1376 nfa_add_literal (struct GNUNET_REGEX_Context *ctx, const char lit)
1378 struct GNUNET_REGEX_Automaton *n;
1379 struct GNUNET_REGEX_State *start;
1380 struct GNUNET_REGEX_State *end;
1382 GNUNET_assert (NULL != ctx);
1384 start = nfa_state_create (ctx, 0);
1385 end = nfa_state_create (ctx, 1);
1386 state_add_transition (ctx, start, lit, end);
1387 n = nfa_fragment_create (start, end);
1388 GNUNET_assert (NULL != n);
1389 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
1393 * Initialize a new context
1395 * @param ctx context
1398 GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx)
1402 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
1406 ctx->transition_id = 0;
1408 ctx->stack_head = NULL;
1409 ctx->stack_tail = NULL;
1413 * Construct an NFA by parsing the regex string of length 'len'.
1415 * @param regex regular expression string
1416 * @param len length of the string
1418 * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton
1420 struct GNUNET_REGEX_Automaton *
1421 GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
1423 struct GNUNET_REGEX_Context ctx;
1424 struct GNUNET_REGEX_Automaton *nfa;
1428 unsigned int altcount;
1429 unsigned int atomcount;
1430 unsigned int pcount;
1437 GNUNET_REGEX_context_init (&ctx);
1446 for (count = 0; count < len && *regexp; count++, regexp++)
1454 nfa_add_concatenation (&ctx);
1456 GNUNET_array_grow (p, pcount, pcount + 1);
1457 p[pcount - 1].altcount = altcount;
1458 p[pcount - 1].atomcount = atomcount;
1465 error_msg = "Cannot append '|' to nothing";
1468 while (--atomcount > 0)
1469 nfa_add_concatenation (&ctx);
1475 error_msg = "Missing opening '('";
1480 // Ignore this: "()"
1482 altcount = p[pcount].altcount;
1483 atomcount = p[pcount].atomcount;
1486 while (--atomcount > 0)
1487 nfa_add_concatenation (&ctx);
1488 for (; altcount > 0; altcount--)
1489 nfa_add_alternation (&ctx);
1491 altcount = p[pcount].altcount;
1492 atomcount = p[pcount].atomcount;
1498 error_msg = "Cannot append '+' to nothing";
1501 nfa_add_star_op (&ctx);
1506 error_msg = "Cannot append '+' to nothing";
1509 nfa_add_plus_op (&ctx);
1514 error_msg = "Cannot append '?' to nothing";
1517 nfa_add_question_op (&ctx);
1519 case 92: /* escape: \ */
1526 nfa_add_concatenation (&ctx);
1528 nfa_add_literal (&ctx, *regexp);
1535 error_msg = "Unbalanced parenthesis";
1538 while (--atomcount > 0)
1539 nfa_add_concatenation (&ctx);
1540 for (; altcount > 0; altcount--)
1541 nfa_add_alternation (&ctx);
1546 nfa = ctx.stack_tail;
1547 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
1550 if (NULL != ctx.stack_head)
1552 error_msg = "Creating the NFA failed. NFA stack was not empty!";
1559 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex\n");
1560 if (NULL != error_msg)
1561 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
1564 while (NULL != ctx.stack_tail)
1566 GNUNET_REGEX_automaton_destroy (ctx.stack_tail);
1567 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail,
1574 * Construct DFA for the given 'regex' of length 'len'
1576 * @param regex regular expression string
1577 * @param len length of the regular expression
1579 * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton
1581 struct GNUNET_REGEX_Automaton *
1582 GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
1584 struct GNUNET_REGEX_Context ctx;
1585 struct GNUNET_REGEX_Automaton *dfa;
1586 struct GNUNET_REGEX_Automaton *nfa;
1587 struct GNUNET_REGEX_StateSet *tmp;
1588 struct GNUNET_REGEX_StateSet *nfa_set;
1589 struct GNUNET_REGEX_StateSet *dfa_stack;
1590 struct Transition *ctran;
1591 struct GNUNET_REGEX_State *dfa_state;
1592 struct GNUNET_REGEX_State *new_dfa_state;
1593 struct GNUNET_REGEX_State *state_contains;
1594 struct GNUNET_REGEX_State *state_iter;
1596 GNUNET_REGEX_context_init (&ctx);
1599 nfa = GNUNET_REGEX_construct_nfa (regex, len);
1603 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1604 "Could not create DFA, because NFA creation failed\n");
1608 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
1611 // Create DFA start state from epsilon closure
1612 dfa_stack = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
1613 nfa_set = nfa_closure_create (nfa, nfa->start, 0);
1614 dfa->start = dfa_state_create (&ctx, nfa_set);
1615 automaton_add_state (dfa, dfa->start);
1616 GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start);
1618 // Create dfa states by combining nfa states
1619 while (dfa_stack->len > 0)
1621 dfa_state = dfa_stack->states[dfa_stack->len - 1];
1622 GNUNET_array_grow (dfa_stack->states, dfa_stack->len, dfa_stack->len - 1);
1624 for (ctran = dfa_state->transitions_head; NULL != ctran;
1625 ctran = ctran->next)
1627 if (0 != ctran->literal && NULL == ctran->to_state)
1629 tmp = nfa_closure_set_create (nfa, dfa_state->nfa_set, ctran->literal);
1630 nfa_set = nfa_closure_set_create (nfa, tmp, 0);
1631 state_set_clear (tmp);
1632 new_dfa_state = dfa_state_create (&ctx, nfa_set);
1633 state_contains = NULL;
1634 for (state_iter = dfa->states_head; NULL != state_iter;
1635 state_iter = state_iter->next)
1638 state_set_compare (state_iter->nfa_set, new_dfa_state->nfa_set))
1639 state_contains = state_iter;
1642 if (NULL == state_contains)
1644 automaton_add_state (dfa, new_dfa_state);
1645 GNUNET_array_append (dfa_stack->states, dfa_stack->len,
1647 ctran->to_state = new_dfa_state;
1651 ctran->to_state = state_contains;
1652 automaton_destroy_state (new_dfa_state);
1658 GNUNET_free (dfa_stack);
1659 GNUNET_REGEX_automaton_destroy (nfa);
1661 dfa_minimize (&ctx, dfa);
1662 scc_tarjan (&ctx, dfa);
1668 * Free the memory allocated by constructing the GNUNET_REGEX_Automaton
1671 * @param a automaton to be destroyed
1674 GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a)
1676 struct GNUNET_REGEX_State *s;
1677 struct GNUNET_REGEX_State *next_state;
1682 for (s = a->states_head; NULL != s;)
1684 next_state = s->next;
1685 automaton_destroy_state (s);
1693 * Save the given automaton as a GraphViz dot file
1695 * @param a the automaton to be saved
1696 * @param filename where to save the file
1699 GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
1700 const char *filename)
1702 struct GNUNET_REGEX_State *s;
1703 struct Transition *ctran;
1705 char *s_tran = NULL;
1712 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
1716 if (NULL == filename || strlen (filename) < 1)
1718 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
1722 p = fopen (filename, "w");
1726 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
1731 start = "digraph G {\nrankdir=LR\n";
1732 fwrite (start, strlen (start), 1, p);
1734 for (s = a->states_head; NULL != s; s = s->next)
1738 GNUNET_asprintf (&s_acc,
1739 "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n",
1740 s->name, s->scc_id);
1741 fwrite (s_acc, strlen (s_acc), 1, p);
1742 GNUNET_free (s_acc);
1746 GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
1748 fwrite (s_acc, strlen (s_acc), 1, p);
1749 GNUNET_free (s_acc);
1754 for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
1756 if (NULL == ctran->to_state)
1758 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1759 "Transition from State %i has has no state for transitioning\n",
1764 if (ctran->literal == 0)
1766 GNUNET_asprintf (&s_tran,
1767 "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n",
1768 s->name, ctran->to_state->name, s->scc_id);
1772 GNUNET_asprintf (&s_tran,
1773 "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n",
1774 s->name, ctran->to_state->name, ctran->literal,
1778 fwrite (s_tran, strlen (s_tran), 1, p);
1779 GNUNET_free (s_tran);
1784 fwrite (end, strlen (end), 1, p);
1789 * Evaluates the given string using the given DFA automaton
1791 * @param a automaton, type must be DFA
1792 * @param string string that should be evaluated
1794 * @return 0 if string matches, non 0 otherwise
1797 evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1800 struct GNUNET_REGEX_State *s;
1804 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1805 "Tried to evaluate DFA, but NFA automaton given");
1811 for (strp = string; NULL != strp && *strp; strp++)
1813 s = dfa_move (s, *strp);
1818 if (NULL != s && s->accepting)
1825 * Evaluates the given string using the given NFA automaton
1827 * @param a automaton, type must be NFA
1828 * @param string string that should be evaluated
1830 * @return 0 if string matches, non 0 otherwise
1833 evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1836 struct GNUNET_REGEX_State *s;
1837 struct GNUNET_REGEX_StateSet *sset;
1838 struct GNUNET_REGEX_StateSet *new_sset;
1844 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1845 "Tried to evaluate NFA, but DFA automaton given");
1851 sset = nfa_closure_create (a, a->start, 0);
1853 for (strp = string; NULL != strp && *strp; strp++)
1855 new_sset = nfa_closure_set_create (a, sset, *strp);
1856 state_set_clear (sset);
1857 sset = nfa_closure_set_create (a, new_sset, 0);
1858 state_set_clear (new_sset);
1861 for (i = 0; i < sset->len; i++)
1863 s = sset->states[i];
1864 if (NULL != s && s->accepting)
1871 state_set_clear (sset);
1876 * Evaluates the given 'string' against the given compiled regex
1878 * @param a automaton
1879 * @param string string to check
1881 * @return 0 if string matches, non 0 otherwise
1884 GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
1891 result = evaluate_dfa (a, string);
1894 result = evaluate_nfa (a, string);
1897 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1898 "Evaluating regex failed, automaton has no type!\n");
1899 result = GNUNET_SYSERR;
1907 * Get the starting state of the given automaton 'a'.
1909 * @param a automaton.
1911 * @return starting state.
1913 struct GNUNET_REGEX_State *
1914 GNUNET_REGEX_automaton_get_start (struct GNUNET_REGEX_Automaton *a)
1920 * Get the next states, starting from states 's'.
1922 * @param a automaton.
1924 * @param count number of states given in 's'. Will contain number of
1925 * states that were returned upon return.
1927 * @return next states, 'count' will contain the number of states.
1929 struct GNUNET_REGEX_State **
1930 GNUNET_REGEX_automaton_states_get_next (struct GNUNET_REGEX_Automaton *a,
1931 struct GNUNET_REGEX_State **s,
1932 unsigned int *count)
1934 struct GNUNET_REGEX_State *states[a->state_count];
1935 struct GNUNET_REGEX_State *state;
1936 struct Transition *t;
1941 for (cnt = 0, i = 0; i < *count; i++)
1944 for (t = state->transitions_head; NULL != t; t = t->next)
1946 if (NULL != t && NULL != t->to_state)
1948 states[cnt++] = t->to_state;
1959 * Hash a set of states.
1961 * @param a automaton.
1963 * @param count number of states.
1967 struct GNUNET_HashCode
1968 GNUNET_REGEX_automaton_states_hash (struct GNUNET_REGEX_Automaton *a,
1969 struct GNUNET_REGEX_State **s,
1972 struct GNUNET_HashCode hash;