2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex.c
22 * @brief library to create automatons from regular expressions
23 * @author Maximilian Szengel
26 #include "gnunet_container_lib.h"
27 #include "gnunet_regex_lib.h"
31 * Context that contains an id counter for states and transitions
32 * as well as a DLL of automatons used as a stack for NFA construction.
34 struct GNUNET_REGEX_Context
39 unsigned int state_id;
42 * Unique transition id.
44 unsigned int transition_id;
47 * DLL of GNUNET_REGEX_Automaton's used as a stack.
49 struct GNUNET_REGEX_Automaton *stack_head;
52 * DLL of GNUNET_REGEX_Automaton's used as a stack.
54 struct GNUNET_REGEX_Automaton *stack_tail;
58 * Type of an automaton.
60 enum GNUNET_REGEX_automaton_type
67 * Automaton representation.
69 struct GNUNET_REGEX_Automaton
72 * This is a linked list.
74 struct GNUNET_REGEX_Automaton *prev;
77 * This is a linked list.
79 struct GNUNET_REGEX_Automaton *next;
82 * First state of the automaton. This is mainly
83 * used for constructing an NFA, where each NFA
84 * itself consists of one or more NFAs linked
90 * End state of the automaton.
95 * Number of states in the automaton.
97 unsigned int state_count;
102 struct State *states_head;
107 struct State *states_tail;
110 * Type of the automaton.
112 enum GNUNET_REGEX_automaton_type type;
116 * A state. Can be used in DFA and NFA automatons.
121 * This is a linked list.
126 * This is a linked list.
136 * If this is an accepting state or not.
141 * Marking of the state. This is used for marking all visited
142 * states when traversing all states of an automaton and for
143 * cases where the state id cannot be used (dfa minimization).
148 * Human readable name of the automaton. Used for debugging
149 * and graph creation.
154 * Number of transitions from this state to other states.
156 unsigned int transition_count;
159 * DLL of transitions.
161 struct Transition *transitions_head;
164 * DLL of transitions.
166 struct Transition *transitions_tail;
169 * Set of states on which this state is based on. Used when
170 * creating a DFA out of several NFA states.
172 struct StateSet *nfa_set;
176 * Transition between two states. Each state can have 0-n transitions.
177 * If literal is 0, this is considered to be an epsilon transition.
182 * This is a linked list.
184 struct Transition *prev;
187 * This is a linked list.
189 struct Transition *next;
192 * Unique id of this transition.
197 * Literal for this transition. This is basically the edge label for
203 * State to which this transition leads.
216 struct State **states;
219 * Length of the 'states' array.
225 * Initialize a new context
230 GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx)
234 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
238 ctx->transition_id = 0;
239 ctx->stack_head = NULL;
240 ctx->stack_tail = NULL;
244 debug_print_state (struct State *s)
246 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
247 "State %i: %s marked: %i accepting: %i\n", s->id, s->name,
248 s->marked, s->accepting);
252 debug_print_states (struct StateSet *sset)
257 for (i = 0; i < sset->len; i++)
260 debug_print_state (s);
265 debug_print_transitions (struct State *s)
267 struct Transition *t;
271 for (t = s->transitions_head; NULL != t; t = t->next)
276 literal = t->literal;
278 if (NULL == t->state)
281 state = t->state->name;
283 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transition %i: On %c to %s\n", t->id,
289 * Compare two states. Used for sorting.
291 * @param a first state
292 * @param b second state
294 * @return an integer less than, equal to, or greater than zero
295 * if the first argument is considered to be respectively
296 * less than, equal to, or greater than the second.
299 state_compare (const void *a, const void *b)
304 s1 = (struct State **) a;
305 s2 = (struct State **) b;
307 return (*s1)->id - (*s2)->id;
311 * Compare to state sets by comparing the id's of the states that are
312 * contained in each set. Both sets are expected to be sorted by id!
314 * @param sset1 first state set
315 * @param sset2 second state set
317 * @return an integer less than, equal to, or greater than zero
318 * if the first argument is considered to be respectively
319 * less than, equal to, or greater than the second.
322 state_set_compare (struct StateSet *sset1, struct StateSet *sset2)
327 if (NULL == sset1 || NULL == sset2)
330 result = sset1->len - sset2->len;
332 for (i = 0; i < sset1->len; i++)
337 result = state_compare (&sset1->states[i], &sset2->states[i]);
343 * Checks if 'elem' is contained in 'set'
345 * @param set set of states
348 * @return GNUNET_YES if 'set' contains 'elem, GNUNET_NO otherwise
351 state_set_contains (struct StateSet *set, struct State *elem)
356 for (i = 0; i < set->len; i++)
359 if (0 == memcmp (s, elem, sizeof (struct State)))
366 * Clears the given StateSet 'set'
368 * @param set set to be cleared
371 state_set_clear (struct StateSet *set)
375 if (NULL != set->states)
376 GNUNET_free (set->states);
382 * Adds a transition from one state to another on 'literal'
385 * @param from_state starting state for the transition
386 * @param literal transition label
387 * @param to_state state to where the transition should point to
390 add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state,
391 const char literal, struct State *to_state)
393 struct Transition *t;
395 if (NULL == from_state)
397 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
401 t = GNUNET_malloc (sizeof (struct Transition));
403 t->id = ctx->transition_id++;
404 t->literal = literal;
407 GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
408 from_state->transitions_tail, t);
412 * Clears an automaton fragment. Does not destroy the states inside
415 * @param a automaton to be cleared
418 automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
425 a->states_head = NULL;
426 a->states_tail = NULL;
432 * Frees the memory used by State 's'
434 * @param s state that should be destroyed
437 automaton_destroy_state (struct State *s)
439 struct Transition *t;
440 struct Transition *next_t;
446 GNUNET_free (s->name);
448 for (t = s->transitions_head; NULL != t;)
451 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
456 state_set_clear (s->nfa_set);
462 * Remove a state from the given automaton 'a'. Always use this function
463 * when altering the states of an automaton. Will also remove all transitions
464 * leading to this state, before destroying it.
467 * @param s state to remove
470 automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
473 struct State *s_check;
474 struct Transition *t_check;
476 if (NULL == a || NULL == s)
481 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
484 // remove all transitions leading to this state
485 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
487 for (t_check = s_check->transitions_head; NULL != t_check;
488 t_check = t_check->next)
490 if (t_check->state == ss)
492 GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
493 s_check->transitions_tail, t_check);
494 s_check->transition_count--;
499 automaton_destroy_state (ss);
503 * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy 's2'.
507 * @param s1 first state
508 * @param s2 second state, will be destroyed
511 automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
512 struct GNUNET_REGEX_Automaton *a, struct State *s1,
515 struct State *s_check;
516 struct Transition *t_check;
517 struct Transition *t;
520 GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
522 // 1. Make all transitions pointing to s2 point to s1
523 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
525 for (t_check = s_check->transitions_head; NULL != t_check;
526 t_check = t_check->next)
528 if (s_check != s1 && s2 == t_check->state)
533 // 2. Add all transitions from s2 to sX to s1
534 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
536 for (t = s1->transitions_head; NULL != t; t = t->next)
538 if (t_check->literal != t->literal && NULL != t_check->state &&
539 t_check->state != t->state && t_check->state != s2)
541 add_transition (ctx, s1, t_check->literal, t_check->state);
546 // 3. Rename s1 to {s1,s2}
547 new_name = GNUNET_malloc (strlen (s1->name) + strlen (s2->name) + 1);
548 strncat (new_name, s1->name, strlen (s1->name));
549 strncat (new_name, s2->name, strlen (s2->name));
550 if (NULL != s1->name)
551 GNUNET_free (s1->name);
556 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s_check);
558 automaton_destroy_state (s_check);
562 * Add a state to the automaton 'a', always use this function to
563 * alter the states DLL of the automaton.
565 * @param a automaton to add the state to
566 * @param s state that should be added
569 automaton_add_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
571 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
576 * Creates a new DFA state based on a set of NFA states. Needs to be freed
577 * using automaton_destroy_state.
580 * @param nfa_states set of NFA states on which the DFA should be based on
582 * @return new DFA state
584 static struct State *
585 dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states)
590 struct State *cstate;
591 struct Transition *ctran;
593 struct Transition *t;
596 s = GNUNET_malloc (sizeof (struct State));
597 s->id = ctx->state_id++;
602 if (NULL == nfa_states)
604 GNUNET_asprintf (&s->name, "s%i", s->id);
608 s->nfa_set = nfa_states;
610 if (nfa_states->len < 1)
613 // Create a name based on 'sset'
614 s->name = GNUNET_malloc (sizeof (char) * 2);
615 strcat (s->name, "{");
618 for (i = 0; i < nfa_states->len; i++)
620 cstate = nfa_states->states[i];
621 GNUNET_asprintf (&name, "%i,", cstate->id);
625 len = strlen (s->name) + strlen (name) + 1;
626 s->name = GNUNET_realloc (s->name, len);
627 strcat (s->name, name);
632 // Add a transition for each distinct literal to NULL state
633 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
635 if (0 != ctran->literal)
639 for (t = s->transitions_head; NULL != t; t = t->next)
641 if (t->literal == ctran->literal)
649 add_transition (ctx, s, ctran->literal, NULL);
653 // If the nfa_states contain an accepting state, the new dfa state is also accepting
654 if (cstate->accepting)
658 s->name[strlen (s->name) - 1] = '}';
664 * Move from the given state 's' to the next state on
665 * transition 'literal'
667 * @param s starting state
668 * @param literal edge label to follow
670 * @return new state or NULL, if transition on literal not possible
672 static struct State *
673 dfa_move (struct State *s, const char literal)
675 struct Transition *t;
683 for (t = s->transitions_head; NULL != t; t = t->next)
685 if (literal == t->literal)
696 * Remove all unreachable states from DFA 'a'. Unreachable states
697 * are those states that are not reachable from the starting state.
699 * @param a DFA automaton
702 dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
704 struct State *stack[a->state_count];
707 struct Transition *t;
711 // 1. unmark all states
712 for (s = a->states_head; NULL != s; s = s->next)
717 // 2. traverse dfa from start state and mark all visited states
718 stack[stack_len] = a->start;
720 while (stack_len > 0)
722 s = stack[stack_len - 1];
724 s->marked = 1; // mark s as visited
725 for (t = s->transitions_head; NULL != t; t = t->next)
727 // add next states to stack
728 if (NULL != t->state && 0 == t->state->marked)
729 stack[++stack_len] = t->state;
733 // 3. delete all states that were not visited
734 for (s = a->states_head; NULL != s; s = s->next)
737 automaton_remove_state (a, s);
742 * Remove all dead states from the DFA 'a'. Dead states are those
743 * states that do not transition to any other state but themselfes.
745 * @param a DFA automaton
748 dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
751 struct Transition *t;
754 GNUNET_assert (DFA == a->type);
756 for (s = a->states_head; NULL != s; s = s->next)
762 for (t = s->transitions_head; NULL != t; t = t->next)
764 if (NULL != t->state && t->state != s)
774 // state s is dead, remove it
775 automaton_remove_state (a, s);
780 * Merge all non distinguishable states in the DFA 'a'
783 * @param a DFA automaton
786 dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
787 struct GNUNET_REGEX_Automaton *a)
790 int table[a->state_count][a->state_count];
793 struct Transition *t1;
794 struct Transition *t2;
798 for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1;
802 // Mark all pairs of accepting/!accepting states
803 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
805 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
807 if ((s1->accepting && !s2->accepting) ||
808 (!s1->accepting && s2->accepting))
810 table[s1->marked][s2->marked] = 1;
813 table[s1->marked][s2->marked] = 0;
820 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
822 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
824 if (0 != table[s1->marked][s2->marked])
827 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
829 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
831 if (t1->literal == t2->literal && t1->state == t2->state &&
832 (0 != table[t1->state->marked][t2->state->marked] ||
833 0 != table[t2->state->marked][t1->state->marked]))
835 table[s1->marked][s2->marked] = t1->literal;
838 else if (t1->literal != t2->literal && t1->state != t2->state)
840 table[s1->marked][s2->marked] = -1;
849 struct State *s2_next;
851 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
853 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
856 if (s1 != s2 && table[s1->marked][s2->marked] == 0)
857 automaton_merge_states (ctx, a, s1, s2);
863 * Minimize the given DFA 'a' by removing all unreachable states,
864 * removing all dead states and merging all non distinguishable states
867 * @param a DFA automaton
870 dfa_minimize (struct GNUNET_REGEX_Context *ctx,
871 struct GNUNET_REGEX_Automaton *a)
876 GNUNET_assert (DFA == a->type);
878 // 1. remove unreachable states
879 dfa_remove_unreachable_states (a);
881 // 2. remove dead states
882 dfa_remove_dead_states (a);
884 // 3. Merge nondistinguishable states
885 dfa_merge_nondistinguishable_states (ctx, a);
889 * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear.
891 * @param start starting state
892 * @param end end state
894 * @return new NFA fragment
896 static struct GNUNET_REGEX_Automaton *
897 nfa_fragment_create (struct State *start, struct State *end)
899 struct GNUNET_REGEX_Automaton *n;
901 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
907 if (NULL == start && NULL == end)
910 automaton_add_state (n, end);
911 automaton_add_state (n, start);
920 * Adds a list of states to the given automaton 'n'.
922 * @param n automaton to which the states should be added
923 * @param states_head head of the DLL of states
924 * @param states_tail tail of the DLL of states
927 nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head,
928 struct State *states_tail)
932 if (NULL == n || NULL == states_head)
934 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
938 if (NULL == n->states_head)
940 n->states_head = states_head;
941 n->states_tail = states_tail;
945 if (NULL != states_head)
947 n->states_tail->next = states_head;
948 n->states_tail = states_tail;
951 for (s = states_head; NULL != s; s = s->next)
956 * Creates a new NFA state. Needs to be freed using automaton_destroy_state.
959 * @param accepting is it an accepting state or not
961 * @return new NFA state
963 static struct State *
964 nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
968 s = GNUNET_malloc (sizeof (struct State));
969 s->id = ctx->state_id++;
970 s->accepting = accepting;
973 GNUNET_asprintf (&s->name, "s%i", s->id);
979 * Calculates the NFA closure set for the given state.
981 * @param s starting point state
982 * @param literal transitioning literal on which to base the closure on,
983 * pass 0 for epsilon transition
985 * @return sorted nfa closure on 'literal' (epsilon closure if 'literal' is 0)
987 static struct StateSet *
988 nfa_closure_create (struct State *s, const char literal)
990 struct StateSet *cls;
991 struct StateSet *cls_check;
992 struct State *clsstate;
993 struct State *currentstate;
994 struct Transition *ctran;
999 cls = GNUNET_malloc (sizeof (struct StateSet));
1000 cls_check = GNUNET_malloc (sizeof (struct StateSet));
1002 // Add start state to closure only for epsilon closure
1004 GNUNET_array_append (cls->states, cls->len, s);
1006 GNUNET_array_append (cls_check->states, cls_check->len, s);
1007 while (cls_check->len > 0)
1009 currentstate = cls_check->states[cls_check->len - 1];
1010 GNUNET_array_grow (cls_check->states, cls_check->len, cls_check->len - 1);
1012 for (ctran = currentstate->transitions_head; NULL != ctran;
1013 ctran = ctran->next)
1015 if (NULL != ctran->state && literal == ctran->literal)
1017 clsstate = ctran->state;
1019 if (NULL != clsstate &&
1020 GNUNET_YES != state_set_contains (cls, clsstate))
1022 GNUNET_array_append (cls->states, cls->len, clsstate);
1023 GNUNET_array_append (cls_check->states, cls_check->len, clsstate);
1028 GNUNET_assert (0 == cls_check->len);
1029 GNUNET_free (cls_check);
1032 qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
1038 * Calculates the closure set for the given set of states.
1040 * @param states list of states on which to base the closure on
1041 * @param literal transitioning literal for which to base the closure on,
1042 * pass 0 for epsilon transition
1044 * @return sorted nfa closure on 'literal' (epsilon closure if 'literal' is 0)
1046 static struct StateSet *
1047 nfa_closure_set_create (struct StateSet *states, const char literal)
1050 struct StateSet *sset;
1051 struct StateSet *cls;
1060 cls = GNUNET_malloc (sizeof (struct StateSet));
1062 for (i = 0; i < states->len; i++)
1064 s = states->states[i];
1065 sset = nfa_closure_create (s, literal);
1067 for (j = 0; j < sset->len; j++)
1070 for (k = 0; k < cls->len; k++)
1072 if (sset->states[j]->id == cls->states[k]->id)
1079 GNUNET_array_append (cls->states, cls->len, sset->states[j]);
1081 state_set_clear (sset);
1085 qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
1091 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
1093 * @param ctx context
1096 nfa_add_concatenation (struct GNUNET_REGEX_Context *ctx)
1098 struct GNUNET_REGEX_Automaton *a;
1099 struct GNUNET_REGEX_Automaton *b;
1100 struct GNUNET_REGEX_Automaton *new;
1102 b = ctx->stack_tail;
1103 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
1104 a = ctx->stack_tail;
1105 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1107 add_transition (ctx, a->end, 0, b->start);
1108 a->end->accepting = 0;
1109 b->end->accepting = 1;
1111 new = nfa_fragment_create (NULL, NULL);
1112 nfa_add_states (new, a->states_head, a->states_tail);
1113 nfa_add_states (new, b->states_head, b->states_tail);
1114 new->start = a->start;
1116 automaton_fragment_clear (a);
1117 automaton_fragment_clear (b);
1119 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1123 * Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
1125 * @param ctx context
1128 nfa_add_star_op (struct GNUNET_REGEX_Context *ctx)
1130 struct GNUNET_REGEX_Automaton *a;
1131 struct GNUNET_REGEX_Automaton *new;
1132 struct State *start;
1135 a = ctx->stack_tail;
1136 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1140 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1141 "nfa_add_star_op failed, because there was no element on the stack");
1145 start = nfa_state_create (ctx, 0);
1146 end = nfa_state_create (ctx, 1);
1148 add_transition (ctx, start, 0, a->start);
1149 add_transition (ctx, start, 0, end);
1150 add_transition (ctx, a->end, 0, a->start);
1151 add_transition (ctx, a->end, 0, end);
1153 a->end->accepting = 0;
1156 new = nfa_fragment_create (start, end);
1157 nfa_add_states (new, a->states_head, a->states_tail);
1158 automaton_fragment_clear (a);
1160 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1164 * Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
1166 * @param ctx context
1169 nfa_add_plus_op (struct GNUNET_REGEX_Context *ctx)
1171 struct GNUNET_REGEX_Automaton *a;
1173 a = ctx->stack_tail;
1174 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1176 add_transition (ctx, a->end, 0, a->start);
1178 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
1182 * Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
1184 * @param ctx context
1187 nfa_add_question_op (struct GNUNET_REGEX_Context *ctx)
1189 struct GNUNET_REGEX_Automaton *a;
1190 struct GNUNET_REGEX_Automaton *new;
1191 struct State *start;
1194 a = ctx->stack_tail;
1195 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1199 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1200 "nfa_add_question_op failed, because there was no element on the stack");
1204 start = nfa_state_create (ctx, 0);
1205 end = nfa_state_create (ctx, 1);
1207 add_transition (ctx, start, 0, a->start);
1208 add_transition (ctx, start, 0, end);
1209 add_transition (ctx, a->end, 0, end);
1211 a->end->accepting = 0;
1213 new = nfa_fragment_create (start, end);
1214 nfa_add_states (new, a->states_head, a->states_tail);
1215 automaton_fragment_clear (a);
1217 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1221 * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment
1222 * that alternates between a and b (a|b)
1224 * @param ctx context
1227 nfa_add_alternation (struct GNUNET_REGEX_Context *ctx)
1229 struct GNUNET_REGEX_Automaton *a;
1230 struct GNUNET_REGEX_Automaton *b;
1231 struct GNUNET_REGEX_Automaton *new;
1232 struct State *start;
1235 b = ctx->stack_tail;
1236 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
1237 a = ctx->stack_tail;
1238 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
1240 start = nfa_state_create (ctx, 0);
1241 end = nfa_state_create (ctx, 1);
1242 add_transition (ctx, start, 0, a->start);
1243 add_transition (ctx, start, 0, b->start);
1245 add_transition (ctx, a->end, 0, end);
1246 add_transition (ctx, b->end, 0, end);
1248 a->end->accepting = 0;
1249 b->end->accepting = 0;
1252 new = nfa_fragment_create (start, end);
1253 nfa_add_states (new, a->states_head, a->states_tail);
1254 nfa_add_states (new, b->states_head, b->states_tail);
1255 automaton_fragment_clear (a);
1256 automaton_fragment_clear (b);
1258 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
1262 * Adds a new nfa fragment to the stack
1264 * @param ctx context
1265 * @param lit literal for nfa transition
1268 nfa_add_literal (struct GNUNET_REGEX_Context *ctx, const char lit)
1270 struct GNUNET_REGEX_Automaton *n;
1271 struct State *start;
1274 GNUNET_assert (NULL != ctx);
1276 start = nfa_state_create (ctx, 0);
1277 end = nfa_state_create (ctx, 1);
1278 add_transition (ctx, start, lit, end);
1279 n = nfa_fragment_create (start, end);
1280 GNUNET_assert (NULL != n);
1281 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
1285 * Construct an NFA by parsing the regex string of length 'len'.
1287 * @param regex regular expression string
1288 * @param len length of the string
1290 * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton
1292 struct GNUNET_REGEX_Automaton *
1293 GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
1295 struct GNUNET_REGEX_Context ctx;
1296 struct GNUNET_REGEX_Automaton *nfa;
1300 unsigned int altcount;
1301 unsigned int atomcount;
1302 unsigned int pcount;
1309 GNUNET_REGEX_context_init (&ctx);
1318 for (count = 0; count < len && *regexp; count++, regexp++)
1326 nfa_add_concatenation (&ctx);
1328 GNUNET_array_grow (p, pcount, pcount + 1);
1329 p[pcount - 1].altcount = altcount;
1330 p[pcount - 1].atomcount = atomcount;
1337 error_msg = "Cannot append '|' to nothing";
1340 while (--atomcount > 0)
1341 nfa_add_concatenation (&ctx);
1347 error_msg = "Missing opening '('";
1352 // Ignore this: "()"
1354 altcount = p[pcount].altcount;
1355 atomcount = p[pcount].atomcount;
1358 while (--atomcount > 0)
1359 nfa_add_concatenation (&ctx);
1360 for (; altcount > 0; altcount--)
1361 nfa_add_alternation (&ctx);
1363 altcount = p[pcount].altcount;
1364 atomcount = p[pcount].atomcount;
1370 error_msg = "Cannot append '+' to nothing";
1373 nfa_add_star_op (&ctx);
1378 error_msg = "Cannot append '+' to nothing";
1381 nfa_add_plus_op (&ctx);
1386 error_msg = "Cannot append '?' to nothing";
1389 nfa_add_question_op (&ctx);
1391 case 92: /* escape: \ */
1398 nfa_add_concatenation (&ctx);
1400 nfa_add_literal (&ctx, *regexp);
1407 error_msg = "Unbalanced parenthesis";
1410 while (--atomcount > 0)
1411 nfa_add_concatenation (&ctx);
1412 for (; altcount > 0; altcount--)
1413 nfa_add_alternation (&ctx);
1418 nfa = ctx.stack_tail;
1419 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
1422 if (NULL != ctx.stack_head)
1424 error_msg = "Creating the NFA failed. NFA stack was not empty!";
1431 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex\n");
1432 if (NULL != error_msg)
1433 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
1436 while (NULL != ctx.stack_tail)
1438 GNUNET_REGEX_automaton_destroy (ctx.stack_tail);
1439 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail,
1446 * Free the memory allocated by constructing the GNUNET_REGEX_Automaton
1449 * @param a automaton to be destroyed
1452 GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a)
1455 struct State *next_state;
1460 for (s = a->states_head; NULL != s;)
1462 next_state = s->next;
1463 automaton_destroy_state (s);
1471 * Construct DFA for the given 'regex' of length 'len'
1473 * @param regex regular expression string
1474 * @param len length of the regular expression
1476 * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton
1478 struct GNUNET_REGEX_Automaton *
1479 GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
1481 struct GNUNET_REGEX_Context ctx;
1482 struct GNUNET_REGEX_Automaton *dfa;
1483 struct GNUNET_REGEX_Automaton *nfa;
1484 struct StateSet *tmp;
1485 struct StateSet *nfa_set;
1486 struct StateSet *dfa_stack;
1487 struct Transition *ctran;
1488 struct State *dfa_state;
1489 struct State *new_dfa_state;
1490 struct State *state_contains;
1491 struct State *state_iter;
1493 GNUNET_REGEX_context_init (&ctx);
1496 nfa = GNUNET_REGEX_construct_nfa (regex, len);
1500 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1501 "Could not create DFA, because NFA creation failed\n");
1505 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
1508 // Create DFA start state from epsilon closure
1509 dfa_stack = GNUNET_malloc (sizeof (struct StateSet));
1510 nfa_set = nfa_closure_create (nfa->start, 0);
1511 dfa->start = dfa_state_create (&ctx, nfa_set);
1512 automaton_add_state (dfa, dfa->start);
1513 GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start);
1515 // Create dfa states by combining nfa states
1516 while (dfa_stack->len > 0)
1518 dfa_state = dfa_stack->states[dfa_stack->len - 1];
1519 GNUNET_array_grow (dfa_stack->states, dfa_stack->len, dfa_stack->len - 1);
1521 for (ctran = dfa_state->transitions_head; NULL != ctran;
1522 ctran = ctran->next)
1524 if (0 != ctran->literal && NULL == ctran->state)
1526 tmp = nfa_closure_set_create (dfa_state->nfa_set, ctran->literal);
1527 nfa_set = nfa_closure_set_create (tmp, 0);
1528 state_set_clear (tmp);
1529 new_dfa_state = dfa_state_create (&ctx, nfa_set);
1530 state_contains = NULL;
1531 for (state_iter = dfa->states_head; NULL != state_iter;
1532 state_iter = state_iter->next)
1535 state_set_compare (state_iter->nfa_set, new_dfa_state->nfa_set))
1536 state_contains = state_iter;
1539 if (NULL == state_contains)
1541 automaton_add_state (dfa, new_dfa_state);
1542 GNUNET_array_append (dfa_stack->states, dfa_stack->len,
1544 ctran->state = new_dfa_state;
1548 ctran->state = state_contains;
1549 automaton_destroy_state (new_dfa_state);
1555 GNUNET_free (dfa_stack);
1556 GNUNET_REGEX_automaton_destroy (nfa);
1558 dfa_minimize (&ctx, dfa);
1564 * Save the given automaton as a GraphViz dot file
1566 * @param a the automaton to be saved
1567 * @param filename where to save the file
1570 GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
1571 const char *filename)
1574 struct Transition *ctran;
1576 char *s_tran = NULL;
1583 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
1587 if (NULL == filename || strlen (filename) < 1)
1589 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
1593 p = fopen (filename, "w");
1597 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
1602 start = "digraph G {\nrankdir=LR\n";
1603 fwrite (start, strlen (start), 1, p);
1605 for (s = a->states_head; NULL != s; s = s->next)
1609 GNUNET_asprintf (&s_acc, "\"%s\" [shape=doublecircle];\n", s->name);
1610 fwrite (s_acc, strlen (s_acc), 1, p);
1611 GNUNET_free (s_acc);
1616 for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
1618 if (NULL == ctran->state)
1620 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1621 "Transition from State %i has has no state for transitioning\n",
1626 if (ctran->literal == 0)
1628 GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"epsilon\"];\n",
1629 s->name, ctran->state->name);
1633 GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"%c\"];\n",
1634 s->name, ctran->state->name, ctran->literal);
1637 fwrite (s_tran, strlen (s_tran), 1, p);
1638 GNUNET_free (s_tran);
1643 fwrite (end, strlen (end), 1, p);
1648 * Evaluates the given string using the given DFA automaton
1650 * @param a automaton, type must be DFA
1651 * @param string string that should be evaluated
1653 * @return 0 if string matches, non 0 otherwise
1656 evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1663 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1664 "Tried to evaluate DFA, but NFA automaton given");
1670 for (strp = string; NULL != strp && *strp; strp++)
1672 s = dfa_move (s, *strp);
1677 if (NULL != s && s->accepting)
1684 * Evaluates the given string using the given NFA automaton
1686 * @param a automaton, type must be NFA
1687 * @param string string that should be evaluated
1689 * @return 0 if string matches, non 0 otherwise
1692 evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1696 struct StateSet *sset;
1697 struct StateSet *new_sset;
1703 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1704 "Tried to evaluate NFA, but DFA automaton given");
1710 sset = nfa_closure_create (a->start, 0);
1712 for (strp = string; NULL != strp && *strp; strp++)
1714 new_sset = nfa_closure_set_create (sset, *strp);
1715 state_set_clear (sset);
1716 sset = nfa_closure_set_create (new_sset, 0);
1717 state_set_clear (new_sset);
1720 for (i = 0; i < sset->len; i++)
1722 s = sset->states[i];
1723 if (NULL != s && s->accepting)
1730 state_set_clear (sset);
1735 * Evaluates the given 'string' against the given compiled regex
1737 * @param a automaton
1738 * @param string string to check
1740 * @return 0 if string matches, non 0 otherwise
1743 GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
1750 result = evaluate_dfa (a, string);
1753 result = evaluate_nfa (a, string);
1756 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1757 "Evaluating regex failed, automaton has no type!\n");
1758 result = GNUNET_SYSERR;