2 This file is part of GNUnet
3 Copyright (C) 2012 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL3.0-or-later
21 * @file src/regex/regex_internal.c
22 * @brief library to create Deterministic Finite Automatons (DFAs) from regular
23 * expressions (regexes).
24 * @author Maximilian Szengel
27 #include "gnunet_util_lib.h"
28 #include "gnunet_regex_service.h"
29 #include "regex_internal_lib.h"
30 #include "regex_internal.h"
34 * Set this to #GNUNET_YES to enable state naming. Used to debug NFA->DFA
35 * creation. Disabled by default for better performance.
37 #define REGEX_DEBUG_DFA GNUNET_NO
40 * Set of states using MDLL API.
42 struct REGEX_INTERNAL_StateSet_MDLL
47 struct REGEX_INTERNAL_State *head;
52 struct REGEX_INTERNAL_State *tail;
62 * Append state to the given StateSet.
64 * @param set set to be modified
65 * @param state state to be appended
68 state_set_append (struct REGEX_INTERNAL_StateSet *set,
69 struct REGEX_INTERNAL_State *state)
71 if (set->off == set->size)
72 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73 set->states[set->off++] = state;
78 * Compare two strings for equality. If either is NULL they are not equal.
80 * @param str1 first string for comparison.
81 * @param str2 second string for comparison.
83 * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
86 nullstrcmp (const char *str1, const char *str2)
88 if ((NULL == str1) != (NULL == str2))
90 if ((NULL == str1) && (NULL == str2))
93 return strcmp (str1, str2);
98 * Adds a transition from one state to another on @a label. Does not add
102 * @param from_state starting state for the transition
103 * @param label transition label
104 * @param to_state state to where the transition should point to
107 state_add_transition (struct REGEX_INTERNAL_Context *ctx,
108 struct REGEX_INTERNAL_State *from_state,
110 struct REGEX_INTERNAL_State *to_state)
112 struct REGEX_INTERNAL_Transition *t;
113 struct REGEX_INTERNAL_Transition *oth;
115 if (NULL == from_state)
117 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
121 /* Do not add duplicate state transitions */
122 for (t = from_state->transitions_head; NULL != t; t = t->next)
124 if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125 (t->from_state == from_state) )
129 /* sort transitions by label */
130 for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
132 if (0 < nullstrcmp (oth->label, label))
136 t = GNUNET_new (struct REGEX_INTERNAL_Transition);
138 t->id = ctx->transition_id++;
140 t->label = GNUNET_strdup (label);
143 t->to_state = to_state;
144 t->from_state = from_state;
146 /* Add outgoing transition to 'from_state' */
147 from_state->transition_count++;
148 GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head,
149 from_state->transitions_tail,
156 * Remove a 'transition' from 'state'.
158 * @param state state from which the to-be-removed transition originates.
159 * @param transition transition that should be removed from state 'state'.
162 state_remove_transition (struct REGEX_INTERNAL_State *state,
163 struct REGEX_INTERNAL_Transition *transition)
165 if ((NULL == state) || (NULL == transition))
168 if (transition->from_state != state)
171 GNUNET_free_non_null (transition->label);
173 state->transition_count--;
174 GNUNET_CONTAINER_DLL_remove (state->transitions_head,
175 state->transitions_tail,
178 GNUNET_free (transition);
183 * Compare two states. Used for sorting.
185 * @param a first state
186 * @param b second state
188 * @return an integer less than, equal to, or greater than zero
189 * if the first argument is considered to be respectively
190 * less than, equal to, or greater than the second.
193 state_compare (const void *a, const void *b)
195 struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196 struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
198 return (*s1)->id - (*s2)->id;
203 * Get all edges leaving state @a s.
206 * @param edges all edges leaving @a s, expected to be allocated and have enough
207 * space for `s->transitions_count` elements.
209 * @return number of edges.
212 state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
214 struct REGEX_INTERNAL_Transition *t;
222 for (t = s->transitions_head; NULL != t; t = t->next)
224 if (NULL != t->to_state)
226 edges[count].label = t->label;
227 edges[count].destination = t->to_state->hash;
236 * Compare to state sets by comparing the id's of the states that are contained
237 * in each set. Both sets are expected to be sorted by id!
239 * @param sset1 first state set
240 * @param sset2 second state set
241 * @return 0 if the sets are equal, otherwise non-zero
244 state_set_compare (struct REGEX_INTERNAL_StateSet *sset1,
245 struct REGEX_INTERNAL_StateSet *sset2)
250 if ((NULL == sset1) || (NULL == sset2))
253 result = sset1->off - sset2->off;
258 for (i = 0; i < sset1->off; i++)
259 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
266 * Clears the given StateSet 'set'
268 * @param set set to be cleared
271 state_set_clear (struct REGEX_INTERNAL_StateSet *set)
273 GNUNET_array_grow (set->states, set->size, 0);
279 * Clears an automaton fragment. Does not destroy the states inside the
282 * @param a automaton to be cleared
285 automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a)
292 a->states_head = NULL;
293 a->states_tail = NULL;
300 * Frees the memory used by State @a s
302 * @param s state that should be destroyed
305 automaton_destroy_state (struct REGEX_INTERNAL_State *s)
307 struct REGEX_INTERNAL_Transition *t;
308 struct REGEX_INTERNAL_Transition *next_t;
313 GNUNET_free_non_null (s->name);
314 GNUNET_free_non_null (s->proof);
315 state_set_clear (&s->nfa_set);
316 for (t = s->transitions_head; NULL != t; t = next_t)
319 state_remove_transition (s, t);
327 * Remove a state from the given automaton 'a'. Always use this function when
328 * altering the states of an automaton. Will also remove all transitions leading
329 * to this state, before destroying it.
332 * @param s state to remove
335 automaton_remove_state (struct REGEX_INTERNAL_Automaton *a,
336 struct REGEX_INTERNAL_State *s)
338 struct REGEX_INTERNAL_State *s_check;
339 struct REGEX_INTERNAL_Transition *t_check;
340 struct REGEX_INTERNAL_Transition *t_check_next;
342 if ((NULL == a) || (NULL == s))
345 /* remove all transitions leading to this state */
346 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
348 for (t_check = s_check->transitions_head; NULL != t_check;
349 t_check = t_check_next)
351 t_check_next = t_check->next;
352 if (t_check->to_state == s)
353 state_remove_transition (s_check, t_check);
358 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
361 automaton_destroy_state (s);
366 * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy
367 * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.
371 * @param s1 first state
372 * @param s2 second state, will be destroyed
375 automaton_merge_states (struct REGEX_INTERNAL_Context *ctx,
376 struct REGEX_INTERNAL_Automaton *a,
377 struct REGEX_INTERNAL_State *s1,
378 struct REGEX_INTERNAL_State *s2)
380 struct REGEX_INTERNAL_State *s_check;
381 struct REGEX_INTERNAL_Transition *t_check;
382 struct REGEX_INTERNAL_Transition *t;
383 struct REGEX_INTERNAL_Transition *t_next;
389 /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390 * does not already exists, if it already exists remove transition. */
391 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
393 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
395 t_next = t_check->next;
397 if (s2 == t_check->to_state)
400 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
402 if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
405 if (GNUNET_NO == is_dup)
406 t_check->to_state = s1;
408 state_remove_transition (t_check->from_state, t_check);
413 /* 2. Add all transitions from s2 to sX to s1 */
414 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
416 if (t_check->to_state != s1)
417 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
420 /* 3. Rename s1 to {s1,s2} */
425 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426 GNUNET_free (new_name);
430 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2);
432 automaton_destroy_state (s2);
437 * Add a state to the automaton 'a', always use this function to alter the
438 * states DLL of the automaton.
440 * @param a automaton to add the state to
441 * @param s state that should be added
444 automaton_add_state (struct REGEX_INTERNAL_Automaton *a,
445 struct REGEX_INTERNAL_State *s)
447 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
453 * Depth-first traversal (DFS) of all states that are reachable from state
454 * 's'. Performs 'action' on each visited state.
456 * @param s start state.
457 * @param marks an array of size a->state_count to remember which state was
459 * @param count current count of the state.
460 * @param check function that is checked before advancing on each transition
462 * @param check_cls closure for check.
463 * @param action action to be performed on each state.
464 * @param action_cls closure for action.
467 automaton_state_traverse (struct REGEX_INTERNAL_State *s,
470 REGEX_INTERNAL_traverse_check check,
472 REGEX_INTERNAL_traverse_action action,
475 struct REGEX_INTERNAL_Transition *t;
477 if (GNUNET_YES == marks[s->traversal_id])
480 marks[s->traversal_id] = GNUNET_YES;
483 action (action_cls, *count, s);
487 for (t = s->transitions_head; NULL != t; t = t->next)
489 if ((NULL == check) ||
490 ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
492 automaton_state_traverse (t->to_state,
505 * Traverses the given automaton using depth-first-search (DFS) from it's start
506 * state, visiting all reachable states and calling 'action' on each one of
509 * @param a automaton to be traversed.
510 * @param start start state, pass a->start or NULL to traverse the whole automaton.
511 * @param check function that is checked before advancing on each transition
513 * @param check_cls closure for @a check.
514 * @param action action to be performed on each state.
515 * @param action_cls closure for @a action
518 REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
519 struct REGEX_INTERNAL_State *start,
520 REGEX_INTERNAL_traverse_check check,
522 REGEX_INTERNAL_traverse_action action,
526 struct REGEX_INTERNAL_State *s;
528 if ((NULL == a) || (0 == a->state_count))
531 int marks[a->state_count];
533 for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
534 s = s->next, count++)
536 s->traversal_id = count;
537 marks[s->traversal_id] = GNUNET_NO;
547 automaton_state_traverse (s,
558 * String container for faster string operations.
563 * Buffer holding the string (may start in the middle!);
574 * Length of the string in the buffer.
579 * Number of bytes allocated for @e sbuf
584 * Buffer currently represents "NULL" (not the empty string!)
589 * If this entry is part of the last/current generation array,
590 * this flag is #GNUNET_YES if the last and current generation are
591 * identical (and thus copying is unnecessary if the value didn't
592 * change). This is used in an optimization that improves
593 * performance by about 1% --- if we use int16_t here. With just
594 * "int" for both flags, performance drops (on my system) significantly,
595 * most likely due to increased cache misses.
602 * Compare two strings for equality. If either is NULL they are not equal.
604 * @param s1 first string for comparison.
605 * @param s2 second string for comparison.
607 * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
610 sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
612 if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
614 if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
616 if (s1->slen != s2->slen)
620 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
625 * Compare two strings for equality.
627 * @param s1 first string for comparison.
628 * @param s2 second string for comparison.
630 * @return 0 if the strings are the same, 1 or -1 if not.
633 sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
635 if (s1->slen != s2->slen)
639 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
644 * Reallocate the buffer of 'ret' to fit 'nlen' characters;
645 * move the existing string to the beginning of the new buffer.
647 * @param ret current buffer, to be updated
648 * @param nlen target length for the buffer, must be at least ret->slen
651 sb_realloc (struct StringBuffer *ret, size_t nlen)
655 GNUNET_assert (nlen >= ret->slen);
657 ret->abuf = GNUNET_malloc (nlen);
659 GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
660 ret->sbuf = ret->abuf;
661 GNUNET_free_non_null (old);
668 * @param ret where to write the result
669 * @param sarg string to append
672 sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
674 if (GNUNET_YES == ret->null_flag)
676 ret->null_flag = GNUNET_NO;
677 if (ret->blen < sarg->slen + ret->slen)
678 sb_realloc (ret, ret->blen + sarg->slen + 128);
679 GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
680 ret->slen += sarg->slen;
687 * @param ret where to write the result
688 * @param cstr string to append
691 sb_append_cstr (struct StringBuffer *ret, const char *cstr)
693 size_t cstr_len = strlen (cstr);
695 if (GNUNET_YES == ret->null_flag)
697 ret->null_flag = GNUNET_NO;
698 if (ret->blen < cstr_len + ret->slen)
699 sb_realloc (ret, ret->blen + cstr_len + 128);
700 GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
701 ret->slen += cstr_len;
706 * Wrap a string buffer, that is, set ret to the format string
707 * which contains an "%s" which is to be replaced with the original
708 * content of 'ret'. Note that optimizing this function is not
709 * really worth it, it is rarely called.
711 * @param ret where to write the result and take the input for %.*s from
712 * @param format format string, fprintf-style, with exactly one "%.*s"
713 * @param extra_chars how long will the result be, in addition to 'sarg' length
716 sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
720 if (GNUNET_YES == ret->null_flag)
722 ret->null_flag = GNUNET_NO;
723 temp = GNUNET_malloc (ret->slen + extra_chars + 1);
724 GNUNET_snprintf (temp,
725 ret->slen + extra_chars + 1,
729 GNUNET_free_non_null (ret->abuf);
732 ret->blen = ret->slen + extra_chars + 1;
733 ret->slen = ret->slen + extra_chars;
738 * Format a string buffer. Note that optimizing this function is not
739 * really worth it, it is rarely called.
741 * @param ret where to write the result
742 * @param format format string, fprintf-style, with exactly one "%.*s"
743 * @param extra_chars how long will the result be, in addition to 'sarg' length
744 * @param sarg string to print into the format
747 sb_printf1 (struct StringBuffer *ret,
750 const struct StringBuffer *sarg)
752 if (ret->blen < sarg->slen + extra_chars + 1)
753 sb_realloc (ret, sarg->slen + extra_chars + 1);
754 ret->null_flag = GNUNET_NO;
755 ret->sbuf = ret->abuf;
756 ret->slen = sarg->slen + extra_chars;
757 GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
762 * Format a string buffer.
764 * @param ret where to write the result
765 * @param format format string, fprintf-style, with exactly two "%.*s"
766 * @param extra_chars how long will the result be, in addition to 'sarg1/2' length
767 * @param sarg1 first string to print into the format
768 * @param sarg2 second string to print into the format
771 sb_printf2 (struct StringBuffer *ret,
774 const struct StringBuffer *sarg1,
775 const struct StringBuffer *sarg2)
777 if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
778 sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
779 ret->null_flag = GNUNET_NO;
780 ret->slen = sarg1->slen + sarg2->slen + extra_chars;
781 ret->sbuf = ret->abuf;
782 GNUNET_snprintf (ret->sbuf,
793 * Format a string buffer. Note that optimizing this function is not
794 * really worth it, it is rarely called.
796 * @param ret where to write the result
797 * @param format format string, fprintf-style, with exactly three "%.*s"
798 * @param extra_chars how long will the result be, in addition to 'sarg1/2/3' length
799 * @param sarg1 first string to print into the format
800 * @param sarg2 second string to print into the format
801 * @param sarg3 third string to print into the format
804 sb_printf3 (struct StringBuffer *ret,
807 const struct StringBuffer *sarg1,
808 const struct StringBuffer *sarg2,
809 const struct StringBuffer *sarg3)
811 if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
812 sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
813 ret->null_flag = GNUNET_NO;
814 ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
815 ret->sbuf = ret->abuf;
816 GNUNET_snprintf (ret->sbuf,
829 * Free resources of the given string buffer.
831 * @param sb buffer to free (actual pointer is not freed, as they
832 * should not be individually allocated)
835 sb_free (struct StringBuffer *sb)
837 GNUNET_array_grow (sb->abuf, sb->blen, 0);
840 sb->null_flag = GNUNET_YES;
845 * Copy the given string buffer from 'in' to 'out'.
847 * @param in input string
848 * @param out output string
851 sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
854 out->null_flag = in->null_flag;
855 if (GNUNET_YES == out->null_flag)
857 if (out->blen < in->slen)
859 GNUNET_array_grow (out->abuf, out->blen, in->slen);
861 out->sbuf = out->abuf;
862 out->slen = in->slen;
863 GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
868 * Copy the given string buffer from 'in' to 'out'.
870 * @param cstr input string
871 * @param out output string
874 sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
878 out->null_flag = GNUNET_YES;
881 out->null_flag = GNUNET_NO;
882 out->slen = strlen (cstr);
883 if (out->blen < out->slen)
885 GNUNET_array_grow (out->abuf, out->blen, out->slen);
887 out->sbuf = out->abuf;
888 GNUNET_memcpy (out->sbuf, cstr, out->slen);
893 * Check if the given string @a str needs parentheses around it when
894 * using it to generate a regex.
898 * @return #GNUNET_YES if parentheses are needed, #GNUNET_NO otherwise
901 needs_parentheses (const struct StringBuffer *str)
910 if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
915 end = str->sbuf + slen;
920 cl = memchr (pos, ')', end - pos);
926 /* while '(' before ')', count opening parens */
927 while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
936 return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
941 * Remove parentheses surrounding string @a str.
942 * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
943 * You need to #GNUNET_free() the returned string.
945 * @param str string, modified to contain a
946 * @return string without surrounding parentheses, string 'str' if no preceding
947 * epsilon could be found, NULL if 'str' was NULL
950 remove_parentheses (struct StringBuffer *str)
963 if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
964 ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
968 end = &sbuf[slen - 1];
969 op = memchr (pos, '(', end - pos);
970 cp = memchr (pos, ')', end - pos);
973 while ((NULL != op) && (op < cp))
977 op = memchr (pos, '(', end - pos);
979 while ((NULL != cp) && ((NULL == op) || (cp < op)))
982 return; /* can't strip parens */
985 cp = memchr (pos, ')', end - pos);
999 * Check if the string 'str' starts with an epsilon (empty string).
1000 * Example: "(|a)" is starting with an epsilon.
1002 * @param str string to test
1004 * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'
1007 has_epsilon (const struct StringBuffer *str)
1009 return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
1010 ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1011 (')' == str->sbuf[str->slen - 1]);
1016 * Remove an epsilon from the string str. Where epsilon is an empty string
1017 * Example: str = "(|a|b|c)", result: "a|b|c"
1018 * The returned string needs to be freed.
1020 * @param str original string
1021 * @param ret where to return string without preceding epsilon, string 'str' if no preceding
1022 * epsilon could be found, NULL if 'str' was NULL
1025 remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
1027 if (GNUNET_YES == str->null_flag)
1029 ret->null_flag = GNUNET_YES;
1032 if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1033 (')' == str->sbuf[str->slen - 1]))
1035 /* remove epsilon */
1036 if (ret->blen < str->slen - 3)
1038 GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1040 ret->sbuf = ret->abuf;
1041 ret->slen = str->slen - 3;
1042 GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1045 sb_strdup (ret, str);
1050 * Compare n bytes of 'str1' and 'str2'
1052 * @param str1 first string to compare
1053 * @param str2 second string for comparison
1054 * @param n number of bytes to compare
1056 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1059 sb_strncmp (const struct StringBuffer *str1,
1060 const struct StringBuffer *str2,
1065 if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1067 max = GNUNET_MAX (str1->slen, str2->slen);
1070 return memcmp (str1->sbuf, str2->sbuf, max);
1075 * Compare n bytes of 'str1' and 'str2'
1077 * @param str1 first string to compare
1078 * @param str2 second C string for comparison
1079 * @param n number of bytes to compare (and length of str2)
1081 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1084 sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
1088 return memcmp (str1->sbuf, str2, n);
1093 * Initialize string buffer for storing strings of up to n
1096 * @param sb buffer to initialize
1097 * @param n desired target length
1100 sb_init (struct StringBuffer *sb, size_t n)
1102 sb->null_flag = GNUNET_NO;
1103 sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1110 * Compare 'str1', starting from position 'k', with whole 'str2'
1112 * @param str1 first string to compare, starting from position 'k'
1113 * @param str2 second string for comparison
1114 * @param k starting position in 'str1'
1116 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1119 sb_strkcmp (const struct StringBuffer *str1,
1120 const struct StringBuffer *str2,
1123 if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1124 (k > str1->slen) || (str1->slen - k != str2->slen))
1126 return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1131 * Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse'
1132 * function to create the depth-first numbering of the states.
1134 * @param cls states array.
1135 * @param count current state counter.
1136 * @param s current state.
1139 number_states (void *cls,
1140 const unsigned int count,
1141 struct REGEX_INTERNAL_State *s)
1143 struct REGEX_INTERNAL_State **states = cls;
1152 ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1153 ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1157 * Construct the regular expression given the inductive step,
1158 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
1159 * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
1161 * @param R_last_ij value of $R^{(k-1)_{ij}.
1162 * @param R_last_ik value of $R^{(k-1)_{ik}.
1163 * @param R_last_kk value of $R^{(k-1)_{kk}.
1164 * @param R_last_kj value of $R^{(k-1)_{kj}.
1165 * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij
1166 * is expected to be NULL when called!
1167 * @param R_cur_l optimization -- kept between iterations to avoid realloc
1168 * @param R_cur_r optimization -- kept between iterations to avoid realloc
1171 automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
1172 const struct StringBuffer *R_last_ik,
1173 const struct StringBuffer *R_last_kk,
1174 const struct StringBuffer *R_last_kj,
1175 struct StringBuffer *R_cur_ij,
1176 struct StringBuffer *R_cur_l,
1177 struct StringBuffer *R_cur_r)
1179 struct StringBuffer R_temp_ij;
1180 struct StringBuffer R_temp_ik;
1181 struct StringBuffer R_temp_kj;
1182 struct StringBuffer R_temp_kk;
1188 int clean_ik_kk_cmp;
1189 int clean_kk_kj_cmp;
1195 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1196 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1197 * R_cur_ij = R_cur_l | R_cur_r
1198 * R_cur_l == R^{(k-1)}_{ij}
1199 * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1200 */if ((GNUNET_YES == R_last_ij->null_flag) &&
1201 ((GNUNET_YES == R_last_ik->null_flag) ||
1202 (GNUNET_YES == R_last_kj->null_flag)))
1204 /* R^{(k)}_{ij} = N | N */
1205 R_cur_ij->null_flag = GNUNET_YES;
1206 R_cur_ij->synced = GNUNET_NO;
1210 if ((GNUNET_YES == R_last_ik->null_flag) ||
1211 (GNUNET_YES == R_last_kj->null_flag))
1213 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1214 if (GNUNET_YES == R_last_ij->synced)
1216 R_cur_ij->synced = GNUNET_YES;
1217 R_cur_ij->null_flag = GNUNET_NO;
1220 R_cur_ij->synced = GNUNET_YES;
1221 sb_strdup (R_cur_ij, R_last_ij);
1224 R_cur_ij->synced = GNUNET_NO;
1226 /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1227 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1229 R_cur_r->null_flag = GNUNET_YES;
1231 R_cur_l->null_flag = GNUNET_YES;
1234 /* cache results from strcmp, we might need these many times */
1235 ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1236 ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1237 ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1238 kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1240 /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1241 * as parentheses, so we can better compare the contents */
1243 memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1244 memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1245 memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1246 memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1247 remove_epsilon (R_last_ik, &R_temp_ik);
1248 remove_epsilon (R_last_kk, &R_temp_kk);
1249 remove_epsilon (R_last_kj, &R_temp_kj);
1250 remove_parentheses (&R_temp_ik);
1251 remove_parentheses (&R_temp_kk);
1252 remove_parentheses (&R_temp_kj);
1253 clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1254 clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1256 /* construct R_cur_l (and, if necessary R_cur_r) */
1257 if (GNUNET_YES != R_last_ij->null_flag)
1259 /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1260 * as parentheses, so we can better compare the contents */
1261 remove_epsilon (R_last_ij, &R_temp_ij);
1262 remove_parentheses (&R_temp_ij);
1264 if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1265 (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1266 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1268 if (0 == R_temp_ij.slen)
1270 R_cur_r->null_flag = GNUNET_NO;
1272 else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1273 ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1274 (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1277 * a|(e|a)a*(e|a) = a*
1278 * a|(e|a)(e|a)*(e|a) = a*
1280 * (e|a)|aa*(e|a) = a*
1281 * (e|a)|(e|a)a*a = a*
1282 * (e|a)|(e|a)a*(e|a) = a*
1283 * (e|a)|(e|a)(e|a)*(e|a) = a*
1284 */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1285 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1287 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1295 * a|(e|a)(e|a)*a = a+
1296 * a|a(e|a)*(e|a) = a+
1297 */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1298 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1300 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1303 else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1304 (0 != clean_ik_kk_cmp))
1307 if (0 == R_last_kk->slen)
1308 sb_strdup (R_cur_r, R_last_ij);
1309 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1310 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1312 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1313 R_cur_l->null_flag = GNUNET_YES;
1315 else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1316 (0 != clean_kk_kj_cmp))
1319 if (R_last_kk->slen < 1)
1321 sb_strdup (R_cur_r, R_last_kj);
1323 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1324 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1326 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1328 R_cur_l->null_flag = GNUNET_YES;
1330 else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1331 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1333 /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1334 if (needs_parentheses (&R_temp_kk))
1335 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1337 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1338 R_cur_l->null_flag = GNUNET_YES;
1340 else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1341 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1343 /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1344 if (needs_parentheses (&R_temp_kk))
1345 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1347 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1348 R_cur_l->null_flag = GNUNET_YES;
1352 sb_strdup (R_cur_l, R_last_ij);
1353 remove_parentheses (R_cur_l);
1358 /* we have no left side */
1359 R_cur_l->null_flag = GNUNET_YES;
1362 /* construct R_cur_r, if not already constructed */
1363 if (GNUNET_YES == R_cur_r->null_flag)
1365 length = R_temp_kk.slen - R_last_ik->slen;
1367 /* a(ba)*bx = (ab)+x */
1368 if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1369 (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1370 (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1371 (0 < R_last_ik->slen) &&
1372 (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1373 (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1375 struct StringBuffer temp_a;
1376 struct StringBuffer temp_b;
1378 sb_init (&temp_a, length);
1379 sb_init (&temp_b, R_last_kj->slen - length);
1382 temp_a.sbuf = temp_a.abuf;
1383 GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1384 temp_a.slen = length_l;
1386 length_r = R_last_kj->slen - length;
1387 temp_b.sbuf = temp_b.abuf;
1388 GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1389 temp_b.slen = length_r;
1391 /* e|(ab)+ = (ab)* */
1392 if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1395 sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1397 R_cur_l->null_flag = GNUNET_YES;
1401 sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1406 else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1407 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1411 * (e|a)(e|a)*(e|a) = a*
1413 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1415 if (needs_parentheses (&R_temp_kk))
1416 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1418 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1421 else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1422 (! has_epsilon (R_last_ik)))
1424 if (needs_parentheses (&R_temp_kk))
1425 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1427 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1436 eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1437 + has_epsilon (R_last_kj));
1441 if (needs_parentheses (&R_temp_kk))
1442 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1444 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1450 * (e|a)(e|a)*b = a*b
1452 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1454 if (has_epsilon (R_last_ik))
1456 if (needs_parentheses (&R_temp_kk))
1457 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1459 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1463 if (needs_parentheses (&R_temp_kk))
1464 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1466 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1471 * b(e|a)*(e|a) = ba*
1473 else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1475 if (has_epsilon (R_last_kj))
1477 if (needs_parentheses (&R_temp_kk))
1478 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1480 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1484 if (needs_parentheses (&R_temp_kk))
1485 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1487 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1492 if (0 < R_temp_kk.slen)
1494 if (needs_parentheses (&R_temp_kk))
1496 sb_printf3 (R_cur_r,
1505 sb_printf3 (R_cur_r,
1515 sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1519 sb_free (&R_temp_ij);
1520 sb_free (&R_temp_ik);
1521 sb_free (&R_temp_kk);
1522 sb_free (&R_temp_kj);
1524 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1526 R_cur_ij->null_flag = GNUNET_YES;
1530 if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1532 struct StringBuffer tmp;
1535 *R_cur_ij = *R_cur_l;
1540 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1542 struct StringBuffer tmp;
1545 *R_cur_ij = *R_cur_r;
1550 if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1552 struct StringBuffer tmp;
1555 *R_cur_ij = *R_cur_l;
1559 sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1564 * Create proofs for all states in the given automaton. Implementation of the
1565 * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
1566 * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
1568 * Each state in the automaton gets assigned 'proof' and 'hash' (hash of the
1569 * proof) fields. The starting state will only have a valid proof/hash if it has
1570 * any incoming transitions.
1572 * @param a automaton for which to assign proofs and hashes, must not be NULL
1575 automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a)
1577 unsigned int n = a->state_count;
1578 struct REGEX_INTERNAL_State *states[n];
1579 struct StringBuffer *R_last;
1580 struct StringBuffer *R_cur;
1581 struct StringBuffer R_cur_r;
1582 struct StringBuffer R_cur_l;
1583 struct StringBuffer *R_swap;
1584 struct REGEX_INTERNAL_Transition *t;
1585 struct StringBuffer complete_regex;
1590 R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1591 R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1592 if ((NULL == R_last) || (NULL == R_cur))
1594 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
1595 GNUNET_free_non_null (R_cur);
1596 GNUNET_free_non_null (R_last);
1597 return GNUNET_SYSERR;
1600 /* create depth-first numbering of the states, initializes 'state' */
1601 REGEX_INTERNAL_automaton_traverse (a,
1608 for (i = 0; i < n; i++)
1609 GNUNET_assert (NULL != states[i]);
1610 for (i = 0; i < n; i++)
1611 for (j = 0; j < n; j++)
1612 R_last[i * n + j].null_flag = GNUNET_YES;
1614 /* Compute regular expressions of length "1" between each pair of states */
1615 for (i = 0; i < n; i++)
1617 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1619 j = t->to_state->dfs_id;
1620 if (GNUNET_YES == R_last[i * n + j].null_flag)
1622 sb_strdup_cstr (&R_last[i * n + j], t->label);
1626 sb_append_cstr (&R_last[i * n + j], "|");
1627 sb_append_cstr (&R_last[i * n + j], t->label);
1630 /* add self-loop: i is reachable from i via epsilon-transition */
1631 if (GNUNET_YES == R_last[i * n + i].null_flag)
1633 R_last[i * n + i].slen = 0;
1634 R_last[i * n + i].null_flag = GNUNET_NO;
1638 sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1641 for (i = 0; i < n; i++)
1642 for (j = 0; j < n; j++)
1643 if (needs_parentheses (&R_last[i * n + j]))
1644 sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1645 /* Compute regular expressions of length "k" between each pair of states per
1647 memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1648 memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1649 for (k = 0; k < n; k++)
1651 for (i = 0; i < n; i++)
1653 for (j = 0; j < n; j++)
1655 /* Basis for the recursion:
1656 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1657 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1660 /* Create R_cur[i][j] and simplify the expression */
1661 automaton_create_proofs_simplify (&R_last[i * n + j],
1670 /* set R_last = R_cur */
1674 /* clear 'R_cur' for next iteration */
1675 for (i = 0; i < n; i++)
1676 for (j = 0; j < n; j++)
1677 R_cur[i * n + j].null_flag = GNUNET_YES;
1681 /* assign proofs and hashes */
1682 for (i = 0; i < n; i++)
1684 if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1686 states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1687 R_last[a->start->dfs_id * n + i].slen);
1688 GNUNET_CRYPTO_hash (states[i]->proof,
1689 strlen (states[i]->proof),
1694 /* complete regex for whole DFA: union of all pairs (start state/accepting
1696 sb_init (&complete_regex, 16 * n);
1697 for (i = 0; i < n; i++)
1699 if (states[i]->accepting)
1701 if ((0 == complete_regex.slen) &&
1702 (0 < R_last[a->start->dfs_id * n + i].slen))
1704 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1706 else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1707 (0 < R_last[a->start->dfs_id * n + i].slen))
1709 sb_append_cstr (&complete_regex, "|");
1710 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1714 a->canonical_regex =
1715 GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1718 sb_free (&complete_regex);
1719 for (i = 0; i < n; i++)
1720 for (j = 0; j < n; j++)
1722 sb_free (&R_cur[i * n + j]);
1723 sb_free (&R_last[i * n + j]);
1725 GNUNET_free (R_cur);
1726 GNUNET_free (R_last);
1732 * Creates a new DFA state based on a set of NFA states. Needs to be freed using
1733 * automaton_destroy_state.
1735 * @param ctx context
1736 * @param nfa_states set of NFA states on which the DFA should be based on
1738 * @return new DFA state
1740 static struct REGEX_INTERNAL_State *
1741 dfa_state_create (struct REGEX_INTERNAL_Context *ctx,
1742 struct REGEX_INTERNAL_StateSet *nfa_states)
1744 struct REGEX_INTERNAL_State *s;
1747 struct REGEX_INTERNAL_State *cstate;
1748 struct REGEX_INTERNAL_Transition *ctran;
1751 s = GNUNET_new (struct REGEX_INTERNAL_State);
1752 s->id = ctx->state_id++;
1756 if (NULL == nfa_states)
1758 GNUNET_asprintf (&s->name, "s%i", s->id);
1762 s->nfa_set = *nfa_states;
1764 if (nfa_states->off < 1)
1767 /* Create a name based on 'nfa_states' */
1768 len = nfa_states->off * 14 + 4;
1769 s->name = GNUNET_malloc (len);
1770 strcat (s->name, "{");
1773 for (i = 0; i < nfa_states->off; i++)
1775 cstate = nfa_states->states[i];
1776 GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1777 pos += strlen (pos);
1779 /* Add a transition for each distinct label to NULL state */
1780 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1781 if (NULL != ctran->label)
1782 state_add_transition (ctx, s, ctran->label, NULL);
1784 /* If the nfa_states contain an accepting state, the new dfa state is also
1786 if (cstate->accepting)
1790 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1792 memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1798 * Move from the given state 's' to the next state on transition 'str'. Consumes
1799 * as much of the given 'str' as possible (usefull for strided DFAs). On return
1800 * 's' will point to the next state, and the length of the substring used for
1801 * this transition will be returned. If no transition possible 0 is returned and
1802 * 's' points to NULL.
1804 * @param s starting state, will point to the next state or NULL (if no
1805 * transition possible)
1806 * @param str edge label to follow (will match longest common prefix)
1808 * @return length of the substring comsumed from 'str'
1811 dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1813 struct REGEX_INTERNAL_Transition *t;
1814 struct REGEX_INTERNAL_State *new_s;
1816 unsigned int max_len;
1823 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1825 len = strlen (t->label);
1827 if (0 == strncmp (t->label, str, len))
1832 new_s = t->to_state;
1843 * Set the given state 'marked' to #GNUNET_YES. Used by the
1844 * #dfa_remove_unreachable_states() function to detect unreachable states in the
1847 * @param cls closure, not used.
1848 * @param count count, not used.
1849 * @param s state where the marked attribute will be set to #GNUNET_YES.
1852 mark_states (void *cls,
1853 const unsigned int count,
1854 struct REGEX_INTERNAL_State *s)
1856 s->marked = GNUNET_YES;
1861 * Remove all unreachable states from DFA 'a'. Unreachable states are those
1862 * states that are not reachable from the starting state.
1864 * @param a DFA automaton
1867 dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a)
1869 struct REGEX_INTERNAL_State *s;
1870 struct REGEX_INTERNAL_State *s_next;
1872 /* 1. unmark all states */
1873 for (s = a->states_head; NULL != s; s = s->next)
1874 s->marked = GNUNET_NO;
1876 /* 2. traverse dfa from start state and mark all visited states */
1877 REGEX_INTERNAL_automaton_traverse (a,
1884 /* 3. delete all states that were not visited */
1885 for (s = a->states_head; NULL != s; s = s_next)
1888 if (GNUNET_NO == s->marked)
1889 automaton_remove_state (a, s);
1895 * Remove all dead states from the DFA 'a'. Dead states are those states that do
1896 * not transition to any other state but themselves.
1898 * @param a DFA automaton
1901 dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a)
1903 struct REGEX_INTERNAL_State *s;
1904 struct REGEX_INTERNAL_State *s_next;
1905 struct REGEX_INTERNAL_Transition *t;
1908 GNUNET_assert (DFA == a->type);
1910 for (s = a->states_head; NULL != s; s = s_next)
1918 for (t = s->transitions_head; NULL != t; t = t->next)
1920 if ((NULL != t->to_state) && (t->to_state != s) )
1930 /* state s is dead, remove it */
1931 automaton_remove_state (a, s);
1937 * Merge all non distinguishable states in the DFA 'a'
1939 * @param ctx context
1940 * @param a DFA automaton
1941 * @return #GNUNET_OK on success
1944 dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx,
1945 struct REGEX_INTERNAL_Automaton *a)
1948 struct REGEX_INTERNAL_State *s1;
1949 struct REGEX_INTERNAL_State *s2;
1950 struct REGEX_INTERNAL_Transition *t1;
1951 struct REGEX_INTERNAL_Transition *t2;
1952 struct REGEX_INTERNAL_State *s1_next;
1953 struct REGEX_INTERNAL_State *s2_next;
1955 unsigned int num_equal_edges;
1957 unsigned int state_cnt;
1958 unsigned long long idx;
1959 unsigned long long idx1;
1961 if ((NULL == a) || (0 == a->state_count))
1963 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1964 "Could not merge nondistinguishable states, automaton was NULL.\n");
1965 return GNUNET_SYSERR;
1968 state_cnt = a->state_count;
1969 table = GNUNET_malloc_large (
1970 (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1973 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
1974 return GNUNET_SYSERR;
1977 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1980 /* Mark all pairs of accepting/!accepting states */
1981 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1982 for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1983 if ((s1->accepting && ! s2->accepting) ||
1984 (! s1->accepting && s2->accepting))
1986 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1987 table[idx / 32] |= (1U << (idx % 32));
1990 /* Find all equal states */
1995 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1997 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1999 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2000 if (0 != (table[idx / 32] & (1U << (idx % 32))))
2002 num_equal_edges = 0;
2003 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
2005 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
2007 if (0 == strcmp (t1->label, t2->label))
2010 /* same edge, but targets definitively different, so we're different
2012 if (t1->to_state->marked > t2->to_state->marked)
2013 idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2014 + t2->to_state->marked;
2016 idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2017 + t1->to_state->marked;
2018 if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2020 table[idx / 32] |= (1U << (idx % 32));
2021 change = 1; /* changed a marker, need to run again */
2026 if ((num_equal_edges != s1->transition_count) ||
2027 (num_equal_edges != s2->transition_count))
2029 /* Make sure ALL edges of possible equal states are the same */
2030 table[idx / 32] |= (1U << (idx % 32));
2031 change = 1; /* changed a marker, need to run again */
2037 /* Merge states that are equal */
2038 for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2041 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2044 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2045 if (0 == (table[idx / 32] & (1U << (idx % 32))))
2046 automaton_merge_states (ctx, a, s1, s2);
2050 GNUNET_free (table);
2056 * Minimize the given DFA 'a' by removing all unreachable states, removing all
2057 * dead states and merging all non distinguishable states
2059 * @param ctx context
2060 * @param a DFA automaton
2061 * @return GNUNET_OK on success
2064 dfa_minimize (struct REGEX_INTERNAL_Context *ctx,
2065 struct REGEX_INTERNAL_Automaton *a)
2068 return GNUNET_SYSERR;
2070 GNUNET_assert (DFA == a->type);
2072 /* 1. remove unreachable states */
2073 dfa_remove_unreachable_states (a);
2075 /* 2. remove dead states */
2076 dfa_remove_dead_states (a);
2078 /* 3. Merge nondistinguishable states */
2079 if (GNUNET_OK != dfa_merge_nondistinguishable_states (ctx, a))
2080 return GNUNET_SYSERR;
2086 * Context for adding strided transitions to a DFA.
2088 struct REGEX_INTERNAL_Strided_Context
2091 * Length of the strides.
2093 const unsigned int stride;
2096 * Strided transitions DLL. New strided transitions will be stored in this DLL
2097 * and afterwards added to the DFA.
2099 struct REGEX_INTERNAL_Transition *transitions_head;
2102 * Strided transitions DLL.
2104 struct REGEX_INTERNAL_Transition *transitions_tail;
2109 * Recursive helper function to add strides to a DFA.
2111 * @param cls context, contains stride length and strided transitions DLL.
2112 * @param depth current depth of the depth-first traversal of the graph.
2113 * @param label current label, string that contains all labels on the path from
2115 * @param start start state for the depth-first traversal of the graph.
2116 * @param s current state in the depth-first traversal
2119 dfa_add_multi_strides_helper (void *cls,
2120 const unsigned int depth,
2122 struct REGEX_INTERNAL_State *start,
2123 struct REGEX_INTERNAL_State *s)
2125 struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2126 struct REGEX_INTERNAL_Transition *t;
2129 if (depth == ctx->stride)
2131 t = GNUNET_new (struct REGEX_INTERNAL_Transition);
2132 t->label = GNUNET_strdup (label);
2134 t->from_state = start;
2135 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head,
2136 ctx->transitions_tail,
2141 for (t = s->transitions_head; NULL != t; t = t->next)
2143 /* Do not consider self-loops, because it end's up in too many
2145 if (t->to_state == t->from_state)
2150 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2153 new_label = GNUNET_strdup (t->label);
2155 dfa_add_multi_strides_helper (cls,
2162 GNUNET_free_non_null (label);
2167 * Function called for each state in the DFA. Starts a traversal of depth set in
2168 * context starting from state 's'.
2170 * @param cls context.
2171 * @param count not used.
2172 * @param s current state.
2175 dfa_add_multi_strides (void *cls,
2176 const unsigned int count,
2177 struct REGEX_INTERNAL_State *s)
2179 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2184 * Adds multi-strided transitions to the given 'dfa'.
2186 * @param regex_ctx regex context needed to add transitions to the automaton.
2187 * @param dfa DFA to which the multi strided transitions should be added.
2188 * @param stride_len length of the strides.
2191 REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx,
2192 struct REGEX_INTERNAL_Automaton *dfa,
2193 const unsigned int stride_len)
2195 struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2196 struct REGEX_INTERNAL_Transition *t;
2197 struct REGEX_INTERNAL_Transition *t_next;
2199 if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2202 /* Compute the new transitions of given stride_len */
2203 REGEX_INTERNAL_automaton_traverse (dfa,
2207 &dfa_add_multi_strides,
2210 /* Add all the new transitions to the automaton. */
2211 for (t = ctx.transitions_head; NULL != t; t = t_next)
2214 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2215 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2216 GNUNET_free_non_null (t->label);
2220 /* Mark this automaton as multistrided */
2221 dfa->is_multistrided = GNUNET_YES;
2226 * Recursive Helper function for DFA path compression. Does DFS on the DFA graph
2227 * and adds new transitions to the given transitions DLL and marks states that
2228 * should be removed by setting state->contained to GNUNET_YES.
2230 * @param dfa DFA for which the paths should be compressed.
2231 * @param start starting state for linear path search.
2232 * @param cur current state in the recursive DFS.
2233 * @param label current label (string of traversed labels).
2234 * @param max_len maximal path compression length.
2235 * @param transitions_head transitions DLL.
2236 * @param transitions_tail transitions DLL.
2239 dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa,
2240 struct REGEX_INTERNAL_State *start,
2241 struct REGEX_INTERNAL_State *cur,
2243 unsigned int max_len,
2244 struct REGEX_INTERNAL_Transition **transitions_head,
2245 struct REGEX_INTERNAL_Transition **transitions_tail)
2247 struct REGEX_INTERNAL_Transition *t;
2251 if ((NULL != label) &&
2252 (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2254 (GNUNET_YES == cur->marked) ) ||
2255 ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2257 ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2260 t = GNUNET_new (struct REGEX_INTERNAL_Transition);
2261 t->label = GNUNET_strdup (label);
2263 t->from_state = start;
2264 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2266 if (GNUNET_NO == cur->marked)
2268 dfa_compress_paths_helper (dfa,
2278 else if (cur != start)
2279 cur->contained = GNUNET_YES;
2281 if ((GNUNET_YES == cur->marked) && (cur != start))
2284 cur->marked = GNUNET_YES;
2287 for (t = cur->transitions_head; NULL != t; t = t->next)
2290 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2292 new_label = GNUNET_strdup (t->label);
2294 if (t->to_state != cur)
2296 dfa_compress_paths_helper (dfa,
2304 GNUNET_free (new_label);
2310 * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be
2311 * compressed to 0->3 by combining transitions.
2313 * @param regex_ctx context for adding new transitions.
2314 * @param dfa DFA representation, will directly modify the given DFA.
2315 * @param max_len maximal length of the compressed paths.
2318 dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx,
2319 struct REGEX_INTERNAL_Automaton *dfa,
2320 unsigned int max_len)
2322 struct REGEX_INTERNAL_State *s;
2323 struct REGEX_INTERNAL_State *s_next;
2324 struct REGEX_INTERNAL_Transition *t;
2325 struct REGEX_INTERNAL_Transition *t_next;
2326 struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2327 struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2332 /* Count the incoming transitions on each state. */
2333 for (s = dfa->states_head; NULL != s; s = s->next)
2335 for (t = s->transitions_head; NULL != t; t = t->next)
2337 if (NULL != t->to_state)
2338 t->to_state->incoming_transition_count++;
2342 /* Unmark all states. */
2343 for (s = dfa->states_head; NULL != s; s = s->next)
2345 s->marked = GNUNET_NO;
2346 s->contained = GNUNET_NO;
2349 /* Add strides and mark states that can be deleted. */
2350 dfa_compress_paths_helper (dfa,
2358 /* Add all the new transitions to the automaton. */
2359 for (t = transitions_head; NULL != t; t = t_next)
2362 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2363 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2364 GNUNET_free_non_null (t->label);
2368 /* Remove marked states (including their incoming and outgoing transitions). */
2369 for (s = dfa->states_head; NULL != s; s = s_next)
2372 if (GNUNET_YES == s->contained)
2373 automaton_remove_state (dfa, s);
2379 * Creates a new NFA fragment. Needs to be cleared using
2380 * automaton_fragment_clear.
2382 * @param start starting state
2383 * @param end end state
2385 * @return new NFA fragment
2387 static struct REGEX_INTERNAL_Automaton *
2388 nfa_fragment_create (struct REGEX_INTERNAL_State *start,
2389 struct REGEX_INTERNAL_State *end)
2391 struct REGEX_INTERNAL_Automaton *n;
2393 n = GNUNET_new (struct REGEX_INTERNAL_Automaton);
2400 if ((NULL == start) || (NULL == end))
2403 automaton_add_state (n, end);
2404 automaton_add_state (n, start);
2416 * Adds a list of states to the given automaton 'n'.
2418 * @param n automaton to which the states should be added
2419 * @param states_head head of the DLL of states
2420 * @param states_tail tail of the DLL of states
2423 nfa_add_states (struct REGEX_INTERNAL_Automaton *n,
2424 struct REGEX_INTERNAL_State *states_head,
2425 struct REGEX_INTERNAL_State *states_tail)
2427 struct REGEX_INTERNAL_State *s;
2429 if ((NULL == n) || (NULL == states_head))
2431 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2435 if (NULL == n->states_head)
2437 n->states_head = states_head;
2438 n->states_tail = states_tail;
2442 if (NULL != states_head)
2444 n->states_tail->next = states_head;
2445 n->states_tail = states_tail;
2448 for (s = states_head; NULL != s; s = s->next)
2454 * Creates a new NFA state. Needs to be freed using automaton_destroy_state.
2456 * @param ctx context
2457 * @param accepting is it an accepting state or not
2459 * @return new NFA state
2461 static struct REGEX_INTERNAL_State *
2462 nfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting)
2464 struct REGEX_INTERNAL_State *s;
2466 s = GNUNET_new (struct REGEX_INTERNAL_State);
2467 s->id = ctx->state_id++;
2468 s->accepting = accepting;
2469 s->marked = GNUNET_NO;
2475 GNUNET_asprintf (&s->name, "s%i", s->id);
2482 * Calculates the closure set for the given set of states.
2484 * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
2485 * @param nfa the NFA containing 's'
2486 * @param states list of states on which to base the closure on
2487 * @param label transitioning label for which to base the closure on,
2488 * pass NULL for epsilon transition
2491 nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret,
2492 struct REGEX_INTERNAL_Automaton *nfa,
2493 struct REGEX_INTERNAL_StateSet *states,
2496 struct REGEX_INTERNAL_State *s;
2498 struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2499 struct REGEX_INTERNAL_State *clsstate;
2500 struct REGEX_INTERNAL_State *currentstate;
2501 struct REGEX_INTERNAL_Transition *ctran;
2503 memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2507 for (i = 0; i < states->off; i++)
2509 s = states->states[i];
2511 /* Add start state to closure only for epsilon closure */
2513 state_set_append (ret, s);
2515 /* initialize work stack */
2516 cls_stack.head = NULL;
2517 cls_stack.tail = NULL;
2518 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2521 while (NULL != (currentstate = cls_stack.tail))
2523 GNUNET_CONTAINER_MDLL_remove (ST,
2528 for (ctran = currentstate->transitions_head; NULL != ctran;
2529 ctran = ctran->next)
2531 if (NULL == (clsstate = ctran->to_state))
2533 if (0 != clsstate->contained)
2535 if (0 != nullstrcmp (label, ctran->label))
2537 state_set_append (ret, clsstate);
2538 GNUNET_CONTAINER_MDLL_insert_tail (ST,
2543 clsstate->contained = 1;
2547 for (i = 0; i < ret->off; i++)
2548 ret->states[i]->contained = 0;
2553 sizeof(struct REGEX_INTERNAL_State *),
2559 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
2561 * @param ctx context
2564 nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx)
2566 struct REGEX_INTERNAL_Automaton *a;
2567 struct REGEX_INTERNAL_Automaton *b;
2568 struct REGEX_INTERNAL_Automaton *new_nfa;
2570 b = ctx->stack_tail;
2571 GNUNET_assert (NULL != b);
2572 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2573 a = ctx->stack_tail;
2574 GNUNET_assert (NULL != a);
2575 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2577 state_add_transition (ctx, a->end, NULL, b->start);
2578 a->end->accepting = 0;
2579 b->end->accepting = 1;
2581 new_nfa = nfa_fragment_create (NULL, NULL);
2582 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2583 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2584 new_nfa->start = a->start;
2585 new_nfa->end = b->end;
2586 new_nfa->state_count += a->state_count + b->state_count;
2587 automaton_fragment_clear (a);
2588 automaton_fragment_clear (b);
2590 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2595 * Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
2597 * @param ctx context
2600 nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx)
2602 struct REGEX_INTERNAL_Automaton *a;
2603 struct REGEX_INTERNAL_Automaton *new_nfa;
2604 struct REGEX_INTERNAL_State *start;
2605 struct REGEX_INTERNAL_State *end;
2607 a = ctx->stack_tail;
2612 GNUNET_ERROR_TYPE_ERROR,
2613 "nfa_add_star_op failed, because there was no element on the stack");
2617 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2619 start = nfa_state_create (ctx, 0);
2620 end = nfa_state_create (ctx, 1);
2622 state_add_transition (ctx, start, NULL, a->start);
2623 state_add_transition (ctx, start, NULL, end);
2624 state_add_transition (ctx, a->end, NULL, a->start);
2625 state_add_transition (ctx, a->end, NULL, end);
2627 a->end->accepting = 0;
2630 new_nfa = nfa_fragment_create (start, end);
2631 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2632 automaton_fragment_clear (a);
2634 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2639 * Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
2641 * @param ctx context
2644 nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx)
2646 struct REGEX_INTERNAL_Automaton *a;
2648 a = ctx->stack_tail;
2653 GNUNET_ERROR_TYPE_ERROR,
2654 "nfa_add_plus_op failed, because there was no element on the stack");
2658 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2660 state_add_transition (ctx, a->end, NULL, a->start);
2662 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2667 * Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
2669 * @param ctx context
2672 nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx)
2674 struct REGEX_INTERNAL_Automaton *a;
2675 struct REGEX_INTERNAL_Automaton *new_nfa;
2676 struct REGEX_INTERNAL_State *start;
2677 struct REGEX_INTERNAL_State *end;
2679 a = ctx->stack_tail;
2683 GNUNET_ERROR_TYPE_ERROR,
2684 "nfa_add_question_op failed, because there was no element on the stack");
2688 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2690 start = nfa_state_create (ctx, 0);
2691 end = nfa_state_create (ctx, 1);
2693 state_add_transition (ctx, start, NULL, a->start);
2694 state_add_transition (ctx, start, NULL, end);
2695 state_add_transition (ctx, a->end, NULL, end);
2697 a->end->accepting = 0;
2699 new_nfa = nfa_fragment_create (start, end);
2700 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2701 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2702 automaton_fragment_clear (a);
2707 * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that
2708 * alternates between a and b (a|b)
2710 * @param ctx context
2713 nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx)
2715 struct REGEX_INTERNAL_Automaton *a;
2716 struct REGEX_INTERNAL_Automaton *b;
2717 struct REGEX_INTERNAL_Automaton *new_nfa;
2718 struct REGEX_INTERNAL_State *start;
2719 struct REGEX_INTERNAL_State *end;
2721 b = ctx->stack_tail;
2722 GNUNET_assert (NULL != b);
2723 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2724 a = ctx->stack_tail;
2725 GNUNET_assert (NULL != a);
2726 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2728 start = nfa_state_create (ctx, 0);
2729 end = nfa_state_create (ctx, 1);
2730 state_add_transition (ctx, start, NULL, a->start);
2731 state_add_transition (ctx, start, NULL, b->start);
2733 state_add_transition (ctx, a->end, NULL, end);
2734 state_add_transition (ctx, b->end, NULL, end);
2736 a->end->accepting = 0;
2737 b->end->accepting = 0;
2740 new_nfa = nfa_fragment_create (start, end);
2741 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2742 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2743 automaton_fragment_clear (a);
2744 automaton_fragment_clear (b);
2746 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2751 * Adds a new nfa fragment to the stack
2753 * @param ctx context
2754 * @param label label for nfa transition
2757 nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2759 struct REGEX_INTERNAL_Automaton *n;
2760 struct REGEX_INTERNAL_State *start;
2761 struct REGEX_INTERNAL_State *end;
2763 GNUNET_assert (NULL != ctx);
2765 start = nfa_state_create (ctx, 0);
2766 end = nfa_state_create (ctx, 1);
2767 state_add_transition (ctx, start, label, end);
2768 n = nfa_fragment_create (start, end);
2769 GNUNET_assert (NULL != n);
2770 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2775 * Initialize a new context
2777 * @param ctx context
2780 REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx)
2784 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2788 ctx->transition_id = 0;
2789 ctx->stack_head = NULL;
2790 ctx->stack_tail = NULL;
2795 * Construct an NFA by parsing the regex string of length 'len'.
2797 * @param regex regular expression string
2798 * @param len length of the string
2800 * @return NFA, needs to be freed using REGEX_INTERNAL_destroy_automaton
2802 struct REGEX_INTERNAL_Automaton *
2803 REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2805 struct REGEX_INTERNAL_Context ctx;
2806 struct REGEX_INTERNAL_Automaton *nfa;
2811 unsigned int altcount;
2812 unsigned int atomcount;
2822 if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2824 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2825 "Could not parse regex. Empty regex string provided.\n");
2829 REGEX_INTERNAL_context_init (&ctx);
2840 for (count = 0; count < len && *regexp; count++, regexp++)
2848 nfa_add_concatenation (&ctx);
2851 GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2852 p[poff].altcount = altcount;
2853 p[poff].atomcount = atomcount;
2862 error_msg = "Cannot append '|' to nothing";
2865 while (--atomcount > 0)
2866 nfa_add_concatenation (&ctx);
2873 error_msg = "Missing opening '('";
2878 /* Ignore this: "()" */
2880 altcount = p[poff].altcount;
2881 atomcount = p[poff].atomcount;
2884 while (--atomcount > 0)
2885 nfa_add_concatenation (&ctx);
2886 for (; altcount > 0; altcount--)
2887 nfa_add_alternation (&ctx);
2889 altcount = p[poff].altcount;
2890 atomcount = p[poff].atomcount;
2897 error_msg = "Cannot append '*' to nothing";
2900 nfa_add_star_op (&ctx);
2906 error_msg = "Cannot append '+' to nothing";
2909 nfa_add_plus_op (&ctx);
2915 error_msg = "Cannot append '?' to nothing";
2918 nfa_add_question_op (&ctx);
2925 nfa_add_concatenation (&ctx);
2927 curlabel[0] = *regexp;
2928 nfa_add_label (&ctx, curlabel);
2935 error_msg = "Unbalanced parenthesis";
2938 while (--atomcount > 0)
2939 nfa_add_concatenation (&ctx);
2940 for (; altcount > 0; altcount--)
2941 nfa_add_alternation (&ctx);
2943 GNUNET_array_grow (p, psize, 0);
2945 nfa = ctx.stack_tail;
2946 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2948 if (NULL != ctx.stack_head)
2950 error_msg = "Creating the NFA failed. NFA stack was not empty!";
2954 /* Remember the regex that was used to generate this NFA */
2955 nfa->regex = GNUNET_strdup (regex);
2957 /* create depth-first numbering of the states for pretty printing */
2958 REGEX_INTERNAL_automaton_traverse (nfa,
2965 /* No multistriding added so far */
2966 nfa->is_multistrided = GNUNET_NO;
2971 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2972 if (NULL != error_msg)
2973 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2975 GNUNET_free_non_null (p);
2977 while (NULL != (nfa = ctx.stack_head))
2979 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2980 REGEX_INTERNAL_automaton_destroy (nfa);
2988 * Create DFA states based on given 'nfa' and starting with 'dfa_state'.
2990 * @param ctx context.
2991 * @param nfa NFA automaton.
2992 * @param dfa DFA automaton.
2993 * @param dfa_state current dfa state, pass epsilon closure of first nfa state
2997 construct_dfa_states (struct REGEX_INTERNAL_Context *ctx,
2998 struct REGEX_INTERNAL_Automaton *nfa,
2999 struct REGEX_INTERNAL_Automaton *dfa,
3000 struct REGEX_INTERNAL_State *dfa_state)
3002 struct REGEX_INTERNAL_Transition *ctran;
3003 struct REGEX_INTERNAL_State *new_dfa_state;
3004 struct REGEX_INTERNAL_State *state_contains;
3005 struct REGEX_INTERNAL_State *state_iter;
3006 struct REGEX_INTERNAL_StateSet tmp;
3007 struct REGEX_INTERNAL_StateSet nfa_set;
3009 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
3011 if ((NULL == ctran->label) || (NULL != ctran->to_state) )
3014 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3015 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3016 state_set_clear (&tmp);
3018 state_contains = NULL;
3019 for (state_iter = dfa->states_head; NULL != state_iter;
3020 state_iter = state_iter->next)
3022 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3024 state_contains = state_iter;
3028 if (NULL == state_contains)
3030 new_dfa_state = dfa_state_create (ctx, &nfa_set);
3031 automaton_add_state (dfa, new_dfa_state);
3032 ctran->to_state = new_dfa_state;
3033 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3037 ctran->to_state = state_contains;
3038 state_set_clear (&nfa_set);
3045 * Construct DFA for the given 'regex' of length 'len'.
3047 * Path compression means, that for example a DFA o -> a -> b -> c -> o will be
3048 * compressed to o -> abc -> o. Note that this parameter influences the
3049 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
3050 * edges with the same label). For example for an application that stores IPv4
3051 * addresses as bitstrings it could make sense to limit the path compression to
3054 * @param regex regular expression string.
3055 * @param len length of the regular expression.
3056 * @param max_path_len limit the path compression length to the
3057 * given value. If set to 1, no path compression is applied. Set to 0 for
3058 * maximal possible path compression (generally not desireable).
3059 * @return DFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
3061 struct REGEX_INTERNAL_Automaton *
3062 REGEX_INTERNAL_construct_dfa (const char *regex,
3064 unsigned int max_path_len)
3066 struct REGEX_INTERNAL_Context ctx;
3067 struct REGEX_INTERNAL_Automaton *dfa;
3068 struct REGEX_INTERNAL_Automaton *nfa;
3069 struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3070 struct REGEX_INTERNAL_StateSet singleton_set;
3072 REGEX_INTERNAL_context_init (&ctx);
3075 nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3079 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3080 "Could not create DFA, because NFA creation failed\n");
3084 dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3086 dfa->regex = GNUNET_strdup (regex);
3088 /* Create DFA start state from epsilon closure */
3089 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3090 state_set_append (&singleton_set, nfa->start);
3091 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3092 state_set_clear (&singleton_set);
3093 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3094 automaton_add_state (dfa, dfa->start);
3096 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3097 REGEX_INTERNAL_automaton_destroy (nfa);
3100 if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3102 REGEX_INTERNAL_automaton_destroy (dfa);
3106 /* Create proofs and hashes for all states */
3107 if (GNUNET_OK != automaton_create_proofs (dfa))
3109 REGEX_INTERNAL_automaton_destroy (dfa);
3113 /* Compress linear DFA paths */
3114 if (1 != max_path_len)
3115 dfa_compress_paths (&ctx, dfa, max_path_len);
3122 * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton data
3125 * @param a automaton to be destroyed
3128 REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a)
3130 struct REGEX_INTERNAL_State *s;
3131 struct REGEX_INTERNAL_State *next_state;
3136 GNUNET_free_non_null (a->regex);
3137 GNUNET_free_non_null (a->canonical_regex);
3139 for (s = a->states_head; NULL != s; s = next_state)
3141 next_state = s->next;
3142 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
3143 automaton_destroy_state (s);
3151 * Evaluates the given string using the given DFA automaton
3153 * @param a automaton, type must be DFA
3154 * @param string string that should be evaluated
3156 * @return 0 if string matches, non-0 otherwise
3159 evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3162 struct REGEX_INTERNAL_State *s;
3163 unsigned int step_len;
3167 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3168 "Tried to evaluate DFA, but NFA automaton given");
3174 /* If the string is empty but the starting state is accepting, we accept. */
3175 if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3178 for (strp = string; NULL != strp && *strp; strp += step_len)
3180 step_len = dfa_move (&s, strp);
3186 if ((NULL != s) && s->accepting)
3194 * Evaluates the given string using the given NFA automaton
3196 * @param a automaton, type must be NFA
3197 * @param string string that should be evaluated
3198 * @return 0 if string matches, non-0 otherwise
3201 evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3205 struct REGEX_INTERNAL_State *s;
3206 struct REGEX_INTERNAL_StateSet sset;
3207 struct REGEX_INTERNAL_StateSet new_sset;
3208 struct REGEX_INTERNAL_StateSet singleton_set;
3214 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3215 "Tried to evaluate NFA, but DFA automaton given");
3219 /* If the string is empty but the starting state is accepting, we accept. */
3220 if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3224 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3225 state_set_append (&singleton_set, a->start);
3226 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3227 state_set_clear (&singleton_set);
3230 for (strp = string; NULL != strp && *strp; strp++)
3233 nfa_closure_set_create (&new_sset, a, &sset, str);
3234 state_set_clear (&sset);
3235 nfa_closure_set_create (&sset, a, &new_sset, 0);
3236 state_set_clear (&new_sset);
3239 for (i = 0; i < sset.off; i++)
3242 if ((NULL != s) && (s->accepting))
3249 state_set_clear (&sset);
3255 * Evaluates the given @a string against the given compiled regex @a a
3257 * @param a automaton
3258 * @param string string to check
3259 * @return 0 if string matches, non-0 otherwise
3262 REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3269 result = evaluate_dfa (a, string);
3273 result = evaluate_nfa (a, string);
3277 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3278 "Evaluating regex failed, automaton has no type!\n");
3279 result = GNUNET_SYSERR;
3288 * Get the canonical regex of the given automaton.
3289 * When constructing the automaton a proof is computed for each state,
3290 * consisting of the regular expression leading to this state. A complete
3291 * regex for the automaton can be computed by combining these proofs.
3292 * As of now this function is only useful for testing.
3294 * @param a automaton for which the canonical regex should be returned.
3299 REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a)
3304 return a->canonical_regex;
3309 * Get the number of transitions that are contained in the given automaton.
3311 * @param a automaton for which the number of transitions should be returned.
3313 * @return number of transitions in the given automaton.
3316 REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a)
3318 unsigned int t_count;
3319 struct REGEX_INTERNAL_State *s;
3325 for (s = a->states_head; NULL != s; s = s->next)
3326 t_count += s->transition_count;
3333 * Get the first key for the given @a input_string. This hashes the first x bits
3334 * of the @a input_string.
3336 * @param input_string string.
3337 * @param string_len length of the @a input_string.
3338 * @param key pointer to where to write the hash code.
3339 * @return number of bits of @a input_string that have been consumed
3340 * to construct the key
3343 REGEX_INTERNAL_get_first_key (const char *input_string,
3345 struct GNUNET_HashCode *key)
3349 size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3350 : GNUNET_REGEX_INITIAL_BYTES;
3351 if (NULL == input_string)
3353 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3356 GNUNET_CRYPTO_hash (input_string, size, key);
3363 * Recursive function that calls the iterator for each synthetic start state.
3365 * @param min_len minimum length of the path in the graph.
3366 * @param max_len maximum length of the path in the graph.
3367 * @param consumed_string string consumed by traversing the graph till this state.
3368 * @param state current state of the automaton.
3369 * @param iterator iterator function called for each edge.
3370 * @param iterator_cls closure for the @a iterator function.
3373 iterate_initial_edge (unsigned int min_len,
3374 unsigned int max_len,
3375 char *consumed_string,
3376 struct REGEX_INTERNAL_State *state,
3377 REGEX_INTERNAL_KeyIterator iterator,
3381 struct REGEX_INTERNAL_Transition *t;
3382 unsigned int num_edges = state->transition_count;
3383 struct REGEX_BLOCK_Edge edges[num_edges];
3384 struct REGEX_BLOCK_Edge edge[1];
3385 struct GNUNET_HashCode hash;
3386 struct GNUNET_HashCode hash_new;
3387 unsigned int cur_len;
3389 if (NULL != consumed_string)
3390 cur_len = strlen (consumed_string);
3394 if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3395 (cur_len > 0) && (NULL != consumed_string))
3397 if (cur_len <= max_len)
3399 if ((NULL != state->proof) &&
3400 (0 != strcmp (consumed_string, state->proof)))
3402 (void) state_get_edges (state, edges);
3403 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3404 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
3405 "Start state for string `%s' is %s\n",
3407 GNUNET_h2s (&hash));
3408 iterator (iterator_cls,
3416 if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3417 (state->transition_count < 1) && (cur_len < max_len))
3419 /* Special case for regex consisting of just a string that is shorter than
3421 edge[0].label = &consumed_string[cur_len - 1];
3422 edge[0].destination = state->hash;
3423 temp = GNUNET_strdup (consumed_string);
3424 temp[cur_len - 1] = '\0';
3425 GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3426 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
3427 "Start state for short string `%s' is %s\n",
3429 GNUNET_h2s (&hash_new));
3430 iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3434 else /* cur_len > max_len */
3436 /* Case where the concatenated labels are longer than max_len, then split. */
3437 edge[0].label = &consumed_string[max_len];
3438 edge[0].destination = state->hash;
3439 temp = GNUNET_strdup (consumed_string);
3440 temp[max_len] = '\0';
3441 GNUNET_CRYPTO_hash (temp, max_len, &hash);
3442 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
3443 "Start state at split edge `%s'-`%s` is %s\n",
3446 GNUNET_h2s (&hash_new));
3447 iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3452 if (cur_len < max_len)
3454 for (t = state->transitions_head; NULL != t; t = t->next)
3456 if (NULL != strchr (t->label, (int) '.'))
3458 /* Wildcards not allowed during starting states */
3462 if (NULL != consumed_string)
3463 GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3465 GNUNET_asprintf (&temp, "%s", t->label);
3466 iterate_initial_edge (min_len,
3479 * Iterate over all edges starting from start state of automaton 'a'. Calling
3480 * iterator for each edge.
3482 * @param a automaton.
3483 * @param iterator iterator called for each edge.
3484 * @param iterator_cls closure.
3487 REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
3488 REGEX_INTERNAL_KeyIterator iterator,
3491 struct REGEX_INTERNAL_State *s;
3493 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3494 iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES,
3495 GNUNET_REGEX_INITIAL_BYTES,
3500 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3501 for (s = a->states_head; NULL != s; s = s->next)
3503 struct REGEX_BLOCK_Edge edges[s->transition_count];
3504 unsigned int num_edges;
3506 num_edges = state_get_edges (s, edges);
3507 if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3509 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
3510 "Creating DFA edges at `%s' under key %s\n",
3512 GNUNET_h2s (&s->hash));
3513 iterator (iterator_cls,
3520 s->marked = GNUNET_NO;
3526 * Struct to hold all the relevant state information in the HashMap.
3528 * Contains the same info as the Regex Iterator parametes except the key,
3529 * which comes directly from the HashMap iterator.
3531 struct temporal_state_store
3537 struct REGEX_BLOCK_Edge *edges;
3542 * Store regex iterator and cls in one place to pass to the hashmap iterator.
3544 struct client_iterator
3546 REGEX_INTERNAL_KeyIterator iterator;
3552 * Iterator over all edges of a dfa. Stores all of them in a HashMap
3553 * for later reachability marking.
3555 * @param cls Closure (HashMap)
3556 * @param key hash for current state.
3557 * @param proof proof for current state
3558 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
3559 * @param num_edges number of edges leaving current state.
3560 * @param edges edges leaving current state.
3563 store_all_states (void *cls,
3564 const struct GNUNET_HashCode *key,
3567 unsigned int num_edges,
3568 const struct REGEX_BLOCK_Edge *edges)
3570 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3571 struct temporal_state_store *tmp;
3574 tmp = GNUNET_new (struct temporal_state_store);
3575 tmp->reachable = GNUNET_NO;
3576 tmp->proof = GNUNET_strdup (proof);
3577 tmp->accepting = accepting;
3578 tmp->num_edges = num_edges;
3579 edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3580 tmp->edges = GNUNET_malloc (edges_size);
3581 GNUNET_memcpy (tmp->edges, edges, edges_size);
3582 GNUNET_assert (GNUNET_YES ==
3583 GNUNET_CONTAINER_multihashmap_put (
3587 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST));
3592 * Mark state as reachable and call recursively on all its edges.
3594 * If already marked as reachable, do nothing.
3596 * @param state State to mark as reachable.
3597 * @param hm HashMap which stores all the states indexed by key.
3600 mark_as_reachable (struct temporal_state_store *state,
3601 struct GNUNET_CONTAINER_MultiHashMap *hm)
3603 struct temporal_state_store *child;
3606 if (GNUNET_YES == state->reachable)
3610 state->reachable = GNUNET_YES;
3611 for (i = 0; i < state->num_edges; i++)
3614 GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination);
3620 mark_as_reachable (child, hm);
3626 * Iterator over hash map entries to mark the ones that are reachable.
3628 * @param cls closure
3629 * @param key current key code
3630 * @param value value in the hash map
3631 * @return #GNUNET_YES if we should continue to iterate,
3632 * #GNUNET_NO if not.
3635 reachability_iterator (void *cls,
3636 const struct GNUNET_HashCode *key,
3639 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3640 struct temporal_state_store *state = value;
3642 if (GNUNET_YES == state->reachable)
3643 /* already visited and marked */
3646 if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3647 (GNUNET_NO == state->accepting) )
3648 /* not directly reachable */
3651 mark_as_reachable (state, hm);
3657 * Iterator over hash map entries.
3658 * Calling the callback on the ones marked as reachables.
3660 * @param cls closure
3661 * @param key current key code
3662 * @param value value in the hash map
3663 * @return #GNUNET_YES if we should continue to iterate,
3664 * #GNUNET_NO if not.
3667 iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
3669 struct client_iterator *ci = cls;
3670 struct temporal_state_store *state = value;
3672 if (GNUNET_YES == state->reachable)
3674 ci->iterator (ci->iterator_cls,
3681 GNUNET_free (state->edges);
3682 GNUNET_free (state->proof);
3683 GNUNET_free (state);
3689 * Iterate over all edges of automaton 'a' that are reachable from a state with
3690 * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.
3692 * Call the iterator for each such edge.
3694 * @param a automaton.
3695 * @param iterator iterator called for each reachable edge.
3696 * @param iterator_cls closure.
3699 REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
3700 REGEX_INTERNAL_KeyIterator iterator,
3703 struct GNUNET_CONTAINER_MultiHashMap *hm;
3704 struct client_iterator ci;
3706 hm = GNUNET_CONTAINER_multihashmap_create (a->state_count * 2, GNUNET_NO);
3707 ci.iterator = iterator;
3708 ci.iterator_cls = iterator_cls;
3710 REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, hm);
3711 GNUNET_CONTAINER_multihashmap_iterate (hm, &reachability_iterator, hm);
3712 GNUNET_CONTAINER_multihashmap_iterate (hm, &iterate_reachables, &ci);
3714 GNUNET_CONTAINER_multihashmap_destroy (hm);
3718 /* end of regex_internal.c */