2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_internal.c
22 * @brief library to create Deterministic Finite Automatons (DFAs) from regular
23 * expressions (regexes).
24 * @author Maximilian Szengel
27 #include "gnunet_util_lib.h"
28 #include "gnunet_regex_service.h"
29 #include "regex_internal_lib.h"
30 #include "regex_internal.h"
34 * Set this to GNUNET_YES to enable state naming. Used to debug NFA->DFA
35 * creation. Disabled by default for better performance.
37 #define REGEX_DEBUG_DFA GNUNET_NO
40 * Set of states using MDLL API.
42 struct REGEX_INTERNAL_StateSet_MDLL
47 struct REGEX_INTERNAL_State *head;
52 struct REGEX_INTERNAL_State *tail;
62 * Append state to the given StateSet '
64 * @param set set to be modified
65 * @param state state to be appended
68 state_set_append (struct REGEX_INTERNAL_StateSet *set,
69 struct REGEX_INTERNAL_State *state)
71 if (set->off == set->size)
72 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73 set->states[set->off++] = state;
78 * Compare two strings for equality. If either is NULL they are not equal.
80 * @param str1 first string for comparison.
81 * @param str2 second string for comparison.
83 * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
86 nullstrcmp (const char *str1, const char *str2)
88 if ((NULL == str1) != (NULL == str2))
90 if ((NULL == str1) && (NULL == str2))
93 return strcmp (str1, str2);
98 * Adds a transition from one state to another on 'label'. Does not add
102 * @param from_state starting state for the transition
103 * @param label transition label
104 * @param to_state state to where the transition should point to
107 state_add_transition (struct REGEX_INTERNAL_Context *ctx,
108 struct REGEX_INTERNAL_State *from_state, const char *label,
109 struct REGEX_INTERNAL_State *to_state)
111 struct REGEX_INTERNAL_Transition *t;
112 struct REGEX_INTERNAL_Transition *oth;
114 if (NULL == from_state)
116 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
120 /* Do not add duplicate state transitions */
121 for (t = from_state->transitions_head; NULL != t; t = t->next)
123 if (t->to_state == to_state && 0 == nullstrcmp (t->label, label) &&
124 t->from_state == from_state)
128 /* sort transitions by label */
129 for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131 if (0 < nullstrcmp (oth->label, label))
135 t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
137 t->id = ctx->transition_id++;
139 t->label = GNUNET_strdup (label);
142 t->to_state = to_state;
143 t->from_state = from_state;
145 /* Add outgoing transition to 'from_state' */
146 from_state->transition_count++;
147 GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head,
148 from_state->transitions_tail, oth, t);
153 * Remove a 'transition' from 'state'.
155 * @param state state from which the to-be-removed transition originates.
156 * @param transition transition that should be removed from state 'state'.
159 state_remove_transition (struct REGEX_INTERNAL_State *state,
160 struct REGEX_INTERNAL_Transition *transition)
162 if (NULL == state || NULL == transition)
165 if (transition->from_state != state)
168 GNUNET_free_non_null (transition->label);
170 state->transition_count--;
171 GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail,
174 GNUNET_free (transition);
179 * Compare two states. Used for sorting.
181 * @param a first state
182 * @param b second state
184 * @return an integer less than, equal to, or greater than zero
185 * if the first argument is considered to be respectively
186 * less than, equal to, or greater than the second.
189 state_compare (const void *a, const void *b)
191 struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
192 struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
194 return (*s1)->id - (*s2)->id;
199 * Get all edges leaving state 's'.
202 * @param edges all edges leaving 's', expected to be allocated and have enough
203 * space for s->transitions_count elements.
205 * @return number of edges.
208 state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
210 struct REGEX_INTERNAL_Transition *t;
218 for (t = s->transitions_head; NULL != t; t = t->next)
220 if (NULL != t->to_state)
222 edges[count].label = t->label;
223 edges[count].destination = t->to_state->hash;
232 * Compare to state sets by comparing the id's of the states that are contained
233 * in each set. Both sets are expected to be sorted by id!
235 * @param sset1 first state set
236 * @param sset2 second state set
237 * @return 0 if the sets are equal, otherwise non-zero
240 state_set_compare (struct REGEX_INTERNAL_StateSet *sset1,
241 struct REGEX_INTERNAL_StateSet *sset2)
246 if (NULL == sset1 || NULL == sset2)
249 result = sset1->off - sset2->off;
254 for (i = 0; i < sset1->off; i++)
255 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
262 * Clears the given StateSet 'set'
264 * @param set set to be cleared
267 state_set_clear (struct REGEX_INTERNAL_StateSet *set)
269 GNUNET_array_grow (set->states, set->size, 0);
275 * Clears an automaton fragment. Does not destroy the states inside the
278 * @param a automaton to be cleared
281 automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a)
288 a->states_head = NULL;
289 a->states_tail = NULL;
296 * Frees the memory used by State 's'
298 * @param s state that should be destroyed
301 automaton_destroy_state (struct REGEX_INTERNAL_State *s)
303 struct REGEX_INTERNAL_Transition *t;
304 struct REGEX_INTERNAL_Transition *next_t;
309 GNUNET_free_non_null (s->name);
310 GNUNET_free_non_null (s->proof);
311 state_set_clear (&s->nfa_set);
312 for (t = s->transitions_head; NULL != t; t = next_t)
315 state_remove_transition (s, t);
323 * Remove a state from the given automaton 'a'. Always use this function when
324 * altering the states of an automaton. Will also remove all transitions leading
325 * to this state, before destroying it.
328 * @param s state to remove
331 automaton_remove_state (struct REGEX_INTERNAL_Automaton *a,
332 struct REGEX_INTERNAL_State *s)
334 struct REGEX_INTERNAL_State *s_check;
335 struct REGEX_INTERNAL_Transition *t_check;
336 struct REGEX_INTERNAL_Transition *t_check_next;
338 if (NULL == a || NULL == s)
341 /* remove all transitions leading to this state */
342 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
344 for (t_check = s_check->transitions_head; NULL != t_check;
345 t_check = t_check_next)
347 t_check_next = t_check->next;
348 if (t_check->to_state == s)
349 state_remove_transition (s_check, t_check);
354 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
357 automaton_destroy_state (s);
362 * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy
363 * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.
367 * @param s1 first state
368 * @param s2 second state, will be destroyed
371 automaton_merge_states (struct REGEX_INTERNAL_Context *ctx,
372 struct REGEX_INTERNAL_Automaton *a,
373 struct REGEX_INTERNAL_State *s1,
374 struct REGEX_INTERNAL_State *s2)
376 struct REGEX_INTERNAL_State *s_check;
377 struct REGEX_INTERNAL_Transition *t_check;
378 struct REGEX_INTERNAL_Transition *t;
379 struct REGEX_INTERNAL_Transition *t_next;
385 /* 1. Make all transitions pointing to s2 point to s1, unless this transition
386 * does not already exists, if it already exists remove transition. */
387 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
389 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
391 t_next = t_check->next;
393 if (s2 == t_check->to_state)
396 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
398 if (t->to_state == s1 && 0 == strcmp (t_check->label, t->label))
401 if (GNUNET_NO == is_dup)
402 t_check->to_state = s1;
404 state_remove_transition (t_check->from_state, t_check);
409 /* 2. Add all transitions from s2 to sX to s1 */
410 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
412 if (t_check->to_state != s1)
413 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
416 /* 3. Rename s1 to {s1,s2} */
421 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
422 GNUNET_free (new_name);
426 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2);
428 automaton_destroy_state (s2);
433 * Add a state to the automaton 'a', always use this function to alter the
434 * states DLL of the automaton.
436 * @param a automaton to add the state to
437 * @param s state that should be added
440 automaton_add_state (struct REGEX_INTERNAL_Automaton *a,
441 struct REGEX_INTERNAL_State *s)
443 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
449 * Depth-first traversal (DFS) of all states that are reachable from state
450 * 's'. Performs 'action' on each visited state.
452 * @param s start state.
453 * @param marks an array of size a->state_count to remember which state was
455 * @param count current count of the state.
456 * @param check function that is checked before advancing on each transition
458 * @param check_cls closure for check.
459 * @param action action to be performed on each state.
460 * @param action_cls closure for action.
463 automaton_state_traverse (struct REGEX_INTERNAL_State *s, int *marks,
465 REGEX_INTERNAL_traverse_check check, void *check_cls,
466 REGEX_INTERNAL_traverse_action action, void *action_cls)
468 struct REGEX_INTERNAL_Transition *t;
470 if (GNUNET_YES == marks[s->traversal_id])
473 marks[s->traversal_id] = GNUNET_YES;
476 action (action_cls, *count, s);
480 for (t = s->transitions_head; NULL != t; t = t->next)
483 (NULL != check && GNUNET_YES == check (check_cls, s, t)))
485 automaton_state_traverse (t->to_state, marks, count, check, check_cls,
493 * Traverses the given automaton using depth-first-search (DFS) from it's start
494 * state, visiting all reachable states and calling 'action' on each one of
497 * @param a automaton to be traversed.
498 * @param start start state, pass a->start or NULL to traverse the whole automaton.
499 * @param check function that is checked before advancing on each transition
501 * @param check_cls closure for check.
502 * @param action action to be performed on each state.
503 * @param action_cls closure for action
506 REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
507 struct REGEX_INTERNAL_State *start,
508 REGEX_INTERNAL_traverse_check check,
510 REGEX_INTERNAL_traverse_action action,
514 struct REGEX_INTERNAL_State *s;
516 if (NULL == a || 0 == a->state_count)
519 int marks[a->state_count];
521 for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
522 s = s->next, count++)
524 s->traversal_id = count;
525 marks[s->traversal_id] = GNUNET_NO;
535 automaton_state_traverse (s, marks, &count, check, check_cls, action,
541 * String container for faster string operations.
546 * Buffer holding the string (may start in the middle!);
557 * Length of the string in the buffer.
562 * Number of bytes allocated for 'sbuf'
567 * Buffer currently represents "NULL" (not the empty string!)
572 * If this entry is part of the last/current generation array,
573 * this flag is GNUNET_YES if the last and current generation are
574 * identical (and thus copying is unnecessary if the value didn't
575 * change). This is used in an optimization that improves
576 * performance by about 1% --- if we use int16_t here. With just
577 * "int" for both flags, performance drops (on my system) significantly,
578 * most likely due to increased cache misses.
586 * Compare two strings for equality. If either is NULL they are not equal.
588 * @param s1 first string for comparison.
589 * @param s2 second string for comparison.
591 * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
594 sb_nullstrcmp (const struct StringBuffer *s1,
595 const struct StringBuffer *s2)
597 if ( (GNUNET_YES == s1->null_flag) &&
598 (GNUNET_YES == s2->null_flag) )
600 if ( (GNUNET_YES == s1->null_flag) ||
601 (GNUNET_YES == s2->null_flag) )
603 if (s1->slen != s2->slen)
605 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
610 * Compare two strings for equality.
612 * @param s1 first string for comparison.
613 * @param s2 second string for comparison.
615 * @return 0 if the strings are the same, 1 or -1 if not.
618 sb_strcmp (const struct StringBuffer *s1,
619 const struct StringBuffer *s2)
621 if (s1->slen != s2->slen)
623 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
628 * Reallocate the buffer of 'ret' to fit 'nlen' characters;
629 * move the existing string to the beginning of the new buffer.
631 * @param ret current buffer, to be updated
632 * @param nlen target length for the buffer, must be at least ret->slen
635 sb_realloc (struct StringBuffer *ret,
640 GNUNET_assert (nlen >= ret->slen);
642 ret->abuf = GNUNET_malloc (nlen);
647 ret->sbuf = ret->abuf;
648 GNUNET_free_non_null (old);
655 * @param ret where to write the result
656 * @param sarg string to append
659 sb_append (struct StringBuffer *ret,
660 const struct StringBuffer *sarg)
662 if (GNUNET_YES == ret->null_flag)
664 ret->null_flag = GNUNET_NO;
665 if (ret->blen < sarg->slen + ret->slen)
666 sb_realloc (ret, ret->blen + sarg->slen + 128);
667 memcpy (&ret->sbuf[ret->slen],
670 ret->slen += sarg->slen;
677 * @param ret where to write the result
678 * @param cstr string to append
681 sb_append_cstr (struct StringBuffer *ret,
684 size_t cstr_len = strlen (cstr);
686 if (GNUNET_YES == ret->null_flag)
688 ret->null_flag = GNUNET_NO;
689 if (ret->blen < cstr_len + ret->slen)
690 sb_realloc (ret, ret->blen + cstr_len + 128);
691 memcpy (&ret->sbuf[ret->slen],
694 ret->slen += cstr_len;
699 * Wrap a string buffer, that is, set ret to the format string
700 * which contains an "%s" which is to be replaced with the original
701 * content of 'ret'. Note that optimizing this function is not
702 * really worth it, it is rarely called.
704 * @param ret where to write the result and take the input for %.*s from
705 * @param format format string, fprintf-style, with exactly one "%.*s"
706 * @param extra_chars how long will the result be, in addition to 'sarg' length
709 sb_wrap (struct StringBuffer *ret,
715 if (GNUNET_YES == ret->null_flag)
717 ret->null_flag = GNUNET_NO;
718 temp = GNUNET_malloc (ret->slen + extra_chars + 1);
719 GNUNET_snprintf (temp,
720 ret->slen + extra_chars + 1,
724 GNUNET_free_non_null (ret->abuf);
727 ret->blen = ret->slen + extra_chars + 1;
728 ret->slen = ret->slen + extra_chars;
733 * Format a string buffer. Note that optimizing this function is not
734 * really worth it, it is rarely called.
736 * @param ret where to write the result
737 * @param format format string, fprintf-style, with exactly one "%.*s"
738 * @param extra_chars how long will the result be, in addition to 'sarg' length
739 * @param sarg string to print into the format
742 sb_printf1 (struct StringBuffer *ret,
745 const struct StringBuffer *sarg)
747 if (ret->blen < sarg->slen + extra_chars + 1)
749 sarg->slen + extra_chars + 1);
750 ret->null_flag = GNUNET_NO;
751 ret->sbuf = ret->abuf;
752 ret->slen = sarg->slen + extra_chars;
753 GNUNET_snprintf (ret->sbuf,
762 * Format a string buffer.
764 * @param ret where to write the result
765 * @param format format string, fprintf-style, with exactly two "%.*s"
766 * @param extra_chars how long will the result be, in addition to 'sarg1/2' length
767 * @param sarg1 first string to print into the format
768 * @param sarg2 second string to print into the format
771 sb_printf2 (struct StringBuffer *ret,
774 const struct StringBuffer *sarg1,
775 const struct StringBuffer *sarg2)
777 if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
779 sarg1->slen + sarg2->slen + extra_chars + 1);
780 ret->null_flag = GNUNET_NO;
781 ret->slen = sarg1->slen + sarg2->slen + extra_chars;
782 ret->sbuf = ret->abuf;
783 GNUNET_snprintf (ret->sbuf,
794 * Format a string buffer. Note that optimizing this function is not
795 * really worth it, it is rarely called.
797 * @param ret where to write the result
798 * @param format format string, fprintf-style, with exactly three "%.*s"
799 * @param extra_chars how long will the result be, in addition to 'sarg1/2/3' length
800 * @param sarg1 first string to print into the format
801 * @param sarg2 second string to print into the format
802 * @param sarg3 third string to print into the format
805 sb_printf3 (struct StringBuffer *ret,
808 const struct StringBuffer *sarg1,
809 const struct StringBuffer *sarg2,
810 const struct StringBuffer *sarg3)
812 if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
814 sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
815 ret->null_flag = GNUNET_NO;
816 ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
817 ret->sbuf = ret->abuf;
818 GNUNET_snprintf (ret->sbuf,
831 * Free resources of the given string buffer.
833 * @param sb buffer to free (actual pointer is not freed, as they
834 * should not be individually allocated)
837 sb_free (struct StringBuffer *sb)
839 GNUNET_array_grow (sb->abuf,
844 sb->null_flag= GNUNET_YES;
849 * Copy the given string buffer from 'in' to 'out'.
851 * @param in input string
852 * @param out output string
855 sb_strdup (struct StringBuffer *out,
856 const struct StringBuffer *in)
859 out->null_flag = in->null_flag;
860 if (GNUNET_YES == out->null_flag)
862 if (out->blen < in->slen)
864 GNUNET_array_grow (out->abuf,
868 out->sbuf = out->abuf;
869 out->slen = in->slen;
870 memcpy (out->sbuf, in->sbuf, out->slen);
875 * Copy the given string buffer from 'in' to 'out'.
877 * @param cstr input string
878 * @param out output string
881 sb_strdup_cstr (struct StringBuffer *out,
886 out->null_flag = GNUNET_YES;
889 out->null_flag = GNUNET_NO;
890 out->slen = strlen (cstr);
891 if (out->blen < out->slen)
893 GNUNET_array_grow (out->abuf,
897 out->sbuf = out->abuf;
898 memcpy (out->sbuf, cstr, out->slen);
903 * Check if the given string 'str' needs parentheses around it when
904 * using it to generate a regex.
908 * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise
911 needs_parentheses (const struct StringBuffer *str)
920 if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
925 end = str->sbuf + slen;
930 cl = memchr (pos, ')', end - pos);
936 /* while '(' before ')', count opening parens */
937 while ( (NULL != (op = memchr (pos, '(', end - pos))) &&
947 return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
952 * Remove parentheses surrounding string 'str'.
953 * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
954 * You need to GNUNET_free the returned string.
956 * @param str string, modified to contain a
957 * @return string without surrounding parentheses, string 'str' if no preceding
958 * epsilon could be found, NULL if 'str' was NULL
961 remove_parentheses (struct StringBuffer *str)
974 if ( (GNUNET_YES == str->null_flag) ||
975 (1 >= (slen = str->slen)) ||
976 ('(' != str->sbuf[0]) ||
977 (')' != str->sbuf[slen - 1]) )
981 end = &sbuf[slen - 1];
982 op = memchr (pos, '(', end - pos);
983 cp = memchr (pos, ')', end - pos);
986 while ( (NULL != op) &&
991 op = memchr (pos, '(', end - pos);
993 while ( (NULL != cp) &&
998 return; /* can't strip parens */
1001 cp = memchr (pos, ')', end - pos);
1015 * Check if the string 'str' starts with an epsilon (empty string).
1016 * Example: "(|a)" is starting with an epsilon.
1018 * @param str string to test
1020 * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'
1023 has_epsilon (const struct StringBuffer *str)
1026 (GNUNET_YES != str->null_flag) &&
1028 ('(' == str->sbuf[0]) &&
1029 ('|' == str->sbuf[1]) &&
1030 (')' == str->sbuf[str->slen - 1]);
1035 * Remove an epsilon from the string str. Where epsilon is an empty string
1036 * Example: str = "(|a|b|c)", result: "a|b|c"
1037 * The returned string needs to be freed.
1039 * @param str original string
1040 * @param ret where to return string without preceding epsilon, string 'str' if no preceding
1041 * epsilon could be found, NULL if 'str' was NULL
1044 remove_epsilon (const struct StringBuffer *str,
1045 struct StringBuffer *ret)
1047 if (GNUNET_YES == str->null_flag)
1049 ret->null_flag = GNUNET_YES;
1052 if ( (str->slen > 1) &&
1053 ('(' == str->sbuf[0]) &&
1054 ('|' == str->sbuf[1]) &&
1055 (')' == str->sbuf[str->slen - 1]) )
1057 /* remove epsilon */
1058 if (ret->blen < str->slen - 3)
1060 GNUNET_array_grow (ret->abuf,
1064 ret->sbuf = ret->abuf;
1065 ret->slen = str->slen - 3;
1066 memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1069 sb_strdup (ret, str);
1074 * Compare n bytes of 'str1' and 'str2'
1076 * @param str1 first string to compare
1077 * @param str2 second string for comparison
1078 * @param n number of bytes to compare
1080 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1083 sb_strncmp (const struct StringBuffer *str1,
1084 const struct StringBuffer *str2, size_t n)
1088 if ( (str1->slen != str2->slen) &&
1089 ( (str1->slen < n) ||
1090 (str2->slen < n) ) )
1092 max = GNUNET_MAX (str1->slen, str2->slen);
1095 return memcmp (str1->sbuf, str2->sbuf, max);
1100 * Compare n bytes of 'str1' and 'str2'
1102 * @param str1 first string to compare
1103 * @param str2 second C string for comparison
1104 * @param n number of bytes to compare (and length of str2)
1106 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1109 sb_strncmp_cstr (const struct StringBuffer *str1,
1110 const char *str2, size_t n)
1114 return memcmp (str1->sbuf, str2, n);
1119 * Initialize string buffer for storing strings of up to n
1122 * @param sb buffer to initialize
1123 * @param n desired target length
1126 sb_init (struct StringBuffer *sb,
1129 sb->null_flag = GNUNET_NO;
1130 sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1137 * Compare 'str1', starting from position 'k', with whole 'str2'
1139 * @param str1 first string to compare, starting from position 'k'
1140 * @param str2 second string for comparison
1141 * @param k starting position in 'str1'
1143 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1146 sb_strkcmp (const struct StringBuffer *str1,
1147 const struct StringBuffer *str2, size_t k)
1149 if ( (GNUNET_YES == str1->null_flag) ||
1150 (GNUNET_YES == str2->null_flag) ||
1152 (str1->slen - k != str2->slen) )
1154 return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1159 * Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse'
1160 * function to create the depth-first numbering of the states.
1162 * @param cls states array.
1163 * @param count current state counter.
1164 * @param s current state.
1167 number_states (void *cls, const unsigned int count,
1168 struct REGEX_INTERNAL_State *s)
1170 struct REGEX_INTERNAL_State **states = cls;
1180 ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1181 ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1185 * Construct the regular expression given the inductive step,
1186 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
1187 * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
1189 * @param R_last_ij value of $R^{(k-1)_{ij}.
1190 * @param R_last_ik value of $R^{(k-1)_{ik}.
1191 * @param R_last_kk value of $R^{(k-1)_{kk}.
1192 * @param R_last_kj value of $R^{(k-1)_{kj}.
1193 * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij
1194 * is expected to be NULL when called!
1195 * @param R_cur_l optimization -- kept between iterations to avoid realloc
1196 * @param R_cur_r optimization -- kept between iterations to avoid realloc
1199 automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
1200 const struct StringBuffer *R_last_ik,
1201 const struct StringBuffer *R_last_kk,
1202 const struct StringBuffer *R_last_kj,
1203 struct StringBuffer *R_cur_ij,
1204 struct StringBuffer *R_cur_l,
1205 struct StringBuffer *R_cur_r)
1207 struct StringBuffer R_temp_ij;
1208 struct StringBuffer R_temp_ik;
1209 struct StringBuffer R_temp_kj;
1210 struct StringBuffer R_temp_kk;
1216 int clean_ik_kk_cmp;
1217 int clean_kk_kj_cmp;
1223 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1224 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1225 * R_cur_ij = R_cur_l | R_cur_r
1226 * R_cur_l == R^{(k-1)}_{ij}
1227 * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1230 if ( (GNUNET_YES == R_last_ij->null_flag) &&
1231 ( (GNUNET_YES == R_last_ik->null_flag) ||
1232 (GNUNET_YES == R_last_kj->null_flag)))
1234 /* R^{(k)}_{ij} = N | N */
1235 R_cur_ij->null_flag = GNUNET_YES;
1236 R_cur_ij->synced = GNUNET_NO;
1240 if ( (GNUNET_YES == R_last_ik->null_flag) ||
1241 (GNUNET_YES == R_last_kj->null_flag) )
1243 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1244 if (GNUNET_YES == R_last_ij->synced)
1246 R_cur_ij->synced = GNUNET_YES;
1247 R_cur_ij->null_flag = GNUNET_NO;
1250 R_cur_ij->synced = GNUNET_YES;
1251 sb_strdup (R_cur_ij, R_last_ij);
1254 R_cur_ij->synced = GNUNET_NO;
1256 /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1257 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1259 R_cur_r->null_flag = GNUNET_YES;
1261 R_cur_l->null_flag = GNUNET_YES;
1264 /* cache results from strcmp, we might need these many times */
1265 ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1266 ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1267 ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1268 kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1270 /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1271 * as parentheses, so we can better compare the contents */
1273 memset (&R_temp_ij, 0, sizeof (struct StringBuffer));
1274 memset (&R_temp_ik, 0, sizeof (struct StringBuffer));
1275 memset (&R_temp_kk, 0, sizeof (struct StringBuffer));
1276 memset (&R_temp_kj, 0, sizeof (struct StringBuffer));
1277 remove_epsilon (R_last_ik, &R_temp_ik);
1278 remove_epsilon (R_last_kk, &R_temp_kk);
1279 remove_epsilon (R_last_kj, &R_temp_kj);
1280 remove_parentheses (&R_temp_ik);
1281 remove_parentheses (&R_temp_kk);
1282 remove_parentheses (&R_temp_kj);
1283 clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1284 clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1286 /* construct R_cur_l (and, if necessary R_cur_r) */
1287 if (GNUNET_YES != R_last_ij->null_flag)
1289 /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1290 * as parentheses, so we can better compare the contents */
1291 remove_epsilon (R_last_ij, &R_temp_ij);
1292 remove_parentheses (&R_temp_ij);
1294 if ( (0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1295 (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1296 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1298 if (0 == R_temp_ij.slen)
1300 R_cur_r->null_flag = GNUNET_NO;
1302 else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1303 (0 == sb_strncmp_cstr (R_last_ik, "(|", 2) &&
1304 0 == sb_strncmp_cstr (R_last_kj, "(|", 2)))
1307 * a|(e|a)a*(e|a) = a*
1308 * a|(e|a)(e|a)*(e|a) = a*
1310 * (e|a)|aa*(e|a) = a*
1311 * (e|a)|(e|a)a*a = a*
1312 * (e|a)|(e|a)a*(e|a) = a*
1313 * (e|a)|(e|a)(e|a)*(e|a) = a*
1315 if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1316 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1318 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1326 * a|(e|a)(e|a)*a = a+
1327 * a|a(e|a)*(e|a) = a+
1329 if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1330 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1332 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1335 else if ( (0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) && (0 != clean_ik_kk_cmp) )
1338 if (0 == R_last_kk->slen)
1339 sb_strdup (R_cur_r, R_last_ij);
1340 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1341 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1343 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1344 R_cur_l->null_flag = GNUNET_YES;
1346 else if ( (0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) && (0 != clean_kk_kj_cmp))
1349 if (R_last_kk->slen < 1)
1351 sb_strdup (R_cur_r, R_last_kj);
1353 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1354 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1356 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1358 R_cur_l->null_flag = GNUNET_YES;
1360 else if ( (0 == ij_ik_cmp) && (0 == kk_kj_cmp) && (! has_epsilon (R_last_ij)) &&
1361 has_epsilon (R_last_kk))
1363 /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1364 if (needs_parentheses (&R_temp_kk))
1365 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1367 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1368 R_cur_l->null_flag = GNUNET_YES;
1370 else if ( (0 == ij_kj_cmp) && (0 == ik_kk_cmp) && (! has_epsilon (R_last_ij)) &&
1371 has_epsilon (R_last_kk))
1373 /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1374 if (needs_parentheses (&R_temp_kk))
1375 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1377 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1378 R_cur_l->null_flag = GNUNET_YES;
1382 sb_strdup (R_cur_l, R_last_ij);
1383 remove_parentheses (R_cur_l);
1388 /* we have no left side */
1389 R_cur_l->null_flag = GNUNET_YES;
1392 /* construct R_cur_r, if not already constructed */
1393 if (GNUNET_YES == R_cur_r->null_flag)
1395 length = R_temp_kk.slen - R_last_ik->slen;
1397 /* a(ba)*bx = (ab)+x */
1398 if ( (length > 0) &&
1399 (GNUNET_YES != R_last_kk->null_flag) &&
1400 (0 < R_last_kk->slen) &&
1401 (GNUNET_YES != R_last_kj->null_flag) &&
1402 (0 < R_last_kj->slen) &&
1403 (GNUNET_YES != R_last_ik->null_flag) &&
1404 (0 < R_last_ik->slen) &&
1405 (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1406 (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)) )
1408 struct StringBuffer temp_a;
1409 struct StringBuffer temp_b;
1411 sb_init (&temp_a, length);
1412 sb_init (&temp_b, R_last_kj->slen - length);
1415 temp_a.sbuf = temp_a.abuf;
1416 memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1417 temp_a.slen = length_l;
1419 length_r = R_last_kj->slen - length;
1420 temp_b.sbuf = temp_b.abuf;
1421 memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1422 temp_b.slen = length_r;
1424 /* e|(ab)+ = (ab)* */
1425 if ( (GNUNET_YES != R_cur_l->null_flag) &&
1426 (0 == R_cur_l->slen) &&
1427 (0 == temp_b.slen) )
1429 sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1431 R_cur_l->null_flag = GNUNET_YES;
1435 sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1440 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk) &&
1441 0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1445 * (e|a)(e|a)*(e|a) = a*
1447 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1449 if (needs_parentheses (&R_temp_kk))
1450 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1452 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1455 else if ( (0 == clean_ik_kk_cmp) &&
1456 (0 == clean_kk_kj_cmp) &&
1457 (! has_epsilon (R_last_ik)) )
1459 if (needs_parentheses (&R_temp_kk))
1460 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1462 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1473 (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) +
1474 has_epsilon (R_last_kj));
1478 if (needs_parentheses (&R_temp_kk))
1479 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1481 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1487 * (e|a)(e|a)*b = a*b
1489 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1491 if (has_epsilon (R_last_ik))
1493 if (needs_parentheses (&R_temp_kk))
1494 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1496 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1500 if (needs_parentheses (&R_temp_kk))
1501 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1503 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1508 * b(e|a)*(e|a) = ba*
1510 else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1512 if (has_epsilon (R_last_kj))
1514 if (needs_parentheses (&R_temp_kk))
1515 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1517 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1521 if (needs_parentheses (&R_temp_kk))
1522 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1524 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1529 if (0 < R_temp_kk.slen)
1531 if (needs_parentheses (&R_temp_kk))
1533 sb_printf3 (R_cur_r, "%.*s(%.*s)*%.*s", 3, R_last_ik, &R_temp_kk,
1538 sb_printf3 (R_cur_r, "%.*s%.*s*%.*s", 1, R_last_ik, &R_temp_kk,
1544 sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1548 sb_free (&R_temp_ij);
1549 sb_free (&R_temp_ik);
1550 sb_free (&R_temp_kk);
1551 sb_free (&R_temp_kj);
1553 if ( (GNUNET_YES == R_cur_l->null_flag) &&
1554 (GNUNET_YES == R_cur_r->null_flag) )
1556 R_cur_ij->null_flag = GNUNET_YES;
1560 if ( (GNUNET_YES != R_cur_l->null_flag) &&
1561 (GNUNET_YES == R_cur_r->null_flag) )
1563 struct StringBuffer tmp;
1566 *R_cur_ij = *R_cur_l;
1571 if ( (GNUNET_YES == R_cur_l->null_flag) &&
1572 (GNUNET_YES != R_cur_r->null_flag) )
1574 struct StringBuffer tmp;
1577 *R_cur_ij = *R_cur_r;
1582 if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1584 struct StringBuffer tmp;
1587 *R_cur_ij = *R_cur_l;
1591 sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1596 * Create proofs for all states in the given automaton. Implementation of the
1597 * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
1598 * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
1600 * Each state in the automaton gets assigned 'proof' and 'hash' (hash of the
1601 * proof) fields. The starting state will only have a valid proof/hash if it has
1602 * any incoming transitions.
1604 * @param a automaton for which to assign proofs and hashes, must not be NULL
1607 automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a)
1609 unsigned int n = a->state_count;
1610 struct REGEX_INTERNAL_State *states[n];
1611 struct StringBuffer *R_last;
1612 struct StringBuffer *R_cur;
1613 struct StringBuffer R_cur_r;
1614 struct StringBuffer R_cur_l;
1615 struct StringBuffer *R_swap;
1616 struct REGEX_INTERNAL_Transition *t;
1617 struct StringBuffer complete_regex;
1622 R_last = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
1623 R_cur = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
1624 if ( (NULL == R_last) ||
1627 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
1628 GNUNET_free_non_null (R_cur);
1629 GNUNET_free_non_null (R_last);
1630 return GNUNET_SYSERR;
1633 /* create depth-first numbering of the states, initializes 'state' */
1634 REGEX_INTERNAL_automaton_traverse (a, a->start, NULL, NULL, &number_states,
1637 for (i = 0; i < n; i++)
1638 GNUNET_assert (NULL != states[i]);
1639 for (i = 0; i < n; i++)
1640 for (j = 0; j < n; j++)
1641 R_last[i *n + j].null_flag = GNUNET_YES;
1643 /* Compute regular expressions of length "1" between each pair of states */
1644 for (i = 0; i < n; i++)
1646 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1648 j = t->to_state->dfs_id;
1649 if (GNUNET_YES == R_last[i * n + j].null_flag)
1651 sb_strdup_cstr (&R_last[i * n + j], t->label);
1655 sb_append_cstr (&R_last[i * n + j], "|");
1656 sb_append_cstr (&R_last[i * n + j], t->label);
1659 /* add self-loop: i is reachable from i via epsilon-transition */
1660 if (GNUNET_YES == R_last[i * n + i].null_flag)
1662 R_last[i * n + i].slen = 0;
1663 R_last[i * n + i].null_flag = GNUNET_NO;
1667 sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1670 for (i = 0; i < n; i++)
1671 for (j = 0; j < n; j++)
1672 if (needs_parentheses (&R_last[i * n + j]))
1673 sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1674 /* Compute regular expressions of length "k" between each pair of states per
1676 memset (&R_cur_l, 0, sizeof (struct StringBuffer));
1677 memset (&R_cur_r, 0, sizeof (struct StringBuffer));
1678 for (k = 0; k < n; k++)
1680 for (i = 0; i < n; i++)
1682 for (j = 0; j < n; j++)
1684 /* Basis for the recursion:
1685 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1686 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1689 /* Create R_cur[i][j] and simplify the expression */
1690 automaton_create_proofs_simplify (&R_last[i * n + j], &R_last[i * n + k],
1691 &R_last[k * n + k], &R_last[k * n + j],
1693 &R_cur_l, &R_cur_r);
1696 /* set R_last = R_cur */
1700 /* clear 'R_cur' for next iteration */
1701 for (i = 0; i < n; i++)
1702 for (j = 0; j < n; j++)
1703 R_cur[i * n + j].null_flag = GNUNET_YES;
1707 /* assign proofs and hashes */
1708 for (i = 0; i < n; i++)
1710 if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1712 states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1713 R_last[a->start->dfs_id * n + i].slen);
1714 GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof),
1719 /* complete regex for whole DFA: union of all pairs (start state/accepting
1721 sb_init (&complete_regex, 16 * n);
1722 for (i = 0; i < n; i++)
1724 if (states[i]->accepting)
1726 if ( (0 == complete_regex.slen) &&
1727 (0 < R_last[a->start->dfs_id * n + i].slen) )
1729 sb_append (&complete_regex,
1730 &R_last[a->start->dfs_id * n + i]);
1732 else if ( (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1733 (0 < R_last[a->start->dfs_id * n + i].slen) )
1735 sb_append_cstr (&complete_regex, "|");
1736 sb_append (&complete_regex,
1737 &R_last[a->start->dfs_id * n + i]);
1741 a->canonical_regex = GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1744 sb_free (&complete_regex);
1745 for (i = 0; i < n; i++)
1746 for (j = 0; j < n; j++)
1748 sb_free (&R_cur[i * n + j]);
1749 sb_free (&R_last[i * n + j]);
1751 GNUNET_free (R_cur);
1752 GNUNET_free (R_last);
1758 * Creates a new DFA state based on a set of NFA states. Needs to be freed using
1759 * automaton_destroy_state.
1761 * @param ctx context
1762 * @param nfa_states set of NFA states on which the DFA should be based on
1764 * @return new DFA state
1766 static struct REGEX_INTERNAL_State *
1767 dfa_state_create (struct REGEX_INTERNAL_Context *ctx,
1768 struct REGEX_INTERNAL_StateSet *nfa_states)
1770 struct REGEX_INTERNAL_State *s;
1773 struct REGEX_INTERNAL_State *cstate;
1774 struct REGEX_INTERNAL_Transition *ctran;
1777 s = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_State));
1778 s->id = ctx->state_id++;
1782 if (NULL == nfa_states)
1784 GNUNET_asprintf (&s->name, "s%i", s->id);
1788 s->nfa_set = *nfa_states;
1790 if (nfa_states->off < 1)
1793 /* Create a name based on 'nfa_states' */
1794 len = nfa_states->off * 14 + 4;
1795 s->name = GNUNET_malloc (len);
1796 strcat (s->name, "{");
1799 for (i = 0; i < nfa_states->off; i++)
1801 cstate = nfa_states->states[i];
1802 GNUNET_snprintf (pos, pos - s->name + len,
1804 pos += strlen (pos);
1806 /* Add a transition for each distinct label to NULL state */
1807 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1808 if (NULL != ctran->label)
1809 state_add_transition (ctx, s, ctran->label, NULL);
1811 /* If the nfa_states contain an accepting state, the new dfa state is also
1813 if (cstate->accepting)
1817 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1819 memset (nfa_states, 0, sizeof (struct REGEX_INTERNAL_StateSet));
1825 * Move from the given state 's' to the next state on transition 'str'. Consumes
1826 * as much of the given 'str' as possible (usefull for strided DFAs). On return
1827 * 's' will point to the next state, and the length of the substring used for
1828 * this transition will be returned. If no transition possible 0 is returned and
1829 * 's' points to NULL.
1831 * @param s starting state, will point to the next state or NULL (if no
1832 * transition possible)
1833 * @param str edge label to follow (will match longest common prefix)
1835 * @return length of the substring comsumed from 'str'
1838 dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1840 struct REGEX_INTERNAL_Transition *t;
1841 struct REGEX_INTERNAL_State *new_s;
1843 unsigned int max_len;
1850 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1852 len = strlen (t->label);
1854 if (0 == strncmp (t->label, str, len))
1859 new_s = t->to_state;
1870 * Set the given state 'marked' to GNUNET_YES. Used by the
1871 * 'dfa_remove_unreachable_states' function to detect unreachable states in the
1874 * @param cls closure, not used.
1875 * @param count count, not used.
1876 * @param s state where the marked attribute will be set to GNUNET_YES.
1879 mark_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
1881 s->marked = GNUNET_YES;
1886 * Remove all unreachable states from DFA 'a'. Unreachable states are those
1887 * states that are not reachable from the starting state.
1889 * @param a DFA automaton
1892 dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a)
1894 struct REGEX_INTERNAL_State *s;
1895 struct REGEX_INTERNAL_State *s_next;
1897 /* 1. unmark all states */
1898 for (s = a->states_head; NULL != s; s = s->next)
1899 s->marked = GNUNET_NO;
1901 /* 2. traverse dfa from start state and mark all visited states */
1902 REGEX_INTERNAL_automaton_traverse (a, a->start, NULL, NULL, &mark_states, NULL);
1904 /* 3. delete all states that were not visited */
1905 for (s = a->states_head; NULL != s; s = s_next)
1908 if (GNUNET_NO == s->marked)
1909 automaton_remove_state (a, s);
1915 * Remove all dead states from the DFA 'a'. Dead states are those states that do
1916 * not transition to any other state but themselves.
1918 * @param a DFA automaton
1921 dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a)
1923 struct REGEX_INTERNAL_State *s;
1924 struct REGEX_INTERNAL_State *s_next;
1925 struct REGEX_INTERNAL_Transition *t;
1928 GNUNET_assert (DFA == a->type);
1930 for (s = a->states_head; NULL != s; s = s_next)
1938 for (t = s->transitions_head; NULL != t; t = t->next)
1940 if (NULL != t->to_state && t->to_state != s)
1950 /* state s is dead, remove it */
1951 automaton_remove_state (a, s);
1957 * Merge all non distinguishable states in the DFA 'a'
1959 * @param ctx context
1960 * @param a DFA automaton
1961 * @return GNUNET_OK on success
1964 dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx,
1965 struct REGEX_INTERNAL_Automaton *a)
1968 struct REGEX_INTERNAL_State *s1;
1969 struct REGEX_INTERNAL_State *s2;
1970 struct REGEX_INTERNAL_Transition *t1;
1971 struct REGEX_INTERNAL_Transition *t2;
1972 struct REGEX_INTERNAL_State *s1_next;
1973 struct REGEX_INTERNAL_State *s2_next;
1975 unsigned int num_equal_edges;
1977 unsigned int state_cnt;
1978 unsigned long long idx;
1979 unsigned long long idx1;
1981 if ( (NULL == a) || (0 == a->state_count) )
1983 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1984 "Could not merge nondistinguishable states, automaton was NULL.\n");
1985 return GNUNET_SYSERR;
1988 state_cnt = a->state_count;
1989 table = GNUNET_malloc_large ((sizeof (uint32_t) * state_cnt * state_cnt / 32) + sizeof (uint32_t));
1992 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
1993 return GNUNET_SYSERR;
1996 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1999 /* Mark all pairs of accepting/!accepting states */
2000 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
2001 for (s2 = a->states_head; NULL != s2; s2 = s2->next)
2002 if ( (s1->accepting && !s2->accepting) ||
2003 (!s1->accepting && s2->accepting) )
2005 idx = s1->marked * state_cnt + s2->marked;
2006 table[idx / 32] |= (1 << (idx % 32));
2009 /* Find all equal states */
2014 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
2016 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
2018 idx = s1->marked * state_cnt + s2->marked;
2019 if (0 != (table[idx / 32] & (1 << (idx % 32))))
2021 num_equal_edges = 0;
2022 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
2024 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
2026 if (0 == strcmp (t1->label, t2->label))
2029 /* same edge, but targets definitively different, so we're different
2031 if (t1->to_state->marked > t2->to_state->marked)
2032 idx1 = t1->to_state->marked * state_cnt + t2->to_state->marked;
2034 idx1 = t2->to_state->marked * state_cnt + t1->to_state->marked;
2035 if (0 != (table[idx1 / 32] & (1 << (idx1 % 32))))
2037 table[idx / 32] |= (1 << (idx % 32));
2038 change = 1; /* changed a marker, need to run again */
2043 if ( (num_equal_edges != s1->transition_count) ||
2044 (num_equal_edges != s2->transition_count) )
2046 /* Make sure ALL edges of possible equal states are the same */
2047 table[idx / 32] |= (1 << (idx % 32));
2048 change = 1; /* changed a marker, need to run again */
2054 /* Merge states that are equal */
2055 for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2058 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2061 idx = s1->marked * state_cnt + s2->marked;
2062 if (0 == (table[idx / 32] & (1 << (idx % 32))))
2063 automaton_merge_states (ctx, a, s1, s2);
2067 GNUNET_free (table);
2073 * Minimize the given DFA 'a' by removing all unreachable states, removing all
2074 * dead states and merging all non distinguishable states
2076 * @param ctx context
2077 * @param a DFA automaton
2078 * @return GNUNET_OK on success
2081 dfa_minimize (struct REGEX_INTERNAL_Context *ctx,
2082 struct REGEX_INTERNAL_Automaton *a)
2085 return GNUNET_SYSERR;
2087 GNUNET_assert (DFA == a->type);
2089 /* 1. remove unreachable states */
2090 dfa_remove_unreachable_states (a);
2092 /* 2. remove dead states */
2093 dfa_remove_dead_states (a);
2095 /* 3. Merge nondistinguishable states */
2096 if (GNUNET_OK != dfa_merge_nondistinguishable_states (ctx, a))
2097 return GNUNET_SYSERR;
2103 * Context for adding strided transitions to a DFA.
2105 struct REGEX_INTERNAL_Strided_Context
2108 * Length of the strides.
2110 const unsigned int stride;
2113 * Strided transitions DLL. New strided transitions will be stored in this DLL
2114 * and afterwards added to the DFA.
2116 struct REGEX_INTERNAL_Transition *transitions_head;
2119 * Strided transitions DLL.
2121 struct REGEX_INTERNAL_Transition *transitions_tail;
2126 * Recursive helper function to add strides to a DFA.
2128 * @param cls context, contains stride length and strided transitions DLL.
2129 * @param depth current depth of the depth-first traversal of the graph.
2130 * @param label current label, string that contains all labels on the path from
2132 * @param start start state for the depth-first traversal of the graph.
2133 * @param s current state in the depth-first traversal
2136 dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label,
2137 struct REGEX_INTERNAL_State *start,
2138 struct REGEX_INTERNAL_State *s)
2140 struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2141 struct REGEX_INTERNAL_Transition *t;
2144 if (depth == ctx->stride)
2146 t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
2147 t->label = GNUNET_strdup (label);
2149 t->from_state = start;
2150 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail,
2155 for (t = s->transitions_head; NULL != t; t = t->next)
2157 /* Do not consider self-loops, because it end's up in too many
2159 if (t->to_state == t->from_state)
2164 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2167 new_label = GNUNET_strdup (t->label);
2169 dfa_add_multi_strides_helper (cls, (depth + 1), new_label, start,
2173 GNUNET_free_non_null (label);
2178 * Function called for each state in the DFA. Starts a traversal of depth set in
2179 * context starting from state 's'.
2181 * @param cls context.
2182 * @param count not used.
2183 * @param s current state.
2186 dfa_add_multi_strides (void *cls, const unsigned int count,
2187 struct REGEX_INTERNAL_State *s)
2189 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2194 * Adds multi-strided transitions to the given 'dfa'.
2196 * @param regex_ctx regex context needed to add transitions to the automaton.
2197 * @param dfa DFA to which the multi strided transitions should be added.
2198 * @param stride_len length of the strides.
2201 REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx,
2202 struct REGEX_INTERNAL_Automaton *dfa,
2203 const unsigned int stride_len)
2205 struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2206 struct REGEX_INTERNAL_Transition *t;
2207 struct REGEX_INTERNAL_Transition *t_next;
2209 if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
2212 /* Compute the new transitions of given stride_len */
2213 REGEX_INTERNAL_automaton_traverse (dfa, dfa->start, NULL, NULL,
2214 &dfa_add_multi_strides, &ctx);
2216 /* Add all the new transitions to the automaton. */
2217 for (t = ctx.transitions_head; NULL != t; t = t_next)
2220 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2221 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2222 GNUNET_free_non_null (t->label);
2226 /* Mark this automaton as multistrided */
2227 dfa->is_multistrided = GNUNET_YES;
2231 * Recursive Helper function for DFA path compression. Does DFS on the DFA graph
2232 * and adds new transitions to the given transitions DLL and marks states that
2233 * should be removed by setting state->contained to GNUNET_YES.
2235 * @param dfa DFA for which the paths should be compressed.
2236 * @param start starting state for linear path search.
2237 * @param cur current state in the recursive DFS.
2238 * @param label current label (string of traversed labels).
2239 * @param max_len maximal path compression length.
2240 * @param transitions_head transitions DLL.
2241 * @param transitions_tail transitions DLL.
2244 dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa,
2245 struct REGEX_INTERNAL_State *start,
2246 struct REGEX_INTERNAL_State *cur, char *label,
2247 unsigned int max_len,
2248 struct REGEX_INTERNAL_Transition **transitions_head,
2249 struct REGEX_INTERNAL_Transition **transitions_tail)
2251 struct REGEX_INTERNAL_Transition *t;
2255 if (NULL != label &&
2256 ((cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting ||
2257 GNUNET_YES == cur->marked) || (start != dfa->start && max_len > 0 &&
2258 max_len == strlen (label)) ||
2259 (start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen (label))))
2261 t = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Transition));
2262 t->label = GNUNET_strdup (label);
2264 t->from_state = start;
2265 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2267 if (GNUNET_NO == cur->marked)
2269 dfa_compress_paths_helper (dfa, cur, cur, NULL, max_len, transitions_head,
2274 else if (cur != start)
2275 cur->contained = GNUNET_YES;
2277 if (GNUNET_YES == cur->marked && cur != start)
2280 cur->marked = GNUNET_YES;
2283 for (t = cur->transitions_head; NULL != t; t = t->next)
2286 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2288 new_label = GNUNET_strdup (t->label);
2290 if (t->to_state != cur)
2292 dfa_compress_paths_helper (dfa, start, t->to_state, new_label, max_len,
2293 transitions_head, transitions_tail);
2295 GNUNET_free (new_label);
2301 * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be
2302 * compressed to 0->3 by combining transitions.
2304 * @param regex_ctx context for adding new transitions.
2305 * @param dfa DFA representation, will directly modify the given DFA.
2306 * @param max_len maximal length of the compressed paths.
2309 dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx,
2310 struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
2312 struct REGEX_INTERNAL_State *s;
2313 struct REGEX_INTERNAL_State *s_next;
2314 struct REGEX_INTERNAL_Transition *t;
2315 struct REGEX_INTERNAL_Transition *t_next;
2316 struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2317 struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2322 /* Count the incoming transitions on each state. */
2323 for (s = dfa->states_head; NULL != s; s = s->next)
2325 for (t = s->transitions_head; NULL != t; t = t->next)
2327 if (NULL != t->to_state)
2328 t->to_state->incoming_transition_count++;
2332 /* Unmark all states. */
2333 for (s = dfa->states_head; NULL != s; s = s->next)
2335 s->marked = GNUNET_NO;
2336 s->contained = GNUNET_NO;
2339 /* Add strides and mark states that can be deleted. */
2340 dfa_compress_paths_helper (dfa, dfa->start, dfa->start, NULL, max_len,
2341 &transitions_head, &transitions_tail);
2343 /* Add all the new transitions to the automaton. */
2344 for (t = transitions_head; NULL != t; t = t_next)
2347 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2348 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2349 GNUNET_free_non_null (t->label);
2353 /* Remove marked states (including their incoming and outgoing transitions). */
2354 for (s = dfa->states_head; NULL != s; s = s_next)
2357 if (GNUNET_YES == s->contained)
2358 automaton_remove_state (dfa, s);
2364 * Creates a new NFA fragment. Needs to be cleared using
2365 * automaton_fragment_clear.
2367 * @param start starting state
2368 * @param end end state
2370 * @return new NFA fragment
2372 static struct REGEX_INTERNAL_Automaton *
2373 nfa_fragment_create (struct REGEX_INTERNAL_State *start,
2374 struct REGEX_INTERNAL_State *end)
2376 struct REGEX_INTERNAL_Automaton *n;
2378 n = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Automaton));
2385 if (NULL == start || NULL == end)
2388 automaton_add_state (n, end);
2389 automaton_add_state (n, start);
2401 * Adds a list of states to the given automaton 'n'.
2403 * @param n automaton to which the states should be added
2404 * @param states_head head of the DLL of states
2405 * @param states_tail tail of the DLL of states
2408 nfa_add_states (struct REGEX_INTERNAL_Automaton *n,
2409 struct REGEX_INTERNAL_State *states_head,
2410 struct REGEX_INTERNAL_State *states_tail)
2412 struct REGEX_INTERNAL_State *s;
2414 if (NULL == n || NULL == states_head)
2416 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2420 if (NULL == n->states_head)
2422 n->states_head = states_head;
2423 n->states_tail = states_tail;
2427 if (NULL != states_head)
2429 n->states_tail->next = states_head;
2430 n->states_tail = states_tail;
2433 for (s = states_head; NULL != s; s = s->next)
2439 * Creates a new NFA state. Needs to be freed using automaton_destroy_state.
2441 * @param ctx context
2442 * @param accepting is it an accepting state or not
2444 * @return new NFA state
2446 static struct REGEX_INTERNAL_State *
2447 nfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting)
2449 struct REGEX_INTERNAL_State *s;
2451 s = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_State));
2452 s->id = ctx->state_id++;
2453 s->accepting = accepting;
2454 s->marked = GNUNET_NO;
2460 GNUNET_asprintf (&s->name, "s%i", s->id);
2467 * Calculates the closure set for the given set of states.
2469 * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
2470 * @param nfa the NFA containing 's'
2471 * @param states list of states on which to base the closure on
2472 * @param label transitioning label for which to base the closure on,
2473 * pass NULL for epsilon transition
2476 nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret,
2477 struct REGEX_INTERNAL_Automaton *nfa,
2478 struct REGEX_INTERNAL_StateSet *states, const char *label)
2480 struct REGEX_INTERNAL_State *s;
2482 struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2483 struct REGEX_INTERNAL_State *clsstate;
2484 struct REGEX_INTERNAL_State *currentstate;
2485 struct REGEX_INTERNAL_Transition *ctran;
2487 memset (ret, 0, sizeof (struct REGEX_INTERNAL_StateSet));
2491 for (i = 0; i < states->off; i++)
2493 s = states->states[i];
2495 /* Add start state to closure only for epsilon closure */
2497 state_set_append (ret, s);
2499 /* initialize work stack */
2500 cls_stack.head = NULL;
2501 cls_stack.tail = NULL;
2502 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2505 while (NULL != (currentstate = cls_stack.tail))
2507 GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail,
2510 for (ctran = currentstate->transitions_head; NULL != ctran;
2511 ctran = ctran->next)
2513 if (NULL == (clsstate = ctran->to_state))
2515 if (0 != clsstate->contained)
2517 if (0 != nullstrcmp (label, ctran->label))
2519 state_set_append (ret, clsstate);
2520 GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail,
2523 clsstate->contained = 1;
2527 for (i = 0; i < ret->off; i++)
2528 ret->states[i]->contained = 0;
2531 qsort (ret->states, ret->off, sizeof (struct REGEX_INTERNAL_State *),
2537 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
2539 * @param ctx context
2542 nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx)
2544 struct REGEX_INTERNAL_Automaton *a;
2545 struct REGEX_INTERNAL_Automaton *b;
2546 struct REGEX_INTERNAL_Automaton *new_nfa;
2548 b = ctx->stack_tail;
2549 GNUNET_assert (NULL != b);
2550 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2551 a = ctx->stack_tail;
2552 GNUNET_assert (NULL != a);
2553 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2555 state_add_transition (ctx, a->end, NULL, b->start);
2556 a->end->accepting = 0;
2557 b->end->accepting = 1;
2559 new_nfa = nfa_fragment_create (NULL, NULL);
2560 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2561 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2562 new_nfa->start = a->start;
2563 new_nfa->end = b->end;
2564 new_nfa->state_count += a->state_count + b->state_count;
2565 automaton_fragment_clear (a);
2566 automaton_fragment_clear (b);
2568 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2573 * Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
2575 * @param ctx context
2578 nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx)
2580 struct REGEX_INTERNAL_Automaton *a;
2581 struct REGEX_INTERNAL_Automaton *new_nfa;
2582 struct REGEX_INTERNAL_State *start;
2583 struct REGEX_INTERNAL_State *end;
2585 a = ctx->stack_tail;
2589 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2590 "nfa_add_star_op failed, because there was no element on the stack");
2594 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2596 start = nfa_state_create (ctx, 0);
2597 end = nfa_state_create (ctx, 1);
2599 state_add_transition (ctx, start, NULL, a->start);
2600 state_add_transition (ctx, start, NULL, end);
2601 state_add_transition (ctx, a->end, NULL, a->start);
2602 state_add_transition (ctx, a->end, NULL, end);
2604 a->end->accepting = 0;
2607 new_nfa = nfa_fragment_create (start, end);
2608 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2609 automaton_fragment_clear (a);
2611 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2616 * Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
2618 * @param ctx context
2621 nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx)
2623 struct REGEX_INTERNAL_Automaton *a;
2625 a = ctx->stack_tail;
2629 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2630 "nfa_add_plus_op failed, because there was no element on the stack");
2634 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2636 state_add_transition (ctx, a->end, NULL, a->start);
2638 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2643 * Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
2645 * @param ctx context
2648 nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx)
2650 struct REGEX_INTERNAL_Automaton *a;
2651 struct REGEX_INTERNAL_Automaton *new_nfa;
2652 struct REGEX_INTERNAL_State *start;
2653 struct REGEX_INTERNAL_State *end;
2655 a = ctx->stack_tail;
2658 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2659 "nfa_add_question_op failed, because there was no element on the stack");
2663 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2665 start = nfa_state_create (ctx, 0);
2666 end = nfa_state_create (ctx, 1);
2668 state_add_transition (ctx, start, NULL, a->start);
2669 state_add_transition (ctx, start, NULL, end);
2670 state_add_transition (ctx, a->end, NULL, end);
2672 a->end->accepting = 0;
2674 new_nfa = nfa_fragment_create (start, end);
2675 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2676 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2677 automaton_fragment_clear (a);
2682 * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that
2683 * alternates between a and b (a|b)
2685 * @param ctx context
2688 nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx)
2690 struct REGEX_INTERNAL_Automaton *a;
2691 struct REGEX_INTERNAL_Automaton *b;
2692 struct REGEX_INTERNAL_Automaton *new_nfa;
2693 struct REGEX_INTERNAL_State *start;
2694 struct REGEX_INTERNAL_State *end;
2696 b = ctx->stack_tail;
2697 GNUNET_assert (NULL != b);
2698 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2699 a = ctx->stack_tail;
2700 GNUNET_assert (NULL != a);
2701 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2703 start = nfa_state_create (ctx, 0);
2704 end = nfa_state_create (ctx, 1);
2705 state_add_transition (ctx, start, NULL, a->start);
2706 state_add_transition (ctx, start, NULL, b->start);
2708 state_add_transition (ctx, a->end, NULL, end);
2709 state_add_transition (ctx, b->end, NULL, end);
2711 a->end->accepting = 0;
2712 b->end->accepting = 0;
2715 new_nfa = nfa_fragment_create (start, end);
2716 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2717 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2718 automaton_fragment_clear (a);
2719 automaton_fragment_clear (b);
2721 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2726 * Adds a new nfa fragment to the stack
2728 * @param ctx context
2729 * @param label label for nfa transition
2732 nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2734 struct REGEX_INTERNAL_Automaton *n;
2735 struct REGEX_INTERNAL_State *start;
2736 struct REGEX_INTERNAL_State *end;
2738 GNUNET_assert (NULL != ctx);
2740 start = nfa_state_create (ctx, 0);
2741 end = nfa_state_create (ctx, 1);
2742 state_add_transition (ctx, start, label, end);
2743 n = nfa_fragment_create (start, end);
2744 GNUNET_assert (NULL != n);
2745 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2750 * Initialize a new context
2752 * @param ctx context
2755 REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx)
2759 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2763 ctx->transition_id = 0;
2764 ctx->stack_head = NULL;
2765 ctx->stack_tail = NULL;
2770 * Construct an NFA by parsing the regex string of length 'len'.
2772 * @param regex regular expression string
2773 * @param len length of the string
2775 * @return NFA, needs to be freed using REGEX_INTERNAL_destroy_automaton
2777 struct REGEX_INTERNAL_Automaton *
2778 REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2780 struct REGEX_INTERNAL_Context ctx;
2781 struct REGEX_INTERNAL_Automaton *nfa;
2786 unsigned int altcount;
2787 unsigned int atomcount;
2796 if (NULL == regex || 0 == strlen (regex) || 0 == len)
2798 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2799 "Could not parse regex. Empty regex string provided.\n");
2803 REGEX_INTERNAL_context_init (&ctx);
2814 for (count = 0; count < len && *regexp; count++, regexp++)
2822 nfa_add_concatenation (&ctx);
2825 GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2826 p[poff].altcount = altcount;
2827 p[poff].atomcount = atomcount;
2835 error_msg = "Cannot append '|' to nothing";
2838 while (--atomcount > 0)
2839 nfa_add_concatenation (&ctx);
2845 error_msg = "Missing opening '('";
2850 /* Ignore this: "()" */
2852 altcount = p[poff].altcount;
2853 atomcount = p[poff].atomcount;
2856 while (--atomcount > 0)
2857 nfa_add_concatenation (&ctx);
2858 for (; altcount > 0; altcount--)
2859 nfa_add_alternation (&ctx);
2861 altcount = p[poff].altcount;
2862 atomcount = p[poff].atomcount;
2868 error_msg = "Cannot append '*' to nothing";
2871 nfa_add_star_op (&ctx);
2876 error_msg = "Cannot append '+' to nothing";
2879 nfa_add_plus_op (&ctx);
2884 error_msg = "Cannot append '?' to nothing";
2887 nfa_add_question_op (&ctx);
2893 nfa_add_concatenation (&ctx);
2895 curlabel[0] = *regexp;
2896 nfa_add_label (&ctx, curlabel);
2903 error_msg = "Unbalanced parenthesis";
2906 while (--atomcount > 0)
2907 nfa_add_concatenation (&ctx);
2908 for (; altcount > 0; altcount--)
2909 nfa_add_alternation (&ctx);
2911 GNUNET_array_grow (p, psize, 0);
2913 nfa = ctx.stack_tail;
2914 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2916 if (NULL != ctx.stack_head)
2918 error_msg = "Creating the NFA failed. NFA stack was not empty!";
2922 /* Remember the regex that was used to generate this NFA */
2923 nfa->regex = GNUNET_strdup (regex);
2925 /* create depth-first numbering of the states for pretty printing */
2926 REGEX_INTERNAL_automaton_traverse (nfa, NULL, NULL, NULL, &number_states, NULL);
2928 /* No multistriding added so far */
2929 nfa->is_multistrided = GNUNET_NO;
2934 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2935 if (NULL != error_msg)
2936 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2938 GNUNET_free_non_null (p);
2940 while (NULL != (nfa = ctx.stack_head))
2942 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2943 REGEX_INTERNAL_automaton_destroy (nfa);
2951 * Create DFA states based on given 'nfa' and starting with 'dfa_state'.
2953 * @param ctx context.
2954 * @param nfa NFA automaton.
2955 * @param dfa DFA automaton.
2956 * @param dfa_state current dfa state, pass epsilon closure of first nfa state
2960 construct_dfa_states (struct REGEX_INTERNAL_Context *ctx,
2961 struct REGEX_INTERNAL_Automaton *nfa,
2962 struct REGEX_INTERNAL_Automaton *dfa,
2963 struct REGEX_INTERNAL_State *dfa_state)
2965 struct REGEX_INTERNAL_Transition *ctran;
2966 struct REGEX_INTERNAL_State *new_dfa_state;
2967 struct REGEX_INTERNAL_State *state_contains;
2968 struct REGEX_INTERNAL_State *state_iter;
2969 struct REGEX_INTERNAL_StateSet tmp;
2970 struct REGEX_INTERNAL_StateSet nfa_set;
2972 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
2974 if (NULL == ctran->label || NULL != ctran->to_state)
2977 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
2978 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
2979 state_set_clear (&tmp);
2981 state_contains = NULL;
2982 for (state_iter = dfa->states_head; NULL != state_iter;
2983 state_iter = state_iter->next)
2985 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
2987 state_contains = state_iter;
2991 if (NULL == state_contains)
2993 new_dfa_state = dfa_state_create (ctx, &nfa_set);
2994 automaton_add_state (dfa, new_dfa_state);
2995 ctran->to_state = new_dfa_state;
2996 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3000 ctran->to_state = state_contains;
3001 state_set_clear (&nfa_set);
3008 * Construct DFA for the given 'regex' of length 'len'.
3010 * Path compression means, that for example a DFA o -> a -> b -> c -> o will be
3011 * compressed to o -> abc -> o. Note that this parameter influences the
3012 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
3013 * edges with the same label). For example for an application that stores IPv4
3014 * addresses as bitstrings it could make sense to limit the path compression to
3017 * @param regex regular expression string.
3018 * @param len length of the regular expression.
3019 * @param max_path_len limit the path compression length to the
3020 * given value. If set to 1, no path compression is applied. Set to 0 for
3021 * maximal possible path compression (generally not desireable).
3022 * @return DFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
3024 struct REGEX_INTERNAL_Automaton *
3025 REGEX_INTERNAL_construct_dfa (const char *regex, const size_t len,
3026 unsigned int max_path_len)
3028 struct REGEX_INTERNAL_Context ctx;
3029 struct REGEX_INTERNAL_Automaton *dfa;
3030 struct REGEX_INTERNAL_Automaton *nfa;
3031 struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3032 struct REGEX_INTERNAL_StateSet singleton_set;
3034 REGEX_INTERNAL_context_init (&ctx);
3037 nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3041 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3042 "Could not create DFA, because NFA creation failed\n");
3046 dfa = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Automaton));
3048 dfa->regex = GNUNET_strdup (regex);
3050 /* Create DFA start state from epsilon closure */
3051 memset (&singleton_set, 0, sizeof (struct REGEX_INTERNAL_StateSet));
3052 state_set_append (&singleton_set, nfa->start);
3053 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3054 state_set_clear (&singleton_set);
3055 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3056 automaton_add_state (dfa, dfa->start);
3058 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3059 REGEX_INTERNAL_automaton_destroy (nfa);
3062 if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3064 REGEX_INTERNAL_automaton_destroy (dfa);
3068 /* Create proofs and hashes for all states */
3069 if (GNUNET_OK != automaton_create_proofs (dfa))
3071 REGEX_INTERNAL_automaton_destroy (dfa);
3075 /* Compress linear DFA paths */
3076 if (1 != max_path_len)
3077 dfa_compress_paths (&ctx, dfa, max_path_len);
3084 * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton data
3087 * @param a automaton to be destroyed
3090 REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a)
3092 struct REGEX_INTERNAL_State *s;
3093 struct REGEX_INTERNAL_State *next_state;
3098 GNUNET_free_non_null (a->regex);
3099 GNUNET_free_non_null (a->canonical_regex);
3101 for (s = a->states_head; NULL != s; s = next_state)
3103 next_state = s->next;
3104 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
3105 automaton_destroy_state (s);
3113 * Evaluates the given string using the given DFA automaton
3115 * @param a automaton, type must be DFA
3116 * @param string string that should be evaluated
3118 * @return 0 if string matches, non 0 otherwise
3121 evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3124 struct REGEX_INTERNAL_State *s;
3125 unsigned int step_len;
3129 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3130 "Tried to evaluate DFA, but NFA automaton given");
3136 /* If the string is empty but the starting state is accepting, we accept. */
3137 if ((NULL == string || 0 == strlen (string)) && s->accepting)
3140 for (strp = string; NULL != strp && *strp; strp += step_len)
3142 step_len = dfa_move (&s, strp);
3148 if (NULL != s && s->accepting)
3156 * Evaluates the given string using the given NFA automaton
3158 * @param a automaton, type must be NFA
3159 * @param string string that should be evaluated
3161 * @return 0 if string matches, non 0 otherwise
3164 evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3168 struct REGEX_INTERNAL_State *s;
3169 struct REGEX_INTERNAL_StateSet sset;
3170 struct REGEX_INTERNAL_StateSet new_sset;
3171 struct REGEX_INTERNAL_StateSet singleton_set;
3177 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3178 "Tried to evaluate NFA, but DFA automaton given");
3182 /* If the string is empty but the starting state is accepting, we accept. */
3183 if ((NULL == string || 0 == strlen (string)) && a->start->accepting)
3187 memset (&singleton_set, 0, sizeof (struct REGEX_INTERNAL_StateSet));
3188 state_set_append (&singleton_set, a->start);
3189 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3190 state_set_clear (&singleton_set);
3193 for (strp = string; NULL != strp && *strp; strp++)
3196 nfa_closure_set_create (&new_sset, a, &sset, str);
3197 state_set_clear (&sset);
3198 nfa_closure_set_create (&sset, a, &new_sset, 0);
3199 state_set_clear (&new_sset);
3202 for (i = 0; i < sset.off; i++)
3205 if ( (NULL != s) && (s->accepting) )
3212 state_set_clear (&sset);
3218 * Evaluates the given 'string' against the given compiled regex
3220 * @param a automaton
3221 * @param string string to check
3223 * @return 0 if string matches, non 0 otherwise
3226 REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3233 result = evaluate_dfa (a, string);
3236 result = evaluate_nfa (a, string);
3239 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3240 "Evaluating regex failed, automaton has no type!\n");
3241 result = GNUNET_SYSERR;
3250 * Get the canonical regex of the given automaton.
3251 * When constructing the automaton a proof is computed for each state,
3252 * consisting of the regular expression leading to this state. A complete
3253 * regex for the automaton can be computed by combining these proofs.
3254 * As of now this function is only useful for testing.
3256 * @param a automaton for which the canonical regex should be returned.
3261 REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a)
3266 return a->canonical_regex;
3271 * Get the number of transitions that are contained in the given automaton.
3273 * @param a automaton for which the number of transitions should be returned.
3275 * @return number of transitions in the given automaton.
3278 REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a)
3280 unsigned int t_count;
3281 struct REGEX_INTERNAL_State *s;
3287 for (s = a->states_head; NULL != s; s = s->next)
3288 t_count += s->transition_count;
3295 * Get the first key for the given 'input_string'. This hashes the first x bits
3296 * of the 'input_string'.
3298 * @param input_string string.
3299 * @param string_len length of the 'input_string'.
3300 * @param key pointer to where to write the hash code.
3302 * @return number of bits of 'input_string' that have been consumed
3303 * to construct the key
3306 REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len,
3307 struct GNUNET_HashCode * key)
3311 size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len :
3312 GNUNET_REGEX_INITIAL_BYTES;
3313 if (NULL == input_string)
3315 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3316 "Given input string was NULL!\n");
3319 GNUNET_CRYPTO_hash (input_string, size, key);
3326 * Recursive function that calls the iterator for each synthetic start state.
3328 * @param min_len minimum length of the path in the graph.
3329 * @param max_len maximum length of the path in the graph.
3330 * @param consumed_string string consumed by traversing the graph till this state.
3331 * @param state current state of the automaton.
3332 * @param iterator iterator function called for each edge.
3333 * @param iterator_cls closure for the iterator function.
3336 iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
3337 char *consumed_string, struct REGEX_INTERNAL_State *state,
3338 REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
3341 struct REGEX_INTERNAL_Transition *t;
3342 unsigned int num_edges = state->transition_count;
3343 struct REGEX_BLOCK_Edge edges[num_edges];
3344 struct REGEX_BLOCK_Edge edge[1];
3345 struct GNUNET_HashCode hash;
3346 struct GNUNET_HashCode hash_new;
3347 unsigned int cur_len;
3349 if (NULL != consumed_string)
3350 cur_len = strlen (consumed_string);
3354 if ((cur_len >= min_len || GNUNET_YES == state->accepting) && cur_len > 0 &&
3355 NULL != consumed_string)
3357 if (cur_len <= max_len)
3359 if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof))
3361 (void) state_get_edges (state, edges);
3362 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3363 iterator (iterator_cls, &hash, consumed_string, state->accepting,
3367 if (GNUNET_YES == state->accepting && cur_len > 1 &&
3368 state->transition_count < 1 && cur_len < max_len)
3370 /* Special case for regex consisting of just a string that is shorter than
3372 edge[0].label = &consumed_string[cur_len - 1];
3373 edge[0].destination = state->hash;
3374 temp = GNUNET_strdup (consumed_string);
3375 temp[cur_len - 1] = '\0';
3376 GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3377 iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3381 else /* cur_len > max_len */
3383 /* Case where the concatenated labels are longer than max_len, then split. */
3384 edge[0].label = &consumed_string[max_len];
3385 edge[0].destination = state->hash;
3386 temp = GNUNET_strdup (consumed_string);
3387 temp[max_len] = '\0';
3388 GNUNET_CRYPTO_hash (temp, max_len, &hash);
3389 iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3394 if (cur_len < max_len)
3396 for (t = state->transitions_head; NULL != t; t = t->next)
3398 if (NULL != consumed_string)
3399 GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3401 GNUNET_asprintf (&temp, "%s", t->label);
3403 iterate_initial_edge (min_len, max_len, temp, t->to_state, iterator,
3412 * Iterate over all edges starting from start state of automaton 'a'. Calling
3413 * iterator for each edge.
3415 * @param a automaton.
3416 * @param iterator iterator called for each edge.
3417 * @param iterator_cls closure.
3420 REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
3421 REGEX_INTERNAL_KeyIterator iterator,
3424 struct REGEX_INTERNAL_State *s;
3426 for (s = a->states_head; NULL != s; s = s->next)
3428 struct REGEX_BLOCK_Edge edges[s->transition_count];
3429 unsigned int num_edges;
3431 num_edges = state_get_edges (s, edges);
3432 if ( ( (NULL != s->proof) &&
3433 (0 < strlen (s->proof)) ) || s->accepting)
3434 iterator (iterator_cls, &s->hash, s->proof,
3437 s->marked = GNUNET_NO;
3440 iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES,
3441 GNUNET_REGEX_INITIAL_BYTES,
3443 iterator, iterator_cls);
3447 * Struct to hold all the relevant state information in the HashMap.
3449 * Contains the same info as the Regex Iterator parametes except the key,
3450 * which comes directly from the HashMap iterator.
3452 struct temporal_state_store {
3457 struct REGEX_BLOCK_Edge *edges;
3462 * Store regex iterator and cls in one place to pass to the hashmap iterator.
3464 struct client_iterator {
3465 REGEX_INTERNAL_KeyIterator iterator;
3471 * Iterator over all edges of a dfa. Stores all of them in a HashMap
3472 * for later reachability marking.
3474 * @param cls Closure (HashMap)
3475 * @param key hash for current state.
3476 * @param proof proof for current state
3477 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
3478 * @param num_edges number of edges leaving current state.
3479 * @param edges edges leaving current state.
3482 store_all_states (void *cls,
3483 const struct GNUNET_HashCode *key,
3486 unsigned int num_edges,
3487 const struct REGEX_BLOCK_Edge *edges)
3489 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3490 struct temporal_state_store *tmp;
3493 tmp = GNUNET_new (struct temporal_state_store);
3494 tmp->reachable = GNUNET_NO;
3495 tmp->proof = GNUNET_strdup (proof);
3496 tmp->accepting = accepting;
3497 tmp->num_edges = num_edges;
3498 edges_size = sizeof (struct REGEX_BLOCK_Edge) * num_edges;
3499 tmp->edges = GNUNET_malloc (edges_size);
3500 memcpy(tmp->edges, edges, edges_size);
3501 GNUNET_CONTAINER_multihashmap_put (hm, key, tmp,
3502 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST);
3507 * Mark state as reachable and call recursively on all its edges.
3509 * If already marked as reachable, do nothing.
3511 * @param state State to mark as reachable.
3512 * @param hm HashMap which stores all the states indexed by key.
3515 mark_as_reachable (struct temporal_state_store *state,
3516 struct GNUNET_CONTAINER_MultiHashMap *hm)
3518 struct temporal_state_store *child;
3521 if (GNUNET_YES == state->reachable)
3525 state->reachable = GNUNET_YES;
3526 for (i = 0; i < state->num_edges; i++)
3528 child = GNUNET_CONTAINER_multihashmap_get (hm,
3529 &state->edges[i].destination);
3535 mark_as_reachable (child, hm);
3541 * Iterator over hash map entries to mark the ones that are reachable.
3543 * @param cls closure
3544 * @param key current key code
3545 * @param value value in the hash map
3546 * @return #GNUNET_YES if we should continue to iterate,
3547 * #GNUNET_NO if not.
3550 reachability_iterator (void *cls,
3551 const struct GNUNET_HashCode *key,
3554 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3555 struct temporal_state_store *state = value;
3557 if (GNUNET_YES == state->reachable)
3558 /* already visited and marked */
3561 if (GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof) &&
3562 GNUNET_NO == state->accepting)
3563 /* not directly reachable */
3566 mark_as_reachable (state, hm);
3572 * Iterator over hash map entries.
3573 * Calling the callback on the ones marked as reachables.
3575 * @param cls closure
3576 * @param key current key code
3577 * @param value value in the hash map
3578 * @return #GNUNET_YES if we should continue to iterate,
3579 * #GNUNET_NO if not.
3582 iterate_reachables (void *cls,
3583 const struct GNUNET_HashCode *key,
3586 struct client_iterator *ci = cls;
3587 struct temporal_state_store *state = value;
3589 if (GNUNET_YES == state->reachable)
3591 ci->iterator (ci->iterator_cls, key,
3592 state->proof, state->accepting,
3593 state->num_edges, state->edges);
3595 GNUNET_free (state->edges);
3596 GNUNET_free (state->proof);
3597 GNUNET_free (state);
3603 * Iterate over all edges of automaton 'a' that are reachable from a state with
3604 * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.
3606 * Call the iterator for each such edge.
3608 * @param a automaton.
3609 * @param iterator iterator called for each reachable edge.
3610 * @param iterator_cls closure.
3613 REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
3614 REGEX_INTERNAL_KeyIterator iterator,
3617 struct GNUNET_CONTAINER_MultiHashMap *hm;
3618 struct client_iterator ci;
3620 hm = GNUNET_CONTAINER_multihashmap_create (a->state_count * 2, GNUNET_NO);
3621 ci.iterator = iterator;
3622 ci.iterator_cls = iterator_cls;
3624 REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, hm);
3625 GNUNET_CONTAINER_multihashmap_iterate (hm, &reachability_iterator, hm);
3626 GNUNET_CONTAINER_multihashmap_iterate (hm, &iterate_reachables, &ci);
3628 GNUNET_CONTAINER_multihashmap_destroy (hm);
3632 /* end of regex_internal.c */