2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_internal.h
22 * @brief common internal definitions for regex library
23 * @author Maximilian Szengel
25 #ifndef REGEX_INTERNAL_H
26 #define REGEX_INTERNAL_H
28 #include "gnunet_regex_lib.h"
33 #if 0 /* keep Emacsens' auto-indent happy */
39 * char array of literals that are allowed inside a regex (apart from the
42 #define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
46 * Transition between two states. Each state can have 0-n transitions. If label
47 * is 0, this is considered to be an epsilon transition.
49 struct GNUNET_REGEX_Transition
52 * This is a linked list.
54 struct GNUNET_REGEX_Transition *prev;
57 * This is a linked list.
59 struct GNUNET_REGEX_Transition *next;
62 * Unique id of this transition.
67 * Label for this transition. This is basically the edge label for the graph.
72 * State to which this transition leads.
74 struct GNUNET_REGEX_State *to_state;
77 * State from which this transition originates.
79 struct GNUNET_REGEX_State *from_state;
82 * Mark this transition. For example when reversing the automaton.
89 * A state. Can be used in DFA and NFA automatons.
91 struct GNUNET_REGEX_State
94 * This is a linked list.
96 struct GNUNET_REGEX_State *prev;
99 * This is a linked list.
101 struct GNUNET_REGEX_State *next;
109 * If this is an accepting state or not.
114 * Marking of the state. This is used for marking all visited states when
115 * traversing all states of an automaton and for cases where the state id
116 * cannot be used (dfa minimization).
121 * Marking the state as contained. This is used for checking in constant time
122 * whether the state is contained in a set.
127 * Marking the state as part of an SCC (Strongly Connected Component). All
128 * states with the same scc_id are part of the same SCC. scc_id is 0, if state
129 * is not a part of any SCC.
134 * Used for SCC detection.
139 * Used for SCC detection.
144 * Human readable name of the automaton. Used for debugging and graph
152 struct GNUNET_HashCode hash;
155 * State ID for proof creation.
157 unsigned int proof_id;
160 * Proof for this state.
165 * Number of transitions from this state to other states.
167 unsigned int transition_count;
170 * DLL of transitions.
172 struct GNUNET_REGEX_Transition *transitions_head;
175 * DLL of transitions.
177 struct GNUNET_REGEX_Transition *transitions_tail;
180 * Set of states on which this state is based. Used when creating a DFA out
181 * of several NFA states.
183 struct GNUNET_REGEX_StateSet *nfa_set;
188 * Type of an automaton.
190 enum GNUNET_REGEX_AutomatonType
198 * Automaton representation.
200 struct GNUNET_REGEX_Automaton
203 * Linked list of NFAs used for partial NFA creation.
205 struct GNUNET_REGEX_Automaton *prev;
208 * Linked list of NFAs used for partial NFA creation.
210 struct GNUNET_REGEX_Automaton *next;
213 * First state of the automaton. This is mainly used for constructing an NFA,
214 * where each NFA itself consists of one or more NFAs linked together.
216 struct GNUNET_REGEX_State *start;
219 * End state of the partial NFA. This is undefined for DFAs
221 struct GNUNET_REGEX_State *end;
224 * Number of states in the automaton.
226 unsigned int state_count;
231 struct GNUNET_REGEX_State *states_head;
236 struct GNUNET_REGEX_State *states_tail;
239 * Type of the automaton.
241 enum GNUNET_REGEX_AutomatonType type;
249 * Canonical regex (result of RX->NFA->DFA->RX)
251 char *canonical_regex;
256 * Function that is called with each state, when traversing an automaton.
258 * @param cls closure.
259 * @param count current count of the state, from 0 to a->state_count - 1.
262 typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count,
263 struct GNUNET_REGEX_State * s);
267 * Traverses the given automaton from its start state, visiting all reachable
268 * states and calling 'action' on each one of them.
270 * @param a automaton.
271 * @param action action to be performed on each state.
272 * @param action_cls closure for action
275 GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a,
276 GNUNET_REGEX_traverse_action action,
281 * Get the canonical regex of the given automaton.
282 * When constructing the automaton a proof is computed for each state,
283 * consisting of the regular expression leading to this state. A complete
284 * regex for the automaton can be computed by combining these proofs.
285 * As of now this function is only useful for testing.
287 * @param a automaton for which the canonical regex should be returned.
292 GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a);
296 * Generate a (pseudo) random regular expression of length 'rx_length', as well
297 * as an (optional) string that will be matched by the generated regex. The
298 * returned regex needs to be freed.
300 * @param rx_length length of the random regex.
301 * @param matching_str (optional) pointer to a string that will contain a string
302 * that will be matched by the generated regex, if
303 * 'matching_str' pointer was not NULL.
305 * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
306 * needs to be freed, otherwise.
309 GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str);
313 * Generate a random string of maximum length 'max_len' that only contains literals allowed
314 * in a regular expression. The string might be 0 chars long but is guaranteed
315 * to be no longer than 'max_len'.
317 * @param max_len maximum length of the string that should be generated.
319 * @return random string that needs to be freed.
322 GNUNET_REGEX_generate_random_string (size_t max_len);
324 #if 0 /* keep Emacsens' auto-indent happy */