2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_internal.h
22 * @brief common internal definitions for regex library.
23 * @author Maximilian Szengel
25 #ifndef REGEX_INTERNAL_H
26 #define REGEX_INTERNAL_H
28 #include "gnunet_regex_lib.h"
33 #if 0 /* keep Emacsens' auto-indent happy */
39 * char array of literals that are allowed inside a regex (apart from the
/* The literal set is the digits 0-9, followed by A-Z, followed by a-z
   (62 characters in total); no punctuation or whitespace is included. */
42 #define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
46 * Transition between two states. Transitions are stored at the states from
47 * which they originate ('from_state'). Each state can have 0-n transitions.
48 * If label is 0, this is considered to be an epsilon transition.
50 struct GNUNET_REGEX_Transition
53 * This is a linked list.
55 struct GNUNET_REGEX_Transition *prev;
58 * This is a linked list.
60 struct GNUNET_REGEX_Transition *next;
63 * Unique id of this transition.
68 * Label for this transition. This is basically the edge label for the graph.
73 * State to which this transition leads.
75 struct GNUNET_REGEX_State *to_state;
78 * State from which this transition originates.
80 struct GNUNET_REGEX_State *from_state;
85 * A state. Can be used in DFA and NFA automatons.
87 struct GNUNET_REGEX_State
90 * This is a linked list to keep states in an automaton.
92 struct GNUNET_REGEX_State *prev;
95 * This is a linked list to keep states in an automaton.
97 struct GNUNET_REGEX_State *next;
100 * This is a multi DLL for StateSet_MDLL.
102 struct GNUNET_REGEX_State *prev_SS;
105 * This is a multi DLL for StateSet_MDLL.
107 struct GNUNET_REGEX_State *next_SS;
110 * This is a multi DLL for StateSet_MDLL Stack.
112 struct GNUNET_REGEX_State *prev_ST;
115 * This is a multi DLL for StateSet_MDLL Stack.
117 struct GNUNET_REGEX_State *next_ST;
125 * Unique state id that is used for traversing the automaton. It is guaranteed
126 * to be > 0 and < state_count.
128 unsigned int traversal_id;
131 * If this is an accepting state or not.
136 * Marking of the state. This is used for marking all visited states when
137 * traversing all states of an automaton and for cases where the state id
138 * cannot be used (dfa minimization).
143 * Marking the state as contained. This is used for checking, if the state is
144 * contained in a set in constant time.
149 * Marking the state as part of an SCC (Strongly Connected Component). All
150 * states with the same scc_id are part of the same SCC. scc_id is 0, if state
151 * is not a part of any SCC.
156 * Used for SCC detection.
161 * Used for SCC detection.
166 * Human readable name of the state. Used for debugging and graph
174 struct GNUNET_HashCode hash;
177 * Linear state ID acquired by depth-first-search. This ID should be used for
178 * storing information about the state in an array, because the 'id' of the
179 * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0
180 * and < 'state_count'.
185 * Proof for this state.
190 * Number of transitions from this state to other states.
192 unsigned int transition_count;
195 * DLL of transitions.
197 struct GNUNET_REGEX_Transition *transitions_head;
200 * DLL of transitions.
202 struct GNUNET_REGEX_Transition *transitions_tail;
205 * Number of incoming transitions. Used for compressing DFA paths.
207 unsigned int incoming_transition_count;
210 * Set of states on which this state is based. Used when creating a DFA out
211 * of several NFA states.
213 struct GNUNET_REGEX_StateSet *nfa_set;
218 * Type of an automaton.
220 enum GNUNET_REGEX_AutomatonType
228 * Automaton representation.
230 struct GNUNET_REGEX_Automaton
233 * Linked list of NFAs used for partial NFA creation.
235 struct GNUNET_REGEX_Automaton *prev;
238 * Linked list of NFAs used for partial NFA creation.
240 struct GNUNET_REGEX_Automaton *next;
243 * First state of the automaton. This is mainly used for constructing an NFA,
244 * where each NFA itself consists of one or more NFAs linked together.
246 struct GNUNET_REGEX_State *start;
249 * End state of the partial NFA. This is undefined for DFAs.
251 struct GNUNET_REGEX_State *end;
254 * Number of states in the automaton.
256 unsigned int state_count;
261 struct GNUNET_REGEX_State *states_head;
266 struct GNUNET_REGEX_State *states_tail;
269 * Type of the automaton.
271 enum GNUNET_REGEX_AutomatonType type;
279 * Canonical regex (result of RX->NFA->DFA->RX)
281 char *canonical_regex;
284 * GNUNET_YES, if multi strides have been added to the Automaton.
291 * Construct an NFA by parsing the regex string of length 'len'.
293 * @param regex regular expression string.
294 * @param len length of the string.
296 * @return NFA, needs to be freed using GNUNET_REGEX_automaton_destroy.
/* Prototype only; the definition is not part of this header. Both parameters
   are read-only inputs: 'regex' points to const char data and 'len' is passed
   by value. The result is a pointer to a struct GNUNET_REGEX_Automaton. */
298 struct GNUNET_REGEX_Automaton *
299 GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
303 * Function that gets passed to automaton traversal and is called before each
304 * next traversal from state 's' using transition 't' to check if traversal
305 * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue.
307 * @param cls closure for the check.
308 * @param s current state in the traversal.
309 * @param t current transition from state 's' that will be used for the next
312 * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop.
/* Function-pointer type: the callback receives the closure pointer together
   with non-const pointers to the state and to the transition, and reports its
   decision through the int return value. */
314 typedef int (*GNUNET_REGEX_traverse_check) (void *cls,
315 struct GNUNET_REGEX_State * s,
316 struct GNUNET_REGEX_Transition * t);
320 * Function that is called with each state, when traversing an automaton.
322 * @param cls closure.
323 * @param count current count of the state, from 0 to a->state_count - 1.
/* Function-pointer type with no return value: the callback receives the
   closure pointer, the 'count' value and a non-const pointer to the state. */
326 typedef void (*GNUNET_REGEX_traverse_action) (void *cls,
327 const unsigned int count,
328 struct GNUNET_REGEX_State * s);
332 * Traverses the given automaton using depth-first-search (DFS) from its start
333 * state, visiting all reachable states and calling 'action' on each one of
336 * @param a automaton to be traversed.
337 * @param start start state, pass a->start or NULL to traverse the whole automaton.
338 * @param check function that is checked before advancing on each transition
340 * @param check_cls closure for check.
341 * @param action action to be performed on each state.
342 * @param action_cls closure for action
345 GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a,
346 struct GNUNET_REGEX_State *start,
347 GNUNET_REGEX_traverse_check check,
349 GNUNET_REGEX_traverse_action action,
353 * Get the canonical regex of the given automaton.
354 * When constructing the automaton a proof is computed for each state,
355 * consisting of the regular expression leading to this state. A complete
356 * regex for the automaton can be computed by combining these proofs.
357 * As of now this function is only useful for testing.
359 * @param a automaton for which the canonical regex should be returned.
361 * @return canonical regex string.
364 GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a);
368 * Get the number of transitions that are contained in the given automaton.
370 * @param a automaton for which the number of transitions should be returned.
372 * @return number of transitions in the given automaton.
375 GNUNET_REGEX_get_transition_count (struct GNUNET_REGEX_Automaton *a);
379 * Context that contains an id counter for states and transitions as well as a
380 * DLL of automatons used as a stack for NFA construction.
382 struct GNUNET_REGEX_Context
387 unsigned int state_id;
390 * Unique transition id.
392 unsigned int transition_id;
395 * DLL of GNUNET_REGEX_Automaton structs used as a stack.
397 struct GNUNET_REGEX_Automaton *stack_head;
400 * DLL of GNUNET_REGEX_Automaton structs used as a stack.
402 struct GNUNET_REGEX_Automaton *stack_tail;
407 * Adds multi-strided transitions to the given 'dfa'.
409 * @param regex_ctx regex context needed to add transitions to the automaton.
410 * @param dfa DFA to which the multi strided transitions should be added.
411 * @param stride_len length of the strides.
414 GNUNET_REGEX_dfa_add_multi_strides (struct GNUNET_REGEX_Context *regex_ctx,
415 struct GNUNET_REGEX_Automaton *dfa,
416 const unsigned int stride_len);
420 * Generate a (pseudo) random regular expression of length 'rx_length', as well
421 * as an (optional) string that will be matched by the generated regex. The
422 * returned regex needs to be freed.
424 * @param rx_length length of the random regex.
425 * @param matching_str (optional) pointer to a string that will contain a string
426 * that will be matched by the generated regex, if
427 * 'matching_str' pointer was not NULL.
429 * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
430 * needs to be freed, otherwise.
433 GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str);
437 * Generate a random string of maximum length 'max_len' that only contains literals allowed
438 * in a regular expression. The string might be 0 chars long but is guaranteed
439 * to be no longer than 'max_len'.
441 * @param max_len maximum length of the string that should be generated.
443 * @return random string that needs to be freed.
446 GNUNET_REGEX_generate_random_string (size_t max_len);
449 #if 0 /* keep Emacsens' auto-indent happy */