2 This file is part of GNUnet
3 Copyright (C) 2012 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
16 * @file src/regex/regex_internal.h
17 * @brief common internal definitions for regex library.
18 * @author Maximilian Szengel
20 #ifndef REGEX_INTERNAL_H
21 #define REGEX_INTERNAL_H
23 #include "regex_internal_lib.h"
28 #if 0 /* keep Emacsens' auto-indent happy */
34 * char array of literals that are allowed inside a regex (apart from the
37 #define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
41 * Transition between two states. Transitions are stored at the states from
42 * which they originate ('from_state'). Each state can have 0-n transitions.
43 * If label is NULL, this is considered to be an epsilon transition.
45 struct REGEX_INTERNAL_Transition
48 * This is a linked list.
50 struct REGEX_INTERNAL_Transition *prev;
53 * This is a linked list.
55 struct REGEX_INTERNAL_Transition *next;
58 * Unique id of this transition.
63 * Label for this transition. This is basically the edge label for the graph.
68 * State to which this transition leads.
70 struct REGEX_INTERNAL_State *to_state;
73 * State from which this transition originates.
75 struct REGEX_INTERNAL_State *from_state;
80 * A state. Can be used in DFA and NFA automatons.
82 struct REGEX_INTERNAL_State;
88 struct REGEX_INTERNAL_StateSet
93 struct REGEX_INTERNAL_State **states;
96 * Number of entries in *use* in the 'states' array.
101 * Length of the 'states' array.
108 * A state. Can be used in DFA and NFA automatons.
110 struct REGEX_INTERNAL_State
113 * This is a linked list to keep states in an automaton.
115 struct REGEX_INTERNAL_State *prev;
118 * This is a linked list to keep states in an automaton.
120 struct REGEX_INTERNAL_State *next;
123 * This is a multi DLL for StateSet_MDLL.
125 struct REGEX_INTERNAL_State *prev_SS;
128 * This is a multi DLL for StateSet_MDLL.
130 struct REGEX_INTERNAL_State *next_SS;
133 * This is a multi DLL for StateSet_MDLL Stack.
135 struct REGEX_INTERNAL_State *prev_ST;
138 * This is a multi DLL for StateSet_MDLL Stack.
140 struct REGEX_INTERNAL_State *next_ST;
148 * Unique state id that is used for traversing the automaton. It is guaranteed
149 * to be > 0 and < state_count.
151 unsigned int traversal_id;
154 * If this is an accepting state or not.
159 * Marking of the state. This is used for marking all visited states when
160 * traversing all states of an automaton and for cases where the state id
161 * cannot be used (dfa minimization).
166 * Marking the state as contained. This is used for checking whether the state
167 * is contained in a set in constant time.
172 * Marking the state as part of an SCC (Strongly Connected Component). All
173 * states with the same scc_id are part of the same SCC. scc_id is 0, if state
174 * is not a part of any SCC.
179 * Used for SCC detection.
184 * Used for SCC detection.
189 * Human readable name of the state. Used for debugging and graph
197 struct GNUNET_HashCode hash;
200 * Linear state ID acquired by depth-first-search. This ID should be used for
201 * storing information about the state in an array, because the 'id' of the
202 * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0
203 * and < 'state_count'.
208 * Proof for this state.
213 * Number of transitions from this state to other states.
215 unsigned int transition_count;
218 * DLL of transitions.
220 struct REGEX_INTERNAL_Transition *transitions_head;
223 * DLL of transitions.
225 struct REGEX_INTERNAL_Transition *transitions_tail;
228 * Number of incoming transitions. Used for compressing DFA paths.
230 unsigned int incoming_transition_count;
233 * Set of states on which this state is based. Used when creating a DFA out
234 * of several NFA states.
236 struct REGEX_INTERNAL_StateSet nfa_set;
241 * Type of an automaton.
243 enum REGEX_INTERNAL_AutomatonType
251 * Automaton representation.
253 struct REGEX_INTERNAL_Automaton
256 * Linked list of NFAs used for partial NFA creation.
258 struct REGEX_INTERNAL_Automaton *prev;
261 * Linked list of NFAs used for partial NFA creation.
263 struct REGEX_INTERNAL_Automaton *next;
266 * First state of the automaton. This is mainly used for constructing an NFA,
267 * where each NFA itself consists of one or more NFAs linked together.
269 struct REGEX_INTERNAL_State *start;
272 * End state of the partial NFA. This is undefined for DFAs.
274 struct REGEX_INTERNAL_State *end;
277 * Number of states in the automaton.
279 unsigned int state_count;
284 struct REGEX_INTERNAL_State *states_head;
289 struct REGEX_INTERNAL_State *states_tail;
292 * Type of the automaton.
294 enum REGEX_INTERNAL_AutomatonType type;
302 * Canonical regex (result of RX->NFA->DFA->RX)
304 char *canonical_regex;
307 * GNUNET_YES, if multi strides have been added to the Automaton.
314 * Construct an NFA by parsing the regex string of length 'len'.
316 * @param regex regular expression string.
317 * @param len length of the string.
319 * @return NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
321 struct REGEX_INTERNAL_Automaton *
322 REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len);
326 * Function that gets passed to automaton traversal and is called before each
327 * next traversal from state 's' using transition 't' to check if traversal
328 * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue.
330 * @param cls closure for the check.
331 * @param s current state in the traversal.
332 * @param t current transition from state 's' that will be used for the next
335 * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop.
337 typedef int (*REGEX_INTERNAL_traverse_check) (void *cls,
338 struct REGEX_INTERNAL_State * s,
339 struct REGEX_INTERNAL_Transition * t);
343 * Function that is called with each state, when traversing an automaton.
345 * @param cls closure.
346 * @param count current count of the state, from 0 to a->state_count - 1.
349 typedef void (*REGEX_INTERNAL_traverse_action) (void *cls,
350 const unsigned int count,
351 struct REGEX_INTERNAL_State * s);
355 * Traverses the given automaton using depth-first-search (DFS) from its start
356 * state, visiting all reachable states and calling 'action' on each one of
359 * @param a automaton to be traversed.
360 * @param start start state, pass a->start or NULL to traverse the whole automaton.
361 * @param check function that is checked before advancing on each transition
363 * @param check_cls closure for check.
364 * @param action action to be performed on each state.
365 * @param action_cls closure for action
368 REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
369 struct REGEX_INTERNAL_State *start,
370 REGEX_INTERNAL_traverse_check check,
372 REGEX_INTERNAL_traverse_action action,
376 * Get the canonical regex of the given automaton.
377 * When constructing the automaton a proof is computed for each state,
378 * consisting of the regular expression leading to this state. A complete
379 * regex for the automaton can be computed by combining these proofs.
380 * As of now this function is only useful for testing.
382 * @param a automaton for which the canonical regex should be returned.
384 * @return canonical regex string.
387 REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a);
391 * Get the number of transitions that are contained in the given automaton.
393 * @param a automaton for which the number of transitions should be returned.
395 * @return number of transitions in the given automaton.
398 REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a);
402 * Context that contains an id counter for states and transitions as well as a
403 * DLL of automatons used as a stack for NFA construction.
405 struct REGEX_INTERNAL_Context
410 unsigned int state_id;
413 * Unique transition id.
415 unsigned int transition_id;
418 * DLL of REGEX_INTERNAL_Automaton's used as a stack.
420 struct REGEX_INTERNAL_Automaton *stack_head;
423 * DLL of REGEX_INTERNAL_Automaton's used as a stack.
425 struct REGEX_INTERNAL_Automaton *stack_tail;
430 * Adds multi-strided transitions to the given 'dfa'.
432 * @param regex_ctx regex context needed to add transitions to the automaton.
433 * @param dfa DFA to which the multi-strided transitions should be added.
434 * @param stride_len length of the strides.
437 REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx,
438 struct REGEX_INTERNAL_Automaton *dfa,
439 const unsigned int stride_len);
443 #if 0 /* keep Emacsens' auto-indent happy */