/*
This file is part of GNUnet
- (C) 2012 Christian Grothoff (and other contributing authors)
+ Copyright (C) 2012 GNUnet e.V.
- GNUnet is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3, or (at your
- option) any later version.
+ GNUnet is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Affero General Public License as published
+ by the Free Software Foundation, either version 3 of the License,
+ or (at your option) any later version.
GNUnet is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
- You should have received a copy of the GNU General Public License
- along with GNUnet; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA.
+ SPDX-License-Identifier: AGPL-3.0-or-later
*/
/**
* @file src/regex/regex_internal.h
- * @brief common internal definitions for regex library
+ * @brief common internal definitions for regex library.
* @author Maximilian Szengel
*/
#ifndef REGEX_INTERNAL_H
#define REGEX_INTERNAL_H
-#include "gnunet_regex_lib.h"
+#include "regex_internal_lib.h"
#ifdef __cplusplus
extern "C"
/**
- * Transition between two states. Each state can have 0-n transitions. If label
- * is 0, this is considered to be an epsilon transition.
+ * Transition between two states. Transitions are stored at the states from
+ * which they originate ('from_state'). Each state can have 0-n transitions.
+ * If label is NULL, this is considered to be an epsilon transition.
*/
-struct GNUNET_REGEX_Transition
+struct REGEX_INTERNAL_Transition
{
/**
* This is a linked list.
*/
- struct GNUNET_REGEX_Transition *prev;
+ struct REGEX_INTERNAL_Transition *prev;
/**
* This is a linked list.
*/
- struct GNUNET_REGEX_Transition *next;
+ struct REGEX_INTERNAL_Transition *next;
/**
* Unique id of this transition.
/**
* State to which this transition leads.
*/
- struct GNUNET_REGEX_State *to_state;
+ struct REGEX_INTERNAL_State *to_state;
/**
* State from which this transition originates.
*/
- struct GNUNET_REGEX_State *from_state;
+ struct REGEX_INTERNAL_State *from_state;
};
/**
* A state. Can be used in DFA and NFA automatons.
*/
-struct GNUNET_REGEX_State
+struct REGEX_INTERNAL_State;
+
+
+/**
+ * Set of states.
+ */
+struct REGEX_INTERNAL_StateSet
{
/**
- * This is a linked list.
+ * Array of states.
*/
- struct GNUNET_REGEX_State *prev;
+ struct REGEX_INTERNAL_State **states;
/**
- * This is a linked list.
+ * Number of entries in *use* in the 'states' array.
+ */
+ unsigned int off;
+
+ /**
+ * Length of the 'states' array.
+ */
+ unsigned int size;
+};
+
+
+/**
+ * A state. Can be used in DFA and NFA automatons.
+ */
+struct REGEX_INTERNAL_State
+{
+ /**
+ * This is a linked list to keep states in an automaton.
+ */
+ struct REGEX_INTERNAL_State *prev;
+
+ /**
+ * This is a linked list to keep states in an automaton.
+ */
+ struct REGEX_INTERNAL_State *next;
+
+ /**
+ * This is a multi DLL for StateSet_MDLL.
+ */
+ struct REGEX_INTERNAL_State *prev_SS;
+
+ /**
+ * This is a multi DLL for StateSet_MDLL.
*/
- struct GNUNET_REGEX_State *next;
+ struct REGEX_INTERNAL_State *next_SS;
+
+ /**
+ * This is a multi DLL for StateSet_MDLL Stack.
+ */
+ struct REGEX_INTERNAL_State *prev_ST;
+
+ /**
+ * This is a multi DLL for StateSet_MDLL Stack.
+ */
+ struct REGEX_INTERNAL_State *next_ST;
/**
* Unique state id.
/**
* Marking the state as contained. This is used to check whether the state is
- * contained in a set in constant time
+ * contained in a set in constant time.
*/
int contained;
int lowlink;
/**
- * Human readable name of the automaton. Used for debugging and graph
+ * Human readable name of the state. Used for debugging and graph
* creation.
*/
char *name;
/**
* DLL of transitions.
*/
- struct GNUNET_REGEX_Transition *transitions_head;
+ struct REGEX_INTERNAL_Transition *transitions_head;
/**
* DLL of transitions.
*/
- struct GNUNET_REGEX_Transition *transitions_tail;
+ struct REGEX_INTERNAL_Transition *transitions_tail;
+
+ /**
+ * Number of incoming transitions. Used for compressing DFA paths.
+ */
+ unsigned int incoming_transition_count;
/**
* Set of states on which this state is based. Used when creating a DFA out
* of several NFA states.
*/
- struct GNUNET_REGEX_StateSet *nfa_set;
+ struct REGEX_INTERNAL_StateSet nfa_set;
};
/**
* Type of an automaton.
*/
-enum GNUNET_REGEX_AutomatonType
+enum REGEX_INTERNAL_AutomatonType
{
NFA,
DFA
/**
* Automaton representation.
*/
-struct GNUNET_REGEX_Automaton
+struct REGEX_INTERNAL_Automaton
{
/**
* Linked list of NFAs used for partial NFA creation.
*/
- struct GNUNET_REGEX_Automaton *prev;
+ struct REGEX_INTERNAL_Automaton *prev;
/**
* Linked list of NFAs used for partial NFA creation.
*/
- struct GNUNET_REGEX_Automaton *next;
+ struct REGEX_INTERNAL_Automaton *next;
/**
* First state of the automaton. This is mainly used for constructing an NFA,
* where each NFA itself consists of one or more NFAs linked together.
*/
- struct GNUNET_REGEX_State *start;
+ struct REGEX_INTERNAL_State *start;
/**
* End state of the partial NFA. This is undefined for DFAs
*/
- struct GNUNET_REGEX_State *end;
+ struct REGEX_INTERNAL_State *end;
/**
* Number of states in the automaton.
/**
* DLL of states.
*/
- struct GNUNET_REGEX_State *states_head;
+ struct REGEX_INTERNAL_State *states_head;
/**
* DLL of states
*/
- struct GNUNET_REGEX_State *states_tail;
+ struct REGEX_INTERNAL_State *states_tail;
/**
* Type of the automaton.
*/
- enum GNUNET_REGEX_AutomatonType type;
+ enum REGEX_INTERNAL_AutomatonType type;
/**
* Regex
};
+/**
+ * Construct an NFA by parsing the regex string of length 'len'.
+ *
+ * @param regex regular expression string.
+ * @param len length of the string.
+ *
+ * @return NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
+ */
+struct REGEX_INTERNAL_Automaton *
+REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len);
+
+
/**
* Function that gets passed to automaton traversal and is called before each
* next traversal from state 's' using transition 't' to check if traversal
*
* @return GNUNET_YES to proceed traversal, GNUNET_NO to stop.
*/
-typedef int (*GNUNET_REGEX_traverse_check) (void *cls,
- struct GNUNET_REGEX_State * s,
- struct GNUNET_REGEX_Transition * t);
+typedef int (*REGEX_INTERNAL_traverse_check) (void *cls,
+ struct REGEX_INTERNAL_State * s,
+ struct REGEX_INTERNAL_Transition * t);
/**
* @param count current count of the state, from 0 to a->state_count -1.
* @param s state.
*/
-typedef void (*GNUNET_REGEX_traverse_action) (void *cls,
+typedef void (*REGEX_INTERNAL_traverse_action) (void *cls,
const unsigned int count,
- struct GNUNET_REGEX_State * s);
+ struct REGEX_INTERNAL_State * s);
/**
* @param action_cls closure for action
*/
void
-GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a,
- struct GNUNET_REGEX_State *start,
- GNUNET_REGEX_traverse_check check,
+REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
+ struct REGEX_INTERNAL_State *start,
+ REGEX_INTERNAL_traverse_check check,
void *check_cls,
- GNUNET_REGEX_traverse_action action,
+ REGEX_INTERNAL_traverse_action action,
void *action_cls);
/**
* @return canonical regex string.
*/
const char *
-GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a);
+REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a);
/**
* @return number of transitions in the given automaton.
*/
unsigned int
-GNUNET_REGEX_get_transition_count (struct GNUNET_REGEX_Automaton *a);
+REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a);
/**
* Context that contains an id counter for states and transitions as well as a
* DLL of automatons used as a stack for NFA construction.
*/
-struct GNUNET_REGEX_Context
+struct REGEX_INTERNAL_Context
{
/**
* Unique state id.
unsigned int transition_id;
/**
- * DLL of GNUNET_REGEX_Automaton's used as a stack.
+ * DLL of REGEX_INTERNAL_Automaton's used as a stack.
*/
- struct GNUNET_REGEX_Automaton *stack_head;
+ struct REGEX_INTERNAL_Automaton *stack_head;
/**
- * DLL of GNUNET_REGEX_Automaton's used as a stack.
+ * DLL of REGEX_INTERNAL_Automaton's used as a stack.
*/
- struct GNUNET_REGEX_Automaton *stack_tail;
+ struct REGEX_INTERNAL_Automaton *stack_tail;
};
* @param stride_len length of the strides.
*/
void
-GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx,
- struct GNUNET_REGEX_Automaton *dfa,
- const unsigned int stride_len);
+REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx,
+ struct REGEX_INTERNAL_Automaton *dfa,
+ const unsigned int stride_len);
-/**
- * Generate a (pseudo) random regular expression of length 'rx_length', as well
- * as a (optional) string that will be matched by the generated regex. The
- * returned regex needs to be freed.
- *
- * @param rx_length length of the random regex.
- * @param matching_str (optional) pointer to a string that will contain a string
- * that will be matched by the generated regex, if
- * 'matching_str' pointer was not NULL.
- *
- * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
- * needs to be freed, otherwise.
- */
-char *
-GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str);
-
-
-/**
- * Generate a random string of maximum length 'max_len' that only contains literals allowed
- * in a regular expression. The string might be 0 chars long but is garantueed
- * to be shorter or equal to 'max_len'.
- *
- * @param max_len maximum length of the string that should be generated.
- *
- * @return random string that needs to be freed.
- */
-char *
-GNUNET_REGEX_generate_random_string (size_t max_len);
-
#if 0 /* keep Emacsens' auto-indent happy */
{