X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=src%2Fregex%2Fregex_internal.h;h=69a8971d5db9956424024fac28ebf9d842992ab0;hb=eb420e4b0f23c6ddb079cd40bc76b4f2a35bdbb1;hp=20e81d93cd8656f051a7f1a2b39e9e2c0d6ed704;hpb=09f394f4f8fc77de47857adf9b8630136d930005;p=oweals%2Fgnunet.git diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h index 20e81d93c..69a8971d5 100644 --- a/src/regex/regex_internal.h +++ b/src/regex/regex_internal.h @@ -19,13 +19,13 @@ */ /** * @file src/regex/regex_internal.h - * @brief common internal definitions for regex library + * @brief common internal definitions for regex library. * @author Maximilian Szengel */ #ifndef REGEX_INTERNAL_H #define REGEX_INTERNAL_H -#include "gnunet_regex_lib.h" +#include "regex_internal_lib.h" #ifdef __cplusplus extern "C" @@ -43,20 +43,21 @@ extern "C" /** - * Transition between two states. Each state can have 0-n transitions. If label - * is 0, this is considered to be an epsilon transition. + * Transition between two states. Transitions are stored at the states from + * which they origin ('from_state'). Each state can have 0-n transitions. + * If label is NULL, this is considered to be an epsilon transition. */ -struct GNUNET_REGEX_Transition +struct REGEX_INTERNAL_Transition { /** * This is a linked list. */ - struct GNUNET_REGEX_Transition *prev; + struct REGEX_INTERNAL_Transition *prev; /** * This is a linked list. */ - struct GNUNET_REGEX_Transition *next; + struct REGEX_INTERNAL_Transition *next; /** * Unique id of this transition. @@ -71,29 +72,77 @@ struct GNUNET_REGEX_Transition /** * State to which this transition leads. */ - struct GNUNET_REGEX_State *to_state; + struct REGEX_INTERNAL_State *to_state; /** * State from which this transition origins. */ - struct GNUNET_REGEX_State *from_state; + struct REGEX_INTERNAL_State *from_state; }; /** * A state. Can be used in DFA and NFA automatons. */ -struct GNUNET_REGEX_State +struct REGEX_INTERNAL_State; + + +/** + * Set of states. + */ +struct REGEX_INTERNAL_StateSet { /** - * This is a linked list. + * Array of states. */ - struct GNUNET_REGEX_State *prev; + struct REGEX_INTERNAL_State **states; /** - * This is a linked list. + * Number of entries in *use* in the 'states' array. + */ + unsigned int off; + + /** + * Length of the 'states' array. + */ + unsigned int size; +}; + + +/** + * A state. Can be used in DFA and NFA automatons. + */ +struct REGEX_INTERNAL_State +{ + /** + * This is a linked list to keep states in an automaton. + */ + struct REGEX_INTERNAL_State *prev; + + /** + * This is a linked list to keep states in an automaton. + */ + struct REGEX_INTERNAL_State *next; + + /** + * This is a multi DLL for StateSet_MDLL. + */ + struct REGEX_INTERNAL_State *prev_SS; + + /** + * This is a multi DLL for StateSet_MDLL. + */ + struct REGEX_INTERNAL_State *next_SS; + + /** + * This is a multi DLL for StateSet_MDLL Stack. + */ + struct REGEX_INTERNAL_State *prev_ST; + + /** + * This is a multi DLL for StateSet_MDLL Stack. */ - struct GNUNET_REGEX_State *next; + struct REGEX_INTERNAL_State *next_ST; /** * Unique state id. @@ -120,7 +169,7 @@ struct GNUNET_REGEX_State /** * Marking the state as contained. This is used for checking, if the state is - * contained in a set in constant time + * contained in a set in constant time. */ int contained; @@ -142,7 +191,7 @@ struct GNUNET_REGEX_State int lowlink; /** - * Human readable name of the automaton. Used for debugging and graph + * Human readable name of the state. Used for debugging and graph * creation. */ char *name; @@ -173,25 +222,30 @@ struct GNUNET_REGEX_State /** * DLL of transitions. */ - struct GNUNET_REGEX_Transition *transitions_head; + struct REGEX_INTERNAL_Transition *transitions_head; /** * DLL of transitions. */ - struct GNUNET_REGEX_Transition *transitions_tail; + struct REGEX_INTERNAL_Transition *transitions_tail; + + /** + * Number of incoming transitions. Used for compressing DFA paths. + */ + unsigned int incoming_transition_count; /** * Set of states on which this state is based on. Used when creating a DFA out * of several NFA states. */ - struct GNUNET_REGEX_StateSet *nfa_set; + struct REGEX_INTERNAL_StateSet nfa_set; }; /** * Type of an automaton. */ -enum GNUNET_REGEX_AutomatonType +enum REGEX_INTERNAL_AutomatonType { NFA, DFA @@ -201,28 +255,28 @@ enum GNUNET_REGEX_AutomatonType /** * Automaton representation. */ -struct GNUNET_REGEX_Automaton +struct REGEX_INTERNAL_Automaton { /** * Linked list of NFAs used for partial NFA creation. */ - struct GNUNET_REGEX_Automaton *prev; + struct REGEX_INTERNAL_Automaton *prev; /** * Linked list of NFAs used for partial NFA creation. */ - struct GNUNET_REGEX_Automaton *next; + struct REGEX_INTERNAL_Automaton *next; /** * First state of the automaton. This is mainly used for constructing an NFA, * where each NFA itself consists of one or more NFAs linked together. */ - struct GNUNET_REGEX_State *start; + struct REGEX_INTERNAL_State *start; /** * End state of the partial NFA. This is undefined for DFAs */ - struct GNUNET_REGEX_State *end; + struct REGEX_INTERNAL_State *end; /** * Number of states in the automaton. @@ -232,17 +286,17 @@ struct GNUNET_REGEX_Automaton /** * DLL of states. */ - struct GNUNET_REGEX_State *states_head; + struct REGEX_INTERNAL_State *states_head; /** * DLL of states */ - struct GNUNET_REGEX_State *states_tail; + struct REGEX_INTERNAL_State *states_tail; /** * Type of the automaton. */ - enum GNUNET_REGEX_AutomatonType type; + enum REGEX_INTERNAL_AutomatonType type; /** * Regex @@ -253,9 +307,26 @@ struct GNUNET_REGEX_Automaton * Canonical regex (result of RX->NFA->DFA->RX) */ char *canonical_regex; + + /** + * GNUNET_YES, if multi strides have been added to the Automaton. + */ + int is_multistrided; }; +/** + * Construct an NFA by parsing the regex string of length 'len'. + * + * @param regex regular expression string. + * @param len length of the string. + * + * @return NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy. + */ +struct REGEX_INTERNAL_Automaton * +REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len); + + /** * Function that get's passed to automaton traversal and is called before each * next traversal from state 's' using transition 't' to check if traversal @@ -268,9 +339,9 @@ struct GNUNET_REGEX_Automaton * * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop. */ -typedef int (*GNUNET_REGEX_traverse_check) (void *cls, - struct GNUNET_REGEX_State * s, - struct GNUNET_REGEX_Transition * t); +typedef int (*REGEX_INTERNAL_traverse_check) (void *cls, + struct REGEX_INTERNAL_State * s, + struct REGEX_INTERNAL_Transition * t); /** @@ -280,9 +351,9 @@ typedef int (*GNUNET_REGEX_traverse_check) (void *cls, * @param count current count of the state, from 0 to a->state_count -1. * @param s state. */ -typedef void (*GNUNET_REGEX_traverse_action) (void *cls, +typedef void (*REGEX_INTERNAL_traverse_action) (void *cls, const unsigned int count, - struct GNUNET_REGEX_State * s); + struct REGEX_INTERNAL_State * s); /** @@ -299,11 +370,11 @@ typedef void (*GNUNET_REGEX_traverse_action) (void *cls, * @param action_cls closure for action */ void -GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a, - struct GNUNET_REGEX_State *start, - GNUNET_REGEX_traverse_check check, +REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, + struct REGEX_INTERNAL_State *start, + REGEX_INTERNAL_traverse_check check, void *check_cls, - GNUNET_REGEX_traverse_action action, + REGEX_INTERNAL_traverse_action action, void *action_cls); /** @@ -315,40 +386,63 @@ GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a, * * @param a automaton for which the canonical regex should be returned. * - * @return + * @return canonical regex string. */ const char * -GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a); +REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a); /** - * Generate a (pseudo) random regular expression of length 'rx_length', as well - * as a (optional) string that will be matched by the generated regex. The - * returned regex needs to be freed. + * Get the number of transitions that are contained in the given automaton. * - * @param rx_length length of the random regex. - * @param matching_str (optional) pointer to a string that will contain a string - * that will be matched by the generated regex, if - * 'matching_str' pointer was not NULL. + * @param a automaton for which the number of transitions should be returned. * - * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which - * needs to be freed, otherwise. + * @return number of transitions in the given automaton. */ -char * -GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str); +unsigned int +REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a); /** - * Generate a random string of maximum length 'max_len' that only contains literals allowed - * in a regular expression. The string might be 0 chars long but is garantueed - * to be shorter or equal to 'max_len'. - * - * @param max_len maximum length of the string that should be generated. + * Context that contains an id counter for states and transitions as well as a + * DLL of automatons used as a stack for NFA construction. + */ +struct REGEX_INTERNAL_Context +{ + /** + * Unique state id. + */ + unsigned int state_id; + + /** + * Unique transition id. + */ + unsigned int transition_id; + + /** + * DLL of REGEX_INTERNAL_Automaton's used as a stack. + */ + struct REGEX_INTERNAL_Automaton *stack_head; + + /** + * DLL of REGEX_INTERNAL_Automaton's used as a stack. + */ + struct REGEX_INTERNAL_Automaton *stack_tail; +}; + + +/** + * Adds multi-strided transitions to the given 'dfa'. * - * @return random string that needs to be freed. + * @param regex_ctx regex context needed to add transitions to the automaton. + * @param dfa DFA to which the multi strided transitions should be added. + * @param stride_len length of the strides. */ -char * -GNUNET_REGEX_generate_random_string (size_t max_len); +void +REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, + struct REGEX_INTERNAL_Automaton *dfa, + const unsigned int stride_len); + #if 0 /* keep Emacsens' auto-indent happy */