X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=src%2Finclude%2Fgnunet_regex_lib.h;h=e7c525304dcd62bfb4ba6490d3238de3a3d0c646;hb=db3b3123f9d157c71d8b3d9e3b1d5966fe471599;hp=100b73f50e058515cdd58c75cea8be46999af6b7;hpb=807bed4b1337805a14da1364597d9ef59dd5de55;p=oweals%2Fgnunet.git diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h index 100b73f50..e7c525304 100644 --- a/src/include/gnunet_regex_lib.h +++ b/src/include/gnunet_regex_lib.h @@ -37,37 +37,69 @@ extern "C" #endif #endif + /** - * Automaton (NFA/DFA) representation. + * Constant for how many bytes the initial string regex should have. */ -struct GNUNET_REGEX_Automaton; +#define GNUNET_REGEX_INITIAL_BYTES 24 + /** - * State representation. + * Maximum regex string length for use with GNUNET_REGEX_ipv4toregex */ -struct GNUNET_REGEX_State; +#define GNUNET_REGEX_IPV4_REGEXLEN 32 + 6 + /** - * Construct an NFA by parsing the regex string of length 'len'. - * - * @param regex regular expression string. - * @param len length of the string. - * - * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton. + * Maximum regex string length for use with GNUNET_REGEX_ipv6toregex */ -struct GNUNET_REGEX_Automaton * -GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); +#define GNUNET_REGEX_IPV6_REGEXLEN 128 + 6 + + +/** + * Automaton (NFA/DFA) representation. + */ +struct GNUNET_REGEX_Automaton; + + +/** + * Edge representation. + */ +struct GNUNET_REGEX_Edge +{ + /** + * Label of the edge. FIXME: might want to not consume exactly multiples of 8 bits, need length? + */ + const char *label; + + /** + * Destionation of the edge. + */ + struct GNUNET_HashCode destination; +}; + /** * Construct DFA for the given 'regex' of length 'len'. * + * Path compression means, that for example a DFA o -> a -> b -> c -> o will be + * compressed to o -> abc -> o. Note that this parameter influences the + * non-determinism of states of the resulting NFA in the DHT (number of outgoing + * edges with the same label). For example for an application that stores IPv4 + * addresses as bitstrings it could make sense to limit the path compression to + * 4 or 8. + * * @param regex regular expression string. * @param len length of the regular expression. - * - * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton. + * @param max_path_len limit the path compression length to the + * given value. If set to 1, no path compression is applied. Set to 0 for + * maximal possible path compression (generally not desireable). + * @return DFA, needs to be freed using GNUNET_REGEX_automaton_destroy. */ struct GNUNET_REGEX_Automaton * -GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); +GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, + int max_path_len); + /** * Free the memory allocated by constructing the GNUNET_REGEX_Automaton. @@ -78,15 +110,44 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); void GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a); + +/** + * Options for graph creation function + * GNUNET_REGEX_automaton_save_graph. + */ +enum GNUNET_REGEX_GraphSavingOptions +{ + /** + * Default. Do nothing special. + */ + GNUNET_REGEX_GRAPH_DEFAULT = 0, + + /** + * The generated graph will include extra information such as the NFA states + * that were used to generate the DFA state. + */ + GNUNET_REGEX_GRAPH_VERBOSE = 1, + + /** + * Enable graph coloring. Will color each SCC in a different color. + */ + GNUNET_REGEX_GRAPH_COLORING = 2 +}; + + /** * Save the given automaton as a GraphViz dot file. * * @param a the automaton to be saved. * @param filename where to save the file. + * @param options options for graph generation that include coloring or verbose + * mode */ void GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, - const char *filename); + const char *filename, + enum GNUNET_REGEX_GraphSavingOptions options); + /** * Evaluates the given 'string' against the given compiled regex. @@ -101,86 +162,91 @@ GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string); - /** - * Get the starting state of the given automaton 'a'. + * Get the first key for the given 'input_string'. This hashes + * the first x bits of the 'input_string'. * - * @param a automaton. + * @param input_string string. + * @param string_len length of the 'input_string'. + * @param key pointer to where to write the hash code. * - * @return starting state. - */ -struct GNUNET_REGEX_State * -GNUNET_REGEX_automaton_get_start (struct GNUNET_REGEX_Automaton *a); - - -/** * @return number of bits of 'input_string' that have been consumed * to construct the key */ -unsigned int -GNUNET_REGEX_get_first_key (const char *input_string, - GNUNET_HashCode *key); - +size_t +GNUNET_REGEX_get_first_key (const char *input_string, size_t string_len, + struct GNUNET_HashCode * key); /** - * @return GNUNET_OK if the proof is valid for the given key + * Check if the given 'proof' matches the given 'key'. + * + * @param proof partial regex of a state. + * @param key hash of a state. + * + * @return GNUNET_OK if the proof is valid for the given key. */ int GNUNET_REGEX_check_proof (const char *proof, - const GNUNET_HashCode *key); - - -struct GNUNET_REGEX_Edge -{ - const char *label; - GNUNET_HashCode destination; -}; + const struct GNUNET_HashCode *key); +/** + * Iterator callback function. + * + * @param cls closure. + * @param key hash for current state. + * @param proof proof for current state. + * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. + * @param num_edges number of edges leaving current state. + * @param edges edges leaving current state. + */ typedef void (*GNUNET_REGEX_KeyIterator)(void *cls, - const GNUNET_HashCode *key, - const char *proof, - unsigned int num_edges, - const struct GNUNET_REGEX_Edge *edges); - - -int -GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a, - GNUNET_REGEX_KeyIterator iterator, - void *iterator_cls); + const struct GNUNET_HashCode *key, + const char *proof, + int accepting, + unsigned int num_edges, + const struct GNUNET_REGEX_Edge *edges); /** - * Get the next states, starting from states 's'. + * Iterate over all edges starting from start state of automaton 'a'. Calling + * iterator for each edge. * * @param a automaton. - * @param s states. - * @param count number of states given in 's'. Will contain number of - * states that were returned upon return. - * - * @return next states, 'count' will contain the number of states. + * @param iterator iterator called for each edge. + * @param iterator_cls closure. */ -struct GNUNET_REGEX_State ** -GNUNET_REGEX_automaton_states_get_next (struct GNUNET_REGEX_Automaton *a, - struct GNUNET_REGEX_State **s, - unsigned int *count); +void +GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a, + GNUNET_REGEX_KeyIterator iterator, + void *iterator_cls); + /** - * Hash a set of states. + * Create a regex in 'rxstr' from the given 'ip' and 'netmask'. * - * @param a automaton. - * @param s states. - * @param count number of states. - * - * @return hash. + * @param ip IPv4 representation. + * @param netmask netmask for the ip. + * @param rxstr generated regex, must be at least GNUNET_REGEX_IPV4_REGEXLEN + * bytes long. */ -struct GNUNET_HashCode -GNUNET_REGEX_automaton_states_hash (struct GNUNET_REGEX_Automaton *a, - struct GNUNET_REGEX_State **s, - unsigned int count); +void +GNUNET_REGEX_ipv4toregex (const struct in_addr *ip, const char *netmask, + char *rxstr); +/** + * Create a regex in 'rxstr' from the given 'ipv6' and 'prefixlen'. + * + * @param ipv6 IPv6 representation. + * @param prefixlen length of the ipv6 prefix. + * @param rxstr generated regex, must be at least GNUNET_REGEX_IPV6_REGEXLEN + * bytes long. + */ +void +GNUNET_REGEX_ipv6toregex (const struct in6_addr *ipv6, + unsigned int prefixlen, char *rxstr); #if 0 /* keep Emacsens' auto-indent happy */ @@ -192,4 +258,3 @@ GNUNET_REGEX_automaton_states_hash (struct GNUNET_REGEX_Automaton *a, /* end of gnunet_regex_lib.h */ #endif -