2 This file is part of GNUnet
3 Copyright (C) 2012, 2013 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
16 * @file regex/regex_internal_lib.h
17 * @brief library to parse regular expressions into a DFA
18 * @author Maximilian Szengel
21 #ifndef REGEX_INTERNAL_LIB_H
22 #define REGEX_INTERNAL_LIB_H
24 #include "gnunet_util_lib.h"
25 #include "gnunet_dht_service.h"
26 #include "gnunet_statistics_service.h"
27 #include "regex_block_lib.h"
32 #if 0 /* keep Emacsens' auto-indent happy */
39 * Automaton (NFA/DFA) representation.
41 struct REGEX_INTERNAL_Automaton;
45 * Construct DFA for the given 'regex' of length 'len'.
47 * Path compression means that, for example, a DFA o -> a -> b -> c -> o will be
48 * compressed to o -> abc -> o. Note that this parameter influences the
49 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
50 * edges with the same label). For example for an application that stores IPv4
51 * addresses as bitstrings it could make sense to limit the path compression to
54 * @param regex regular expression string.
55 * @param len length of the regular expression.
56 * @param max_path_len limit the path compression length to the
57 * given value. If set to 1, no path compression is applied. Set to 0 for
58 * maximal possible path compression (generally not desirable).
59 * @return DFA, needs to be freed using #REGEX_INTERNAL_automaton_destroy().
61 struct REGEX_INTERNAL_Automaton *
62 REGEX_INTERNAL_construct_dfa (const char *regex,
64 unsigned int max_path_len);
68 * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
71 * @param a automaton to be destroyed.
74 REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a);
78 * Evaluates the given 'string' against the given compiled regex.
81 * @param string string to check.
83 * @return 0 if string matches, non-zero otherwise.
86 REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a,
91 * Get the first key for the given @a input_string. This hashes
92 * the first x bits of the @a input_string.
94 * @param input_string string.
95 * @param string_len length of the @a input_string.
96 * @param key pointer to where to write the hash code.
97 * @return number of bits of @a input_string that have been consumed
98 * to construct the key
101 REGEX_INTERNAL_get_first_key (const char *input_string,
103 struct GNUNET_HashCode * key);
107 * Iterator callback function.
109 * @param cls closure.
110 * @param key hash for current state.
111 * @param proof proof for current state
112 * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not.
113 * @param num_edges number of edges leaving current state.
114 * @param edges edges leaving current state.
117 (*REGEX_INTERNAL_KeyIterator)(void *cls,
118 const struct GNUNET_HashCode *key,
121 unsigned int num_edges,
122 const struct REGEX_BLOCK_Edge *edges);
126 * Iterate over all edges starting from the start state of automaton 'a', calling
127 * the iterator for each edge.
129 * @param a automaton.
130 * @param iterator iterator called for each edge.
131 * @param iterator_cls closure.
134 REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
135 REGEX_INTERNAL_KeyIterator iterator,
140 * Iterate over all edges of automaton 'a' that are reachable from a state with
141 * a proof of at least #GNUNET_REGEX_INITIAL_BYTES characters.
143 * Call the iterator for each such edge.
145 * @param a automaton.
146 * @param iterator iterator called for each reachable edge.
147 * @param iterator_cls closure.
150 REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
151 REGEX_INTERNAL_KeyIterator iterator,
157 * Handle to store cached data about a regex announce.
159 struct REGEX_INTERNAL_Announcement;
162 * Handle to store data about a regex search.
164 struct REGEX_INTERNAL_Search;
168 * Announce a regular expression: put all states of the automaton in the DHT.
169 * Does not free resources; call #REGEX_INTERNAL_announce_cancel() for that.
171 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
172 * @param priv our private key, must remain valid until the announcement is cancelled
173 * @param regex Regular expression to announce.
174 * @param compression How many characters per edge can we squeeze?
175 * @param stats Optional statistics handle to report usage. Can be NULL.
176 * @return Handle to reuse or free cached resources.
177 * Must be freed by calling #REGEX_INTERNAL_announce_cancel().
179 struct REGEX_INTERNAL_Announcement *
180 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
181 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv,
183 uint16_t compression,
184 struct GNUNET_STATISTICS_Handle *stats);
188 * Re-announce a previously announced regular expression.
189 * Uses caching to speed up the process.
191 * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call.
194 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h);
198 * Clear all cached data used by a regex announce.
199 * Does not close DHT connection.
201 * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call.
204 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h);
208 * Search callback function.
210 * @param cls Closure provided in #REGEX_INTERNAL_search().
211 * @param id Peer providing a regex that matches the string.
212 * @param get_path Path of the get request.
213 * @param get_path_length Length of @a get_path.
214 * @param put_path Path of the put request.
215 * @param put_path_length Length of the @a put_path.
218 (*REGEX_INTERNAL_Found)(void *cls,
219 const struct GNUNET_PeerIdentity *id,
220 const struct GNUNET_PeerIdentity *get_path,
221 unsigned int get_path_length,
222 const struct GNUNET_PeerIdentity *put_path,
223 unsigned int put_path_length);
227 * Search for a peer offering a regex matching a certain string in the DHT.
228 * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results are returned.
231 * @param dht An existing and valid DHT service handle.
232 * @param string String to match against the regexes in the DHT.
233 * @param callback Callback for found peers.
234 * @param callback_cls Closure for @a callback.
235 * @param stats Optional statistics handle to report usage. Can be NULL.
236 * @return Handle to stop search and free resources.
237 * Must be freed by calling #REGEX_INTERNAL_search_cancel().
239 struct REGEX_INTERNAL_Search *
240 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
242 REGEX_INTERNAL_Found callback,
244 struct GNUNET_STATISTICS_Handle *stats);
247 * Stop search and free all data used by a #REGEX_INTERNAL_search() call.
248 * Does not close DHT connection.
250 * @param h Handle returned by a previous #REGEX_INTERNAL_search() call.
253 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h);
256 #if 0 /* keep Emacsens' auto-indent happy */
263 /* end of regex_internal_lib.h */