2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file include/gnunet_regex_lib.h
22 * @brief library to parse regular expressions into dfa
23 * @author Maximilian Szengel
27 #ifndef GNUNET_REGEX_LIB_H
28 #define GNUNET_REGEX_LIB_H
30 #include "gnunet_util_lib.h"
31 #include "gnunet_dht_service.h"
32 #include "gnunet_statistics_service.h"
37 #if 0 /* keep Emacsens' auto-indent happy */
/**
 * Constant for how many bytes the initial string regex should have.
 */
#define GNUNET_REGEX_INITIAL_BYTES 24

/**
 * Maximum regex string length for use with GNUNET_REGEX_ipv4toregex.
 * The replacement list is parenthesized so that the macro expands as a
 * single value inside larger expressions (e.g. 2 * GNUNET_REGEX_IPV4_REGEXLEN).
 */
#define GNUNET_REGEX_IPV4_REGEXLEN (32 + 6)

/**
 * Maximum regex string length for use with GNUNET_REGEX_ipv6toregex.
 * Parenthesized for the same reason as the IPv4 constant.
 */
#define GNUNET_REGEX_IPV6_REGEXLEN (128 + 6)
62 * Automaton (NFA/DFA) representation.
/* Opaque automaton type: only this forward declaration is visible here, and
 * the functions declared in this header refer to it through pointers. */
64 struct GNUNET_REGEX_Automaton;
68 * Edge representation.
/* One outgoing edge of an automaton state, as handed to
 * GNUNET_REGEX_KeyIterator callbacks.
 * NOTE(review): the surrounding text documents a label for the edge, but only
 * the 'destination' member (and no braces) is visible in this view — confirm
 * the full member list against the original header. */
70 struct GNUNET_REGEX_Edge
73 * Label of the edge. FIXME: might want to not consume exactly multiples of 8 bits, need length?
78 * Destination of the edge.
80 struct GNUNET_HashCode destination;
85 * Construct DFA for the given 'regex' of length 'len'.
87 * Path compression means that, for example, a DFA o -> a -> b -> c -> o will be
88 * compressed to o -> abc -> o. Note that this parameter influences the
89 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
90 * edges with the same label). For example for an application that stores IPv4
91 * addresses as bitstrings it could make sense to limit the path compression to
94 * @param regex regular expression string.
95 * @param len length of the regular expression.
96 * @param max_path_len limit the path compression length to the
97 * given value. If set to 1, no path compression is applied. Set to 0 for
98 * maximal possible path compression (generally not desirable).
99 * @return DFA, needs to be freed using GNUNET_REGEX_automaton_destroy.
/* Builds a DFA from 'regex', whose length is 'len'. 'max_path_len' caps path
 * compression: 1 disables it and 0 requests the maximum, as described in the
 * parameter notes above. The returned automaton is released with
 * GNUNET_REGEX_automaton_destroy. */
101 struct GNUNET_REGEX_Automaton *
102 GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
103 unsigned int max_path_len);
107 * Free the memory allocated by constructing the GNUNET_REGEX_Automaton.
110 * @param a automaton to be destroyed.
/* Releases the memory that was allocated when automaton 'a' was constructed.
 * NOTE(review): no return type is visible on this declaration in this view —
 * confirm against the original header. */
113 GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a);
117 * Options for graph creation function
118 * GNUNET_REGEX_automaton_save_graph.
/* Option values accepted by GNUNET_REGEX_automaton_save_graph. The visible
 * enumerators have the values 0, 1 and 2; presumably VERBOSE and COLORING can
 * be combined with bitwise OR — TODO confirm against the implementation.
 * NOTE(review): the enum's braces are not visible in this view. */
120 enum GNUNET_REGEX_GraphSavingOptions
123 * Default. Do nothing special.
125 GNUNET_REGEX_GRAPH_DEFAULT = 0,
128 * The generated graph will include extra information such as the NFA states
129 * that were used to generate the DFA state.
131 GNUNET_REGEX_GRAPH_VERBOSE = 1,
134 * Enable graph coloring. Will color each SCC in a different color.
136 GNUNET_REGEX_GRAPH_COLORING = 2
141 * Save the given automaton as a GraphViz dot file.
143 * @param a the automaton to be saved.
144 * @param filename where to save the file.
145 * @param options options for graph generation that include coloring or verbose
/* Saves automaton 'a' to 'filename' as a GraphViz dot file; 'options' selects
 * the extras described by enum GNUNET_REGEX_GraphSavingOptions.
 * NOTE(review): the return type is not visible in this view — confirm against
 * the original header. */
149 GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
150 const char *filename,
151 enum GNUNET_REGEX_GraphSavingOptions options);
155 * Evaluates the given 'string' against the given compiled regex.
157 * @param a automaton.
158 * @param string string to check.
160 * @return 0 if string matches, non-zero otherwise.
/* Evaluates a string against the compiled automaton 'a'; per the description
 * above, a result of 0 means the string matched.
 * NOTE(review): the declaration is cut off after 'a' in this view — the
 * documented 'string' argument and the return type are not visible. */
163 GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a,
168 * Get the first key for the given 'input_string'. This hashes
169 * the first x bits of the 'input_string'.
171 * @param input_string string.
172 * @param string_len length of the 'input_string'.
173 * @param key pointer to where to write the hash code.
175 * @return number of bits of 'input_string' that have been consumed
176 * to construct the key
/* Hashes a leading portion of 'input_string' (of length 'string_len') into
 * '*key'; the description above states that the result is the number of bits
 * of the input that were consumed.
 * NOTE(review): the return type is not visible in this view. */
179 GNUNET_REGEX_get_first_key (const char *input_string, size_t string_len,
180 struct GNUNET_HashCode * key);
184 * Check if the given 'proof' matches the given 'key'.
186 * @param proof partial regex of a state.
187 * @param key hash of a state.
189 * @return GNUNET_OK if the proof is valid for the given key.
/* Checks whether 'proof' (the partial regex of a state) matches 'key' (the
 * hash of that state); GNUNET_OK signals a valid proof.
 * NOTE(review): the return type is not visible in this view. */
192 GNUNET_REGEX_check_proof (const char *proof,
193 const struct GNUNET_HashCode *key);
197 * Iterator callback function.
199 * @param cls closure.
200 * @param key hash for current state.
201 * @param proof proof for current state.
202 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
203 * @param num_edges number of edges leaving current state.
204 * @param edges edges leaving current state.
/* Callback type accepted by GNUNET_REGEX_iterate_all_edges as 'iterator'.
 * 'edges' points at 'num_edges' edges leaving the state identified by 'key'.
 * NOTE(review): the documentation above also lists 'proof' and 'accepting'
 * parameters, which do not appear in the parameter list visible here —
 * confirm against the original header. */
206 typedef void (*GNUNET_REGEX_KeyIterator)(void *cls,
207 const struct GNUNET_HashCode *key,
210 unsigned int num_edges,
211 const struct GNUNET_REGEX_Edge *edges);
215 * Iterate over all edges starting from start state of automaton 'a'. Calling
216 * iterator for each edge.
218 * @param a automaton.
219 * @param iterator iterator called for each edge.
220 * @param iterator_cls closure.
/* Iterates over the edges of automaton 'a' beginning at its start state and
 * calls 'iterator' for them, as described above.
 * NOTE(review): the declaration is cut off after 'iterator' in this view —
 * the documented 'iterator_cls' argument and the return type are not visible. */
223 GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
224 GNUNET_REGEX_KeyIterator iterator,
229 * Create a regex in 'rxstr' from the given 'ip' and 'netmask'.
231 * @param ip IPv4 representation.
232 * @param netmask netmask for the ip.
233 * @param rxstr generated regex, must be at least GNUNET_REGEX_IPV4_REGEXLEN
/* Produces a regex string from the IPv4 address 'ip' and 'netmask'.
 * NOTE(review): the declaration is cut off after 'netmask' in this view — the
 * documented 'rxstr' output argument and the return type are not visible. */
237 GNUNET_REGEX_ipv4toregex (const struct in_addr *ip, const char *netmask,
242 * Create a regex in 'rxstr' from the given 'ipv6' and 'prefixlen'.
244 * @param ipv6 IPv6 representation.
245 * @param prefixlen length of the ipv6 prefix.
246 * @param rxstr generated regex, must be at least GNUNET_REGEX_IPV6_REGEXLEN
/* Produces a regex string in 'rxstr' from the IPv6 address 'ipv6' and the
 * prefix length 'prefixlen'. The caller supplies 'rxstr', sized according to
 * GNUNET_REGEX_IPV6_REGEXLEN as noted above.
 * NOTE(review): the return type is not visible in this view. */
250 GNUNET_REGEX_ipv6toregex (const struct in6_addr *ipv6,
251 unsigned int prefixlen, char *rxstr);
256 * Handle to store cached data about a regex announce.
/* Opaque handle type (forward declaration only): returned by
 * GNUNET_REGEX_announce and passed to GNUNET_REGEX_reannounce and
 * GNUNET_REGEX_announce_cancel. */
258 struct GNUNET_REGEX_announce_handle;
261 * Handle to store data about a regex search.
/* Opaque handle type (forward declaration only): returned by
 * GNUNET_REGEX_search and passed to GNUNET_REGEX_search_cancel. */
263 struct GNUNET_REGEX_search_handle;
266 * Announce a regular expression: put all states of the automaton in the DHT.
267 * Does not free resources, must call GNUNET_REGEX_announce_cancel for that.
269 * @param dht An existing and valid DHT service handle.
270 * @param id ID to announce as provider of regex. Own ID in most cases.
271 * @param regex Regular expression to announce.
272 * @param compression How many characters per edge can we squeeze?
273 * @param stats Optional statistics handle to report usage. Can be NULL.
275 * @return Handle to reuse or free cached resources.
276 * Must be freed by calling GNUNET_REGEX_announce_cancel.
/* Announces a regular expression in the DHT on behalf of peer 'id' and
 * returns a handle; the handle must be released with
 * GNUNET_REGEX_announce_cancel. 'stats' may be NULL.
 * NOTE(review): the documented 'regex' parameter does not appear in the
 * parameter list visible here — confirm against the original header. */
278 struct GNUNET_REGEX_announce_handle *
279 GNUNET_REGEX_announce (struct GNUNET_DHT_Handle *dht,
280 struct GNUNET_PeerIdentity *id,
282 uint16_t compression,
283 struct GNUNET_STATISTICS_Handle *stats);
286 * Announce again a regular expression previously announced.
287 * Uses caching to speed up the process.
289 * @param h Handle returned by a previous GNUNET_REGEX_announce call.
/* Repeats an announcement using the handle 'h' obtained from an earlier
 * GNUNET_REGEX_announce call.
 * NOTE(review): the return type is not visible in this view. */
292 GNUNET_REGEX_reannounce (struct GNUNET_REGEX_announce_handle *h);
296 * Clear all cached data used by a regex announce.
297 * Does not close DHT connection.
299 * @param h Handle returned by a previous GNUNET_REGEX_announce call.
/* Clears the cached data held by announce handle 'h'; per the description
 * above, the DHT connection itself is left open.
 * NOTE(review): the return type is not visible in this view. */
302 GNUNET_REGEX_announce_cancel (struct GNUNET_REGEX_announce_handle *h);
306 * Search callback function.
308 * @param cls Closure provided in GNUNET_REGEX_search.
309 * @param id Peer providing a regex that matches the string.
310 * @param get_path Path of the get request.
311 * @param get_path_length Length of get_path.
312 * @param put_path Path of the put request.
313 * @param put_path_length Length of the put_path.
/* Callback type accepted by GNUNET_REGEX_search as 'callback'. It reports a
 * peer 'id' together with the GET and PUT paths; each path pointer is paired
 * with its own length argument. */
315 typedef void (*GNUNET_REGEX_Found)(void *cls,
316 const struct GNUNET_PeerIdentity *id,
317 const struct GNUNET_PeerIdentity *get_path,
318 unsigned int get_path_length,
319 const struct GNUNET_PeerIdentity *put_path,
320 unsigned int put_path_length);
324 * Search for a peer offering a regex matching certain string in the DHT.
325 * The search runs until GNUNET_REGEX_search_cancel is called, even if results
328 * @param dht An existing and valid DHT service handle.
329 * @param string String to match against the regexes in the DHT.
330 * @param callback Callback for found peers.
331 * @param callback_cls Closure for @c callback.
332 * @param stats Optional statistics handle to report usage. Can be NULL.
334 * @return Handle to stop search and free resources.
335 * Must be freed by calling GNUNET_REGEX_search_cancel.
/* Starts a DHT search for peers offering a matching regex and returns a
 * handle; results are delivered through 'callback', and the handle must be
 * released with GNUNET_REGEX_search_cancel. 'stats' may be NULL.
 * NOTE(review): the documented 'string' and 'callback_cls' parameters do not
 * appear in the parameter list visible here — confirm against the original
 * header. */
337 struct GNUNET_REGEX_search_handle *
338 GNUNET_REGEX_search (struct GNUNET_DHT_Handle *dht,
340 GNUNET_REGEX_Found callback,
342 struct GNUNET_STATISTICS_Handle *stats);
345 * Stop search and free all data used by a GNUNET_REGEX_search call.
346 * Does not close DHT connection.
348 * @param h Handle returned by a previous GNUNET_REGEX_search call.
/* Stops the search identified by 'h' and frees the data it used; per the
 * description above, the DHT connection itself is left open.
 * NOTE(review): the return type is not visible in this view. */
351 GNUNET_REGEX_search_cancel (struct GNUNET_REGEX_search_handle *h);
354 #if 0 /* keep Emacsens' auto-indent happy */
361 /* end of gnunet_regex_lib.h */