2 This file is part of GNUnet
3 (C) 2012, 2013 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file regex/regex_internal_lib.h
22 * @brief library to parse regular expressions into dfa
23 * @author Maximilian Szengel
26 #ifndef REGEX_INTERNAL_LIB_H
27 #define REGEX_INTERNAL_LIB_H
29 #include "gnunet_util_lib.h"
30 #include "gnunet_dht_service.h"
31 #include "gnunet_statistics_service.h"
36 #if 0 /* keep Emacsens' auto-indent happy */
43 * Automaton (NFA/DFA) representation.
45 struct REGEX_INTERNAL_Automaton;
49 * Edge representation.
51 struct REGEX_INTERNAL_Edge
54 * Label of the edge. FIXME: might want to not consume exactly multiples of 8 bits, need length?
59 * Destination of the edge.
61 struct GNUNET_HashCode destination;
66 * Construct DFA for the given 'regex' of length 'len'.
68 * Path compression means that, for example, a DFA o -> a -> b -> c -> o will be
69 * compressed to o -> abc -> o. Note that this parameter influences the
70 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
71 * edges with the same label). For example for an application that stores IPv4
72 * addresses as bitstrings it could make sense to limit the path compression to
75 * @param regex regular expression string.
76 * @param len length of the regular expression.
77 * @param max_path_len limit the path compression length to the
78 * given value. If set to 1, no path compression is applied. Set to 0 for
79 * maximal possible path compression (generally not desirable).
80 * @return DFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
82 struct REGEX_INTERNAL_Automaton *
83 REGEX_INTERNAL_construct_dfa (const char *regex, const size_t len,
84 unsigned int max_path_len);
88 * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
91 * @param a automaton to be destroyed.
94 REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a);
98 * Evaluates the given 'string' against the given compiled regex.
100 * @param a automaton.
101 * @param string string to check.
103 * @return 0 if string matches, non-zero otherwise.
106 REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a,
111 * Get the first key for the given 'input_string'. This hashes
112 * the first x bits of the 'input_string'.
114 * @param input_string string.
115 * @param string_len length of the 'input_string'.
116 * @param key pointer to where to write the hash code.
118 * @return number of bits of 'input_string' that have been consumed
119 * to construct the key
122 REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len,
123 struct GNUNET_HashCode * key);
127 * Check if the given 'proof' matches the given 'key'.
129 * @param proof partial regex of a state.
130 * @param key hash of a state.
132 * @return GNUNET_OK if the proof is valid for the given key.
135 REGEX_INTERNAL_check_proof (const char *proof,
136 const struct GNUNET_HashCode *key);
140 * Iterator callback function.
142 * @param cls closure.
143 * @param key hash for current state.
144 * @param proof proof for current state.
145 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
146 * @param num_edges number of edges leaving current state.
147 * @param edges edges leaving current state.
149 typedef void (*REGEX_INTERNAL_KeyIterator)(void *cls,
150 const struct GNUNET_HashCode *key,
153 unsigned int num_edges,
154 const struct REGEX_INTERNAL_Edge *edges);
158 * Iterate over all edges, starting from the start state of automaton 'a',
159 * calling the iterator for each edge.
161 * @param a automaton.
162 * @param iterator iterator called for each edge.
163 * @param iterator_cls closure.
166 REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
167 REGEX_INTERNAL_KeyIterator iterator,
173 * Handle to store cached data about a regex announce.
175 struct REGEX_INTERNAL_Announcement;
178 * Handle to store data about a regex search.
180 struct REGEX_INTERNAL_Search;
183 * Announce a regular expression: put all states of the automaton in the DHT.
184 * Does not free resources, must call REGEX_INTERNAL_announce_cancel for that.
186 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
187 * @param id ID to announce as provider of regex. Own ID in most cases.
188 * @param regex Regular expression to announce.
189 * @param compression How many characters per edge can we squeeze?
190 * @param stats Optional statistics handle to report usage. Can be NULL.
192 * @return Handle to reuse or free cached resources.
193 * Must be freed by calling REGEX_INTERNAL_announce_cancel.
195 struct REGEX_INTERNAL_Announcement *
196 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
197 const struct GNUNET_PeerIdentity *id,
199 uint16_t compression,
200 struct GNUNET_STATISTICS_Handle *stats);
203 * Re-announce a previously announced regular expression.
204 * Uses caching to speed up the process.
206 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
209 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h);
213 * Clear all cached data used by a regex announce.
214 * Does not close DHT connection.
216 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
219 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h);
223 * Search callback function.
225 * @param cls Closure provided in REGEX_INTERNAL_search.
226 * @param id Peer providing a regex that matches the string.
227 * @param get_path Path of the get request.
228 * @param get_path_length Length of get_path.
229 * @param put_path Path of the put request.
230 * @param put_path_length Length of the put_path.
232 typedef void (*REGEX_INTERNAL_Found)(void *cls,
233 const struct GNUNET_PeerIdentity *id,
234 const struct GNUNET_PeerIdentity *get_path,
235 unsigned int get_path_length,
236 const struct GNUNET_PeerIdentity *put_path,
237 unsigned int put_path_length);
241 * Search for a peer offering a regex matching a certain string in the DHT.
242 * The search runs until REGEX_INTERNAL_search_cancel is called, even if results
245 * @param dht An existing and valid DHT service handle.
246 * @param string String to match against the regexes in the DHT.
247 * @param callback Callback for found peers.
248 * @param callback_cls Closure for @c callback.
249 * @param stats Optional statistics handle to report usage. Can be NULL.
251 * @return Handle to stop search and free resources.
252 * Must be freed by calling REGEX_INTERNAL_search_cancel.
254 struct REGEX_INTERNAL_Search *
255 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
257 REGEX_INTERNAL_Found callback,
259 struct GNUNET_STATISTICS_Handle *stats);
262 * Stop search and free all data used by a REGEX_INTERNAL_search call.
263 * Does not close DHT connection.
265 * @param h Handle returned by a previous REGEX_INTERNAL_search call.
268 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h);
271 #if 0 /* keep Emacsens' auto-indent happy */
278 /* end of regex_internal_lib.h */