2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "gnunet_regex_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
32 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
34 #define DHT_REPLICATION 5
35 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
37 struct GNUNET_REGEX_announce_handle
40 * DHT handle to use, must be initialized externally.
42 struct GNUNET_DHT_Handle *dht;
50 * Automaton representation of the regex (expensive to build).
52 struct GNUNET_REGEX_Automaton* dfa;
55 * Identity under which to announce the regex.
57 struct GNUNET_PeerIdentity *id;
60 * Optional statistics handle to report usage. Can be NULL.
62 struct GNUNET_STATISTICS_Handle *stats;
67 * Regex callback iterator to store own service description in the DHT.
70 * @param key hash for current state.
71 * @param proof proof for current state.
72 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
73 * @param num_edges number of edges leaving current state.
74 * @param edges edges leaving current state.
77 regex_iterator (void *cls,
78 const struct GNUNET_HashCode *key,
81 unsigned int num_edges,
82 const struct GNUNET_REGEX_Edge *edges)
84 struct GNUNET_REGEX_announce_handle *h = cls;
85 struct RegexBlock *block;
86 struct RegexEdge *block_edge;
87 enum GNUNET_DHT_RouteOption opt;
94 LOG (GNUNET_ERROR_TYPE_DEBUG,
95 " regex dht put for state %s\n",
97 LOG (GNUNET_ERROR_TYPE_DEBUG, " proof: %s\n", proof);
98 LOG (GNUNET_ERROR_TYPE_DEBUG, " num edges: %u\n", num_edges);
100 opt = GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE;
101 if (GNUNET_YES == accepting)
103 struct RegexAccept block;
105 LOG (GNUNET_ERROR_TYPE_DEBUG,
106 " state %s is accepting, putting own id\n",
108 size = sizeof (block);
111 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
113 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
114 sizeof (block), GNUNET_NO);
116 GNUNET_DHT_put (h->dht, key,
117 2, /* FIXME option */
118 opt /* | GNUNET_DHT_RO_RECORD_ROUTE*/,
119 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
122 GNUNET_TIME_relative_to_absolute (GNUNET_TIME_UNIT_HOURS), /* FIXME: expiration time should be option */
123 GNUNET_TIME_UNIT_HOURS, /* FIXME option */
127 size = sizeof (struct RegexBlock) + len;
128 block = GNUNET_malloc (size);
131 block->n_proof = htonl (len);
132 block->n_edges = htonl (num_edges);
133 block->accepting = htonl (accepting);
135 /* Store the proof at the end of the block. */
136 aux = (char *) &block[1];
137 memcpy (aux, proof, len);
140 /* Store each edge in a variable length MeshEdge struct at the
141 * very end of the MeshRegexBlock structure.
143 for (i = 0; i < num_edges; i++)
145 LOG (GNUNET_ERROR_TYPE_DEBUG, " edge %s towards %s\n",
146 edges[i].label, GNUNET_h2s(&edges[i].destination));
148 /* aux points at the end of the last block */
149 len = strlen (edges[i].label);
150 size += sizeof (struct RegexEdge) + len;
151 // Calculate offset FIXME is this ok? use size instead?
152 offset = aux - (char *) block;
153 block = GNUNET_realloc (block, size);
154 aux = &((char *) block)[offset];
155 block_edge = (struct RegexEdge *) aux;
156 block_edge->key = edges[i].destination;
157 block_edge->n_token = htonl (len);
158 aux = (char *) &block_edge[1];
159 memcpy (aux, edges[i].label, len);
163 GNUNET_DHT_put(h->dht, key,
164 DHT_REPLICATION, /* FIXME OPTION */
166 GNUNET_BLOCK_TYPE_REGEX, size,
168 GNUNET_TIME_relative_to_absolute (DHT_TTL), /* FIXME: this should be an option */
171 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
173 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
180 struct GNUNET_REGEX_announce_handle *
181 GNUNET_REGEX_announce (struct GNUNET_DHT_Handle *dht,
182 struct GNUNET_PeerIdentity *id,
184 uint16_t compression,
185 struct GNUNET_STATISTICS_Handle *stats)
187 struct GNUNET_REGEX_announce_handle *h;
189 GNUNET_assert (NULL != dht);
190 h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_announce_handle));
195 h->dfa = GNUNET_REGEX_construct_dfa (regex,
198 GNUNET_REGEX_reannounce (h);
203 GNUNET_REGEX_reannounce (struct GNUNET_REGEX_announce_handle *h)
205 GNUNET_REGEX_iterate_all_edges (h->dfa, ®ex_iterator, h);
209 GNUNET_REGEX_announce_cancel (struct GNUNET_REGEX_announce_handle *h)
211 GNUNET_REGEX_automaton_destroy (h->dfa);
216 /******************************************************************************/
220 * Struct to keep state of running searches that have consumed a part of
223 struct RegexSearchContext
226 * Part of the description already consumed by
227 * this particular search branch.
232 * Information about the search.
234 struct GNUNET_REGEX_search_handle *info;
237 * We just want to look for one edge, the longer the better.
240 unsigned int longest_match;
243 * Destination hash of the longest match.
245 struct GNUNET_HashCode hash;
250 * Struct to keep information of searches of services described by a regex
251 * using a user-provided string service description.
253 struct GNUNET_REGEX_search_handle
256 * DHT handle to use, must be initialized externally.
258 struct GNUNET_DHT_Handle *dht;
261 * Optional statistics handle to report usage. Can be NULL.
263 struct GNUNET_STATISTICS_Handle *stats;
266 * User provided description of the searched service.
273 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
276 * Results from running DHT GETs.
278 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
281 * Contexts, for each running DHT GET. Free all on end of search.
283 struct RegexSearchContext **contexts;
286 * Number of contexts (branches/steps in search).
288 unsigned int n_contexts;
291 * @param callback Callback for found peers.
293 GNUNET_REGEX_Found callback;
296 * @param callback_cls Closure for @c callback.
304 * Jump to the next edge, with the longest matching token.
306 * @param block Block found in the DHT.
307 * @param size Size of the block.
308 * @param ctx Context of the search.
310 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
313 regex_next_edge (const struct RegexBlock *block,
315 struct RegexSearchContext *ctx);
319 * Function to process DHT string to regex matching.
320 * Called on each result obtained for the DHT search.
322 * @param cls Closure (search context).
323 * @param exp When will this value expire.
324 * @param key Key of the result.
325 * @param get_path Path of the get request.
326 * @param get_path_length Lenght of get_path.
327 * @param put_path Path of the put request.
328 * @param put_path_length Length of the put_path.
329 * @param type Type of the result.
330 * @param size Number of bytes in data.
331 * @param data Pointer to the result data.
334 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
335 const struct GNUNET_HashCode * key,
336 const struct GNUNET_PeerIdentity *get_path,
337 unsigned int get_path_length,
338 const struct GNUNET_PeerIdentity *put_path,
339 unsigned int put_path_length,
340 enum GNUNET_BLOCK_Type type,
341 size_t size, const void *data)
343 const struct RegexAccept *block = data;
344 struct RegexSearchContext *ctx = cls;
345 struct GNUNET_REGEX_search_handle *info = ctx->info;
347 LOG (GNUNET_ERROR_TYPE_DEBUG, "Got regex results from DHT!\n");
348 LOG (GNUNET_ERROR_TYPE_DEBUG, " for %s\n", info->description);
350 GNUNET_STATISTICS_update (info->stats, "# regex accepting blocks found",
352 GNUNET_STATISTICS_update (info->stats, "# regex accepting block bytes found",
355 info->callback (info->callback_cls,
357 get_path, get_path_length,
358 put_path, put_path_length);
364 * Find a path to a peer that offers a regex servcie compatible
365 * with a given string.
367 * @param key The key of the accepting state.
368 * @param ctx Context containing info about the string, tunnel, etc.
371 regex_find_path (const struct GNUNET_HashCode *key,
372 struct RegexSearchContext *ctx)
374 struct GNUNET_DHT_GetHandle *get_h;
376 LOG (GNUNET_ERROR_TYPE_DEBUG, "Found peer by service\n");
377 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
378 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
379 key, /* key to search */
380 DHT_REPLICATION, /* replication level */
381 GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE |
382 GNUNET_DHT_RO_RECORD_ROUTE,
383 NULL, /* xquery */ // FIXME BLOOMFILTER
384 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
385 &dht_get_string_accept_handler, ctx);
386 GNUNET_break (GNUNET_OK ==
387 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
390 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
395 * Function to process DHT string to regex matching.
396 * Called on each result obtained for the DHT search.
398 * @param cls closure (search context)
399 * @param exp when will this value expire
400 * @param key key of the result
401 * @param get_path path of the get request (not used)
402 * @param get_path_length lenght of get_path (not used)
403 * @param put_path path of the put request (not used)
404 * @param put_path_length length of the put_path (not used)
405 * @param type type of the result
406 * @param size number of bytes in data
407 * @param data pointer to the result data
409 * TODO: re-issue the request after certain time? cancel after X results?
412 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
413 const struct GNUNET_HashCode * key,
414 const struct GNUNET_PeerIdentity *get_path,
415 unsigned int get_path_length,
416 const struct GNUNET_PeerIdentity *put_path,
417 unsigned int put_path_length,
418 enum GNUNET_BLOCK_Type type,
419 size_t size, const void *data)
421 const struct RegexBlock *block = data;
422 struct RegexSearchContext *ctx = cls;
423 struct GNUNET_REGEX_search_handle *info = ctx->info;
427 LOG (GNUNET_ERROR_TYPE_DEBUG, "DHT GET STRING RETURNED RESULTS\n");
428 LOG (GNUNET_ERROR_TYPE_DEBUG, " key: %s\n", GNUNET_h2s (key));
430 copy = GNUNET_malloc (size);
431 memcpy (copy, data, size);
432 GNUNET_break (GNUNET_OK ==
433 GNUNET_CONTAINER_multihashmap_put(info->dht_get_results, key, copy,
434 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
435 len = ntohl (block->n_proof);
439 memcpy (proof, &block[1], len);
441 if (GNUNET_OK != GNUNET_REGEX_check_proof (proof, key))
447 len = strlen (info->description);
448 if (len == ctx->position) // String processed
450 if (GNUNET_YES == ntohl (block->accepting))
452 regex_find_path(key, ctx);
456 LOG (GNUNET_ERROR_TYPE_DEBUG, " block not accepting!\n");
457 // FIXME REGEX this block not successful, wait for more? start timeout?
462 regex_next_edge (block, size, ctx);
469 * Iterator over found existing mesh regex blocks that match an ongoing search.
472 * @param key current key code
473 * @param value value in the hash map
474 * @return GNUNET_YES if we should continue to iterate,
478 regex_result_iterator (void *cls,
479 const struct GNUNET_HashCode * key,
482 struct RegexBlock *block = value;
483 struct RegexSearchContext *ctx = cls;
485 if (GNUNET_YES == ntohl(block->accepting) &&
486 ctx->position == strlen (ctx->info->description))
488 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Found accepting known block\n");
489 regex_find_path (key, ctx);
490 return GNUNET_YES; // We found an accept state!
494 LOG (GNUNET_ERROR_TYPE_DEBUG, "* %u, %u, [%u]\n",
495 ctx->position, strlen(ctx->info->description),
496 ntohl(block->accepting));
499 regex_next_edge(block, SIZE_MAX, ctx);
501 GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
509 * Iterator over edges in a regex block retrieved from the DHT.
511 * @param cls Closure (context of the search).
512 * @param token Token that follows to next state.
513 * @param len Lenght of token.
514 * @param key Hash of next state.
516 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
519 regex_edge_iterator (void *cls,
522 const struct GNUNET_HashCode *key)
524 struct RegexSearchContext *ctx = cls;
525 struct GNUNET_REGEX_search_handle *info = ctx->info;
529 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
532 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Start of regex edge iterator\n");
533 LOG (GNUNET_ERROR_TYPE_DEBUG, "* descr : %s\n", info->description);
534 LOG (GNUNET_ERROR_TYPE_DEBUG, "* posit : %u\n", ctx->position);
535 current = &info->description[ctx->position];
536 LOG (GNUNET_ERROR_TYPE_DEBUG, "* currt : %s\n", current);
537 current_len = strlen (info->description) - ctx->position;
538 LOG (GNUNET_ERROR_TYPE_DEBUG, "* ctlen : %u\n", current_len);
539 LOG (GNUNET_ERROR_TYPE_DEBUG, "* tklen : %u\n", len);
540 LOG (GNUNET_ERROR_TYPE_DEBUG, "* token : %.*s\n", len, token);
541 LOG (GNUNET_ERROR_TYPE_DEBUG, "* nextk : %s\n", GNUNET_h2s(key));
542 if (len > current_len)
544 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token too long, END\n");
545 return GNUNET_YES; // Token too long, wont match
547 if (0 != strncmp (current, token, len))
549 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token doesn't match, END\n");
550 return GNUNET_YES; // Token doesn't match
553 if (len > ctx->longest_match)
555 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is longer, KEEP\n");
556 ctx->longest_match = len;
561 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is not longer, IGNORE\n");
564 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
570 * Jump to the next edge, with the longest matching token.
572 * @param block Block found in the DHT.
573 * @param size Size of the block.
574 * @param ctx Context of the search.
576 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
579 regex_next_edge (const struct RegexBlock *block,
581 struct RegexSearchContext *ctx)
583 struct RegexSearchContext *new_ctx;
584 struct GNUNET_REGEX_search_handle *info = ctx->info;
585 struct GNUNET_DHT_GetHandle *get_h;
589 /* Find the longest match for the current string position,
590 * among tokens in the given block */
591 ctx->longest_match = 0;
592 result = GNUNET_REGEX_block_iterate (block, size,
593 ®ex_edge_iterator, ctx);
594 GNUNET_break (GNUNET_OK == result);
596 /* Did anything match? */
597 if (0 == ctx->longest_match)
600 new_ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
601 new_ctx->info = info;
602 new_ctx->position = ctx->position + ctx->longest_match;
603 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
605 /* Check whether we already have a DHT GET running for it */
607 GNUNET_CONTAINER_multihashmap_contains(info->dht_get_handles, &ctx->hash))
609 LOG (GNUNET_ERROR_TYPE_DEBUG, "* GET running, END\n");
610 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
612 ®ex_result_iterator,
614 // FIXME: "leaks" new_ctx? avoid keeping it around?
615 return; // We are already looking for it
618 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
621 /* Start search in DHT */
622 rest = &new_ctx->info->description[new_ctx->position];
624 GNUNET_DHT_get_start (info->dht, /* handle */
625 GNUNET_BLOCK_TYPE_REGEX, /* type */
626 &ctx->hash, /* key to search */
627 DHT_REPLICATION, /* replication level */
628 GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE,
630 // FIXME add BLOOMFILTER to exclude filtered peers
631 strlen(rest) + 1, /* xquery bits */
632 // FIXME add BLOOMFILTER SIZE
633 &dht_get_string_handler, new_ctx);
635 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
638 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
646 struct GNUNET_REGEX_search_handle *
647 GNUNET_REGEX_search (struct GNUNET_DHT_Handle *dht,
649 GNUNET_REGEX_Found callback,
651 struct GNUNET_STATISTICS_Handle *stats)
653 struct GNUNET_REGEX_search_handle *h;
655 LOG (GNUNET_ERROR_TYPE_DEBUG, "GNUNET_REGEX_search: %s\n", string);
656 GNUNET_assert (NULL != dht);
657 GNUNET_assert (NULL != callback);
658 h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_search_handle));
660 h->description = GNUNET_strdup (string);
661 h->callback = callback;
662 h->callback_cls = callback_cls;
665 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
666 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
672 * Iterator over hash map entries to cancel DHT GET requests after a
673 * successful connect_by_string.
675 * @param cls Closure (unused).
676 * @param key Current key code (unused).
677 * @param value Value in the hash map (get handle).
678 * @return GNUNET_YES if we should continue to iterate,
682 regex_cancel_dht_get (void *cls,
683 const struct GNUNET_HashCode * key,
686 struct GNUNET_DHT_GetHandle *h = value;
688 GNUNET_DHT_get_stop (h);
694 * Iterator over hash map entries to free MeshRegexBlocks stored during the
695 * search for connect_by_string.
697 * @param cls Closure (unused).
698 * @param key Current key code (unused).
699 * @param value MeshRegexBlock in the hash map.
700 * @return GNUNET_YES if we should continue to iterate,
704 regex_free_result (void *cls,
705 const struct GNUNET_HashCode * key,
715 * Cancel an ongoing regex search in the DHT and free all resources.
717 * @param ctx The search context.
720 regex_cancel_search (struct GNUNET_REGEX_search_handle *ctx)
722 GNUNET_free (ctx->description);
723 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_handles,
724 ®ex_cancel_dht_get, NULL);
725 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_results,
726 ®ex_free_result, NULL);
727 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_results);
728 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_handles);
729 if (0 < ctx->n_contexts)
733 for (i = 0; i < ctx->n_contexts; i++)
735 GNUNET_free (ctx->contexts[i]);
737 GNUNET_free (ctx->contexts);
742 GNUNET_REGEX_search_cancel (struct GNUNET_REGEX_search_handle *h)
744 regex_cancel_search (h);
750 /* end of regex_dht.c */