2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "gnunet_regex_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
32 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
34 #define DHT_REPLICATION 5
35 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
37 struct GNUNET_REGEX_announce_handle
40 * DHT handle to use, must be initialized externally.
42 struct GNUNET_DHT_Handle *dht;
50 * Automaton representation of the regex (expensive to build).
52 struct GNUNET_REGEX_Automaton* dfa;
55 * Identity under which to announce the regex.
57 struct GNUNET_PeerIdentity *id;
60 * Optional statistics handle to report usage. Can be NULL.
62 struct GNUNET_STATISTICS_Handle *stats;
67 * Regex callback iterator to store own service description in the DHT.
70 * @param key hash for current state.
71 * @param proof proof for current state.
72 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
73 * @param num_edges number of edges leaving current state.
74 * @param edges edges leaving current state.
77 regex_iterator (void *cls,
78 const struct GNUNET_HashCode *key,
81 unsigned int num_edges,
82 const struct GNUNET_REGEX_Edge *edges)
84 struct GNUNET_REGEX_announce_handle *h = cls;
85 struct RegexBlock *block;
86 struct RegexEdge *block_edge;
87 enum GNUNET_DHT_RouteOption opt;
94 LOG (GNUNET_ERROR_TYPE_DEBUG,
95 " regex dht put for state %s\n",
97 LOG (GNUNET_ERROR_TYPE_DEBUG, " proof: %s\n", proof);
98 LOG (GNUNET_ERROR_TYPE_DEBUG, " num edges: %u\n", num_edges);
100 opt = GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE;
101 if (GNUNET_YES == accepting)
103 struct RegexAccept block;
105 LOG (GNUNET_ERROR_TYPE_DEBUG,
106 " state %s is accepting, putting own id\n",
108 size = sizeof (block);
111 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
113 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
114 sizeof (block), GNUNET_NO);
116 GNUNET_DHT_put (h->dht, key,
117 2, /* FIXME option */
118 opt /* | GNUNET_DHT_RO_RECORD_ROUTE*/,
119 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
122 GNUNET_TIME_relative_to_absolute (GNUNET_TIME_UNIT_HOURS), /* FIXME: expiration time should be option */
123 GNUNET_TIME_UNIT_HOURS, /* FIXME option */
127 size = sizeof (struct RegexBlock) + len;
128 block = GNUNET_malloc (size);
131 block->n_proof = htonl (len);
132 block->n_edges = htonl (num_edges);
133 block->accepting = htonl (accepting);
135 /* Store the proof at the end of the block. */
136 aux = (char *) &block[1];
137 memcpy (aux, proof, len);
140 /* Store each edge in a variable length MeshEdge struct at the
141 * very end of the MeshRegexBlock structure.
143 for (i = 0; i < num_edges; i++)
145 LOG (GNUNET_ERROR_TYPE_DEBUG, " edge %s towards %s\n",
146 edges[i].label, GNUNET_h2s(&edges[i].destination));
148 /* aux points at the end of the last block */
149 len = strlen (edges[i].label);
150 size += sizeof (struct RegexEdge) + len;
151 // Calculate offset FIXME is this ok? use size instead?
152 offset = aux - (char *) block;
153 block = GNUNET_realloc (block, size);
154 aux = &((char *) block)[offset];
155 block_edge = (struct RegexEdge *) aux;
156 block_edge->key = edges[i].destination;
157 block_edge->n_token = htonl (len);
158 aux = (char *) &block_edge[1];
159 memcpy (aux, edges[i].label, len);
163 GNUNET_DHT_put(h->dht, key,
164 DHT_REPLICATION, /* FIXME OPTION */
166 GNUNET_BLOCK_TYPE_REGEX, size,
168 GNUNET_TIME_relative_to_absolute (DHT_TTL), /* FIXME: this should be an option */
171 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
173 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
180 struct GNUNET_REGEX_announce_handle *
181 GNUNET_REGEX_announce (struct GNUNET_DHT_Handle *dht,
182 struct GNUNET_PeerIdentity *id,
184 uint16_t compression,
185 struct GNUNET_STATISTICS_Handle *stats)
187 struct GNUNET_REGEX_announce_handle *h;
189 GNUNET_assert (NULL != dht);
190 h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_announce_handle));
195 h->dfa = GNUNET_REGEX_construct_dfa (regex,
198 GNUNET_REGEX_reannounce (h);
203 GNUNET_REGEX_reannounce (struct GNUNET_REGEX_announce_handle *h)
205 GNUNET_REGEX_iterate_all_edges (h->dfa, ®ex_iterator, h);
209 GNUNET_REGEX_announce_cancel (struct GNUNET_REGEX_announce_handle *h)
211 GNUNET_REGEX_automaton_destroy (h->dfa);
216 /******************************************************************************/
220 * Struct to keep state of running searches that have consumed a part of
223 struct RegexSearchContext
226 * Part of the description already consumed by
227 * this particular search branch.
232 * Information about the search.
234 struct GNUNET_REGEX_search_handle *info;
237 * We just want to look for one edge, the longer the better.
240 unsigned int longest_match;
243 * Destination hash of the longest match.
245 struct GNUNET_HashCode hash;
250 * Struct to keep information of searches of services described by a regex
251 * using a user-provided string service description.
253 struct GNUNET_REGEX_search_handle
256 * DHT handle to use, must be initialized externally.
258 struct GNUNET_DHT_Handle *dht;
261 * Optional statistics handle to report usage. Can be NULL.
263 struct GNUNET_STATISTICS_Handle *stats;
266 * User provided description of the searched service.
273 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
276 * Results from running DHT GETs.
278 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
281 * Contexts, for each running DHT GET. Free all on end of search.
283 struct RegexSearchContext **contexts;
286 * Number of contexts (branches/steps in search).
288 unsigned int n_contexts;
291 * @param callback Callback for found peers.
293 GNUNET_REGEX_Found callback;
296 * @param callback_cls Closure for @c callback.
304 * Jump to the next edge, with the longest matching token.
306 * @param block Block found in the DHT.
307 * @param size Size of the block.
308 * @param ctx Context of the search.
310 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
313 regex_next_edge (const struct RegexBlock *block,
315 struct RegexSearchContext *ctx);
319 * Function to process DHT string to regex matching.
320 * Called on each result obtained for the DHT search.
322 * @param cls Closure (search context).
323 * @param exp When will this value expire.
324 * @param key Key of the result.
325 * @param get_path Path of the get request.
326 * @param get_path_length Lenght of get_path.
327 * @param put_path Path of the put request.
328 * @param put_path_length Length of the put_path.
329 * @param type Type of the result.
330 * @param size Number of bytes in data.
331 * @param data Pointer to the result data.
334 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
335 const struct GNUNET_HashCode * key,
336 const struct GNUNET_PeerIdentity *get_path,
337 unsigned int get_path_length,
338 const struct GNUNET_PeerIdentity *put_path,
339 unsigned int put_path_length,
340 enum GNUNET_BLOCK_Type type,
341 size_t size, const void *data)
343 const struct RegexAccept *block = data;
344 struct RegexSearchContext *ctx = cls;
345 struct GNUNET_REGEX_search_handle *info = ctx->info;
347 LOG (GNUNET_ERROR_TYPE_DEBUG, "Got regex results from DHT!\n");
348 LOG (GNUNET_ERROR_TYPE_DEBUG, " for %s\n", info->description);
350 GNUNET_STATISTICS_update (info->stats, "# regex accepting blocks found",
352 GNUNET_STATISTICS_update (info->stats, "# regex accepting block bytes found",
355 info->callback (info->callback_cls,
357 get_path, get_path_length,
358 put_path, put_path_length);
364 * Find a path to a peer that offers a regex servcie compatible
365 * with a given string.
367 * @param key The key of the accepting state.
368 * @param ctx Context containing info about the string, tunnel, etc.
371 regex_find_path (const struct GNUNET_HashCode *key,
372 struct RegexSearchContext *ctx)
374 struct GNUNET_DHT_GetHandle *get_h;
376 LOG (GNUNET_ERROR_TYPE_DEBUG, "Found peer by service\n");
377 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
378 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
379 key, /* key to search */
380 DHT_REPLICATION, /* replication level */
381 GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE |
382 GNUNET_DHT_RO_RECORD_ROUTE,
383 NULL, /* xquery */ // FIXME BLOOMFILTER
384 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
385 &dht_get_string_accept_handler, ctx);
386 GNUNET_break (GNUNET_OK ==
387 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
390 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
395 * Function to process DHT string to regex matching.
396 * Called on each result obtained for the DHT search.
398 * @param cls closure (search context)
399 * @param exp when will this value expire
400 * @param key key of the result
401 * @param get_path path of the get request (not used)
402 * @param get_path_length lenght of get_path (not used)
403 * @param put_path path of the put request (not used)
404 * @param put_path_length length of the put_path (not used)
405 * @param type type of the result
406 * @param size number of bytes in data
407 * @param data pointer to the result data
409 * TODO: re-issue the request after certain time? cancel after X results?
412 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
413 const struct GNUNET_HashCode * key,
414 const struct GNUNET_PeerIdentity *get_path,
415 unsigned int get_path_length,
416 const struct GNUNET_PeerIdentity *put_path,
417 unsigned int put_path_length,
418 enum GNUNET_BLOCK_Type type,
419 size_t size, const void *data)
421 const struct RegexBlock *block = data;
422 struct RegexSearchContext *ctx = cls;
423 struct GNUNET_REGEX_search_handle *info = ctx->info;
427 LOG (GNUNET_ERROR_TYPE_DEBUG, "DHT GET STRING RETURNED RESULTS\n");
428 LOG (GNUNET_ERROR_TYPE_DEBUG, " for: %s\n", ctx->info->description);
429 LOG (GNUNET_ERROR_TYPE_DEBUG, " key: %s\n", GNUNET_h2s (key));
431 copy = GNUNET_malloc (size);
432 memcpy (copy, data, size);
435 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results, key, copy,
436 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)
438 len = ntohl (block->n_proof);
442 memcpy (proof, &block[1], len);
444 if (GNUNET_OK != GNUNET_REGEX_check_proof (proof, key))
450 len = strlen (info->description);
451 if (len == ctx->position) // String processed
453 if (GNUNET_YES == ntohl (block->accepting))
455 regex_find_path (key, ctx);
459 LOG (GNUNET_ERROR_TYPE_DEBUG, " block not accepting!\n");
460 // FIXME REGEX this block not successful, wait for more? start timeout?
465 regex_next_edge (block, size, ctx);
472 * Iterator over found existing mesh regex blocks that match an ongoing search.
475 * @param key current key code
476 * @param value value in the hash map
477 * @return GNUNET_YES if we should continue to iterate,
481 regex_result_iterator (void *cls,
482 const struct GNUNET_HashCode * key,
485 struct RegexBlock *block = value;
486 struct RegexSearchContext *ctx = cls;
488 if (GNUNET_YES == ntohl(block->accepting) &&
489 ctx->position == strlen (ctx->info->description))
491 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Found accepting known block\n");
492 regex_find_path (key, ctx);
493 return GNUNET_YES; // We found an accept state!
497 LOG (GNUNET_ERROR_TYPE_DEBUG, "* %u, %u, [%u]\n",
498 ctx->position, strlen(ctx->info->description),
499 ntohl(block->accepting));
502 regex_next_edge(block, SIZE_MAX, ctx);
504 GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
512 * Iterator over edges in a regex block retrieved from the DHT.
514 * @param cls Closure (context of the search).
515 * @param token Token that follows to next state.
516 * @param len Lenght of token.
517 * @param key Hash of next state.
519 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
522 regex_edge_iterator (void *cls,
525 const struct GNUNET_HashCode *key)
527 struct RegexSearchContext *ctx = cls;
528 struct GNUNET_REGEX_search_handle *info = ctx->info;
532 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
535 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Start of regex edge iterator\n");
536 LOG (GNUNET_ERROR_TYPE_DEBUG, "* descr : %s\n", info->description);
537 LOG (GNUNET_ERROR_TYPE_DEBUG, "* posit : %u\n", ctx->position);
538 current = &info->description[ctx->position];
539 LOG (GNUNET_ERROR_TYPE_DEBUG, "* currt : %s\n", current);
540 current_len = strlen (info->description) - ctx->position;
541 LOG (GNUNET_ERROR_TYPE_DEBUG, "* ctlen : %u\n", current_len);
542 LOG (GNUNET_ERROR_TYPE_DEBUG, "* tklen : %u\n", len);
543 LOG (GNUNET_ERROR_TYPE_DEBUG, "* token : %.*s\n", len, token);
544 LOG (GNUNET_ERROR_TYPE_DEBUG, "* nextk : %s\n", GNUNET_h2s(key));
545 if (len > current_len)
547 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token too long, END\n");
548 return GNUNET_YES; // Token too long, wont match
550 if (0 != strncmp (current, token, len))
552 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token doesn't match, END\n");
553 return GNUNET_YES; // Token doesn't match
556 if (len > ctx->longest_match)
558 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is longer, KEEP\n");
559 ctx->longest_match = len;
564 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is not longer, IGNORE\n");
567 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
573 * Jump to the next edge, with the longest matching token.
575 * @param block Block found in the DHT.
576 * @param size Size of the block.
577 * @param ctx Context of the search.
579 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
582 regex_next_edge (const struct RegexBlock *block,
584 struct RegexSearchContext *ctx)
586 struct RegexSearchContext *new_ctx;
587 struct GNUNET_REGEX_search_handle *info = ctx->info;
588 struct GNUNET_DHT_GetHandle *get_h;
592 /* Find the longest match for the current string position,
593 * among tokens in the given block */
594 ctx->longest_match = 0;
595 result = GNUNET_REGEX_block_iterate (block, size,
596 ®ex_edge_iterator, ctx);
597 GNUNET_break (GNUNET_OK == result);
599 /* Did anything match? */
600 if (0 == ctx->longest_match)
603 new_ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
604 new_ctx->info = info;
605 new_ctx->position = ctx->position + ctx->longest_match;
606 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
608 /* Check whether we already have a DHT GET running for it */
610 GNUNET_CONTAINER_multihashmap_contains(info->dht_get_handles, &ctx->hash))
612 LOG (GNUNET_ERROR_TYPE_DEBUG, "* GET running, END\n");
613 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
615 ®ex_result_iterator,
617 // FIXME: "leaks" new_ctx? avoid keeping it around?
618 return; // We are already looking for it
621 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
624 /* Start search in DHT */
625 rest = &new_ctx->info->description[new_ctx->position];
627 GNUNET_DHT_get_start (info->dht, /* handle */
628 GNUNET_BLOCK_TYPE_REGEX, /* type */
629 &ctx->hash, /* key to search */
630 DHT_REPLICATION, /* replication level */
631 GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE,
633 // FIXME add BLOOMFILTER to exclude filtered peers
634 strlen(rest) + 1, /* xquery bits */
635 // FIXME add BLOOMFILTER SIZE
636 &dht_get_string_handler, new_ctx);
638 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
641 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
649 struct GNUNET_REGEX_search_handle *
650 GNUNET_REGEX_search (struct GNUNET_DHT_Handle *dht,
652 GNUNET_REGEX_Found callback,
654 struct GNUNET_STATISTICS_Handle *stats)
656 struct GNUNET_REGEX_search_handle *h;
657 struct GNUNET_DHT_GetHandle *get_h;
658 struct RegexSearchContext *ctx;
659 struct GNUNET_HashCode key;
663 /* Initialize handle */
664 LOG (GNUNET_ERROR_TYPE_DEBUG, "GNUNET_REGEX_search: %s\n", string);
665 GNUNET_assert (NULL != dht);
666 GNUNET_assert (NULL != callback);
667 h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_search_handle));
669 h->description = GNUNET_strdup (string);
670 h->callback = callback;
671 h->callback_cls = callback_cls;
673 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
674 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
676 /* Initialize context */
677 len = strlen (string);
678 size = GNUNET_REGEX_get_first_key (string, len, &key);
679 ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
680 ctx->position = size;
682 GNUNET_array_append (h->contexts, h->n_contexts, ctx);
683 LOG (GNUNET_ERROR_TYPE_DEBUG,
684 " consumed %u bits out of %u\n", size, len);
685 LOG (GNUNET_ERROR_TYPE_DEBUG,
686 " looking for %s\n", GNUNET_h2s (&key));
688 /* Start search in DHT */
689 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
690 GNUNET_BLOCK_TYPE_REGEX, /* type */
691 &key, /* key to search */
692 DHT_REPLICATION, /* replication level */
693 GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE,
694 &h->description[size], /* xquery */
695 // FIXME add BLOOMFILTER to exclude filtered peers
696 len + 1 - size, /* xquery bits */
697 // FIXME add BLOOMFILTER SIZE
698 &dht_get_string_handler, ctx);
701 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
704 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
712 * Iterator over hash map entries to cancel DHT GET requests after a
713 * successful connect_by_string.
715 * @param cls Closure (unused).
716 * @param key Current key code (unused).
717 * @param value Value in the hash map (get handle).
718 * @return GNUNET_YES if we should continue to iterate,
722 regex_cancel_dht_get (void *cls,
723 const struct GNUNET_HashCode * key,
726 struct GNUNET_DHT_GetHandle *h = value;
728 GNUNET_DHT_get_stop (h);
734 * Iterator over hash map entries to free MeshRegexBlocks stored during the
735 * search for connect_by_string.
737 * @param cls Closure (unused).
738 * @param key Current key code (unused).
739 * @param value MeshRegexBlock in the hash map.
740 * @return GNUNET_YES if we should continue to iterate,
744 regex_free_result (void *cls,
745 const struct GNUNET_HashCode * key,
755 * Cancel an ongoing regex search in the DHT and free all resources.
757 * @param ctx The search context.
760 regex_cancel_search (struct GNUNET_REGEX_search_handle *ctx)
762 GNUNET_free (ctx->description);
763 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_handles,
764 ®ex_cancel_dht_get, NULL);
765 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_results,
766 ®ex_free_result, NULL);
767 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_results);
768 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_handles);
769 if (0 < ctx->n_contexts)
773 for (i = 0; i < ctx->n_contexts; i++)
775 GNUNET_free (ctx->contexts[i]);
777 GNUNET_free (ctx->contexts);
782 GNUNET_REGEX_search_cancel (struct GNUNET_REGEX_search_handle *h)
784 regex_cancel_search (h);
790 /* end of regex_dht.c */