2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "gnunet_regex_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
32 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
34 /* FIXME: OPTION (API, CONFIG) */
35 #define DHT_REPLICATION 5
36 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
37 #define DEBUG_DHT GNUNET_YES
40 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE | GNUNET_DHT_RO_RECORD_ROUTE
42 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
45 struct GNUNET_REGEX_announce_handle
48 * DHT handle to use, must be initialized externally.
50 struct GNUNET_DHT_Handle *dht;
58 * Automaton representation of the regex (expensive to build).
60 struct GNUNET_REGEX_Automaton* dfa;
63 * Identity under which to announce the regex.
65 struct GNUNET_PeerIdentity *id;
68 * Optional statistics handle to report usage. Can be NULL.
70 struct GNUNET_STATISTICS_Handle *stats;
75 * Regex callback iterator to store own service description in the DHT.
78 * @param key hash for current state.
79 * @param proof proof for current state.
80 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
81 * @param num_edges number of edges leaving current state.
82 * @param edges edges leaving current state.
85 regex_iterator (void *cls,
86 const struct GNUNET_HashCode *key,
89 unsigned int num_edges,
90 const struct GNUNET_REGEX_Edge *edges)
92 struct GNUNET_REGEX_announce_handle *h = cls;
93 struct RegexBlock *block;
94 struct RegexEdge *block_edge;
101 LOG (GNUNET_ERROR_TYPE_DEBUG,
102 " regex dht put for state %s\n",
104 LOG (GNUNET_ERROR_TYPE_DEBUG, " proof: %s\n", proof);
105 LOG (GNUNET_ERROR_TYPE_DEBUG, " num edges: %u\n", num_edges);
107 if (GNUNET_YES == accepting)
109 struct RegexAccept block;
111 LOG (GNUNET_ERROR_TYPE_DEBUG,
112 " state %s is accepting, putting own id\n",
114 size = sizeof (block);
117 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
119 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
120 sizeof (block), GNUNET_NO);
122 GNUNET_DHT_put (h->dht, key,
124 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
125 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
128 GNUNET_TIME_relative_to_absolute (DHT_TTL),
133 size = sizeof (struct RegexBlock) + len;
134 block = GNUNET_malloc (size);
137 block->n_proof = htonl (len);
138 block->n_edges = htonl (num_edges);
139 block->accepting = htonl (accepting);
141 /* Store the proof at the end of the block. */
142 aux = (char *) &block[1];
143 memcpy (aux, proof, len);
146 /* Store each edge in a variable length MeshEdge struct at the
147 * very end of the MeshRegexBlock structure.
149 for (i = 0; i < num_edges; i++)
151 LOG (GNUNET_ERROR_TYPE_DEBUG, " edge %s towards %s\n",
152 edges[i].label, GNUNET_h2s(&edges[i].destination));
154 /* aux points at the end of the last block */
155 len = strlen (edges[i].label);
156 size += sizeof (struct RegexEdge) + len;
157 // Calculate offset FIXME is this ok? use size instead?
158 offset = aux - (char *) block;
159 block = GNUNET_realloc (block, size);
160 aux = &((char *) block)[offset];
161 block_edge = (struct RegexEdge *) aux;
162 block_edge->key = edges[i].destination;
163 block_edge->n_token = htonl (len);
164 aux = (char *) &block_edge[1];
165 memcpy (aux, edges[i].label, len);
169 GNUNET_DHT_put (h->dht, key,
172 GNUNET_BLOCK_TYPE_REGEX, size,
174 GNUNET_TIME_relative_to_absolute (DHT_TTL),
177 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
179 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
185 struct GNUNET_REGEX_announce_handle *
186 GNUNET_REGEX_announce (struct GNUNET_DHT_Handle *dht,
187 struct GNUNET_PeerIdentity *id,
189 uint16_t compression,
190 struct GNUNET_STATISTICS_Handle *stats)
192 struct GNUNET_REGEX_announce_handle *h;
194 GNUNET_assert (NULL != dht);
195 h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_announce_handle));
200 h->dfa = GNUNET_REGEX_construct_dfa (regex,
203 GNUNET_REGEX_reannounce (h);
208 GNUNET_REGEX_reannounce (struct GNUNET_REGEX_announce_handle *h)
210 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
211 LOG (GNUNET_ERROR_TYPE_INFO, "GNUNET_REGEX_reannounce: %.60s\n", h->regex);
212 LOG (GNUNET_ERROR_TYPE_DEBUG, " full: %s\n", h->regex);
213 GNUNET_REGEX_iterate_all_edges (h->dfa, ®ex_iterator, h);
217 GNUNET_REGEX_announce_cancel (struct GNUNET_REGEX_announce_handle *h)
219 GNUNET_REGEX_automaton_destroy (h->dfa);
224 /******************************************************************************/
228 * Struct to keep state of running searches that have consumed a part of
231 struct RegexSearchContext
234 * Part of the description already consumed by
235 * this particular search branch.
240 * Information about the search.
242 struct GNUNET_REGEX_search_handle *info;
245 * We just want to look for one edge, the longer the better.
248 unsigned int longest_match;
251 * Destination hash of the longest match.
253 struct GNUNET_HashCode hash;
258 * Struct to keep information of searches of services described by a regex
259 * using a user-provided string service description.
261 struct GNUNET_REGEX_search_handle
264 * DHT handle to use, must be initialized externally.
266 struct GNUNET_DHT_Handle *dht;
269 * Optional statistics handle to report usage. Can be NULL.
271 struct GNUNET_STATISTICS_Handle *stats;
274 * User provided description of the searched service.
281 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
284 * Results from running DHT GETs.
286 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
289 * Contexts, for each running DHT GET. Free all on end of search.
291 struct RegexSearchContext **contexts;
294 * Number of contexts (branches/steps in search).
296 unsigned int n_contexts;
299 * @param callback Callback for found peers.
301 GNUNET_REGEX_Found callback;
304 * @param callback_cls Closure for @c callback.
312 * Jump to the next edge, with the longest matching token.
314 * @param block Block found in the DHT.
315 * @param size Size of the block.
316 * @param ctx Context of the search.
318 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
321 regex_next_edge (const struct RegexBlock *block,
323 struct RegexSearchContext *ctx);
327 * Function to process DHT string to regex matching.
328 * Called on each result obtained for the DHT search.
330 * @param cls Closure (search context).
331 * @param exp When will this value expire.
332 * @param key Key of the result.
333 * @param get_path Path of the get request.
334 * @param get_path_length Lenght of get_path.
335 * @param put_path Path of the put request.
336 * @param put_path_length Length of the put_path.
337 * @param type Type of the result.
338 * @param size Number of bytes in data.
339 * @param data Pointer to the result data.
342 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
343 const struct GNUNET_HashCode * key,
344 const struct GNUNET_PeerIdentity *get_path,
345 unsigned int get_path_length,
346 const struct GNUNET_PeerIdentity *put_path,
347 unsigned int put_path_length,
348 enum GNUNET_BLOCK_Type type,
349 size_t size, const void *data)
351 const struct RegexAccept *block = data;
352 struct RegexSearchContext *ctx = cls;
353 struct GNUNET_REGEX_search_handle *info = ctx->info;
355 LOG (GNUNET_ERROR_TYPE_DEBUG, "Got regex results from DHT!\n");
356 LOG (GNUNET_ERROR_TYPE_INFO, " accept for %s (key %s)\n",
357 info->description, GNUNET_h2s(key));
359 GNUNET_STATISTICS_update (info->stats, "# regex accepting blocks found",
361 GNUNET_STATISTICS_update (info->stats, "# regex accepting block bytes found",
364 info->callback (info->callback_cls,
366 get_path, get_path_length,
367 put_path, put_path_length);
373 * Find a path to a peer that offers a regex servcie compatible
374 * with a given string.
376 * @param key The key of the accepting state.
377 * @param ctx Context containing info about the string, tunnel, etc.
380 regex_find_path (const struct GNUNET_HashCode *key,
381 struct RegexSearchContext *ctx)
383 struct GNUNET_DHT_GetHandle *get_h;
385 LOG (GNUNET_ERROR_TYPE_DEBUG, "Found peer by service\n");
386 LOG (GNUNET_ERROR_TYPE_INFO, " find accept for %s\n", GNUNET_h2s (key));
387 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
388 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
389 key, /* key to search */
390 DHT_REPLICATION, /* replication level */
391 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
392 NULL, /* xquery */ // FIXME BLOOMFILTER
393 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
394 &dht_get_string_accept_handler, ctx);
395 GNUNET_break (GNUNET_OK ==
396 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
399 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
404 * Function to process DHT string to regex matching.
405 * Called on each result obtained for the DHT search.
407 * @param cls closure (search context)
408 * @param exp when will this value expire
409 * @param key key of the result
410 * @param get_path path of the get request (not used)
411 * @param get_path_length lenght of get_path (not used)
412 * @param put_path path of the put request (not used)
413 * @param put_path_length length of the put_path (not used)
414 * @param type type of the result
415 * @param size number of bytes in data
416 * @param data pointer to the result data
418 * TODO: re-issue the request after certain time? cancel after X results?
421 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
422 const struct GNUNET_HashCode * key,
423 const struct GNUNET_PeerIdentity *get_path,
424 unsigned int get_path_length,
425 const struct GNUNET_PeerIdentity *put_path,
426 unsigned int put_path_length,
427 enum GNUNET_BLOCK_Type type,
428 size_t size, const void *data)
430 const struct RegexBlock *block = data;
431 struct RegexSearchContext *ctx = cls;
432 struct GNUNET_REGEX_search_handle *info = ctx->info;
438 if (NULL != put_path && 0 != put_path_length)
440 datastore = GNUNET_strdup (GNUNET_i2s (&put_path[put_path_length - 1]));
444 GNUNET_asprintf (&datastore, "?? %u/%u", put_path_length, get_path_length);
447 datastore = GNUNET_strdup ("N/A");
450 LOG (GNUNET_ERROR_TYPE_INFO, " DHT GET result for %s (%s) at %s\n",
451 GNUNET_h2s (key), ctx->info->description, datastore);
452 GNUNET_free (datastore);
454 copy = GNUNET_malloc (size);
455 memcpy (copy, data, size);
458 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
459 &((struct RegexBlock *)copy)->key, copy,
460 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)
462 len = ntohl (block->n_proof);
466 memcpy (proof, &block[1], len);
468 if (GNUNET_OK != GNUNET_REGEX_check_proof (proof, key))
474 len = strlen (info->description);
475 if (len == ctx->position) // String processed
477 if (GNUNET_YES == ntohl (block->accepting))
479 regex_find_path (key, ctx);
483 LOG (GNUNET_ERROR_TYPE_INFO, " block not accepting!\n");
484 // FIXME REGEX this block not successful, wait for more? start timeout?
489 regex_next_edge (block, size, ctx);
496 * Iterator over found existing mesh regex blocks that match an ongoing search.
498 * @param cls Closure (current context)-
499 * @param key Current key code (key for cached block).
500 * @param value Value in the hash map (cached RegexBlock).
501 * @return GNUNET_YES: we should always continue to iterate.
504 regex_result_iterator (void *cls,
505 const struct GNUNET_HashCode * key,
508 struct RegexBlock *block = value;
509 struct RegexSearchContext *ctx = cls;
511 if (GNUNET_YES == ntohl(block->accepting) &&
512 ctx->position == strlen (ctx->info->description))
514 LOG (GNUNET_ERROR_TYPE_INFO, " * Found accepting known block\n");
515 regex_find_path (key, ctx);
516 return GNUNET_YES; // We found an accept state!
518 LOG (GNUNET_ERROR_TYPE_DEBUG, "* %u, %u, [%u]\n",
519 ctx->position, strlen(ctx->info->description),
520 ntohl(block->accepting));
522 regex_next_edge (block, SIZE_MAX, ctx);
524 GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
532 * Iterator over edges in a regex block retrieved from the DHT.
534 * @param cls Closure (context of the search).
535 * @param token Token that follows to next state.
536 * @param len Lenght of token.
537 * @param key Hash of next state.
539 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
542 regex_edge_iterator (void *cls,
545 const struct GNUNET_HashCode *key)
547 struct RegexSearchContext *ctx = cls;
548 struct GNUNET_REGEX_search_handle *info = ctx->info;
552 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
555 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Start of regex edge iterator\n");
556 LOG (GNUNET_ERROR_TYPE_DEBUG, "* descr : %s\n", info->description);
557 LOG (GNUNET_ERROR_TYPE_DEBUG, "* posit : %u\n", ctx->position);
558 current = &info->description[ctx->position];
559 LOG (GNUNET_ERROR_TYPE_DEBUG, "* currt : %s\n", current);
560 current_len = strlen (info->description) - ctx->position;
561 LOG (GNUNET_ERROR_TYPE_DEBUG, "* ctlen : %u\n", current_len);
562 LOG (GNUNET_ERROR_TYPE_DEBUG, "* tklen : %u\n", len);
563 LOG (GNUNET_ERROR_TYPE_DEBUG, "* token : %.*s\n", len, token);
564 LOG (GNUNET_ERROR_TYPE_DEBUG, "* nextk : %s\n", GNUNET_h2s(key));
565 if (len > current_len)
567 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token too long, END\n");
568 return GNUNET_YES; // Token too long, wont match
570 if (0 != strncmp (current, token, len))
572 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token doesn't match, END\n");
573 return GNUNET_YES; // Token doesn't match
576 if (len > ctx->longest_match)
578 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is longer, KEEP\n");
579 ctx->longest_match = len;
584 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is not longer, IGNORE\n");
587 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
593 * Jump to the next edge, with the longest matching token.
595 * @param block Block found in the DHT.
596 * @param size Size of the block.
597 * @param ctx Context of the search.
599 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
602 regex_next_edge (const struct RegexBlock *block,
604 struct RegexSearchContext *ctx)
606 struct RegexSearchContext *new_ctx;
607 struct GNUNET_REGEX_search_handle *info = ctx->info;
608 struct GNUNET_DHT_GetHandle *get_h;
609 struct GNUNET_HashCode *hash;
613 /* Find the longest match for the current string position,
614 * among tokens in the given block */
615 ctx->longest_match = 0;
616 result = GNUNET_REGEX_block_iterate (block, size,
617 ®ex_edge_iterator, ctx);
618 GNUNET_break (GNUNET_OK == result);
620 /* Did anything match? */
621 if (0 == ctx->longest_match)
623 LOG (GNUNET_ERROR_TYPE_DEBUG, " no match in block\n");
628 new_ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
629 new_ctx->info = info;
630 new_ctx->position = ctx->position + ctx->longest_match;
631 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
633 /* Check whether we already have a DHT GET running for it */
635 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
637 LOG (GNUNET_ERROR_TYPE_DEBUG, "* GET for %s running, END\n",
639 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
641 ®ex_result_iterator,
643 return; /* We are already looking for it */
646 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
649 /* Start search in DHT */
650 LOG (GNUNET_ERROR_TYPE_INFO, " looking for %s\n", GNUNET_h2s (hash));
651 rest = &new_ctx->info->description[new_ctx->position];
653 GNUNET_DHT_get_start (info->dht, /* handle */
654 GNUNET_BLOCK_TYPE_REGEX, /* type */
655 hash, /* key to search */
656 DHT_REPLICATION, /* replication level */
659 // FIXME add BLOOMFILTER to exclude filtered peers
660 strlen(rest) + 1, /* xquery bits */
661 // FIXME add BLOOMFILTER SIZE
662 &dht_get_string_handler, new_ctx);
664 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
667 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
675 struct GNUNET_REGEX_search_handle *
676 GNUNET_REGEX_search (struct GNUNET_DHT_Handle *dht,
678 GNUNET_REGEX_Found callback,
680 struct GNUNET_STATISTICS_Handle *stats)
682 struct GNUNET_REGEX_search_handle *h;
683 struct GNUNET_DHT_GetHandle *get_h;
684 struct RegexSearchContext *ctx;
685 struct GNUNET_HashCode key;
689 /* Initialize handle */
690 LOG (GNUNET_ERROR_TYPE_INFO, "GNUNET_REGEX_search: %s\n", string);
691 GNUNET_assert (NULL != dht);
692 GNUNET_assert (NULL != callback);
693 h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_search_handle));
695 h->description = GNUNET_strdup (string);
696 h->callback = callback;
697 h->callback_cls = callback_cls;
699 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
700 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
702 /* Initialize context */
703 len = strlen (string);
704 size = GNUNET_REGEX_get_first_key (string, len, &key);
705 ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
706 ctx->position = size;
708 GNUNET_array_append (h->contexts, h->n_contexts, ctx);
709 LOG (GNUNET_ERROR_TYPE_DEBUG, " consumed %u bits out of %u\n", size, len);
710 LOG (GNUNET_ERROR_TYPE_INFO, " looking for %s\n", GNUNET_h2s (&key));
712 /* Start search in DHT */
713 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
714 GNUNET_BLOCK_TYPE_REGEX, /* type */
715 &key, /* key to search */
716 DHT_REPLICATION, /* replication level */
718 &h->description[size], /* xquery */
719 // FIXME add BLOOMFILTER to exclude filtered peers
720 len + 1 - size, /* xquery bits */
721 // FIXME add BLOOMFILTER SIZE
722 &dht_get_string_handler, ctx);
725 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
728 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
736 * Iterator over hash map entries to cancel DHT GET requests after a
737 * successful connect_by_string.
739 * @param cls Closure (unused).
740 * @param key Current key code (unused).
741 * @param value Value in the hash map (get handle).
742 * @return GNUNET_YES if we should continue to iterate,
746 regex_cancel_dht_get (void *cls,
747 const struct GNUNET_HashCode * key,
750 struct GNUNET_DHT_GetHandle *h = value;
752 GNUNET_DHT_get_stop (h);
758 * Iterator over hash map entries to free MeshRegexBlocks stored during the
759 * search for connect_by_string.
761 * @param cls Closure (unused).
762 * @param key Current key code (unused).
763 * @param value MeshRegexBlock in the hash map.
764 * @return GNUNET_YES if we should continue to iterate,
768 regex_free_result (void *cls,
769 const struct GNUNET_HashCode * key,
779 * Cancel an ongoing regex search in the DHT and free all resources.
781 * @param ctx The search context.
784 regex_cancel_search (struct GNUNET_REGEX_search_handle *ctx)
786 GNUNET_free (ctx->description);
787 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_handles,
788 ®ex_cancel_dht_get, NULL);
789 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_results,
790 ®ex_free_result, NULL);
791 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_results);
792 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_handles);
793 if (0 < ctx->n_contexts)
797 for (i = 0; i < ctx->n_contexts; i++)
799 GNUNET_free (ctx->contexts[i]);
801 GNUNET_free (ctx->contexts);
806 GNUNET_REGEX_search_cancel (struct GNUNET_REGEX_search_handle *h)
808 regex_cancel_search (h);
814 /* end of regex_dht.c */