2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_internal_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "regex_internal_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
32 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
34 /* FIXME: OPTION (API, CONFIG) */
35 #define DHT_REPLICATION 5
36 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
37 #define DEBUG_DHT GNUNET_NO
40 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE | GNUNET_DHT_RO_RECORD_ROUTE
42 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
45 struct REGEX_INTERNAL_Announcement
48 * DHT handle to use, must be initialized externally.
50 struct GNUNET_DHT_Handle *dht;
58 * Automaton representation of the regex (expensive to build).
60 struct REGEX_INTERNAL_Automaton* dfa;
63 * Identity under which to announce the regex.
65 struct GNUNET_PeerIdentity id;
68 * Optional statistics handle to report usage. Can be NULL.
70 struct GNUNET_STATISTICS_Handle *stats;
75 * Regex callback iterator to store own service description in the DHT.
78 * @param key hash for current state.
79 * @param proof proof for current state.
80 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
81 * @param num_edges number of edges leaving current state.
82 * @param edges edges leaving current state.
85 regex_iterator (void *cls,
86 const struct GNUNET_HashCode *key,
89 unsigned int num_edges,
90 const struct REGEX_INTERNAL_Edge *edges)
92 struct REGEX_INTERNAL_Announcement *h = cls;
93 struct RegexBlock *block;
96 LOG (GNUNET_ERROR_TYPE_DEBUG,
97 "DHT PUT for state %s with proof `%s' and %u edges\n",
101 if (GNUNET_YES == accepting)
103 struct RegexAccept block;
105 LOG (GNUNET_ERROR_TYPE_DEBUG,
106 "State %s is accepting, putting own id\n",
108 size = sizeof (block);
111 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
113 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
114 sizeof (block), GNUNET_NO);
116 GNUNET_DHT_put (h->dht, key,
118 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
119 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
122 GNUNET_TIME_relative_to_absolute (DHT_TTL),
126 block = REGEX_INTERNAL_block_create (key, proof,
131 GNUNET_DHT_put (h->dht, key,
134 GNUNET_BLOCK_TYPE_REGEX,
136 GNUNET_TIME_relative_to_absolute (DHT_TTL),
139 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
141 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
147 struct REGEX_INTERNAL_Announcement *
148 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
149 const struct GNUNET_PeerIdentity *id,
151 uint16_t compression,
152 struct GNUNET_STATISTICS_Handle *stats)
154 struct REGEX_INTERNAL_Announcement *h;
156 GNUNET_assert (NULL != dht);
157 h = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Announcement));
162 h->dfa = REGEX_INTERNAL_construct_dfa (regex,
165 REGEX_INTERNAL_reannounce (h);
171 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h)
173 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
174 LOG (GNUNET_ERROR_TYPE_INFO, "REGEX_INTERNAL_reannounce: %.60s\n", h->regex);
175 LOG (GNUNET_ERROR_TYPE_DEBUG, " full: %s\n", h->regex);
176 REGEX_INTERNAL_iterate_all_edges (h->dfa, ®ex_iterator, h);
181 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h)
183 REGEX_INTERNAL_automaton_destroy (h->dfa);
188 /******************************************************************************/
192 * Struct to keep state of running searches that have consumed a part of
195 struct RegexSearchContext
198 * Part of the description already consumed by
199 * this particular search branch.
204 * Information about the search.
206 struct REGEX_INTERNAL_Search *info;
209 * We just want to look for one edge, the longer the better.
212 unsigned int longest_match;
215 * Destination hash of the longest match.
217 struct GNUNET_HashCode hash;
222 * Struct to keep information of searches of services described by a regex
223 * using a user-provided string service description.
225 struct REGEX_INTERNAL_Search
228 * DHT handle to use, must be initialized externally.
230 struct GNUNET_DHT_Handle *dht;
233 * Optional statistics handle to report usage. Can be NULL.
235 struct GNUNET_STATISTICS_Handle *stats;
238 * User provided description of the searched service.
245 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
248 * Results from running DHT GETs.
250 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
253 * Contexts, for each running DHT GET. Free all on end of search.
255 struct RegexSearchContext **contexts;
258 * Number of contexts (branches/steps in search).
260 unsigned int n_contexts;
263 * @param callback Callback for found peers.
265 REGEX_INTERNAL_Found callback;
268 * @param callback_cls Closure for @c callback.
276 * Jump to the next edge, with the longest matching token.
278 * @param block Block found in the DHT.
279 * @param size Size of the block.
280 * @param ctx Context of the search.
282 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
285 regex_next_edge (const struct RegexBlock *block,
287 struct RegexSearchContext *ctx);
291 * Function to process DHT string to regex matching.
292 * Called on each result obtained for the DHT search.
294 * @param cls Closure (search context).
295 * @param exp When will this value expire.
296 * @param key Key of the result.
297 * @param get_path Path of the get request.
298 * @param get_path_length Lenght of get_path.
299 * @param put_path Path of the put request.
300 * @param put_path_length Length of the put_path.
301 * @param type Type of the result.
302 * @param size Number of bytes in data.
303 * @param data Pointer to the result data.
306 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
307 const struct GNUNET_HashCode * key,
308 const struct GNUNET_PeerIdentity *get_path,
309 unsigned int get_path_length,
310 const struct GNUNET_PeerIdentity *put_path,
311 unsigned int put_path_length,
312 enum GNUNET_BLOCK_Type type,
313 size_t size, const void *data)
315 const struct RegexAccept *block = data;
316 struct RegexSearchContext *ctx = cls;
317 struct REGEX_INTERNAL_Search *info = ctx->info;
319 LOG (GNUNET_ERROR_TYPE_DEBUG, "Got regex results from DHT!\n");
320 LOG (GNUNET_ERROR_TYPE_INFO, " accept for %s (key %s)\n",
321 info->description, GNUNET_h2s(key));
323 GNUNET_STATISTICS_update (info->stats, "# regex accepting blocks found",
325 GNUNET_STATISTICS_update (info->stats, "# regex accepting block bytes found",
328 info->callback (info->callback_cls,
330 get_path, get_path_length,
331 put_path, put_path_length);
336 * Find a path to a peer that offers a regex servcie compatible
337 * with a given string.
339 * @param key The key of the accepting state.
340 * @param ctx Context containing info about the string, tunnel, etc.
343 regex_find_path (const struct GNUNET_HashCode *key,
344 struct RegexSearchContext *ctx)
346 struct GNUNET_DHT_GetHandle *get_h;
348 LOG (GNUNET_ERROR_TYPE_DEBUG, "Found peer by service\n");
349 LOG (GNUNET_ERROR_TYPE_INFO, " find accept for %s\n", GNUNET_h2s (key));
350 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
351 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
352 key, /* key to search */
353 DHT_REPLICATION, /* replication level */
354 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
355 NULL, /* xquery */ // FIXME BLOOMFILTER
356 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
357 &dht_get_string_accept_handler, ctx);
358 GNUNET_break (GNUNET_OK ==
359 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
362 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
367 * Function to process DHT string to regex matching.
368 * Called on each result obtained for the DHT search.
370 * @param cls closure (search context)
371 * @param exp when will this value expire
372 * @param key key of the result
373 * @param get_path path of the get request (not used)
374 * @param get_path_length lenght of get_path (not used)
375 * @param put_path path of the put request (not used)
376 * @param put_path_length length of the put_path (not used)
377 * @param type type of the result
378 * @param size number of bytes in data
379 * @param data pointer to the result data
381 * TODO: re-issue the request after certain time? cancel after X results?
384 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
385 const struct GNUNET_HashCode * key,
386 const struct GNUNET_PeerIdentity *get_path,
387 unsigned int get_path_length,
388 const struct GNUNET_PeerIdentity *put_path,
389 unsigned int put_path_length,
390 enum GNUNET_BLOCK_Type type,
391 size_t size, const void *data)
393 const struct RegexBlock *block = data;
394 struct RegexSearchContext *ctx = cls;
395 struct REGEX_INTERNAL_Search *info = ctx->info;
401 if (NULL != put_path && 0 != put_path_length)
403 datastore = GNUNET_strdup (GNUNET_i2s (&put_path[put_path_length - 1]));
407 GNUNET_asprintf (&datastore, "?? %u/%u", put_path_length, get_path_length);
410 datastore = GNUNET_strdup ("N/A");
413 LOG (GNUNET_ERROR_TYPE_INFO, " DHT GET result for %s (%s) at %s\n",
414 GNUNET_h2s (key), ctx->info->description, datastore);
415 GNUNET_free (datastore);
417 copy = GNUNET_malloc (size);
418 memcpy (copy, data, size);
421 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
422 &((struct RegexBlock *)copy)->key, copy,
423 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)
425 len = ntohl (block->n_proof);
429 memcpy (proof, &block[1], len);
431 if (GNUNET_OK != REGEX_INTERNAL_check_proof (proof, key))
437 len = strlen (info->description);
438 if (len == ctx->position) // String processed
440 if (GNUNET_YES == ntohl (block->accepting))
442 regex_find_path (key, ctx);
446 LOG (GNUNET_ERROR_TYPE_INFO, " block not accepting!\n");
447 // FIXME REGEX this block not successful, wait for more? start timeout?
451 regex_next_edge (block, size, ctx);
456 * Iterator over found existing mesh regex blocks that match an ongoing search.
458 * @param cls Closure (current context)-
459 * @param key Current key code (key for cached block).
460 * @param value Value in the hash map (cached RegexBlock).
461 * @return GNUNET_YES: we should always continue to iterate.
464 regex_result_iterator (void *cls,
465 const struct GNUNET_HashCode * key,
468 struct RegexBlock *block = value;
469 struct RegexSearchContext *ctx = cls;
471 if (GNUNET_YES == ntohl(block->accepting) &&
472 ctx->position == strlen (ctx->info->description))
474 LOG (GNUNET_ERROR_TYPE_INFO, " * Found accepting known block\n");
475 regex_find_path (key, ctx);
476 return GNUNET_YES; // We found an accept state!
478 LOG (GNUNET_ERROR_TYPE_DEBUG, "* %u, %u, [%u]\n",
479 ctx->position, strlen(ctx->info->description),
480 ntohl(block->accepting));
482 regex_next_edge (block, SIZE_MAX, ctx);
484 GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
492 * Iterator over edges in a regex block retrieved from the DHT.
494 * @param cls Closure (context of the search).
495 * @param token Token that follows to next state.
496 * @param len Lenght of token.
497 * @param key Hash of next state.
499 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
502 regex_edge_iterator (void *cls,
505 const struct GNUNET_HashCode *key)
507 struct RegexSearchContext *ctx = cls;
508 struct REGEX_INTERNAL_Search *info = ctx->info;
512 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
515 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Start of regex edge iterator\n");
516 LOG (GNUNET_ERROR_TYPE_DEBUG, "* descr : %s\n", info->description);
517 LOG (GNUNET_ERROR_TYPE_DEBUG, "* posit : %u\n", ctx->position);
518 current = &info->description[ctx->position];
519 LOG (GNUNET_ERROR_TYPE_DEBUG, "* currt : %s\n", current);
520 current_len = strlen (info->description) - ctx->position;
521 LOG (GNUNET_ERROR_TYPE_DEBUG, "* ctlen : %u\n", current_len);
522 LOG (GNUNET_ERROR_TYPE_DEBUG, "* tklen : %u\n", len);
523 LOG (GNUNET_ERROR_TYPE_DEBUG, "* token : %.*s\n", len, token);
524 LOG (GNUNET_ERROR_TYPE_DEBUG, "* nextk : %s\n", GNUNET_h2s(key));
525 if (len > current_len)
527 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token too long, END\n");
528 return GNUNET_YES; // Token too long, wont match
530 if (0 != strncmp (current, token, len))
532 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token doesn't match, END\n");
533 return GNUNET_YES; // Token doesn't match
536 if (len > ctx->longest_match)
538 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is longer, KEEP\n");
539 ctx->longest_match = len;
544 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is not longer, IGNORE\n");
547 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
553 * Jump to the next edge, with the longest matching token.
555 * @param block Block found in the DHT.
556 * @param size Size of the block.
557 * @param ctx Context of the search.
559 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
562 regex_next_edge (const struct RegexBlock *block,
564 struct RegexSearchContext *ctx)
566 struct RegexSearchContext *new_ctx;
567 struct REGEX_INTERNAL_Search *info = ctx->info;
568 struct GNUNET_DHT_GetHandle *get_h;
569 struct GNUNET_HashCode *hash;
573 /* Find the longest match for the current string position,
574 * among tokens in the given block */
575 ctx->longest_match = 0;
576 result = REGEX_INTERNAL_block_iterate (block, size,
577 ®ex_edge_iterator, ctx);
578 GNUNET_break (GNUNET_OK == result);
580 /* Did anything match? */
581 if (0 == ctx->longest_match)
583 LOG (GNUNET_ERROR_TYPE_DEBUG, " no match in block\n");
588 new_ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
589 new_ctx->info = info;
590 new_ctx->position = ctx->position + ctx->longest_match;
591 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
593 /* Check whether we already have a DHT GET running for it */
595 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
597 LOG (GNUNET_ERROR_TYPE_DEBUG, "* GET for %s running, END\n",
599 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
601 ®ex_result_iterator,
603 return; /* We are already looking for it */
606 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
609 /* Start search in DHT */
610 LOG (GNUNET_ERROR_TYPE_INFO, " looking for %s\n", GNUNET_h2s (hash));
611 rest = &new_ctx->info->description[new_ctx->position];
613 GNUNET_DHT_get_start (info->dht, /* handle */
614 GNUNET_BLOCK_TYPE_REGEX, /* type */
615 hash, /* key to search */
616 DHT_REPLICATION, /* replication level */
619 // FIXME add BLOOMFILTER to exclude filtered peers
620 strlen(rest) + 1, /* xquery bits */
621 // FIXME add BLOOMFILTER SIZE
622 &dht_get_string_handler, new_ctx);
624 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
627 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
635 struct REGEX_INTERNAL_Search *
636 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
638 REGEX_INTERNAL_Found callback,
640 struct GNUNET_STATISTICS_Handle *stats)
642 struct REGEX_INTERNAL_Search *h;
643 struct GNUNET_DHT_GetHandle *get_h;
644 struct RegexSearchContext *ctx;
645 struct GNUNET_HashCode key;
649 /* Initialize handle */
650 LOG (GNUNET_ERROR_TYPE_INFO, "REGEX_INTERNAL_search: %s\n", string);
651 GNUNET_assert (NULL != dht);
652 GNUNET_assert (NULL != callback);
653 h = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Search));
655 h->description = GNUNET_strdup (string);
656 h->callback = callback;
657 h->callback_cls = callback_cls;
659 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
660 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
662 /* Initialize context */
663 len = strlen (string);
664 size = REGEX_INTERNAL_get_first_key (string, len, &key);
665 ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
666 ctx->position = size;
668 GNUNET_array_append (h->contexts, h->n_contexts, ctx);
669 LOG (GNUNET_ERROR_TYPE_DEBUG, " consumed %u bits out of %u\n", size, len);
670 LOG (GNUNET_ERROR_TYPE_INFO, " looking for %s\n", GNUNET_h2s (&key));
672 /* Start search in DHT */
673 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
674 GNUNET_BLOCK_TYPE_REGEX, /* type */
675 &key, /* key to search */
676 DHT_REPLICATION, /* replication level */
678 &h->description[size], /* xquery */
679 // FIXME add BLOOMFILTER to exclude filtered peers
680 len + 1 - size, /* xquery bits */
681 // FIXME add BLOOMFILTER SIZE
682 &dht_get_string_handler, ctx);
685 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
688 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
696 * Iterator over hash map entries to cancel DHT GET requests after a
697 * successful connect_by_string.
699 * @param cls Closure (unused).
700 * @param key Current key code (unused).
701 * @param value Value in the hash map (get handle).
702 * @return GNUNET_YES if we should continue to iterate,
706 regex_cancel_dht_get (void *cls,
707 const struct GNUNET_HashCode * key,
710 struct GNUNET_DHT_GetHandle *h = value;
712 GNUNET_DHT_get_stop (h);
718 * Iterator over hash map entries to free MeshRegexBlocks stored during the
719 * search for connect_by_string.
721 * @param cls Closure (unused).
722 * @param key Current key code (unused).
723 * @param value MeshRegexBlock in the hash map.
724 * @return GNUNET_YES if we should continue to iterate,
728 regex_free_result (void *cls,
729 const struct GNUNET_HashCode * key,
739 * Cancel an ongoing regex search in the DHT and free all resources.
741 * @param ctx The search context.
744 regex_cancel_search (struct REGEX_INTERNAL_Search *ctx)
746 GNUNET_free (ctx->description);
747 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_handles,
748 ®ex_cancel_dht_get, NULL);
749 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_results,
750 ®ex_free_result, NULL);
751 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_results);
752 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_handles);
753 if (0 < ctx->n_contexts)
757 for (i = 0; i < ctx->n_contexts; i++)
759 GNUNET_free (ctx->contexts[i]);
761 GNUNET_free (ctx->contexts);
766 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h)
768 regex_cancel_search (h);
774 /* end of regex_dht.c */