2 This file is part of GNUnet
3 Copyright (C) 2012, 2015 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL3.0-or-later
21 * @file src/regex/regex_internal_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "regex_internal_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_signatures.h"
35 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
38 * DHT replication level to use.
40 #define DHT_REPLICATION 5
43 * DHT record lifetime to use.
45 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
50 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
54 * Handle to store cached data about a regex announce.
56 struct REGEX_INTERNAL_Announcement
59 * DHT handle to use, must be initialized externally.
61 struct GNUNET_DHT_Handle *dht;
69 * Automaton representation of the regex (expensive to build).
71 struct REGEX_INTERNAL_Automaton *dfa;
76 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv;
79 * Optional statistics handle to report usage. Can be NULL.
81 struct GNUNET_STATISTICS_Handle *stats;
86 * Regex callback iterator to store own service description in the DHT.
89 * @param key hash for current state.
90 * @param proof proof for current state.
91 * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not.
92 * @param num_edges number of edges leaving current state.
93 * @param edges edges leaving current state.
96 regex_iterator (void *cls,
97 const struct GNUNET_HashCode *key,
100 unsigned int num_edges,
101 const struct REGEX_BLOCK_Edge *edges)
103 struct REGEX_INTERNAL_Announcement *h = cls;
104 struct RegexBlock *block;
108 LOG (GNUNET_ERROR_TYPE_INFO,
109 "DHT PUT for state %s with proof `%s' and %u edges:\n",
113 for (i = 0; i < num_edges; i++)
115 LOG (GNUNET_ERROR_TYPE_INFO,
116 "Edge %u `%s' towards %s\n",
119 GNUNET_h2s (&edges[i].destination));
121 if (GNUNET_YES == accepting)
123 struct RegexAcceptBlock ab;
125 LOG (GNUNET_ERROR_TYPE_INFO,
126 "State %s is accepting, putting own id\n",
128 size = sizeof (struct RegexAcceptBlock);
129 ab.purpose.size = ntohl (sizeof (struct GNUNET_CRYPTO_EccSignaturePurpose) +
130 sizeof (struct GNUNET_TIME_AbsoluteNBO) +
131 sizeof (struct GNUNET_HashCode));
132 ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT);
133 ab.expiration_time = GNUNET_TIME_absolute_hton (GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION));
135 GNUNET_CRYPTO_eddsa_key_get_public (h->priv,
136 &ab.peer.public_key);
137 GNUNET_assert (GNUNET_OK ==
138 GNUNET_CRYPTO_eddsa_sign (h->priv,
142 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
144 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
145 sizeof (struct RegexAcceptBlock), GNUNET_NO);
147 GNUNET_DHT_put (h->dht, key,
149 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
150 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
153 GNUNET_TIME_relative_to_absolute (DHT_TTL),
156 block = REGEX_BLOCK_create (proof,
163 (void) GNUNET_DHT_put (h->dht,
167 GNUNET_BLOCK_TYPE_REGEX,
170 GNUNET_TIME_relative_to_absolute (DHT_TTL),
173 GNUNET_STATISTICS_update (h->stats,
174 "# regex blocks stored",
177 GNUNET_STATISTICS_update (h->stats,
178 "# regex block bytes stored",
186 * Announce a regular expression: put all states of the automaton in the DHT.
187 * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that.
189 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
190 * @param priv our private key, must remain valid until the announcement is cancelled
191 * @param regex Regular expression to announce.
192 * @param compression How many characters per edge can we squeeze?
193 * @param stats Optional statistics handle to report usage. Can be NULL.
194 * @return Handle to reuse o free cached resources.
195 * Must be freed by calling #REGEX_INTERNAL_announce_cancel().
197 struct REGEX_INTERNAL_Announcement *
198 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
199 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv,
201 uint16_t compression,
202 struct GNUNET_STATISTICS_Handle *stats)
204 struct REGEX_INTERNAL_Announcement *h;
206 GNUNET_assert (NULL != dht);
207 h = GNUNET_new (struct REGEX_INTERNAL_Announcement);
212 h->dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), compression);
213 REGEX_INTERNAL_reannounce (h);
219 * Announce again a regular expression previously announced.
220 * Does use caching to speed up process.
222 * @param h Handle returned by a previous #REGEX_INTERNAL_announce call().
225 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h)
227 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
228 LOG (GNUNET_ERROR_TYPE_INFO,
229 "REGEX_INTERNAL_reannounce: %s\n",
231 REGEX_INTERNAL_iterate_reachable_edges (h->dfa,
238 * Clear all cached data used by a regex announce.
239 * Does not close DHT connection.
241 * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call.
244 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h)
246 REGEX_INTERNAL_automaton_destroy (h->dfa);
251 /******************************************************************************/
255 * Struct to keep state of running searches that have consumed a part of
258 struct RegexSearchContext
261 * Part of the description already consumed by
262 * this particular search branch.
267 * Information about the search.
269 struct REGEX_INTERNAL_Search *info;
272 * We just want to look for one edge, the longer the better.
275 unsigned int longest_match;
278 * Destination hash of the longest match.
280 struct GNUNET_HashCode hash;
285 * Type of values in `dht_get_results`.
290 * Number of bytes in data.
295 * The raw result data.
302 * Struct to keep information of searches of services described by a regex
303 * using a user-provided string service description.
305 struct REGEX_INTERNAL_Search
308 * DHT handle to use, must be initialized externally.
310 struct GNUNET_DHT_Handle *dht;
313 * Optional statistics handle to report usage. Can be NULL.
315 struct GNUNET_STATISTICS_Handle *stats;
318 * User provided description of the searched service.
325 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
328 * Results from running DHT GETs, values are of type
331 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
334 * Contexts, for each running DHT GET. Free all on end of search.
336 struct RegexSearchContext **contexts;
339 * Number of contexts (branches/steps in search).
341 unsigned int n_contexts;
344 * @param callback Callback for found peers.
346 REGEX_INTERNAL_Found callback;
349 * @param callback_cls Closure for @c callback.
356 * Jump to the next edge, with the longest matching token.
358 * @param block Block found in the DHT.
359 * @param size Size of the block.
360 * @param ctx Context of the search.
363 regex_next_edge (const struct RegexBlock *block,
365 struct RegexSearchContext *ctx);
369 * Function to process DHT string to regex matching.
370 * Called on each result obtained for the DHT search.
372 * @param cls Closure (search context).
373 * @param exp When will this value expire.
374 * @param key Key of the result.
375 * @param get_path Path of the get request.
376 * @param get_path_length Lenght of get_path.
377 * @param put_path Path of the put request.
378 * @param put_path_length Length of the put_path.
379 * @param type Type of the result.
380 * @param size Number of bytes in data.
381 * @param data Pointer to the result data.
384 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
385 const struct GNUNET_HashCode *key,
386 const struct GNUNET_PeerIdentity *get_path,
387 unsigned int get_path_length,
388 const struct GNUNET_PeerIdentity *put_path,
389 unsigned int put_path_length,
390 enum GNUNET_BLOCK_Type type,
391 size_t size, const void *data)
393 const struct RegexAcceptBlock *block = data;
394 struct RegexSearchContext *ctx = cls;
395 struct REGEX_INTERNAL_Search *info = ctx->info;
397 LOG (GNUNET_ERROR_TYPE_DEBUG,
398 "Regex result accept for %s (key %s)\n",
399 info->description, GNUNET_h2s(key));
401 GNUNET_STATISTICS_update (info->stats,
402 "# regex accepting blocks found",
404 GNUNET_STATISTICS_update (info->stats,
405 "# regex accepting block bytes found",
407 info->callback (info->callback_cls,
409 get_path, get_path_length,
410 put_path, put_path_length);
415 * Find a path to a peer that offers a regex service compatible
416 * with a given string.
418 * @param key The key of the accepting state.
419 * @param ctx Context containing info about the string, tunnel, etc.
422 regex_find_path (const struct GNUNET_HashCode *key,
423 struct RegexSearchContext *ctx)
425 struct GNUNET_DHT_GetHandle *get_h;
427 LOG (GNUNET_ERROR_TYPE_DEBUG,
428 "Accept state found, now searching for paths to %s\n",
430 (unsigned int) ctx->position);
431 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
432 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
433 key, /* key to search */
434 DHT_REPLICATION, /* replication level */
435 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
436 NULL, /* xquery */ // FIXME BLOOMFILTER
437 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
438 &dht_get_string_accept_handler, ctx);
439 GNUNET_break (GNUNET_OK ==
440 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
443 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
448 * Function to process DHT string to regex matching.
449 * Called on each result obtained for the DHT search.
451 * @param cls closure (search context)
452 * @param exp when will this value expire
453 * @param key key of the result
454 * @param get_path path of the get request (not used)
455 * @param get_path_length length of @a get_path (not used)
456 * @param put_path path of the put request (not used)
457 * @param put_path_length length of the @a put_path (not used)
458 * @param type type of the result
459 * @param size number of bytes in data
460 * @param data pointer to the result data
462 * TODO: re-issue the request after certain time? cancel after X results?
465 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
466 const struct GNUNET_HashCode *key,
467 const struct GNUNET_PeerIdentity *get_path,
468 unsigned int get_path_length,
469 const struct GNUNET_PeerIdentity *put_path,
470 unsigned int put_path_length,
471 enum GNUNET_BLOCK_Type type,
472 size_t size, const void *data)
474 const struct RegexBlock *block = data;
475 struct RegexSearchContext *ctx = cls;
476 struct REGEX_INTERNAL_Search *info = ctx->info;
480 LOG (GNUNET_ERROR_TYPE_INFO,
481 "DHT GET result for %s (%s)\n",
482 GNUNET_h2s (key), ctx->info->description);
483 copy = GNUNET_malloc (sizeof (struct Result) + size);
485 copy->data = ©[1];
486 GNUNET_memcpy (©[1], block, size);
487 GNUNET_break (GNUNET_OK ==
488 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
490 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
491 len = strlen (info->description);
492 if (len == ctx->position) // String processed
494 if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block, size))
496 regex_find_path (key, ctx);
500 LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n");
501 /* FIXME REGEX this block not successful, wait for more? start timeout? */
505 regex_next_edge (block, size, ctx);
510 * Iterator over found existing cadet regex blocks that match an ongoing search.
512 * @param cls Closure (current context)-
513 * @param key Current key code (key for cached block).
514 * @param value Value in the hash map (cached RegexBlock).
515 * @return #GNUNET_YES: we should always continue to iterate.
518 regex_result_iterator (void *cls,
519 const struct GNUNET_HashCode * key,
522 struct Result *result = value;
523 const struct RegexBlock *block = result->data;
524 struct RegexSearchContext *ctx = cls;
527 GNUNET_BLOCK_is_accepting (block, result->size)) &&
528 (ctx->position == strlen (ctx->info->description)) )
530 LOG (GNUNET_ERROR_TYPE_INFO,
531 "Found accepting known block\n");
532 regex_find_path (key, ctx);
533 return GNUNET_YES; // We found an accept state!
535 LOG (GNUNET_ERROR_TYPE_DEBUG,
538 strlen (ctx->info->description),
539 GNUNET_BLOCK_is_accepting (block, result->size));
540 regex_next_edge (block, result->size, ctx);
542 GNUNET_STATISTICS_update (ctx->info->stats, "# regex cadet blocks iterated",
550 * Iterator over edges in a regex block retrieved from the DHT.
552 * @param cls Closure (context of the search).
553 * @param token Token that follows to next state.
554 * @param len Lenght of token.
555 * @param key Hash of next state.
556 * @return #GNUNET_YES if should keep iterating, #GNUNET_NO otherwise.
559 regex_edge_iterator (void *cls,
562 const struct GNUNET_HashCode *key)
564 struct RegexSearchContext *ctx = cls;
565 struct REGEX_INTERNAL_Search *info = ctx->info;
569 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
571 current = &info->description[ctx->position];
572 current_len = strlen (info->description) - ctx->position;
573 if (len > current_len)
575 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n");
578 if (0 != strncmp (current, token, len))
580 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n");
584 if (len > ctx->longest_match)
586 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n");
587 ctx->longest_match = len;
592 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n");
595 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
601 * Jump to the next edge, with the longest matching token.
603 * @param block Block found in the DHT.
604 * @param size Size of the block.
605 * @param ctx Context of the search.
608 regex_next_edge (const struct RegexBlock *block,
610 struct RegexSearchContext *ctx)
612 struct RegexSearchContext *new_ctx;
613 struct REGEX_INTERNAL_Search *info = ctx->info;
614 struct GNUNET_DHT_GetHandle *get_h;
615 struct GNUNET_HashCode *hash;
619 LOG (GNUNET_ERROR_TYPE_DEBUG, "Next edge\n");
620 /* Find the longest match for the current string position,
621 * among tokens in the given block */
622 ctx->longest_match = 0;
623 result = REGEX_BLOCK_iterate (block, size,
624 ®ex_edge_iterator, ctx);
625 GNUNET_break (GNUNET_OK == result);
627 /* Did anything match? */
628 if (0 == ctx->longest_match)
630 LOG (GNUNET_ERROR_TYPE_DEBUG,
631 "no match in block\n");
636 new_ctx = GNUNET_new (struct RegexSearchContext);
637 new_ctx->info = info;
638 new_ctx->position = ctx->position + ctx->longest_match;
639 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
641 /* Check whether we already have a DHT GET running for it */
643 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
645 LOG (GNUNET_ERROR_TYPE_DEBUG,
646 "GET for %s running, END\n",
648 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
650 ®ex_result_iterator,
652 return; /* We are already looking for it */
655 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
658 LOG (GNUNET_ERROR_TYPE_DEBUG,
659 "Following edges at %s for offset %u in `%s'\n",
661 (unsigned int) ctx->position,
663 rest = &new_ctx->info->description[new_ctx->position];
665 GNUNET_DHT_get_start (info->dht, /* handle */
666 GNUNET_BLOCK_TYPE_REGEX, /* type */
667 hash, /* key to search */
668 DHT_REPLICATION, /* replication level */
671 strlen (rest) + 1, /* xquery bits */
672 &dht_get_string_handler, new_ctx);
674 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
677 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
686 * Search for a peer offering a regex matching certain string in the DHT.
687 * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results
690 * @param dht An existing and valid DHT service handle.
691 * @param string String to match against the regexes in the DHT.
692 * @param callback Callback for found peers.
693 * @param callback_cls Closure for @c callback.
694 * @param stats Optional statistics handle to report usage. Can be NULL.
695 * @return Handle to stop search and free resources.
696 * Must be freed by calling #REGEX_INTERNAL_search_cancel().
698 struct REGEX_INTERNAL_Search *
699 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
701 REGEX_INTERNAL_Found callback,
703 struct GNUNET_STATISTICS_Handle *stats)
705 struct REGEX_INTERNAL_Search *h;
706 struct GNUNET_DHT_GetHandle *get_h;
707 struct RegexSearchContext *ctx;
708 struct GNUNET_HashCode key;
712 /* Initialize handle */
713 GNUNET_assert (NULL != dht);
714 GNUNET_assert (NULL != callback);
715 h = GNUNET_new (struct REGEX_INTERNAL_Search);
717 h->description = GNUNET_strdup (string);
718 h->callback = callback;
719 h->callback_cls = callback_cls;
721 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
722 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
724 /* Initialize context */
725 len = strlen (string);
726 size = REGEX_INTERNAL_get_first_key (string, len, &key);
727 LOG (GNUNET_ERROR_TYPE_INFO,
728 "Initial key for `%s' is %s (based on `%.*s')\n",
733 ctx = GNUNET_new (struct RegexSearchContext);
734 ctx->position = size;
736 GNUNET_array_append (h->contexts,
739 /* Start search in DHT */
740 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
741 GNUNET_BLOCK_TYPE_REGEX, /* type */
742 &key, /* key to search */
743 DHT_REPLICATION, /* replication level */
745 &h->description[size], /* xquery */
746 // FIXME add BLOOMFILTER to exclude filtered peers
747 len + 1 - size, /* xquery bits */
748 // FIXME add BLOOMFILTER SIZE
749 &dht_get_string_handler, ctx);
752 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
755 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
763 * Iterator over hash map entries to cancel DHT GET requests after a
764 * successful connect_by_string.
766 * @param cls Closure (unused).
767 * @param key Current key code (unused).
768 * @param value Value in the hash map (get handle).
769 * @return #GNUNET_YES if we should continue to iterate,
773 regex_cancel_dht_get (void *cls,
774 const struct GNUNET_HashCode * key,
777 struct GNUNET_DHT_GetHandle *h = value;
779 GNUNET_DHT_get_stop (h);
785 * Iterator over hash map entries to free CadetRegexBlocks stored during the
786 * search for connect_by_string.
788 * @param cls Closure (unused).
789 * @param key Current key code (unused).
790 * @param value CadetRegexBlock in the hash map.
791 * @return #GNUNET_YES if we should continue to iterate,
795 regex_free_result (void *cls,
796 const struct GNUNET_HashCode * key,
805 * Cancel an ongoing regex search in the DHT and free all resources.
807 * @param h the search context.
810 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h)
814 GNUNET_free (h->description);
815 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles,
816 ®ex_cancel_dht_get, NULL);
817 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results,
818 ®ex_free_result, NULL);
819 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results);
820 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles);
821 if (0 < h->n_contexts)
823 for (i = 0; i < h->n_contexts; i++)
824 GNUNET_free (h->contexts[i]);
825 GNUNET_free (h->contexts);
831 /* end of regex_internal_dht.c */