2 This file is part of GNUnet
3 Copyright (C) 2012, 2015 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL3.0-or-later
21 * @file src/regex/regex_internal_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "regex_internal_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_signatures.h"
35 #define LOG(kind, ...) GNUNET_log_from (kind, "regex-dht", __VA_ARGS__)
38 * DHT replication level to use.
40 #define DHT_REPLICATION 5
43 * DHT record lifetime to use.
45 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
50 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
54 * Handle to store cached data about a regex announce.
56 struct REGEX_INTERNAL_Announcement
59 * DHT handle to use, must be initialized externally.
61 struct GNUNET_DHT_Handle *dht;
69 * Automaton representation of the regex (expensive to build).
71 struct REGEX_INTERNAL_Automaton *dfa;
76 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv;
79 * Optional statistics handle to report usage. Can be NULL.
81 struct GNUNET_STATISTICS_Handle *stats;
86 * Regex callback iterator to store own service description in the DHT.
89 * @param key hash for current state.
90 * @param proof proof for current state.
91 * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not.
92 * @param num_edges number of edges leaving current state.
93 * @param edges edges leaving current state.
96 regex_iterator (void *cls,
97 const struct GNUNET_HashCode *key,
100 unsigned int num_edges,
101 const struct REGEX_BLOCK_Edge *edges)
103 struct REGEX_INTERNAL_Announcement *h = cls;
104 struct RegexBlock *block;
108 LOG (GNUNET_ERROR_TYPE_INFO,
109 "DHT PUT for state %s with proof `%s' and %u edges:\n",
113 for (i = 0; i < num_edges; i++)
115 LOG (GNUNET_ERROR_TYPE_INFO,
116 "Edge %u `%s' towards %s\n",
119 GNUNET_h2s (&edges[i].destination));
121 if (GNUNET_YES == accepting)
123 struct RegexAcceptBlock ab;
125 LOG (GNUNET_ERROR_TYPE_INFO,
126 "State %s is accepting, putting own id\n",
128 size = sizeof(struct RegexAcceptBlock);
129 ab.purpose.size = ntohl (sizeof(struct GNUNET_CRYPTO_EccSignaturePurpose)
130 + sizeof(struct GNUNET_TIME_AbsoluteNBO)
131 + sizeof(struct GNUNET_HashCode));
132 ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT);
133 ab.expiration_time = GNUNET_TIME_absolute_hton (
134 GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION));
136 GNUNET_CRYPTO_eddsa_key_get_public (h->priv,
137 &ab.peer.public_key);
138 GNUNET_assert (GNUNET_OK ==
139 GNUNET_CRYPTO_eddsa_sign (h->priv,
143 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
145 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
146 sizeof(struct RegexAcceptBlock), GNUNET_NO);
148 GNUNET_DHT_put (h->dht, key,
150 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
151 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
154 GNUNET_TIME_relative_to_absolute (DHT_TTL),
157 block = REGEX_BLOCK_create (proof,
164 (void) GNUNET_DHT_put (h->dht,
168 GNUNET_BLOCK_TYPE_REGEX,
171 GNUNET_TIME_relative_to_absolute (DHT_TTL),
174 GNUNET_STATISTICS_update (h->stats,
175 "# regex blocks stored",
178 GNUNET_STATISTICS_update (h->stats,
179 "# regex block bytes stored",
187 * Announce a regular expression: put all states of the automaton in the DHT.
188 * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that.
190 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
191 * @param priv our private key, must remain valid until the announcement is cancelled
192 * @param regex Regular expression to announce.
193 * @param compression How many characters per edge can we squeeze?
194 * @param stats Optional statistics handle to report usage. Can be NULL.
195 * @return Handle to reuse o free cached resources.
196 * Must be freed by calling #REGEX_INTERNAL_announce_cancel().
198 struct REGEX_INTERNAL_Announcement *
199 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
200 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv,
202 uint16_t compression,
203 struct GNUNET_STATISTICS_Handle *stats)
205 struct REGEX_INTERNAL_Announcement *h;
207 GNUNET_assert (NULL != dht);
208 h = GNUNET_new (struct REGEX_INTERNAL_Announcement);
213 h->dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), compression);
214 REGEX_INTERNAL_reannounce (h);
220 * Announce again a regular expression previously announced.
221 * Does use caching to speed up process.
223 * @param h Handle returned by a previous #REGEX_INTERNAL_announce call().
226 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h)
228 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
229 LOG (GNUNET_ERROR_TYPE_INFO,
230 "REGEX_INTERNAL_reannounce: %s\n",
232 REGEX_INTERNAL_iterate_reachable_edges (h->dfa,
239 * Clear all cached data used by a regex announce.
240 * Does not close DHT connection.
242 * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call.
245 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h)
247 REGEX_INTERNAL_automaton_destroy (h->dfa);
252 /******************************************************************************/
256 * Struct to keep state of running searches that have consumed a part of
259 struct RegexSearchContext
262 * Part of the description already consumed by
263 * this particular search branch.
268 * Information about the search.
270 struct REGEX_INTERNAL_Search *info;
273 * We just want to look for one edge, the longer the better.
276 unsigned int longest_match;
279 * Destination hash of the longest match.
281 struct GNUNET_HashCode hash;
286 * Type of values in `dht_get_results`.
291 * Number of bytes in data.
296 * The raw result data.
303 * Struct to keep information of searches of services described by a regex
304 * using a user-provided string service description.
306 struct REGEX_INTERNAL_Search
309 * DHT handle to use, must be initialized externally.
311 struct GNUNET_DHT_Handle *dht;
314 * Optional statistics handle to report usage. Can be NULL.
316 struct GNUNET_STATISTICS_Handle *stats;
319 * User provided description of the searched service.
326 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
329 * Results from running DHT GETs, values are of type
332 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
335 * Contexts, for each running DHT GET. Free all on end of search.
337 struct RegexSearchContext **contexts;
340 * Number of contexts (branches/steps in search).
342 unsigned int n_contexts;
345 * @param callback Callback for found peers.
347 REGEX_INTERNAL_Found callback;
350 * @param callback_cls Closure for @c callback.
357 * Jump to the next edge, with the longest matching token.
359 * @param block Block found in the DHT.
360 * @param size Size of the block.
361 * @param ctx Context of the search.
364 regex_next_edge (const struct RegexBlock *block,
366 struct RegexSearchContext *ctx);
370 * Function to process DHT string to regex matching.
371 * Called on each result obtained for the DHT search.
373 * @param cls Closure (search context).
374 * @param exp When will this value expire.
375 * @param key Key of the result.
376 * @param get_path Path of the get request.
377 * @param get_path_length Lenght of get_path.
378 * @param put_path Path of the put request.
379 * @param put_path_length Length of the put_path.
380 * @param type Type of the result.
381 * @param size Number of bytes in data.
382 * @param data Pointer to the result data.
385 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
386 const struct GNUNET_HashCode *key,
387 const struct GNUNET_PeerIdentity *get_path,
388 unsigned int get_path_length,
389 const struct GNUNET_PeerIdentity *put_path,
390 unsigned int put_path_length,
391 enum GNUNET_BLOCK_Type type,
392 size_t size, const void *data)
394 const struct RegexAcceptBlock *block = data;
395 struct RegexSearchContext *ctx = cls;
396 struct REGEX_INTERNAL_Search *info = ctx->info;
398 LOG (GNUNET_ERROR_TYPE_DEBUG,
399 "Regex result accept for %s (key %s)\n",
400 info->description, GNUNET_h2s (key));
402 GNUNET_STATISTICS_update (info->stats,
403 "# regex accepting blocks found",
405 GNUNET_STATISTICS_update (info->stats,
406 "# regex accepting block bytes found",
408 info->callback (info->callback_cls,
410 get_path, get_path_length,
411 put_path, put_path_length);
416 * Find a path to a peer that offers a regex service compatible
417 * with a given string.
419 * @param key The key of the accepting state.
420 * @param ctx Context containing info about the string, tunnel, etc.
423 regex_find_path (const struct GNUNET_HashCode *key,
424 struct RegexSearchContext *ctx)
426 struct GNUNET_DHT_GetHandle *get_h;
428 LOG (GNUNET_ERROR_TYPE_DEBUG,
429 "Accept state found, now searching for paths to %s\n",
431 (unsigned int) ctx->position);
432 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
433 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
434 key, /* key to search */
435 DHT_REPLICATION, /* replication level */
436 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
437 NULL, /* xquery */ // FIXME BLOOMFILTER
438 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
439 &dht_get_string_accept_handler, ctx);
440 GNUNET_break (GNUNET_OK ==
441 GNUNET_CONTAINER_multihashmap_put (ctx->info->dht_get_handles,
444 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
449 * Function to process DHT string to regex matching.
450 * Called on each result obtained for the DHT search.
452 * @param cls closure (search context)
453 * @param exp when will this value expire
454 * @param key key of the result
455 * @param get_path path of the get request (not used)
456 * @param get_path_length length of @a get_path (not used)
457 * @param put_path path of the put request (not used)
458 * @param put_path_length length of the @a put_path (not used)
459 * @param type type of the result
460 * @param size number of bytes in data
461 * @param data pointer to the result data
463 * TODO: re-issue the request after certain time? cancel after X results?
466 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
467 const struct GNUNET_HashCode *key,
468 const struct GNUNET_PeerIdentity *get_path,
469 unsigned int get_path_length,
470 const struct GNUNET_PeerIdentity *put_path,
471 unsigned int put_path_length,
472 enum GNUNET_BLOCK_Type type,
473 size_t size, const void *data)
475 const struct RegexBlock *block = data;
476 struct RegexSearchContext *ctx = cls;
477 struct REGEX_INTERNAL_Search *info = ctx->info;
481 LOG (GNUNET_ERROR_TYPE_INFO,
482 "DHT GET result for %s (%s)\n",
483 GNUNET_h2s (key), ctx->info->description);
484 copy = GNUNET_malloc (sizeof(struct Result) + size);
486 copy->data = ©[1];
487 GNUNET_memcpy (©[1], block, size);
488 GNUNET_break (GNUNET_OK ==
489 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
491 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
492 len = strlen (info->description);
493 if (len == ctx->position) // String processed
495 if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block, size))
497 regex_find_path (key, ctx);
501 LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n");
502 /* FIXME REGEX this block not successful, wait for more? start timeout? */
506 regex_next_edge (block, size, ctx);
511 * Iterator over found existing cadet regex blocks that match an ongoing search.
513 * @param cls Closure (current context)-
514 * @param key Current key code (key for cached block).
515 * @param value Value in the hash map (cached RegexBlock).
516 * @return #GNUNET_YES: we should always continue to iterate.
519 regex_result_iterator (void *cls,
520 const struct GNUNET_HashCode *key,
523 struct Result *result = value;
524 const struct RegexBlock *block = result->data;
525 struct RegexSearchContext *ctx = cls;
528 GNUNET_BLOCK_is_accepting (block, result->size)) &&
529 (ctx->position == strlen (ctx->info->description)))
531 LOG (GNUNET_ERROR_TYPE_INFO,
532 "Found accepting known block\n");
533 regex_find_path (key, ctx);
534 return GNUNET_YES; // We found an accept state!
536 LOG (GNUNET_ERROR_TYPE_DEBUG,
539 strlen (ctx->info->description),
540 GNUNET_BLOCK_is_accepting (block, result->size));
541 regex_next_edge (block, result->size, ctx);
543 GNUNET_STATISTICS_update (ctx->info->stats, "# regex cadet blocks iterated",
551 * Iterator over edges in a regex block retrieved from the DHT.
553 * @param cls Closure (context of the search).
554 * @param token Token that follows to next state.
555 * @param len Lenght of token.
556 * @param key Hash of next state.
557 * @return #GNUNET_YES if should keep iterating, #GNUNET_NO otherwise.
560 regex_edge_iterator (void *cls,
563 const struct GNUNET_HashCode *key)
565 struct RegexSearchContext *ctx = cls;
566 struct REGEX_INTERNAL_Search *info = ctx->info;
570 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
572 current = &info->description[ctx->position];
573 current_len = strlen (info->description) - ctx->position;
574 if (len > current_len)
576 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n");
579 if (0 != strncmp (current, token, len))
581 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n");
585 if (len > ctx->longest_match)
587 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n");
588 ctx->longest_match = len;
593 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n");
596 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
602 * Jump to the next edge, with the longest matching token.
604 * @param block Block found in the DHT.
605 * @param size Size of the block.
606 * @param ctx Context of the search.
609 regex_next_edge (const struct RegexBlock *block,
611 struct RegexSearchContext *ctx)
613 struct RegexSearchContext *new_ctx;
614 struct REGEX_INTERNAL_Search *info = ctx->info;
615 struct GNUNET_DHT_GetHandle *get_h;
616 struct GNUNET_HashCode *hash;
620 LOG (GNUNET_ERROR_TYPE_DEBUG, "Next edge\n");
621 /* Find the longest match for the current string position,
622 * among tokens in the given block */
623 ctx->longest_match = 0;
624 result = REGEX_BLOCK_iterate (block, size,
625 ®ex_edge_iterator, ctx);
626 GNUNET_break (GNUNET_OK == result);
628 /* Did anything match? */
629 if (0 == ctx->longest_match)
631 LOG (GNUNET_ERROR_TYPE_DEBUG,
632 "no match in block\n");
637 new_ctx = GNUNET_new (struct RegexSearchContext);
638 new_ctx->info = info;
639 new_ctx->position = ctx->position + ctx->longest_match;
640 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
642 /* Check whether we already have a DHT GET running for it */
644 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
646 LOG (GNUNET_ERROR_TYPE_DEBUG,
647 "GET for %s running, END\n",
649 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
651 ®ex_result_iterator,
653 return; /* We are already looking for it */
656 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
659 LOG (GNUNET_ERROR_TYPE_DEBUG,
660 "Following edges at %s for offset %u in `%s'\n",
662 (unsigned int) ctx->position,
664 rest = &new_ctx->info->description[new_ctx->position];
666 GNUNET_DHT_get_start (info->dht, /* handle */
667 GNUNET_BLOCK_TYPE_REGEX, /* type */
668 hash, /* key to search */
669 DHT_REPLICATION, /* replication level */
672 strlen (rest) + 1, /* xquery bits */
673 &dht_get_string_handler, new_ctx);
675 GNUNET_CONTAINER_multihashmap_put (info->dht_get_handles,
678 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
687 * Search for a peer offering a regex matching certain string in the DHT.
688 * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results
691 * @param dht An existing and valid DHT service handle.
692 * @param string String to match against the regexes in the DHT.
693 * @param callback Callback for found peers.
694 * @param callback_cls Closure for @c callback.
695 * @param stats Optional statistics handle to report usage. Can be NULL.
696 * @return Handle to stop search and free resources.
697 * Must be freed by calling #REGEX_INTERNAL_search_cancel().
699 struct REGEX_INTERNAL_Search *
700 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
702 REGEX_INTERNAL_Found callback,
704 struct GNUNET_STATISTICS_Handle *stats)
706 struct REGEX_INTERNAL_Search *h;
707 struct GNUNET_DHT_GetHandle *get_h;
708 struct RegexSearchContext *ctx;
709 struct GNUNET_HashCode key;
713 /* Initialize handle */
714 GNUNET_assert (NULL != dht);
715 GNUNET_assert (NULL != callback);
716 h = GNUNET_new (struct REGEX_INTERNAL_Search);
718 h->description = GNUNET_strdup (string);
719 h->callback = callback;
720 h->callback_cls = callback_cls;
722 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
723 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
725 /* Initialize context */
726 len = strlen (string);
727 size = REGEX_INTERNAL_get_first_key (string, len, &key);
728 LOG (GNUNET_ERROR_TYPE_INFO,
729 "Initial key for `%s' is %s (based on `%.*s')\n",
734 ctx = GNUNET_new (struct RegexSearchContext);
735 ctx->position = size;
737 GNUNET_array_append (h->contexts,
740 /* Start search in DHT */
741 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
742 GNUNET_BLOCK_TYPE_REGEX, /* type */
743 &key, /* key to search */
744 DHT_REPLICATION, /* replication level */
746 &h->description[size], /* xquery */
747 // FIXME add BLOOMFILTER to exclude filtered peers
748 len + 1 - size, /* xquery bits */
749 // FIXME add BLOOMFILTER SIZE
750 &dht_get_string_handler, ctx);
753 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
756 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
764 * Iterator over hash map entries to cancel DHT GET requests after a
765 * successful connect_by_string.
767 * @param cls Closure (unused).
768 * @param key Current key code (unused).
769 * @param value Value in the hash map (get handle).
770 * @return #GNUNET_YES if we should continue to iterate,
774 regex_cancel_dht_get (void *cls,
775 const struct GNUNET_HashCode *key,
778 struct GNUNET_DHT_GetHandle *h = value;
780 GNUNET_DHT_get_stop (h);
786 * Iterator over hash map entries to free CadetRegexBlocks stored during the
787 * search for connect_by_string.
789 * @param cls Closure (unused).
790 * @param key Current key code (unused).
791 * @param value CadetRegexBlock in the hash map.
792 * @return #GNUNET_YES if we should continue to iterate,
796 regex_free_result (void *cls,
797 const struct GNUNET_HashCode *key,
806 * Cancel an ongoing regex search in the DHT and free all resources.
808 * @param h the search context.
811 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h)
815 GNUNET_free (h->description);
816 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles,
817 ®ex_cancel_dht_get, NULL);
818 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results,
819 ®ex_free_result, NULL);
820 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results);
821 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles);
822 if (0 < h->n_contexts)
824 for (i = 0; i < h->n_contexts; i++)
825 GNUNET_free (h->contexts[i]);
826 GNUNET_free (h->contexts);
832 /* end of regex_internal_dht.c */