2 This file is part of GNUnet
3 Copyright (C) 2012, 2015 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 SPDX-License-Identifier: AGPL3.0-or-later
21 * @file src/regex/regex_internal_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "regex_internal_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_signatures.h"
35 #define LOG(kind, ...) GNUNET_log_from(kind, "regex-dht", __VA_ARGS__)
38 * DHT replication level to use.
40 #define DHT_REPLICATION 5
43 * DHT record lifetime to use.
45 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
50 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
54 * Handle to store cached data about a regex announce.
56 struct REGEX_INTERNAL_Announcement {
58 * DHT handle to use, must be initialized externally.
60 struct GNUNET_DHT_Handle *dht;
68 * Automaton representation of the regex (expensive to build).
70 struct REGEX_INTERNAL_Automaton *dfa;
75 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv;
78 * Optional statistics handle to report usage. Can be NULL.
80 struct GNUNET_STATISTICS_Handle *stats;
85 * Regex callback iterator to store own service description in the DHT.
88 * @param key hash for current state.
89 * @param proof proof for current state.
90 * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not.
91 * @param num_edges number of edges leaving current state.
92 * @param edges edges leaving current state.
95 regex_iterator(void *cls,
96 const struct GNUNET_HashCode *key,
99 unsigned int num_edges,
100 const struct REGEX_BLOCK_Edge *edges)
102 struct REGEX_INTERNAL_Announcement *h = cls;
103 struct RegexBlock *block;
107 LOG(GNUNET_ERROR_TYPE_INFO,
108 "DHT PUT for state %s with proof `%s' and %u edges:\n",
112 for (i = 0; i < num_edges; i++)
114 LOG(GNUNET_ERROR_TYPE_INFO,
115 "Edge %u `%s' towards %s\n",
118 GNUNET_h2s(&edges[i].destination));
120 if (GNUNET_YES == accepting)
122 struct RegexAcceptBlock ab;
124 LOG(GNUNET_ERROR_TYPE_INFO,
125 "State %s is accepting, putting own id\n",
127 size = sizeof(struct RegexAcceptBlock);
128 ab.purpose.size = ntohl(sizeof(struct GNUNET_CRYPTO_EccSignaturePurpose) +
129 sizeof(struct GNUNET_TIME_AbsoluteNBO) +
130 sizeof(struct GNUNET_HashCode));
131 ab.purpose.purpose = ntohl(GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT);
132 ab.expiration_time = GNUNET_TIME_absolute_hton(GNUNET_TIME_relative_to_absolute(GNUNET_CONSTANTS_DHT_MAX_EXPIRATION));
134 GNUNET_CRYPTO_eddsa_key_get_public(h->priv,
135 &ab.peer.public_key);
136 GNUNET_assert(GNUNET_OK ==
137 GNUNET_CRYPTO_eddsa_sign(h->priv,
141 GNUNET_STATISTICS_update(h->stats, "# regex accepting blocks stored",
143 GNUNET_STATISTICS_update(h->stats, "# regex accepting block bytes stored",
144 sizeof(struct RegexAcceptBlock), GNUNET_NO);
146 GNUNET_DHT_put(h->dht, key,
148 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
149 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
152 GNUNET_TIME_relative_to_absolute(DHT_TTL),
155 block = REGEX_BLOCK_create(proof,
162 (void)GNUNET_DHT_put(h->dht,
166 GNUNET_BLOCK_TYPE_REGEX,
169 GNUNET_TIME_relative_to_absolute(DHT_TTL),
172 GNUNET_STATISTICS_update(h->stats,
173 "# regex blocks stored",
176 GNUNET_STATISTICS_update(h->stats,
177 "# regex block bytes stored",
185 * Announce a regular expression: put all states of the automaton in the DHT.
186 * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that.
188 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
189 * @param priv our private key, must remain valid until the announcement is cancelled
190 * @param regex Regular expression to announce.
191 * @param compression How many characters per edge can we squeeze?
192 * @param stats Optional statistics handle to report usage. Can be NULL.
193 * @return Handle to reuse o free cached resources.
194 * Must be freed by calling #REGEX_INTERNAL_announce_cancel().
196 struct REGEX_INTERNAL_Announcement *
197 REGEX_INTERNAL_announce(struct GNUNET_DHT_Handle *dht,
198 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv,
200 uint16_t compression,
201 struct GNUNET_STATISTICS_Handle *stats)
203 struct REGEX_INTERNAL_Announcement *h;
205 GNUNET_assert(NULL != dht);
206 h = GNUNET_new(struct REGEX_INTERNAL_Announcement);
211 h->dfa = REGEX_INTERNAL_construct_dfa(regex, strlen(regex), compression);
212 REGEX_INTERNAL_reannounce(h);
218 * Announce again a regular expression previously announced.
219 * Does use caching to speed up process.
221 * @param h Handle returned by a previous #REGEX_INTERNAL_announce call().
224 REGEX_INTERNAL_reannounce(struct REGEX_INTERNAL_Announcement *h)
226 GNUNET_assert(NULL != h->dfa); /* make sure to call announce first */
227 LOG(GNUNET_ERROR_TYPE_INFO,
228 "REGEX_INTERNAL_reannounce: %s\n",
230 REGEX_INTERNAL_iterate_reachable_edges(h->dfa,
237 * Clear all cached data used by a regex announce.
238 * Does not close DHT connection.
240 * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call.
243 REGEX_INTERNAL_announce_cancel(struct REGEX_INTERNAL_Announcement *h)
245 REGEX_INTERNAL_automaton_destroy(h->dfa);
250 /******************************************************************************/
254 * Struct to keep state of running searches that have consumed a part of
257 struct RegexSearchContext {
259 * Part of the description already consumed by
260 * this particular search branch.
265 * Information about the search.
267 struct REGEX_INTERNAL_Search *info;
270 * We just want to look for one edge, the longer the better.
273 unsigned int longest_match;
276 * Destination hash of the longest match.
278 struct GNUNET_HashCode hash;
283 * Type of values in `dht_get_results`.
287 * Number of bytes in data.
292 * The raw result data.
299 * Struct to keep information of searches of services described by a regex
300 * using a user-provided string service description.
302 struct REGEX_INTERNAL_Search {
304 * DHT handle to use, must be initialized externally.
306 struct GNUNET_DHT_Handle *dht;
309 * Optional statistics handle to report usage. Can be NULL.
311 struct GNUNET_STATISTICS_Handle *stats;
314 * User provided description of the searched service.
321 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
324 * Results from running DHT GETs, values are of type
327 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
330 * Contexts, for each running DHT GET. Free all on end of search.
332 struct RegexSearchContext **contexts;
335 * Number of contexts (branches/steps in search).
337 unsigned int n_contexts;
340 * @param callback Callback for found peers.
342 REGEX_INTERNAL_Found callback;
345 * @param callback_cls Closure for @c callback.
352 * Jump to the next edge, with the longest matching token.
354 * @param block Block found in the DHT.
355 * @param size Size of the block.
356 * @param ctx Context of the search.
359 regex_next_edge(const struct RegexBlock *block,
361 struct RegexSearchContext *ctx);
365 * Function to process DHT string to regex matching.
366 * Called on each result obtained for the DHT search.
368 * @param cls Closure (search context).
369 * @param exp When will this value expire.
370 * @param key Key of the result.
371 * @param get_path Path of the get request.
372 * @param get_path_length Lenght of get_path.
373 * @param put_path Path of the put request.
374 * @param put_path_length Length of the put_path.
375 * @param type Type of the result.
376 * @param size Number of bytes in data.
377 * @param data Pointer to the result data.
380 dht_get_string_accept_handler(void *cls, struct GNUNET_TIME_Absolute exp,
381 const struct GNUNET_HashCode *key,
382 const struct GNUNET_PeerIdentity *get_path,
383 unsigned int get_path_length,
384 const struct GNUNET_PeerIdentity *put_path,
385 unsigned int put_path_length,
386 enum GNUNET_BLOCK_Type type,
387 size_t size, const void *data)
389 const struct RegexAcceptBlock *block = data;
390 struct RegexSearchContext *ctx = cls;
391 struct REGEX_INTERNAL_Search *info = ctx->info;
393 LOG(GNUNET_ERROR_TYPE_DEBUG,
394 "Regex result accept for %s (key %s)\n",
395 info->description, GNUNET_h2s(key));
397 GNUNET_STATISTICS_update(info->stats,
398 "# regex accepting blocks found",
400 GNUNET_STATISTICS_update(info->stats,
401 "# regex accepting block bytes found",
403 info->callback(info->callback_cls,
405 get_path, get_path_length,
406 put_path, put_path_length);
411 * Find a path to a peer that offers a regex service compatible
412 * with a given string.
414 * @param key The key of the accepting state.
415 * @param ctx Context containing info about the string, tunnel, etc.
418 regex_find_path(const struct GNUNET_HashCode *key,
419 struct RegexSearchContext *ctx)
421 struct GNUNET_DHT_GetHandle *get_h;
423 LOG(GNUNET_ERROR_TYPE_DEBUG,
424 "Accept state found, now searching for paths to %s\n",
426 (unsigned int)ctx->position);
427 get_h = GNUNET_DHT_get_start(ctx->info->dht, /* handle */
428 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
429 key, /* key to search */
430 DHT_REPLICATION, /* replication level */
431 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
432 NULL, /* xquery */ // FIXME BLOOMFILTER
433 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
434 &dht_get_string_accept_handler, ctx);
435 GNUNET_break(GNUNET_OK ==
436 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
439 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
444 * Function to process DHT string to regex matching.
445 * Called on each result obtained for the DHT search.
447 * @param cls closure (search context)
448 * @param exp when will this value expire
449 * @param key key of the result
450 * @param get_path path of the get request (not used)
451 * @param get_path_length length of @a get_path (not used)
452 * @param put_path path of the put request (not used)
453 * @param put_path_length length of the @a put_path (not used)
454 * @param type type of the result
455 * @param size number of bytes in data
456 * @param data pointer to the result data
458 * TODO: re-issue the request after certain time? cancel after X results?
461 dht_get_string_handler(void *cls, struct GNUNET_TIME_Absolute exp,
462 const struct GNUNET_HashCode *key,
463 const struct GNUNET_PeerIdentity *get_path,
464 unsigned int get_path_length,
465 const struct GNUNET_PeerIdentity *put_path,
466 unsigned int put_path_length,
467 enum GNUNET_BLOCK_Type type,
468 size_t size, const void *data)
470 const struct RegexBlock *block = data;
471 struct RegexSearchContext *ctx = cls;
472 struct REGEX_INTERNAL_Search *info = ctx->info;
476 LOG(GNUNET_ERROR_TYPE_INFO,
477 "DHT GET result for %s (%s)\n",
478 GNUNET_h2s(key), ctx->info->description);
479 copy = GNUNET_malloc(sizeof(struct Result) + size);
481 copy->data = ©[1];
482 GNUNET_memcpy(©[1], block, size);
483 GNUNET_break(GNUNET_OK ==
484 GNUNET_CONTAINER_multihashmap_put(info->dht_get_results,
486 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
487 len = strlen(info->description);
488 if (len == ctx->position) // String processed
490 if (GNUNET_YES == GNUNET_BLOCK_is_accepting(block, size))
492 regex_find_path(key, ctx);
496 LOG(GNUNET_ERROR_TYPE_INFO, "block not accepting!\n");
497 /* FIXME REGEX this block not successful, wait for more? start timeout? */
501 regex_next_edge(block, size, ctx);
506 * Iterator over found existing cadet regex blocks that match an ongoing search.
508 * @param cls Closure (current context)-
509 * @param key Current key code (key for cached block).
510 * @param value Value in the hash map (cached RegexBlock).
511 * @return #GNUNET_YES: we should always continue to iterate.
514 regex_result_iterator(void *cls,
515 const struct GNUNET_HashCode * key,
518 struct Result *result = value;
519 const struct RegexBlock *block = result->data;
520 struct RegexSearchContext *ctx = cls;
523 GNUNET_BLOCK_is_accepting(block, result->size)) &&
524 (ctx->position == strlen(ctx->info->description)))
526 LOG(GNUNET_ERROR_TYPE_INFO,
527 "Found accepting known block\n");
528 regex_find_path(key, ctx);
529 return GNUNET_YES; // We found an accept state!
531 LOG(GNUNET_ERROR_TYPE_DEBUG,
534 strlen(ctx->info->description),
535 GNUNET_BLOCK_is_accepting(block, result->size));
536 regex_next_edge(block, result->size, ctx);
538 GNUNET_STATISTICS_update(ctx->info->stats, "# regex cadet blocks iterated",
546 * Iterator over edges in a regex block retrieved from the DHT.
548 * @param cls Closure (context of the search).
549 * @param token Token that follows to next state.
550 * @param len Lenght of token.
551 * @param key Hash of next state.
552 * @return #GNUNET_YES if should keep iterating, #GNUNET_NO otherwise.
555 regex_edge_iterator(void *cls,
558 const struct GNUNET_HashCode *key)
560 struct RegexSearchContext *ctx = cls;
561 struct REGEX_INTERNAL_Search *info = ctx->info;
565 GNUNET_STATISTICS_update(info->stats, "# regex edges iterated",
567 current = &info->description[ctx->position];
568 current_len = strlen(info->description) - ctx->position;
569 if (len > current_len)
571 LOG(GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n");
574 if (0 != strncmp(current, token, len))
576 LOG(GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n");
580 if (len > ctx->longest_match)
582 LOG(GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n");
583 ctx->longest_match = len;
588 LOG(GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n");
591 LOG(GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
597 * Jump to the next edge, with the longest matching token.
599 * @param block Block found in the DHT.
600 * @param size Size of the block.
601 * @param ctx Context of the search.
604 regex_next_edge(const struct RegexBlock *block,
606 struct RegexSearchContext *ctx)
608 struct RegexSearchContext *new_ctx;
609 struct REGEX_INTERNAL_Search *info = ctx->info;
610 struct GNUNET_DHT_GetHandle *get_h;
611 struct GNUNET_HashCode *hash;
615 LOG(GNUNET_ERROR_TYPE_DEBUG, "Next edge\n");
616 /* Find the longest match for the current string position,
617 * among tokens in the given block */
618 ctx->longest_match = 0;
619 result = REGEX_BLOCK_iterate(block, size,
620 ®ex_edge_iterator, ctx);
621 GNUNET_break(GNUNET_OK == result);
623 /* Did anything match? */
624 if (0 == ctx->longest_match)
626 LOG(GNUNET_ERROR_TYPE_DEBUG,
627 "no match in block\n");
632 new_ctx = GNUNET_new(struct RegexSearchContext);
633 new_ctx->info = info;
634 new_ctx->position = ctx->position + ctx->longest_match;
635 GNUNET_array_append(info->contexts, info->n_contexts, new_ctx);
637 /* Check whether we already have a DHT GET running for it */
639 GNUNET_CONTAINER_multihashmap_contains(info->dht_get_handles, hash))
641 LOG(GNUNET_ERROR_TYPE_DEBUG,
642 "GET for %s running, END\n",
644 GNUNET_CONTAINER_multihashmap_get_multiple(info->dht_get_results,
646 ®ex_result_iterator,
648 return; /* We are already looking for it */
651 GNUNET_STATISTICS_update(info->stats, "# regex nodes traversed",
654 LOG(GNUNET_ERROR_TYPE_DEBUG,
655 "Following edges at %s for offset %u in `%s'\n",
657 (unsigned int)ctx->position,
659 rest = &new_ctx->info->description[new_ctx->position];
661 GNUNET_DHT_get_start(info->dht, /* handle */
662 GNUNET_BLOCK_TYPE_REGEX, /* type */
663 hash, /* key to search */
664 DHT_REPLICATION, /* replication level */
667 strlen(rest) + 1, /* xquery bits */
668 &dht_get_string_handler, new_ctx);
670 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
673 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
682 * Search for a peer offering a regex matching certain string in the DHT.
683 * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results
686 * @param dht An existing and valid DHT service handle.
687 * @param string String to match against the regexes in the DHT.
688 * @param callback Callback for found peers.
689 * @param callback_cls Closure for @c callback.
690 * @param stats Optional statistics handle to report usage. Can be NULL.
691 * @return Handle to stop search and free resources.
692 * Must be freed by calling #REGEX_INTERNAL_search_cancel().
694 struct REGEX_INTERNAL_Search *
695 REGEX_INTERNAL_search(struct GNUNET_DHT_Handle *dht,
697 REGEX_INTERNAL_Found callback,
699 struct GNUNET_STATISTICS_Handle *stats)
701 struct REGEX_INTERNAL_Search *h;
702 struct GNUNET_DHT_GetHandle *get_h;
703 struct RegexSearchContext *ctx;
704 struct GNUNET_HashCode key;
708 /* Initialize handle */
709 GNUNET_assert(NULL != dht);
710 GNUNET_assert(NULL != callback);
711 h = GNUNET_new(struct REGEX_INTERNAL_Search);
713 h->description = GNUNET_strdup(string);
714 h->callback = callback;
715 h->callback_cls = callback_cls;
717 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create(32, GNUNET_NO);
718 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create(32, GNUNET_NO);
720 /* Initialize context */
721 len = strlen(string);
722 size = REGEX_INTERNAL_get_first_key(string, len, &key);
723 LOG(GNUNET_ERROR_TYPE_INFO,
724 "Initial key for `%s' is %s (based on `%.*s')\n",
729 ctx = GNUNET_new(struct RegexSearchContext);
730 ctx->position = size;
732 GNUNET_array_append(h->contexts,
735 /* Start search in DHT */
736 get_h = GNUNET_DHT_get_start(h->dht, /* handle */
737 GNUNET_BLOCK_TYPE_REGEX, /* type */
738 &key, /* key to search */
739 DHT_REPLICATION, /* replication level */
741 &h->description[size], /* xquery */
742 // FIXME add BLOOMFILTER to exclude filtered peers
743 len + 1 - size, /* xquery bits */
744 // FIXME add BLOOMFILTER SIZE
745 &dht_get_string_handler, ctx);
748 GNUNET_CONTAINER_multihashmap_put(h->dht_get_handles,
751 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
759 * Iterator over hash map entries to cancel DHT GET requests after a
760 * successful connect_by_string.
762 * @param cls Closure (unused).
763 * @param key Current key code (unused).
764 * @param value Value in the hash map (get handle).
765 * @return #GNUNET_YES if we should continue to iterate,
769 regex_cancel_dht_get(void *cls,
770 const struct GNUNET_HashCode * key,
773 struct GNUNET_DHT_GetHandle *h = value;
775 GNUNET_DHT_get_stop(h);
781 * Iterator over hash map entries to free CadetRegexBlocks stored during the
782 * search for connect_by_string.
784 * @param cls Closure (unused).
785 * @param key Current key code (unused).
786 * @param value CadetRegexBlock in the hash map.
787 * @return #GNUNET_YES if we should continue to iterate,
791 regex_free_result(void *cls,
792 const struct GNUNET_HashCode * key,
801 * Cancel an ongoing regex search in the DHT and free all resources.
803 * @param h the search context.
806 REGEX_INTERNAL_search_cancel(struct REGEX_INTERNAL_Search *h)
810 GNUNET_free(h->description);
811 GNUNET_CONTAINER_multihashmap_iterate(h->dht_get_handles,
812 ®ex_cancel_dht_get, NULL);
813 GNUNET_CONTAINER_multihashmap_iterate(h->dht_get_results,
814 ®ex_free_result, NULL);
815 GNUNET_CONTAINER_multihashmap_destroy(h->dht_get_results);
816 GNUNET_CONTAINER_multihashmap_destroy(h->dht_get_handles);
817 if (0 < h->n_contexts)
819 for (i = 0; i < h->n_contexts; i++)
820 GNUNET_free(h->contexts[i]);
821 GNUNET_free(h->contexts);
827 /* end of regex_internal_dht.c */