2 This file is part of GNUnet
3 Copyright (C) 2012, 2015 GNUnet e.V.
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 * @file src/regex/regex_internal_dht.c
20 * @brief library to announce regexes in the network and match strings
21 * against published regexes.
22 * @author Bartlomiej Polot
25 #include "regex_internal_lib.h"
26 #include "regex_block_lib.h"
27 #include "gnunet_dht_service.h"
28 #include "gnunet_statistics_service.h"
29 #include "gnunet_constants.h"
30 #include "gnunet_signatures.h"
33 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
36 * DHT replication level to use.
38 #define DHT_REPLICATION 5
41 * DHT record lifetime to use.
43 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
48 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
52 * Handle to store cached data about a regex announce.
54 struct REGEX_INTERNAL_Announcement
57 * DHT handle to use, must be initialized externally.
59 struct GNUNET_DHT_Handle *dht;
67 * Automaton representation of the regex (expensive to build).
69 struct REGEX_INTERNAL_Automaton *dfa;
74 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv;
77 * Optional statistics handle to report usage. Can be NULL.
79 struct GNUNET_STATISTICS_Handle *stats;
84 * Regex callback iterator to store own service description in the DHT.
87 * @param key hash for current state.
88 * @param proof proof for current state.
89 * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not.
90 * @param num_edges number of edges leaving current state.
91 * @param edges edges leaving current state.
94 regex_iterator (void *cls,
95 const struct GNUNET_HashCode *key,
98 unsigned int num_edges,
99 const struct REGEX_BLOCK_Edge *edges)
101 struct REGEX_INTERNAL_Announcement *h = cls;
102 struct RegexBlock *block;
106 LOG (GNUNET_ERROR_TYPE_INFO,
107 "DHT PUT for state %s with proof `%s' and %u edges:\n",
111 for (i = 0; i < num_edges; i++)
113 LOG (GNUNET_ERROR_TYPE_INFO,
114 "Edge %u `%s' towards %s\n",
117 GNUNET_h2s (&edges[i].destination));
119 if (GNUNET_YES == accepting)
121 struct RegexAcceptBlock ab;
123 LOG (GNUNET_ERROR_TYPE_INFO,
124 "State %s is accepting, putting own id\n",
126 size = sizeof (struct RegexAcceptBlock);
127 ab.purpose.size = ntohl (sizeof (struct GNUNET_CRYPTO_EccSignaturePurpose) +
128 sizeof (struct GNUNET_TIME_AbsoluteNBO) +
129 sizeof (struct GNUNET_HashCode));
130 ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT);
131 ab.expiration_time = GNUNET_TIME_absolute_hton (GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION));
133 GNUNET_CRYPTO_eddsa_key_get_public (h->priv,
134 &ab.peer.public_key);
135 GNUNET_assert (GNUNET_OK ==
136 GNUNET_CRYPTO_eddsa_sign (h->priv,
140 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
142 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
143 sizeof (struct RegexAcceptBlock), GNUNET_NO);
145 GNUNET_DHT_put (h->dht, key,
147 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
148 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
151 GNUNET_TIME_relative_to_absolute (DHT_TTL),
154 block = REGEX_BLOCK_create (proof,
161 (void) GNUNET_DHT_put (h->dht,
165 GNUNET_BLOCK_TYPE_REGEX,
168 GNUNET_TIME_relative_to_absolute (DHT_TTL),
171 GNUNET_STATISTICS_update (h->stats,
172 "# regex blocks stored",
175 GNUNET_STATISTICS_update (h->stats,
176 "# regex block bytes stored",
184 * Announce a regular expression: put all states of the automaton in the DHT.
185 * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that.
187 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
188 * @param priv our private key, must remain valid until the announcement is cancelled
189 * @param regex Regular expression to announce.
190 * @param compression How many characters per edge can we squeeze?
191 * @param stats Optional statistics handle to report usage. Can be NULL.
192 * @return Handle to reuse o free cached resources.
193 * Must be freed by calling #REGEX_INTERNAL_announce_cancel().
195 struct REGEX_INTERNAL_Announcement *
196 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
197 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv,
199 uint16_t compression,
200 struct GNUNET_STATISTICS_Handle *stats)
202 struct REGEX_INTERNAL_Announcement *h;
204 GNUNET_assert (NULL != dht);
205 h = GNUNET_new (struct REGEX_INTERNAL_Announcement);
210 h->dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), compression);
211 REGEX_INTERNAL_reannounce (h);
217 * Announce again a regular expression previously announced.
218 * Does use caching to speed up process.
220 * @param h Handle returned by a previous #REGEX_INTERNAL_announce call().
223 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h)
225 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
226 LOG (GNUNET_ERROR_TYPE_INFO,
227 "REGEX_INTERNAL_reannounce: %s\n",
229 REGEX_INTERNAL_iterate_reachable_edges (h->dfa,
236 * Clear all cached data used by a regex announce.
237 * Does not close DHT connection.
239 * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call.
242 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h)
244 REGEX_INTERNAL_automaton_destroy (h->dfa);
249 /******************************************************************************/
253 * Struct to keep state of running searches that have consumed a part of
256 struct RegexSearchContext
259 * Part of the description already consumed by
260 * this particular search branch.
265 * Information about the search.
267 struct REGEX_INTERNAL_Search *info;
270 * We just want to look for one edge, the longer the better.
273 unsigned int longest_match;
276 * Destination hash of the longest match.
278 struct GNUNET_HashCode hash;
283 * Type of values in `dht_get_results`.
288 * Number of bytes in data.
293 * The raw result data.
300 * Struct to keep information of searches of services described by a regex
301 * using a user-provided string service description.
303 struct REGEX_INTERNAL_Search
306 * DHT handle to use, must be initialized externally.
308 struct GNUNET_DHT_Handle *dht;
311 * Optional statistics handle to report usage. Can be NULL.
313 struct GNUNET_STATISTICS_Handle *stats;
316 * User provided description of the searched service.
323 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
326 * Results from running DHT GETs, values are of type
329 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
332 * Contexts, for each running DHT GET. Free all on end of search.
334 struct RegexSearchContext **contexts;
337 * Number of contexts (branches/steps in search).
339 unsigned int n_contexts;
342 * @param callback Callback for found peers.
344 REGEX_INTERNAL_Found callback;
347 * @param callback_cls Closure for @c callback.
354 * Jump to the next edge, with the longest matching token.
356 * @param block Block found in the DHT.
357 * @param size Size of the block.
358 * @param ctx Context of the search.
361 regex_next_edge (const struct RegexBlock *block,
363 struct RegexSearchContext *ctx);
367 * Function to process DHT string to regex matching.
368 * Called on each result obtained for the DHT search.
370 * @param cls Closure (search context).
371 * @param exp When will this value expire.
372 * @param key Key of the result.
373 * @param get_path Path of the get request.
374 * @param get_path_length Lenght of get_path.
375 * @param put_path Path of the put request.
376 * @param put_path_length Length of the put_path.
377 * @param type Type of the result.
378 * @param size Number of bytes in data.
379 * @param data Pointer to the result data.
382 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
383 const struct GNUNET_HashCode *key,
384 const struct GNUNET_PeerIdentity *get_path,
385 unsigned int get_path_length,
386 const struct GNUNET_PeerIdentity *put_path,
387 unsigned int put_path_length,
388 enum GNUNET_BLOCK_Type type,
389 size_t size, const void *data)
391 const struct RegexAcceptBlock *block = data;
392 struct RegexSearchContext *ctx = cls;
393 struct REGEX_INTERNAL_Search *info = ctx->info;
395 LOG (GNUNET_ERROR_TYPE_DEBUG,
396 "Regex result accept for %s (key %s)\n",
397 info->description, GNUNET_h2s(key));
399 GNUNET_STATISTICS_update (info->stats,
400 "# regex accepting blocks found",
402 GNUNET_STATISTICS_update (info->stats,
403 "# regex accepting block bytes found",
405 info->callback (info->callback_cls,
407 get_path, get_path_length,
408 put_path, put_path_length);
413 * Find a path to a peer that offers a regex service compatible
414 * with a given string.
416 * @param key The key of the accepting state.
417 * @param ctx Context containing info about the string, tunnel, etc.
420 regex_find_path (const struct GNUNET_HashCode *key,
421 struct RegexSearchContext *ctx)
423 struct GNUNET_DHT_GetHandle *get_h;
425 LOG (GNUNET_ERROR_TYPE_DEBUG,
426 "Accept state found, now searching for paths to %s\n",
428 (unsigned int) ctx->position);
429 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
430 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
431 key, /* key to search */
432 DHT_REPLICATION, /* replication level */
433 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
434 NULL, /* xquery */ // FIXME BLOOMFILTER
435 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
436 &dht_get_string_accept_handler, ctx);
437 GNUNET_break (GNUNET_OK ==
438 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
441 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
446 * Function to process DHT string to regex matching.
447 * Called on each result obtained for the DHT search.
449 * @param cls closure (search context)
450 * @param exp when will this value expire
451 * @param key key of the result
452 * @param get_path path of the get request (not used)
453 * @param get_path_length length of @a get_path (not used)
454 * @param put_path path of the put request (not used)
455 * @param put_path_length length of the @a put_path (not used)
456 * @param type type of the result
457 * @param size number of bytes in data
458 * @param data pointer to the result data
460 * TODO: re-issue the request after certain time? cancel after X results?
463 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
464 const struct GNUNET_HashCode *key,
465 const struct GNUNET_PeerIdentity *get_path,
466 unsigned int get_path_length,
467 const struct GNUNET_PeerIdentity *put_path,
468 unsigned int put_path_length,
469 enum GNUNET_BLOCK_Type type,
470 size_t size, const void *data)
472 const struct RegexBlock *block = data;
473 struct RegexSearchContext *ctx = cls;
474 struct REGEX_INTERNAL_Search *info = ctx->info;
478 LOG (GNUNET_ERROR_TYPE_INFO,
479 "DHT GET result for %s (%s)\n",
480 GNUNET_h2s (key), ctx->info->description);
481 copy = GNUNET_malloc (sizeof (struct Result) + size);
483 copy->data = ©[1];
484 GNUNET_memcpy (©[1], block, size);
485 GNUNET_break (GNUNET_OK ==
486 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
488 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
489 len = strlen (info->description);
490 if (len == ctx->position) // String processed
492 if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block, size))
494 regex_find_path (key, ctx);
498 LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n");
499 /* FIXME REGEX this block not successful, wait for more? start timeout? */
503 regex_next_edge (block, size, ctx);
508 * Iterator over found existing cadet regex blocks that match an ongoing search.
510 * @param cls Closure (current context)-
511 * @param key Current key code (key for cached block).
512 * @param value Value in the hash map (cached RegexBlock).
513 * @return #GNUNET_YES: we should always continue to iterate.
516 regex_result_iterator (void *cls,
517 const struct GNUNET_HashCode * key,
520 struct Result *result = value;
521 const struct RegexBlock *block = result->data;
522 struct RegexSearchContext *ctx = cls;
525 GNUNET_BLOCK_is_accepting (block, result->size)) &&
526 (ctx->position == strlen (ctx->info->description)) )
528 LOG (GNUNET_ERROR_TYPE_INFO,
529 "Found accepting known block\n");
530 regex_find_path (key, ctx);
531 return GNUNET_YES; // We found an accept state!
533 LOG (GNUNET_ERROR_TYPE_DEBUG,
536 strlen (ctx->info->description),
537 GNUNET_BLOCK_is_accepting (block, result->size));
538 regex_next_edge (block, result->size, ctx);
540 GNUNET_STATISTICS_update (ctx->info->stats, "# regex cadet blocks iterated",
548 * Iterator over edges in a regex block retrieved from the DHT.
550 * @param cls Closure (context of the search).
551 * @param token Token that follows to next state.
552 * @param len Lenght of token.
553 * @param key Hash of next state.
554 * @return #GNUNET_YES if should keep iterating, #GNUNET_NO otherwise.
557 regex_edge_iterator (void *cls,
560 const struct GNUNET_HashCode *key)
562 struct RegexSearchContext *ctx = cls;
563 struct REGEX_INTERNAL_Search *info = ctx->info;
567 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
569 current = &info->description[ctx->position];
570 current_len = strlen (info->description) - ctx->position;
571 if (len > current_len)
573 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n");
576 if (0 != strncmp (current, token, len))
578 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n");
582 if (len > ctx->longest_match)
584 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n");
585 ctx->longest_match = len;
590 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n");
593 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
599 * Jump to the next edge, with the longest matching token.
601 * @param block Block found in the DHT.
602 * @param size Size of the block.
603 * @param ctx Context of the search.
606 regex_next_edge (const struct RegexBlock *block,
608 struct RegexSearchContext *ctx)
610 struct RegexSearchContext *new_ctx;
611 struct REGEX_INTERNAL_Search *info = ctx->info;
612 struct GNUNET_DHT_GetHandle *get_h;
613 struct GNUNET_HashCode *hash;
617 LOG (GNUNET_ERROR_TYPE_DEBUG, "Next edge\n");
618 /* Find the longest match for the current string position,
619 * among tokens in the given block */
620 ctx->longest_match = 0;
621 result = REGEX_BLOCK_iterate (block, size,
622 ®ex_edge_iterator, ctx);
623 GNUNET_break (GNUNET_OK == result);
625 /* Did anything match? */
626 if (0 == ctx->longest_match)
628 LOG (GNUNET_ERROR_TYPE_DEBUG,
629 "no match in block\n");
634 new_ctx = GNUNET_new (struct RegexSearchContext);
635 new_ctx->info = info;
636 new_ctx->position = ctx->position + ctx->longest_match;
637 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
639 /* Check whether we already have a DHT GET running for it */
641 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
643 LOG (GNUNET_ERROR_TYPE_DEBUG,
644 "GET for %s running, END\n",
646 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
648 ®ex_result_iterator,
650 return; /* We are already looking for it */
653 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
656 LOG (GNUNET_ERROR_TYPE_DEBUG,
657 "Following edges at %s for offset %u in `%s'\n",
659 (unsigned int) ctx->position,
661 rest = &new_ctx->info->description[new_ctx->position];
663 GNUNET_DHT_get_start (info->dht, /* handle */
664 GNUNET_BLOCK_TYPE_REGEX, /* type */
665 hash, /* key to search */
666 DHT_REPLICATION, /* replication level */
669 strlen (rest) + 1, /* xquery bits */
670 &dht_get_string_handler, new_ctx);
672 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
675 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
684 * Search for a peer offering a regex matching certain string in the DHT.
685 * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results
688 * @param dht An existing and valid DHT service handle.
689 * @param string String to match against the regexes in the DHT.
690 * @param callback Callback for found peers.
691 * @param callback_cls Closure for @c callback.
692 * @param stats Optional statistics handle to report usage. Can be NULL.
693 * @return Handle to stop search and free resources.
694 * Must be freed by calling #REGEX_INTERNAL_search_cancel().
696 struct REGEX_INTERNAL_Search *
697 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
699 REGEX_INTERNAL_Found callback,
701 struct GNUNET_STATISTICS_Handle *stats)
703 struct REGEX_INTERNAL_Search *h;
704 struct GNUNET_DHT_GetHandle *get_h;
705 struct RegexSearchContext *ctx;
706 struct GNUNET_HashCode key;
710 /* Initialize handle */
711 GNUNET_assert (NULL != dht);
712 GNUNET_assert (NULL != callback);
713 h = GNUNET_new (struct REGEX_INTERNAL_Search);
715 h->description = GNUNET_strdup (string);
716 h->callback = callback;
717 h->callback_cls = callback_cls;
719 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
720 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
722 /* Initialize context */
723 len = strlen (string);
724 size = REGEX_INTERNAL_get_first_key (string, len, &key);
725 LOG (GNUNET_ERROR_TYPE_INFO,
726 "Initial key for `%s' is %s (based on `%.*s')\n",
731 ctx = GNUNET_new (struct RegexSearchContext);
732 ctx->position = size;
734 GNUNET_array_append (h->contexts,
737 /* Start search in DHT */
738 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
739 GNUNET_BLOCK_TYPE_REGEX, /* type */
740 &key, /* key to search */
741 DHT_REPLICATION, /* replication level */
743 &h->description[size], /* xquery */
744 // FIXME add BLOOMFILTER to exclude filtered peers
745 len + 1 - size, /* xquery bits */
746 // FIXME add BLOOMFILTER SIZE
747 &dht_get_string_handler, ctx);
750 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
753 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
761 * Iterator over hash map entries to cancel DHT GET requests after a
762 * successful connect_by_string.
764 * @param cls Closure (unused).
765 * @param key Current key code (unused).
766 * @param value Value in the hash map (get handle).
767 * @return #GNUNET_YES if we should continue to iterate,
771 regex_cancel_dht_get (void *cls,
772 const struct GNUNET_HashCode * key,
775 struct GNUNET_DHT_GetHandle *h = value;
777 GNUNET_DHT_get_stop (h);
783 * Iterator over hash map entries to free CadetRegexBlocks stored during the
784 * search for connect_by_string.
786 * @param cls Closure (unused).
787 * @param key Current key code (unused).
788 * @param value CadetRegexBlock in the hash map.
789 * @return #GNUNET_YES if we should continue to iterate,
793 regex_free_result (void *cls,
794 const struct GNUNET_HashCode * key,
803 * Cancel an ongoing regex search in the DHT and free all resources.
805 * @param h the search context.
808 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h)
812 GNUNET_free (h->description);
813 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles,
814 ®ex_cancel_dht_get, NULL);
815 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results,
816 ®ex_free_result, NULL);
817 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results);
818 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles);
819 if (0 < h->n_contexts)
821 for (i = 0; i < h->n_contexts; i++)
822 GNUNET_free (h->contexts[i]);
823 GNUNET_free (h->contexts);
829 /* end of regex_internal_dht.c */