2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_internal_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "regex_internal_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_signatures.h"
35 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
37 #define DHT_REPLICATION 5
38 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
39 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
43 * Handle to store cached data about a regex announce.
45 struct REGEX_INTERNAL_Announcement
48 * DHT handle to use, must be initialized externally.
50 struct GNUNET_DHT_Handle *dht;
58 * Automaton representation of the regex (expensive to build).
60 struct REGEX_INTERNAL_Automaton* dfa;
65 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv;
68 * Optional statistics handle to report usage. Can be NULL.
70 struct GNUNET_STATISTICS_Handle *stats;
75 * Regex callback iterator to store own service description in the DHT.
78 * @param key hash for current state.
79 * @param proof proof for current state.
80 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
81 * @param num_edges number of edges leaving current state.
82 * @param edges edges leaving current state.
85 regex_iterator (void *cls,
86 const struct GNUNET_HashCode *key,
89 unsigned int num_edges,
90 const struct REGEX_BLOCK_Edge *edges)
92 struct REGEX_INTERNAL_Announcement *h = cls;
93 struct RegexBlock *block;
97 LOG (GNUNET_ERROR_TYPE_INFO,
98 "DHT PUT for state %s with proof `%s' and %u edges\n",
102 for (i = 0; i < num_edges; i++)
104 LOG (GNUNET_ERROR_TYPE_INFO,
105 " edge %s towards %s (%s)\n",
107 GNUNET_h2s (&edges[i].destination),
110 if (GNUNET_YES == accepting)
112 struct RegexAcceptBlock ab;
114 LOG (GNUNET_ERROR_TYPE_INFO,
115 "State %s is accepting, putting own id\n",
117 size = sizeof (struct RegexAcceptBlock);
118 ab.purpose.size = ntohl (sizeof (struct GNUNET_CRYPTO_EccSignaturePurpose) +
119 sizeof (struct GNUNET_TIME_AbsoluteNBO) +
120 sizeof (struct GNUNET_HashCode));
121 ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT);
122 ab.expiration_time = GNUNET_TIME_absolute_hton (GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION));
124 GNUNET_CRYPTO_eddsa_key_get_public (h->priv,
125 &ab.peer.public_key);
126 GNUNET_assert (GNUNET_OK ==
127 GNUNET_CRYPTO_eddsa_sign (h->priv,
131 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
133 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
134 sizeof (struct RegexAcceptBlock), GNUNET_NO);
136 GNUNET_DHT_put (h->dht, key,
138 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
139 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
142 GNUNET_TIME_relative_to_absolute (DHT_TTL),
146 block = REGEX_BLOCK_create (proof,
151 GNUNET_DHT_put (h->dht, key,
154 GNUNET_BLOCK_TYPE_REGEX,
156 GNUNET_TIME_relative_to_absolute (DHT_TTL),
159 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
161 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
168 * Announce a regular expression: put all states of the automaton in the DHT.
169 * Does not free resources, must call REGEX_INTERNAL_announce_cancel for that.
171 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
172 * @param priv our private key, must remain valid until the announcement is cancelled
173 * @param regex Regular expression to announce.
174 * @param compression How many characters per edge can we squeeze?
175 * @param stats Optional statistics handle to report usage. Can be NULL.
177 * @return Handle to reuse o free cached resources.
178 * Must be freed by calling REGEX_INTERNAL_announce_cancel.
180 struct REGEX_INTERNAL_Announcement *
181 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
182 const struct GNUNET_CRYPTO_EddsaPrivateKey *priv,
184 uint16_t compression,
185 struct GNUNET_STATISTICS_Handle *stats)
187 struct REGEX_INTERNAL_Announcement *h;
189 GNUNET_assert (NULL != dht);
190 h = GNUNET_new (struct REGEX_INTERNAL_Announcement);
195 h->dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), compression);
196 REGEX_INTERNAL_reannounce (h);
202 * Announce again a regular expression previously announced.
203 * Does use caching to speed up process.
205 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
208 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h)
210 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
211 LOG (GNUNET_ERROR_TYPE_INFO,
212 "REGEX_INTERNAL_reannounce: %s\n",
214 REGEX_INTERNAL_iterate_reachable_edges (h->dfa, ®ex_iterator, h);
219 * Clear all cached data used by a regex announce.
220 * Does not close DHT connection.
222 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
225 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h)
227 REGEX_INTERNAL_automaton_destroy (h->dfa);
232 /******************************************************************************/
236 * Struct to keep state of running searches that have consumed a part of
239 struct RegexSearchContext
242 * Part of the description already consumed by
243 * this particular search branch.
248 * Information about the search.
250 struct REGEX_INTERNAL_Search *info;
253 * We just want to look for one edge, the longer the better.
256 unsigned int longest_match;
259 * Destination hash of the longest match.
261 struct GNUNET_HashCode hash;
266 * Type of values in 'dht_get_results'.
271 * Number of bytes in data.
276 * The raw result data.
283 * Struct to keep information of searches of services described by a regex
284 * using a user-provided string service description.
286 struct REGEX_INTERNAL_Search
289 * DHT handle to use, must be initialized externally.
291 struct GNUNET_DHT_Handle *dht;
294 * Optional statistics handle to report usage. Can be NULL.
296 struct GNUNET_STATISTICS_Handle *stats;
299 * User provided description of the searched service.
306 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
309 * Results from running DHT GETs, values are of type
312 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
315 * Contexts, for each running DHT GET. Free all on end of search.
317 struct RegexSearchContext **contexts;
320 * Number of contexts (branches/steps in search).
322 unsigned int n_contexts;
325 * @param callback Callback for found peers.
327 REGEX_INTERNAL_Found callback;
330 * @param callback_cls Closure for @c callback.
337 * Jump to the next edge, with the longest matching token.
339 * @param block Block found in the DHT.
340 * @param size Size of the block.
341 * @param ctx Context of the search.
344 regex_next_edge (const struct RegexBlock *block,
346 struct RegexSearchContext *ctx);
350 * Function to process DHT string to regex matching.
351 * Called on each result obtained for the DHT search.
353 * @param cls Closure (search context).
354 * @param exp When will this value expire.
355 * @param key Key of the result.
356 * @param get_path Path of the get request.
357 * @param get_path_length Lenght of get_path.
358 * @param put_path Path of the put request.
359 * @param put_path_length Length of the put_path.
360 * @param type Type of the result.
361 * @param size Number of bytes in data.
362 * @param data Pointer to the result data.
365 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
366 const struct GNUNET_HashCode *key,
367 const struct GNUNET_PeerIdentity *get_path,
368 unsigned int get_path_length,
369 const struct GNUNET_PeerIdentity *put_path,
370 unsigned int put_path_length,
371 enum GNUNET_BLOCK_Type type,
372 size_t size, const void *data)
374 const struct RegexAcceptBlock *block = data;
375 struct RegexSearchContext *ctx = cls;
376 struct REGEX_INTERNAL_Search *info = ctx->info;
378 LOG (GNUNET_ERROR_TYPE_DEBUG,
379 "Regex result accept for %s (key %s)\n",
380 info->description, GNUNET_h2s(key));
382 GNUNET_STATISTICS_update (info->stats,
383 "# regex accepting blocks found",
385 GNUNET_STATISTICS_update (info->stats,
386 "# regex accepting block bytes found",
388 info->callback (info->callback_cls,
390 get_path, get_path_length,
391 put_path, put_path_length);
396 * Find a path to a peer that offers a regex servcie compatible
397 * with a given string.
399 * @param key The key of the accepting state.
400 * @param ctx Context containing info about the string, tunnel, etc.
403 regex_find_path (const struct GNUNET_HashCode *key,
404 struct RegexSearchContext *ctx)
406 struct GNUNET_DHT_GetHandle *get_h;
408 LOG (GNUNET_ERROR_TYPE_DEBUG,
409 "regex finds path for %s\n",
411 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
412 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
413 key, /* key to search */
414 DHT_REPLICATION, /* replication level */
415 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
416 NULL, /* xquery */ // FIXME BLOOMFILTER
417 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
418 &dht_get_string_accept_handler, ctx);
419 GNUNET_break (GNUNET_OK ==
420 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
423 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
428 * Function to process DHT string to regex matching.
429 * Called on each result obtained for the DHT search.
431 * @param cls closure (search context)
432 * @param exp when will this value expire
433 * @param key key of the result
434 * @param get_path path of the get request (not used)
435 * @param get_path_length lenght of get_path (not used)
436 * @param put_path path of the put request (not used)
437 * @param put_path_length length of the put_path (not used)
438 * @param type type of the result
439 * @param size number of bytes in data
440 * @param data pointer to the result data
442 * TODO: re-issue the request after certain time? cancel after X results?
445 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
446 const struct GNUNET_HashCode *key,
447 const struct GNUNET_PeerIdentity *get_path,
448 unsigned int get_path_length,
449 const struct GNUNET_PeerIdentity *put_path,
450 unsigned int put_path_length,
451 enum GNUNET_BLOCK_Type type,
452 size_t size, const void *data)
454 const struct RegexBlock *block = data;
455 struct RegexSearchContext *ctx = cls;
456 struct REGEX_INTERNAL_Search *info = ctx->info;
460 LOG (GNUNET_ERROR_TYPE_INFO,
461 "DHT GET result for %s (%s)\n",
462 GNUNET_h2s (key), ctx->info->description);
463 copy = GNUNET_malloc (sizeof (struct Result) + size);
465 copy->data = ©[1];
466 memcpy (©[1], block, size);
467 GNUNET_break (GNUNET_OK ==
468 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
470 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
471 len = strlen (info->description);
472 if (len == ctx->position) // String processed
474 if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block, size))
476 regex_find_path (key, ctx);
480 LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n");
481 /* FIXME REGEX this block not successful, wait for more? start timeout? */
485 regex_next_edge (block, size, ctx);
490 * Iterator over found existing cadet regex blocks that match an ongoing search.
492 * @param cls Closure (current context)-
493 * @param key Current key code (key for cached block).
494 * @param value Value in the hash map (cached RegexBlock).
495 * @return GNUNET_YES: we should always continue to iterate.
498 regex_result_iterator (void *cls,
499 const struct GNUNET_HashCode * key,
502 struct Result *result = value;
503 const struct RegexBlock *block = result->data;
504 struct RegexSearchContext *ctx = cls;
507 GNUNET_BLOCK_is_accepting (block, result->size)) &&
508 (ctx->position == strlen (ctx->info->description)) )
510 LOG (GNUNET_ERROR_TYPE_INFO,
511 "Found accepting known block\n");
512 regex_find_path (key, ctx);
513 return GNUNET_YES; // We found an accept state!
515 LOG (GNUNET_ERROR_TYPE_DEBUG,
518 strlen (ctx->info->description),
519 GNUNET_BLOCK_is_accepting (block, result->size));
520 regex_next_edge (block, result->size, ctx);
522 GNUNET_STATISTICS_update (ctx->info->stats, "# regex cadet blocks iterated",
530 * Iterator over edges in a regex block retrieved from the DHT.
532 * @param cls Closure (context of the search).
533 * @param token Token that follows to next state.
534 * @param len Lenght of token.
535 * @param key Hash of next state.
537 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
540 regex_edge_iterator (void *cls,
543 const struct GNUNET_HashCode *key)
545 struct RegexSearchContext *ctx = cls;
546 struct REGEX_INTERNAL_Search *info = ctx->info;
550 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
552 current = &info->description[ctx->position];
553 current_len = strlen (info->description) - ctx->position;
554 if (len > current_len)
556 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n");
559 if (0 != strncmp (current, token, len))
561 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n");
565 if (len > ctx->longest_match)
567 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n");
568 ctx->longest_match = len;
573 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n");
576 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
582 * Jump to the next edge, with the longest matching token.
584 * @param block Block found in the DHT.
585 * @param size Size of the block.
586 * @param ctx Context of the search.
589 regex_next_edge (const struct RegexBlock *block,
591 struct RegexSearchContext *ctx)
593 struct RegexSearchContext *new_ctx;
594 struct REGEX_INTERNAL_Search *info = ctx->info;
595 struct GNUNET_DHT_GetHandle *get_h;
596 struct GNUNET_HashCode *hash;
600 LOG (GNUNET_ERROR_TYPE_DEBUG, "Next edge\n");
601 /* Find the longest match for the current string position,
602 * among tokens in the given block */
603 ctx->longest_match = 0;
604 result = REGEX_BLOCK_iterate (block, size,
605 ®ex_edge_iterator, ctx);
606 GNUNET_break (GNUNET_OK == result);
608 /* Did anything match? */
609 if (0 == ctx->longest_match)
611 LOG (GNUNET_ERROR_TYPE_DEBUG,
612 "no match in block\n");
617 new_ctx = GNUNET_new (struct RegexSearchContext);
618 new_ctx->info = info;
619 new_ctx->position = ctx->position + ctx->longest_match;
620 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
622 /* Check whether we already have a DHT GET running for it */
624 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
626 LOG (GNUNET_ERROR_TYPE_DEBUG,
627 "GET for %s running, END\n",
629 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
631 ®ex_result_iterator,
633 return; /* We are already looking for it */
636 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
639 LOG (GNUNET_ERROR_TYPE_INFO,
642 rest = &new_ctx->info->description[new_ctx->position];
644 GNUNET_DHT_get_start (info->dht, /* handle */
645 GNUNET_BLOCK_TYPE_REGEX, /* type */
646 hash, /* key to search */
647 DHT_REPLICATION, /* replication level */
650 strlen (rest) + 1, /* xquery bits */
651 &dht_get_string_handler, new_ctx);
653 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
656 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
665 * Search for a peer offering a regex matching certain string in the DHT.
666 * The search runs until REGEX_INTERNAL_search_cancel is called, even if results
669 * @param dht An existing and valid DHT service handle.
670 * @param string String to match against the regexes in the DHT.
671 * @param callback Callback for found peers.
672 * @param callback_cls Closure for @c callback.
673 * @param stats Optional statistics handle to report usage. Can be NULL.
675 * @return Handle to stop search and free resources.
676 * Must be freed by calling REGEX_INTERNAL_search_cancel.
678 struct REGEX_INTERNAL_Search *
679 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
681 REGEX_INTERNAL_Found callback,
683 struct GNUNET_STATISTICS_Handle *stats)
685 struct REGEX_INTERNAL_Search *h;
686 struct GNUNET_DHT_GetHandle *get_h;
687 struct RegexSearchContext *ctx;
688 struct GNUNET_HashCode key;
692 /* Initialize handle */
693 LOG (GNUNET_ERROR_TYPE_INFO, "REGEX_INTERNAL_search: %s\n", string);
694 GNUNET_assert (NULL != dht);
695 GNUNET_assert (NULL != callback);
696 h = GNUNET_new (struct REGEX_INTERNAL_Search);
698 h->description = GNUNET_strdup (string);
699 h->callback = callback;
700 h->callback_cls = callback_cls;
702 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
703 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
705 /* Initialize context */
706 len = strlen (string);
707 size = REGEX_INTERNAL_get_first_key (string, len, &key);
708 LOG (GNUNET_ERROR_TYPE_INFO,
709 " initial key for %s: %s (%.*s)\n",
710 string, GNUNET_h2s (&key), size, string);
711 ctx = GNUNET_new (struct RegexSearchContext);
712 ctx->position = size;
714 GNUNET_array_append (h->contexts, h->n_contexts, ctx);
715 LOG (GNUNET_ERROR_TYPE_DEBUG,
716 "consumed %u bits out of %u, now looking for %s\n",
720 /* Start search in DHT */
721 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
722 GNUNET_BLOCK_TYPE_REGEX, /* type */
723 &key, /* key to search */
724 DHT_REPLICATION, /* replication level */
726 &h->description[size], /* xquery */
727 // FIXME add BLOOMFILTER to exclude filtered peers
728 len + 1 - size, /* xquery bits */
729 // FIXME add BLOOMFILTER SIZE
730 &dht_get_string_handler, ctx);
733 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
736 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
744 * Iterator over hash map entries to cancel DHT GET requests after a
745 * successful connect_by_string.
747 * @param cls Closure (unused).
748 * @param key Current key code (unused).
749 * @param value Value in the hash map (get handle).
750 * @return GNUNET_YES if we should continue to iterate,
754 regex_cancel_dht_get (void *cls,
755 const struct GNUNET_HashCode * key,
758 struct GNUNET_DHT_GetHandle *h = value;
760 GNUNET_DHT_get_stop (h);
766 * Iterator over hash map entries to free CadetRegexBlocks stored during the
767 * search for connect_by_string.
769 * @param cls Closure (unused).
770 * @param key Current key code (unused).
771 * @param value CadetRegexBlock in the hash map.
772 * @return GNUNET_YES if we should continue to iterate,
776 regex_free_result (void *cls,
777 const struct GNUNET_HashCode * key,
786 * Cancel an ongoing regex search in the DHT and free all resources.
788 * @param h the search context.
791 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h)
795 GNUNET_free (h->description);
796 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles,
797 ®ex_cancel_dht_get, NULL);
798 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results,
799 ®ex_free_result, NULL);
800 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results);
801 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles);
802 if (0 < h->n_contexts)
804 for (i = 0; i < h->n_contexts; i++)
805 GNUNET_free (h->contexts[i]);
806 GNUNET_free (h->contexts);
812 /* end of regex_internal_dht.c */