2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_internal_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "regex_internal_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_signatures.h"
35 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
37 #define DHT_REPLICATION 5
38 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
39 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
43 * Handle to store cached data about a regex announce.
45 struct REGEX_INTERNAL_Announcement
48 * DHT handle to use, must be initialized externally.
50 struct GNUNET_DHT_Handle *dht;
58 * Automaton representation of the regex (expensive to build).
60 struct REGEX_INTERNAL_Automaton* dfa;
65 const struct GNUNET_CRYPTO_EccPrivateKey *priv;
68 * Optional statistics handle to report usage. Can be NULL.
70 struct GNUNET_STATISTICS_Handle *stats;
75 * Regex callback iterator to store own service description in the DHT.
78 * @param key hash for current state.
79 * @param proof proof for current state.
80 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
81 * @param num_edges number of edges leaving current state.
82 * @param edges edges leaving current state.
85 regex_iterator (void *cls,
86 const struct GNUNET_HashCode *key,
89 unsigned int num_edges,
90 const struct REGEX_BLOCK_Edge *edges)
92 struct REGEX_INTERNAL_Announcement *h = cls;
93 struct RegexBlock *block;
96 LOG (GNUNET_ERROR_TYPE_DEBUG,
97 "DHT PUT for state %s with proof `%s' and %u edges\n",
101 if (GNUNET_YES == accepting)
103 struct RegexAcceptBlock ab;
105 LOG (GNUNET_ERROR_TYPE_DEBUG,
106 "State %s is accepting, putting own id\n",
108 size = sizeof (struct RegexAcceptBlock);
109 ab.purpose.size = ntohl (sizeof (struct GNUNET_CRYPTO_EccSignaturePurpose) +
110 sizeof (struct GNUNET_TIME_AbsoluteNBO) +
111 sizeof (struct GNUNET_HashCode));
112 ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT);
113 ab.expiration_time = GNUNET_TIME_absolute_hton (GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION));
115 GNUNET_CRYPTO_ecc_key_get_public_for_signature (h->priv,
117 GNUNET_assert (GNUNET_OK ==
118 GNUNET_CRYPTO_ecc_sign (h->priv,
122 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
124 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
125 sizeof (struct RegexAcceptBlock), GNUNET_NO);
127 GNUNET_DHT_put (h->dht, key,
129 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
130 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
133 GNUNET_TIME_relative_to_absolute (DHT_TTL),
137 block = REGEX_BLOCK_create (proof,
142 GNUNET_DHT_put (h->dht, key,
145 GNUNET_BLOCK_TYPE_REGEX,
147 GNUNET_TIME_relative_to_absolute (DHT_TTL),
150 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
152 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
159 * Announce a regular expression: put all states of the automaton in the DHT.
160 * Does not free resources, must call REGEX_INTERNAL_announce_cancel for that.
162 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
163 * @param priv our private key, must remain valid until the announcement is cancelled
164 * @param regex Regular expression to announce.
165 * @param compression How many characters per edge can we squeeze?
166 * @param stats Optional statistics handle to report usage. Can be NULL.
168 * @return Handle to reuse o free cached resources.
169 * Must be freed by calling REGEX_INTERNAL_announce_cancel.
171 struct REGEX_INTERNAL_Announcement *
172 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
173 const struct GNUNET_CRYPTO_EccPrivateKey *priv,
175 uint16_t compression,
176 struct GNUNET_STATISTICS_Handle *stats)
178 struct REGEX_INTERNAL_Announcement *h;
180 GNUNET_assert (NULL != dht);
181 h = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Announcement));
186 h->dfa = REGEX_INTERNAL_construct_dfa (regex,
189 REGEX_INTERNAL_reannounce (h);
195 * Announce again a regular expression previously announced.
196 * Does use caching to speed up process.
198 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
201 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h)
203 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
204 LOG (GNUNET_ERROR_TYPE_INFO,
205 "REGEX_INTERNAL_reannounce: %s\n",
207 REGEX_INTERNAL_iterate_all_edges (h->dfa,
213 * Clear all cached data used by a regex announce.
214 * Does not close DHT connection.
216 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
219 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h)
221 REGEX_INTERNAL_automaton_destroy (h->dfa);
226 /******************************************************************************/
230 * Struct to keep state of running searches that have consumed a part of
233 struct RegexSearchContext
236 * Part of the description already consumed by
237 * this particular search branch.
242 * Information about the search.
244 struct REGEX_INTERNAL_Search *info;
247 * We just want to look for one edge, the longer the better.
250 unsigned int longest_match;
253 * Destination hash of the longest match.
255 struct GNUNET_HashCode hash;
260 * Type of values in 'dht_get_results'.
265 * Number of bytes in data.
270 * The raw result data.
277 * Struct to keep information of searches of services described by a regex
278 * using a user-provided string service description.
280 struct REGEX_INTERNAL_Search
283 * DHT handle to use, must be initialized externally.
285 struct GNUNET_DHT_Handle *dht;
288 * Optional statistics handle to report usage. Can be NULL.
290 struct GNUNET_STATISTICS_Handle *stats;
293 * User provided description of the searched service.
300 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
303 * Results from running DHT GETs, values are of type
306 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
309 * Contexts, for each running DHT GET. Free all on end of search.
311 struct RegexSearchContext **contexts;
314 * Number of contexts (branches/steps in search).
316 unsigned int n_contexts;
319 * @param callback Callback for found peers.
321 REGEX_INTERNAL_Found callback;
324 * @param callback_cls Closure for @c callback.
331 * Jump to the next edge, with the longest matching token.
333 * @param block Block found in the DHT.
334 * @param size Size of the block.
335 * @param ctx Context of the search.
338 regex_next_edge (const struct RegexBlock *block,
340 struct RegexSearchContext *ctx);
344 * Function to process DHT string to regex matching.
345 * Called on each result obtained for the DHT search.
347 * @param cls Closure (search context).
348 * @param exp When will this value expire.
349 * @param key Key of the result.
350 * @param get_path Path of the get request.
351 * @param get_path_length Lenght of get_path.
352 * @param put_path Path of the put request.
353 * @param put_path_length Length of the put_path.
354 * @param type Type of the result.
355 * @param size Number of bytes in data.
356 * @param data Pointer to the result data.
359 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
360 const struct GNUNET_HashCode *key,
361 const struct GNUNET_PeerIdentity *get_path,
362 unsigned int get_path_length,
363 const struct GNUNET_PeerIdentity *put_path,
364 unsigned int put_path_length,
365 enum GNUNET_BLOCK_Type type,
366 size_t size, const void *data)
368 const struct RegexAcceptBlock *block = data;
369 struct RegexSearchContext *ctx = cls;
370 struct REGEX_INTERNAL_Search *info = ctx->info;
371 struct GNUNET_PeerIdentity pid;
373 LOG (GNUNET_ERROR_TYPE_DEBUG,
374 "Regex result accept for %s (key %s)\n",
375 info->description, GNUNET_h2s(key));
377 GNUNET_STATISTICS_update (info->stats,
378 "# regex accepting blocks found",
380 GNUNET_STATISTICS_update (info->stats,
381 "# regex accepting block bytes found",
383 GNUNET_CRYPTO_hash (&block->public_key,
384 sizeof (struct GNUNET_CRYPTO_EccPublicSignKey),
386 info->callback (info->callback_cls,
388 get_path, get_path_length,
389 put_path, put_path_length);
394 * Find a path to a peer that offers a regex servcie compatible
395 * with a given string.
397 * @param key The key of the accepting state.
398 * @param ctx Context containing info about the string, tunnel, etc.
401 regex_find_path (const struct GNUNET_HashCode *key,
402 struct RegexSearchContext *ctx)
404 struct GNUNET_DHT_GetHandle *get_h;
406 LOG (GNUNET_ERROR_TYPE_DEBUG,
407 "regex finds path for %s\n",
409 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
410 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
411 key, /* key to search */
412 DHT_REPLICATION, /* replication level */
413 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
414 NULL, /* xquery */ // FIXME BLOOMFILTER
415 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
416 &dht_get_string_accept_handler, ctx);
417 GNUNET_break (GNUNET_OK ==
418 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
421 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
426 * Function to process DHT string to regex matching.
427 * Called on each result obtained for the DHT search.
429 * @param cls closure (search context)
430 * @param exp when will this value expire
431 * @param key key of the result
432 * @param get_path path of the get request (not used)
433 * @param get_path_length lenght of get_path (not used)
434 * @param put_path path of the put request (not used)
435 * @param put_path_length length of the put_path (not used)
436 * @param type type of the result
437 * @param size number of bytes in data
438 * @param data pointer to the result data
440 * TODO: re-issue the request after certain time? cancel after X results?
443 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
444 const struct GNUNET_HashCode *key,
445 const struct GNUNET_PeerIdentity *get_path,
446 unsigned int get_path_length,
447 const struct GNUNET_PeerIdentity *put_path,
448 unsigned int put_path_length,
449 enum GNUNET_BLOCK_Type type,
450 size_t size, const void *data)
452 const struct RegexBlock *block = data;
453 struct RegexSearchContext *ctx = cls;
454 struct REGEX_INTERNAL_Search *info = ctx->info;
458 LOG (GNUNET_ERROR_TYPE_INFO,
459 "DHT GET result for %s (%s)\n",
460 GNUNET_h2s (key), ctx->info->description);
461 copy = GNUNET_malloc (sizeof (struct Result) + size);
463 copy->data = ©[1];
464 memcpy (©[1], block, size);
465 GNUNET_break (GNUNET_OK ==
466 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
468 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
469 len = strlen (info->description);
470 if (len == ctx->position) // String processed
472 if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block, size))
474 regex_find_path (key, ctx);
478 LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n");
479 /* FIXME REGEX this block not successful, wait for more? start timeout? */
483 regex_next_edge (block, size, ctx);
488 * Iterator over found existing mesh regex blocks that match an ongoing search.
490 * @param cls Closure (current context)-
491 * @param key Current key code (key for cached block).
492 * @param value Value in the hash map (cached RegexBlock).
493 * @return GNUNET_YES: we should always continue to iterate.
496 regex_result_iterator (void *cls,
497 const struct GNUNET_HashCode * key,
500 struct Result *result = value;
501 const struct RegexBlock *block = result->data;
502 struct RegexSearchContext *ctx = cls;
505 GNUNET_BLOCK_is_accepting (block, result->size)) &&
506 (ctx->position == strlen (ctx->info->description)) )
508 LOG (GNUNET_ERROR_TYPE_INFO,
509 "Found accepting known block\n");
510 regex_find_path (key, ctx);
511 return GNUNET_YES; // We found an accept state!
513 LOG (GNUNET_ERROR_TYPE_DEBUG,
516 strlen (ctx->info->description),
517 GNUNET_BLOCK_is_accepting (block, result->size));
518 regex_next_edge (block, result->size, ctx);
520 GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
528 * Iterator over edges in a regex block retrieved from the DHT.
530 * @param cls Closure (context of the search).
531 * @param token Token that follows to next state.
532 * @param len Lenght of token.
533 * @param key Hash of next state.
535 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
538 regex_edge_iterator (void *cls,
541 const struct GNUNET_HashCode *key)
543 struct RegexSearchContext *ctx = cls;
544 struct REGEX_INTERNAL_Search *info = ctx->info;
548 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
550 current = &info->description[ctx->position];
551 current_len = strlen (info->description) - ctx->position;
552 if (len > current_len)
554 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n");
557 if (0 != strncmp (current, token, len))
559 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n");
563 if (len > ctx->longest_match)
565 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n");
566 ctx->longest_match = len;
571 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n");
574 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
580 * Jump to the next edge, with the longest matching token.
582 * @param block Block found in the DHT.
583 * @param size Size of the block.
584 * @param ctx Context of the search.
587 regex_next_edge (const struct RegexBlock *block,
589 struct RegexSearchContext *ctx)
591 struct RegexSearchContext *new_ctx;
592 struct REGEX_INTERNAL_Search *info = ctx->info;
593 struct GNUNET_DHT_GetHandle *get_h;
594 struct GNUNET_HashCode *hash;
598 /* Find the longest match for the current string position,
599 * among tokens in the given block */
600 ctx->longest_match = 0;
601 result = REGEX_BLOCK_iterate (block, size,
602 ®ex_edge_iterator, ctx);
603 GNUNET_break (GNUNET_OK == result);
605 /* Did anything match? */
606 if (0 == ctx->longest_match)
608 LOG (GNUNET_ERROR_TYPE_DEBUG,
609 "no match in block\n");
614 new_ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
615 new_ctx->info = info;
616 new_ctx->position = ctx->position + ctx->longest_match;
617 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
619 /* Check whether we already have a DHT GET running for it */
621 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
623 LOG (GNUNET_ERROR_TYPE_DEBUG,
624 "GET for %s running, END\n",
626 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
628 ®ex_result_iterator,
630 return; /* We are already looking for it */
633 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
636 LOG (GNUNET_ERROR_TYPE_INFO,
639 rest = &new_ctx->info->description[new_ctx->position];
641 GNUNET_DHT_get_start (info->dht, /* handle */
642 GNUNET_BLOCK_TYPE_REGEX, /* type */
643 hash, /* key to search */
644 DHT_REPLICATION, /* replication level */
647 strlen (rest) + 1, /* xquery bits */
648 &dht_get_string_handler, new_ctx);
650 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
653 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
662 * Search for a peer offering a regex matching certain string in the DHT.
663 * The search runs until REGEX_INTERNAL_search_cancel is called, even if results
666 * @param dht An existing and valid DHT service handle.
667 * @param string String to match against the regexes in the DHT.
668 * @param callback Callback for found peers.
669 * @param callback_cls Closure for @c callback.
670 * @param stats Optional statistics handle to report usage. Can be NULL.
672 * @return Handle to stop search and free resources.
673 * Must be freed by calling REGEX_INTERNAL_search_cancel.
675 struct REGEX_INTERNAL_Search *
676 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
678 REGEX_INTERNAL_Found callback,
680 struct GNUNET_STATISTICS_Handle *stats)
682 struct REGEX_INTERNAL_Search *h;
683 struct GNUNET_DHT_GetHandle *get_h;
684 struct RegexSearchContext *ctx;
685 struct GNUNET_HashCode key;
689 /* Initialize handle */
690 LOG (GNUNET_ERROR_TYPE_INFO, "REGEX_INTERNAL_search: %s\n", string);
691 GNUNET_assert (NULL != dht);
692 GNUNET_assert (NULL != callback);
693 h = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Search));
695 h->description = GNUNET_strdup (string);
696 h->callback = callback;
697 h->callback_cls = callback_cls;
699 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
700 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
702 /* Initialize context */
703 len = strlen (string);
704 size = REGEX_INTERNAL_get_first_key (string, len, &key);
705 ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
706 ctx->position = size;
708 GNUNET_array_append (h->contexts, h->n_contexts, ctx);
709 LOG (GNUNET_ERROR_TYPE_DEBUG,
710 "consumed %u bits out of %u, now looking for %s\n",
714 /* Start search in DHT */
715 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
716 GNUNET_BLOCK_TYPE_REGEX, /* type */
717 &key, /* key to search */
718 DHT_REPLICATION, /* replication level */
720 &h->description[size], /* xquery */
721 // FIXME add BLOOMFILTER to exclude filtered peers
722 len + 1 - size, /* xquery bits */
723 // FIXME add BLOOMFILTER SIZE
724 &dht_get_string_handler, ctx);
727 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
730 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
738 * Iterator over hash map entries to cancel DHT GET requests after a
739 * successful connect_by_string.
741 * @param cls Closure (unused).
742 * @param key Current key code (unused).
743 * @param value Value in the hash map (get handle).
744 * @return GNUNET_YES if we should continue to iterate,
748 regex_cancel_dht_get (void *cls,
749 const struct GNUNET_HashCode * key,
752 struct GNUNET_DHT_GetHandle *h = value;
754 GNUNET_DHT_get_stop (h);
760 * Iterator over hash map entries to free MeshRegexBlocks stored during the
761 * search for connect_by_string.
763 * @param cls Closure (unused).
764 * @param key Current key code (unused).
765 * @param value MeshRegexBlock in the hash map.
766 * @return GNUNET_YES if we should continue to iterate,
770 regex_free_result (void *cls,
771 const struct GNUNET_HashCode * key,
780 * Cancel an ongoing regex search in the DHT and free all resources.
782 * @param h the search context.
785 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h)
789 GNUNET_free (h->description);
790 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles,
791 ®ex_cancel_dht_get, NULL);
792 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results,
793 ®ex_free_result, NULL);
794 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results);
795 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles);
796 if (0 < h->n_contexts)
798 for (i = 0; i < h->n_contexts; i++)
799 GNUNET_free (h->contexts[i]);
800 GNUNET_free (h->contexts);
806 /* end of regex_internal_dht.c */