2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_internal_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "regex_internal_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_signatures.h"
35 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
37 #define DHT_REPLICATION 5
38 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
39 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
43 * Handle to store cached data about a regex announce.
45 struct REGEX_INTERNAL_Announcement
48 * DHT handle to use, must be initialized externally.
50 struct GNUNET_DHT_Handle *dht;
58 * Automaton representation of the regex (expensive to build).
60 struct REGEX_INTERNAL_Automaton* dfa;
65 const struct GNUNET_CRYPTO_EccPrivateKey *priv;
68 * Optional statistics handle to report usage. Can be NULL.
70 struct GNUNET_STATISTICS_Handle *stats;
75 * Regex callback iterator to store own service description in the DHT.
78 * @param key hash for current state.
79 * @param proof proof for current state.
80 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
81 * @param num_edges number of edges leaving current state.
82 * @param edges edges leaving current state.
85 regex_iterator (void *cls,
86 const struct GNUNET_HashCode *key,
89 unsigned int num_edges,
90 const struct REGEX_BLOCK_Edge *edges)
92 struct REGEX_INTERNAL_Announcement *h = cls;
93 struct RegexBlock *block;
97 LOG (GNUNET_ERROR_TYPE_INFO,
98 "DHT PUT for state %s with proof `%s' and %u edges\n",
102 for (i = 0; i < num_edges; i++)
104 LOG (GNUNET_ERROR_TYPE_INFO,
105 " edge %s towards %s (%s)\n",
107 GNUNET_h2s (&edges[i].destination),
110 if (GNUNET_YES == accepting)
112 struct RegexAcceptBlock ab;
114 LOG (GNUNET_ERROR_TYPE_INFO,
115 "State %s is accepting, putting own id\n",
117 size = sizeof (struct RegexAcceptBlock);
118 ab.purpose.size = ntohl (sizeof (struct GNUNET_CRYPTO_EccSignaturePurpose) +
119 sizeof (struct GNUNET_TIME_AbsoluteNBO) +
120 sizeof (struct GNUNET_HashCode));
121 ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT);
122 ab.expiration_time = GNUNET_TIME_absolute_hton (GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION));
124 GNUNET_CRYPTO_ecc_key_get_public_for_signature (h->priv, &ab.public_key);
125 GNUNET_assert (GNUNET_OK ==
126 GNUNET_CRYPTO_ecc_sign (h->priv,
130 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
132 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
133 sizeof (struct RegexAcceptBlock), GNUNET_NO);
135 GNUNET_DHT_put (h->dht, key,
137 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
138 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
141 GNUNET_TIME_relative_to_absolute (DHT_TTL),
145 block = REGEX_BLOCK_create (proof,
150 GNUNET_DHT_put (h->dht, key,
153 GNUNET_BLOCK_TYPE_REGEX,
155 GNUNET_TIME_relative_to_absolute (DHT_TTL),
158 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
160 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
167 * Announce a regular expression: put all states of the automaton in the DHT.
168 * Does not free resources, must call REGEX_INTERNAL_announce_cancel for that.
170 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
171 * @param priv our private key, must remain valid until the announcement is cancelled
172 * @param regex Regular expression to announce.
173 * @param compression How many characters per edge can we squeeze?
174 * @param stats Optional statistics handle to report usage. Can be NULL.
176 * @return Handle to reuse o free cached resources.
177 * Must be freed by calling REGEX_INTERNAL_announce_cancel.
179 struct REGEX_INTERNAL_Announcement *
180 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
181 const struct GNUNET_CRYPTO_EccPrivateKey *priv,
183 uint16_t compression,
184 struct GNUNET_STATISTICS_Handle *stats)
186 struct REGEX_INTERNAL_Announcement *h;
188 GNUNET_assert (NULL != dht);
189 h = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Announcement));
194 h->dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), compression);
195 REGEX_INTERNAL_reannounce (h);
201 * Announce again a regular expression previously announced.
202 * Does use caching to speed up process.
204 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
207 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h)
209 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
210 LOG (GNUNET_ERROR_TYPE_INFO,
211 "REGEX_INTERNAL_reannounce: %s\n",
213 REGEX_INTERNAL_iterate_reachable_edges (h->dfa, ®ex_iterator, h);
218 * Clear all cached data used by a regex announce.
219 * Does not close DHT connection.
221 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
224 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h)
226 REGEX_INTERNAL_automaton_destroy (h->dfa);
231 /******************************************************************************/
235 * Struct to keep state of running searches that have consumed a part of
238 struct RegexSearchContext
241 * Part of the description already consumed by
242 * this particular search branch.
247 * Information about the search.
249 struct REGEX_INTERNAL_Search *info;
252 * We just want to look for one edge, the longer the better.
255 unsigned int longest_match;
258 * Destination hash of the longest match.
260 struct GNUNET_HashCode hash;
265 * Type of values in 'dht_get_results'.
270 * Number of bytes in data.
275 * The raw result data.
282 * Struct to keep information of searches of services described by a regex
283 * using a user-provided string service description.
285 struct REGEX_INTERNAL_Search
288 * DHT handle to use, must be initialized externally.
290 struct GNUNET_DHT_Handle *dht;
293 * Optional statistics handle to report usage. Can be NULL.
295 struct GNUNET_STATISTICS_Handle *stats;
298 * User provided description of the searched service.
305 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
308 * Results from running DHT GETs, values are of type
311 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
314 * Contexts, for each running DHT GET. Free all on end of search.
316 struct RegexSearchContext **contexts;
319 * Number of contexts (branches/steps in search).
321 unsigned int n_contexts;
324 * @param callback Callback for found peers.
326 REGEX_INTERNAL_Found callback;
329 * @param callback_cls Closure for @c callback.
336 * Jump to the next edge, with the longest matching token.
338 * @param block Block found in the DHT.
339 * @param size Size of the block.
340 * @param ctx Context of the search.
343 regex_next_edge (const struct RegexBlock *block,
345 struct RegexSearchContext *ctx);
349 * Function to process DHT string to regex matching.
350 * Called on each result obtained for the DHT search.
352 * @param cls Closure (search context).
353 * @param exp When will this value expire.
354 * @param key Key of the result.
355 * @param get_path Path of the get request.
356 * @param get_path_length Lenght of get_path.
357 * @param put_path Path of the put request.
358 * @param put_path_length Length of the put_path.
359 * @param type Type of the result.
360 * @param size Number of bytes in data.
361 * @param data Pointer to the result data.
364 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
365 const struct GNUNET_HashCode *key,
366 const struct GNUNET_PeerIdentity *get_path,
367 unsigned int get_path_length,
368 const struct GNUNET_PeerIdentity *put_path,
369 unsigned int put_path_length,
370 enum GNUNET_BLOCK_Type type,
371 size_t size, const void *data)
373 const struct RegexAcceptBlock *block = data;
374 struct RegexSearchContext *ctx = cls;
375 struct REGEX_INTERNAL_Search *info = ctx->info;
376 struct GNUNET_PeerIdentity pid;
378 LOG (GNUNET_ERROR_TYPE_DEBUG,
379 "Regex result accept for %s (key %s)\n",
380 info->description, GNUNET_h2s(key));
382 GNUNET_STATISTICS_update (info->stats,
383 "# regex accepting blocks found",
385 GNUNET_STATISTICS_update (info->stats,
386 "# regex accepting block bytes found",
388 GNUNET_CRYPTO_hash (&block->public_key,
389 sizeof (struct GNUNET_CRYPTO_EccPublicSignKey),
391 info->callback (info->callback_cls,
393 get_path, get_path_length,
394 put_path, put_path_length);
399 * Find a path to a peer that offers a regex servcie compatible
400 * with a given string.
402 * @param key The key of the accepting state.
403 * @param ctx Context containing info about the string, tunnel, etc.
406 regex_find_path (const struct GNUNET_HashCode *key,
407 struct RegexSearchContext *ctx)
409 struct GNUNET_DHT_GetHandle *get_h;
411 LOG (GNUNET_ERROR_TYPE_DEBUG,
412 "regex finds path for %s\n",
414 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
415 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
416 key, /* key to search */
417 DHT_REPLICATION, /* replication level */
418 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
419 NULL, /* xquery */ // FIXME BLOOMFILTER
420 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
421 &dht_get_string_accept_handler, ctx);
422 GNUNET_break (GNUNET_OK ==
423 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
426 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
431 * Function to process DHT string to regex matching.
432 * Called on each result obtained for the DHT search.
434 * @param cls closure (search context)
435 * @param exp when will this value expire
436 * @param key key of the result
437 * @param get_path path of the get request (not used)
438 * @param get_path_length lenght of get_path (not used)
439 * @param put_path path of the put request (not used)
440 * @param put_path_length length of the put_path (not used)
441 * @param type type of the result
442 * @param size number of bytes in data
443 * @param data pointer to the result data
445 * TODO: re-issue the request after certain time? cancel after X results?
448 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
449 const struct GNUNET_HashCode *key,
450 const struct GNUNET_PeerIdentity *get_path,
451 unsigned int get_path_length,
452 const struct GNUNET_PeerIdentity *put_path,
453 unsigned int put_path_length,
454 enum GNUNET_BLOCK_Type type,
455 size_t size, const void *data)
457 const struct RegexBlock *block = data;
458 struct RegexSearchContext *ctx = cls;
459 struct REGEX_INTERNAL_Search *info = ctx->info;
463 LOG (GNUNET_ERROR_TYPE_INFO,
464 "DHT GET result for %s (%s)\n",
465 GNUNET_h2s (key), ctx->info->description);
466 copy = GNUNET_malloc (sizeof (struct Result) + size);
468 copy->data = ©[1];
469 memcpy (©[1], block, size);
470 GNUNET_break (GNUNET_OK ==
471 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
473 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
474 len = strlen (info->description);
475 if (len == ctx->position) // String processed
477 if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block, size))
479 regex_find_path (key, ctx);
483 LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n");
484 /* FIXME REGEX this block not successful, wait for more? start timeout? */
488 regex_next_edge (block, size, ctx);
493 * Iterator over found existing mesh regex blocks that match an ongoing search.
495 * @param cls Closure (current context)-
496 * @param key Current key code (key for cached block).
497 * @param value Value in the hash map (cached RegexBlock).
498 * @return GNUNET_YES: we should always continue to iterate.
501 regex_result_iterator (void *cls,
502 const struct GNUNET_HashCode * key,
505 struct Result *result = value;
506 const struct RegexBlock *block = result->data;
507 struct RegexSearchContext *ctx = cls;
510 GNUNET_BLOCK_is_accepting (block, result->size)) &&
511 (ctx->position == strlen (ctx->info->description)) )
513 LOG (GNUNET_ERROR_TYPE_INFO,
514 "Found accepting known block\n");
515 regex_find_path (key, ctx);
516 return GNUNET_YES; // We found an accept state!
518 LOG (GNUNET_ERROR_TYPE_DEBUG,
521 strlen (ctx->info->description),
522 GNUNET_BLOCK_is_accepting (block, result->size));
523 regex_next_edge (block, result->size, ctx);
525 GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
533 * Iterator over edges in a regex block retrieved from the DHT.
535 * @param cls Closure (context of the search).
536 * @param token Token that follows to next state.
537 * @param len Lenght of token.
538 * @param key Hash of next state.
540 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
543 regex_edge_iterator (void *cls,
546 const struct GNUNET_HashCode *key)
548 struct RegexSearchContext *ctx = cls;
549 struct REGEX_INTERNAL_Search *info = ctx->info;
553 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
555 current = &info->description[ctx->position];
556 current_len = strlen (info->description) - ctx->position;
557 if (len > current_len)
559 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n");
562 if (0 != strncmp (current, token, len))
564 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n");
568 if (len > ctx->longest_match)
570 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n");
571 ctx->longest_match = len;
576 LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n");
579 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
585 * Jump to the next edge, with the longest matching token.
587 * @param block Block found in the DHT.
588 * @param size Size of the block.
589 * @param ctx Context of the search.
592 regex_next_edge (const struct RegexBlock *block,
594 struct RegexSearchContext *ctx)
596 struct RegexSearchContext *new_ctx;
597 struct REGEX_INTERNAL_Search *info = ctx->info;
598 struct GNUNET_DHT_GetHandle *get_h;
599 struct GNUNET_HashCode *hash;
603 LOG (GNUNET_ERROR_TYPE_DEBUG, "Next edge\n");
604 /* Find the longest match for the current string position,
605 * among tokens in the given block */
606 ctx->longest_match = 0;
607 result = REGEX_BLOCK_iterate (block, size,
608 ®ex_edge_iterator, ctx);
609 GNUNET_break (GNUNET_OK == result);
611 /* Did anything match? */
612 if (0 == ctx->longest_match)
614 LOG (GNUNET_ERROR_TYPE_DEBUG,
615 "no match in block\n");
620 new_ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
621 new_ctx->info = info;
622 new_ctx->position = ctx->position + ctx->longest_match;
623 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
625 /* Check whether we already have a DHT GET running for it */
627 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
629 LOG (GNUNET_ERROR_TYPE_DEBUG,
630 "GET for %s running, END\n",
632 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
634 ®ex_result_iterator,
636 return; /* We are already looking for it */
639 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
642 LOG (GNUNET_ERROR_TYPE_INFO,
645 rest = &new_ctx->info->description[new_ctx->position];
647 GNUNET_DHT_get_start (info->dht, /* handle */
648 GNUNET_BLOCK_TYPE_REGEX, /* type */
649 hash, /* key to search */
650 DHT_REPLICATION, /* replication level */
653 strlen (rest) + 1, /* xquery bits */
654 &dht_get_string_handler, new_ctx);
656 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
659 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
668 * Search for a peer offering a regex matching certain string in the DHT.
669 * The search runs until REGEX_INTERNAL_search_cancel is called, even if results
672 * @param dht An existing and valid DHT service handle.
673 * @param string String to match against the regexes in the DHT.
674 * @param callback Callback for found peers.
675 * @param callback_cls Closure for @c callback.
676 * @param stats Optional statistics handle to report usage. Can be NULL.
678 * @return Handle to stop search and free resources.
679 * Must be freed by calling REGEX_INTERNAL_search_cancel.
681 struct REGEX_INTERNAL_Search *
682 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
684 REGEX_INTERNAL_Found callback,
686 struct GNUNET_STATISTICS_Handle *stats)
688 struct REGEX_INTERNAL_Search *h;
689 struct GNUNET_DHT_GetHandle *get_h;
690 struct RegexSearchContext *ctx;
691 struct GNUNET_HashCode key;
695 /* Initialize handle */
696 LOG (GNUNET_ERROR_TYPE_INFO, "REGEX_INTERNAL_search: %s\n", string);
697 GNUNET_assert (NULL != dht);
698 GNUNET_assert (NULL != callback);
699 h = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Search));
701 h->description = GNUNET_strdup (string);
702 h->callback = callback;
703 h->callback_cls = callback_cls;
705 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
706 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
708 /* Initialize context */
709 len = strlen (string);
710 size = REGEX_INTERNAL_get_first_key (string, len, &key);
711 LOG (GNUNET_ERROR_TYPE_INFO,
712 " initial key for %s: %s (%.*s)\n",
713 string, GNUNET_h2s (&key), size, string);
714 ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
715 ctx->position = size;
717 GNUNET_array_append (h->contexts, h->n_contexts, ctx);
718 LOG (GNUNET_ERROR_TYPE_DEBUG,
719 "consumed %u bits out of %u, now looking for %s\n",
723 /* Start search in DHT */
724 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
725 GNUNET_BLOCK_TYPE_REGEX, /* type */
726 &key, /* key to search */
727 DHT_REPLICATION, /* replication level */
729 &h->description[size], /* xquery */
730 // FIXME add BLOOMFILTER to exclude filtered peers
731 len + 1 - size, /* xquery bits */
732 // FIXME add BLOOMFILTER SIZE
733 &dht_get_string_handler, ctx);
736 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
739 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
747 * Iterator over hash map entries to cancel DHT GET requests after a
748 * successful connect_by_string.
750 * @param cls Closure (unused).
751 * @param key Current key code (unused).
752 * @param value Value in the hash map (get handle).
753 * @return GNUNET_YES if we should continue to iterate,
757 regex_cancel_dht_get (void *cls,
758 const struct GNUNET_HashCode * key,
761 struct GNUNET_DHT_GetHandle *h = value;
763 GNUNET_DHT_get_stop (h);
769 * Iterator over hash map entries to free MeshRegexBlocks stored during the
770 * search for connect_by_string.
772 * @param cls Closure (unused).
773 * @param key Current key code (unused).
774 * @param value MeshRegexBlock in the hash map.
775 * @return GNUNET_YES if we should continue to iterate,
779 regex_free_result (void *cls,
780 const struct GNUNET_HashCode * key,
789 * Cancel an ongoing regex search in the DHT and free all resources.
791 * @param h the search context.
794 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h)
798 GNUNET_free (h->description);
799 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles,
800 ®ex_cancel_dht_get, NULL);
801 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results,
802 ®ex_free_result, NULL);
803 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results);
804 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles);
805 if (0 < h->n_contexts)
807 for (i = 0; i < h->n_contexts; i++)
808 GNUNET_free (h->contexts[i]);
809 GNUNET_free (h->contexts);
815 /* end of regex_internal_dht.c */