2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_internal_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "regex_internal_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
31 #include "gnunet_constants.h"
32 #include "gnunet_signatures.h"
35 #define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
37 /* FIXME: OPTION (API, CONFIG) */
38 #define DHT_REPLICATION 5
39 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
40 #define DEBUG_DHT GNUNET_NO
43 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE | GNUNET_DHT_RO_RECORD_ROUTE
45 #define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
50 * Handle to store cached data about a regex announce.
52 struct REGEX_INTERNAL_Announcement
55 * DHT handle to use, must be initialized externally.
57 struct GNUNET_DHT_Handle *dht;
65 * Automaton representation of the regex (expensive to build).
67 struct REGEX_INTERNAL_Automaton* dfa;
72 const struct GNUNET_CRYPTO_EccPrivateKey *priv;
75 * Optional statistics handle to report usage. Can be NULL.
77 struct GNUNET_STATISTICS_Handle *stats;
82 * Regex callback iterator to store own service description in the DHT.
85 * @param key hash for current state.
86 * @param proof proof for current state.
87 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
88 * @param num_edges number of edges leaving current state.
89 * @param edges edges leaving current state.
92 regex_iterator (void *cls,
93 const struct GNUNET_HashCode *key,
96 unsigned int num_edges,
97 const struct REGEX_BLOCK_Edge *edges)
99 struct REGEX_INTERNAL_Announcement *h = cls;
100 struct RegexBlock *block;
103 LOG (GNUNET_ERROR_TYPE_DEBUG,
104 "DHT PUT for state %s with proof `%s' and %u edges\n",
108 if (GNUNET_YES == accepting)
110 struct RegexAcceptBlock ab;
112 LOG (GNUNET_ERROR_TYPE_DEBUG,
113 "State %s is accepting, putting own id\n",
115 size = sizeof (struct RegexAcceptBlock);
116 ab.purpose.size = sizeof (struct GNUNET_CRYPTO_EccSignaturePurpose) +
117 sizeof (struct GNUNET_TIME_AbsoluteNBO) +
118 sizeof (struct GNUNET_HashCode);
119 ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT);
120 ab.expiration_time = GNUNET_TIME_absolute_hton (GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION));
122 GNUNET_CRYPTO_ecc_key_get_public (h->priv,
124 GNUNET_assert (GNUNET_OK ==
125 GNUNET_CRYPTO_ecc_sign (h->priv,
129 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
131 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
132 sizeof (struct RegexAcceptBlock), GNUNET_NO);
134 GNUNET_DHT_put (h->dht, key,
136 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
137 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
140 GNUNET_TIME_relative_to_absolute (DHT_TTL),
144 block = REGEX_BLOCK_create (proof,
149 GNUNET_DHT_put (h->dht, key,
152 GNUNET_BLOCK_TYPE_REGEX,
154 GNUNET_TIME_relative_to_absolute (DHT_TTL),
157 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
159 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
166 * Announce a regular expression: put all states of the automaton in the DHT.
167 * Does not free resources, must call REGEX_INTERNAL_announce_cancel for that.
169 * @param dht An existing and valid DHT service handle. CANNOT be NULL.
170 * @param priv our private key, must remain valid until the announcement is cancelled
171 * @param regex Regular expression to announce.
172 * @param compression How many characters per edge can we squeeze?
173 * @param stats Optional statistics handle to report usage. Can be NULL.
175 * @return Handle to reuse o free cached resources.
176 * Must be freed by calling REGEX_INTERNAL_announce_cancel.
178 struct REGEX_INTERNAL_Announcement *
179 REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
180 const struct GNUNET_CRYPTO_EccPrivateKey *priv,
182 uint16_t compression,
183 struct GNUNET_STATISTICS_Handle *stats)
185 struct REGEX_INTERNAL_Announcement *h;
187 GNUNET_assert (NULL != dht);
188 h = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Announcement));
193 h->dfa = REGEX_INTERNAL_construct_dfa (regex,
196 REGEX_INTERNAL_reannounce (h);
202 * Announce again a regular expression previously announced.
203 * Does use caching to speed up process.
205 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
208 REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h)
210 GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */
211 LOG (GNUNET_ERROR_TYPE_INFO, "REGEX_INTERNAL_reannounce: %.60s\n", h->regex);
212 LOG (GNUNET_ERROR_TYPE_DEBUG, " full: %s\n", h->regex);
213 REGEX_INTERNAL_iterate_all_edges (h->dfa, ®ex_iterator, h);
219 * Clear all cached data used by a regex announce.
220 * Does not close DHT connection.
222 * @param h Handle returned by a previous REGEX_INTERNAL_announce call.
225 REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h)
227 REGEX_INTERNAL_automaton_destroy (h->dfa);
232 /******************************************************************************/
236 * Struct to keep state of running searches that have consumed a part of
239 struct RegexSearchContext
242 * Part of the description already consumed by
243 * this particular search branch.
248 * Information about the search.
250 struct REGEX_INTERNAL_Search *info;
253 * We just want to look for one edge, the longer the better.
256 unsigned int longest_match;
259 * Destination hash of the longest match.
261 struct GNUNET_HashCode hash;
266 * Struct to keep information of searches of services described by a regex
267 * using a user-provided string service description.
269 struct REGEX_INTERNAL_Search
272 * DHT handle to use, must be initialized externally.
274 struct GNUNET_DHT_Handle *dht;
277 * Optional statistics handle to report usage. Can be NULL.
279 struct GNUNET_STATISTICS_Handle *stats;
282 * User provided description of the searched service.
289 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
292 * Results from running DHT GETs.
294 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
297 * Contexts, for each running DHT GET. Free all on end of search.
299 struct RegexSearchContext **contexts;
302 * Number of contexts (branches/steps in search).
304 unsigned int n_contexts;
307 * @param callback Callback for found peers.
309 REGEX_INTERNAL_Found callback;
312 * @param callback_cls Closure for @c callback.
320 * Jump to the next edge, with the longest matching token.
322 * @param block Block found in the DHT.
323 * @param size Size of the block.
324 * @param ctx Context of the search.
326 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
329 regex_next_edge (const struct RegexBlock *block,
331 struct RegexSearchContext *ctx);
335 * Function to process DHT string to regex matching.
336 * Called on each result obtained for the DHT search.
338 * @param cls Closure (search context).
339 * @param exp When will this value expire.
340 * @param key Key of the result.
341 * @param get_path Path of the get request.
342 * @param get_path_length Lenght of get_path.
343 * @param put_path Path of the put request.
344 * @param put_path_length Length of the put_path.
345 * @param type Type of the result.
346 * @param size Number of bytes in data.
347 * @param data Pointer to the result data.
350 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
351 const struct GNUNET_HashCode *key,
352 const struct GNUNET_PeerIdentity *get_path,
353 unsigned int get_path_length,
354 const struct GNUNET_PeerIdentity *put_path,
355 unsigned int put_path_length,
356 enum GNUNET_BLOCK_Type type,
357 size_t size, const void *data)
359 const struct RegexAcceptBlock *block = data;
360 struct RegexSearchContext *ctx = cls;
361 struct REGEX_INTERNAL_Search *info = ctx->info;
362 struct GNUNET_PeerIdentity pid;
364 LOG (GNUNET_ERROR_TYPE_DEBUG, "Got regex results from DHT!\n");
365 LOG (GNUNET_ERROR_TYPE_INFO, " accept for %s (key %s)\n",
366 info->description, GNUNET_h2s(key));
368 GNUNET_STATISTICS_update (info->stats, "# regex accepting blocks found",
370 GNUNET_STATISTICS_update (info->stats, "# regex accepting block bytes found",
372 GNUNET_CRYPTO_hash (&block->public_key,
373 sizeof (struct GNUNET_CRYPTO_EccPublicKeyBinaryEncoded),
375 info->callback (info->callback_cls,
377 get_path, get_path_length,
378 put_path, put_path_length);
383 * Find a path to a peer that offers a regex servcie compatible
384 * with a given string.
386 * @param key The key of the accepting state.
387 * @param ctx Context containing info about the string, tunnel, etc.
390 regex_find_path (const struct GNUNET_HashCode *key,
391 struct RegexSearchContext *ctx)
393 struct GNUNET_DHT_GetHandle *get_h;
395 LOG (GNUNET_ERROR_TYPE_DEBUG, "Found peer by service\n");
396 LOG (GNUNET_ERROR_TYPE_INFO, " find accept for %s\n", GNUNET_h2s (key));
397 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
398 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
399 key, /* key to search */
400 DHT_REPLICATION, /* replication level */
401 DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE,
402 NULL, /* xquery */ // FIXME BLOOMFILTER
403 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
404 &dht_get_string_accept_handler, ctx);
405 GNUNET_break (GNUNET_OK ==
406 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
409 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
414 * Function to process DHT string to regex matching.
415 * Called on each result obtained for the DHT search.
417 * @param cls closure (search context)
418 * @param exp when will this value expire
419 * @param key key of the result
420 * @param get_path path of the get request (not used)
421 * @param get_path_length lenght of get_path (not used)
422 * @param put_path path of the put request (not used)
423 * @param put_path_length length of the put_path (not used)
424 * @param type type of the result
425 * @param size number of bytes in data
426 * @param data pointer to the result data
428 * TODO: re-issue the request after certain time? cancel after X results?
431 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
432 const struct GNUNET_HashCode *key,
433 const struct GNUNET_PeerIdentity *get_path,
434 unsigned int get_path_length,
435 const struct GNUNET_PeerIdentity *put_path,
436 unsigned int put_path_length,
437 enum GNUNET_BLOCK_Type type,
438 size_t size, const void *data)
440 const struct RegexBlock *block = data;
441 struct RegexSearchContext *ctx = cls;
442 struct REGEX_INTERNAL_Search *info = ctx->info;
448 if ( (NULL != put_path) &&
449 (0 != put_path_length) )
451 datastore = GNUNET_strdup (GNUNET_i2s (&put_path[put_path_length - 1]));
455 GNUNET_asprintf (&datastore, "?? %u/%u", put_path_length, get_path_length);
458 datastore = GNUNET_strdup ("N/A");
461 LOG (GNUNET_ERROR_TYPE_INFO, " DHT GET result for %s (%s) at %s\n",
462 GNUNET_h2s (key), ctx->info->description, datastore);
463 GNUNET_free (datastore);
465 copy = GNUNET_malloc (size);
466 memcpy (copy, block, size);
469 GNUNET_CONTAINER_multihashmap_put (info->dht_get_results,
471 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)
473 len = strlen (info->description);
474 if (len == ctx->position) // String processed
476 if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block))
478 regex_find_path (key, ctx);
482 LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n");
483 /* FIXME REGEX this block not successful, wait for more? start timeout? */
487 regex_next_edge (block, size, ctx);
492 * Iterator over found existing mesh regex blocks that match an ongoing search.
494 * @param cls Closure (current context)-
495 * @param key Current key code (key for cached block).
496 * @param value Value in the hash map (cached RegexBlock).
497 * @return GNUNET_YES: we should always continue to iterate.
500 regex_result_iterator (void *cls,
501 const struct GNUNET_HashCode * key,
504 struct RegexBlock *block = value;
505 struct RegexSearchContext *ctx = cls;
508 GNUNET_BLOCK_is_accepting (block)) &&
509 (ctx->position == strlen (ctx->info->description)) )
511 LOG (GNUNET_ERROR_TYPE_INFO, " * Found accepting known block\n");
512 regex_find_path (key, ctx);
513 return GNUNET_YES; // We found an accept state!
515 LOG (GNUNET_ERROR_TYPE_DEBUG, "* %u, %u, [%u]\n",
516 ctx->position, strlen(ctx->info->description),
517 GNUNET_BLOCK_is_accepting (block));
519 regex_next_edge (block, SIZE_MAX, ctx);
521 GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
529 * Iterator over edges in a regex block retrieved from the DHT.
531 * @param cls Closure (context of the search).
532 * @param token Token that follows to next state.
533 * @param len Lenght of token.
534 * @param key Hash of next state.
536 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
539 regex_edge_iterator (void *cls,
542 const struct GNUNET_HashCode *key)
544 struct RegexSearchContext *ctx = cls;
545 struct REGEX_INTERNAL_Search *info = ctx->info;
549 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
552 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Start of regex edge iterator\n");
553 LOG (GNUNET_ERROR_TYPE_DEBUG, "* descr : %s\n", info->description);
554 LOG (GNUNET_ERROR_TYPE_DEBUG, "* posit : %u\n", ctx->position);
555 current = &info->description[ctx->position];
556 LOG (GNUNET_ERROR_TYPE_DEBUG, "* currt : %s\n", current);
557 current_len = strlen (info->description) - ctx->position;
558 LOG (GNUNET_ERROR_TYPE_DEBUG, "* ctlen : %u\n", current_len);
559 LOG (GNUNET_ERROR_TYPE_DEBUG, "* tklen : %u\n", len);
560 LOG (GNUNET_ERROR_TYPE_DEBUG, "* token : %.*s\n", len, token);
561 LOG (GNUNET_ERROR_TYPE_DEBUG, "* nextk : %s\n", GNUNET_h2s(key));
562 if (len > current_len)
564 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token too long, END\n");
565 return GNUNET_YES; // Token too long, wont match
567 if (0 != strncmp (current, token, len))
569 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token doesn't match, END\n");
570 return GNUNET_YES; // Token doesn't match
573 if (len > ctx->longest_match)
575 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is longer, KEEP\n");
576 ctx->longest_match = len;
581 LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is not longer, IGNORE\n");
584 LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
590 * Jump to the next edge, with the longest matching token.
592 * @param block Block found in the DHT.
593 * @param size Size of the block.
594 * @param ctx Context of the search.
596 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
599 regex_next_edge (const struct RegexBlock *block,
601 struct RegexSearchContext *ctx)
603 struct RegexSearchContext *new_ctx;
604 struct REGEX_INTERNAL_Search *info = ctx->info;
605 struct GNUNET_DHT_GetHandle *get_h;
606 struct GNUNET_HashCode *hash;
610 /* Find the longest match for the current string position,
611 * among tokens in the given block */
612 ctx->longest_match = 0;
613 result = REGEX_BLOCK_iterate (block, size,
614 ®ex_edge_iterator, ctx);
615 GNUNET_break (GNUNET_OK == result);
617 /* Did anything match? */
618 if (0 == ctx->longest_match)
620 LOG (GNUNET_ERROR_TYPE_DEBUG, " no match in block\n");
625 new_ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
626 new_ctx->info = info;
627 new_ctx->position = ctx->position + ctx->longest_match;
628 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
630 /* Check whether we already have a DHT GET running for it */
632 GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash))
634 LOG (GNUNET_ERROR_TYPE_DEBUG, "* GET for %s running, END\n",
636 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
638 ®ex_result_iterator,
640 return; /* We are already looking for it */
643 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
646 /* Start search in DHT */
647 LOG (GNUNET_ERROR_TYPE_INFO, " looking for %s\n", GNUNET_h2s (hash));
648 rest = &new_ctx->info->description[new_ctx->position];
650 GNUNET_DHT_get_start (info->dht, /* handle */
651 GNUNET_BLOCK_TYPE_REGEX, /* type */
652 hash, /* key to search */
653 DHT_REPLICATION, /* replication level */
656 // FIXME add BLOOMFILTER to exclude filtered peers
657 strlen(rest) + 1, /* xquery bits */
658 // FIXME add BLOOMFILTER SIZE
659 &dht_get_string_handler, new_ctx);
661 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
664 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
672 struct REGEX_INTERNAL_Search *
673 REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
675 REGEX_INTERNAL_Found callback,
677 struct GNUNET_STATISTICS_Handle *stats)
679 struct REGEX_INTERNAL_Search *h;
680 struct GNUNET_DHT_GetHandle *get_h;
681 struct RegexSearchContext *ctx;
682 struct GNUNET_HashCode key;
686 /* Initialize handle */
687 LOG (GNUNET_ERROR_TYPE_INFO, "REGEX_INTERNAL_search: %s\n", string);
688 GNUNET_assert (NULL != dht);
689 GNUNET_assert (NULL != callback);
690 h = GNUNET_malloc (sizeof (struct REGEX_INTERNAL_Search));
692 h->description = GNUNET_strdup (string);
693 h->callback = callback;
694 h->callback_cls = callback_cls;
696 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
697 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO);
699 /* Initialize context */
700 len = strlen (string);
701 size = REGEX_INTERNAL_get_first_key (string, len, &key);
702 ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
703 ctx->position = size;
705 GNUNET_array_append (h->contexts, h->n_contexts, ctx);
706 LOG (GNUNET_ERROR_TYPE_DEBUG, " consumed %u bits out of %u\n", size, len);
707 LOG (GNUNET_ERROR_TYPE_INFO, " looking for %s\n", GNUNET_h2s (&key));
709 /* Start search in DHT */
710 get_h = GNUNET_DHT_get_start (h->dht, /* handle */
711 GNUNET_BLOCK_TYPE_REGEX, /* type */
712 &key, /* key to search */
713 DHT_REPLICATION, /* replication level */
715 &h->description[size], /* xquery */
716 // FIXME add BLOOMFILTER to exclude filtered peers
717 len + 1 - size, /* xquery bits */
718 // FIXME add BLOOMFILTER SIZE
719 &dht_get_string_handler, ctx);
722 GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
725 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
733 * Iterator over hash map entries to cancel DHT GET requests after a
734 * successful connect_by_string.
736 * @param cls Closure (unused).
737 * @param key Current key code (unused).
738 * @param value Value in the hash map (get handle).
739 * @return GNUNET_YES if we should continue to iterate,
743 regex_cancel_dht_get (void *cls,
744 const struct GNUNET_HashCode * key,
747 struct GNUNET_DHT_GetHandle *h = value;
749 GNUNET_DHT_get_stop (h);
755 * Iterator over hash map entries to free MeshRegexBlocks stored during the
756 * search for connect_by_string.
758 * @param cls Closure (unused).
759 * @param key Current key code (unused).
760 * @param value MeshRegexBlock in the hash map.
761 * @return GNUNET_YES if we should continue to iterate,
765 regex_free_result (void *cls,
766 const struct GNUNET_HashCode * key,
775 * Cancel an ongoing regex search in the DHT and free all resources.
777 * @param h the search context.
780 REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h)
784 GNUNET_free (h->description);
785 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles,
786 ®ex_cancel_dht_get, NULL);
787 GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results,
788 ®ex_free_result, NULL);
789 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results);
790 GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles);
791 if (0 < h->n_contexts)
793 for (i = 0; i < h->n_contexts; i++)
794 GNUNET_free (h->contexts[i]);
795 GNUNET_free (h->contexts);
801 /* end of regex_internal_dht.c */