2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
21 * @file src/regex/regex_dht.c
22 * @brief library to announce regexes in the network and match strings
23 * against published regexes.
24 * @author Bartlomiej Polot
27 #include "gnunet_regex_lib.h"
28 #include "regex_block_lib.h"
29 #include "gnunet_dht_service.h"
30 #include "gnunet_statistics_service.h"
32 #define DHT_REPLICATION 5
33 #define DHT_TTL GNUNET_TIME_UNIT_HOURS
/* State kept for one announced regex: the DHT handle handed to the PUTs, the
 * compiled automaton, the announcing peer's identity and an optional
 * statistics handle.
 * NOTE(review): the numbering gaps suggest lines of this definition are
 * missing from the reviewed text (braces, comment delimiters, possibly a
 * field) -- confirm against the original file. */
35 struct GNUNET_REGEX_announce_handle
38 * DHT handle to use, must be initialized externally.
40 struct GNUNET_DHT_Handle *dht;
48 * Automaton representation of the regex (expensive to build).
50 struct GNUNET_REGEX_Automaton* dfa;
53 * Identity under which to announce the regex.
55 struct GNUNET_PeerIdentity *id;
58 * Optional statistics handle to report usage. Can be NULL.
60 struct GNUNET_STATISTICS_Handle *stats;
65 * Regex callback iterator to store own service description in the DHT.
68 * @param key hash for current state.
69 * @param proof proof for current state.
70 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
71 * @param num_edges number of edges leaving current state.
72 * @param edges edges leaving current state.
/* Callback handed to GNUNET_REGEX_iterate_all_edges(): stores one automaton
 * state in the DHT.  cls is the announce handle.  For an accepting state a
 * RegexAccept block is put under the state's key.  The function also builds
 * a RegexBlock (header, then the proof, then one RegexEdge record plus label
 * per outgoing edge) and puts it under the same key.
 * NOTE(review): several lines of this function (some parameters, local
 * declarations, call arguments, braces) are missing from the reviewed text. */
75 regex_iterator (void *cls,
76 const struct GNUNET_HashCode *key,
79 unsigned int num_edges,
80 const struct GNUNET_REGEX_Edge *edges)
82 struct GNUNET_REGEX_announce_handle *h = cls;
83 struct RegexBlock *block;
84 struct RegexEdge *block_edge;
85 enum GNUNET_DHT_RouteOption opt;
92 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
93 " regex dht put for state %s\n",
95 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
98 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
102 opt = GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE;
/* Accepting states additionally get a fixed-size RegexAccept block. */
103 if (GNUNET_YES == accepting)
105 struct RegexAccept block;
107 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
108 " state %s is accepting, putting own id\n",
110 size = sizeof (block);
113 GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
115 GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
116 sizeof (block), GNUNET_NO);
118 GNUNET_DHT_put (h->dht, key,
119 2, /* FIXME option */
120 opt /* | GNUNET_DHT_RO_RECORD_ROUTE*/,
121 GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
124 GNUNET_TIME_relative_to_absolute (GNUNET_TIME_UNIT_HOURS), /* FIXME: expiration time should be option */
125 GNUNET_TIME_UNIT_HOURS, /* FIXME option */
/* Variable-size block: header first, the proof bytes directly behind it. */
129 size = sizeof (struct RegexBlock) + len;
130 block = GNUNET_malloc (size);
/* Header counters are converted with htonl() before being stored. */
133 block->n_proof = htonl (len);
134 block->n_edges = htonl (num_edges);
135 block->accepting = htonl (accepting);
137 /* Store the proof at the end of the block. */
138 aux = (char *) &block[1];
139 memcpy (aux, proof, len);
142 /* Store each edge in a variable length RegexEdge struct at the
143 * very end of the RegexBlock structure.
145 for (i = 0; i < num_edges; i++)
147 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
148 " edge %s towards %s\n",
150 GNUNET_h2s(&edges[i].destination));
152 /* aux points at the end of the last block */
153 len = strlen (edges[i].label);
154 size += sizeof (struct RegexEdge) + len;
155 // Calculate offset FIXME is this ok? use size instead?
/* The block is reallocated below, so aux is remembered as an offset and
 * rebased onto the (possibly new) block address afterwards. */
156 offset = aux - (char *) block;
157 block = GNUNET_realloc (block, size);
158 aux = &((char *) block)[offset];
159 block_edge = (struct RegexEdge *) aux;
160 block_edge->key = edges[i].destination;
161 block_edge->n_token = htonl (len);
/* The edge label is copied right behind its RegexEdge header. */
162 aux = (char *) &block_edge[1];
163 memcpy (aux, edges[i].label, len);
167 GNUNET_DHT_put(h->dht, key,
168 DHT_REPLICATION, /* FIXME OPTION */
170 GNUNET_BLOCK_TYPE_REGEX, size,
172 GNUNET_TIME_relative_to_absolute (DHT_TTL), /* FIXME: this should be an option */
175 GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
177 GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
184 struct GNUNET_REGEX_announce_handle *
185 GNUNET_REGEX_announce (struct GNUNET_DHT_Handle *dht,
186 struct GNUNET_PeerIdentity *id,
188 uint16_t compression,
189 struct GNUNET_STATISTICS_Handle *stats)
191 struct GNUNET_REGEX_announce_handle *h;
193 GNUNET_assert (NULL == dht);
194 h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_announce_handle));
199 h->dfa = GNUNET_REGEX_construct_dfa (regex,
202 GNUNET_REGEX_reannounce (h);
207 GNUNET_REGEX_reannounce (struct GNUNET_REGEX_announce_handle *h)
209 GNUNET_REGEX_iterate_all_edges (h->dfa, ®ex_iterator, h);
213 GNUNET_REGEX_announce_cancel (struct GNUNET_REGEX_announce_handle *h)
215 GNUNET_REGEX_automaton_destroy (h->dfa);
220 /******************************************************************************/
224 * Struct to keep state of running searches that have consumed a part of
/* State of one search branch: a back-pointer to the owning search handle,
 * plus the length and destination hash of the longest matching edge found
 * so far.
 * NOTE(review): other code reads ctx->position, but the line declaring that
 * field is missing from the reviewed text -- confirm against the original. */
227 struct RegexSearchContext
230 * Part of the description already consumed by
231 * this particular search branch.
236 * Information about the search.
238 struct GNUNET_REGEX_search_handle *info;
241 * We just want to look for one edge, the longer the better.
244 unsigned int longest_match;
247 * Destination hash of the longest match.
249 struct GNUNET_HashCode hash;
254 * Struct to keep information of searches of services described by a regex
255 * using a user-provided string service description.
/* Everything belonging to one string search: the DHT and statistics
 * handles, two hash maps (running DHT GET handles and the block copies
 * received so far), the array of per-branch contexts and the user callback.
 * NOTE(review): the description and callback_cls members are used elsewhere
 * in the file, but their declaration lines are missing from the reviewed
 * text. */
257 struct GNUNET_REGEX_search_handle
260 * DHT handle to use, must be initialized externally.
262 struct GNUNET_DHT_Handle *dht;
265 * Optional statistics handle to report usage. Can be NULL.
267 struct GNUNET_STATISTICS_Handle *stats;
270 * User provided description of the searched service.
277 struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
280 * Results from running DHT GETs.
282 struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
285 * Contexts, for each running DHT GET. Free all on end of search.
287 struct RegexSearchContext **contexts;
290 * Number of contexts (branches/steps in search).
292 unsigned int n_contexts;
295 * @param callback Callback for found peers.
297 GNUNET_REGEX_Found callback;
300 * @param callback_cls Closure for @c callback.
308 * Jump to the next edge, with the longest matching token.
310 * @param block Block found in the DHT.
311 * @param size Size of the block.
312 * @param ctx Context of the search.
314 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
/* Forward declaration: dht_get_string_handler() and regex_result_iterator()
 * call regex_next_edge() before its definition further down the file. */
317 regex_next_edge (const struct RegexBlock *block,
319 struct RegexSearchContext *ctx);
323 * Function to process DHT string to regex matching.
324 * Called on each result obtained for the DHT search.
326 * @param cls Closure (search context).
327 * @param exp When will this value expire.
328 * @param key Key of the result.
329 * @param get_path Path of the get request.
330 * @param get_path_length Length of get_path.
331 * @param put_path Path of the put request.
332 * @param put_path_length Length of the put_path.
333 * @param type Type of the result.
334 * @param size Number of bytes in data.
335 * @param data Pointer to the result data.
/* DHT result handler for the GNUNET_BLOCK_TYPE_REGEX_ACCEPT lookup started
 * in regex_find_path(): updates the statistics counters and hands the GET
 * and PUT paths to the user's callback.  cls is the RegexSearchContext that
 * was passed to GNUNET_DHT_get_start().
 * NOTE(review): some argument lines are missing from the reviewed text. */
338 dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
339 const struct GNUNET_HashCode * key,
340 const struct GNUNET_PeerIdentity *get_path,
341 unsigned int get_path_length,
342 const struct GNUNET_PeerIdentity *put_path,
343 unsigned int put_path_length,
344 enum GNUNET_BLOCK_Type type,
345 size_t size, const void *data)
347 const struct RegexAccept *block = data;
348 struct RegexSearchContext *ctx = cls;
349 struct GNUNET_REGEX_search_handle *info = ctx->info;
351 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Got regex results from DHT!\n");
352 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " for %s\n", info->description);
354 GNUNET_STATISTICS_update (info->stats, "# regex accepting blocks found",
356 GNUNET_STATISTICS_update (info->stats, "# regex accepting block bytes found",
/* Report the result to whoever started the search. */
359 info->callback (info->callback_cls,
361 get_path, get_path_length,
362 put_path, put_path_length);
368 * Find a path to a peer that offers a regex service compatible
369 * with a given string.
371 * @param key The key of the accepting state.
372 * @param ctx Context containing info about the string, tunnel, etc.
/* Start a DHT GET for the RegexAccept block stored under key, with
 * dht_get_string_accept_handler() as result callback and route recording
 * enabled, then store an entry in the search's dht_get_handles map.
 * NOTE(review): the key/value argument lines of the multihashmap_put call
 * are missing from the reviewed text; presumably the new GET handle is
 * stored under key -- confirm. */
375 regex_find_path (const struct GNUNET_HashCode *key,
376 struct RegexSearchContext *ctx)
378 struct GNUNET_DHT_GetHandle *get_h;
380 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Found peer by service\n");
381 get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */
382 GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
383 key, /* key to search */
384 DHT_REPLICATION, /* replication level */
385 GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE |
386 GNUNET_DHT_RO_RECORD_ROUTE,
387 NULL, /* xquery */ // FIXME BLOOMFILTER
388 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE
389 &dht_get_string_accept_handler, ctx);
/* A failing insert is only reported through GNUNET_break(). */
390 GNUNET_break (GNUNET_OK ==
391 GNUNET_CONTAINER_multihashmap_put(ctx->info->dht_get_handles,
394 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
399 * Function to process DHT string to regex matching.
400 * Called on each result obtained for the DHT search.
402 * @param cls closure (search context)
403 * @param exp when will this value expire
404 * @param key key of the result
405 * @param get_path path of the get request (not used)
406 * @param get_path_length length of get_path (not used)
407 * @param put_path path of the put request (not used)
408 * @param put_path_length length of the put_path (not used)
409 * @param type type of the result
410 * @param size number of bytes in data
411 * @param data pointer to the result data
413 * TODO: re-issue the request after certain time? cancel after X results?
/* DHT result handler for GNUNET_BLOCK_TYPE_REGEX lookups (registered in
 * regex_next_edge()).  Keeps a copy of the received block in
 * dht_get_results, checks the proof against the key, and then either looks
 * for the accepting peer (description fully consumed and block accepting)
 * or continues along the block's edges via regex_next_edge().
 * NOTE(review): declarations, braces and some statements are missing from
 * the reviewed text. */
416 dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
417 const struct GNUNET_HashCode * key,
418 const struct GNUNET_PeerIdentity *get_path,
419 unsigned int get_path_length,
420 const struct GNUNET_PeerIdentity *put_path,
421 unsigned int put_path_length,
422 enum GNUNET_BLOCK_Type type,
423 size_t size, const void *data)
425 const struct RegexBlock *block = data;
426 struct RegexSearchContext *ctx = cls;
427 struct GNUNET_REGEX_search_handle *info = ctx->info;
431 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
432 "DHT GET STRING RETURNED RESULTS\n");
433 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
434 " key: %s\n", GNUNET_h2s (key));
/* The copy is stored in dht_get_results so later branches can reuse it. */
436 copy = GNUNET_malloc (size);
437 memcpy (copy, data, size);
438 GNUNET_break (GNUNET_OK ==
439 GNUNET_CONTAINER_multihashmap_put(info->dht_get_results, key, copy,
440 GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE));
/* NOTE(review): len is taken from the received block; no check of len
 * against size is visible before the memcpy below -- confirm that the block
 * is validated elsewhere. */
441 len = ntohl (block->n_proof);
445 memcpy (proof, &block[1], len);
447 if (GNUNET_OK != GNUNET_REGEX_check_proof (proof, key))
/* From here on len is the length of the searched description. */
453 len = strlen (info->description);
454 if (len == ctx->position) // String processed
456 if (GNUNET_YES == ntohl (block->accepting))
458 regex_find_path(key, ctx);
462 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " block not accepting!\n");
463 // FIXME REGEX this block not successful, wait for more? start timeout?
468 regex_next_edge (block, size, ctx);
475 * Iterator over found existing mesh regex blocks that match an ongoing search.
478 * @param key current key code
479 * @param value value in the hash map
480 * @return GNUNET_YES if we should continue to iterate,
484 regex_result_iterator (void *cls,
485 const struct GNUNET_HashCode * key,
488 struct RegexBlock *block = value;
489 struct RegexSearchContext *ctx = cls;
491 if (GNUNET_YES == ntohl(block->accepting) &&
492 ctx->position == strlen (ctx->info->description))
494 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* Found accepting known block\n");
495 regex_find_path (key, ctx);
496 return GNUNET_YES; // We found an accept state!
500 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* %u, %u, [%u]\n",
501 ctx->position, strlen(ctx->info->description),
502 ntohl(block->accepting));
505 regex_next_edge(block, SIZE_MAX, ctx);
507 GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
515 * Iterator over edges in a regex block retrieved from the DHT.
517 * @param cls Closure (context of the search).
518 * @param token Token that follows to next state.
519 * @param len Length of token.
520 * @param key Hash of next state.
522 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
/* Edge callback handed to GNUNET_REGEX_block_iterate() by regex_next_edge().
 * Compares the edge's token with the not yet consumed part of the searched
 * description and records the length of the longest matching token in
 * ctx->longest_match.
 * NOTE(review): the token/len parameters, the local declarations and the
 * statements around "KEEP" are missing from the reviewed text; presumably
 * the edge's destination key is saved to ctx->hash there -- confirm. */
525 regex_edge_iterator (void *cls,
528 const struct GNUNET_HashCode *key)
530 struct RegexSearchContext *ctx = cls;
531 struct GNUNET_REGEX_search_handle *info = ctx->info;
535 GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
538 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* Start of regex edge iterator\n");
539 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* descr : %s\n", info->description);
540 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* posit : %u\n", ctx->position);
/* current is the part of the description this branch has not consumed. */
541 current = &info->description[ctx->position];
542 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* currt : %s\n", current);
543 current_len = strlen (info->description) - ctx->position;
544 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* ctlen : %u\n", current_len);
545 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* tklen : %u\n", len);
546 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* token : %.*s\n", len, token);
547 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* nextk : %s\n", GNUNET_h2s(key));
/* A token longer than the remaining text cannot match; keep iterating. */
548 if (len > current_len)
550 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* Token too long, END\n");
551 return GNUNET_YES; // Token too long, wont match
553 if (0 != strncmp (current, token, len))
555 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* Token doesn't match, END\n");
556 return GNUNET_YES; // Token doesn't match
/* Only a strictly longer match replaces the current best one. */
559 if (len > ctx->longest_match)
561 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* Token is longer, KEEP\n");
562 ctx->longest_match = len;
567 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* Token is not longer, IGNORE\n");
570 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
576 * Jump to the next edge, with the longest matching token.
578 * @param block Block found in the DHT.
579 * @param size Size of the block.
580 * @param ctx Context of the search.
582 * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
/* Advance a search branch by one edge.  Iterates the edges of block with
 * regex_edge_iterator() to find the longest matching token, creates a new
 * context positioned behind that token, and then either replays the already
 * stored results (when the dht_get_handles map already contains ctx->hash)
 * or starts a new DHT GET for that key.
 * NOTE(review): declarations, braces and several argument lines are missing
 * from the reviewed text.  The "®ex_..." spellings below look like a
 * mis-encoded address-of operator followed by "regex_..." -- confirm
 * against the original file. */
585 regex_next_edge (const struct RegexBlock *block,
587 struct RegexSearchContext *ctx)
589 struct RegexSearchContext *new_ctx;
590 struct GNUNET_REGEX_search_handle *info = ctx->info;
591 struct GNUNET_DHT_GetHandle *get_h;
595 /* Find the longest match for the current string position,
596 * among tokens in the given block */
597 ctx->longest_match = 0;
598 result = GNUNET_REGEX_block_iterate (block, size,
599 ®ex_edge_iterator, ctx);
600 GNUNET_break (GNUNET_OK == result);
602 /* Did anything match? */
603 if (0 == ctx->longest_match)
/* The new context starts behind the matched token and is registered in
 * info->contexts, which regex_cancel_search() frees. */
606 new_ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
607 new_ctx->info = info;
608 new_ctx->position = ctx->position + ctx->longest_match;
609 GNUNET_array_append (info->contexts, info->n_contexts, new_ctx);
611 /* Check whether we already have a DHT GET running for it */
613 GNUNET_CONTAINER_multihashmap_contains(info->dht_get_handles, &ctx->hash))
615 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* GET running, END\n");
616 GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results,
618 ®ex_result_iterator,
620 // FIXME: "leaks" new_ctx? avoid keeping it around?
621 return; // We are already looking for it
624 GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed",
627 /* Start search in DHT */
/* rest is the still unmatched tail of the description. */
628 rest = &new_ctx->info->description[new_ctx->position];
630 GNUNET_DHT_get_start (info->dht, /* handle */
631 GNUNET_BLOCK_TYPE_REGEX, /* type */
632 &ctx->hash, /* key to search */
633 DHT_REPLICATION, /* replication level */
634 GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE,
636 // FIXME add BLOOMFILTER to exclude filtered peers
637 strlen(rest) + 1, /* xquery bits */
638 // FIXME add BLOOMFILTER SIZE
639 &dht_get_string_handler, new_ctx);
641 GNUNET_CONTAINER_multihashmap_put(info->dht_get_handles,
644 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST))
652 struct GNUNET_REGEX_search_handle *
653 GNUNET_REGEX_search (struct GNUNET_DHT_Handle *dht,
655 GNUNET_REGEX_Found callback,
657 struct GNUNET_STATISTICS_Handle *stats)
659 struct GNUNET_REGEX_search_handle *h;
661 GNUNET_assert (NULL == dht);
662 h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_search_handle));
664 h->description = GNUNET_strdup (string);
665 h->callback = callback;
666 h->callback_cls = callback_cls;
669 h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
670 h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
676 * Iterator over hash map entries to cancel DHT GET requests after a
677 * successful connect_by_string.
679 * @param cls Closure (unused).
680 * @param key Current key code (unused).
681 * @param value Value in the hash map (get handle).
682 * @return GNUNET_YES if we should continue to iterate,
/* Hash map iterator that regex_cancel_search() runs over dht_get_handles:
 * stops the DHT GET stored as the entry's value.
 * NOTE(review): the value parameter, the braces and the return statement
 * are missing from the reviewed text. */
686 regex_cancel_dht_get (void *cls,
687 const struct GNUNET_HashCode * key,
690 struct GNUNET_DHT_GetHandle *h = value;
692 GNUNET_DHT_get_stop (h);
698 * Iterator over hash map entries to free MeshRegexBlocks stored during the
699 * search for connect_by_string.
701 * @param cls Closure (unused).
702 * @param key Current key code (unused).
703 * @param value MeshRegexBlock in the hash map.
704 * @return GNUNET_YES if we should continue to iterate,
/* Hash map iterator that regex_cancel_search() runs over dht_get_results.
 * NOTE(review): the rest of the signature and the whole body are missing
 * from the reviewed text; presumably it frees the block copy stored as the
 * entry's value -- confirm against the original file. */
708 regex_free_result (void *cls,
709 const struct GNUNET_HashCode * key,
719 * Cancel an ongoing regex search in the DHT and free all resources.
721 * @param ctx The search context.
724 regex_cancel_search (struct GNUNET_REGEX_search_handle *ctx)
726 GNUNET_free (ctx->description);
727 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_handles,
728 ®ex_cancel_dht_get, NULL);
729 GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_results,
730 ®ex_free_result, NULL);
731 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_results);
732 GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_handles);
733 if (0 < ctx->n_contexts)
737 for (i = 0; i < ctx->n_contexts; i++)
739 GNUNET_free (ctx->contexts[i]);
741 GNUNET_free (ctx->contexts);
/**
 * Stop a search and release all of its resources, including the handle.
 * The DHT and statistics handles are owned by the caller and are left
 * untouched.
 *
 * @param h Handle returned by GNUNET_REGEX_search(); invalid afterwards.
 */
void
GNUNET_REGEX_search_cancel (struct GNUNET_REGEX_search_handle *h)
{
  regex_cancel_search (h);
  /* regex_cancel_search() releases what the handle points to but not the
   * handle, which was allocated with GNUNET_malloc() in
   * GNUNET_REGEX_search(). */
  GNUNET_free (h);
}
754 /* end of regex_dht.c */