2 This file is part of GNUnet.
3 (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
23 * @brief definitions for the entire fs module
24 * @author Igor Wronsky, Christian Grothoff
29 #include "gnunet_datastore_service.h"
30 #include "gnunet_fs_service.h"
33 * Size of the individual blocks used for file-sharing.
35 #define DBLOCK_SIZE (32*1024)
39 * Pick a multiple of 2 here to achieve 8-byte alignment!
40 * We also probably want DBlocks to have (roughly) the
41 * same size as IBlocks. With SHA-512, the optimal
42 * value is 32768 byte / 128 byte = 256
43 * (128 byte = 2 * 512 bits). DO NOT CHANGE!
45 #define CHK_PER_INODE 256
49 * Maximum size for a file to be considered for
50 * inlining in a directory.
52 #define MAX_INLINE_SIZE 65536
56 * Blocksize to use when hashing files
57 * for indexing (blocksize for IO, not for
58 * the DBlocks). Larger blocksizes can
59 * be more efficient but will be more disruptive
60 * as far as the scheduler is concerned.
62 #define HASHING_BLOCKSIZE (1024 * 1024)
66 * @brief content hash key
71 GNUNET_HashCode query;
76 * @brief complete information needed
83 * Total size of the file in bytes. (network byte order (!))
88 * Query and key of the top GNUNET_EC_IBlock.
90 struct ContentHashKey chk;
96 * Information about a file and its location
97 * (peer claiming to share the file).
102 * Information about the shared file.
104 struct FileIdentifier fi;
107 * Identity of the peer sharing the file.
109 struct GNUNET_CRYPTO_RsaPublicKeyBinaryEncoded peer;
112 * Time when this location URI expires.
114 struct GNUNET_TIME_Absolute expirationTime;
117 * RSA signature over the GNUNET_EC_FileIdentifier,
118 * GNUNET_hash of the peer and expiration time.
120 struct GNUNET_CRYPTO_RsaSignature contentSignature;
125 { chk, sks, ksk, loc };
128 * A Universal Resource Identifier (URI), opaque.
138 * Keywords start with a '+' if they are
139 * mandatory (in which case the '+' is NOT
140 * part of the keyword) and with a
141 * simple space if they are optional
142 * (in which case the space is ALSO not
143 * part of the actual keyword).
145 * Double-quotes to protect spaces and
146 * %-encoding are NOT used internally
147 * (only in URI-strings).
152 * Size of the keywords array.
154 unsigned int keywordCount;
160 * Hash of the public key for the namespace.
162 GNUNET_HashCode namespace;
165 * Human-readable identifier chosen for this
166 * entry in the namespace.
172 * Information needed to retrieve a file (content-hash-key
175 struct FileIdentifier chk;
178 * Information needed to retrieve a file including signed
179 * location (identity of a peer) of the content.
188 * Information for a file or directory that is
189 * about to be published.
191 struct GNUNET_FS_FileInformation
195 * Files in a directory are kept as a linked list.
197 struct GNUNET_FS_FileInformation *next;
200 * If this is a file in a directory, "dir" refers to
201 * the directory; otherwise NULL.
203 struct GNUNET_FS_FileInformation *dir;
206 * Pointer kept for the client.
211 * Metadata to use for the file.
213 struct GNUNET_CONTAINER_MetaData *meta;
216 * Keywords to use for KBlocks.
218 struct GNUNET_FS_Uri *keywords;
221 * CHK for this file or directory. NULL if
222 * we have not yet computed it.
224 struct GNUNET_FS_Uri *chk_uri;
227 * At what time should the content expire?
229 struct GNUNET_TIME_Absolute expirationTime;
232 * At what time did we start this upload?
234 struct GNUNET_TIME_Absolute start_time;
237 * Under what filename is this struct serialized
238 * (for operational persistence).
243 * Encoder being used to publish this file.
245 struct GNUNET_FS_TreeEncoder *te;
248 * Error message (non-NULL if this operation
254 * Data describing either the file or the directory.
265 * Function that can be used to read the data for the file.
267 GNUNET_FS_DataReader reader;
270 * Closure for reader.
275 * Name of the file (must be an absolute path).
276 * Only required for indexing. FIXME: not yet
282 * If this file is being indexed, this value
283 * is set to the hash over the entire file
284 * (when the indexing process is started).
285 * Otherwise this field is not used.
287 GNUNET_HashCode file_id;
290 * Size of the file (in bytes).
295 * Should the file be indexed or inserted?
302 * Data for a directory.
307 * Name of the directory.
312 * Linked list of entries in the directory.
314 struct GNUNET_FS_FileInformation *entries;
317 * Size of the directory itself (in bytes); 0 if the
318 * size has not yet been calculated.
323 * Pointer to the data for the directory (or NULL if not
333 * Is this struct for a file or directory?
338 * Desired anonymity level.
343 * Desired priority (for keeping the content in the DB).
351 * Master context for most FS operations.
353 struct GNUNET_FS_Handle
358 struct GNUNET_SCHEDULER_Handle *sched;
361 * Configuration to use.
363 const struct GNUNET_CONFIGURATION_Handle *cfg;
366 * Name of our client.
371 * Function to call with updates on our progress.
373 GNUNET_FS_ProgressCallback upcb;
381 * Connection to the FS service.
383 struct GNUNET_CLIENT_Connection *client;
386 * How many downloads probing availability
387 * of search results do we have running
390 unsigned int active_probes;
395 enum GNUNET_FS_Flags flags;
401 * Handle for controlling an upload.
403 struct GNUNET_FS_PublishContext
406 * Handle to the global fs context.
408 struct GNUNET_FS_Handle *h;
411 * Argument to pass to the client in callbacks.
416 * File-structure that is being shared.
418 struct GNUNET_FS_FileInformation *fi;
421 * Namespace that we are publishing in, NULL if we have no namespace.
423 struct GNUNET_FS_Namespace *namespace;
426 * ID of the content in the namespace, NULL if we have no namespace.
431 * ID for future updates, NULL if we have no namespace or no updates.
436 * Our own client handle for the FS service;
437 * only briefly used when we start to index a
438 * file, otherwise NULL.
440 struct GNUNET_CLIENT_Connection *client;
443 * Current position in the file-tree for the
446 struct GNUNET_FS_FileInformation *fi_pos;
449 * Connection to the datastore service.
451 struct GNUNET_DATASTORE_Handle *dsh;
454 * ID of the task performing the upload. NO_TASK
455 * if the upload has completed.
457 GNUNET_SCHEDULER_TaskIdentifier upload_task;
460 * Typically GNUNET_NO. Set to GNUNET_YES if
461 * "upload_task" is GNUNET_SCHEDULER_NO_TASK
462 * and we're waiting for a response from the
463 * datastore service (in which case this
464 * struct must not be freed until we have that
465 * response). If someone tries to stop the
466 * upload for good during this period,
467 * "in_network_wait" is set to GNUNET_SYSERR
468 * which will cause the struct to be destroyed
469 * right after we have the reply (or timeout)
470 * from the datastore service.
475 * Options for publishing.
477 enum GNUNET_FS_PublishOptions options;
480 * Space reservation ID with datastore service
488 * Phases of unindex processing (state machine).
493 * We're currently hashing the file.
495 UNINDEX_STATE_HASHING = 0,
498 * We're notifying the FS service about
501 UNINDEX_STATE_FS_NOTIFY = 1,
504 * We're telling the datastore to delete
505 * the respective entries.
507 UNINDEX_STATE_DS_REMOVE = 2,
512 UNINDEX_STATE_COMPLETE = 3,
515 * We've encountered a fatal error.
517 UNINDEX_STATE_ERROR = 4,
520 * We've been aborted. The next callback should clean up the
523 UNINDEX_STATE_ABORTED = 5
528 * Handle for controlling an unindexing operation.
530 struct GNUNET_FS_UnindexContext
536 struct GNUNET_FS_Handle *h;
539 * Name of the file that we are unindexing.
544 * Connection to the FS service,
545 * only valid during the UNINDEX_STATE_FS_NOTIFY
548 struct GNUNET_CLIENT_Connection *client;
551 * Connection to the datastore service,
552 * only valid during the UNINDEX_STATE_DS_REMOVE
555 struct GNUNET_DATASTORE_Handle *dsh;
558 * Pointer kept for the client.
563 * Merkle-ish tree encoder context.
565 struct GNUNET_FS_TreeEncoder *tc;
568 * Handle used to read the file.
570 struct GNUNET_DISK_FileHandle *fh;
573 * Overall size of the file.
580 struct GNUNET_TIME_Absolute start_time;
583 * Hash of the file's contents (once
586 GNUNET_HashCode file_id;
589 * Current operational phase.
591 enum UnindexState state;
597 * Information we store for each search result.
603 * URI to which this search result
606 struct GNUNET_FS_Uri *uri;
609 * Metadata for the search result.
611 struct GNUNET_CONTAINER_MetaData *meta;
614 * Client info for this search result.
619 * ID of a job that is currently probing
620 * this result's availability (NULL if we
621 * are not currently probing).
623 struct GNUNET_FS_DownloadContext *probe_ctx;
626 * ID of the task that will clean up the probe_ctx
627 * should it not complete on time (and that will
628 * need to be cancelled if we clean up the search
629 * result before then).
631 GNUNET_SCHEDULER_TaskIdentifier probe_cancel_task;
634 * Number of mandatory keywords for which
635 * we have NOT yet found the search result;
636 * when this value hits zero, the search
637 * result is given to the callback.
639 uint32_t mandatory_missing;
642 * Number of optional keywords under which
643 * this result was also found.
645 uint32_t optional_support;
648 * Number of availability tests that
649 * have succeeded for this result.
651 uint32_t availability_success;
654 * Number of availability trials that we
655 * have performed for this search result.
657 uint32_t availability_trials;
663 * Information we keep for each keyword in
666 struct SearchRequestEntry
669 * Hash of the original keyword, also known as the
670 * key (for decrypting the KBlock).
675 * Hash of the public key, also known as the query.
677 GNUNET_HashCode query;
680 * Map that contains a "struct SearchResult" for each result that
681 * was found under this keyword. Note that the entries will point
682 * to the same locations as those in the master result map (in
683 * "struct GNUNET_FS_SearchContext"), so they should not be freed.
684 * The key for each entry is the XOR of the key and query in the CHK
685 * URI (as a unique identifier for the search result).
687 struct GNUNET_CONTAINER_MultiHashMap *results;
690 * Is this keyword a mandatory keyword
691 * (started with '+')?
699 * Handle for controlling a search.
701 struct GNUNET_FS_SearchContext
704 * Handle to the global FS context.
706 struct GNUNET_FS_Handle *h;
709 * List of keywords that we're looking for.
711 struct GNUNET_FS_Uri *uri;
714 * For update-searches, link to the
715 * base-SKS search that triggered the
716 * update search; otherwise NULL.
718 struct GNUNET_FS_SearchContext *parent;
721 * Connection to the FS service.
723 struct GNUNET_CLIENT_Connection *client;
726 * Pointer we keep for the client.
731 * Map that contains a "struct SearchResult" for each result that
732 * was found in the search. The key for each entry is the XOR of
733 * the key and query in the CHK URI (as a unique identifier for the
736 struct GNUNET_CONTAINER_MultiHashMap *master_result_map;
739 * Per-keyword information for a keyword search.
740 * This array will have exactly as many entries
741 * as there were keywords.
743 struct SearchRequestEntry *requests;
748 struct GNUNET_TIME_Absolute start_time;
751 * ID of a task that is using this struct
752 * and that must be cancelled when the search
753 * is being stopped (if not GNUNET_SCHEDULER_NO_TASK).
754 * Used for the task that adds some artificial
755 * delay when trying to reconnect to the FS
758 GNUNET_SCHEDULER_TaskIdentifier task;
761 * Anonymity level for the search.
766 * Number of mandatory keywords in this query.
768 uint32_t mandatory_count;
773 * Information about an active download request.
775 struct DownloadRequest
778 * While pending, we keep all download requests
781 struct DownloadRequest *next;
784 * CHK for the request.
786 struct ContentHashKey chk;
789 * Offset of the corresponding block.
794 * Depth of the corresponding block in the tree.
799 * Set if this request is currently in the linked list of pending
800 * requests. Needed in case we get a response for a request that we
801 * have not yet sent (due to FS bug or two blocks with identical
802 * content); in this case, we would need to remove the block from
803 * the pending list (and need a fast way to check if the block is on
812 * Context for controlling a download.
814 struct GNUNET_FS_DownloadContext
820 struct GNUNET_FS_Handle *h;
823 * Connection to the FS service.
825 struct GNUNET_CLIENT_Connection *client;
828 * Parent download (used when downloading files
831 struct GNUNET_FS_DownloadContext *parent;
834 * Context kept for the client.
839 * URI that identifies the file that
840 * we are downloading.
842 struct GNUNET_FS_Uri *uri;
845 * Known meta-data for the file (can be NULL).
847 struct GNUNET_CONTAINER_MetaData *meta;
850 * Error message, NULL if we're doing OK.
855 * Where are we writing the data (name of the
856 * file, can be NULL!).
861 * Map of active requests (those waiting
862 * for a response). The key is the hash
863 * of the encrypted block (aka query).
865 struct GNUNET_CONTAINER_MultiHashMap *active;
868 * Linked list of pending requests.
870 struct DownloadRequest *pending;
873 * The file handle, NULL if we don't create
876 struct GNUNET_DISK_FileHandle *handle;
879 * Identity of the peer having the content, or all-zeros
880 * if we don't know of such a peer.
882 struct GNUNET_PeerIdentity target;
885 * ID of a task that is using this struct
886 * and that must be cancelled when the download
887 * is being stopped (if not GNUNET_SCHEDULER_NO_TASK).
888 * Used for the task that adds some artificial
889 * delay when trying to reconnect to the FS
892 GNUNET_SCHEDULER_TaskIdentifier task;
895 * What was the size of the file on disk that we're downloading
896 * before we started? Used to detect if there is a point in
897 * checking an existing block on disk for matching the desired
898 * content. 0 if the file did not exist already.
900 uint64_t old_file_size;
903 * What is the first offset that we're interested
909 * How many bytes starting from offset are desired?
910 * This is NOT the overall length of the file!
915 * How many bytes have we already received within
916 * the specified range (DBlocks only).
921 * Time download was started.
923 struct GNUNET_TIME_Absolute start_time;
926 * Desired level of anonymity.
931 * The depth of the file-tree.
933 unsigned int treedepth;
936 * Options for the download.
938 enum GNUNET_FS_DownloadOptions options;
942 struct GNUNET_FS_Namespace
946 * Private key for the namespace.
948 struct GNUNET_CRYPTO_RsaPrivateKey *key;
958 * @brief index block (indexing a DBlock that
959 * can be obtained directly from reading
960 * the plaintext file)
965 * Hash code of the entire content of the
966 * file that was indexed (used to uniquely
967 * identify the plaintext file).
969 GNUNET_HashCode file_id;
972 * At which offset should we be able to find
973 * this on-demand encoded block?
975 uint64_t offset GNUNET_PACKED;
981 * @brief keyword block (advertising data under a keyword)
987 * GNUNET_RSA_Signature using RSA-key generated from search keyword.
989 struct GNUNET_CRYPTO_RsaSignature signature;
992 * What is being signed and why?
994 struct GNUNET_CRYPTO_RsaSignaturePurpose purpose;
997 * Key generated (!) from the H(keyword) as the seed!
999 struct GNUNET_CRYPTO_RsaPublicKeyBinaryEncoded keyspace;
1001 /* 0-terminated URI here */
1003 /* variable-size Meta-Data follows here */
1008 * @brief namespace content block (advertising data under an identifier in a namespace)
1014 * GNUNET_RSA_Signature using RSA-key of the namespace
1016 struct GNUNET_CRYPTO_RsaSignature signature;
1019 * What is being signed and why?
1021 struct GNUNET_CRYPTO_RsaSignaturePurpose purpose;
1024 * Hash of the hash of the human-readable identifier used for
1025 * this entry (the hash of the human-readable identifier is
1026 * used as the key for decryption; the xor of this identifier
1027 * and the hash of the "subspace" is the datastore-query hash).
1029 GNUNET_HashCode identifier;
1032 * Public key of the namespace.
1034 struct GNUNET_CRYPTO_RsaPublicKeyBinaryEncoded subspace;
1036 /* 0-terminated update-identifier here */
1038 /* 0-terminated URI here */
1040 /* variable-size Meta-Data follows here */
1046 * Message sent from a GNUnet (fs) publishing
1047 * activity to the gnunet-fs-service to
1048 * initiate indexing of a file. The service
1049 * is supposed to check if the specified file
1050 * is available and has the same cryptographic
1051 * hash. It should then respond with either
1052 * a confirmation or a denial.
1054 * On OSes where this works, it is considered
1055 * acceptable if the service only checks that
1056 * the path, device and inode match (it can
1057 * then be assumed that the hash will also match
1058 * without actually computing it; this is an
1059 * optimization that should be safe given that
1060 * the client is not our adversary).
1062 struct IndexStartMessage
1066 * Message type will be
1067 * GNUNET_MESSAGE_TYPE_FS_INDEX_START.
1069 struct GNUNET_MessageHeader header;
1072 * ID of device containing the file, as seen by the client. This
1073 * device ID is obtained using a call like "statvfs" (and converting
1074 * the "f_fsid" field to a 32-bit big-endian number). Use 0 if the
1075 * OS does not support this, in which case the service must do a
1076 * full hash recomputation.
1078 uint32_t device GNUNET_PACKED;
1081 * Inode of the file on the given device, as seen by the client
1082 * ("st_ino" field from "struct stat"). Use 0 if the OS does not
1083 * support this, in which case the service must do a full hash
1086 uint64_t inode GNUNET_PACKED;
1089 * Hash of the file that we would like to index.
1091 GNUNET_HashCode file_id;
1093 /* this is followed by a 0-terminated
1094 filename of a file with the hash
1095 "file_id" as seen by the client */
1101 * Message sent by FS service in response to a request
1102 * asking for a list of all indexed files.
1104 struct IndexInfoMessage
1107 * Message type will be
1108 * GNUNET_MESSAGE_TYPE_FS_INDEX_LIST_ENTRY.
1110 struct GNUNET_MessageHeader header;
1115 uint32_t reserved GNUNET_PACKED;
1118 * Hash of the indexed file.
1120 GNUNET_HashCode file_id;
1122 /* this is followed by a 0-terminated
1123 filename of a file with the hash
1124 "file_id" as seen by the client */
1130 * Message sent from a GNUnet (fs) unindexing
1131 * activity to the gnunet-fs-service to
1132 * indicate that a file will be unindexed. The service
1133 * is supposed to remove the file from the
1134 * list of indexed files and respond with
1135 * a confirmation message (even if the file
1136 * was already not on the list).
1138 struct UnindexMessage
1142 * Message type will be
1143 * GNUNET_MESSAGE_TYPE_FS_UNINDEX.
1145 struct GNUNET_MessageHeader header;
1150 uint32_t reserved GNUNET_PACKED;
1153 * Hash of the file that we will unindex.
1155 GNUNET_HashCode file_id;
1161 * Message sent from a GNUnet (fs) search
1162 * activity to the gnunet-fs-service to
1165 struct SearchMessage
1169 * Message type will be
1170 * GNUNET_MESSAGE_TYPE_FS_START_SEARCH.
1172 struct GNUNET_MessageHeader header;
1177 int32_t reserved GNUNET_PACKED;
1180 * Type of the content that we're looking for.
1183 uint32_t type GNUNET_PACKED;
1186 * Desired anonymity level, big-endian.
1188 uint32_t anonymity_level GNUNET_PACKED;
1191 * If the request is for a DBLOCK or IBLOCK, this is the identity of
1192 * the peer that is known to have a response. Set to all-zeros if
1193 * such a target is not known (note that even if OUR anonymity
1194 * level is >0 we may happen to know the responder's identity;
1195 * nevertheless, we should probably not use it for a DHT-lookup
1196 * or similar blunt actions in order to avoid exposing ourselves).
1198 * If the request is for an SBLOCK, this is the identity of the
1199 * pseudonym to which the SBLOCK belongs.
1201 * If the request is for a KBLOCK, "target" must be all zeros.
1203 GNUNET_HashCode target;
1206 * Hash of the keyword (aka query) for KBLOCKs; Hash of
1207 * the CHK-encoded block for DBLOCKS and IBLOCKS (aka query)
1208 * and hash of the identifier XORed with the target for
1209 * SBLOCKS (aka query).
1211 GNUNET_HashCode query;
1217 * Response from FS service with a result for
1218 * a previous FS search. Note that queries
1219 * for DBLOCKS and IBLOCKS that have received
1220 * a single response are considered done.
1222 struct ContentMessage
1226 * Message type will be
1227 * GNUNET_MESSAGE_TYPE_FS_CONTENT.
1229 struct GNUNET_MessageHeader header;
1232 * Type of the content that was found,
1233 * should never be 0.
1235 uint32_t type GNUNET_PACKED;
1238 * When will this result expire?
1240 struct GNUNET_TIME_AbsoluteNBO expiration;
1242 /* followed by the actual block of data */