From: Christian Grothoff Date: Sun, 30 Aug 2009 21:07:10 +0000 (+0000) Subject: adding indexing support X-Git-Tag: initial-import-from-subversion-38251~23549 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=c3d7c40c3cd0ec03c7f6b27e6b5f7eac1aa80ed5;p=oweals%2Fgnunet.git adding indexing support --- diff --git a/TODO b/TODO index 391fd483f..073c5e137 100644 --- a/TODO +++ b/TODO @@ -37,11 +37,10 @@ PHASE #2: (Goal: recover basic file-sharing functionality) - implement testcases * FS (anonymous FS only) - design network structs (CS) - + list-indexed, index, unindex + + list-indexed, unindex + search/download, response - implement basic FS library + sharing API - ~ publish (indexing) ~ unindex & list indexed!!! (need publish to be done) ~ search (need publish to be done) ~ download (need publish/search to be done) @@ -68,6 +67,7 @@ PHASE #2: (Goal: recover basic file-sharing functionality) + location URIs (publish, search, download) + persistence support (publish, unindex, search, download) + datastore reservation (publishing) + + indexing: index-failure-cleanup - implement adv. testcases + insert: sblocks, loc uris + download: loc uris diff --git a/src/fs/fs.h b/src/fs/fs.h index 288903418..e4eee7fd0 100644 --- a/src/fs/fs.h +++ b/src/fs/fs.h @@ -288,6 +288,21 @@ struct GNUNET_FS_FileInformation */ void *reader_cls; + /** + * Name of the file (must be an absolute path). + * Only required for indexing. FIXME: not yet + * initialized! + */ + char *filename; + + /** + * If this file is being indexed, this value + * is set to the hash over the entire file + * (when the indexing process is started). + * Otherwise this field is not used. + */ + GNUNET_HashCode file_id; + /** * Size of the file (in bytes). */ @@ -429,6 +444,13 @@ struct GNUNET_FS_PublishContext */ GNUNET_SCHEDULER_TaskIdentifier upload_task; + /** + * Our own client handle for the FS service; + * only briefly used when we start to index a + * file, otherwise NULL. 
+ */ + struct GNUNET_CLIENT_Connection *client; + /** * Typically GNUNET_NO. Set to GNUNET_YES if * "upload_task" is GNUNET_SCHEDULER_NO_TASK @@ -506,6 +528,29 @@ struct GNUNET_FS_Namespace }; +/** + * @brief index block (indexing a DBlock that + * can be obtained directly from reading + * the plaintext file) + */ +struct OnDemandBlock +{ + /** + * Hash code of the entire content of the + * file that was indexed (used to uniquely + * identify the plaintext file). + */ + GNUNET_HashCode file_id; + + /** + * At which offset should we be able to find + * this on-demand encoded block? + */ + uint64_t offset; + +}; + + /** * @brief keyword block (advertising data under a keyword) */ @@ -571,9 +616,58 @@ struct SBlock }; +/** + * Message sent from a GNUnet (fs) publishing + * activity to the gnunet-fs-service to + * initiate indexing of a file. The service + * is supposed to check if the specified file + * is available and has the same cryptographic + * hash. It should then respond with either + * a confirmation or a denial. + * + * On OSes where this works, it is considered + * acceptable if the service only checks that + * the path, device and inode match (it can + * then be assumed that the hash will also match + * without actually computing it; this is an + * optimization that should be safe given that + * the client is not our adversary). + */ struct IndexStartMessage { + /** + * Message type will be + * GNUNET_MESSAGE_TYPE_FS_INDEX_START. + */ + struct GNUNET_MessageHeader header; + + /** + * ID of device containing the file, as seen by the client. This + * device ID is obtained using a call like "statvfs" (and converting + * the "f_fsid" field to a 32-bit big-endian number). Use 0 if the + * OS does not support this, in which case the service must do a + * full hash recomputation. + */ + uint32_t device; + + /** + * Inode of the file on the given device, as seen by the client + * ("st_ino" field from "struct stat"). 
Use 0 if the OS does not + * support this, in which case the service must do a full hash + * recomputation. + */ + uint64_t inode; + + /** + * Hash of the file that we would like to index. + */ + GNUNET_HashCode file_id; + + /* this is followed by a 0-terminated + filename of a file with the hash + "file_id" as seen by the client */ + }; diff --git a/src/fs/fs_publish.c b/src/fs/fs_publish.c index 91ca3240a..13ce4d5aa 100644 --- a/src/fs/fs_publish.c +++ b/src/fs/fs_publish.c @@ -26,7 +26,7 @@ * @author Christian Grothoff * * TODO: - * - indexing support + * - indexing cleanup: unindex on failure (can wait) * - code-sharing with unindex (can wait) * - persistence support (can wait) * - datastore reservation support (optimization) @@ -52,6 +52,14 @@ */ #define MAX_SBLOCK_SIZE 60000 +/** + * Blocksize to use when hashing files + * for indexing (blocksize for IO, not for + * the DBlocks). Larger blocksizes can + * be more efficient but will be more disruptive + * as far as the scheduler is concerned. + */ +#define HASHING_BLOCKSIZE (1024 * 1024) /** * Main function that performs the upload. @@ -471,6 +479,7 @@ publish_content (struct GNUNET_FS_PublishContext *sc, void *raw_data; char *dd; struct PutContCtx * dpc_cls; + struct OnDemandBlock odb; // FIXME: figure out how to share this code // with unindex! @@ -593,8 +602,6 @@ publish_content (struct GNUNET_FS_PublishContext *sc, enc); // NOTE: this block below is all that really differs // between publish/unindex! Parameterize & move this code! - // FIXME: something around here would need to change - // for indexing! if (NULL == sc->dsh) { sc->upload_task @@ -614,20 +621,42 @@ publish_content (struct GNUNET_FS_PublishContext *sc, dpc_cls->cont = &do_upload; dpc_cls->cont_cls = sc; dpc_cls->p = p; - GNUNET_DATASTORE_put (sc->dsh, - sc->rid, - &mychk->query, - pt_size, - enc, - (p->current_depth == p->chk_tree_depth) - ? 
GNUNET_DATASTORE_BLOCKTYPE_DBLOCK - : GNUNET_DATASTORE_BLOCKTYPE_IBLOCK, - p->priority, - p->anonymity, - p->expirationTime, - GNUNET_CONSTANTS_SERVICE_TIMEOUT, - &ds_put_cont, - dpc_cls); + if ( (!p->is_directory) && + (p->data.file.do_index) && + (p->current_depth == p->chk_tree_depth) ) + { + odb.offset = p->publish_offset; + odb.file_id = p->data.file.file_id; + GNUNET_DATASTORE_put (sc->dsh, + sc->rid, + &mychk->query, + sizeof(struct OnDemandBlock), + &odb, + GNUNET_DATASTORE_BLOCKTYPE_ONDEMAND, + p->priority, + p->anonymity, + p->expirationTime, + GNUNET_CONSTANTS_SERVICE_TIMEOUT, + &ds_put_cont, + dpc_cls); + } + else + { + GNUNET_DATASTORE_put (sc->dsh, + sc->rid, + &mychk->query, + pt_size, + enc, + (p->current_depth == p->chk_tree_depth) + ? GNUNET_DATASTORE_BLOCKTYPE_DBLOCK + : GNUNET_DATASTORE_BLOCKTYPE_IBLOCK, + p->priority, + p->anonymity, + p->expirationTime, + GNUNET_CONSTANTS_SERVICE_TIMEOUT, + &ds_put_cont, + dpc_cls); + } } if (p->current_depth == p->chk_tree_depth) { @@ -668,6 +697,153 @@ publish_content (struct GNUNET_FS_PublishContext *sc, } + + +/** + * Process the response (or lack thereof) from + * the "fs" service to our 'start index' request. + * + * @param cls closure (of type "struct GNUNET_FS_PublishContext*") + * @param msg the response we got + */ +static void +process_index_start_response (void *cls, + const struct GNUNET_MessageHeader *msg) +{ + struct GNUNET_FS_PublishContext *sc = cls; + struct GNUNET_FS_FileInformation *p; + const char *emsg; + uint16_t msize; + + GNUNET_CLIENT_disconnect (sc->client); + sc->client = NULL; + p = sc->fi_pos; + if (msg == NULL) + { + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Can not index file `%s': %s.
Will try to insert instead.\n"), + p->data.file.filename, + _("timeout on index-start request to `fs' service")); + p->data.file.do_index = GNUNET_NO; + publish_content (sc, p); + return; + } + if (ntohs (msg->type) != GNUNET_MESSAGE_TYPE_FS_INDEX_START_OK) + { + msize = ntohs (msg->size); + emsg = (const char *) &msg[1]; + if ( (msize <= sizeof (struct GNUNET_MessageHeader)) || + (emsg[msize - sizeof(struct GNUNET_MessageHeader) - 1] != '\0') ) + emsg = gettext_noop ("unknown error"); + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Can not index file `%s': %s. Will try to insert instead.\n"), + p->data.file.filename, + gettext (emsg)); + p->data.file.do_index = GNUNET_NO; + publish_content (sc, p); + return; + } + /* success! continue with indexing */ + publish_content (sc, p); +} + + +#if LINUX +#include <sys/statvfs.h> +#endif + +/** + * Function called once the hash computation over an + * indexed file has completed. + * + * @param cls closure, our publishing context + * @param res resulting hash, NULL on error + */ +static void +hash_for_index_cb (void *cls, + const GNUNET_HashCode * + res) +{ + struct GNUNET_FS_PublishContext *sc = cls; + struct GNUNET_FS_FileInformation *p; + struct IndexStartMessage *ism; + size_t slen; + struct GNUNET_CLIENT_Connection *client; +#if LINUX + struct stat sbuf; + struct statvfs fbuf; +#endif + + p = sc->fi_pos; + if (NULL == res) + { + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Can not index file `%s': %s. Will try to insert instead.\n"), + p->data.file.filename, + _("failed to compute hash")); + p->data.file.do_index = GNUNET_NO; + publish_content (sc, p); + return; + } + slen = strlen (p->data.file.filename) + 1; + if (slen > GNUNET_SERVER_MAX_MESSAGE_SIZE - sizeof(struct IndexStartMessage)) + { + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Can not index file `%s': %s.
Will try to insert instead.\n"), + p->data.file.filename, + _("filename too long")); + p->data.file.do_index = GNUNET_NO; + publish_content (sc, p); + return; + } + client = GNUNET_CLIENT_connect (sc->h->sched, + "fs", + sc->h->cfg); + if (NULL == client) + { + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Can not index file `%s': %s. Will try to insert instead.\n"), + p->data.file.filename, + _("could not connect to `fs' service")); + p->data.file.do_index = GNUNET_NO; + publish_content (sc, p); + return; + } + p->data.file.file_id = *res; + ism = GNUNET_malloc (sizeof(struct IndexStartMessage) + + slen); + ism->header.size = htons(sizeof(struct IndexStartMessage) + + slen); + ism->header.type = htons(GNUNET_MESSAGE_TYPE_FS_INDEX_START); + /* FIXME: activate this on other OSes that + support it (or something very similar; make + sure to also adjust corresponding code + on the service-side) */ + /* FIXME: the block below should probably be + abstracted into a function in the DISK API */ +#if LINUX + if ( (0 == stat(p->data.file.filename, + &sbuf)) && + (0 == statvfs (p->data.file.filename, + &fbuf) ) ) + { + ism->device = htonl ((uint32_t) fbuf.f_fsid); + ism->inode = GNUNET_htonll( (uint64_t) sbuf.st_ino); + } +#endif + memcpy (&ism[1], + p->data.file.filename, + slen); + sc->client = client; + GNUNET_CLIENT_transmit_and_get_response (client, + &ism->header, + GNUNET_TIME_UNIT_FOREVER_REL, + &process_index_start_response, + sc); + GNUNET_free (ism); +} + + /** * Main function that performs the upload. * @param cls "struct GNUNET_FS_PublishContext" identifies the upload @@ -744,9 +920,23 @@ do_upload (void *cls, if ( (!p->is_directory) && (p->data.file.do_index) ) { - // FIXME: need to pre-compute hash over - // the entire file and ask FS to prepare - // for indexing! + if (NULL == p->data.file.filename) + { + p->data.file.do_index = GNUNET_NO; + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Can not index file `%s': %s. 
Will try to insert instead.\n"), + "<no-name>", + _("needs to be an actual file")); + publish_content (sc, p); + return; + } + GNUNET_CRYPTO_hash_file (sc->h->sched, + GNUNET_SCHEDULER_PRIORITY_IDLE, + GNUNET_NO, + p->data.file.filename, + HASHING_BLOCKSIZE, + &hash_for_index_cb, + sc); return; } publish_content (sc, p); diff --git a/src/include/gnunet_datastore_service.h b/src/include/gnunet_datastore_service.h index 3805dea97..acd9af1de 100644 --- a/src/include/gnunet_datastore_service.h +++ b/src/include/gnunet_datastore_service.h @@ -46,7 +46,8 @@ extern "C" #define GNUNET_DATASTORE_BLOCKTYPE_IBLOCK 2 #define GNUNET_DATASTORE_BLOCKTYPE_KBLOCK 3 #define GNUNET_DATASTORE_BLOCKTYPE_SBLOCK 4 -#define GNUNET_DATASTORE_BLOCKTYPE_SKBLOCK 5 +#define GNUNET_DATASTORE_BLOCKTYPE_ONDEMAND 5 +#define GNUNET_DATASTORE_BLOCKTYPE_SKBLOCK 6 /* not yet used */ /** * Handle to the datastore service. diff --git a/src/include/gnunet_protocols.h b/src/include/gnunet_protocols.h index 419bbe28d..686205c31 100644 --- a/src/include/gnunet_protocols.h +++ b/src/include/gnunet_protocols.h @@ -367,6 +367,24 @@ extern "C" */ #define GNUNET_MESSAGE_TYPE_DATASTORE_DROP 102 + +/** + * Message sent by fs client to start indexing. + */ +#define GNUNET_MESSAGE_TYPE_FS_INDEX_START 128 + +/** + * Affirmative response to a request for start indexing. + */ +#define GNUNET_MESSAGE_TYPE_FS_INDEX_START_OK 129 + + +/** + * Response to a request for start indexing that + * refuses. + */ +#define GNUNET_MESSAGE_TYPE_FS_INDEX_START_FAILED 130 + /* TODO: - DV