X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=src%2Ffs%2Ffs_file_information.c;h=806592784bfc4ae9e06280cdfac828417669f09a;hb=0238d7e2f30821e7b94e9ea25ce3918fcc04b2c3;hp=c2ab84ec2e311cdf10f48f616d8dbc2446cb02b2;hpb=d60192bbfe69cb3d8e36afaf459f72bb9fa4d288;p=oweals%2Fgnunet.git diff --git a/src/fs/fs_file_information.c b/src/fs/fs_file_information.c index c2ab84ec2..806592784 100644 --- a/src/fs/fs_file_information.c +++ b/src/fs/fs_file_information.c @@ -22,10 +22,6 @@ * @file fs/fs_file_information.c * @brief Manage information for publishing directory hierarchies * @author Christian Grothoff - * - * TODO: - * - metadata filename clean up code - * - metadata/ksk generation for directories from contained files */ #include "platform.h" #include @@ -35,79 +31,55 @@ /** - * Add meta data that libextractor finds to our meta data - * container. + * Obtain the name under which this file information + * structure is stored on disk. Only works for top-level + * file information structures. * - * @param cls closure, our meta data container - * @param plugin_name name of the plugin that produced this value; - * special values can be used (i.e. '<zlib>' for zlib being - * used in the main libextractor library and yielding - * meta data). - * @param type libextractor-type describing the meta data - * @param format basic format information about data - * @param data_mime_type mime-type of data (not of the original file); - * can be NULL (if mime-type is not known) - * @param data actual meta-data found - * @param data_len number of bytes in data - * @return always 0 to continue extracting + * @param s structure to get the filename for + * @return NULL on error, otherwise filename that + * can be passed to "GNUNET_FS_file_information_recover" + * to read this fi-struct from disk. */ -static int -add_to_md (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, - enum EXTRACTOR_MetaFormat format, const char *data_mime_type, - const char *data, size_t data_len) +const char * +GNUNET_FS_file_information_get_id (struct GNUNET_FS_FileInformation *s) { - struct GNUNET_CONTAINER_MetaData *md = cls; - - (void) GNUNET_CONTAINER_meta_data_insert (md, plugin_name, type, format, - data_mime_type, data, data_len); - return 0; + if (NULL != s->dir) + return NULL; + return s->serialization; } - /** - * Extract meta-data from a file. + * Obtain the filename from the file information structure. * - * @return GNUNET_SYSERR on error, otherwise the number - * of meta-data items obtained + * @param s structure to get the filename for + * @return "filename" field of the structure (can be NULL) */ -int -GNUNET_FS_meta_data_extract_from_file (struct GNUNET_CONTAINER_MetaData *md, - const char *filename, - struct EXTRACTOR_PluginList *extractors) +const char * +GNUNET_FS_file_information_get_filename (struct GNUNET_FS_FileInformation *s) { - int old; - - if (filename == NULL) - return GNUNET_SYSERR; - if (extractors == NULL) - return 0; - old = GNUNET_CONTAINER_meta_data_iterate (md, NULL, NULL); - GNUNET_assert (old >= 0); - EXTRACTOR_extract (extractors, filename, NULL, 0, &add_to_md, md); - return (GNUNET_CONTAINER_meta_data_iterate (md, NULL, NULL) - old); + return s->filename; } - /** - * Obtain the name under which this file information - * structure is stored on disk. Only works for top-level - * file information structures. + * Set the filename in the file information structure. + * If filename was already set, frees it before setting the new one. + * Makes a copy of the argument. * * @param s structure to get the filename for - * @return NULL on error, otherwise filename that - * can be passed to "GNUNET_FS_file_information_recover" - * to read this fi-struct from disk. + * @param filename filename to set */ -const char * -GNUNET_FS_file_information_get_id (struct GNUNET_FS_FileInformation *s) +void +GNUNET_FS_file_information_set_filename (struct GNUNET_FS_FileInformation *s, + const char *filename) { - if (NULL != s->dir) - return NULL; - return s->serialization; + GNUNET_free_non_null (s->filename); + if (filename) + s->filename = GNUNET_strdup (filename); + else + s->filename = NULL; } - /** * Create an entry for a file in a publish-structure. * @@ -135,7 +107,7 @@ GNUNET_FS_file_information_create_from_file (struct GNUNET_FS_Handle *h, *bo) { struct FileInfo *fi; - struct stat sbuf; + uint64_t fsize; struct GNUNET_FS_FileInformation *ret; const char *fn; const char *ss; @@ -144,7 +116,8 @@ GNUNET_FS_file_information_create_from_file (struct GNUNET_FS_Handle *h, char fn_conv[MAX_PATH]; #endif - if (0 != STAT (filename, &sbuf)) + /* FIXME: should includeSymLinks be GNUNET_NO or GNUNET_YES here? */ + if (GNUNET_OK != GNUNET_DISK_file_size (filename, &fsize, GNUNET_NO, GNUNET_YES)) { GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_WARNING, "stat", filename); return NULL; @@ -157,7 +130,7 @@ GNUNET_FS_file_information_create_from_file (struct GNUNET_FS_Handle *h, } ret = GNUNET_FS_file_information_create_from_reader (h, client_info, - sbuf.st_size, + fsize, &GNUNET_FS_data_reader_file_, fi, keywords, meta, do_index, bo); @@ -173,6 +146,9 @@ GNUNET_FS_file_information_create_from_file (struct GNUNET_FS_Handle *h, #endif while (NULL != (ss = strstr (fn, DIR_SEPARATOR_STR))) fn = ss + 1; +/* FIXME: If we assume that on other platforms CRT is UTF-8-aware, then + * this should be changed to EXTRACTOR_METAFORMAT_UTF8 + */ #if !WINDOWS GNUNET_CONTAINER_meta_data_insert (ret->meta, "", EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, @@ -281,519 +257,6 @@ GNUNET_FS_file_information_create_from_reader (struct GNUNET_FS_Handle *h, } -/** - * Closure for "dir_scan_cb". - */ -struct DirScanCls -{ - /** - * Metadata extractors to use. - */ - struct EXTRACTOR_PluginList *extractors; - - /** - * Master context. - */ - struct GNUNET_FS_Handle *h; - - /** - * Function to call on each directory entry. - */ - GNUNET_FS_FileProcessor proc; - - /** - * Closure for proc. - */ - void *proc_cls; - - /** - * Scanner to use for subdirectories. - */ - GNUNET_FS_DirectoryScanner scanner; - - /** - * Closure for scanner. - */ - void *scanner_cls; - - /** - * Set to an error message (if any). - */ - char *emsg; - - /** - * Block options. - */ - const struct GNUNET_FS_BlockOptions *bo; - - /** - * Should files be indexed? - */ - int do_index; - -}; - - -/** - * Function called on each entry in a file to cause - * default-publishing. - * - * @param cls closure (struct DirScanCls) - * @param filename name of the file to be published - * @return GNUNET_OK on success, GNUNET_SYSERR to abort - */ -static int -dir_scan_cb (void *cls, const char *filename) -{ - struct DirScanCls *dsc = cls; - struct stat sbuf; - struct GNUNET_FS_FileInformation *fi; - struct GNUNET_FS_Uri *keywords; - struct GNUNET_CONTAINER_MetaData *meta; - - if (0 != STAT (filename, &sbuf)) - { - GNUNET_asprintf (&dsc->emsg, _("`%s' failed on file `%s': %s"), "stat", - filename, STRERROR (errno)); - return GNUNET_SYSERR; - } - if (S_ISDIR (sbuf.st_mode)) - { - fi = GNUNET_FS_file_information_create_from_directory (dsc->h, NULL, - filename, - dsc->scanner, - dsc->scanner_cls, - dsc->do_index, - dsc->bo, &dsc->emsg); - if (NULL == fi) - { - GNUNET_assert (NULL != dsc->emsg); - return GNUNET_SYSERR; - } - } - else - { - meta = GNUNET_CONTAINER_meta_data_create (); - GNUNET_FS_meta_data_extract_from_file (meta, filename, dsc->extractors); - keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta); - fi = GNUNET_FS_file_information_create_from_file (dsc->h, NULL, filename, - keywords, meta, - dsc->do_index, dsc->bo); - GNUNET_CONTAINER_meta_data_destroy (meta); - GNUNET_FS_uri_destroy (keywords); - } - dsc->proc (dsc->proc_cls, filename, fi); - return GNUNET_OK; -} - - -/** - * Simple, useful default implementation of a directory scanner - * (GNUNET_FS_DirectoryScanner). This implementation expects to get a - * UNIX filename, will publish all files in the directory except hidden - * files (those starting with a "."). Metadata will be extracted - * using GNU libextractor; the specific list of plugins should be - * specified in "cls", passing NULL will disable (!) metadata - * extraction. Keywords will be derived from the metadata and be - * subject to default canonicalization. This is strictly a - * convenience function. - * - * @param cls must be of type "struct EXTRACTOR_Extractor*" - * @param h handle to the file sharing subsystem - * @param dirname name of the directory to scan - * @param do_index should files be indexed or inserted - * @param bo block options - * @param proc function called on each entry - * @param proc_cls closure for proc - * @param emsg where to store an error message (on errors) - * @return GNUNET_OK on success - */ -int -GNUNET_FS_directory_scanner_default (void *cls, struct GNUNET_FS_Handle *h, - const char *dirname, int do_index, - const struct GNUNET_FS_BlockOptions *bo, - GNUNET_FS_FileProcessor proc, - void *proc_cls, char **emsg) -{ - struct EXTRACTOR_PluginList *ex = cls; - struct DirScanCls dsc; - - dsc.h = h; - dsc.extractors = ex; - dsc.proc = proc; - dsc.proc_cls = proc_cls; - dsc.scanner = &GNUNET_FS_directory_scanner_default; - dsc.scanner_cls = cls; - dsc.do_index = do_index; - dsc.bo = bo; - if (-1 == GNUNET_DISK_directory_scan (dirname, &dir_scan_cb, &dsc)) - { - GNUNET_assert (NULL != dsc.emsg); - *emsg = dsc.emsg; - return GNUNET_SYSERR; - } - return GNUNET_OK; -} - - -/** - * Aggregate information we keep for meta data in each directory. - */ -struct MetaValueInformation -{ - - /** - * Mime-type of data. - */ - const char *mime_type; - - /** - * The actual meta data. - */ - const char *data; - - /** - * Number of bytes in 'data'. - */ - size_t data_size; - - /** - * Type of the meta data. - */ - enum EXTRACTOR_MetaType type; - - /** - * Format of the meta data. - */ - enum EXTRACTOR_MetaFormat format; - - /** - * How often does this meta value occur in this directory? - */ - unsigned int frequency; - -}; - - -/** - * Type of a function that libextractor calls for each - * meta data item found. - * - * @param cls the container multihashmap to update - * @param plugin_name name of the plugin that produced this value; - * special values can be used (i.e. '<zlib>' for zlib being - * used in the main libextractor library and yielding - * meta data). - * @param type libextractor-type describing the meta data - * @param format basic format information about data - * @param data_mime_type mime-type of data (not of the original file); - * can be NULL (if mime-type is not known) - * @param data actual meta-data found - * @param data_len number of bytes in data - * @return 0 to continue extracting / iterating - */ -static int -update_metamap (void *cls, const char *plugin_name, - enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, - const char *data_mime_type, const char *data, size_t data_len) -{ - struct GNUNET_CONTAINER_MultiHashMap *map = cls; - GNUNET_HashCode key; - struct MetaValueInformation *mvi; - - GNUNET_CRYPTO_hash (data, data_len, &key); - mvi = GNUNET_CONTAINER_multihashmap_get (map, &key); - if (mvi == NULL) - { - mvi = GNUNET_malloc (sizeof (struct MetaValueInformation)); - mvi->mime_type = data_mime_type; - mvi->data = data; - mvi->data_size = data_len; - mvi->type = type; - mvi->format = format; - GNUNET_CONTAINER_multihashmap_put (map, &key, mvi, - GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); - } - mvi->frequency++; - return 0; -} - - -/** - * Aggregate information we keep for keywords in each directory. - */ -struct KeywordInformation -{ - - /** - * Mime-type of keyword. - */ - const char *keyword; - - /** - * How often does this meta value occur in this directory? - */ - unsigned int frequency; - -}; - - -/** - * Closure for dirproc function. - */ -struct EntryProcCls -{ - /** - * Linked list of directory entries that is being - * created. - */ - struct GNUNET_FS_FileInformation *entries; - - /** - * Map describing the meta data for all entries in the - * directory. Keys are the hash of the meta-value, - * values are of type 'struct MetaValueInformation'. - */ - struct GNUNET_CONTAINER_MultiHashMap *metamap; - - /** - * Map describing the keywords for all entries in the - * directory. Keys are the hash of the keyword, - * values are of type 'struct KeywordInformation'. - */ - struct GNUNET_CONTAINER_MultiHashMap *keywordmap; - - /** - * Number of entries in 'entries'. - */ - unsigned int count; - -}; - - -/** - * Function that processes a directory entry that - * was obtained from the scanner. Adds each entry to - * the directory and computes directroy meta map. - * - * @param cls our closure - * @param filename name of the file (unused, why there???) - * @param fi information for publishing the file - */ -static void -dirproc_add (void *cls, const char *filename, - struct GNUNET_FS_FileInformation *fi) -{ - struct EntryProcCls *dc = cls; - unsigned int i; - const char *kw; - struct KeywordInformation *ki; - GNUNET_HashCode key; - - GNUNET_assert (fi->next == NULL); - GNUNET_assert (fi->dir == NULL); - fi->next = dc->entries; - dc->entries = fi; - dc->count++; - if (NULL != fi->meta) - GNUNET_CONTAINER_meta_data_iterate (fi->meta, &update_metamap, dc->metamap); - for (i = 0; i < fi->keywords->data.ksk.keywordCount; i++) - { - kw = fi->keywords->data.ksk.keywords[i]; - GNUNET_CRYPTO_hash (kw, strlen (kw), &key); - ki = GNUNET_CONTAINER_multihashmap_get (dc->keywordmap, &key); - if (ki == NULL) - { - ki = GNUNET_malloc (sizeof (struct KeywordInformation)); - ki->keyword = &kw[1]; - GNUNET_CONTAINER_multihashmap_put (dc->keywordmap, &key, ki, - GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); - } - ki->frequency++; - } -} - - -/** - * Closure for 'compute_directory_metadata'. - */ -struct ComputeDirectoryMetadataContext -{ - /** - * Where to store the extracted keywords. - */ - struct GNUNET_FS_Uri *ksk; - - /** - * Where to store the extracted meta data. - */ - struct GNUNET_CONTAINER_MetaData *meta; - - /** - * Threshold to apply for adding meta data. - */ - unsigned int threshold; -}; - - -/** - * Add metadata that occurs in more than the threshold entries of the - * directory to the directory itself. For example, if most files in a - * directory are of the same mime-type, the directory should have that - * mime-type as a keyword. - * - * @param cls the 'struct ComputeDirectoryMetadataContext' - * @param key unused - * @param value the 'struct MetaValueInformation' (to be freed as well) - * @return GNUNET_OK - */ -static int -compute_directory_metadata (void *cls, const GNUNET_HashCode * key, void *value) -{ - struct ComputeDirectoryMetadataContext *cdmc = cls; - struct MetaValueInformation *mvi = value; - - if (mvi->frequency > cdmc->threshold) - { - if (mvi->type != EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME) - (void) GNUNET_CONTAINER_meta_data_insert (cdmc->meta, "", - mvi->type, mvi->format, - mvi->mime_type, mvi->data, - mvi->data_size); - if ((mvi->format == EXTRACTOR_METAFORMAT_UTF8) || - (mvi->format == EXTRACTOR_METAFORMAT_C_STRING)) - GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, mvi->data, GNUNET_NO); - } - GNUNET_free (mvi); - return GNUNET_OK; -} - - -/** - * Add keywords that occur in more than the threshold entries of the - * directory to the directory itself. - * - * @param cls the 'struct ComputeDirectoryMetadataContext' - * @param key unused - * @param value the 'struct Keywordnformation' (to be freed as well) - * @return GNUNET_OK - */ -static int -compute_directory_keywords (void *cls, const GNUNET_HashCode * key, void *value) -{ - struct ComputeDirectoryMetadataContext *cdmc = cls; - struct KeywordInformation *ki = value; - - if (ki->frequency > cdmc->threshold) - (void) GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, ki->keyword, GNUNET_NO); - GNUNET_free (ki); - return GNUNET_OK; -} - - -/** - * Create a publish-structure from an existing file hierarchy, inferring - * and organizing keywords and metadata as much as possible. This - * function primarily performs the recursive build and re-organizes - * keywords and metadata; for automatically getting metadata - * extraction, scanning of directories and creation of the respective - * GNUNET_FS_FileInformation entries the default scanner should be - * passed (GNUNET_FS_directory_scanner_default). This is strictly a - * convenience function. - * - * @param h handle to the file sharing subsystem - * @param client_info initial value for the client-info value for this entry - * @param filename name of the top-level file or directory - * @param scanner function used to get a list of files in a directory - * @param scanner_cls closure for scanner - * @param do_index should files in the hierarchy be indexed? - * @param bo block options - * @param emsg where to store an error message - * @return publish structure entry for the directory, NULL on error - */ -struct GNUNET_FS_FileInformation * -GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h, - void *client_info, - const char *filename, - GNUNET_FS_DirectoryScanner - scanner, void *scanner_cls, - int do_index, - const struct - GNUNET_FS_BlockOptions *bo, - char **emsg) -{ - struct GNUNET_FS_FileInformation *ret; - struct ComputeDirectoryMetadataContext cdmc; - struct EntryProcCls dc; - const char *fn; - const char *ss; - char *dn; - struct GNUNET_FS_FileInformation *epos; - unsigned int i; - const char *kw; - - dc.entries = NULL; - dc.count = 0; - dc.metamap = GNUNET_CONTAINER_multihashmap_create (64); - dc.keywordmap = GNUNET_CONTAINER_multihashmap_create (64); - /* update children to point to directory and generate statistics - * on all meta data in children */ - scanner (scanner_cls, h, filename, do_index, bo, &dirproc_add, &dc, emsg); - cdmc.meta = GNUNET_CONTAINER_meta_data_create (); - cdmc.ksk = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri)); - cdmc.ksk->type = ksk; - cdmc.threshold = 1 + dc.count / 2; /* 50% threshold for now */ - GNUNET_FS_meta_data_make_directory (cdmc.meta); - GNUNET_CONTAINER_multihashmap_iterate (dc.metamap, - &compute_directory_metadata, &cdmc); - GNUNET_CONTAINER_multihashmap_iterate (dc.keywordmap, - &compute_directory_keywords, &cdmc); - GNUNET_CONTAINER_multihashmap_destroy (dc.metamap); - GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap); - - /* remove keywords in children that are already in the - * parent */ - for (epos = dc.entries; NULL != epos; epos = epos->next) - { - for (i = 0; i < cdmc.ksk->data.ksk.keywordCount; i++) - { - kw = cdmc.ksk->data.ksk.keywords[i]; - GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, &kw[1]); - } - } - GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, GNUNET_NO); - ret = - GNUNET_FS_file_information_create_empty_directory (h, client_info, cdmc.ksk, - cdmc.meta, bo); - GNUNET_CONTAINER_meta_data_destroy (cdmc.meta); - GNUNET_FS_uri_destroy (cdmc.ksk); - ret->data.dir.entries = dc.entries; - while (dc.entries != NULL) - { - dc.entries->dir = ret; - dc.entries = dc.entries->next; - } - fn = filename; - while ((NULL != (ss = strstr (fn, DIR_SEPARATOR_STR))) && (strlen (ss) > 1)) - fn = ss + 1; - GNUNET_asprintf (&dn, "%s/", fn); -#if !WINDOWS - GNUNET_CONTAINER_meta_data_insert (ret->meta, "", - EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, - EXTRACTOR_METAFORMAT_C_STRING, - "text/plain", dn, strlen (dn) + 1); -#else - GNUNET_CONTAINER_meta_data_insert (ret->meta, "", - EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", dn, strlen (dn) + 1); -#endif - GNUNET_free (dn); - ret->filename = GNUNET_strdup (filename); - return ret; -} - - /** * Test if a given entry represents a directory. * @@ -820,6 +283,7 @@ GNUNET_FS_file_information_is_directory (const struct GNUNET_FS_FileInformation * @param keywords under which keywords should this directory be available * directly; can be NULL * @param bo block options + * @param filename name of the directory; can be NULL * @return publish structure entry for the directory , NULL on error */ struct GNUNET_FS_FileInformation * @@ -831,7 +295,8 @@ GNUNET_FS_file_information_create_empty_directory (struct GNUNET_FS_Handle *h, GNUNET_CONTAINER_MetaData *meta, const struct - GNUNET_FS_BlockOptions *bo) + GNUNET_FS_BlockOptions *bo, + const char *filename) { struct GNUNET_FS_FileInformation *ret; @@ -842,6 +307,8 @@ GNUNET_FS_file_information_create_empty_directory (struct GNUNET_FS_Handle *h, ret->keywords = GNUNET_FS_uri_dup (keywords); ret->bo = *bo; ret->is_directory = GNUNET_YES; + if (filename != NULL) + ret->filename = GNUNET_strdup (filename); return ret; } @@ -861,7 +328,7 @@ int GNUNET_FS_file_information_add (struct GNUNET_FS_FileInformation *dir, struct GNUNET_FS_FileInformation *ent) { - if ((ent->dir != NULL) || (ent->next != NULL) || (!dir->is_directory)) + if ((ent->dir != NULL) || (ent->next != NULL) || (dir->is_directory != GNUNET_YES)) { GNUNET_break (0); return GNUNET_SYSERR; @@ -899,12 +366,12 @@ GNUNET_FS_file_information_inspect (struct GNUNET_FS_FileInformation *dir, no = GNUNET_NO; if (GNUNET_OK != proc (proc_cls, dir, - (dir->is_directory) ? dir->data.dir.dir_size : dir->data. + (dir->is_directory == GNUNET_YES) ? dir->data.dir.dir_size : dir->data. file.file_size, dir->meta, &dir->keywords, &dir->bo, - (dir->is_directory) ? &no : &dir->data.file.do_index, + (dir->is_directory == GNUNET_YES) ? &no : &dir->data.file.do_index, &dir->client_info)) return; - if (!dir->is_directory) + if (dir->is_directory != GNUNET_YES) return; pos = dir->data.dir.entries; while (pos != NULL) @@ -912,9 +379,9 @@ GNUNET_FS_file_information_inspect (struct GNUNET_FS_FileInformation *dir, no = GNUNET_NO; if (GNUNET_OK != proc (proc_cls, pos, - (pos->is_directory) ? pos->data.dir.dir_size : pos->data. + (pos->is_directory == GNUNET_YES) ? pos->data.dir.dir_size : pos->data. file.file_size, pos->meta, &pos->keywords, &pos->bo, - (dir->is_directory) ? &no : &dir->data.file.do_index, + (pos->is_directory == GNUNET_YES) ? &no : &pos->data.file.do_index, &pos->client_info)) break; pos = pos->next; @@ -941,7 +408,7 @@ GNUNET_FS_file_information_destroy (struct GNUNET_FS_FileInformation *fi, int no; no = GNUNET_NO; - if (fi->is_directory) + if (fi->is_directory == GNUNET_YES) { /* clean up directory */ while (NULL != (pos = fi->data.dir.entries))