#include "gnunet_fs_service.h"
#include "gnunet_signatures.h"
#include "fs_api.h"
+#include <unitypes.h>
+#include <unicase.h>
+#include <uniconv.h>
+#include <unistr.h>
+#include <unistdio.h>
+
/**
* @param key where to store the unique key
*/
void
-GNUNET_FS_uri_to_key (const struct GNUNET_FS_Uri *uri, GNUNET_HashCode * key)
+GNUNET_FS_uri_to_key (const struct GNUNET_FS_Uri *uri, struct GNUNET_HashCode * key)
{
switch (uri->type)
{
key);
break;
default:
- memset (key, 0, sizeof (GNUNET_HashCode));
+ memset (key, 0, sizeof (struct GNUNET_HashCode));
break;
}
}
{
if (out[rpos] == '%')
{
- if (1 != sscanf (&out[rpos + 1], "%2X", &hx))
+ if (1 != SSCANF (&out[rpos + 1], "%2X", &hx))
{
GNUNET_free (out);
*emsg = GNUNET_strdup (_("`%' must be followed by HEX number"));
uri_sks_parse (const char *s, char **emsg)
{
struct GNUNET_FS_Uri *ret;
- GNUNET_HashCode namespace;
+ struct GNUNET_HashCode ns;
char *identifier;
unsigned int pos;
size_t slen;
}
memcpy (enc, &s[pos], sizeof (struct GNUNET_CRYPTO_HashAsciiEncoded));
enc[sizeof (struct GNUNET_CRYPTO_HashAsciiEncoded) - 1] = '\0';
- if (GNUNET_OK != GNUNET_CRYPTO_hash_from_string (enc, &namespace))
+ if (GNUNET_OK != GNUNET_CRYPTO_hash_from_string (enc, &ns))
{
*emsg = GNUNET_strdup (_("Malformed SKS URI"));
return NULL;
GNUNET_strdup (&s[pos + sizeof (struct GNUNET_CRYPTO_HashAsciiEncoded)]);
ret = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri));
ret->type = sks;
- ret->data.sks.namespace = namespace;
+ ret->data.sks.ns = ns;
ret->data.sks.identifier = identifier;
return ret;
}
_("Lacking key configuration settings.\n"));
return NULL;
}
- my_private_key = GNUNET_CRYPTO_rsa_key_create_from_file (keyfile);
- if (my_private_key == NULL)
+ if (NULL == (my_private_key = GNUNET_CRYPTO_rsa_key_create_from_file (keyfile)))
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
_("Could not access hostkey file `%s'.\n"), keyfile);
ns_uri = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri));
ns_uri->type = sks;
GNUNET_CRYPTO_rsa_key_get_public (ns->key, &pk);
- GNUNET_CRYPTO_hash (&pk, sizeof (pk), &ns_uri->data.sks.namespace);
+ GNUNET_CRYPTO_hash (&pk, sizeof (pk), &ns_uri->data.sks.ns);
ns_uri->data.sks.identifier = GNUNET_strdup (id);
return ns_uri;
}
* @return an FS URI for the given namespace and identifier
*/
struct GNUNET_FS_Uri *
-GNUNET_FS_uri_sks_create_from_nsid (GNUNET_HashCode * nsid, const char *id)
+GNUNET_FS_uri_sks_create_from_nsid (struct GNUNET_HashCode * nsid, const char *id)
{
struct GNUNET_FS_Uri *ns_uri;
ns_uri = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri));
ns_uri->type = sks;
- ns_uri->data.sks.namespace = *nsid;
+ ns_uri->data.sks.ns = *nsid;
ns_uri->data.sks.identifier = GNUNET_strdup (id);
return ns_uri;
}
return GNUNET_NO;
case sks:
if ((0 ==
- memcmp (&u1->data.sks.namespace, &u2->data.sks.namespace,
- sizeof (GNUNET_HashCode))) &&
+ memcmp (&u1->data.sks.ns, &u2->data.sks.ns,
+ sizeof (struct GNUNET_HashCode))) &&
(0 == strcmp (u1->data.sks.identifier, u2->data.sks.identifier)))
return GNUNET_YES;
*/
int
GNUNET_FS_uri_sks_get_namespace (const struct GNUNET_FS_Uri *uri,
- GNUNET_HashCode * nsid)
+ struct GNUNET_HashCode * nsid)
{
if (!GNUNET_FS_uri_test_sks (uri))
{
GNUNET_break (0);
return GNUNET_SYSERR;
}
- *nsid = uri->data.sks.namespace;
+ *nsid = uri->data.sks.ns;
return GNUNET_OK;
}
{
char *ret;
char *name;
+ char *unique_name;
if (uri->type != sks)
return NULL;
- name = GNUNET_PSEUDONYM_id_to_name (cfg, &uri->data.sks.namespace);
- if (name == NULL)
- return GNUNET_FS_uri_to_string (uri);
- GNUNET_asprintf (&ret, "%s: %s", name, uri->data.sks.identifier);
+ (void) GNUNET_PSEUDONYM_get_info (cfg, &uri->data.sks.ns,
+ NULL, NULL, &name, NULL);
+ unique_name = GNUNET_PSEUDONYM_name_uniquify (cfg, &uri->data.sks.ns, name, NULL);
GNUNET_free (name);
+ GNUNET_asprintf (&ret, "%s: %s", unique_name, uri->data.sks.identifier);
+ GNUNET_free (unique_name);
return ret;
}
if (uri->type == ksk)
{
- for (i = uri->data.ksk.keywordCount - 1; i >= 0; i--)
+ for (i=0;i < uri->data.ksk.keywordCount; i++)
GNUNET_assert (uri->data.ksk.keywords[i] != NULL);
}
#endif
}
+/**
+ * Normalize a keyword so that differently-cased spellings compare equal:
+ * make sure the text is valid UTF-8 (converting it from the locale charset
+ * when it is declared as a plain C string or fails UTF-8 validation) and
+ * lower-case it using NFD normalization.
+ *
+ * @param format declared format of @a data; only
+ *        EXTRACTOR_METAFORMAT_UTF8 and EXTRACTOR_METAFORMAT_C_STRING
+ *        trigger validation / conversion
+ * @param data 0-terminated string to normalize, may be NULL
+ * @param data_len number of bytes of @a data to validate as UTF-8
+ * @return newly allocated, 0-terminated normalized string (callers release
+ *         it with GNUNET_free), or NULL if @a data is NULL or if the
+ *         conversion or the lower-casing failed
+ */
+static char *
+normalize_metadata (enum EXTRACTOR_MetaFormat format, const char *data,
+                    size_t data_len)
+{
+  uint8_t *converted = NULL;
+  const uint8_t *str_to_normalize = (const uint8_t *) data;
+  uint8_t *lowered;
+  char *result;
+  size_t r_len;
+
+  if (NULL == data)
+    return NULL;
+  /* Don't trust libextractor: u8_check returns non-NULL for invalid UTF-8,
+   * in which case the data is treated as a locale-encoded C string. */
+  if ( (EXTRACTOR_METAFORMAT_UTF8 == format) &&
+       (NULL != u8_check ((const uint8_t *) data, data_len)) )
+    format = EXTRACTOR_METAFORMAT_C_STRING;
+  if (EXTRACTOR_METAFORMAT_C_STRING == format)
+  {
+    converted = u8_strconv_from_encoding (data, locale_charset (),
+                                          iconveh_escape_sequence);
+    if (NULL == converted)
+      return NULL;
+    /* Lower-case the converted text, not the raw input. */
+    str_to_normalize = converted;
+  }
+
+  lowered = u8_tolower (str_to_normalize,
+                        strlen ((const char *) str_to_normalize),
+                        NULL, UNINORM_NFD, NULL, &r_len);
+  /* 'converted' is allocated by libunistring internally, use free() */
+  free (converted);
+  if (NULL == lowered)
+    return NULL;
+  /* u8_tolower allocates a string that is NOT 0-terminated; copy it into
+   * a GNUnet-owned, 0-terminated buffer. */
+  result = GNUNET_malloc (r_len + 1);
+  memcpy (result, lowered, r_len);
+  result[r_len] = '\0';
+  free (lowered);
+  return result;
+}
+
+/**
+ * Count the characters (code points, not bytes) of a 0-terminated
+ * UTF-8 string.
+ *
+ * @param s string to measure, must not be NULL
+ * @return number of characters u8_next could step over before it
+ *         reported the end of the string
+ */
+static size_t
+u8_strcount (const uint8_t *s)
+{
+  const uint8_t *pos;
+  size_t chars;
+  ucs4_t uc;
+
+  GNUNET_assert (NULL != s);
+  if (0 == *s)
+    return 0;
+  chars = 0;
+  pos = u8_next (&uc, s);
+  while (NULL != pos)
+  {
+    /* every non-NULL result means one more character was consumed */
+    chars++;
+    pos = u8_next (&uc, pos);
+  }
+  return chars;
+}
+
+
/**
* Break the filename up by matching [], () and {} pairs to make
* keywords. In case of nesting parentheses only the inner pair counts.
}
if (match && (close_paren - open_paren > 1))
{
+ tmp = close_paren[0];
+ close_paren[0] = '\0';
+ /* Keywords must be at least 3 characters long */
+ if (u8_strcount ((const uint8_t *) &open_paren[1]) <= 2)
+ {
+ close_paren[0] = tmp;
+ continue;
+ }
if (NULL != array)
{
- tmp = close_paren[0];
- close_paren[0] = '\0';
- if (GNUNET_NO == find_duplicate ((const char *) &open_paren[1], (const char **) array, index + count))
+ char *normalized;
+ if (GNUNET_NO == find_duplicate ((const char *) &open_paren[1],
+ (const char **) array, index + count))
{
insert_non_mandatory_keyword ((const char *) &open_paren[1], array,
index + count);
count++;
}
- close_paren[0] = tmp;
+ normalized = normalize_metadata (EXTRACTOR_METAFORMAT_UTF8,
+ &open_paren[1], close_paren - &open_paren[1]);
+ if (normalized != NULL)
+ {
+ if (GNUNET_NO == find_duplicate ((const char *) normalized,
+ (const char **) array, index + count))
+ {
+ insert_non_mandatory_keyword ((const char *) normalized, array,
+ index + count);
+ count++;
+ }
+ GNUNET_free (normalized);
+ }
}
else
count++;
+ close_paren[0] = tmp;
}
}
GNUNET_free (ss);
/**
* Where to break up keywords
*/
-#define TOKENS "_. /-!?#&+@\"\'\\;:"
+#define TOKENS "_. /-!?#&+@\"\'\\;:,()[]{}$<>|"
/**
* Break the filename up by TOKENS to make
ss = GNUNET_strdup (s);
for (p = strtok (ss, TOKENS); p != NULL; p = strtok (NULL, TOKENS))
{
+ /* Keywords must be at least 3 characters long */
+ if (u8_strcount ((const uint8_t *) p) <= 2)
+ continue;
if (NULL != array)
{
+ char *normalized;
if (GNUNET_NO == find_duplicate (p, (const char **) array, index + seps))
{
insert_non_mandatory_keyword (p, array,
index + seps);
seps++;
}
+ normalized = normalize_metadata (EXTRACTOR_METAFORMAT_UTF8,
+ p, strlen (p));
+ if (normalized != NULL)
+ {
+ if (GNUNET_NO == find_duplicate ((const char *) normalized,
+ (const char **) array, index + seps))
+ {
+ insert_non_mandatory_keyword ((const char *) normalized, array,
+ index + seps);
+ seps++;
+ }
+ GNUNET_free (normalized);
+ }
}
else
seps++;
}
#undef TOKENS
-
/**
* Function called on each value in the meta data.
* Adds it to the URI.
const char *data_mime_type, const char *data, size_t data_len)
{
struct GNUNET_FS_Uri *uri = cls;
+ char *normalized_data;
if ((format != EXTRACTOR_METAFORMAT_UTF8) &&
(format != EXTRACTOR_METAFORMAT_C_STRING))
return 0;
- if (find_duplicate (data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount))
- return GNUNET_OK;
- insert_non_mandatory_keyword (data,
- uri->data.ksk.keywords, uri->data.ksk.keywordCount);
- uri->data.ksk.keywordCount++;
+ /* Keywords must be at least 3 characters long
+ * If given non-utf8 string it will, most likely, find it to be invalid,
+ * and will return the length of its valid part, skipping the keyword.
+ * If it does - fix the extractor, not this check!
+ */
+ if (u8_strcount ((const uint8_t *) data) <= 2)
+ {
+ return 0;
+ }
+ normalized_data = normalize_metadata (format, data, data_len);
+ if (!find_duplicate (data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount))
+ {
+ insert_non_mandatory_keyword (data,
+ uri->data.ksk.keywords, uri->data.ksk.keywordCount);
+ uri->data.ksk.keywordCount++;
+ }
+ if (normalized_data != NULL)
+ {
+ if (!find_duplicate (normalized_data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount))
+ {
+ insert_non_mandatory_keyword (normalized_data,
+ uri->data.ksk.keywords, uri->data.ksk.keywordCount);
+ uri->data.ksk.keywordCount++;
+ }
+ GNUNET_free (normalized_data);
+ }
return 0;
}
if (ent > 0)
{
full_name = GNUNET_CONTAINER_meta_data_get_first_by_types (md,
- EXTRACTOR_METATYPE_FILENAME, -1);
+ EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, -1);
if (NULL != full_name)
{
filename = full_name;
tok_keywords = get_keywords_from_tokens (filename, NULL, 0);
paren_keywords = get_keywords_from_parens (filename, NULL, 0);
}
+ /* x2 because there might be a normalized variant of every keyword */
ret->data.ksk.keywords = GNUNET_malloc (sizeof (char *) * (ent
- + tok_keywords + paren_keywords));
+ + tok_keywords + paren_keywords) * 2);
GNUNET_CONTAINER_meta_data_iterate (md, &gather_uri_data, ret);
}
if (tok_keywords > 0)
static char *
uri_sks_to_string (const struct GNUNET_FS_Uri *uri)
{
- const GNUNET_HashCode *namespace;
+ const struct GNUNET_HashCode *ns;
const char *identifier;
char *ret;
- struct GNUNET_CRYPTO_HashAsciiEncoded ns;
+ struct GNUNET_CRYPTO_HashAsciiEncoded nsasc;
if (uri->type != sks)
return NULL;
- namespace = &uri->data.sks.namespace;
+ ns = &uri->data.sks.ns;
identifier = uri->data.sks.identifier;
- GNUNET_CRYPTO_hash_to_enc (namespace, &ns);
+ GNUNET_CRYPTO_hash_to_enc (ns, &nsasc);
GNUNET_asprintf (&ret, "%s%s%s/%s", GNUNET_FS_URI_PREFIX,
- GNUNET_FS_URI_SKS_INFIX, (const char *) &ns, identifier);
+ GNUNET_FS_URI_SKS_INFIX, (const char *) &nsasc, identifier);
return ret;
}