From: Christian Grothoff <christian@grothoff.org>
Date: Sat, 14 Jan 2012 15:25:14 +0000 (+0000)
Subject: -LRN: use correct character counting, instead of byte counting
X-Git-Tag: initial-import-from-subversion-38251~15314
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=429c91dd404c7d938004e9f3c90ed5dcd29823ff;p=oweals%2Fgnunet.git

-LRN: use correct character counting, instead of byte counting
---

diff --git a/src/fs/fs_uri.c b/src/fs/fs_uri.c
index 157295b09..fac478d38 100644
--- a/src/fs/fs_uri.c
+++ b/src/fs/fs_uri.c
@@ -1541,6 +1541,23 @@ normalize_metadata (enum EXTRACTOR_MetaFormat format, const char *data,
   return (char *) normalized;
 }
 
+/**
+ * Count the UTF-8 characters (not bytes) in the 0-terminated string.
+ * Counting stops at the first position where u8_next() returns NULL.
+ */
+static size_t
+u8_strcount (const uint8_t *s)
+{
+  size_t n = 0;
+  ucs4_t c;
+  GNUNET_assert (NULL != s);
+  if (0 == *s)
+    return 0;
+  while (NULL != (s = u8_next (&c, s)))
+    n++;
+  return n;
+}
+
 
 /**
  * Break the filename up by matching [], () and {} pairs to make
@@ -1600,7 +1617,7 @@ get_keywords_from_parens (const char *s, char **array, int index)
       tmp = close_paren[0];
       close_paren[0] = '\0';
       /* Keywords must be at least 3 characters long */
-      if (u8_strlen ((const uint8_t *) &open_paren[1]) <= 2)
+      if (u8_strcount ((const uint8_t *) &open_paren[1]) <= 2)
       {
         close_paren[0] = tmp;
         continue;
@@ -1669,7 +1686,7 @@ get_keywords_from_tokens (const char *s, char **array, int index)
   for (p = strtok (ss, TOKENS); p != NULL; p = strtok (NULL, TOKENS))
   {
     /* Keywords must be at least 3 characters long */
-    if (u8_strlen ((const uint8_t *) p) <= 2)
+    if (u8_strcount ((const uint8_t *) p) <= 2)
       continue;
     if (NULL != array)
     {
@@ -1735,7 +1752,7 @@ gather_uri_data (void *cls, const char *plugin_name,
    * and will return the length of its valid part, skipping the keyword.
    * If it does - fix the extractor, not this check!
    */
-  if (u8_strlen ((const uint8_t *) data) <= 2)
+  if (u8_strcount ((const uint8_t *) data) <= 2)
   {
     return 0;
   }