libs/web: fix sanitize_utf8(), passes all testcases now
author Jo-Philipp Wich <jow@openwrt.org>
Fri, 12 Nov 2010 22:48:17 +0000 (22:48 +0000)
committer Jo-Philipp Wich <jow@openwrt.org>
Fri, 12 Nov 2010 22:48:17 +0000 (22:48 +0000)
libs/web/src/template_utils.c

index c560b7a0210add80db75eb1299f12510990ac038..f17d3b3e9b64cbc99d3f9a5cdc6d3442c881450a 100644 (file)
@@ -181,70 +181,67 @@ static int _validate_utf8(unsigned char **s, int l, struct template_buffer *buf)
        unsigned char *ptr = *s;
        unsigned int o = 0, v, n;
 
-       //for (o = 0; o < l; o++)
+       /* ascii byte without null */
+       if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F))
        {
-               /* ascii byte without null */
-               if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F))
-               {
-                       if (!buf_putchar(buf, *ptr++))
-                               return 0;
+               if (!buf_putchar(buf, *ptr++))
+                       return 0;
 
-                       o = 1;
-               }
+               o = 1;
+       }
 
-               /* multi byte sequence */
-               else if ((n = mb_num_chars(*ptr)) > 1)
+       /* multi byte sequence */
+       else if ((n = mb_num_chars(*ptr)) > 1)
+       {
+               /* count valid chars */
+               for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++);
+
+               switch (n)
                {
-                       /* count valid chars */
-                       for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++);
-
-                       switch (n)
-                       {
-                               case 6:
-                               case 5:
-                                       /* five and six byte sequences are always invalid */
+                       case 6:
+                       case 5:
+                               /* five and six byte sequences are always invalid */
+                               if (!buf_putchar(buf, '?'))
+                                       return 0;
+
+                               break;
+
+                       default:
+                               /* if the number of valid continuation bytes matches the
+                                * expected number and if the sequence is legal, copy
+                                * the bytes to the destination buffer */
+                               if ((v == n) && mb_is_shortest(ptr, n) &&
+                                       !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
+                               {
+                                       /* copy sequence */
+                                       if (!buf_append(buf, ptr, n))
+                                               return 0;
+                               }
+
+                               /* the found sequence is illegal, skip it */
+                               else
+                               {
+                                       /* invalid sequence */
                                        if (!buf_putchar(buf, '?'))
                                                return 0;
+                               }
 
-                                       break;
-
-                               default:
-                                       /* if the number of valid continuation bytes matches the
-                                        * expected number and if the sequence is legal, copy
-                                        * the bytes to the destination buffer */
-                                       if ((v == n) && mb_is_shortest(ptr, n) &&
-                                               !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n))
-                                       {
-                                               /* copy sequence */
-                                               if (!buf_append(buf, ptr, n))
-                                                       return 0;
-                                       }
-
-                                       /* the found sequence is illegal, skip it */
-                                       else
-                                       {
-                                               /* invalid sequence */
-                                               if (!buf_putchar(buf, '?'))
-                                                       return 0;
-                                       }
-
-                                       break;
-                       }
-
-                       /* advance beyound the last found valid continuation char */
-                       o = v;
-                       ptr += v;
+                               break;
                }
 
-               /* invalid byte (0x00) */
-               else
-               {
-                       if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
-                               return 0;
+               /* advance beyound the last found valid continuation char */
+               o = v;
+               ptr += v;
+       }
 
-                       o = 1;
-                       ptr++;
-               }
+       /* invalid byte (0x00) */
+       else
+       {
+               if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */
+                       return 0;
+
+               o = 1;
+               ptr++;
        }
 
        *s = ptr;
@@ -256,15 +253,28 @@ char * sanitize_utf8(const char *s, unsigned int l)
 {
        struct template_buffer *buf = buf_init();
        unsigned char *ptr = (unsigned char *)s;
+       unsigned int v, o;
 
        if (!buf)
                return NULL;
 
-       if (!_validate_utf8(&ptr, l, buf))
+       for (o = 0; o < l; o++)
        {
-               free(buf->data);
-               free(buf);
-               return NULL;
+               /* ascii char */
+               if ((*ptr >= 0x01) && (*ptr <= 0x7F))
+               {
+                       if (!buf_putchar(buf, *ptr++))
+                               break;
+               }
+
+               /* invalid byte or multi byte sequence */
+               else
+               {
+                       if (!(v = _validate_utf8(&ptr, l - o, buf)))
+                               break;
+
+                       o += (v - 1);
+               }
        }
 
        return buf_destroy(buf);