From 09e71acf6baa4597bc0948046c3983a8ac2bda09 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Fri, 12 Nov 2010 22:37:50 +0000 Subject: [PATCH] libs/web: add UTF-8 validation and pcdata escaping C routines to template parser --- libs/web/Makefile | 6 +- libs/web/src/template_lualib.c | 42 ++++- libs/web/src/template_lualib.h | 1 + libs/web/src/template_utils.c | 330 +++++++++++++++++++++++++++++++++ libs/web/src/template_utils.h | 38 ++++ 5 files changed, 410 insertions(+), 7 deletions(-) create mode 100644 libs/web/src/template_utils.c create mode 100644 libs/web/src/template_utils.h diff --git a/libs/web/Makefile b/libs/web/Makefile index cc3bc96fc..d9f9700c1 100644 --- a/libs/web/Makefile +++ b/libs/web/Makefile @@ -5,11 +5,11 @@ include ../../build/gccconfig.mk TPL_LDFLAGS = TPL_CFLAGS = TPL_SO = parser.so -TPL_COMMON_OBJ = src/template_parser.o +TPL_COMMON_OBJ = src/template_parser.o src/template_utils.o TPL_LUALIB_OBJ = src/template_lualib.o %.o: %.c - $(COMPILE) $(TPL_CFLAGS) $(LUA_CFLAGS) $(FPIC) -c -o $@ $< + $(COMPILE) $(TPL_CFLAGS) $(LUA_CFLAGS) $(FPIC) -c -o $@ $< compile: build-clean $(TPL_COMMON_OBJ) $(TPL_LUALIB_OBJ) $(LINK) $(SHLIB_FLAGS) $(TPL_LDFLAGS) -o src/$(TPL_SO) \ @@ -24,5 +24,3 @@ clean: build-clean build-clean: rm -f src/*.o src/$(TPL_SO) - - diff --git a/libs/web/src/template_lualib.c b/libs/web/src/template_lualib.c index f91b19ceb..d3a5f89bb 100644 --- a/libs/web/src/template_lualib.c +++ b/libs/web/src/template_lualib.c @@ -54,14 +54,50 @@ int template_L_parse(lua_State *L) return 3; } +int template_L_sanitize_utf8(lua_State *L) +{ + size_t len = 0; + const char *str = luaL_checklstring(L, 1, &len); + char *res = sanitize_utf8(str, len); + + if (res != NULL) + { + lua_pushstring(L, res); + free(res); + + return 1; + } + + return 0; +} + +int template_L_sanitize_pcdata(lua_State *L) +{ + size_t len = 0; + const char *str = luaL_checklstring(L, 1, &len); + char *res = sanitize_pcdata(str, len); + + if (res != NULL) + { + lua_pushstring(L, res); + free(res); + + return 1; + } + + return 0; +} + + /* module table */ static const luaL_reg R[] = { - {"parse", template_L_parse}, - {NULL, NULL} + { "parse", template_L_parse }, + { "sanitize_utf8", template_L_sanitize_utf8 }, + { "sanitize_pcdata", template_L_sanitize_pcdata }, + { NULL, NULL } }; LUALIB_API int luaopen_luci_template_parser(lua_State *L) { luaL_register(L, TEMPLATE_LUALIB_META, R); return 1; } - diff --git a/libs/web/src/template_lualib.h b/libs/web/src/template_lualib.h index de915e1a2..d628b9dce 100644 --- a/libs/web/src/template_lualib.h +++ b/libs/web/src/template_lualib.h @@ -20,6 +20,7 @@ #define _TEMPLATE_LUALIB_H_ #include "template_parser.h" +#include "template_utils.h" #define TEMPLATE_LUALIB_META "template.parser" diff --git a/libs/web/src/template_utils.c b/libs/web/src/template_utils.c new file mode 100644 index 000000000..c560b7a02 --- /dev/null +++ b/libs/web/src/template_utils.c @@ -0,0 +1,330 @@ +/* + * LuCI Template - Utility functions + * + * Copyright (C) 2010 Jo-Philipp Wich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "template_utils.h" + +/* initialize a buffer object */ +static struct template_buffer * buf_init(void) +{ + struct template_buffer *buf; + + buf = (struct template_buffer *)malloc(sizeof(struct template_buffer)); + + if (buf != NULL) + { + buf->fill = 0; + buf->size = 1024; + buf->data = (unsigned char *)malloc(buf->size); + + if (buf->data != NULL) + { + buf->dptr = buf->data; + buf->data[0] = 0; + + return buf; + } + + free(buf); + } + + return NULL; +} + +/* grow buffer */ +static int buf_grow(struct template_buffer *buf) +{ + unsigned int off = (buf->dptr - buf->data); + unsigned char *data = + (unsigned char *)realloc(buf->data, buf->size + 1024); + + if (data != NULL) + { + buf->data = data; + buf->dptr = data + off; + buf->size += 1024; + + return buf->size; + } + + return 0; +} + +/* put one char into buffer object */ +static int buf_putchar(struct template_buffer *buf, unsigned char c) +{ + if( ((buf->fill + 1) >= buf->size) && !buf_grow(buf) ) + return 0; + + *(buf->dptr++) = c; + *(buf->dptr) = 0; + + buf->fill++; + return 1; +} + +/* append data to buffer */ +static int buf_append(struct template_buffer *buf, unsigned char *s, int len) +{ + while ((buf->fill + len + 1) >= buf->size) + { + if (!buf_grow(buf)) + return 0; + } + + memcpy(buf->dptr, s, len); + buf->fill += len; + buf->dptr += len; + + *(buf->dptr) = 0; + + return len; +} + +/* destroy buffer object and return pointer to data */ +static char * buf_destroy(struct template_buffer *buf) +{ + unsigned char *data = buf->data; + + free(buf); + return (char *)data; +} + + +/* calculate the number of expected continuation chars */ +static inline int mb_num_chars(unsigned char c) +{ + if ((c & 0xE0) == 0xC0) + return 2; + else if ((c & 0xF0) == 0xE0) + return 3; + else if ((c & 0xF8) == 0xF0) + return 4; + else if ((c & 0xFC) == 0xF8) + return 5; + else if ((c & 0xFE) == 0xFC) + return 6; + + return 1; +} + +/* test whether the given byte is a valid continuation char */ +static inline int mb_is_cont(unsigned char c) +{ + return ((c >= 0x80) && (c <= 0xBF)); +} + +/* test whether the byte sequence at the given pointer with the given + * length is the shortest possible representation of the code point */ +static inline int mb_is_shortest(unsigned char *s, int n) +{ + switch (n) + { + case 2: + /* 1100000x (10xxxxxx) */ + return ((*s & 0x1E) > 0); + + case 3: + /* 11100000 100xxxxx (10xxxxxx) */ + return ((*s & 0x1F) > 0) && ((*(s+1) & 0x60) > 0); + + case 4: + /* 11110000 1000xxxx (10xxxxxx 10xxxxxx) */ + return ((*s & 0x0F) > 0) && ((*(s+1) & 0x70) > 0); + + case 5: + /* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx) */ + return ((*s & 0x07) > 0) && ((*(s+1) & 0x78) > 0); + + case 6: + /* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx) */ + return ((*s & 0x03) > 0) && ((*(s+1) & 0x7C) > 0); + } + + return 1; +} + +/* test whether the byte sequence at the given pointer with the given + * length is an UTF-16 surrogate */ +static inline int mb_is_surrogate(unsigned char *s, int n) +{ + return ((n == 3) && (*s == 0xED) && (*(s+1) >= 0xA0) && (*(s+1) <= 0xBF)); +} + +/* test whether the byte sequence at the given pointer with the given + * length is an illegal UTF-8 code point */ +static inline int mb_is_illegal(unsigned char *s, int n) +{ + return ((n == 3) && (*s == 0xEF) && (*(s+1) == 0xBF) && + (*(s+2) >= 0xBE) && (*(s+2) <= 0xBF)); +} + + +/* scan given source string, validate UTF-8 sequence and store result + * in given buffer object */ +static int _validate_utf8(unsigned char **s, int l, struct template_buffer *buf) +{ + unsigned char *ptr = *s; + unsigned int o = 0, v, n; + + //for (o = 0; o < l; o++) + { + /* ascii byte without null */ + if ((*(ptr+0) >= 0x01) && (*(ptr+0) <= 0x7F)) + { + if (!buf_putchar(buf, *ptr++)) + return 0; + + o = 1; + } + + /* multi byte sequence */ + else if ((n = mb_num_chars(*ptr)) > 1) + { + /* count valid chars */ + for (v = 1; (v <= n) && ((o+v) < l) && mb_is_cont(*(ptr+v)); v++); + + switch (n) + { + case 6: + case 5: + /* five and six byte sequences are always invalid */ + if (!buf_putchar(buf, '?')) + return 0; + + break; + + default: + /* if the number of valid continuation bytes matches the + * expected number and if the sequence is legal, copy + * the bytes to the destination buffer */ + if ((v == n) && mb_is_shortest(ptr, n) && + !mb_is_surrogate(ptr, n) && !mb_is_illegal(ptr, n)) + { + /* copy sequence */ + if (!buf_append(buf, ptr, n)) + return 0; + } + + /* the found sequence is illegal, skip it */ + else + { + /* invalid sequence */ + if (!buf_putchar(buf, '?')) + return 0; + } + + break; + } + + /* advance beyound the last found valid continuation char */ + o = v; + ptr += v; + } + + /* invalid byte (0x00) */ + else + { + if (!buf_putchar(buf, '?')) /* or 0xEF, 0xBF, 0xBD */ + return 0; + + o = 1; + ptr++; + } + } + + *s = ptr; + return o; +} + +/* sanitize given string and replace all invalid UTF-8 sequences with "?" */ +char * sanitize_utf8(const char *s, unsigned int l) +{ + struct template_buffer *buf = buf_init(); + unsigned char *ptr = (unsigned char *)s; + + if (!buf) + return NULL; + + if (!_validate_utf8(&ptr, l, buf)) + { + free(buf->data); + free(buf); + return NULL; + } + + return buf_destroy(buf); +} + +/* Sanitize given string and strip all invalid XML bytes + * Validate UTF-8 sequences + * Escape XML control chars */ +char * sanitize_pcdata(const char *s, unsigned int l) +{ + struct template_buffer *buf = buf_init(); + unsigned char *ptr = (unsigned char *)s; + unsigned int o, v; + char esq[8]; + int esl; + + if (!buf) + return NULL; + + for (o = 0; o < l; o++) + { + /* Invalid XML bytes */ + if (((*ptr >= 0x00) && (*ptr <= 0x08)) || + ((*ptr >= 0x0B) && (*ptr <= 0x0C)) || + ((*ptr >= 0x0E) && (*ptr <= 0x1F)) || + (*ptr == 0x7F)) + { + ptr++; + } + + /* Escapes */ + else if ((*ptr == 0x26) || + (*ptr == 0x27) || + (*ptr == 0x22) || + (*ptr == 0x3C) || + (*ptr == 0x3E)) + { + esl = snprintf(esq, sizeof(esq), "&#%i;", *ptr); + + if (!buf_append(buf, (unsigned char *)esq, esl)) + break; + + ptr++; + } + + /* ascii char */ + else if (*ptr <= 0x7F) + { + buf_putchar(buf, *ptr++); + } + + /* multi byte sequence */ + else + { + if (!(v = _validate_utf8(&ptr, l - o, buf))) + break; + + o += (v - 1); + } + } + + return buf_destroy(buf); +} diff --git a/libs/web/src/template_utils.h b/libs/web/src/template_utils.h new file mode 100644 index 000000000..1f7d438c6 --- /dev/null +++ b/libs/web/src/template_utils.h @@ -0,0 +1,38 @@ +/* + * LuCI Template - Utility header + * + * Copyright (C) 2010 Jo-Philipp Wich + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TEMPLATE_UTILS_H_ +#define _TEMPLATE_UTILS_H_ + +#include +#include +#include + + +/* buffer object */ +struct template_buffer { + unsigned char *data; + unsigned char *dptr; + unsigned int size; + unsigned int fill; +}; + +char * sanitize_utf8(const char *s, unsigned int l); +char * sanitize_pcdata(const char *s, unsigned int l); + +#endif -- 2.25.1