+ /* Check syntax and work out the encoded value (if correct) */
+ if((*p & 0x80) == 0) {
+ value = *p++ & 0x7f;
+ ret = 1;
+ } else if((*p & 0xe0) == 0xc0) {
+ if(len < 2) return -1;
+ if((p[1] & 0xc0) != 0x80) return -3;
+ value = (*p++ & 0x1f) << 6;
+ value |= *p++ & 0x3f;
+ if(value < 0x80) return -4;
+ ret = 2;
+ } else if((*p & 0xf0) == 0xe0) {
+ if(len < 3) return -1;
+ if( ((p[1] & 0xc0) != 0x80)
+ || ((p[2] & 0xc0) != 0x80) ) return -3;
+ value = (*p++ & 0xf) << 12;
+ value |= (*p++ & 0x3f) << 6;
+ value |= *p++ & 0x3f;
+ if(value < 0x800) return -4;
+ ret = 3;
+ } else if((*p & 0xf8) == 0xf0) {
+ if(len < 4) return -1;
+ if( ((p[1] & 0xc0) != 0x80)
+ || ((p[2] & 0xc0) != 0x80)
+ || ((p[3] & 0xc0) != 0x80) ) return -3;
+ value = ((unsigned long)(*p++ & 0x7)) << 18;
+ value |= (*p++ & 0x3f) << 12;
+ value |= (*p++ & 0x3f) << 6;
+ value |= *p++ & 0x3f;
+ if(value < 0x10000) return -4;
+ ret = 4;
+ } else if((*p & 0xfc) == 0xf8) {
+ if(len < 5) return -1;
+ if( ((p[1] & 0xc0) != 0x80)
+ || ((p[2] & 0xc0) != 0x80)
+ || ((p[3] & 0xc0) != 0x80)
+ || ((p[4] & 0xc0) != 0x80) ) return -3;
+ value = ((unsigned long)(*p++ & 0x3)) << 24;
+ value |= ((unsigned long)(*p++ & 0x3f)) << 18;
+ value |= ((unsigned long)(*p++ & 0x3f)) << 12;
+ value |= (*p++ & 0x3f) << 6;
+ value |= *p++ & 0x3f;
+ if(value < 0x200000) return -4;
+ ret = 5;
+ } else if((*p & 0xfe) == 0xfc) {
+ if(len < 6) return -1;
+ if( ((p[1] & 0xc0) != 0x80)
+ || ((p[2] & 0xc0) != 0x80)
+ || ((p[3] & 0xc0) != 0x80)
+ || ((p[4] & 0xc0) != 0x80)
+ || ((p[5] & 0xc0) != 0x80) ) return -3;
+ value = ((unsigned long)(*p++ & 0x1)) << 30;
+ value |= ((unsigned long)(*p++ & 0x3f)) << 24;
+ value |= ((unsigned long)(*p++ & 0x3f)) << 18;
+ value |= ((unsigned long)(*p++ & 0x3f)) << 12;
+ value |= (*p++ & 0x3f) << 6;
+ value |= *p++ & 0x3f;
+ if(value < 0x4000000) return -4;
+ ret = 6;
+ } else return -2;
+ *val = value;
+ return ret;
+}
+
+/* This takes a character 'value' and writes the UTF8 encoded value in
+ * 'str' where 'str' is a buffer containing 'len' characters. Returns
+ * the number of characters written or -1 if 'len' is too small. 'str' can
+ * be set to NULL in which case it just returns the number of characters.
+ * It will need at most 6 characters.
+ */
+
+int UTF8_putc(unsigned char *str, int len, unsigned long value)
+{
+ if(!str) len = 6; /* Maximum we will need */
+ else if(len <= 0) return -1;
+ if(value < 0x80) {
+ if(str) *str = (unsigned char)value;
+ return 1;
+ }
+ if(value < 0x800) {
+ if(len < 2) return -1;
+ if(str) {
+ *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
+ *str = (unsigned char)((value & 0x3f) | 0x80);