/*
 * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
11 #include "internal/cryptlib.h"
12 #include <openssl/asn1.h>
/*
 * Parse one character from the start of a UTF8 string.
 *
 * 'str' points to the string and 'len' is the number of bytes available.
 * On success 'val' is set to the value of the decoded character and the
 * number of bytes consumed (1 to 6) is returned. If 'len' is zero or
 * negative, 0 is returned and 'val' is left untouched.
 *
 * On failure 'val' is left untouched and a negative error code is returned:
 * -1 = string too short
 * -2 = illegal character
 * -3 = subsequent characters not of the form 10xxxxxx
 * -4 = character encoded incorrectly (not minimal length).
 */
int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
{
    unsigned long value;
    unsigned long min_value;    /* smallest value needing 'nbytes' bytes */
    int nbytes;
    int i;

    if (len <= 0)
        return 0;

    /* Single byte (0xxxxxxx): nothing further to validate. */
    if ((str[0] & 0x80) == 0) {
        *val = str[0] & 0x7f;
        return 1;
    }

    /* The lead byte fixes the sequence length and holds the top bits. */
    if ((str[0] & 0xe0) == 0xc0) {
        nbytes = 2;
        value = str[0] & 0x1f;
        min_value = 0x80;
    } else if ((str[0] & 0xf0) == 0xe0) {
        nbytes = 3;
        value = str[0] & 0xf;
        min_value = 0x800;
    } else if ((str[0] & 0xf8) == 0xf0) {
        nbytes = 4;
        value = str[0] & 0x7;
        min_value = 0x10000;
    } else if ((str[0] & 0xfc) == 0xf8) {
        nbytes = 5;
        value = str[0] & 0x3;
        min_value = 0x200000;
    } else if ((str[0] & 0xfe) == 0xfc) {
        nbytes = 6;
        value = str[0] & 0x1;
        min_value = 0x4000000;
    } else {
        /* A stray continuation byte (10xxxxxx) or 0xfe/0xff. */
        return -2;
    }

    /* Must come before any str[1..] access so we never read past 'len'. */
    if (len < nbytes)
        return -1;

    /* Every trailing byte must look like 10xxxxxx. */
    for (i = 1; i < nbytes; i++) {
        if ((str[i] & 0xc0) != 0x80)
            return -3;
    }

    /* Append six payload bits per continuation byte (at most 31 in total). */
    for (i = 1; i < nbytes; i++)
        value = (value << 6) | (unsigned long)(str[i] & 0x3f);

    /* Reject encodings that are longer than the value requires. */
    if (value < min_value)
        return -4;

    *val = value;
    return nbytes;
}
/*
 * Write the UTF8 encoding of the character 'value' into 'str', a buffer
 * with room for 'len' bytes. Returns the number of bytes written, or -1 if
 * 'len' is too small (including zero or negative when 'str' is given).
 *
 * 'str' may be NULL, in which case nothing is written, 'len' is ignored and
 * only the number of bytes the encoding needs is returned. An encoding
 * needs at most 6 bytes.
 *
 * Only the low 31 bits of 'value' fit in the 6 byte form; any higher bits
 * are silently dropped.
 */
int UTF8_putc(unsigned char *str, int len, unsigned long value)
{
    unsigned char lead_marker;  /* high bits identifying the sequence length */
    unsigned char lead_mask;    /* payload bits carried by the lead byte */
    int nbytes;
    int i;

    if (!str)
        len = 6;                /* Maximum we will need */
    else if (len <= 0)
        return -1;

    /* Single byte (0xxxxxxx) is stored as is. */
    if (value < 0x80) {
        if (str)
            *str = (unsigned char)value;
        return 1;
    }

    /* Pick the shortest form able to hold 'value'. */
    if (value < 0x800) {
        nbytes = 2;
        lead_marker = 0xc0;
        lead_mask = 0x1f;
    } else if (value < 0x10000) {
        nbytes = 3;
        lead_marker = 0xe0;
        lead_mask = 0xf;
    } else if (value < 0x200000) {
        nbytes = 4;
        lead_marker = 0xf0;
        lead_mask = 0x7;
    } else if (value < 0x4000000) {
        nbytes = 5;
        lead_marker = 0xf8;
        lead_mask = 0x3;
    } else {
        nbytes = 6;
        lead_marker = 0xfc;
        lead_mask = 0x1;
    }

    if (len < nbytes)
        return -1;

    if (str) {
        /* Lead byte takes the topmost bits, then six bits per trailing byte. */
        str[0] = (unsigned char)(((value >> (6 * (nbytes - 1))) & lead_mask)
                                 | lead_marker);
        for (i = 1; i < nbytes; i++)
            str[i] = (unsigned char)(((value >> (6 * (nbytes - 1 - i))) & 0x3f)
                                     | 0x80);
    }
    return nbytes;
}