lib: add some utf16 handling helpers
[oweals/u-boot.git] / lib / charset.c
1 /*
2  *  charset conversion utils
3  *
4  *  Copyright (c) 2017 Rob Clark
5  *
6  *  SPDX-License-Identifier:     GPL-2.0+
7  */
8
9 #include <common.h>
10 #include <charset.h>
11 #include <malloc.h>
12
13 /*
14  * utf8/utf16 conversion mostly lifted from grub
15  */
16
/*
 * Count the 16-bit code units in a zero-terminated UTF-16 string.
 *
 * The terminating zero unit is not counted. Each code unit counts as one,
 * so a surrogate pair contributes two to the result.
 */
size_t utf16_strlen(const uint16_t *in)
{
	const uint16_t *end = in;

	while (*end)
		end++;

	return end - in;
}
23
/*
 * Count the 16-bit code units in a UTF-16 string, looking at no more than
 * @count units.
 *
 * Returns the number of units before the first zero unit, or @count if no
 * zero unit occurs within the first @count units. Nothing is read from @in
 * when @count is 0.
 */
size_t utf16_strnlen(const uint16_t *in, size_t count)
{
	size_t len = 0;

	while (len < count && in[len])
		len++;

	return len;
}
30
/*
 * Copy a zero-terminated UTF-16 string, terminator included, from @src
 * to @dest.
 *
 * No bounds are checked: @dest has to be large enough for every unit of
 * @src plus the terminating zero unit.
 *
 * Returns @dest.
 */
uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src)
{
	uint16_t *out = dest;
	uint16_t unit;

	do {
		unit = *src++;
		*out++ = unit;
	} while (unit);

	return dest;
}
40
/*
 * Duplicate a zero-terminated UTF-16 string into a buffer obtained from
 * malloc().
 *
 * Returns the new copy, or NULL if @s is NULL or the allocation fails.
 * This function never frees the buffer; releasing it is left to the caller.
 */
uint16_t *utf16_strdup(const uint16_t *s)
{
	uint16_t *copy;

	if (!s)
		return NULL;

	/* one extra unit for the terminating zero */
	copy = malloc((utf16_strlen(s) + 1) * sizeof(*copy));
	if (!copy)
		return NULL;

	utf16_strcpy(copy, s);
	return copy;
}
49
/*
 * Convert UTF-16 to UTF-8.
 *
 * Exactly @size 16-bit code units are read from @src; a zero unit is not
 * treated as a terminator but converted like any other unit. The UTF-8
 * encoding is written to @dest and no terminating NUL is appended.
 *
 * A high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
 * (0xDC00-0xDFFF) is combined into one 4-byte sequence. Every unpaired
 * surrogate is replaced by a single '?': a high surrogate followed by
 * something other than a low surrogate, a high surrogate in the last
 * position, or a low surrogate without a preceding high surrogate.
 *
 * The output never exceeds 3 bytes per input code unit, so a @dest buffer
 * of 3 * @size bytes is always sufficient.
 *
 * Returns a pointer just past the last byte written to @dest.
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	/* pending high surrogate waiting for its low half, 0 if none */
	uint32_t code_high = 0;

	while (size) {
		uint32_t code = *src;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair. */
				code = ((code_high - 0xD800) << 10) +
				       (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;

				src++;
				size--;
			} else {
				/*
				 * The pending high surrogate is unpaired. The
				 * current unit may be valid on its own, so it
				 * is deliberately not consumed here: neither
				 * src nor size moves, and the next iteration
				 * converts it without a pending surrogate.
				 */
				*dest++ = '?';
			}

			code_high = 0;
			continue;
		}

		src++;
		size--;

		if (code <= 0x007F) {
			*dest++ = code;
		} else if (code <= 0x07FF) {
			*dest++ = (code >> 6) | 0xC0;
			*dest++ = (code & 0x3F) | 0x80;
		} else if (code >= 0xD800 && code <= 0xDBFF) {
			/* emit nothing until the next unit has been seen */
			code_high = code;
		} else if (code >= 0xDC00 && code <= 0xDFFF) {
			/* Error: low surrogate without a high surrogate */
			*dest++ = '?';
		} else {
			/*
			 * Rest of the BMP. A 16-bit unit cannot be 0x10000 or
			 * above, so no 4-byte case is needed here.
			 */
			*dest++ = (code >> 12) | 0xE0;
			*dest++ = ((code >> 6) & 0x3F) | 0x80;
			*dest++ = (code & 0x3F) | 0x80;
		}
	}

	/* Error: input ended in the middle of a surrogate pair */
	if (code_high)
		*dest++ = '?';

	return dest;
}