efi_loader: new function utf8_to_utf16
author Heinrich Schuchardt <xypron.glpk@gmx.de>
Wed, 18 Oct 2017 16:13:06 +0000 (18:13 +0200)
committer Alexander Graf <agraf@suse.de>
Fri, 1 Dec 2017 12:22:55 +0000 (13:22 +0100)
Provide a conversion function from utf8 to utf16.

Add missing #include <linux/types.h> in include/charset.h.
Remove superfluous #include <common.h> in lib/charset.c.

Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
include/charset.h
lib/charset.c

index 37a32784998aaf633a786ba31b9d87334bdb56fd..2662c2f7c9a68cf6647f6799a63ca9ba17fb28b7 100644 (file)
@@ -9,6 +9,8 @@
 #ifndef __CHARSET_H_
 #define __CHARSET_H_
 
+#include <linux/types.h>
+
 #define MAX_UTF8_PER_UTF16 3
 
 /**
@@ -62,4 +64,17 @@ uint16_t *utf16_strdup(const uint16_t *s);
  */
 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
 
+/**
+ * utf8_to_utf16() - Convert a utf8 string to utf16
+ *
+ * Converts up to 'size' bytes of the utf8 string 'src' to utf16
+ * written to the 'dest' buffer. Stops at 0x00.
+ *
+ * @dest   the destination buffer to write the utf16 characters
+ * @src    the source utf8 string
+ * @size   maximum number of utf8 bytes to convert
+ * @return the pointer to the first unwritten utf16 character in 'dest'
+ */
+uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size);
+
 #endif /* __CHARSET_H_ */
index ff76e88c776b1b39a715ae6081a0243bdd83119d..8cd17ea1cb78363b17f049d05e4fb0f5153da37c 100644 (file)
@@ -6,7 +6,6 @@
  *  SPDX-License-Identifier:     GPL-2.0+
  */
 
-#include <common.h>
 #include <charset.h>
 #include <malloc.h>
 
@@ -99,3 +98,59 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
 
        return dest;
 }
+
/**
 * utf8_to_utf16() - Convert a utf8 string to utf16
 *
 * Reads at most 'size' bytes from 'src' and writes the corresponding utf16
 * code units to 'dest'. Conversion stops once a 0x00 byte has been copied or
 * 'size' bytes have been consumed, whichever comes first. No terminator is
 * appended when the input is cut off by 'size'.
 *
 * Code points above 0xffff are written as a surrogate pair. A single '?' is
 * written for every malformed sequence: a continuation byte without a lead
 * byte, the bytes 0xf8..0xff, a truncated or overlong sequence, an encoded
 * surrogate (0xd800..0xdfff) or a code point above 0x10ffff.
 *
 * Every code unit written consumes at least one input byte, so 'dest' never
 * needs room for more than 'size' utf16 code units.
 *
 * @dest   the destination buffer to write the utf16 code units
 * @src    the source utf8 string
 * @size   maximum number of bytes of 'src' to consume
 * @return the pointer to the first unwritten element of 'dest'
 */
uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size)
{
	while (size--) {
		/* Always consume the lead byte so bad input cannot stall us */
		uint8_t lead = *src++;
		uint32_t code;
		/* Smallest code point that legitimately needs this length */
		uint32_t min_code = 0;
		int extension_bytes = 0;

		if (lead <= 0x7f) {
			code = lead;
			/* Exit on zero byte, after it has been copied */
			if (!code)
				size = 0;
		} else if (lead <= 0xbf) {
			/* Illegal code: continuation byte without lead byte */
			code = '?';
		} else if (lead <= 0xdf) {
			code = lead & 0x1f;
			extension_bytes = 1;
			min_code = 0x80;
		} else if (lead <= 0xef) {
			code = lead & 0x0f;
			extension_bytes = 2;
			min_code = 0x800;
		} else if (lead <= 0xf7) {
			code = lead & 0x07;
			extension_bytes = 3;
			min_code = 0x10000;
		} else {
			/* Illegal code: 0xf8..0xff never occur in utf8 */
			code = '?';
		}

		/*
		 * A byte that is not a continuation byte is left in place: it
		 * may be the terminating 0x00 or the start of the next
		 * character and is handled by the next loop iteration.
		 * 'size' is checked first so no byte beyond the limit is read.
		 */
		while (extension_bytes && size && (*src & 0xc0) == 0x80) {
			code = (code << 6) | (*src++ & 0x3f);
			--size;
			--extension_bytes;
		}

		/* Incomplete, overlong, out of range or surrogate code point */
		if (extension_bytes || code < min_code || code > 0x10ffff ||
		    (code >= 0xd800 && code <= 0xdfff))
			code = '?';

		if (code < 0x10000) {
			*dest++ = (uint16_t)code;
		} else {
			/*
			 * Simplified expression for
			 * (((code - 0x10000) >> 10) & 0x3ff) | 0xd800
			 */
			*dest++ = (uint16_t)((code >> 10) + 0xd7c0);
			*dest++ = (uint16_t)((code & 0x3ff) | 0xdc00);
		}
	}
	return dest;
}