summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHeinrich Schuchardt <xypron.glpk@gmx.de>2017-10-18 19:13:06 +0300
committerAlexander Graf <agraf@suse.de>2017-12-01 15:22:55 +0300
commitf58c5ecb87e0ab170dfa92cd0c1052dd18fffc2c (patch)
treefd4eb80cdc17f2874685215a79bb4b9ef93d9f93
parent30a0045a54e96e799b0694451a977dba572675fb (diff)
downloadu-boot-f58c5ecb87e0ab170dfa92cd0c1052dd18fffc2c.tar.xz
efi_loader: new function utf8_to_utf16
Provide a conversion function from utf8 to utf16. Add missing #include <linux/types.h> in include/charset.h. Remove superfluous #include <common.h> in lib/charset.c. Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de> Signed-off-by: Alexander Graf <agraf@suse.de>
-rw-r--r--include/charset.h15
-rw-r--r--lib/charset.c57
2 files changed, 71 insertions, 1 deletions
diff --git a/include/charset.h b/include/charset.h
index 37a3278499..2662c2f7c9 100644
--- a/include/charset.h
+++ b/include/charset.h
@@ -9,6 +9,8 @@
#ifndef __CHARSET_H_
#define __CHARSET_H_
+#include <linux/types.h>
+
#define MAX_UTF8_PER_UTF16 3
/**
@@ -62,4 +64,17 @@ uint16_t *utf16_strdup(const uint16_t *s);
*/
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
+/**
+ * utf8_to_utf16() - Convert a utf8 string to utf16
+ *
+ * Converts up to 'size' bytes of the utf8 string 'src' to utf16
+ * written to the 'dest' buffer. Stops after copying a 0x00 byte.
+ *
+ * @dest the destination buffer to write the utf16 characters
+ * @src the source utf8 string
+ * @size maximum number of utf8 bytes to convert
+ * @return the pointer to the first unwritten utf16 character in 'dest'
+ */
+uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size);
+
#endif /* __CHARSET_H_ */
diff --git a/lib/charset.c b/lib/charset.c
index ff76e88c77..8cd17ea1cb 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -6,7 +6,6 @@
* SPDX-License-Identifier: GPL-2.0+
*/
-#include <common.h>
#include <charset.h>
#include <malloc.h>
@@ -99,3 +98,59 @@ uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
return dest;
}
+
/**
 * utf8_to_utf16() - Convert a UTF-8 string to UTF-16
 *
 * Reads at most 'size' bytes from 'src' and writes the corresponding UTF-16
 * code units to 'dest'. Conversion stops early when a 0x00 byte is found; the
 * terminator itself is copied to 'dest'.
 *
 * Every malformed sequence is replaced by a single '?'. This covers stray
 * continuation bytes, invalid lead bytes (0xf8..0xff), sequences that are cut
 * short (by 'size', by the terminator or by any non-continuation byte),
 * overlong encodings, encoded surrogates (U+D800..U+DFFF) and values above
 * U+10FFFF. A byte that interrupts a multi-byte sequence is not discarded: it
 * is decoded again as the start of the next character, so a terminator can
 * never be skipped.
 *
 * At most one UTF-16 code unit is produced per source byte consumed, so
 * 'dest' must have room for 'size' code units.
 *
 * @dest	destination buffer for the UTF-16 code units
 * @src		source UTF-8 string
 * @size	maximum number of bytes of 'src' to convert
 * @return	pointer to the first unwritten code unit in 'dest'
 */
uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size)
{
	while (size--) {
		/* The lead byte is always consumed, legal or not */
		uint8_t lead = *src++;
		/* Smallest code point that legitimately needs this length */
		uint32_t min_code = 0;
		uint32_t code;
		int extension_bytes = 0;

		if (lead <= 0x7f) {
			code = lead;
			if (!code) {
				/* Copy the terminator and stop */
				*dest++ = 0;
				break;
			}
		} else if (lead <= 0xbf) {
			/* Continuation byte without a lead byte */
			code = '?';
		} else if (lead <= 0xdf) {
			code = lead & 0x1f;
			extension_bytes = 1;
			min_code = 0x80;
		} else if (lead <= 0xef) {
			code = lead & 0x0f;
			extension_bytes = 2;
			min_code = 0x800;
		} else if (lead <= 0xf7) {
			code = lead & 0x07;
			extension_bytes = 3;
			min_code = 0x10000;
		} else {
			/* 0xf8..0xff never occur in UTF-8 */
			code = '?';
		}

		for (; extension_bytes && size; --extension_bytes) {
			/*
			 * Leave a non-continuation byte in place so that the
			 * next iteration decodes it. It may be the terminator.
			 */
			if ((*src & 0xc0) != 0x80)
				break;
			code = (code << 6) | (*src++ & 0x3f);
			--size;
		}

		/*
		 * extension_bytes is still non-zero if the sequence was cut
		 * short. The range checks reject overlong forms, surrogates
		 * and values beyond the Unicode code space.
		 */
		if (extension_bytes || code < min_code || code > 0x10ffff ||
		    (code >= 0xd800 && code <= 0xdfff))
			code = '?';

		if (code < 0x10000) {
			*dest++ = (uint16_t)code;
		} else {
			/*
			 * High surrogate, simplified expression for
			 * (((code - 0x10000) >> 10) & 0x3ff) | 0xd800
			 */
			*dest++ = (uint16_t)((code >> 10) + 0xd7c0);
			*dest++ = (uint16_t)((code & 0x3ff) | 0xdc00);
		}
	}
	return dest;
}