efi: x86: Handle arbitrary Unicode characters

Instead of truncating UTF-16 on the assumption that all characters are
ASCII, properly convert it to UTF-8.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
[ Bug and style fixes. ]
Signed-off-by: Roy Franz <roy.franz@linaro.org>
Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
This commit is contained in:
H. Peter Anvin 2013-09-20 09:55:39 -05:00 committed by Matt Fleming
parent 9bb40191e8
commit c625d1c203
2 changed files with 68 additions and 22 deletions

View file

@ -1087,8 +1087,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image,
&options_size);
cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
if (!cmdline_ptr)
goto fail;
hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;

View file

@ -535,53 +535,100 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
return status;
}
/*
 * Upper bound on the number of UTF-8 bytes needed to encode one UTF-16
 * code unit:
 *   below 0x80   -> 1 byte
 *   below 0x800  -> 2 bytes
 *   otherwise    -> 3 bytes
 *
 * Each half of a surrogate pair is counted as 3 bytes, so a pair is
 * budgeted 6 bytes although it encodes to 4. The result is therefore an
 * overestimate for surrogates, which is harmless when sizing a buffer.
 */
static int efi_utf8_bytes(u16 c)
{
	if (c < 0x80)
		return 1;
	if (c < 0x800)
		return 2;
	return 3;
}
/*
 * Encode @n UTF-16 code units starting at @src as UTF-8 into @dst.
 *
 * The input does not have to be NUL terminated; exactly @n code units are
 * consumed. A high surrogate immediately followed (within the @n units) by
 * a low surrogate is combined into one code point above U+FFFF. Any other
 * surrogate is unmatched and is emitted as U+FFFD.
 *
 * No terminator is written. Returns a pointer one past the last byte
 * stored in @dst, so the caller can append its own terminator there.
 */
static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
{
	while (n-- != 0) {
		unsigned int cp = *src++;
		int high = cp >= 0xd800 && cp <= 0xdbff;

		/* n now counts the units still unread after this one. */
		if (high && n != 0 && *src >= 0xdc00 && *src <= 0xdfff) {
			/* Valid pair: fold both halves into one code point. */
			cp = 0x10000 + ((cp & 0x3ff) << 10) + (*src & 0x3ff);
			src++;
			n--;
		} else if (cp >= 0xd800 && cp <= 0xdfff) {
			cp = 0xfffd; /* Unmatched surrogate */
		}

		if (cp < 0x80) {
			/* 1 byte: 0xxxxxxx */
			*dst++ = cp;
		} else if (cp < 0x800) {
			/* 2 bytes: 110xxxxx 10xxxxxx */
			*dst++ = 0xc0 | (cp >> 6);
			*dst++ = 0x80 | (cp & 0x3f);
		} else if (cp < 0x10000) {
			/* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
			*dst++ = 0xe0 | (cp >> 12);
			*dst++ = 0x80 | ((cp >> 6) & 0x3f);
			*dst++ = 0x80 | (cp & 0x3f);
		} else {
			/* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			*dst++ = 0xf0 | (cp >> 18);
			*dst++ = 0x80 | ((cp >> 12) & 0x3f);
			*dst++ = 0x80 | ((cp >> 6) & 0x3f);
			*dst++ = 0x80 | (cp & 0x3f);
		}
	}

	return dst;
}
/*
 * Convert the UEFI command line (the UTF-16 load options of the loaded
 * image) into a newly allocated, NUL-terminated byte string to pass to
 * the kernel.
 *
 * The load options are scanned up to the first NUL, the first '\n', or
 * image->load_options_size / 2 code units. A buffer is then obtained from
 * efi_low_alloc() and filled with the converted text.
 *
 * The size of the allocation (including the NUL terminator) is returned
 * in *cmd_line_len. Returns NULL if the allocation does not succeed.
 *
 * NOTE(review): this listing appears to be a diff rendering with the
 * removed and added lines interleaved. The efi_convert_cmdline_to_ascii /
 * options_size / load_options_size lines look like the pre-change variant,
 * which stores each 16-bit unit into a byte. The efi_convert_cmdline /
 * options_bytes / options_chars lines look like the post-change variant,
 * which emits UTF-8. Confirm against the real tree before relying on
 * either reading.
 */
static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg,
efi_loaded_image_t *image,
int *cmd_line_len)
static char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
efi_loaded_image_t *image,
int *cmd_line_len)
{
u16 *s2;
const u16 *s2;
u8 *s1 = NULL;
unsigned long cmdline_addr = 0;
int load_options_size = image->load_options_size / 2; /* ASCII */
void *options = image->load_options;
int options_size = 0;
int load_options_chars = image->load_options_size / 2; /* UTF-16 */
const u16 *options = image->load_options;
int options_bytes = 0; /* UTF-8 bytes */
int options_chars = 0; /* UTF-16 chars */
efi_status_t status;
int i;
u16 zero = 0;
/*
 * First pass: measure the command line. The UTF-8 variant also sums
 * efi_utf8_bytes() per code unit to size the output buffer.
 *
 * NOTE(review): both loop conditions read *s2 before testing the
 * length bound, so when the load options are not terminated within
 * load_options_size the code unit just past the end is read. Consider
 * testing the bound first.
 */
if (options) {
s2 = options;
while (*s2 && *s2 != '\n' && options_size < load_options_size) {
s2++;
options_size++;
while (*s2 && *s2 != '\n'
&& options_chars < load_options_chars) {
options_bytes += efi_utf8_bytes(*s2++);
options_chars++;
}
}
if (options_size == 0) {
if (!options_chars) {
/* No command line options, so return empty string*/
options_size = 1;
options = &zero;
}
options_size++; /* NUL termination */
options_bytes++; /* NUL termination */
/* Allocate the output buffer; its address comes back in cmdline_addr. */
status = efi_low_alloc(sys_table_arg, options_size, 0, &cmdline_addr);
status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr);
if (status != EFI_SUCCESS)
return NULL;
/*
 * Second pass: fill the buffer, either by storing each 16-bit unit
 * into a byte (the for loop) or by encoding the measured units with
 * efi_utf16_to_utf8(), then terminate it.
 */
s1 = (u8 *)cmdline_addr;
s2 = (u16 *)options;
for (i = 0; i < options_size - 1; i++)
*s1++ = *s2++;
s2 = (const u16 *)options;
s1 = efi_utf16_to_utf8(s1, s2, options_chars);
*s1 = '\0';
/* Report the allocated size, which includes the terminator. */
*cmd_line_len = options_size;
*cmd_line_len = options_bytes;
return (char *)cmdline_addr;
}