diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 7541598e06a..45d8497af81 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -155,7 +155,13 @@ void String::append_latin1(const Span &p_cstr) { for (; src < end; ++src, ++dst) { // If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly. - *dst = static_cast(*src); + if (unlikely(*src == '\0')) { + // NUL in string is allowed by the unicode standard, but unsupported in our implementation right now. + print_unicode_error("Unexpected NUL character", true); + *dst = _replacement_char; + } else { + *dst = static_cast(*src); + } } *dst = 0; } @@ -174,17 +180,19 @@ void String::append_utf32(const Span &p_cstr) { // Copy the string, and check for UTF-32 problems. for (; src < end; ++src, ++dst) { const char32_t chr = *src; - if ((chr & 0xfffff800) == 0xd800) { + if (unlikely(chr == U'\0')) { + // NUL in string is allowed by the unicode standard, but unsupported in our implementation right now. + print_unicode_error("Unexpected NUL character", true); + *dst = _replacement_char; + } else if (unlikely((chr & 0xfffff800) == 0xd800)) { print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr), true); *dst = _replacement_char; - continue; - } - if (chr > 0x10ffff) { + } else if (unlikely(chr > 0x10ffff)) { print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr), true); *dst = _replacement_char; - continue; + } else { + *dst = chr; } - *dst = chr; } *dst = 0; } @@ -1737,7 +1745,11 @@ Error String::append_ascii(const Span &p_range) { for (; src < end; ++src, ++dst) { // If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly. const uint8_t chr = *src; - if (chr > 127) { + if (unlikely(chr == '\0')) { + // NUL in string is allowed by the unicode standard, but unsupported in our implementation right now. + print_unicode_error("Unexpected NUL character", true); + *dst = _replacement_char; + } else if (unlikely(chr > 127)) { print_unicode_error(vformat("Invalid ASCII codepoint (%x)", (uint32_t)chr), true); decode_failed = true; *dst = _replacement_char;