diff --git a/src/common/filesystem/source/unicode.cpp b/src/common/filesystem/source/unicode.cpp
index d102f1fc05..7b5fd2aefb 100644
--- a/src/common/filesystem/source/unicode.cpp
+++ b/src/common/filesystem/source/unicode.cpp
@@ -121,7 +121,7 @@ void ibm437_to_utf8(const char* in, std::vector& buffer)
 
 	while (int char1 = (uint8_t)*in++)
 	{
-		if (char1 >= 0x80) char1 = ibm437map[char1];
+		if (char1 >= 0x80) char1 = ibm437map[char1 - 0x80];
 		utf8_encode(char1, buffer);
 	}
 	buffer.push_back(0);
@@ -140,4 +140,31 @@ char *tolower_normalize(const char *str)
 	return (char*)retval;
 }
 
+//==========================================================================
+//
+// validates the string for proper UTF-8
+//
+// str must be NUL-terminated. Returns true if every sequence up to the
+// terminator decodes via utf8proc_iterate, false as soon as one does not.
+// An empty string is considered valid.
+//
+//==========================================================================
+
+bool unicode_validate(const char* str)
+{
+	while (*str != 0)
+	{
+		int cp;
+		// A negative length means no explicit byte limit. The call returns
+		// the number of bytes read, or a negative error code on failure.
+		int result = utf8proc_iterate((const uint8_t*)str, -1, &cp);
+		if (result < 0) return false;
+		// Step past the decoded sequence; without this the loop would
+		// re-read the same bytes forever on any valid non-empty string.
+		str += result;
+	}
+	return true;
+}
+
+
 }
diff --git a/src/common/filesystem/source/unicode.h b/src/common/filesystem/source/unicode.h
index 6df51668d1..8783dbc718 100644
--- a/src/common/filesystem/source/unicode.h
+++ b/src/common/filesystem/source/unicode.h
@@ -6,7 +6,7 @@ namespace FileSys {
 
 void utf16_to_utf8(const unsigned short* in, std::vector& buffer);
 void ibm437_to_utf8(const char* in, std::vector& buffer);
-int unicode_tolower(int c);
 char *tolower_normalize(const char *str);
+bool unicode_validate(const char* str);
 
 }