From cc54db6e6df481e94d669f58fcbf58810a02504a Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 2 Dec 2017 21:21:57 +0100 Subject: [PATCH] - let the 3 relevant text functions handle UTF-8 strings These functions are: DCanvas::DrawTextCommon, V_BreakLines and FFont::StringWidth. This will allow strings from UTF-8 encoded assets to display properly, but also handle the OpenAL device name on international systems, as this will be returned as an UTF-8 string. Due to backwards compatibility needs the decoding function is rather lax to allow both UTF-8 and ISO 8859-1 to pass through correctly - and this also implies that it will allow mixed encodings which may happen if strings from different sources get concatenated. --- src/v_font.cpp | 10 +++---- src/v_text.cpp | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/v_text.h | 2 ++ 3 files changed, 81 insertions(+), 7 deletions(-) diff --git a/src/v_font.cpp b/src/v_font.cpp index b475fb558..9716f2744 100644 --- a/src/v_font.cpp +++ b/src/v_font.cpp @@ -890,9 +890,10 @@ int FFont::StringWidth(const uint8_t *string) const while (*string) { - if (*string == TEXTCOLOR_ESCAPE) + auto chr = GetCharFromString(string); + if (chr == TEXTCOLOR_ESCAPE) { - ++string; + // We do not need to check for UTF-8 in here. if (*string == '[') { while (*string != '\0' && *string != ']') @@ -906,16 +907,15 @@ int FFont::StringWidth(const uint8_t *string) const } continue; } - else if (*string == '\n') + else if (chr == '\n') { if (w > maxw) maxw = w; w = 0; - ++string; } else { - w += GetCharWidth(*string++) + GlobalKerning; + w += GetCharWidth(chr) + GlobalKerning; } } diff --git a/src/v_text.cpp b/src/v_text.cpp index cf8ce8cc7..f4829ec18 100644 --- a/src/v_text.cpp +++ b/src/v_text.cpp @@ -51,6 +51,78 @@ int ListGetInt(VMVa_List &tags); +//========================================================================== +// +// reads one character from the string. 
+// This can handle both ISO 8859-1 and UTF-8, as well as mixed strings
+// between both encodings, which may happen if inconsistent encoding is
+// used between different files in a mod.
+// Advances 'string' past the consumed bytes. An invalid sequence yields its
+// first byte unchanged; 4-byte sequences are unsupported and yield '?'.
+//
+//==========================================================================
+
+int GetCharFromString(const uint8_t *&string)
+{
+	int z = *string++;
+
+	if (z < 192)	// ASCII, or a byte that cannot start a UTF-8 sequence
+	{
+		return z;
+	}
+	else if (z <= 223)	// lead byte of a 2-byte sequence
+	{
+		int y = *string++;
+		if (y < 128 || y >= 192)
+		{
+			// not a UTF-8 sequence so return the first byte unchanged
+			string--;
+		}
+		else
+		{
+			z = (z - 192) * 64 + (y - 128);
+		}
+	}
+	else if (z >= 224 && z <= 239)	// lead byte of a 3-byte sequence
+	{
+		int y = *string++;
+		if (y < 128 || y >= 192)
+		{
+			// not a UTF-8 sequence so return the first byte unchanged
+			string--;
+		}
+		else
+		{
+			int x = *string++;
+			if (x < 128 || x >= 192)
+			{
+				// not a UTF-8 sequence so return the first byte unchanged
+				string -= 2;
+			}
+			else
+			{
+				z = (z - 224) * 4096 + (y - 128) * 64 + (x - 128);
+			}
+		}
+	}
+	else if (z >= 240)	// lead byte of a 4-byte sequence
+	{
+		int y = *string++;
+		if (y < 128 || y >= 192)
+		{
+			// not a UTF-8 sequence so return the first byte unchanged
+			string--;
+		}
+		else
+		{
+			// 4-byte UTF-8 is unsupported; skip its tail bytewise so a truncated string is never overrun
+			for (int i = 0; i < 2 && *string >= 128 && *string < 192; i++) string++;
+			return '?';
+		}
+	}
+	return z;
+}
+
 //==========================================================================
 //
 // DrawChar
@@ -170,7 +242,7 @@ void DCanvas::DrawTextCommon(FFont *font, int normalcolor, double x, double y, c
 
 	while ((const char *)ch - string < parms.maxstrlen)
 	{
-		c = *ch++;
+		c = GetCharFromString(ch);
 		if (!c)
 			break;
 
@@ -288,7 +360,7 @@ FBrokenLines *V_BreakLines (FFont *font, int maxwidth, const uint8_t *string, bo
 
 	w = 0;
 
-	while ( (c = *string++) )
+	while ( (c = GetCharFromString(string)) )
 	{
 		if (c == TEXTCOLOR_ESCAPE)
 		{
diff --git a/src/v_text.h b/src/v_text.h
index b76024fa5..cd7d3e6ad 100644
--- a/src/v_text.h
+++ b/src/v_text.h
@@ -86,4 +86,6 @@ inline FBrokenLines *V_BreakLines (FFont *font, int maxwidth, const char *str, b
 inline FBrokenLines *V_BreakLines
(FFont *font, int maxwidth, const FString &str, bool preservecolor = false, unsigned int *count = nullptr) { return V_BreakLines (font, maxwidth, (const uint8_t *)str.GetChars(), preservecolor, count); } +int GetCharFromString(const uint8_t *&string); + #endif //__V_TEXT_H__