- let the 3 relevant text functions handle UTF-8 strings

These functions are: DCanvas::DrawTextCommon, V_BreakLines and FFont::StringWidth.
This allows strings from UTF-8 encoded assets to display properly, and it also handles the OpenAL device name on international systems, which is returned as a UTF-8 string.

Due to backwards compatibility needs, the decoding function is rather lax so that both UTF-8 and ISO 8859-1 pass through correctly. This also implies that it accepts mixed encodings, which may occur if strings from different sources get concatenated.
This commit is contained in:
Christoph Oelckers 2017-12-02 21:21:57 +01:00
parent 7cbcbe66c0
commit cc54db6e6d
3 changed files with 81 additions and 7 deletions

View file

@ -890,9 +890,10 @@ int FFont::StringWidth(const uint8_t *string) const
while (*string)
{
if (*string == TEXTCOLOR_ESCAPE)
auto chr = GetCharFromString(string);
if (chr == TEXTCOLOR_ESCAPE)
{
++string;
// We do not need to check for UTF-8 in here.
if (*string == '[')
{
while (*string != '\0' && *string != ']')
@ -906,16 +907,15 @@ int FFont::StringWidth(const uint8_t *string) const
}
continue;
}
else if (*string == '\n')
else if (chr == '\n')
{
if (w > maxw)
maxw = w;
w = 0;
++string;
}
else
{
w += GetCharWidth(*string++) + GlobalKerning;
w += GetCharWidth(chr) + GlobalKerning;
}
}

View file

@ -51,6 +51,78 @@
int ListGetInt(VMVa_List &tags);
//==========================================================================
//
// GetCharFromString
//
// Reads one character from the string and advances 'string' past the
// bytes that were consumed.
//
// This can handle both ISO 8859-1 and UTF-8, as well as mixed strings
// between both encodings, which may happen if inconsistent encoding is
// used between different files in a mod.
//
// Decoding rules:
//  - Bytes below 192 are returned unchanged (ASCII, the NUL terminator and
//    stray continuation bytes).
//  - A lead byte followed by the required number of continuation bytes
//    (128..191) is decoded as a 2- or 3-byte UTF-8 sequence.
//  - A complete 4-byte sequence is consumed and reported as '?', because
//    code points outside the BMP are not supported here.
//  - Anything else is treated as a single ISO 8859-1 character: the lead
//    byte is returned unchanged and only that one byte is consumed.
//
// Every following byte is validated before it is consumed, so a sequence
// that is cut short by the NUL terminator can never move 'string' past
// the end of the string.
//
//==========================================================================
int GetCharFromString(const uint8_t *&string)
{
	// UTF-8 continuation bytes have the bit pattern 10xxxxxx.
	auto isContinuation = [](int byte) { return byte >= 128 && byte < 192; };

	const int lead = *string++;

	if (lead < 192)
	{
		return lead;
	}

	// From here on 'string' points at the byte following the lead byte.
	// The && chains short-circuit, so string[n + 1] is only inspected after
	// string[n] was found to be a (non-NUL) continuation byte.
	if (lead <= 223)
	{
		// 2-byte sequence: 110xxxxx 10xxxxxx
		if (isContinuation(string[0]))
		{
			const int code = (lead - 192) * 64 + (string[0] - 128);
			string += 1;
			return code;
		}
	}
	else if (lead <= 239)
	{
		// 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
		if (isContinuation(string[0]) && isContinuation(string[1]))
		{
			const int code = (lead - 224) * 4096 + (string[0] - 128) * 64 + (string[1] - 128);
			string += 2;
			return code;
		}
	}
	else
	{
		// 4-byte sequence: we do not support 4-Byte UTF-8 here, but a
		// complete sequence is still skipped as a whole.
		if (isContinuation(string[0]) && isContinuation(string[1]) && isContinuation(string[2]))
		{
			string += 3;
			return '?';
		}
	}

	// not an UTF-8 sequence so return the first byte unchanged
	return lead;
}
//==========================================================================
//
// DrawChar
@ -170,7 +242,7 @@ void DCanvas::DrawTextCommon(FFont *font, int normalcolor, double x, double y, c
while ((const char *)ch - string < parms.maxstrlen)
{
c = *ch++;
c = GetCharFromString(ch);
if (!c)
break;
@ -288,7 +360,7 @@ FBrokenLines *V_BreakLines (FFont *font, int maxwidth, const uint8_t *string, bo
w = 0;
while ( (c = *string++) )
while ( (c = GetCharFromString(string)) )
{
if (c == TEXTCOLOR_ESCAPE)
{

View file

@ -86,4 +86,6 @@ inline FBrokenLines *V_BreakLines (FFont *font, int maxwidth, const char *str, b
// Convenience overload for FString input: exposes the string's character
// data as unsigned bytes and forwards to the uint8_t based V_BreakLines.
inline FBrokenLines *V_BreakLines (FFont *font, int maxwidth, const FString &str, bool preservecolor = false, unsigned int *count = nullptr)
{
	const uint8_t *bytes = (const uint8_t *)str.GetChars();
	return V_BreakLines (font, maxwidth, bytes, preservecolor, count);
}
int GetCharFromString(const uint8_t *&string);
#endif //__V_TEXT_H__