- made the console Unicode-capable.

This also necessitated some character remapping in the console font to move the Windows-1252 extra characters to their proper Unicode code points.
This commit is contained in:
Christoph Oelckers 2019-02-16 13:05:19 +01:00
parent 8018ebfab5
commit 64685705d0
10 changed files with 454 additions and 368 deletions

View file

@ -1271,6 +1271,7 @@ set (PCH_SOURCES
utility/name.cpp
utility/s_playlist.cpp
utility/v_collection.cpp
utility/utf8.cpp
utility/zstrformat.cpp
)

View file

@ -61,6 +61,7 @@
#include "c_consolebuffer.h"
#include "g_levellocals.h"
#include "vm.h"
#include "utf8.h"
#include "gi.h"
@ -167,17 +168,19 @@ struct History
struct FCommandBuffer
{
private:
FString Text; // The actual command line text
unsigned CursorPos;
unsigned StartPos; // First character to display
unsigned CursorPos = 0;
unsigned StartPos = 0; // First character to display
unsigned CursorPosChars = 0;
unsigned StartPosChars = 0;
FString YankBuffer; // Deleted text buffer
bool AppendToYankBuffer; // Append consecutive deletes to buffer
FCommandBuffer()
{
CursorPos = StartPos = 0;
}
public:
bool AppendToYankBuffer = false; // Append consecutive deletes to buffer
FCommandBuffer() = default;
FCommandBuffer(const FCommandBuffer &o)
{
@ -186,6 +189,16 @@ struct FCommandBuffer
StartPos = o.StartPos;
}
// Returns a copy of the current command line text (Text itself is private).
FString GetText() const
{
return Text;
}
// Returns Text.Len() for the command line, so that callers can test for an
// empty line without needing access to the private Text member.
size_t TextLength() const
{
return Text.Len();
}
void Draw(int x, int y, int scale, bool cursor)
{
if (scale == 1)
@ -197,7 +210,7 @@ struct FCommandBuffer
if (cursor)
{
screen->DrawChar(ConFont, CR_YELLOW,
x + ConFont->GetCharWidth(0x1c) + (CursorPos - StartPos) * ConFont->GetCharWidth(0xb),
x + ConFont->GetCharWidth(0x1c) + (CursorPosChars - StartPosChars) * ConFont->GetCharWidth(0xb),
y, '\xb', TAG_DONE);
}
}
@ -217,7 +230,7 @@ struct FCommandBuffer
if (cursor)
{
screen->DrawChar(ConFont, CR_YELLOW,
x + ConFont->GetCharWidth(0x1c) + (CursorPos - StartPos) * ConFont->GetCharWidth(0xb),
x + ConFont->GetCharWidth(0x1c) + (CursorPosChars - StartPosChars) * ConFont->GetCharWidth(0xb),
y, '\xb',
DTA_VirtualWidth, screen->GetWidth() / scale,
DTA_VirtualHeight, screen->GetHeight() / scale,
@ -226,108 +239,127 @@ struct FCommandBuffer
}
}
// Converts a character count into the byte offset of the character that
// follows the first 'chars' characters of Text.
// A byte counts as the start of a character when it is not a UTF-8
// continuation byte (10xxxxxx). The result is clamped to the length of Text,
// so a count larger than the number of characters yields the end of the text
// instead of scanning past it.
unsigned BytesForChars(unsigned chars)
{
	const unsigned length = (unsigned)Text.Len();
	unsigned bytes = 0;

	// Consume 'chars' lead bytes without ever leaving the text.
	while (chars > 0 && bytes < length)
	{
		if ((Text[bytes++] & 0xc0) != 0x80) chars--;
	}
	// The loop above stops directly behind the lead byte of the last counted
	// character. Skip that character's continuation bytes as well, otherwise
	// the returned offset points into the middle of a multi-byte sequence.
	// (bytes > 0 keeps the result at 0 when no character was requested.)
	while (bytes > 0 && bytes < length && (Text[bytes] & 0xc0) == 0x80)
	{
		bytes++;
	}
	return bytes;
}
void MakeStartPosGood()
{
int n = StartPos;
int n = StartPosChars;
unsigned cols = ConCols / active_con_scale();
if (StartPos >= Text.Len())
if (StartPosChars >= Text.CharacterCount())
{ // Start of visible line is beyond end of line
n = CursorPos - cols + 2;
n = CursorPosChars - cols + 2;
}
if ((CursorPos - StartPos) >= cols - 2)
if ((CursorPosChars - StartPosChars) >= cols - 2)
{ // The cursor is beyond the visible part of the line
n = CursorPos - cols + 2;
n = CursorPosChars - cols + 2;
}
if (StartPos > CursorPos)
if (StartPosChars > CursorPosChars)
{ // The cursor is in front of the visible part of the line
n = CursorPos;
n = CursorPosChars;
}
StartPos = MAX(0, n);
}
unsigned WordBoundaryRight()
{
unsigned index = CursorPos;
while (index < Text.Len() && Text[index] == ' ') {
index++;
}
while (index < Text.Len() && Text[index] != ' ') {
index++;
}
return index;
}
unsigned WordBoundaryLeft()
{
int index = CursorPos - 1;
while (index > -1 && Text[index] == ' ') {
index--;
}
while (index > -1 && Text[index] != ' ') {
index--;
}
return (unsigned)index + 1;
StartPosChars = MAX(0, n);
StartPos = BytesForChars(StartPosChars);
}
// Puts the cursor and the first displayed position at the very beginning of
// the line, resetting the byte counters and the character counters alike.
void CursorStart()
{
	StartPos = CursorPos = 0;
	StartPosChars = CursorPosChars = 0;
}
// Moves the cursor behind the last character of the line. The display start
// is reset first and then recomputed by MakeStartPosGood() so that the cursor
// ends up inside the visible part of the line.
void CursorEnd()
{
	StartPos = 0;
	StartPosChars = 0;
	CursorPosChars = static_cast<unsigned>(Text.CharacterCount());
	CursorPos = static_cast<unsigned>(Text.Len());
	MakeStartPosGood();
}
private:
// Moves the cursor one character to the left.
// The character counter drops by one; the byte position is decremented until
// it rests on a byte that is not a UTF-8 continuation byte (10xxxxxx), i.e.
// on the first byte of the previous character.
// No bounds check is done here - the callers test the cursor position first.
void MoveCursorLeft()
{
	--CursorPosChars;
	--CursorPos;
	while ((Text[CursorPos] & 0xc0) == 0x80)
	{
		--CursorPos;
	}
}
// Moves the cursor one character to the right.
// The character counter grows by one; the byte position is incremented until
// it no longer rests on a UTF-8 continuation byte (10xxxxxx), i.e. until it
// has stepped over every byte of the character that was under the cursor.
// No bounds check is done here - the callers test the cursor position first.
// NOTE(review): presumably Text[Len()] yields the terminating NUL, which ends
// the scan at the end of the text - confirm against FString.
void MoveCursorRight()
{
	++CursorPosChars;
	++CursorPos;
	while ((Text[CursorPos] & 0xc0) == 0x80)
	{
		++CursorPos;
	}
}
public:
void CursorLeft()
{
if (CursorPos > 0)
if (CursorPosChars > 0)
{
CursorPos--;
MoveCursorLeft();
MakeStartPosGood();
}
}
void CursorRight()
{
if (CursorPos < Text.Len())
if (CursorPosChars < Text.CharacterCount())
{
CursorPos++;
MoveCursorRight();
MakeStartPosGood();
}
}
void CursorWordLeft()
{
CursorPos = WordBoundaryLeft();
if (CursorPosChars > 0)
{
do MoveCursorLeft();
while (CursorPosChars > 0 && Text[CursorPos - 1] != ' ');
MakeStartPosGood();
}
}
void CursorWordRight()
{
CursorPos = WordBoundaryRight();
if (CursorPosChars < Text.CharacterCount())
{
do MoveCursorRight();
while (CursorPosChars < Text.CharacterCount() && Text[CursorPos] != ' ');
MakeStartPosGood();
}
}
void DeleteLeft()
{
if (CursorPos > 0)
{
Text.Remove(CursorPos - 1, 1);
CursorPos--;
auto now = CursorPos;
MoveCursorLeft();
Text.Remove(CursorPos, now - CursorPos);
MakeStartPosGood();
}
}
void DeleteRight()
{
if (CursorPos < Text.Len())
if (CursorPosChars < Text.CharacterCount())
{
Text.Remove(CursorPos, 1);
auto now = CursorPos;
MoveCursorRight();
Text.Remove(now, CursorPos - now);
CursorPos = now;
CursorPosChars--;
MakeStartPosGood();
}
}
@ -336,14 +368,16 @@ struct FCommandBuffer
{
if (CursorPos > 0)
{
unsigned index = WordBoundaryLeft();
auto now = CursorPos;
CursorWordLeft();
if (AppendToYankBuffer) {
YankBuffer = FString(&Text[index], CursorPos - index) + YankBuffer;
YankBuffer = FString(&Text[CursorPos], now - CursorPos) + YankBuffer;
} else {
YankBuffer = FString(&Text[index], CursorPos - index);
YankBuffer = FString(&Text[CursorPos], now - CursorPos);
}
Text.Remove(index, CursorPos - index);
CursorPos = index;
Text.Remove(CursorPos, now - CursorPos);
MakeStartPosGood();
}
}
@ -378,19 +412,24 @@ struct FCommandBuffer
void AddChar(int character)
{
///FIXME: Not Unicode-aware
if (CursorPos == Text.Len())
uint8_t encoded[5];
int size;
if (utf8_encode(character, encoded, &size) == 0)
{
Text += char(character);
encoded[size] = 0;
if (Text.IsEmpty())
{
Text = (char*)encoded;
}
else
{
char foo = char(character);
Text.Insert(CursorPos, &foo, 1);
Text.Insert(CursorPos, (char*)encoded);
}
CursorPos++;
CursorPos += size;
CursorPosChars++;
MakeStartPosGood();
}
}
void AddString(FString clip)
{
@ -401,6 +440,7 @@ struct FCommandBuffer
if (brk >= 0)
{
clip.Truncate(brk);
clip = MakeUTF8(clip.GetChars()); // Make sure that we actually have UTF-8 text.
}
if (Text.IsEmpty())
{
@ -411,16 +451,22 @@ struct FCommandBuffer
Text.Insert(CursorPos, clip);
}
CursorPos += (unsigned)clip.Len();
CursorPosChars += (unsigned)clip.CharacterCount();
MakeStartPosGood();
}
}
void SetString(FString str)
void SetString(const FString &str)
{
Text = str;
CursorPos = (unsigned)Text.Len();
Text = MakeUTF8(str);
CursorEnd();
MakeStartPosGood();
}
// Passes the yank buffer (the buffer holding deleted text) to AddString().
void AddYankBuffer()
{
AddString(YankBuffer);
}
};
static FCommandBuffer CmdLine;
@ -798,33 +844,6 @@ void FNotifyBuffer::AddString(int printlevel, FString source)
TopGoal = 0;
}
/* Adds a string to the console and also to the notify buffer */
int utf8_encode(int32_t codepoint, char *buffer, int *size);
static TArray<char> UTF8String;
// Converts a string that may mix encodings into pure UTF-8.
// Characters are read with GetCharFromString() and re-encoded with
// utf8_encode() into the static UTF8String array.
//   outline  - NUL terminated input text
//   numchars - optional; receives the number of characters written
// Returns a pointer into UTF8String, which is overwritten by the next call.
const char *MakeUTF8(const char *outline, int *numchars = nullptr)
{
	UTF8String.Clear();
	const uint8_t *in = (const uint8_t*)outline;

	if (numchars) *numchars = 0;
	while (int chr = GetCharFromString(in))
	{
		int size = 0;
		char encode[4];
		if (!utf8_encode(chr, encode, &size))
		{
			for (int i = 0; i < size; i++)
			{
				UTF8String.Push(encode[i]);
			}
			// The parentheses matter: '*numchars++' would advance the pointer
			// and leave the caller's counter at zero.
			if (numchars) (*numchars)++;
		}
	}
	UTF8String.Push(0);
	return UTF8String.Data();
}
void AddToConsole (int printlevel, const char *text)
{
conbuffer->AddText(printlevel, MakeUTF8(text), Logfile);
@ -844,7 +863,7 @@ int PrintString (int printlevel, const char *outline)
if (printlevel != PRINT_LOG)
{
I_PrintStr(UTF8String.Data());
I_PrintStr(outline);
conbuffer->AddText(printlevel, outline, Logfile);
if (vidactive && screen && SmallFont)
@ -1242,10 +1261,7 @@ void C_DrawConsole ()
{
if (gamestate != GS_STARTUP)
{
// Make a copy of the command line, in case an input event is handled
// while we draw the console and it changes.
FCommandBuffer command(CmdLine);
command.Draw(left, bottomline, textScale, cursoron);
CmdLine.Draw(left, bottomline, textScale, cursoron);
}
if (RowAdjust && ConBottom >= ConFont->GetHeight()*7/2)
{
@ -1527,7 +1543,7 @@ static bool C_HandleKey (event_t *ev, FCommandBuffer &buffer)
break;
case 'D':
if (ev->data3 & GKM_CTRL && buffer.Text.Len() == 0)
if (ev->data3 & GKM_CTRL && buffer.TextLength() == 0)
{ // Control-D pressed on an empty line
if (strlen(con_ctrl_d) == 0)
{
@ -1542,16 +1558,18 @@ static bool C_HandleKey (event_t *ev, FCommandBuffer &buffer)
// Intentional fall-through for command(s) added with Ctrl-D
case '\r':
{
// Execute command line (ENTER)
FString bufferText = buffer.GetText();
buffer.Text.StripLeftRight();
Printf(127, TEXTCOLOR_WHITE "]%s\n", buffer.Text.GetChars());
bufferText.StripLeftRight();
Printf(127, TEXTCOLOR_WHITE "]%s\n", bufferText.GetChars());
if (buffer.Text.Len() == 0)
if (bufferText.Len() == 0)
{
// Command line is empty, so do nothing to the history
}
else if (HistHead && HistHead->String.CompareNoCase(buffer.Text) == 0)
else if (HistHead && HistHead->String.CompareNoCase(bufferText) == 0)
{
// Command line was the same as the previous one,
// so leave the history list alone
@ -1563,7 +1581,7 @@ static bool C_HandleKey (event_t *ev, FCommandBuffer &buffer)
// so add it to the history list.
History *temp = new History;
temp->String = buffer.Text;
temp->String = bufferText;
temp->Older = HistHead;
if (HistHead)
{
@ -1589,16 +1607,12 @@ static bool C_HandleKey (event_t *ev, FCommandBuffer &buffer)
}
}
HistPos = NULL;
{
// Work with a copy of command to avoid side effects caused by
// exception raised during execution, like with 'error' CCMD.
FString copy = buffer.Text;
buffer.SetString("");
AddCommandString(copy);
}
AddCommandString(bufferText);
TabbedLast = false;
TabbedList = false;
break;
}
case '`':
// Check to see if we have ` bound to the console before accepting
@ -1639,9 +1653,9 @@ static bool C_HandleKey (event_t *ev, FCommandBuffer &buffer)
{
if (data1 == 'C')
{ // copy to clipboard
if (buffer.Text.IsNotEmpty())
if (buffer.TextLength() > 0)
{
I_PutInClipboard(buffer.Text);
I_PutInClipboard(buffer.GetText());
}
}
else
@ -1696,7 +1710,7 @@ static bool C_HandleKey (event_t *ev, FCommandBuffer &buffer)
case 'Y':
if (ev->data3 & GKM_CTRL)
{
buffer.AddString(buffer.YankBuffer);
buffer.AddYankBuffer();
TabbedLast = false;
TabbedList = false;
HistPos = NULL;
@ -1939,25 +1953,27 @@ static void C_TabComplete (bool goForward)
unsigned i;
int diffpoint;
auto CmdLineText = CmdLine.GetText();
if (!TabbedLast)
{
bool cancomplete;
// Skip any spaces at beginning of command line
for (i = 0; i < CmdLine.Text.Len(); ++i)
for (i = 0; i < CmdLineText.Len(); ++i)
{
if (CmdLine.Text[i] != ' ')
if (CmdLineText[i] != ' ')
break;
}
if (i == CmdLine.Text.Len())
if (i == CmdLineText.Len())
{ // Line was nothing but spaces
return;
}
TabStart = i;
TabSize = (int)CmdLine.Text.Len() - TabStart;
TabSize = (int)CmdLineText.Len() - TabStart;
if (!FindTabCommand(&CmdLine.Text[TabStart], &TabPos, TabSize))
if (!FindTabCommand(&CmdLineText[TabStart], &TabPos, TabSize))
return; // No initial matches
// Show a list of possible completions, if more than one.
@ -1980,7 +1996,7 @@ static void C_TabComplete (bool goForward)
{ // Find the last matching tab, then go one past it.
while (++TabPos < (int)TabCommands.Size())
{
if (FindDiffPoint(TabCommands[TabPos].TabName, &CmdLine.Text[TabStart]) < TabSize)
if (FindDiffPoint(TabCommands[TabPos].TabName, &CmdLineText[TabStart]) < TabSize)
{
break;
}
@ -1997,25 +2013,25 @@ static void C_TabComplete (bool goForward)
(!goForward && --TabPos < 0))
{
TabbedLast = false;
CmdLine.Text.Truncate(TabSize);
CmdLineText.Truncate(TabSize);
}
else
{
diffpoint = FindDiffPoint(TabCommands[TabPos].TabName, &CmdLine.Text[TabStart]);
diffpoint = FindDiffPoint(TabCommands[TabPos].TabName, &CmdLineText[TabStart]);
if (diffpoint < TabSize)
{
// No more matches
TabbedLast = false;
CmdLine.Text.Truncate(TabSize - TabStart);
CmdLineText.Truncate(TabSize - TabStart);
}
else
{
CmdLine.Text.Truncate(TabStart);
CmdLine.Text << TabCommands[TabPos].TabName << ' ';
CmdLineText.Truncate(TabStart);
CmdLineText << TabCommands[TabPos].TabName << ' ';
}
}
CmdLine.CursorPos = (unsigned)CmdLine.Text.Len();
CmdLine.SetString(CmdLineText);
CmdLine.MakeStartPosGood();
}
@ -2028,9 +2044,10 @@ static bool C_TabCompleteList ()
nummatches = 0;
maxwidth = 0;
auto CmdLineText = CmdLine.GetText();
for (i = TabPos; i < (int)TabCommands.Size(); ++i)
{
if (FindDiffPoint (TabCommands[i].TabName, &CmdLine.Text[TabStart]) < TabSize)
if (FindDiffPoint (TabCommands[i].TabName, &CmdLineText[TabStart]) < TabSize)
{
break;
}
@ -2055,7 +2072,7 @@ static bool C_TabCompleteList ()
{
size_t x = 0;
maxwidth += 3;
Printf (TEXTCOLOR_BLUE "Completions for %s:\n", CmdLine.Text.GetChars());
Printf (TEXTCOLOR_BLUE "Completions for %s:\n", CmdLineText.GetChars());
for (i = TabPos; nummatches > 0; ++i, --nummatches)
{
// [Dusk] Print console commands blue, CVars green, aliases red.
@ -2087,9 +2104,9 @@ static bool C_TabCompleteList ()
if (TabSize != commonsize)
{
TabSize = commonsize;
CmdLine.Text.Truncate(TabStart);
CmdLine.Text.AppendCStrPart(TabCommands[TabPos].TabName.GetChars(), commonsize);
CmdLine.CursorPos = (unsigned)CmdLine.Text.Len();
CmdLineText.Truncate(TabStart);
CmdLineText.AppendCStrPart(TabCommands[TabPos].TabName.GetChars(), commonsize);
CmdLine.SetString(CmdLineText);
}
return false;
}

View file

@ -270,8 +270,7 @@ void FConsoleBuffer::FormatText(FFont *formatfont, int displaywidth)
unsigned brokensize = m_BrokenConsoleText.Size();
if (brokensize == mConsoleText.Size())
{
// The last line got text appended. We have to wait until here to format it because
// it is possible that during display new text will be added from the NetUpdate calls in the software version of DrawTextureV.
// The last line got text appended.
if (mLastLineNeedsUpdate)
{
brokensize--;

View file

@ -46,6 +46,7 @@
#include "events.h"
#include "g_game.h"
#include "g_levellocals.h"
#include "utf8.h"
static void I_CheckGUICapture ();
@ -471,10 +472,9 @@ void MessagePump (const SDL_Event &sev)
case SDL_TEXTINPUT:
if (GUICapture)
{
int utf8_decode(const char *src, int *size);
int size;
int unichar = utf8_decode(sev.text.text, &size);
int unichar = utf8_decode((const uint8_t*)sev.text.text, &size);
if (size != 4)
{
event.type = EV_GUI_Event;

View file

@ -49,10 +49,10 @@
#include "doomstat.h"
#include "g_levellocals.h"
#include "v_video.h"
#include "utf8.h"
extern FRandom pr_exrandom;
FMemArena FxAlloc(65536);
int utf8_decode(const char *src, int *size);
struct FLOP
{
@ -318,19 +318,13 @@ static FxExpression *StringConstToChar(FxExpression *basex)
// This serves as workaround for not being able to use single quoted literals because those are taken for names.
ExpVal constval = static_cast<FxConstant *>(basex)->GetValue();
FString str = constval.GetString();
if (str.Len() == 1)
int position = 0;
int chr = str.GetNextCharacter(position);
// Only succeed if the full string is consumed, i.e. it contains only one code point.
if (position == str.Len())
{
return new FxConstant(str[0], basex->ScriptPosition);
}
else if (str.Len() > 1)
{
// If the string is UTF-8, allow a single character UTF-8 sequence.
int size;
int c = utf8_decode(str.GetChars(), &size);
if (c >= 0 && size_t(size) == str.Len())
{
return new FxConstant(c, basex->ScriptPosition);
}
return new FxConstant(chr, basex->ScriptPosition);
}
return nullptr;
}

View file

@ -59,114 +59,31 @@
#include "v_text.h"
#include "cmdlib.h"
#include "g_levellocals.h"
#include "utf8.h"
char nulspace[1024 * 1024 * 4];
bool save_full = false; // for testing. Should be removed afterward.
// Encodes one code point as UTF-8.
//   codepoint - value to encode, valid range is 0 to 0x10FFFF
//   buffer    - receives up to 4 bytes, no terminator is written
//   size      - receives the number of bytes written
// Returns 0 on success. Returns -1 for a value outside the valid range, in
// which case neither buffer nor size is touched.
int utf8_encode(int32_t codepoint, char *buffer, int *size)
{
	if (codepoint < 0 || codepoint > 0x10FFFF)
	{
		return -1;
	}

	if (codepoint < 0x80)
	{
		// 7 bits: plain ASCII
		buffer[0] = (char)codepoint;
		*size = 1;
		return 0;
	}
	if (codepoint < 0x800)
	{
		// 11 bits: 110xxxxx 10xxxxxx
		buffer[0] = 0xC0 | (codepoint >> 6);
		buffer[1] = 0x80 | (codepoint & 0x3F);
		*size = 2;
		return 0;
	}
	if (codepoint < 0x10000)
	{
		// 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
		buffer[0] = 0xE0 | (codepoint >> 12);
		buffer[1] = 0x80 | ((codepoint >> 6) & 0x3F);
		buffer[2] = 0x80 | (codepoint & 0x3F);
		*size = 3;
		return 0;
	}
	// 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	buffer[0] = 0xF0 | (codepoint >> 18);
	buffer[1] = 0x80 | ((codepoint >> 12) & 0x3F);
	buffer[2] = 0x80 | ((codepoint >> 6) & 0x3F);
	buffer[3] = 0x80 | (codepoint & 0x3F);
	*size = 4;
	return 0;
}
// Decodes the UTF-8 sequence at the start of a NUL terminated string.
//   src  - text to decode
//   size - receives the number of bytes consumed; this is 1 whenever
//          decoding fails
// Returns the decoded code point, or -1 if src does not start with a valid
// sequence. Rejected are: stray continuation bytes, lead bytes not followed
// by the required number of continuation bytes, overlong encodings,
// surrogates (0xD800-0xDFFF) and values above 0x10FFFF.
int utf8_decode(const char *src, int *size)
{
	const int lead = src[0] & 255;

	*size = 1;
	if ((lead & 0x80) == 0)
	{
		return lead;
	}

	int following;	// number of continuation bytes the lead byte announces
	int minimum;	// smallest value that needs this sequence length
	int r;
	if ((lead & 0xE0) == 0xC0)
	{
		following = 1;
		minimum = 0x80;
		r = lead & 0x1F;
	}
	else if ((lead & 0xF0) == 0xE0)
	{
		following = 2;
		minimum = 0x800;
		r = lead & 0x0F;
	}
	else if ((lead & 0xF8) == 0xF0)
	{
		following = 3;
		minimum = 0x10000;
		r = lead & 0x07;
	}
	else
	{
		return -1;
	}

	for (int i = 1; i <= following; i++)
	{
		const int cont = src[i] & 255;
		// Each byte is checked before the next one is read, so a truncated
		// sequence stops at the terminator instead of reading beyond it.
		if ((cont & 0xC0) != 0x80)
		{
			return -1;
		}
		// Only the low 6 bits carry data. Merging the unmasked byte corrupts
		// the result (C4 80 would decode to 0x180 instead of 0x100).
		r = (r << 6) | (cont & 0x3F);
	}

	if (r < minimum || r > 0x10FFFF || (r >= 0xD800 && r <= 0xDFFF))
	{
		return -1;
	}
	*size = following + 1;
	return r;
}
//==========================================================================
//
// This will double-encode already existing UTF-8 content.
// The reason for this behavior is to preserve any original data coming through here, no matter what it is.
// If these are script-based strings, exact preservation in the serializer is very important.
//
//==========================================================================
static TArray<char> out;
static const char *StringToUnicode(const char *cc, int size = -1)
{
int ch;
const char *c = cc;
const uint8_t *c = (const uint8_t*)cc;
int count = 0;
int count1 = 0;
out.Clear();
while ((ch = (*c++) & 255))
{
count1++;
if (ch >= 128)
{
if (ch < 0x800) count += 2;
else count += 3;
// The source cannot contain 4-byte chars.
}
if (ch >= 128) count += 2;
else count++;
if (count1 == size && size > 0) break;
}
@ -174,11 +91,11 @@ static const char *StringToUnicode(const char *cc, int size = -1)
// we need to convert
out.Resize(count + 1);
out.Last() = 0;
c = cc;
c = (const uint8_t*)cc;
int i = 0;
while ((ch = (*c++) & 255))
while ((ch = (*c++)))
{
utf8_encode(ch, &out[i], &count1);
utf8_encode(ch, (uint8_t*)&out[i], &count1);
i += count1;
}
return &out[0];
@ -191,8 +108,8 @@ static const char *UnicodeToString(const char *cc)
while (*cc != 0)
{
int size;
int c = utf8_decode(cc, &size);
if (c < 0 || c > 255) c = '?';
int c = utf8_decode((const uint8_t*)cc, &size);
if (c < 0 || c > 255) c = '?'; // This should never happen because all content was encoded with StringToUnicode which only produces code points 0-255.
out[ndx++] = c;
cc += size;
}

249
src/utility/utf8.cpp Normal file
View file

@ -0,0 +1,249 @@
/*
** utf8.cpp
** UTF-8 utilities
**
**---------------------------------------------------------------------------
** Copyright 2019 Christoph Oelckers
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
**
** 1. Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
** 3. The name of the author may not be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**---------------------------------------------------------------------------
**
*/
#include <stdint.h>
#include "tarray.h"
//==========================================================================
//
//
//
//==========================================================================
// Encodes one code point as UTF-8.
//   codepoint - value to encode, valid range is 0 to 0x10FFFF
//   buffer    - receives up to 4 bytes, no terminator is written
//   size      - receives the number of bytes written
// Returns 0 on success. Returns -1 for a value outside the valid range, in
// which case neither buffer nor size is touched.
int utf8_encode(int32_t codepoint, uint8_t *buffer, int *size)
{
	if (codepoint < 0 || codepoint > 0x10FFFF)
	{
		return -1;
	}

	if (codepoint < 0x80)
	{
		// 7 bits: plain ASCII
		buffer[0] = (uint8_t)codepoint;
		*size = 1;
		return 0;
	}
	if (codepoint < 0x800)
	{
		// 11 bits: 110xxxxx 10xxxxxx
		buffer[0] = 0xC0 | (codepoint >> 6);
		buffer[1] = 0x80 | (codepoint & 0x3F);
		*size = 2;
		return 0;
	}
	if (codepoint < 0x10000)
	{
		// 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
		buffer[0] = 0xE0 | (codepoint >> 12);
		buffer[1] = 0x80 | ((codepoint >> 6) & 0x3F);
		buffer[2] = 0x80 | (codepoint & 0x3F);
		*size = 3;
		return 0;
	}
	// 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	buffer[0] = 0xF0 | (codepoint >> 18);
	buffer[1] = 0x80 | ((codepoint >> 12) & 0x3F);
	buffer[2] = 0x80 | ((codepoint >> 6) & 0x3F);
	buffer[3] = 0x80 | (codepoint & 0x3F);
	*size = 4;
	return 0;
}
//==========================================================================
//
//
//
//==========================================================================
// Decodes the UTF-8 sequence at the start of a NUL terminated string.
//   src  - text to decode
//   size - receives the number of bytes consumed; this is 1 whenever
//          decoding fails
// Returns the decoded code point, or -1 if src does not start with a valid
// sequence. Rejected are: stray continuation bytes, lead bytes not followed
// by the required number of continuation bytes, overlong encodings,
// surrogates (0xD800-0xDFFF) and values above 0x10FFFF.
int utf8_decode(const uint8_t *src, int *size)
{
	const int lead = src[0];

	*size = 1;
	if ((lead & 0x80) == 0)
	{
		return lead;
	}

	int following;	// number of continuation bytes the lead byte announces
	int minimum;	// smallest value that needs this sequence length
	int r;
	if ((lead & 0xE0) == 0xC0)
	{
		following = 1;
		minimum = 0x80;
		r = lead & 0x1F;
	}
	else if ((lead & 0xF0) == 0xE0)
	{
		following = 2;
		minimum = 0x800;
		r = lead & 0x0F;
	}
	else if ((lead & 0xF8) == 0xF0)
	{
		following = 3;
		minimum = 0x10000;
		r = lead & 0x07;
	}
	else
	{
		return -1;
	}

	for (int i = 1; i <= following; i++)
	{
		const int cont = src[i];
		// Each byte is checked before the next one is read, so a truncated
		// sequence stops at the terminator instead of reading beyond it.
		if ((cont & 0xC0) != 0x80)
		{
			return -1;
		}
		// Only the low 6 bits carry data. Merging the unmasked byte corrupts
		// the result (C4 80 would decode to 0x180 instead of 0x100).
		r = (r << 6) | (cont & 0x3F);
	}

	if (r < minimum || r > 0x10FFFF || (r >= 0xD800 && r <= 0xDFFF))
	{
		return -1;
	}
	*size = following + 1;
	return r;
}
//==========================================================================
//
// Unicode mapping for the 0x80-0x9f range of the Windows 1252 code page
//
//==========================================================================
// Indexed by (byte - 0x80). Entries whose value equals their own byte value
// (0x81, 0x8D, 0x8F, 0x90, 0x9D) are passed through unchanged.
uint16_t win1252map[] = {
	// 0x80 - 0x87
	0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
	// 0x88 - 0x8F
	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
	// 0x90 - 0x97
	0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
	// 0x98 - 0x9F
	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178,
};
//==========================================================================
//
// reads one character from the string.
// This can handle both ISO 8859-1/Windows-1252 and UTF-8, as well as mixed strings
// between both encodings, which may happen if inconsistent encoding is
// used between different files in a mod.
//
//==========================================================================
// Reads one character and advances 'string' behind it.
// Bytes below 192 are taken as single byte characters, with the 0x80-0x9F
// range translated through win1252map. Anything else is handed to
// utf8_decode(); if that fails, the byte itself is returned and only that
// one byte is consumed.
int GetCharFromString(const uint8_t *&string)
{
	const int first = *string;

	if (first >= 192)
	{
		// Candidate for the start of a UTF-8 sequence.
		int length = 0;
		const int decoded = utf8_decode(string, &length);
		if (decoded < 0)
		{
			// Not UTF-8 after all, so pass the byte through as it is.
			string++;
			return first;
		}
		string += length;
		return decoded;
	}

	string++;
	// Handle Windows 1252 characters
	const bool win1252extra = (first >= 128 && first < 160);
	return win1252extra ? win1252map[first - 128] : first;
}
//==========================================================================
//
// convert a potentially mixed-encoded string to pure UTF-8
// this returns a pointer to a static buffer,
// assuming that its caller will immediately process the result.
//
//==========================================================================
static TArray<char> UTF8String;
// Characters are read with GetCharFromString() and re-encoded with
// utf8_encode() into the static UTF8String array.
//   outline  - NUL terminated input text
//   numchars - optional; receives the number of characters written
// Returns a pointer into UTF8String, which is overwritten by the next call.
const char *MakeUTF8(const char *outline, int *numchars = nullptr)
{
	UTF8String.Clear();
	const uint8_t *in = (const uint8_t*)outline;

	if (numchars) *numchars = 0;
	while (int chr = GetCharFromString(in))
	{
		int size = 0;
		uint8_t encode[4];
		if (!utf8_encode(chr, encode, &size))
		{
			for (int i = 0; i < size; i++)
			{
				UTF8String.Push(encode[i]);
			}
			// The parentheses matter: '*numchars++' would advance the pointer
			// and leave the caller's counter at zero.
			if (numchars) (*numchars)++;
		}
	}
	UTF8String.Push(0);
	return UTF8String.Data();
}

8
src/utility/utf8.h Normal file
View file

@ -0,0 +1,8 @@
#pragma once

// Needed for int32_t, uint8_t and uint16_t so that this header can be
// included without relying on what the including file pulled in before.
#include <stdint.h>

// Encodes one code point into buffer (up to 4 bytes) and stores the byte count in *size. Returns 0 on success, -1 on failure.
int utf8_encode(int32_t codepoint, uint8_t *buffer, int *size);
// Decodes the sequence at src and stores the number of consumed bytes in *size. Returns the code point, or -1 on failure.
int utf8_decode(const uint8_t *src, int *size);
// Reads one character from a string that may mix Windows-1252 and UTF-8 and advances the pointer behind it.
int GetCharFromString(const uint8_t *&string);
const char *MakeUTF8(const char *outline, int *numchars = nullptr);	// returns a pointer to a static buffer, assuming that its caller will immediately process the result.

// Unicode code points for the 0x80-0x9f range of Windows-1252, indexed by (byte - 0x80).
extern uint16_t win1252map[];

View file

@ -91,6 +91,7 @@ The FON2 header is followed by variable length data:
#include "v_text.h"
#include "vm.h"
#include "image.h"
#include "utf8.h"
#include "textures/formats/fontchars.h"
// MACROS ------------------------------------------------------------------
@ -1844,7 +1845,10 @@ void FSingleLumpFont::LoadFON1 (int lump, const uint8_t *data)
{
int w, h;
Chars.Resize(256);
// The default console font is for Windows-1252 and fills the 0x80-0x9f range with valid glyphs.
// Since now all internal text is processed as Unicode, these have to be remapped to their proper places.
// The highest valid character in this range is 0x2122, so we need 0x2123 entries in our character table.
Chars.Resize(0x2123);
w = data[4] + data[5]*256;
h = data[6] + data[7]*256;
@ -1853,10 +1857,20 @@ void FSingleLumpFont::LoadFON1 (int lump, const uint8_t *data)
FontHeight = h;
SpaceWidth = w;
FirstChar = 0;
LastChar = 255;
LastChar = 255; // This is to allow LoadTranslations to function. The way this is all set up really needs to be changed.
GlobalKerning = 0;
translateUntranslated = true;
LoadTranslations();
LastChar = 0x2122;
// Move the Windows-1252 characters to their proper place.
for (int i = 0x80; i < 0xa0; i++)
{
if (win1252map[i-0x80] != i && Chars[i].TranslatedPic != nullptr && Chars[win1252map[i - 0x80]].TranslatedPic == nullptr)
{
std::swap(Chars[i], Chars[win1252map[i - 0x80]]);
}
}
}
//==========================================================================

View file

@ -49,119 +49,6 @@
int ListGetInt(VMVa_List &tags);
//==========================================================================
//
// reads one character from the string.
// This can handle both ISO 8859-1 and UTF-8, as well as mixed strings
// between both encodings, which may happen if inconsistent encoding is
// used between different files in a mod.
// The long term goal should be to convert all text to UTF-8 on loading and
// make this require pure UTF-8 input.
//
//==========================================================================
// Reads one character and advances 'string' behind it.
// Bytes below 192 are single byte characters, with the 0x80-0x9F range
// translated from Windows-1252 to Unicode. Bytes from 192 up are treated as
// UTF-8 lead bytes; if the bytes behind them are not continuation bytes, the
// lead byte is returned unchanged and only that byte is consumed.
// 4-byte sequences are not supported and yield '?'.
int GetCharFromString(const uint8_t *&string)
{
	int z, y, x;

	z = *string++;

	if (z < 192)
	{
		// Handle Windows 1252 characters
		if (z >= 128 && z < 160)
		{
			static const uint16_t map0x80_0x9f[] = {
				0x20AC,
				0x81  ,
				0x201A,
				0x0192,
				0x201E,
				0x2026,
				0x2020,
				0x2021,
				0x02C6,
				0x2030,
				0x0160,
				0x2039,
				0x0152,
				0x8d  ,
				0x017D,
				0x8f  ,
				0x90  ,
				0x2018,
				0x2019,
				0x201C,
				0x201D,
				0x2022,
				0x2013,
				0x2014,
				0x02DC,
				0x2122,
				0x0161,
				0x203A,
				0x0153,
				0x9d  ,
				0x017E,
				0x0178,
			};
			return map0x80_0x9f[z - 128];
		}
		return z;
	}
	else if (z <= 223)
	{
		y = *string++;
		if (y < 128 || y >= 192)
		{
			// not an UTF-8 sequence so return the first byte unchanged
			string--;
		}
		else
		{
			z = (z - 192) * 64 + (y - 128);
		}
	}
	else if (z >= 224 && z <= 239)
	{
		y = *string++;
		if (y < 128 || y >= 192)
		{
			// not an UTF-8 sequence so return the first byte unchanged
			string--;
		}
		else
		{
			x = *string++;
			if (x < 128 || x >= 192)
			{
				// not an UTF-8 sequence so return the first byte unchanged
				string -= 2;
			}
			else
			{
				z = (z - 224) * 4096 + (y - 128) * 64 + (x - 128);
			}
		}
	}
	else if (z >= 240)
	{
		y = *string++;
		if (y < 128 || y >= 192)
		{
			// not an UTF-8 sequence so return the first byte unchanged
			string--;
		}
		else
		{
			// we do not support 4-Byte UTF-8 here
			// Skip the remaining two bytes of the sequence, but only while
			// they really are continuation bytes. Skipping blindly would step
			// over the terminator of a truncated sequence.
			for (int remaining = 2; remaining > 0 && (*string & 0xC0) == 0x80; remaining--)
			{
				string++;
			}
			return '?';
		}
	}
	return z;
}
//==========================================================================
//
// DrawChar