- fixed: RapidJSON in ASCII mode cannot handle extended 8-bit character sets and will produce broken data if the input contains any such characters. This means we need to perform the conversion to UTF-8 on ZDoom's side and run RapidJSON in UTF-8 mode.

Daedalus triggers this with a 0x85 character, which in Windows CP 1252 is the ellipsis (...). The converter will assume ISO-8859-1, though, and cannot do anything else with these characters because they map to the font being used here.
This commit is contained in:
Christoph Oelckers 2016-11-01 13:33:18 +01:00 committed by Rachael Alexanderson
parent 33637dda23
commit bfb8886e93
2 changed files with 160 additions and 17 deletions

View file

@ -791,6 +791,10 @@ void ACSStringPool::WriteStrings(FSerializer &file, const char *key) const
{ {
if (file.BeginObject(nullptr)) if (file.BeginObject(nullptr))
{ {
if (i == 430)
{
int a = 0;
}
file("index", i) file("index", i)
("string", entry->Str) ("string", entry->Str)
("lockcount", entry->LockCount) ("lockcount", entry->LockCount)

View file

@ -67,6 +67,143 @@
char nulspace[1024 * 1024 * 4]; char nulspace[1024 * 1024 * 4];
bool save_full = false; // for testing. Should be removed afterward. bool save_full = false; // for testing. Should be removed afterward.
//==========================================================================
//
// utf8_encode
//
// Encodes a single Unicode code point as UTF-8.
//
// codepoint - the value to encode.
// buffer    - receives the encoded bytes; must have room for up to 4 chars.
//             No terminating NUL is written.
// size      - receives the number of bytes written (1..4), or 0 on failure.
//
// Returns 0 on success and -1 if the code point cannot be encoded: negative
// values, values above U+10FFFF and the UTF-16 surrogate range
// U+D800..U+DFFF, which is not allowed in UTF-8.
//
//==========================================================================

int utf8_encode(int32_t codepoint, char *buffer, int *size)
{
	// Report a length of zero on every failure path so that a caller which
	// ignores the return value does not advance by a stale length.
	*size = 0;

	if (codepoint < 0 || codepoint > 0x10FFFF)
	{
		return -1;
	}
	if (codepoint >= 0xD800 && codepoint <= 0xDFFF)
	{
		// Surrogates are only meaningful in UTF-16.
		return -1;
	}

	if (codepoint < 0x80)
	{
		buffer[0] = static_cast<char>(codepoint);
		*size = 1;
	}
	else if (codepoint < 0x800)
	{
		buffer[0] = static_cast<char>(0xC0 | (codepoint >> 6));
		buffer[1] = static_cast<char>(0x80 | (codepoint & 0x3F));
		*size = 2;
	}
	else if (codepoint < 0x10000)
	{
		buffer[0] = static_cast<char>(0xE0 | (codepoint >> 12));
		buffer[1] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
		buffer[2] = static_cast<char>(0x80 | (codepoint & 0x3F));
		*size = 3;
	}
	else
	{
		buffer[0] = static_cast<char>(0xF0 | (codepoint >> 18));
		buffer[1] = static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
		buffer[2] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
		buffer[3] = static_cast<char>(0x80 | (codepoint & 0x3F));
		*size = 4;
	}
	return 0;
}
//==========================================================================
//
// utf8_decode
//
// Decodes one UTF-8 sequence starting at src.
//
// src  - NUL-terminated input; src[0] is the first byte of the sequence.
// size - receives the number of bytes the sequence occupies (1..4). On
//        failure it is set to 1 so that the caller can skip the offending
//        byte and resynchronize.
//
// Returns the decoded code point, or -1 for malformed input: a stray
// continuation byte, an invalid lead byte, a missing or invalid
// continuation byte (including a sequence cut short by the terminator),
// an overlong encoding, a UTF-16 surrogate or a value above U+10FFFF.
//
//==========================================================================

int utf8_decode(const char *src, int *size)
{
	const int lead = src[0] & 255;
	*size = 1;

	if ((lead & 0x80) == 0)
	{
		// Plain 7 bit ASCII.
		return lead;
	}

	int length;		// total bytes in the sequence
	int value;		// payload bits collected so far
	int minimum;	// smallest code point that legitimately needs 'length' bytes

	if ((lead & 0xE0) == 0xC0)
	{
		length = 2;
		value = lead & 0x1F;
		minimum = 0x80;
	}
	else if ((lead & 0xF0) == 0xE0)
	{
		length = 3;
		value = lead & 0x0F;
		minimum = 0x800;
	}
	else if ((lead & 0xF8) == 0xF0)
	{
		length = 4;
		value = lead & 0x07;
		minimum = 0x10000;
	}
	else
	{
		// Continuation byte in lead position or an invalid lead byte.
		return -1;
	}

	for (int i = 1; i < length; i++)
	{
		// Each trailing byte is only read after all previous ones were valid
		// continuation bytes, so this never reads beyond the terminating NUL.
		const int trail = src[i] & 255;
		if ((trail & 0xC0) != 0x80)
		{
			return -1;
		}
		// Only the low 6 bits of a continuation byte carry payload.
		value = (value << 6) | (trail & 0x3F);
	}

	if (value < minimum)
	{
		// Overlong encoding.
		return -1;
	}
	if (value >= 0xD800 && value <= 0xDFFF)
	{
		// UTF-16 surrogates are not valid in UTF-8.
		return -1;
	}
	if (value > 0x10FFFF)
	{
		return -1;
	}

	*size = length;
	return value;
}
// Scratch buffer shared by StringToUnicode and UnicodeToString. Both return
// pointers into this array, so a converted string is overwritten by the
// next conversion and must be consumed or copied before then.
static TArray<char> out;
static const char *StringToUnicode(const char *cc, int size = -1)
{
int ch;
const char *c = cc;
int count = 0;
int count1 = 0;
out.Clear();
while (ch = (*c++) & 255)
{
count1++;
if (ch >= 128)
{
if (ch < 0x800) count += 2;
else count += 3;
// The source cannot contain 4-byte chars.
}
else count++;
if (count1 == size && size > 0) break;
}
if (count == count1) return cc; // string is pure ASCII.
// we need to convert
out.Resize(count + 1);
out.Last() = 0;
c = cc;
int i = 0;
while (ch = (*c++) & 255)
{
utf8_encode(ch, &out[i], &count1);
i += count1;
}
return &out[0];
}
//==========================================================================
//
// UnicodeToString
//
// Converts a NUL-terminated UTF-8 string back to an 8 bit string. Code
// points 0..255 are stored as a single byte of the same value; anything
// that fails to decode or lies above 255 is replaced with '?'.
//
// Returns a pointer into the shared 'out' buffer, which is overwritten by
// the next conversion.
//
//==========================================================================

static const char *UnicodeToString(const char *cc)
{
	// Every output byte consumes at least one input byte, so the source
	// length plus terminator is always sufficient.
	out.Resize((unsigned)strlen(cc) + 1);
	int ndx = 0;
	while (*cc != 0)
	{
		int size;
		int c = utf8_decode(cc, &size);

		// Never step over the terminator: if the reported sequence length
		// reaches past the end of the string, the sequence is truncated.
		// Treat it as invalid and consume only the bytes that exist.
		for (int k = 1; k < size; k++)
		{
			if (cc[k] == 0)
			{
				c = -1;
				size = k;
				break;
			}
		}

		if (c < 0 || c > 255) c = '?';
		out[ndx++] = c;
		cc += size;
	}
	out[ndx] = 0;
	return &out[0];
}
//========================================================================== //==========================================================================
// //
// //
@ -99,8 +236,8 @@ struct FJSONObject
struct FWriter struct FWriter
{ {
typedef rapidjson::Writer<rapidjson::StringBuffer, rapidjson::ASCII<> > Writer; typedef rapidjson::Writer<rapidjson::StringBuffer, rapidjson::UTF8<> > Writer;
typedef rapidjson::PrettyWriter<rapidjson::StringBuffer, rapidjson::ASCII<> > PrettyWriter; typedef rapidjson::PrettyWriter<rapidjson::StringBuffer, rapidjson::UTF8<> > PrettyWriter;
Writer *mWriter1; Writer *mWriter1;
PrettyWriter *mWriter2; PrettyWriter *mWriter2;
@ -173,14 +310,16 @@ struct FWriter
void String(const char *k) void String(const char *k)
{ {
k = StringToUnicode(k);
if (mWriter1) mWriter1->String(k); if (mWriter1) mWriter1->String(k);
else if (mWriter2) mWriter2->String(k); else if (mWriter2) mWriter2->String(k);
} }
void String(const char *k, int size) void String(const char *k, int size)
{ {
if (mWriter1) mWriter1->String(k, size); k = StringToUnicode(k, size);
else if (mWriter2) mWriter2->String(k, size); if (mWriter1) mWriter1->String(k);
else if (mWriter2) mWriter2->String(k);
} }
void Bool(bool k) void Bool(bool k)
@ -602,7 +741,7 @@ FSerializer &FSerializer::Args(const char *key, int *args, int *defargs, int spe
} }
else if (i == 0 && aval.IsString()) else if (i == 0 && aval.IsString())
{ {
args[i] = -FName(aval.GetString()); args[i] = -FName(UnicodeToString(aval.GetString()));
} }
else else
{ {
@ -654,7 +793,7 @@ FSerializer &FSerializer::ScriptNum(const char *key, int &num)
} }
else if (val->IsString()) else if (val->IsString())
{ {
num = -FName(val->GetString()); num = -FName(UnicodeToString(val->GetString()));
} }
else else
{ {
@ -709,7 +848,7 @@ FSerializer &FSerializer::Sprite(const char *key, int32_t &spritenum, int32_t *d
{ {
if (val->IsString()) if (val->IsString())
{ {
uint32_t name = *reinterpret_cast<const uint32_t*>(val->GetString()); uint32_t name = *reinterpret_cast<const uint32_t*>(UnicodeToString(val->GetString()));
for (auto hint = NumStdSprites; hint-- != 0; ) for (auto hint = NumStdSprites; hint-- != 0; )
{ {
if (sprites[hint].dwName == name) if (sprites[hint].dwName == name)
@ -747,7 +886,7 @@ FSerializer &FSerializer::StringPtr(const char *key, const char *&charptr)
{ {
if (val->IsString()) if (val->IsString())
{ {
charptr = val->GetString(); charptr = UnicodeToString(val->GetString());
} }
else else
{ {
@ -1403,7 +1542,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FTextureID &value, FTe
assert(nameval.IsString() && typeval.IsInt()); assert(nameval.IsString() && typeval.IsInt());
if (nameval.IsString() && typeval.IsInt()) if (nameval.IsString() && typeval.IsInt())
{ {
value = TexMan.GetTexture(nameval.GetString(), typeval.GetInt()); value = TexMan.GetTexture(UnicodeToString(nameval.GetString()), typeval.GetInt());
} }
else else
{ {
@ -1553,7 +1692,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FName &value, FName *d
assert(val->IsString()); assert(val->IsString());
if (val->IsString()) if (val->IsString())
{ {
value = val->GetString(); value = UnicodeToString(val->GetString());
} }
else else
{ {
@ -1638,7 +1777,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FSoundID &sid, FSoundI
assert(val->IsString() || val->IsNull()); assert(val->IsString() || val->IsNull());
if (val->IsString()) if (val->IsString())
{ {
sid = val->GetString(); sid = UnicodeToString(val->GetString());
} }
else if (val->IsNull()) else if (val->IsNull())
{ {
@ -1687,7 +1826,7 @@ template<> FSerializer &Serialize(FSerializer &arc, const char *key, PClassActor
assert(val->IsString() || val->IsNull()); assert(val->IsString() || val->IsNull());
if (val->IsString()) if (val->IsString())
{ {
clst = PClass::FindActor(val->GetString()); clst = PClass::FindActor(UnicodeToString(val->GetString()));
} }
else if (val->IsNull()) else if (val->IsNull())
{ {
@ -1735,7 +1874,7 @@ template<> FSerializer &Serialize(FSerializer &arc, const char *key, PClass *&cl
{ {
if (val->IsString()) if (val->IsString())
{ {
clst = PClass::FindClass(val->GetString()); clst = PClass::FindClass(UnicodeToString(val->GetString()));
} }
else if (val->IsNull()) else if (val->IsNull())
{ {
@ -1810,7 +1949,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FState *&state, FState
assert(cls.IsString() && ndx.IsUint()); assert(cls.IsString() && ndx.IsUint());
if (cls.IsString() && ndx.IsUint()) if (cls.IsString() && ndx.IsUint())
{ {
PClassActor *clas = PClass::FindActor(cls.GetString()); PClassActor *clas = PClass::FindActor(UnicodeToString(cls.GetString()));
if (clas && ndx.GetUint() < (unsigned)clas->NumOwnedStates) if (clas && ndx.GetUint() < (unsigned)clas->NumOwnedStates)
{ {
state = clas->OwnedStates + ndx.GetUint(); state = clas->OwnedStates + ndx.GetUint();
@ -1932,7 +2071,7 @@ template<> FSerializer &Serialize(FSerializer &arc, const char *key, FString *&p
} }
else if (val->IsString()) else if (val->IsString())
{ {
pstr = AActor::mStringPropertyData.Alloc(val->GetString()); pstr = AActor::mStringPropertyData.Alloc(UnicodeToString(val->GetString()));
} }
else else
{ {
@ -1974,7 +2113,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FString &pstr, FString
} }
else if (val->IsString()) else if (val->IsString())
{ {
pstr = val->GetString(); pstr = UnicodeToString(val->GetString());
} }
else else
{ {
@ -2023,7 +2162,7 @@ template<> FSerializer &Serialize(FSerializer &arc, const char *key, char *&pstr
} }
else if (val->IsString()) else if (val->IsString())
{ {
pstr = copystring(val->GetString()); pstr = copystring(UnicodeToString(val->GetString()));
} }
else else
{ {