1
0
Fork 0
forked from fte/fteqw

------------------------------------------------------------------------

r4182 | acceptthis | 2013-01-27 18:32:31 +0000 (Sun, 27 Jan 2013) | 1 line

Handle UTF-8-encoded UTF-16 surrogates (a.k.a. CESU-8), because we can.
------------------------------------------------------------------------


git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@4180 fc73d0e0-1445-4013-8a0c-d673dee63da5
This commit is contained in:
Spoike 2013-03-12 22:38:42 +00:00
parent 1e9b79279c
commit 141b00a636

View file

@ -2044,11 +2044,11 @@ unsigned int utf8_decode(int *error, const void *in, char **out)
//try to deal with surrogates by decoding the low if we see a high.
if (uc >= 0xd800u && uc < 0xdc00u)
{
#if 0
#if 1
//cesu-8
void *lowend;
unsigned int lowsur = utf_decode(&error, str + l, &lowend);
if (*error == 4 && lowsur >= 0xdc00u && lowsur < 0xe000u)
char *lowend;
unsigned int lowsur = utf8_decode(error, str + l, &lowend);
if (*error == 4)
{
*out = lowend;
uc = (((uc&0x3ff) << 10) || (lowsur&0x3ff)) + 0x10000;
@ -2057,11 +2057,11 @@ unsigned int utf8_decode(int *error, const void *in, char **out)
else
#endif
{
*error = 3; //bad lead surrogate.
*error = 3; //bad - lead surrogate without tail.
}
}
if (uc >= 0xd800u && uc < 0xdc00u)
*error = 4; //bad tail surrogate
if (uc >= 0xdc00u && uc < 0xe000u)
*error = 4; //bad - tail surrogate
//these are meant to be illegal too
if (uc == 0xfffeu || uc == 0xffffu || uc > 0x10ffff)