------------------------------------------------------------------------
r4182 | acceptthis | 2013-01-27 18:32:31 +0000 (Sun, 27 Jan 2013) | 1 line handle of utf-8 encoded utf-16 surrogates (aka: cesu-8), because we can. ------------------------------------------------------------------------ git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@4180 fc73d0e0-1445-4013-8a0c-d673dee63da5
This commit is contained in:
parent
1e9b79279c
commit
141b00a636
1 changed files with 7 additions and 7 deletions
|
@ -2044,11 +2044,11 @@ unsigned int utf8_decode(int *error, const void *in, char **out)
|
|||
//try to deal with surrogates by decoding the low if we see a high.
|
||||
if (uc >= 0xd800u && uc < 0xdc00u)
|
||||
{
|
||||
#if 0
|
||||
#if 1
|
||||
//cesu-8
|
||||
void *lowend;
|
||||
unsigned int lowsur = utf_decode(&error, str + l, &lowend);
|
||||
if (*error == 4 && lowsur >= 0xdc00u && lowsur < 0xe000u)
|
||||
char *lowend;
|
||||
unsigned int lowsur = utf8_decode(error, str + l, &lowend);
|
||||
if (*error == 4)
|
||||
{
|
||||
*out = lowend;
|
||||
uc = (((uc&0x3ff) << 10) || (lowsur&0x3ff)) + 0x10000;
|
||||
|
@ -2057,11 +2057,11 @@ unsigned int utf8_decode(int *error, const void *in, char **out)
|
|||
else
|
||||
#endif
|
||||
{
|
||||
*error = 3; //bad lead surrogate.
|
||||
*error = 3; //bad - lead surrogate without tail.
|
||||
}
|
||||
}
|
||||
if (uc >= 0xd800u && uc < 0xdc00u)
|
||||
*error = 4; //bad tail surrogate
|
||||
if (uc >= 0xdc00u && uc < 0xe000u)
|
||||
*error = 4; //bad - tail surrogate
|
||||
|
||||
//these are meant to be illegal too
|
||||
if (uc == 0xfffeu || uc == 0xffffu || uc > 0x10ffff)
|
||||
|
|
Loading…
Reference in a new issue