mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-23 00:41:02 +00:00
Make utf8 handling stricter
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@22714 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
f9cf084fcf
commit
c7cae70ec4
2 changed files with 35 additions and 8 deletions
|
@ -2,8 +2,9 @@
|
|||
|
||||
* Source/win32/NSStreamWin32.m: Various tidyups for pipe streams.
|
||||
* Source/NSDictionary.m: Avoid compiler warning.
|
||||
* Source/Additions/Unicode.m: Add unicode validation function.
|
||||
* Headers/Additions/GNUstepBase/Unicode.h: ditto
|
||||
* Headers/Additions/GNUstepBase/Unicode.h: Add validation function.
|
||||
* Source/Additions/Unicode.m: Add unicode validation function and
|
||||
alter UTF8 handling to be stricter.
|
||||
* Source/GSString.m: Validate unicode when initialising a string.
|
||||
Also create 8bit data strings rather than 16bit where possible.
|
||||
|
||||
|
|
|
@ -1361,15 +1361,22 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
|
|||
* We also discard invalid codepoints here.
|
||||
*/
|
||||
|
||||
if (u == 0xfffe || u == 0xffff
|
||||
|| (u >= 0xfdd0 && u <= 0xfdef))
|
||||
{
|
||||
result = NO; // Invalid character.
|
||||
break;
|
||||
}
|
||||
|
||||
if ((u >= 0xd800) && (u <= 0xdfff))
|
||||
{
|
||||
result = NO;
|
||||
result = NO; // Unmatched half of surrogate pair.
|
||||
break;
|
||||
}
|
||||
|
||||
if (u > 0x10ffff)
|
||||
{
|
||||
result = NO;
|
||||
result = NO; // Too large
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1901,14 +1908,29 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
|
|||
{
|
||||
u1 = ((u1 & 0xff00 >> 8) + ((u1 & 0x00ff) << 8));
|
||||
}
|
||||
if (u1 == 0xfffe || u1 == 0xffff // unexpected BOM
|
||||
|| (u1 >= 0xfdd0 && u1 <= 0xfdef) // invalid character
|
||||
|| (u1 >= 0xdc00 && u1 <= 0xdfff)) // bad pairing
|
||||
{
|
||||
if (strict)
|
||||
{
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
continue; // Skip invalid character.
|
||||
}
|
||||
|
||||
/* possibly get second character and calculate 'u' */
|
||||
if ((u1 >= 0xd800) && (u1 < 0xdc00))
|
||||
{
|
||||
if (spos >= slen)
|
||||
{
|
||||
result = NO;
|
||||
break;
|
||||
if (strict)
|
||||
{
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
continue; // At end.
|
||||
}
|
||||
|
||||
/* get second unichar */
|
||||
|
@ -1921,8 +1943,12 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
|
|||
if ((u2 < 0xdc00) && (u2 > 0xdfff))
|
||||
{
|
||||
spos--;
|
||||
result = NO;
|
||||
break;
|
||||
if (strict)
|
||||
{
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
continue; // Skip bad half of surrogate pair.
|
||||
}
|
||||
|
||||
/* make the full value */
|
||||
|
|
Loading…
Reference in a new issue