Make utf8 handling stricter

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@22714 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2006-03-26 11:55:56 +00:00
parent f9cf084fcf
commit c7cae70ec4
2 changed files with 35 additions and 8 deletions

View file

@ -2,8 +2,9 @@
* Source/win32/NSStreamWin32.m: Various tidyups for pipe streams.
* Source/NSDictionary.m: Avoid compiler warning.
* Source/Additions/Unicode.m: Add unicode validation function.
* Headers/Additions/GNUstepBase/Unicode.h: ditto
* Headers/Additions/GNUstepBase/Unicode.h: Add validation function.
* Source/Additions/Unicode.m: Add unicode validation function and
alter UTF8 handling to be stricter.
* Source/GSString.m: validate unicode when initialising a string.
Also create 8bit data strings rather than 16bit where possible.

View file

@ -1361,15 +1361,22 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
* We also discard invalid codepoints here.
*/
if (u == 0xfffe || u == 0xffff
|| (u >= 0xfdd0 && u <= 0xfdef))
{
result = NO; // Invalid character.
break;
}
if ((u >= 0xd800) && (u <= 0xdfff))
{
result = NO;
result = NO; // Unmatched half of surrogate pair.
break;
}
if (u > 0x10ffff)
{
result = NO;
result = NO; // Too large
break;
}
@ -1901,14 +1908,29 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
{
u1 = ((u1 & 0xff00 >> 8) + ((u1 & 0x00ff) << 8));
}
if (u1 == 0xfffe || u1 == 0xffff // unexpected BOM
|| (u1 >= 0xfdd0 && u1 <= 0xfdef) // invalid character
|| (u1 >= 0xdc00 && u1 <= 0xdfff)) // bad pairing
{
if (strict)
{
result = NO;
break;
}
continue; // Skip invalid character.
}
/* possibly get second character and calculate 'u' */
if ((u1 >= 0xd800) && (u1 < 0xdc00))
{
if (spos >= slen)
{
result = NO;
break;
if (strict)
{
result = NO;
break;
}
continue; // At end.
}
/* get second unichar */
@ -1921,8 +1943,12 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
if ((u2 < 0xdc00) && (u2 > 0xdfff))
{
spos--;
result = NO;
break;
if (strict)
{
result = NO;
break;
}
continue; // Skip bad half of surrogate pair.
}
/* make the full value */