Improve string validation: check for invalid Unicode characters.

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@22712 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
rfm 2006-03-26 10:59:57 +00:00
parent eaedea3169
commit 65e8986db1
5 changed files with 218 additions and 13 deletions

View file

@ -1090,6 +1090,66 @@ int encode_cstrtoustr(unichar *dst, int dl, const char *src, int sl,
}
/**
 * Checks a block of 16-bit code units for validity as a unicode string
 * and reports whether it consists solely of ASCII or solely of Latin1
 * data.<br />
 * Any leading BOM must already have been removed and the data must already
 * be in native byte order.<br />
 * On entry both *isASCII and *isLatin1 are set to YES.  *isASCII is set to
 * NO as soon as a value above 127 is seen, and *isLatin1 is set to NO as
 * soon as a value above 255 is seen.<br />
 * Returns YES if the data is acceptable.  Returns NO if it contains one of
 * the non-characters 0xfffe, 0xffff or 0xfdd0-0xfdef, a trailing surrogate
 * (0xdc00-0xdfff) not preceded by a leading one, or a leading surrogate
 * (0xd800-0xdbff) not immediately followed by a trailing one.  Whenever NO
 * is returned both flags have been set to NO.
 */
BOOL
GSIsUnicode(const unichar *chars, unsigned length,
  BOOL *isASCII, BOOL *isLatin1)
{
  unsigned	pos = 0;

  *isASCII = YES;
  *isLatin1 = YES;

  /* Phase 1: step over the leading run of 7-bit values.
   */
  while (pos < length && chars[pos] <= 127)
    {
      pos++;
    }
  if (pos >= length)
    {
      return YES;	// Everything was ASCII (or there was no data).
    }
  *isASCII = NO;

  /* Phase 2: step over values which still fit in eight bits.
   */
  while (pos < length && chars[pos] <= 255)
    {
      pos++;
    }
  if (pos >= length)
    {
      return YES;	// Everything was Latin1.
    }
  *isLatin1 = NO;

  /* Phase 3: from the first value above 255 onwards every code unit is
   * checked for non-characters and for well formed surrogate pairs.
   * Values of 255 or less can never fail any of these tests.
   */
  for (; pos < length; pos++)
    {
      unichar	unit = chars[pos];

      if (unit == 0xfffe || unit == 0xffff)
	{
	  return NO;	// Non-characters.
	}
      if (unit >= 0xfdd0 && unit <= 0xfdef)
	{
	  return NO;	// Non-characters.
	}
      if (unit >= 0xdc00 && unit <= 0xdfff)
	{
	  return NO;	// Second half of a surrogate pair with no first half.
	}
      if (unit >= 0xd800 && unit <= 0xdbff)
	{
	  unichar	trail;

	  // First half of a surrogate pair ... the second half must follow.
	  if (pos + 1 >= length)
	    {
	      return NO;	// Second half missing (end of data).
	    }
	  trail = chars[pos + 1];
	  if (trail < 0xdc00 || trail > 0xdfff)
	    {
	      return NO;	// Second half missing (wrong value follows).
	    }
	  pos++;		// Step past second half.
	}
    }
  return YES;
}
#define GROW() \
if (dst == 0) \