mirror of
https://github.com/gnustep/libs-base.git
synced 2025-05-31 08:41:03 +00:00
Validate the data returned by dataUsingEncoding:allowLossyConversion: for the Unicode encoding
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@22713 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
65e8986db1
commit
d77062ba88
3 changed files with 52 additions and 21 deletions
|
@ -64,7 +64,7 @@ GS_EXPORT unichar *uni_is_decomp(unichar u);
|
||||||
#define GSUniBOM 0x08
|
#define GSUniBOM 0x08
|
||||||
#define GSUniShortOk 0x10
|
#define GSUniShortOk 0x10
|
||||||
|
|
||||||
GS_EXPORT BOOL GSIsUnicode(const unichar *chars, unsigned length,
|
GS_EXPORT unsigned GSUnicode(const unichar *chars, unsigned length,
|
||||||
BOOL *isASCII, BOOL *isLatin1);
|
BOOL *isASCII, BOOL *isLatin1);
|
||||||
GS_EXPORT BOOL GSFromUnicode(unsigned char **dst, unsigned int *size,
|
GS_EXPORT BOOL GSFromUnicode(unsigned char **dst, unsigned int *size,
|
||||||
const unichar *src, unsigned int slen, NSStringEncoding enc, NSZone *zone,
|
const unichar *src, unsigned int slen, NSStringEncoding enc, NSZone *zone,
|
||||||
|
|
|
@ -1094,28 +1094,29 @@ int encode_cstrtoustr(unichar *dst, int dl, const char *src, int sl,
|
||||||
* Function to check a block of data for validity as a unicode string and
|
* Function to check a block of data for validity as a unicode string and
|
||||||
* say whether it contains solely ASCII or solely Latin1 data.<br />
|
* say whether it contains solely ASCII or solely Latin1 data.<br />
|
||||||
* Any leading BOM must already have been removed and the data must already
|
* Any leading BOM must already have been removed and the data must already
|
||||||
* be in native byte order.
|
* be in native byte order.<br />
|
||||||
|
* Returns the number of characters which were found valid.
|
||||||
*/
|
*/
|
||||||
BOOL
|
unsigned
|
||||||
GSIsUnicode(const unichar *chars, unsigned length,
|
GSUnicode(const unichar *chars, unsigned length,
|
||||||
BOOL *isASCII, BOOL *isLatin1)
|
BOOL *isASCII, BOOL *isLatin1)
|
||||||
{
|
{
|
||||||
unsigned i = 0;
|
unsigned i = 0;
|
||||||
unichar c;
|
unichar c;
|
||||||
|
|
||||||
*isASCII = YES;
|
if (isASCII) *isASCII = YES;
|
||||||
*isLatin1 = YES;
|
if (isLatin1) *isLatin1 = YES;
|
||||||
while (i < length)
|
while (i < length)
|
||||||
{
|
{
|
||||||
if ((c = chars[i++]) > 127)
|
if ((c = chars[i++]) > 127)
|
||||||
{
|
{
|
||||||
*isASCII = NO;
|
if (isASCII) *isASCII = NO;
|
||||||
i--;
|
i--;
|
||||||
while (i < length)
|
while (i < length)
|
||||||
{
|
{
|
||||||
if ((c = chars[i++]) > 255)
|
if ((c = chars[i++]) > 255)
|
||||||
{
|
{
|
||||||
*isLatin1 = NO;
|
if (isLatin1) *isLatin1 = NO;
|
||||||
i--;
|
i--;
|
||||||
while (i < length)
|
while (i < length)
|
||||||
{
|
{
|
||||||
|
@ -1123,23 +1124,23 @@ GSIsUnicode(const unichar *chars, unsigned length,
|
||||||
if (c == 0xfffe || c == 0xffff
|
if (c == 0xfffe || c == 0xffff
|
||||||
|| (c >= 0xfdd0 && c <= 0xfdef))
|
|| (c >= 0xfdd0 && c <= 0xfdef))
|
||||||
{
|
{
|
||||||
return NO; // Non-characters.
|
return i - 1; // Non-characters.
|
||||||
}
|
}
|
||||||
if (c >= 0xdc00 && c <= 0xdfff)
|
if (c >= 0xdc00 && c <= 0xdfff)
|
||||||
{
|
{
|
||||||
return NO; // Second half of a surrogate pair.
|
return i - 1; // Second half of a surrogate pair.
|
||||||
}
|
}
|
||||||
if (c >= 0xd800 && c <= 0xdbff)
|
if (c >= 0xd800 && c <= 0xdbff)
|
||||||
{
|
{
|
||||||
// First half of a surrogate pair.
|
// First half of a surrogate pair.
|
||||||
if (i >= length)
|
if (i >= length)
|
||||||
{
|
{
|
||||||
return NO; // Second half missing
|
return i - 1; // Second half missing
|
||||||
}
|
}
|
||||||
c = chars[i];
|
c = chars[i];
|
||||||
if (c < 0xdc00 || c > 0xdfff)
|
if (c < 0xdc00 || c > 0xdfff)
|
||||||
{
|
{
|
||||||
return NO; // Second half missing
|
return i - 1; // Second half missing
|
||||||
}
|
}
|
||||||
i++; // Step past second half
|
i++; // Step past second half
|
||||||
}
|
}
|
||||||
|
@ -1148,7 +1149,7 @@ GSIsUnicode(const unichar *chars, unsigned length,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return YES;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GROW() \
|
#define GROW() \
|
||||||
|
|
|
@ -358,7 +358,7 @@ setup(void)
|
||||||
BOOL isASCII;
|
BOOL isASCII;
|
||||||
BOOL isLatin1;
|
BOOL isLatin1;
|
||||||
|
|
||||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
if (GSUnicode(chars, length, &isASCII, &isLatin1) != length)
|
||||||
{
|
{
|
||||||
return nil; // Invalid data
|
return nil; // Invalid data
|
||||||
}
|
}
|
||||||
|
@ -400,7 +400,7 @@ setup(void)
|
||||||
BOOL isASCII;
|
BOOL isASCII;
|
||||||
BOOL isLatin1;
|
BOOL isLatin1;
|
||||||
|
|
||||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
if (GSUnicode(chars, length, &isASCII, &isLatin1) != length)
|
||||||
{
|
{
|
||||||
return nil; // Invalid data
|
return nil; // Invalid data
|
||||||
}
|
}
|
||||||
|
@ -1286,14 +1286,44 @@ dataUsingEncoding_u(GSStr self, NSStringEncoding encoding, BOOL flag)
|
||||||
|
|
||||||
if (encoding == NSUnicodeStringEncoding)
|
if (encoding == NSUnicodeStringEncoding)
|
||||||
{
|
{
|
||||||
unichar *buff;
|
unichar *buff;
|
||||||
|
unsigned l;
|
||||||
|
unsigned from = 0;
|
||||||
|
unsigned to = 1;
|
||||||
|
|
||||||
|
if ((l = GSUnicode(self->_contents.u, len, 0, 0)) != len)
|
||||||
|
{
|
||||||
|
if (flag == NO)
|
||||||
|
{
|
||||||
|
return nil;
|
||||||
|
}
|
||||||
|
}
|
||||||
buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
|
buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
|
||||||
sizeof(unichar)*(len+1));
|
sizeof(unichar)*(len+1));
|
||||||
buff[0] = 0xFEFF;
|
buff[0] = 0xFEFF;
|
||||||
memcpy(buff+1, self->_contents.u, sizeof(unichar)*len);
|
|
||||||
|
while (len > 0)
|
||||||
|
{
|
||||||
|
if (l > 0)
|
||||||
|
{
|
||||||
|
memcpy(buff + to, self->_contents.u + from, sizeof(unichar)*l);
|
||||||
|
from += l;
|
||||||
|
to += l;
|
||||||
|
len -= l;
|
||||||
|
}
|
||||||
|
if (len > 0)
|
||||||
|
{
|
||||||
|
// A bad character in the string ... skip it.
|
||||||
|
if (--len > 0)
|
||||||
|
{
|
||||||
|
// Not at end ... try another batch.
|
||||||
|
from++;
|
||||||
|
l = GSUnicode(self->_contents.u + from, len, 0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return [NSData dataWithBytesNoCopy: buff
|
return [NSData dataWithBytesNoCopy: buff
|
||||||
length: sizeof(unichar)*(len+1)];
|
length: sizeof(unichar)*to];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -3060,7 +3090,7 @@ agree, create a new GSUnicodeInlineString otherwise.
|
||||||
BOOL isASCII;
|
BOOL isASCII;
|
||||||
BOOL isLatin1;
|
BOOL isLatin1;
|
||||||
|
|
||||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
if (GSUnicode(chars, length, &isASCII, &isLatin1) != length)
|
||||||
{
|
{
|
||||||
RELEASE(self);
|
RELEASE(self);
|
||||||
return nil; // Invalid data
|
return nil; // Invalid data
|
||||||
|
@ -3120,7 +3150,7 @@ agree, create a new GSUnicodeInlineString otherwise.
|
||||||
BOOL isASCII;
|
BOOL isASCII;
|
||||||
BOOL isLatin1;
|
BOOL isLatin1;
|
||||||
|
|
||||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
if (GSUnicode(chars, length, &isASCII, &isLatin1) != length)
|
||||||
{
|
{
|
||||||
RELEASE(self);
|
RELEASE(self);
|
||||||
return nil; // Invalid data
|
return nil; // Invalid data
|
||||||
|
@ -3526,7 +3556,7 @@ agree, create a new GSUnicodeInlineString otherwise.
|
||||||
BOOL isASCII;
|
BOOL isASCII;
|
||||||
BOOL isLatin1;
|
BOOL isLatin1;
|
||||||
|
|
||||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
if (GSUnicode(chars, length, &isASCII, &isLatin1) != length)
|
||||||
{
|
{
|
||||||
RELEASE(self);
|
RELEASE(self);
|
||||||
return nil; // Invalid data
|
return nil; // Invalid data
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue