mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-23 00:41:02 +00:00
Improve string validation .. check for invalid unicode characters.
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@22712 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
ba8f64f9e1
commit
55e0ca9228
5 changed files with 218 additions and 13 deletions
|
@ -1,6 +1,11 @@
|
|||
2006-03-25 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Source/win32/NSStreamWin32.m: Variout tidyups for pipe streams.
|
||||
* Source/win32/NSStreamWin32.m: Various tidyups for pipe streams.
|
||||
* Source/NSDictionary.m: Avoid compiler warning.
|
||||
* Source/Additions/Unicode.m: Add unicode validation function.
|
||||
* Headers/Additions/GNUstepBase/Unicode.h: ditto
|
||||
* Source/GSString.m: validate unicode when initialising a string.
|
||||
Also create 8bit data strings rather than 16bit where possible.
|
||||
|
||||
2006-03-24 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
|
|
|
@ -64,6 +64,8 @@ GS_EXPORT unichar *uni_is_decomp(unichar u);
|
|||
#define GSUniBOM 0x08
|
||||
#define GSUniShortOk 0x10
|
||||
|
||||
GS_EXPORT BOOL GSIsUnicode(const unichar *chars, unsigned length,
|
||||
BOOL *isASCII, BOOL *isLatin1);
|
||||
GS_EXPORT BOOL GSFromUnicode(unsigned char **dst, unsigned int *size,
|
||||
const unichar *src, unsigned int slen, NSStringEncoding enc, NSZone *zone,
|
||||
unsigned int options);
|
||||
|
|
|
@ -1090,6 +1090,66 @@ int encode_cstrtoustr(unichar *dst, int dl, const char *src, int sl,
|
|||
}
|
||||
|
||||
|
||||
/**
 * Function to check a block of UTF-16 data for validity as a unicode string
 * and say whether it contains solely ASCII or solely Latin1 data.<br />
 * Any leading BOM must already have been removed and the data must already
 * be in native byte order.<br />
 * The chars argument points to length 16-bit code units to examine.<br />
 * The isASCII argument (which may be a null pointer if the caller is not
 * interested) is set to YES if every unit examined was below 128, and to
 * NO otherwise.<br />
 * The isLatin1 argument (which may be a null pointer if the caller is not
 * interested) is set to YES if every unit examined was below 256, and to
 * NO otherwise.<br />
 * Returns YES if the data is acceptable, NO if it contains one of the
 * non-characters 0xfffe, 0xffff or 0xfdd0 to 0xfdef, a low surrogate
 * which is not preceded by a high surrogate, or a high surrogate which is
 * not immediately followed by a low surrogate.  Also returns NO (with both
 * flags set to NO) if chars is a null pointer while length is non-zero.<br />
 * When NO is returned the flags only describe the data up to and including
 * the offending unit.
 */
BOOL
GSIsUnicode(const unichar *chars, unsigned length,
  BOOL *isASCII, BOOL *isLatin1)
{
  BOOL		ascii = YES;
  BOOL		latin1 = YES;
  BOOL		valid = YES;
  unsigned	i = 0;

  if (chars == 0 && length > 0)
    {
      /* Nothing to read ... report invalid data rather than
       * dereferencing a null pointer.
       */
      ascii = NO;
      latin1 = NO;
      valid = NO;
      length = 0;
    }

  while (i < length)
    {
      unichar	c = chars[i++];

      if (c < 128)
	{
	  continue;		// Plain ASCII ... nothing more to check.
	}
      ascii = NO;
      if (c < 256)
	{
	  continue;		// Latin1 ... nothing more to check.
	}
      latin1 = NO;

      if (c == 0xfffe || c == 0xffff || (c >= 0xfdd0 && c <= 0xfdef))
	{
	  valid = NO;		// Non-characters.
	  break;
	}
      if (c >= 0xdc00 && c <= 0xdfff)
	{
	  valid = NO;		// Second half of a surrogate pair on its own.
	  break;
	}
      if (c >= 0xd800 && c <= 0xdbff)
	{
	  /* First half of a surrogate pair ... the very next unit must
	   * be the second half.
	   */
	  if (i >= length || chars[i] < 0xdc00 || chars[i] > 0xdfff)
	    {
	      valid = NO;	// Second half missing
	      break;
	    }
	  i++;			// Step past second half
	}
    }

  /* Report the classification only to callers who asked for it.
   */
  if (isASCII != 0)
    {
      *isASCII = ascii;
    }
  if (isLatin1 != 0)
    {
      *isLatin1 = latin1;
    }
  return valid;
}
|
||||
|
||||
#define GROW() \
|
||||
if (dst == 0) \
|
||||
|
|
|
@ -355,14 +355,37 @@ setup(void)
|
|||
length: (unsigned)length
|
||||
{
|
||||
GSStr me;
|
||||
BOOL isASCII;
|
||||
BOOL isLatin1;
|
||||
|
||||
me = (GSStr)NSAllocateObject(GSUnicodeInlineStringClass,
|
||||
length*sizeof(unichar), GSObjCZone(self));
|
||||
me->_contents.u = (unichar*)&((GSUnicodeInlineString*)me)[1];
|
||||
me->_count = length;
|
||||
me->_flags.wide = 1;
|
||||
me->_flags.free = 1;
|
||||
memcpy(me->_contents.u, chars, length*sizeof(unichar));
|
||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
||||
{
|
||||
return nil; // Invalid data
|
||||
}
|
||||
if (isASCII == YES
|
||||
|| (intEnc == NSISOLatin1StringEncoding && isLatin1 == YES))
|
||||
{
|
||||
me = (GSStr)NSAllocateObject(GSCInlineStringClass, length,
|
||||
GSObjCZone(self));
|
||||
me->_contents.c = (unsigned char*)&((GSCInlineString*)me)[1];
|
||||
me->_count = length;
|
||||
me->_flags.wide = 0;
|
||||
me->_flags.free = 1;
|
||||
while (length-- > 0)
|
||||
{
|
||||
me->_contents.c[length] = (unsigned char)chars[length];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
me = (GSStr)NSAllocateObject(GSUnicodeInlineStringClass,
|
||||
length*sizeof(unichar), GSObjCZone(self));
|
||||
me->_contents.u = (unichar*)&((GSUnicodeInlineString*)me)[1];
|
||||
me->_count = length;
|
||||
me->_flags.wide = 1;
|
||||
me->_flags.free = 1;
|
||||
memcpy(me->_contents.u, chars, length*sizeof(unichar));
|
||||
}
|
||||
return (id)me;
|
||||
}
|
||||
|
||||
|
@ -374,14 +397,41 @@ setup(void)
|
|||
freeWhenDone: (BOOL)flag
|
||||
{
|
||||
GSStr me;
|
||||
BOOL isASCII;
|
||||
BOOL isLatin1;
|
||||
|
||||
me = (GSStr)NSAllocateObject(GSUnicodeBufferStringClass, 0, GSObjCZone(self));
|
||||
me->_contents.u = chars;
|
||||
me->_count = length;
|
||||
me->_flags.wide = 1;
|
||||
if (flag == YES)
|
||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
||||
{
|
||||
return nil; // Invalid data
|
||||
}
|
||||
if (isASCII == YES
|
||||
|| (intEnc == NSISOLatin1StringEncoding && isLatin1 == YES))
|
||||
{
|
||||
/*
|
||||
* OK ... we can do a more compact version
|
||||
*/
|
||||
me = (GSStr)NSAllocateObject(GSCInlineStringClass, length,
|
||||
GSObjCZone(self));
|
||||
me->_contents.c = (unsigned char*)&((GSCInlineString*)me)[1];
|
||||
me->_count = length;
|
||||
me->_flags.wide = 0;
|
||||
me->_flags.free = 1;
|
||||
while (length-- > 0)
|
||||
{
|
||||
me->_contents.c[length] = (unsigned char)chars[length];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
me = (GSStr)NSAllocateObject(GSUnicodeBufferStringClass,
|
||||
0, GSObjCZone(self));
|
||||
me->_contents.u = chars;
|
||||
me->_count = length;
|
||||
me->_flags.wide = 1;
|
||||
if (flag == YES)
|
||||
{
|
||||
me->_flags.free = 1;
|
||||
}
|
||||
}
|
||||
return (id)me;
|
||||
}
|
||||
|
@ -3007,6 +3057,35 @@ agree, create a new GSUnicodeInlineString otherwise.
|
|||
length: (unsigned int)length
|
||||
freeWhenDone: (BOOL)flag
|
||||
{
|
||||
BOOL isASCII;
|
||||
BOOL isLatin1;
|
||||
|
||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
||||
{
|
||||
RELEASE(self);
|
||||
return nil; // Invalid data
|
||||
}
|
||||
if (isASCII == YES
|
||||
|| (intEnc == NSISOLatin1StringEncoding && isLatin1 == YES))
|
||||
{
|
||||
GSStr me;
|
||||
|
||||
/*
|
||||
* OK ... we can do a more compact version
|
||||
*/
|
||||
me = (GSStr)NSAllocateObject(GSCInlineStringClass, length,
|
||||
GSObjCZone(self));
|
||||
me->_contents.c = (unsigned char*)&((GSCInlineString*)me)[1];
|
||||
me->_count = length;
|
||||
me->_flags.wide = 0;
|
||||
me->_flags.free = 1;
|
||||
while (length-- > 0)
|
||||
{
|
||||
me->_contents.c[length] = (unsigned char)chars[length];
|
||||
}
|
||||
RELEASE(self);
|
||||
return (id)me;
|
||||
}
|
||||
if (_contents.u != 0)
|
||||
{
|
||||
[NSException raise: NSInternalInconsistencyException
|
||||
|
@ -3038,6 +3117,35 @@ agree, create a new GSUnicodeInlineString otherwise.
|
|||
@implementation GSUnicodeInlineString
|
||||
- (id) initWithCharacters: (const unichar*)chars length: (unsigned)length
|
||||
{
|
||||
BOOL isASCII;
|
||||
BOOL isLatin1;
|
||||
|
||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
||||
{
|
||||
RELEASE(self);
|
||||
return nil; // Invalid data
|
||||
}
|
||||
if (isASCII == YES
|
||||
|| (intEnc == NSISOLatin1StringEncoding && isLatin1 == YES))
|
||||
{
|
||||
GSStr me;
|
||||
|
||||
/*
|
||||
* OK ... we can do a more compact version
|
||||
*/
|
||||
me = (GSStr)NSAllocateObject(GSCInlineStringClass, length,
|
||||
GSObjCZone(self));
|
||||
me->_contents.c = (unsigned char*)&((GSCInlineString*)me)[1];
|
||||
me->_count = length;
|
||||
me->_flags.wide = 0;
|
||||
me->_flags.free = 1;
|
||||
while (length-- > 0)
|
||||
{
|
||||
me->_contents.c[length] = (unsigned char)chars[length];
|
||||
}
|
||||
RELEASE(self);
|
||||
return (id)me;
|
||||
}
|
||||
if (_contents.u != 0)
|
||||
{
|
||||
[NSException raise: NSInternalInconsistencyException
|
||||
|
@ -3415,6 +3523,35 @@ agree, create a new GSUnicodeInlineString otherwise.
|
|||
length: (unsigned int)length
|
||||
freeWhenDone: (BOOL)flag
|
||||
{
|
||||
BOOL isASCII;
|
||||
BOOL isLatin1;
|
||||
|
||||
if (GSIsUnicode(chars, length, &isASCII, &isLatin1) == NO)
|
||||
{
|
||||
RELEASE(self);
|
||||
return nil; // Invalid data
|
||||
}
|
||||
if (isASCII == YES
|
||||
|| (intEnc == NSISOLatin1StringEncoding && isLatin1 == YES))
|
||||
{
|
||||
GSStr me;
|
||||
|
||||
/*
|
||||
* OK ... we can do a more compact version
|
||||
*/
|
||||
me = (GSStr)NSAllocateObject(GSCInlineStringClass, length,
|
||||
GSObjCZone(self));
|
||||
me->_contents.c = (unsigned char*)&((GSCInlineString*)me)[1];
|
||||
me->_count = length;
|
||||
me->_flags.wide = 0;
|
||||
me->_flags.free = 1;
|
||||
while (length-- > 0)
|
||||
{
|
||||
me->_contents.c[length] = (unsigned char)chars[length];
|
||||
}
|
||||
RELEASE(self);
|
||||
return (id)me;
|
||||
}
|
||||
_count = length;
|
||||
_capacity = length;
|
||||
_contents.u = chars;
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include "Foundation/NSDebug.h"
|
||||
#include "Foundation/NSObjCRuntime.h"
|
||||
#include "Foundation/NSValue.h"
|
||||
#include "Foundation/NSKeyValueCoding.h"
|
||||
// For private method _decodeArrayOfObjectsForKey:
|
||||
#include "Foundation/NSKeyedArchiver.h"
|
||||
#include "GNUstepBase/GSCategories.h"
|
||||
|
|
Loading…
Reference in a new issue