Fix issues in -getCString:...

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@17174 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Alexander Malmberg 2003-07-09 02:13:58 +00:00
parent a13e212bbf
commit 04927afede
3 changed files with 105 additions and 51 deletions

View file

@ -1,3 +1,14 @@
2003-07-09 02:41 Alexander Malmberg <alexander@malmberg.org>
* Source/GSString.m (getCString_u): Rewrite to correctly handle
all encodings, the range argument, and the leftoverRange attribute.
* Source/Unicode.m (GSToUnicode): Clarify the documentation (and fix
a few typos in it).
(GSFromUnicode): Clarify the documentation. Signal failure correctly
from the utf8 encoder. Make sure dst isn't set to NULL if zone is
NULL but *size is 0.
2003-07-08 Richard Frith-Macdonald <rfm@gnu.org>
* Source/NSString.m: Avoid using atof() in parsing plists ... we

View file

@ -939,7 +939,7 @@ else \
}
/**
* Function to convert from 8-bit character data to 16-bit unicode.
* Function to convert from 8-bit data to 16-bit unicode characters.
* <p>The dst argument is a pointer to a pointer to a buffer in which the
* converted string is to be stored. If it is a null pointer, this function
* discards converted data, and is used only to determine the length of the
@ -949,17 +949,18 @@ else \
* </p>
* <p>The size argument is a pointer to the initial size of the destination
* buffer. If the function changes the buffer size, this value will be
* altered to the new size. This is measured in characters, not bytes.
* altered to the new size. This is measured in 16-bit unicode characters,
* not bytes.
* </p>
* <p>The src argument is a pointer to the 8-bit character string which is
* <p>The src argument is a pointer to the byte sequence which is
* to be converted to 16-bit unicode.
* </p>
* <p>The slen argument is the length (bytes) of the 8-bit character string
* <p>The slen argument is the length of the byte sequence
* which is to be converted to 16-bit unicode.
* This is measured in characters, not bytes.
* This is measured in bytes.
* </p>
* <p>The end argument specifies the encoding type of the 8-bit character
* string which is to be converted to 16-bit unicode.
* <p>The enc argument specifies the encoding type of the 8-bit byte sequence
* which is to be converted to 16-bit unicode.
* </p>
* <p>The zone argument specifies a memory zone in which the function may
* allocate a buffer to return data in.
@ -971,7 +972,7 @@ else \
* <list>
* <item>If GSUniTerminate is set, the function is expected to null terminate
* the output string, and will assume that it is safe to place the nul
* just beyond the ned of the stated buffer size.
* just beyond the end of the stated buffer size.
* Also, if the function grows the buffer, it will allow for an extra
* termination character.</item>
* <item>If GSUniTemporary is set, the function will return the results in
@ -1033,8 +1034,6 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
{
case NSUTF8StringEncoding:
{
result = YES;
while (spos < slen)
{
unsigned char c = src[spos++];
@ -1452,27 +1451,27 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
}
/**
* Function to convert from 16-bit unicode to 8-bit character data.
* Function to convert from 16-bit unicode to 8-bit data.
* <p>The dst argument is a pointer to a pointer to a buffer in which the
* converted string is to be stored. If it is a null pointer, this function
* converted data is to be stored. If it is a null pointer, this function
* discards converted data, and is used only to determine the length of the
* converted string. If the zone argument is non-nul, the function is free
* converted data. If the zone argument is non-null, the function is free
* to allocate a larger buffer if necessary, and store this new buffer in
* the dst argument. It will *NOT* deallocate the original buffer!
* </p>
* <p>The size argument is a pointer to the initial size of the destination
* buffer. If the function changes the buffer size, this value will be
* altered to the new size. This is measured in characters, not bytes.
* altered to the new size. This is measured in bytes.
* </p>
* <p>The src argument is a pointer to the 16-bit unicode string which is
* to be converted to 8-bit data.
* </p>
* <p>The slen argument is the length (bytes) of the 16-bit unicode string
* <p>The slen argument is the length of the 16-bit unicode string
* which is to be converted to 8-bit data.
* This is measured in characters, not bytes.
* This is measured in 16-bit characters, not bytes.
* </p>
* <p>The end argument specifies the encoding type of the 8-bit character
* string which is to be produced from the 16-bit unicode.
* <p>The enc argument specifies the encoding type of the 8-bit byte sequence
* which is to be produced from the 16-bit unicode.
* </p>
* <p>The zone argument specifies a memory zone in which the function may
* allocate a buffer to return data in.
@ -1483,13 +1482,13 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
* The options argument controls some special behavior.
* <list>
* <item>If GSUniStrict is set, the function will fail if a character is
* encountered which can't be displayed in the source. Otherwise, some
* encountered in the source which can't be converted. Otherwise, some
* approximation or marker will be placed in the destination.</item>
* <item>If GSUniTerminate is set, the function is expected to null terminate
* the output string, and will assume that it is safe to place the nul
* just beyond the ned of the stated buffer size.
* <item>If GSUniTerminate is set, the function is expected to nul terminate
* the output data, and will assume that it is safe to place the nul
* just beyond the end of the stated buffer size.
* Also, if the function grows the buffer, it will allow for an extra
* termination character.</item>
* termination byte.</item>
* <item>If GSUniTemporary is set, the function will return the results in
* an autoreleased buffer rather than in a buffer that the caller must
* release.</item>
@ -1500,8 +1499,8 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
* </list>
* <p>On return, the function result is a flag indicating success (YES)
* or failure (NO), and on success, the value stored in size is the number
* of characters in the converted string. The converted string itsself is
* stored in the location gioven by dst.<br />
* of bytes in the converted data. The converted data itself is
* stored in the location given by dst.<br />
* NB. If the value stored in dst has been changed, it is a pointer to
* allocated memory which the caller is responsible for freeing, and the
* caller is <em>still</em> responsible for freeing the original buffer.
@ -1620,7 +1619,6 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
ptr[dpos++] = (u & 0x3f) | 0x80;
}
}
result = YES;
}
break;
@ -1946,6 +1944,7 @@ tables:
NSZoneFree(zone, ptr);
}
ptr = r;
*dst = ptr;
}
else if (zone != 0 && (ptr == buf || bsize > dpos))
{
@ -1970,13 +1969,13 @@ tables:
{
ptr = NSZoneRealloc(zone, ptr, bytes);
}
*dst = ptr;
}
else if (ptr == buf)
{
ptr = NULL;
result = NO;
}
*dst = ptr;
}
else if (ptr != buf && dst != 0 && ptr != *dst)
{

View file

@ -1123,38 +1123,82 @@ static inline void
getCString_u(ivars self, char *buffer, unsigned int maxLength,
NSRange aRange, NSRange *leftoverRange)
{
unsigned int len;
/* The primitive we have for converting from unicode, GSFromUnicode,
can't deal with our leftoverRange case, so we need to use a bit of
complexity instead. */
unsigned int len;
if (maxLength > self->_count)
/* TODO: this is an extremely ugly hack to work around buggy iconvs
that return -1/E2BIG for buffers larger than 0x40000acf */
if (maxLength > 0x40000000)
maxLength = 0x40000000;
/* First, try converting the whole thing. */
len = maxLength;
if (GSFromUnicode((unsigned char **)&buffer, &len,
self->_contents.u + aRange.location, aRange.length,
defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
{
maxLength = self->_count;
}
if (maxLength < aRange.length)
{
len = maxLength;
if (leftoverRange != 0)
{
leftoverRange->location = aRange.location + maxLength;
leftoverRange->length = aRange.length - maxLength;
}
}
else
{
len = aRange.length;
if (leftoverRange != 0)
{
leftoverRange->location = 0;
leftoverRange->length = 0;
}
if (leftoverRange)
leftoverRange->location = leftoverRange->length = 0;
return;
}
if (GSFromUnicode((unsigned char **)&buffer, &len, self->_contents.u, len,
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
/* The conversion failed. Either the buffer is too small for the whole
range, or there are characters in it we can't convert. Check for
unconvertible characters first. */
len = 0;
if (GSFromUnicode(NULL, &len,
self->_contents.u + aRange.location, aRange.length,
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't get cString from Unicode string."];
return;
}
buffer[len] = '\0';
/* The string can be converted, but not all of it. Do a binary search
to find the longest subrange that fits in the buffer. */
{
unsigned int lo, hi, mid;
lo = 0;
hi = aRange.length;
while (lo < hi)
{
mid = (lo + hi + 1) / 2; /* round up to get edge case right */
len = maxLength;
if (GSFromUnicode((unsigned char **)&buffer, &len,
self->_contents.u + aRange.location, mid,
defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
{
lo = mid;
}
else
{
hi = mid - 1;
}
}
/* lo==hi characters fit. Do the real conversion. */
len = maxLength;
if (lo == 0)
{
buffer[0] = 0;
}
else if (GSFromUnicode((unsigned char **)&buffer, &len,
self->_contents.u + aRange.location, lo,
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
{
NSCAssert(NO, @"binary search gave inconsistent results");
}
if (leftoverRange)
{
leftoverRange->location = aRange.location + lo;
leftoverRange->length = NSMaxRange(aRange) - leftoverRange->location;
}
}
}
static inline int