Fix issues in -getCString:...

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@17174 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
alexm 2003-07-09 02:13:58 +00:00
parent 0383992b80
commit ea152c1c5e
3 changed files with 105 additions and 51 deletions

View file

@ -1,3 +1,14 @@
2003-07-09 02:41 Alexander Malmberg <alexander@malmberg.org>
* Source/GSString.m (getCString_u): Rewrite to correctly handle
all encodings, the range argument, and the leftoverRange attribute.
* Source/Unicode.m (GSToUnicode): Clarify the documentation (and fix
a few typos in it).
(GSFromUnicode): Clarify the documentation. Signal failure correctly
from the utf8 encoder. Make sure dst isn't set to NULL if zone is
NULL but *size is 0.
2003-07-08 Richard Frith-Macdonald <rfm@gnu.org> 2003-07-08 Richard Frith-Macdonald <rfm@gnu.org>
* Source/NSString.m: Avoid using atof() in parsing plists ... we * Source/NSString.m: Avoid using atof() in parsing plists ... we

View file

@ -939,7 +939,7 @@ else \
} }
/** /**
* Function to convert from 8-bit character data to 16-bit unicode. * Function to convert from 8-bit data to 16-bit unicode characters.
* <p>The dst argument is a pointer to a pointer to a buffer in which the * <p>The dst argument is a pointer to a pointer to a buffer in which the
* converted string is to be stored. If it is a null pointer, this function * converted string is to be stored. If it is a null pointer, this function
* discards converted data, and is used only to determine the length of the * discards converted data, and is used only to determine the length of the
@ -949,17 +949,18 @@ else \
* </p> * </p>
* <p>The size argument is a pointer to the initial size of the destination * <p>The size argument is a pointer to the initial size of the destination
* buffer. If the function changes the buffer size, this value will be * buffer. If the function changes the buffer size, this value will be
* altered to the new size. This is measured in characters, not bytes. * altered to the new size. This is measured in 16-bit unicode characters,
* not bytes.
* </p> * </p>
* <p>The src argument is a pointer to the 8-bit character string which is * <p>The src argument is a pointer to the byte sequence which is
* to be converted to 16-bit unicode. * to be converted to 16-bit unicode.
* </p> * </p>
* <p>The slen argument is the length (bytes) of the 8-bit character string * <p>The slen argument is the length of the byte sequence
* which is to be converted to 16-bit unicode. * which is to be converted to 16-bit unicode.
* This is measured in characters, not bytes. * This is measured in bytes.
* </p> * </p>
* <p>The end argument specifies the encoding type of the 8-bit character * <p>The enc argument specifies the encoding type of the 8-bit byte sequence
* string which is to be converted to 16-bit unicode. * which is to be converted to 16-bit unicode.
* </p> * </p>
* <p>The zone argument specifies a memory zone in which the function may * <p>The zone argument specifies a memory zone in which the function may
* allocate a buffer to return data in. * allocate a buffer to return data in.
@ -971,7 +972,7 @@ else \
* <list> * <list>
* <item>If GSUniTerminate is set, the function is expected to nul terminate * <item>If GSUniTerminate is set, the function is expected to nul terminate
* the output string, and will assume that it is safe to place the nul * the output string, and will assume that it is safe to place the nul
* just beyond the ned of the stated buffer size. * just beyond the end of the stated buffer size.
* Also, if the function grows the buffer, it will allow for an extra * Also, if the function grows the buffer, it will allow for an extra
* termination character.</item> * termination character.</item>
* <item>If GSUniTemporary is set, the function will return the results in * <item>If GSUniTemporary is set, the function will return the results in
@ -1033,8 +1034,6 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
{ {
case NSUTF8StringEncoding: case NSUTF8StringEncoding:
{ {
result = YES;
while (spos < slen) while (spos < slen)
{ {
unsigned char c = src[spos++]; unsigned char c = src[spos++];
@ -1452,27 +1451,27 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
} }
/** /**
* Function to convert from 16-bit unicode to 8-bit character data. * Function to convert from 16-bit unicode to 8-bit data.
* <p>The dst argument is a pointer to a pointer to a buffer in which the * <p>The dst argument is a pointer to a pointer to a buffer in which the
* converted string is to be stored. If it is a null pointer, this function * converted data is to be stored. If it is a null pointer, this function
* discards converted data, and is used only to determine the length of the * discards converted data, and is used only to determine the length of the
* converted string. If the zone argument is non-null, the function is free * converted data. If the zone argument is non-null, the function is free
* to allocate a larger buffer if necessary, and store this new buffer in * to allocate a larger buffer if necessary, and store this new buffer in
* the dst argument. It will *NOT* deallocate the original buffer! * the dst argument. It will *NOT* deallocate the original buffer!
* </p> * </p>
* <p>The size argument is a pointer to the initial size of the destination * <p>The size argument is a pointer to the initial size of the destination
* buffer. If the function changes the buffer size, this value will be * buffer. If the function changes the buffer size, this value will be
* altered to the new size. This is measured in characters, not bytes. * altered to the new size. This is measured in bytes.
* </p> * </p>
* <p>The src argument is a pointer to the 16-bit unicode string which is * <p>The src argument is a pointer to the 16-bit unicode string which is
* to be converted to 8-bit data. * to be converted to 8-bit data.
* </p> * </p>
* <p>The slen argument is the length (bytes) of the 16-bit unicode string * <p>The slen argument is the length of the 16-bit unicode string
* which is to be converted to 8-bit data. * which is to be converted to 8-bit data.
* This is measured in characters, not bytes. * This is measured in 16-bit characters, not bytes.
* </p> * </p>
* <p>The end argument specifies the encoding type of the 8-bit character * <p>The enc argument specifies the encoding type of the 8-bit byte sequence
* string which is to be produced from the 16-bit unicode. * which is to be produced from the 16-bit unicode.
* </p> * </p>
* <p>The zone argument specifies a memory zone in which the function may * <p>The zone argument specifies a memory zone in which the function may
* allocate a buffer to return data in. * allocate a buffer to return data in.
@ -1483,13 +1482,13 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
* The options argument controls some special behavior. * The options argument controls some special behavior.
* <list> * <list>
* <item>If GSUniStrict is set, the function will fail if a character is * <item>If GSUniStrict is set, the function will fail if a character is
* encountered which can't be displayed in the source. Otherwise, some * encountered in the source which can't be converted. Otherwise, some
* approximation or marker will be placed in the destination.</item> * approximation or marker will be placed in the destination.</item>
* <item>If GSUniTerminate is set, the function is expected to null terminate * <item>If GSUniTerminate is set, the function is expected to nul terminate
* the output string, and will assume that it is safe to place the nul * the output data, and will assume that it is safe to place the nul
* just beyond the ned of the stated buffer size. * just beyond the end of the stated buffer size.
* Also, if the function grows the buffer, it will allow for an extra * Also, if the function grows the buffer, it will allow for an extra
* termination character.</item> * termination byte.</item>
* <item>If GSUniTemporary is set, the function will return the results in * <item>If GSUniTemporary is set, the function will return the results in
* an autoreleased buffer rather than in a buffer that the caller must * an autoreleased buffer rather than in a buffer that the caller must
* release.</item> * release.</item>
@ -1500,8 +1499,8 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
* </list> * </list>
* <p>On return, the function result is a flag indicating success (YES) * <p>On return, the function result is a flag indicating success (YES)
* or failure (NO), and on success, the value stored in size is the number * or failure (NO), and on success, the value stored in size is the number
* of characters in the converted string. The converted string itsself is * of bytes in the converted data. The converted data itself is
* stored in the location gioven by dst.<br /> * stored in the location given by dst.<br />
* NB. If the value stored in dst has been changed, it is a pointer to * NB. If the value stored in dst has been changed, it is a pointer to
* allocated memory which the caller is responsible for freeing, and the * allocated memory which the caller is responsible for freeing, and the
* caller is <em>still</em> responsible for freeing the original buffer. * caller is <em>still</em> responsible for freeing the original buffer.
@ -1620,7 +1619,6 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
ptr[dpos++] = (u & 0x3f) | 0x80; ptr[dpos++] = (u & 0x3f) | 0x80;
} }
} }
result = YES;
} }
break; break;
@ -1946,6 +1944,7 @@ tables:
NSZoneFree(zone, ptr); NSZoneFree(zone, ptr);
} }
ptr = r; ptr = r;
*dst = ptr;
} }
else if (zone != 0 && (ptr == buf || bsize > dpos)) else if (zone != 0 && (ptr == buf || bsize > dpos))
{ {
@ -1970,13 +1969,13 @@ tables:
{ {
ptr = NSZoneRealloc(zone, ptr, bytes); ptr = NSZoneRealloc(zone, ptr, bytes);
} }
*dst = ptr;
} }
else if (ptr == buf) else if (ptr == buf)
{ {
ptr = NULL; ptr = NULL;
result = NO; result = NO;
} }
*dst = ptr;
} }
else if (ptr != buf && dst != 0 && ptr != *dst) else if (ptr != buf && dst != 0 && ptr != *dst)
{ {

View file

@ -1123,38 +1123,82 @@ static inline void
getCString_u(ivars self, char *buffer, unsigned int maxLength, getCString_u(ivars self, char *buffer, unsigned int maxLength,
NSRange aRange, NSRange *leftoverRange) NSRange aRange, NSRange *leftoverRange)
{ {
unsigned int len; /* The primitive we have for converting from unicode, GSFromUnicode,
can't deal with our leftoverRange case, so we need to use a bit of
complexity instead. */
unsigned int len;
if (maxLength > self->_count) /* TODO: this is an extremely ugly hack to work around buggy iconvs
that return -1/E2BIG for buffers larger than 0x40000acf */
if (maxLength > 0x40000000)
maxLength = 0x40000000;
/* First, try converting the whole thing. */
len = maxLength;
if (GSFromUnicode((unsigned char **)&buffer, &len,
self->_contents.u + aRange.location, aRange.length,
defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
{ {
maxLength = self->_count; if (leftoverRange)
} leftoverRange->location = leftoverRange->length = 0;
if (maxLength < aRange.length) return;
{
len = maxLength;
if (leftoverRange != 0)
{
leftoverRange->location = aRange.location + maxLength;
leftoverRange->length = aRange.length - maxLength;
}
}
else
{
len = aRange.length;
if (leftoverRange != 0)
{
leftoverRange->location = 0;
leftoverRange->length = 0;
}
} }
if (GSFromUnicode((unsigned char **)&buffer, &len, self->_contents.u, len, /* The conversion failed. Either the buffer is too small for the whole
defEnc, 0, GSUniTerminate | GSUniStrict) == NO) range, or there are characters in it we can't convert. Check for
unconvertible characters first. */
len = 0;
if (GSFromUnicode(NULL, &len,
self->_contents.u + aRange.location, aRange.length,
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
{ {
[NSException raise: NSCharacterConversionException [NSException raise: NSCharacterConversionException
format: @"Can't get cString from Unicode string."]; format: @"Can't get cString from Unicode string."];
return;
} }
buffer[len] = '\0';
/* Every character can be converted, but the result does not all fit in
the buffer. Do a binary search to find the longest prefix of the range that fits. */
{
unsigned int lo, hi, mid;
lo = 0;
hi = aRange.length;
while (lo < hi)
{
mid = (lo + hi + 1) / 2; /* round up to get edge case right */
len = maxLength;
if (GSFromUnicode((unsigned char **)&buffer, &len,
self->_contents.u + aRange.location, mid,
defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
{
lo = mid;
}
else
{
hi = mid - 1;
}
}
/* lo==hi characters fit. Do the real conversion. */
len = maxLength;
if (lo == 0)
{
buffer[0] = 0;
}
else if (GSFromUnicode((unsigned char **)&buffer, &len,
self->_contents.u + aRange.location, lo,
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
{
NSCAssert(NO, @"binary search gave inconsistent results");
}
if (leftoverRange)
{
leftoverRange->location = aRange.location + lo;
leftoverRange->length = NSMaxRange(aRange) - leftoverRange->location;
}
}
} }
static inline int static inline int