mirror of
https://github.com/gnustep/libs-base.git
synced 2025-05-31 16:50:58 +00:00
Fix issues in -getCString:...
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@17174 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
0383992b80
commit
ea152c1c5e
3 changed files with 105 additions and 51 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
||||||
|
2003-07-09 02:41 Alexander Malmberg <alexander@malmberg.org>
|
||||||
|
|
||||||
|
* Source/GSString.m (getCString_u): Rewrite to correctly handle
|
||||||
|
all encodings, the range argument, and the leftoverRange attribute.
|
||||||
|
|
||||||
|
* Source/Unicode.m (GSToUnicode): Clarify the documentation (and fix
|
||||||
|
a few typos in it).
|
||||||
|
(GSFromUnicode): Clarify the documentation. Signal failure correctly
|
||||||
|
from the utf8 encoder. Make sure dst isn't set to NULL if zone is
|
||||||
|
NULL but *size is 0.
|
||||||
|
|
||||||
2003-07-08 Richard Frith-Macdonald <rfm@gnu.org>
|
2003-07-08 Richard Frith-Macdonald <rfm@gnu.org>
|
||||||
|
|
||||||
* Source/NSString.m: Avoid using atof() in parsing plists ... we
|
* Source/NSString.m: Avoid using atof() in parsing plists ... we
|
||||||
|
|
|
@ -939,7 +939,7 @@ else \
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Function to convert from 8-bit character data to 16-bit unicode.
|
* Function to convert from 8-bit data to 16-bit unicode characters.
|
||||||
* <p>The dst argument is a pointer to a pointer to a buffer in which the
|
* <p>The dst argument is a pointer to a pointer to a buffer in which the
|
||||||
* converted string is to be stored. If it is a null pointer, this function
|
* converted string is to be stored. If it is a null pointer, this function
|
||||||
* discards converted data, and is used only to determine the length of the
|
* discards converted data, and is used only to determine the length of the
|
||||||
|
@ -949,17 +949,18 @@ else \
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The size argument is a pointer to the initial size of the destination
|
* <p>The size argument is a pointer to the initial size of the destination
|
||||||
* buffer. If the function changes the buffer size, this value will be
|
* buffer. If the function changes the buffer size, this value will be
|
||||||
* altered to the new size. This is measured in characters, not bytes.
|
* altered to the new size. This is measured in 16-bit unicode characters,
|
||||||
|
* not bytes.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The src argument is a pointer to the 8-bit character string which is
|
* <p>The src argument is a pointer to the byte sequence which is
|
||||||
* to be converted to 16-bit unicode.
|
* to be converted to 16-bit unicode.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The slen argument is the length (bytes) of the 8-bit character string
|
* <p>The slen argument is the length of the byte sequence
|
||||||
* which is to be converted to 16-bit unicode.
|
* which is to be converted to 16-bit unicode.
|
||||||
* This is measured in characters, not bytes.
|
* This is measured in bytes.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The end argument specifies the encoding type of the 8-bit character
|
* <p>The enc argument specifies the encoding type of the 8-bit byte sequence
|
||||||
* string which is to be converted to 16-bit unicode.
|
* which is to be converted to 16-bit unicode.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The zone argument specifies a memory zone in which the function may
|
* <p>The zone argument specifies a memory zone in which the function may
|
||||||
* allocate a buffer to return data in.
|
* allocate a buffer to return data in.
|
||||||
|
@ -971,7 +972,7 @@ else \
|
||||||
* <list>
|
* <list>
|
||||||
* <item>If GSUniTerminate is set, the function is expected to null terminate
|
* <item>If GSUniTerminate is set, the function is expected to null terminate
|
||||||
* the output string, and will assume that it is safe to place the nul
|
* the output string, and will assume that it is safe to place the nul
|
||||||
* just beyond the ned of the stated buffer size.
|
* just beyond the end of the stated buffer size.
|
||||||
* Also, if the function grows the buffer, it will allow for an extra
|
* Also, if the function grows the buffer, it will allow for an extra
|
||||||
* termination character.</item>
|
* termination character.</item>
|
||||||
* <item>If GSUniTemporary is set, the function will return the results in
|
* <item>If GSUniTemporary is set, the function will return the results in
|
||||||
|
@ -1033,8 +1034,6 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
|
||||||
{
|
{
|
||||||
case NSUTF8StringEncoding:
|
case NSUTF8StringEncoding:
|
||||||
{
|
{
|
||||||
result = YES;
|
|
||||||
|
|
||||||
while (spos < slen)
|
while (spos < slen)
|
||||||
{
|
{
|
||||||
unsigned char c = src[spos++];
|
unsigned char c = src[spos++];
|
||||||
|
@ -1452,27 +1451,27 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Function to convert from 16-bit unicode to 8-bit character data.
|
* Function to convert from 16-bit unicode to 8-bit data.
|
||||||
* <p>The dst argument is a pointer to a pointer to a buffer in which the
|
* <p>The dst argument is a pointer to a pointer to a buffer in which the
|
||||||
* converted string is to be stored. If it is a null pointer, this function
|
* converted data is to be stored. If it is a null pointer, this function
|
||||||
* discards converted data, and is used only to determine the length of the
|
* discards converted data, and is used only to determine the length of the
|
||||||
* converted string. If the zone argument is non-nul, the function is free
|
* converted data. If the zone argument is non-nul, the function is free
|
||||||
* to allocate a larger buffer if necessary, and store this new buffer in
|
* to allocate a larger buffer if necessary, and store this new buffer in
|
||||||
* the dst argument. It will *NOT* deallocate the original buffer!
|
* the dst argument. It will *NOT* deallocate the original buffer!
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The size argument is a pointer to the initial size of the destination
|
* <p>The size argument is a pointer to the initial size of the destination
|
||||||
* buffer. If the function changes the buffer size, this value will be
|
* buffer. If the function changes the buffer size, this value will be
|
||||||
* altered to the new size. This is measured in characters, not bytes.
|
* altered to the new size. This is measured in bytes.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The src argument is a pointer to the 16-bit unicode string which is
|
* <p>The src argument is a pointer to the 16-bit unicode string which is
|
||||||
* to be converted to 8-bit data.
|
* to be converted to 8-bit data.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The slen argument is the length (bytes) of the 16-bit unicode string
|
* <p>The slen argument is the length of the 16-bit unicode string
|
||||||
* which is to be converted to 8-bit data.
|
* which is to be converted to 8-bit data.
|
||||||
* This is measured in characters, not bytes.
|
* This is measured in 16-bit characters, not bytes.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The end argument specifies the encoding type of the 8-bit character
|
* <p>The enc argument specifies the encoding type of the 8-bit byte sequence
|
||||||
* string which is to be produced from the 16-bit unicode.
|
* which is to be produced from the 16-bit unicode.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>The zone argument specifies a memory zone in which the function may
|
* <p>The zone argument specifies a memory zone in which the function may
|
||||||
* allocate a buffer to return data in.
|
* allocate a buffer to return data in.
|
||||||
|
@ -1483,13 +1482,13 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
|
||||||
* The options argument controls some special behavior.
|
* The options argument controls some special behavior.
|
||||||
* <list>
|
* <list>
|
||||||
* <item>If GSUniStrict is set, the function will fail if a character is
|
* <item>If GSUniStrict is set, the function will fail if a character is
|
||||||
* encountered which can't be displayed in the source. Otherwise, some
|
* encountered in the source which can't be converted. Otherwise, some
|
||||||
* approximation or marker will be placed in the destination.</item>
|
* approximation or marker will be placed in the destination.</item>
|
||||||
* <item>If GSUniTerminate is set, the function is expected to null terminate
|
* <item>If GSUniTerminate is set, the function is expected to nul terminate
|
||||||
* the output string, and will assume that it is safe to place the nul
|
* the output data, and will assume that it is safe to place the nul
|
||||||
* just beyond the ned of the stated buffer size.
|
* just beyond the end of the stated buffer size.
|
||||||
* Also, if the function grows the buffer, it will allow for an extra
|
* Also, if the function grows the buffer, it will allow for an extra
|
||||||
* termination character.</item>
|
* termination byte.</item>
|
||||||
* <item>If GSUniTemporary is set, the function will return the results in
|
* <item>If GSUniTemporary is set, the function will return the results in
|
||||||
* an autoreleased buffer rather than in a buffer that the caller must
|
* an autoreleased buffer rather than in a buffer that the caller must
|
||||||
* release.</item>
|
* release.</item>
|
||||||
|
@ -1500,8 +1499,8 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
|
||||||
* </list>
|
* </list>
|
||||||
* <p>On return, the function result is a flag indicating success (YES)
|
* <p>On return, the function result is a flag indicating success (YES)
|
||||||
* or failure (NO), and on success, the value stored in size is the number
|
* or failure (NO), and on success, the value stored in size is the number
|
||||||
* of characters in the converted string. The converted string itsself is
|
* of bytes in the converted data. The converted data itself is
|
||||||
* stored in the location gioven by dst.<br />
|
* stored in the location given by dst.<br />
|
||||||
* NB. If the value stored in dst has been changed, it is a pointer to
|
* NB. If the value stored in dst has been changed, it is a pointer to
|
||||||
* allocated memory which the caller is responsible for freeing, and the
|
* allocated memory which the caller is responsible for freeing, and the
|
||||||
* caller is <em>still</em> responsible for freeing the original buffer.
|
* caller is <em>still</em> responsible for freeing the original buffer.
|
||||||
|
@ -1620,7 +1619,6 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
|
||||||
ptr[dpos++] = (u & 0x3f) | 0x80;
|
ptr[dpos++] = (u & 0x3f) | 0x80;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result = YES;
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1946,6 +1944,7 @@ tables:
|
||||||
NSZoneFree(zone, ptr);
|
NSZoneFree(zone, ptr);
|
||||||
}
|
}
|
||||||
ptr = r;
|
ptr = r;
|
||||||
|
*dst = ptr;
|
||||||
}
|
}
|
||||||
else if (zone != 0 && (ptr == buf || bsize > dpos))
|
else if (zone != 0 && (ptr == buf || bsize > dpos))
|
||||||
{
|
{
|
||||||
|
@ -1970,13 +1969,13 @@ tables:
|
||||||
{
|
{
|
||||||
ptr = NSZoneRealloc(zone, ptr, bytes);
|
ptr = NSZoneRealloc(zone, ptr, bytes);
|
||||||
}
|
}
|
||||||
|
*dst = ptr;
|
||||||
}
|
}
|
||||||
else if (ptr == buf)
|
else if (ptr == buf)
|
||||||
{
|
{
|
||||||
ptr = NULL;
|
ptr = NULL;
|
||||||
result = NO;
|
result = NO;
|
||||||
}
|
}
|
||||||
*dst = ptr;
|
|
||||||
}
|
}
|
||||||
else if (ptr != buf && dst != 0 && ptr != *dst)
|
else if (ptr != buf && dst != 0 && ptr != *dst)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1123,38 +1123,82 @@ static inline void
|
||||||
getCString_u(ivars self, char *buffer, unsigned int maxLength,
|
getCString_u(ivars self, char *buffer, unsigned int maxLength,
|
||||||
NSRange aRange, NSRange *leftoverRange)
|
NSRange aRange, NSRange *leftoverRange)
|
||||||
{
|
{
|
||||||
unsigned int len;
|
/* The primitive we have for converting from unicode, GSFromUnicode,
|
||||||
|
can't deal with our leftoverRange case, so we need to use a bit of
|
||||||
|
complexity instead. */
|
||||||
|
unsigned int len;
|
||||||
|
|
||||||
if (maxLength > self->_count)
|
/* TODO: this is an extremely ugly hack to work around buggy iconvs
|
||||||
|
that return -1/E2BIG for buffers larger than 0x40000acf */
|
||||||
|
if (maxLength > 0x40000000)
|
||||||
|
maxLength = 0x40000000;
|
||||||
|
|
||||||
|
/* First, try converting the whole thing. */
|
||||||
|
len = maxLength;
|
||||||
|
if (GSFromUnicode((unsigned char **)&buffer, &len,
|
||||||
|
self->_contents.u + aRange.location, aRange.length,
|
||||||
|
defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
|
||||||
{
|
{
|
||||||
maxLength = self->_count;
|
if (leftoverRange)
|
||||||
}
|
leftoverRange->location = leftoverRange->length = 0;
|
||||||
if (maxLength < aRange.length)
|
return;
|
||||||
{
|
|
||||||
len = maxLength;
|
|
||||||
if (leftoverRange != 0)
|
|
||||||
{
|
|
||||||
leftoverRange->location = aRange.location + maxLength;
|
|
||||||
leftoverRange->length = aRange.length - maxLength;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
len = aRange.length;
|
|
||||||
if (leftoverRange != 0)
|
|
||||||
{
|
|
||||||
leftoverRange->location = 0;
|
|
||||||
leftoverRange->length = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (GSFromUnicode((unsigned char **)&buffer, &len, self->_contents.u, len,
|
/* The conversion failed. Either the buffer is too small for the whole
|
||||||
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
|
range, or there are characters in it we can't convert. Check for
|
||||||
|
unconvertible characters first. */
|
||||||
|
len = 0;
|
||||||
|
if (GSFromUnicode(NULL, &len,
|
||||||
|
self->_contents.u + aRange.location, aRange.length,
|
||||||
|
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
|
||||||
{
|
{
|
||||||
[NSException raise: NSCharacterConversionException
|
[NSException raise: NSCharacterConversionException
|
||||||
format: @"Can't get cString from Unicode string."];
|
format: @"Can't get cString from Unicode string."];
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
buffer[len] = '\0';
|
|
||||||
|
/* The string can be converted, but not all of it. Do a binary search
|
||||||
|
to find the longest subrange that fits in the buffer. */
|
||||||
|
{
|
||||||
|
unsigned int lo, hi, mid;
|
||||||
|
|
||||||
|
lo = 0;
|
||||||
|
hi = aRange.length;
|
||||||
|
while (lo < hi)
|
||||||
|
{
|
||||||
|
mid = (lo + hi + 1) / 2; /* round up to get edge case right */
|
||||||
|
len = maxLength;
|
||||||
|
if (GSFromUnicode((unsigned char **)&buffer, &len,
|
||||||
|
self->_contents.u + aRange.location, mid,
|
||||||
|
defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
|
||||||
|
{
|
||||||
|
lo = mid;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
hi = mid - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* lo==hi characters fit. Do the real conversion. */
|
||||||
|
len = maxLength;
|
||||||
|
if (lo == 0)
|
||||||
|
{
|
||||||
|
buffer[0] = 0;
|
||||||
|
}
|
||||||
|
else if (GSFromUnicode((unsigned char **)&buffer, &len,
|
||||||
|
self->_contents.u + aRange.location, lo,
|
||||||
|
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
|
||||||
|
{
|
||||||
|
NSCAssert(NO, @"binary search gave inconsistent results");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (leftoverRange)
|
||||||
|
{
|
||||||
|
leftoverRange->location = aRange.location + lo;
|
||||||
|
leftoverRange->length = NSMaxRange(aRange) - leftoverRange->location;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue