mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-23 00:41:02 +00:00
Fix issues in -getCString:...
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@17174 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
a13e212bbf
commit
04927afede
3 changed files with 105 additions and 51 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
|||
2003-07-09 02:41 Alexander Malmberg <alexander@malmberg.org>
|
||||
|
||||
* Source/GSString.m (getCString_u): Rewrite to correctly handle
|
||||
all encodings, the range argument, and the leftoverRange attribute.
|
||||
|
||||
* Source/Unicode.m (GSToUnicode): Clarify the documentation (and fix
|
||||
a few typos in it).
|
||||
(GSFromUnicode): Clarify the documentation. Signal failure correctly
|
||||
from the utf8 encoder. Make sure dst isn't set to NULL if zone is
|
||||
NULL but *size is 0.
|
||||
|
||||
2003-07-08 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Source/NSString.m: Avoid using atof() in parsing plists ... we
|
||||
|
|
|
@ -939,7 +939,7 @@ else \
|
|||
}
|
||||
|
||||
/**
|
||||
* Function to convert from 8-bit character data to 16-bit unicode.
|
||||
* Function to convert from 8-bit data to 16-bit unicode characters.
|
||||
* <p>The dst argument is a pointer to a pointer to a buffer in which the
|
||||
* converted string is to be stored. If it is a null pointer, this function
|
||||
* discards converted data, and is used only to determine the length of the
|
||||
|
@ -949,17 +949,18 @@ else \
|
|||
* </p>
|
||||
* <p>The size argument is a pointer to the initial size of the destination
|
||||
* buffer. If the function changes the buffer size, this value will be
|
||||
* altered to the new size. This is measured in characters, not bytes.
|
||||
* altered to the new size. This is measured in 16-bit unicode characters,
|
||||
* not bytes.
|
||||
* </p>
|
||||
* <p>The src argument is a pointer to the 8-bit character string which is
|
||||
* <p>The src argument is a pointer to the byte sequence which is
|
||||
* to be converted to 16-bit unicode.
|
||||
* </p>
|
||||
* <p>The slen argument is the length (bytes) of the 8-bit character string
|
||||
* <p>The slen argument is the length of the byte sequence
|
||||
* which is to be converted to 16-bit unicode.
|
||||
* This is measured in characters, not bytes.
|
||||
* This is measured in bytes.
|
||||
* </p>
|
||||
* <p>The end argument specifies the encoding type of the 8-bit character
|
||||
* string which is to be converted to 16-bit unicode.
|
||||
* <p>The enc argument specifies the encoding type of the 8-bit byte sequence
|
||||
* which is to be converted to 16-bit unicode.
|
||||
* </p>
|
||||
* <p>The zone argument specifies a memory zone in which the function may
|
||||
* allocate a buffer to return data in.
|
||||
|
@ -971,7 +972,7 @@ else \
|
|||
* <list>
|
||||
* <item>If GSUniTerminate is set, the function is expected to null terminate
|
||||
* the output string, and will assume that it is safe to place the nul
|
||||
* just beyond the ned of the stated buffer size.
|
||||
* just beyond the end of the stated buffer size.
|
||||
* Also, if the function grows the buffer, it will allow for an extra
|
||||
* termination character.</item>
|
||||
* <item>If GSUniTemporary is set, the function will return the results in
|
||||
|
@ -1033,8 +1034,6 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
|
|||
{
|
||||
case NSUTF8StringEncoding:
|
||||
{
|
||||
result = YES;
|
||||
|
||||
while (spos < slen)
|
||||
{
|
||||
unsigned char c = src[spos++];
|
||||
|
@ -1452,27 +1451,27 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
|
|||
}
|
||||
|
||||
/**
|
||||
* Function to convert from 16-bit unicode to 8-bit character data.
|
||||
* Function to convert from 16-bit unicode to 8-bit data.
|
||||
* <p>The dst argument is a pointer to a pointer to a buffer in which the
|
||||
* converted string is to be stored. If it is a null pointer, this function
|
||||
* converted data is to be stored. If it is a null pointer, this function
|
||||
* discards converted data, and is used only to determine the length of the
|
||||
* converted string. If the zone argument is non-nul, the function is free
|
||||
* converted data. If the zone argument is non-nul, the function is free
|
||||
* to allocate a larger buffer if necessary, and store this new buffer in
|
||||
* the dst argument. It will *NOT* deallocate the original buffer!
|
||||
* </p>
|
||||
* <p>The size argument is a pointer to the initial size of the destination
|
||||
* buffer. If the function changes the buffer size, this value will be
|
||||
* altered to the new size. This is measured in characters, not bytes.
|
||||
* altered to the new size. This is measured in bytes.
|
||||
* </p>
|
||||
* <p>The src argument is a pointer to the 16-bit unicode string which is
|
||||
* to be converted to 8-bit data.
|
||||
* </p>
|
||||
* <p>The slen argument is the length (bytes) of the 16-bit unicode string
|
||||
* <p>The slen argument is the length of the 16-bit unicode string
|
||||
* which is to be converted to 8-bit data.
|
||||
* This is measured in characters, not bytes.
|
||||
* This is measured in 16-bit characters, not bytes.
|
||||
* </p>
|
||||
* <p>The end argument specifies the encoding type of the 8-bit character
|
||||
* string which is to be produced from the 16-bit unicode.
|
||||
* <p>The enc argument specifies the encoding type of the 8-bit byte sequence
|
||||
* which is to be produced from the 16-bit unicode.
|
||||
* </p>
|
||||
* <p>The zone argument specifies a memory zone in which the function may
|
||||
* allocate a buffer to return data in.
|
||||
|
@ -1483,13 +1482,13 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
|
|||
* The options argument controls some special behavior.
|
||||
* <list>
|
||||
* <item>If GSUniStrict is set, the function will fail if a character is
|
||||
* encountered which can't be displayed in the source. Otherwise, some
|
||||
* encountered in the source which can't be converted. Otherwise, some
|
||||
* approximation or marker will be placed in the destination.</item>
|
||||
* <item>If GSUniTerminate is set, the function is expected to null terminate
|
||||
* the output string, and will assume that it is safe to place the nul
|
||||
* just beyond the ned of the stated buffer size.
|
||||
* <item>If GSUniTerminate is set, the function is expected to nul terminate
|
||||
* the output data, and will assume that it is safe to place the nul
|
||||
* just beyond the end of the stated buffer size.
|
||||
* Also, if the function grows the buffer, it will allow for an extra
|
||||
* termination character.</item>
|
||||
* termination byte.</item>
|
||||
* <item>If GSUniTemporary is set, the function will return the results in
|
||||
* an autoreleased buffer rather than in a buffer that the caller must
|
||||
* release.</item>
|
||||
|
@ -1500,8 +1499,8 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
|
|||
* </list>
|
||||
* <p>On return, the function result is a flag indicating success (YES)
|
||||
* or failure (NO), and on success, the value stored in size is the number
|
||||
* of characters in the converted string. The converted string itsself is
|
||||
* stored in the location gioven by dst.<br />
|
||||
* of bytes in the converted data. The converted data itself is
|
||||
* stored in the location given by dst.<br />
|
||||
* NB. If the value stored in dst has been changed, it is a pointer to
|
||||
* allocated memory which the caller is responsible for freeing, and the
|
||||
* caller is <em>still</em> responsible for freeing the original buffer.
|
||||
|
@ -1620,7 +1619,6 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
|
|||
ptr[dpos++] = (u & 0x3f) | 0x80;
|
||||
}
|
||||
}
|
||||
result = YES;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1946,6 +1944,7 @@ tables:
|
|||
NSZoneFree(zone, ptr);
|
||||
}
|
||||
ptr = r;
|
||||
*dst = ptr;
|
||||
}
|
||||
else if (zone != 0 && (ptr == buf || bsize > dpos))
|
||||
{
|
||||
|
@ -1970,13 +1969,13 @@ tables:
|
|||
{
|
||||
ptr = NSZoneRealloc(zone, ptr, bytes);
|
||||
}
|
||||
*dst = ptr;
|
||||
}
|
||||
else if (ptr == buf)
|
||||
{
|
||||
ptr = NULL;
|
||||
result = NO;
|
||||
}
|
||||
*dst = ptr;
|
||||
}
|
||||
else if (ptr != buf && dst != 0 && ptr != *dst)
|
||||
{
|
||||
|
|
|
@ -1123,38 +1123,82 @@ static inline void
|
|||
getCString_u(ivars self, char *buffer, unsigned int maxLength,
|
||||
NSRange aRange, NSRange *leftoverRange)
|
||||
{
|
||||
unsigned int len;
|
||||
/* The primitive we have for converting from unicode, GSFromUnicode,
|
||||
can't deal with our leftoverRange case, so we need to use a bit of
|
||||
complexity instead. */
|
||||
unsigned int len;
|
||||
|
||||
if (maxLength > self->_count)
|
||||
/* TODO: this is an extremely ugly hack to work around buggy iconvs
|
||||
that return -1/E2BIG for buffers larger than 0x40000acf */
|
||||
if (maxLength > 0x40000000)
|
||||
maxLength = 0x40000000;
|
||||
|
||||
/* First, try converting the whole thing. */
|
||||
len = maxLength;
|
||||
if (GSFromUnicode((unsigned char **)&buffer, &len,
|
||||
self->_contents.u + aRange.location, aRange.length,
|
||||
defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
|
||||
{
|
||||
maxLength = self->_count;
|
||||
}
|
||||
if (maxLength < aRange.length)
|
||||
{
|
||||
len = maxLength;
|
||||
if (leftoverRange != 0)
|
||||
{
|
||||
leftoverRange->location = aRange.location + maxLength;
|
||||
leftoverRange->length = aRange.length - maxLength;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
len = aRange.length;
|
||||
if (leftoverRange != 0)
|
||||
{
|
||||
leftoverRange->location = 0;
|
||||
leftoverRange->length = 0;
|
||||
}
|
||||
if (leftoverRange)
|
||||
leftoverRange->location = leftoverRange->length = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (GSFromUnicode((unsigned char **)&buffer, &len, self->_contents.u, len,
|
||||
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
|
||||
/* The conversion failed. Either the buffer is too small for the whole
|
||||
range, or there are characters in it we can't convert. Check for
|
||||
unconvertible characters first. */
|
||||
len = 0;
|
||||
if (GSFromUnicode(NULL, &len,
|
||||
self->_contents.u + aRange.location, aRange.length,
|
||||
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
|
||||
{
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't get cString from Unicode string."];
|
||||
return;
|
||||
}
|
||||
buffer[len] = '\0';
|
||||
|
||||
/* Every character can be converted, but the result does not all fit. Do a binary search
|
||||
to find the longest subrange that fits in the buffer. */
|
||||
{
|
||||
unsigned int lo, hi, mid;
|
||||
|
||||
lo = 0;
|
||||
hi = aRange.length;
|
||||
while (lo < hi)
|
||||
{
|
||||
mid = (lo + hi + 1) / 2; /* round up to get edge case right */
|
||||
len = maxLength;
|
||||
if (GSFromUnicode((unsigned char **)&buffer, &len,
|
||||
self->_contents.u + aRange.location, mid,
|
||||
defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
|
||||
{
|
||||
lo = mid;
|
||||
}
|
||||
else
|
||||
{
|
||||
hi = mid - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* lo==hi characters fit. Do the real conversion. */
|
||||
len = maxLength;
|
||||
if (lo == 0)
|
||||
{
|
||||
buffer[0] = 0;
|
||||
}
|
||||
else if (GSFromUnicode((unsigned char **)&buffer, &len,
|
||||
self->_contents.u + aRange.location, lo,
|
||||
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
|
||||
{
|
||||
NSCAssert(NO, @"binary search gave inconsistent results");
|
||||
}
|
||||
|
||||
if (leftoverRange)
|
||||
{
|
||||
leftoverRange->location = aRange.location + lo;
|
||||
leftoverRange->length = NSMaxRange(aRange) - leftoverRange->location;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline int
|
||||
|
|
Loading…
Reference in a new issue