Fix issues in -getCString:...

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@17174 72102866-910b-0410-8b05-ffd578937521
2025-05-31 16:50:58 +00:00 · 2003-07-09 02:13:58 +00:00 · 2003-07-09 02:13:58 +00:00 · ea152c1c5e
commit ea152c1c5e
parent 0383992b80
3 changed files with 105 additions and 51 deletions
--- a/11
+++ b/11
@ -1,3 +1,14 @@
 2003-07-09 02:41  Alexander Malmberg <alexander@malmberg.org>
 	* Source/GSString.m (getCString_u): Rewrite to correctly handle
 	all encodings, the range argument, and the leftoverRange attribute.
 	* Source/Unicode.m (GSToUnicode): Clarify the documentation (and fix
 	a few typos in it).
 	(GSFromUnicode): Clarify the documentation. Signal failure correctly
 	from the utf8 encoder. Make sure dst isn't set to NULL if zone is
 	NULL but *size is 0.
 2003-07-08  Richard Frith-Macdonald <rfm@gnu.org>
 	* Source/NSString.m: Avoid using atof() in parsing plists ... we
--- a/Source/Additions/Unicode.m
+++ b/Source/Additions/Unicode.m
@ -939,7 +939,7 @@ else \
  }
 /**
- * Function to convert from 8-bit character data to 16-bit unicode.
+ * Function to convert from 8-bit data to 16-bit unicode characters.
 * <p>The dst argument is a pointer to a pointer to a buffer in which the
 * converted string is to be stored.  If it is a null pointer, this function
 * discards converted data, and is used only to determine the length of the
@ -949,17 +949,18 @@ else \
 * </p>
 * <p>The size argument is a pointer to the initial size of the destination
 * buffer.  If the function changes the buffer size, this value will be
- * altered to the new size.  This is measured in characters, not bytes.
+ * altered to the new size.  This is measured in 16-bit unicode characters,
 * not bytes.
 * </p>
- * <p>The src argument is a pointer to the 8-bit character string which is
+ * <p>The src argument is a pointer to the byte sequence which is
 * to be converted to 16-bit unicode.
 * </p>
- * <p>The slen argument is the length (bytes) of the 8-bit character string
+ * <p>The slen argument is the length of the byte sequence
 * which is to be converted to 16-bit unicode.
- * This is measured in characters, not bytes.
+ * This is measured in bytes.
 * </p>
- * <p>The end argument specifies the encoding type of the 8-bit character
+ * <p>The enc argument specifies the encoding type of the 8-bit byte sequence
- * string which is to be converted to 16-bit unicode.
+ * which is to be converted to 16-bit unicode.
 * </p>
 * <p>The zone argument specifies a memory zone in which the function may
 * allocate a buffer to return data in.
@ -971,7 +972,7 @@ else \
 * <list>
 * <item>If GSUniTerminate is set, the function is expected to null terminate
 * the output string, and will assume that it is safe to place the nul
- * just beyond the ned of the stated buffer size.
+ * just beyond the end of the stated buffer size.
 * Also, if the function grows the buffer, it will allow for an extra
 * termination character.</item>
 * <item>If GSUniTemporary is set, the function will return the results in
@ -1033,8 +1034,6 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
    {
      case NSUTF8StringEncoding:
 	{
 	  result = YES;
 	  while (spos < slen)
 	    {
 	      unsigned char	c = src[spos++];
@ -1452,27 +1451,27 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
 }
 /**
- * Function to convert from 16-bit unicode to 8-bit character data.
+ * Function to convert from 16-bit unicode to 8-bit data.
 * <p>The dst argument is a pointer to a pointer to a buffer in which the
- * converted string is to be stored.  If it is a null pointer, this function
+ * converted data is to be stored.  If it is a null pointer, this function
 * discards converted data, and is used only to determine the length of the
- * converted string.  If the zone argument is non-nul, the function is free
+ * converted data.  If the zone argument is non-null, the function is free
 * to allocate a larger buffer if necessary, and store this new buffer in
 * the dst argument.  It will *NOT* deallocate the original buffer!
 * </p>
 * <p>The size argument is a pointer to the initial size of the destination
 * buffer.  If the function changes the buffer size, this value will be
- * altered to the new size.  This is measured in characters, not bytes.
+ * altered to the new size.  This is measured in bytes.
 * </p>
 * <p>The src argument is a pointer to the 16-bit unicode string which is
 * to be converted to 8-bit data.
 * </p>
- * <p>The slen argument is the length (bytes) of the 16-bit unicode string
+ * <p>The slen argument is the length of the 16-bit unicode string
 * which is to be converted to 8-bit data.
- * This is measured in characters, not bytes.
+ * This is measured in 16-bit characters, not bytes.
 * </p>
- * <p>The end argument specifies the encoding type of the 8-bit character
+ * <p>The enc argument specifies the encoding type of the 8-bit byte sequence
- * string which is to be produced from the 16-bit unicode.
+ * which is to be produced from the 16-bit unicode.
 * </p>
 * <p>The zone argument specifies a memory zone in which the function may
 * allocate a buffer to return data in.
@ -1483,13 +1482,13 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
 * The options argument controls some special behavior.
 * <list>
 * <item>If GSUniStrict is set, the function will fail if a character is
- * encountered which can't be displayed in the source.  Otherwise, some
+ * encountered in the source which can't be converted.  Otherwise, some
 * approximation or marker will be placed in the destination.</item>
- * <item>If GSUniTerminate is set, the function is expected to null terminate
+ * <item>If GSUniTerminate is set, the function is expected to nul terminate
- * the output string, and will assume that it is safe to place the nul
+ * the output data, and will assume that it is safe to place the nul
- * just beyond the ned of the stated buffer size.
+ * just beyond the end of the stated buffer size.
 * Also, if the function grows the buffer, it will allow for an extra
- * termination character.</item>
+ * termination byte.</item>
 * <item>If GSUniTemporary is set, the function will return the results in
 * an autoreleased buffer rather than in a buffer that the caller must
 * release.</item>
@ -1500,8 +1499,8 @@ static inline int chop(unichar c, _ucc_ *table, int hi)
 * </list>
 * <p>On return, the function result is a flag indicating success (YES)
 * or failure (NO), and on success, the value stored in size is the number
- * of characters in the converted string.  The converted string itsself is
+ * of bytes in the converted data.  The converted data itself is
- * stored in the location gioven by dst.<br />
+ * stored in the location given by dst.<br />
 * NB. If the value stored in dst has been changed, it is a pointer to
 * allocated memory which the caller is responsible for freeing, and the
 * caller is <em>still</em> responsible for freeing the original buffer.
@ -1620,7 +1619,6 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
 		  ptr[dpos++] = (u & 0x3f) | 0x80;
 	        }
 	    }
 	  result = YES;
        }
        break;
@ -1946,6 +1944,7 @@ tables:
 	      NSZoneFree(zone, ptr);
 	    }
 	  ptr = r;
 	  *dst = ptr;
 	}
      else if (zone != 0 && (ptr == buf || bsize > dpos))
 	{
@ -1970,13 +1969,13 @@ tables:
 	    {
 	      ptr = NSZoneRealloc(zone, ptr, bytes);
 	    }
 	  *dst = ptr;
 	}
      else if (ptr == buf)
 	{
 	  ptr = NULL;
 	  result = NO;
 	}
      *dst = ptr;
    }
  else if (ptr != buf && dst != 0 && ptr != *dst)
    {
--- a/Source/GSString.m
+++ b/Source/GSString.m
@ -1123,38 +1123,82 @@ static inline void
 getCString_u(ivars self, char *buffer, unsigned int maxLength,
  NSRange aRange, NSRange *leftoverRange)
 {
-  unsigned int	len;
+  /* The primitive we have for converting from unicode, GSFromUnicode,
  can't deal with our leftoverRange case, so we need to use a bit of
  complexity instead. */
  unsigned int len;
-  if (maxLength > self->_count)
+  /* TODO: this is an extremely ugly hack to work around buggy iconvs
  that return -1/E2BIG for buffers larger than 0x40000acf */
  if (maxLength > 0x40000000)
    maxLength = 0x40000000;
  /* First, try converting the whole thing. */
  len = maxLength;
  if (GSFromUnicode((unsigned char **)&buffer, &len,
 		    self->_contents.u + aRange.location, aRange.length,
 		    defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
    {
-      maxLength = self->_count;
+      if (leftoverRange)
-    }
+	leftoverRange->location = leftoverRange->length = 0;
-  if (maxLength < aRange.length)
+      return;
    {
      len = maxLength;
      if (leftoverRange != 0)
 	{
 	  leftoverRange->location = aRange.location + maxLength;
 	  leftoverRange->length = aRange.length - maxLength;
 	}
    }
  else
    {
      len = aRange.length;
      if (leftoverRange != 0)
 	{
 	  leftoverRange->location = 0;
 	  leftoverRange->length = 0;
 	}
    }
-  if (GSFromUnicode((unsigned char **)&buffer, &len, self->_contents.u, len,
+  /* The conversion failed. Either the buffer is too small for the whole
-    defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
+  range, or there are characters in it we can't convert. Check for
  unconvertible characters first. */
  len = 0;
  if (GSFromUnicode(NULL, &len,
 		    self->_contents.u + aRange.location, aRange.length,
 		    defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
    {
      [NSException raise: NSCharacterConversionException
 		  format: @"Can't get cString from Unicode string."];
      return;
    }
-  buffer[len] = '\0';
+
  /* The string can be converted, but not all of it. Do a binary search
  to find the longest subrange that fits in the buffer. */
  {
    unsigned int lo, hi, mid;
    lo = 0;
    hi = aRange.length;
    while (lo < hi)
      {
 	mid = (lo + hi + 1) / 2; /* round up to get edge case right */
 	len = maxLength;
 	if (GSFromUnicode((unsigned char **)&buffer, &len,
 			  self->_contents.u + aRange.location, mid,
 			  defEnc, 0, GSUniTerminate | GSUniStrict) == YES)
 	  {
 	    lo = mid;
 	  }
 	else
 	  {
 	    hi = mid - 1;
 	  }
      }
    /* lo==hi characters fit. Do the real conversion. */
    len = maxLength;
    if (lo == 0)
      {
        buffer[0] = 0;
      }
    else if (GSFromUnicode((unsigned char **)&buffer, &len,
 			   self->_contents.u + aRange.location, lo,
 			   defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
      {
        NSCAssert(NO, @"binary search gave inconsistent results");
      }
    if (leftoverRange)
      {
 	leftoverRange->location = aRange.location + lo;
 	leftoverRange->length = NSMaxRange(aRange) - leftoverRange->location;
      }
  }
 }
 static inline int