NSString changes.

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@9508 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
jagapen 2001-03-27 17:30:07 +00:00
parent d71377fffb
commit 769199b429
2 changed files with 92 additions and 6 deletions

View file

@ -1,3 +1,10 @@
2001-03-27 Jonathan Gapen <jagapen@home.com>
* Source/NSString.m: New private function to compute the scalar value
of a high-low surrogate pair. New surrogate range class variables.
Optimize ([-hasPrefix]) and ([-hasSuffix]) by using NSAnchoredSearch.
Add UTF-8 code to ([-dataUsingEncoding:allowLossyConversion:]).
2001-03-26 Jonathan Gapen <jagapen@home.com>
* Source/Makefile.preamble: No need to define HAVE_LIBXML here.

View file

@ -217,13 +217,26 @@ pathSepMember(unichar c)
return (*sepMember)(myPathSeps, @selector(characterIsMember:), c);
}
/*
 * Convert a high-low surrogate pair into a Unicode scalar code-point.
 *
 * high is expected to lie in 0xD800..0xDBFF and low in 0xDC00..0xDFFF;
 * the caller must check this, no validation is done here.
 *
 * The UTF-16 decoding formula is
 *   0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
 * The constants must be hexadecimal (0x400 == 1 << 10, and 0x10000 is the
 * first code-point above the Basic Multilingual Plane).  The arithmetic is
 * done in gsu32 because the result does not fit in a 16-bit unichar.
 */
static inline gsu32
surrogatePairValue(unichar high, unichar low)
{
  gsu32 highBits = (gsu32)high - 0xD800;
  gsu32 lowBits = (gsu32)low - 0xDC00;

  return (highBits * 0x400) + lowBits + 0x10000;
}
@implementation NSString
/* Presumably the encoding used when none is specified; it is only declared
 * here and where it is assigned is not visible in this view - confirm. */
static NSStringEncoding _DefaultStringEncoding;
/*
 * NOTE(review): byteOrderMark and byteOrderMarkSwapped are each defined
 * twice below, with opposite values.  This looks like the before and after
 * lines of a diff shown together; a real translation unit can keep only one
 * pair.  U+FEFF is the Unicode byte order mark and 0xFFFE is how it reads
 * when the two bytes are swapped, so presumably the second pair is the
 * intended one - confirm against the actual file.
 */
static const unichar byteOrderMark = 0xFFFE;
static const unichar byteOrderMarkSwapped = 0xFEFF;
static const unichar byteOrderMark = 0xFEFF;
static const unichar byteOrderMarkSwapped = 0xFFFE;
/* UTF-16 Surrogate Ranges, as {location, length}: 1024 code units starting
 * at 0xD800 (high surrogates) and 1024 starting at 0xDC00 (low surrogates). */
static NSRange highSurrogateRange = {0xD800, 1024};
static NSRange lowSurrogateRange = {0xDC00, 1024};
#if HAVE_REGISTER_PRINTF_FUNCTION
#include <stdio.h>
@ -1440,15 +1453,16 @@ handle_printf_atsign (FILE *stream,
/*
 * Returns YES when a non-empty match for aString is found at the start of
 * the receiver, NO otherwise.
 *
 * NOTE(review): the body contains two assign/return pairs.  As written the
 * first return always executes, so the NSAnchoredSearch pair after it is
 * never reached.  This looks like the old and new lines of a diff shown
 * together - confirm which pair the real file keeps.
 */
- (BOOL) hasPrefix: (NSString*)aString
{
NSRange range;
/* Unanchored search: accept only a non-empty match located at index 0. */
range = [self rangeOfString: aString];
return ((range.location == 0) && (range.length != 0)) ? YES : NO;
/* Anchored search: any non-empty result counts as a prefix match
 * (presumably NSAnchoredSearch confines the match to the start of the
 * receiver, so no location check is needed - confirm). */
range = [self rangeOfString: aString options: NSAnchoredSearch];
return (range.length > 0) ? YES : NO;
}
/*
 * Returns YES when a non-empty match for aString is found at the end of
 * the receiver, NO otherwise.
 *
 * NOTE(review): the body contains two assign/return pairs.  As written the
 * first return always executes, so the anchored-search pair after it is
 * never reached.  This looks like the old and new lines of a diff shown
 * together - confirm which pair the real file keeps.
 */
- (BOOL) hasSuffix: (NSString*)aString
{
NSRange range;
/* Backwards search: accept only a non-empty match whose location equals
 * the receiver's length minus aString's length, i.e. one that ends exactly
 * at the end of the receiver. */
range = [self rangeOfString: aString options: NSBackwardsSearch];
return (range.length > 0 && range.location == ([self length] - [aString length])) ? YES : NO;
/* Anchored backwards search: any non-empty result counts as a suffix match
 * (presumably NSAnchoredSearch combined with NSBackwardsSearch confines the
 * match to the end of the receiver - confirm). */
range = [self rangeOfString: aString
options: NSAnchoredSearch | NSBackwardsSearch];
return (range.length > 0) ? YES : NO;
}
- (BOOL) isEqual: (id)anObject
@ -2145,6 +2159,71 @@ handle_printf_atsign (FILE *stream,
buff[count] = '\0';
return [NSDataClass dataWithBytesNoCopy: buff length: count];
}
else if (encoding == NSUTF8StringEncoding)
{
unsigned char *buff;
unsigned i, j;
unichar ch, ch2;
gsu32 cp;
buff = (unsigned char *)NSZoneMalloc(NSDefaultMallocZone(), len*3);
/*
* Each UTF-16 character maps to at most 3 bytes of UTF-8, so we simply
* allocate three times as many bytes as UTF-16 characters, then use
* NSZoneRealloc() later to trim the excess. Most Unix virtual memory
* implementations allocate address space, and actual memory pages are
* not actually allocated until used, so this method shouldn't cause
* memory problems on most Unix systems. On other systems, it may prove
* advantageous to scan the UTF-16 string to determine the UTF-8 string
* length before allocating memory.
*/
for (i = j = 0; i < len; i++)
{
ch = (*caiImp)(self, caiSel, i);
if (NSLocationInRange(ch, highSurrogateRange) && ((i+1) < len))
{
ch2 = (*caiImp)(self, caiSel, i+1);
if (NSLocationInRange(ch2, lowSurrogateRange))
{
cp = surrogatePairValue(ch, ch2);
i++;
}
else
cp = (gsu32)ch;
}
else
cp = (gsu32)ch;
if (cp < 0x80)
{
buff[j++] = cp;
}
else if (cp < 0x800)
{
buff[j++] = 0xC0 | ch>>6;
buff[j++] = 0x80 | (ch & 0x3F);
}
else if (cp < 0x10000)
{
buff[j++] = 0xE0 | ch>>12;
buff[j++] = 0x80 | (ch>>6 & 0x3F);
buff[j++] = 0x80 | (ch & 0x3F);
}
else if (cp < 0x200000)
{
buff[j++] = 0xF0 | ch>>18;
buff[j++] = 0x80 | (ch>>12 & 0x3F);
buff[j++] = 0x80 | (ch>>6 & 0x3F);
buff[j++] = 0x80 | (ch & 0x3F);
}
}
NSZoneRealloc(NSDefaultMallocZone(), buff, j);
return [NSDataClass dataWithBytesNoCopy: buff
length: count];
}
else if (encoding == NSUnicodeStringEncoding)
{
unichar *buff;