Some string optimisation

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@38518 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2015-05-22 14:28:08 +00:00
parent 8556e8e744
commit d8e9f81945
3 changed files with 249 additions and 91 deletions

View file

@ -964,6 +964,16 @@ tsbytes(uintptr_t s, char *buf)
return strtoll(buf, 0, 10);
}
/**
 * Returns the one-character range located at anIndex.<br />
 * The length of the string is decoded from the bits of the receiver's
 * pointer value; an index at or beyond that length raises an
 * NSRangeException.
 */
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
{
  uintptr_t	bits = (uintptr_t)self;
  NSUInteger	count;

  count = (bits >> TINY_STRING_LENGTH_SHIFT) & TINY_STRING_LENGTH_MASK;
  if (anIndex < count)
    {
      return NSMakeRange(anIndex, 1);
    }
  [NSException raise: NSRangeException format:@"Invalid location."];
  return NSMakeRange(anIndex, 1);
}
- (const char*) UTF8String
{
char *buf = GSAutoreleasedBuffer(9);
@ -1004,6 +1014,7 @@ tsbytes(uintptr_t s, char *buf)
{
return self;
}
- (NSUInteger) retainCount
{
return UINT_MAX;
@ -1213,36 +1224,37 @@ fixBOM(unsigned char **bytes, NSUInteger*length, BOOL *owned,
length: (NSUInteger)length
encoding: (NSStringEncoding)encoding
{
const void *original;
void *chars = 0;
BOOL flag = NO;
if (GSPrivateIsEncodingSupported(encoding) == NO)
if (0 == length)
{
return nil; // Invalid encoding
return (id)@"";
}
if (length > 0)
if (0 == bytes)
{
const void *original;
[NSException raise: NSInvalidArgumentException
format: @"-initWithBytes:lenth:encoding given nul bytes"];
}
if (0 == bytes)
{
[NSException raise: NSInvalidArgumentException
format: @"-initWithBytes:lenth:encoding given nul bytes"];
}
original = bytes;
#if defined(OBJC_SMALL_OBJECT_SHIFT) && (OBJC_SMALL_OBJECT_SHIFT == 3)
if (useTinyStrings)
if (useTinyStrings)
{
if (NSASCIIStringEncoding == encoding)
{
if (NSASCIIStringEncoding == encoding)
{
id tinyString = createTinyString(bytes, length);
id tinyString = createTinyString(bytes, length);
if (tinyString)
{
return tinyString;
}
if (tinyString)
{
return tinyString;
}
if (NSUTF8StringEncoding == encoding && (length < 9))
}
if (length < 9)
{
if (NSUTF8StringEncoding == encoding
|| GSPrivateIsByteEncoding(encoding))
{
NSUInteger i;
@ -1264,31 +1276,33 @@ fixBOM(unsigned char **bytes, NSUInteger*length, BOOL *owned,
}
}
}
}
#endif
fixBOM((unsigned char**)&bytes, &length, &flag, encoding);
/*
* We need to copy the data if there is any, unless fixBOM()
* has already done it for us.
*/
if (original == bytes)
{
original = bytes;
fixBOM((unsigned char**)&bytes, &length, &flag, encoding);
/*
* We need to copy the data if there is any, unless fixBOM()
* has already done it for us.
*/
if (original == bytes)
{
#if GS_WITH_GC
chars = NSAllocateCollectable(length, 0);
chars = NSAllocateCollectable(length, 0);
#else
chars = NSZoneMalloc([self zone], length);
chars = NSZoneMalloc([self zone], length);
#endif
memcpy(chars, bytes, length);
}
else
{
/*
* The fixBOM() function has already copied the data and allocated
* new memory, so we can just pass that to the designated initialiser
*/
chars = (void*)bytes;
}
memcpy(chars, bytes, length);
}
else
{
/*
* The fixBOM() function has already copied the data and allocated
* new memory, so we can just pass that to the designated initialiser
*/
chars = (void*)bytes;
}
return [self initWithBytesNoCopy: chars
length: length
encoding: encoding
@ -1305,49 +1319,27 @@ fixBOM(unsigned char **bytes, NSUInteger*length, BOOL *owned,
BOOL isLatin1 = NO;
GSStr me;
if (GSPrivateIsEncodingSupported(encoding) == NO)
if (0 == length)
{
if (flag == YES && bytes != 0)
{
NSZoneFree(NSZoneFromPointer(bytes), bytes);
}
return nil; // Invalid encoding
}
if (length > 0)
{
fixBOM((unsigned char**)&bytes, &length, &flag, encoding);
if (encoding == NSUnicodeStringEncoding)
{
chars.u = bytes;
}
else
{
chars.c = bytes;
}
}
if (encoding == NSUTF8StringEncoding)
{
unsigned i;
for (i = 0; i < length; i++)
if (0 != bytes)
{
if ((chars.c)[i] > 127)
{
break;
}
NSZoneFree(NSZoneFromPointer(bytes), bytes);
}
if (i == length)
{
/*
* This is actually ASCII data ... so we can just store it as if
* in the internal 8bit encoding scheme.
*/
encoding = internalEncoding;
}
return (id)@"";
}
else if (encoding != internalEncoding && isByteEncoding(encoding) == YES)
fixBOM((unsigned char**)&bytes, &length, &flag, encoding);
if (encoding == NSUnicodeStringEncoding)
{
chars.u = bytes;
}
else
{
chars.c = bytes;
}
if (encoding == NSUTF8StringEncoding
|| (encoding != internalEncoding && isByteEncoding(encoding) == YES))
{
unsigned i;
@ -1376,7 +1368,6 @@ fixBOM(unsigned char **bytes, NSUInteger*length, BOOL *owned,
}
}
if (encoding == internalEncoding)
{
#if GS_WITH_GC
@ -1407,6 +1398,15 @@ fixBOM(unsigned char **bytes, NSUInteger*length, BOOL *owned,
unichar *u = 0;
unsigned l = 0;
if (GSPrivateIsEncodingSupported(encoding) == NO)
{
if (flag == YES && bytes != 0)
{
NSZoneFree(NSZoneFromPointer(bytes), bytes);
}
return nil; // Invalid encoding
}
if (GSToUnicode(&u, &l, chars.c, length, encoding,
[self zone], 0) == NO)
{
@ -3947,6 +3947,7 @@ agree, create a new GSCInlineString otherwise.
return rangeOfCharacter_c((GSStr)self, aSet, mask, aRange);
}
/*
- (NSRange) rangeOfString: (NSString*)aString
options: (NSUInteger)mask
range: (NSRange)aRange
@ -3966,6 +3967,7 @@ agree, create a new GSCInlineString otherwise.
}
return rangeOfString_c((GSStr)self, aString, mask, aRange);
}
*/
- (NSStringEncoding) smallestEncoding
{
@ -4338,6 +4340,7 @@ agree, create a new GSCInlineString otherwise.
return rangeOfCharacter_u((GSStr)self, aSet, mask, aRange);
}
/*
- (NSRange) rangeOfString: (NSString*)aString
options: (NSUInteger)mask
range: (NSRange)aRange
@ -4357,6 +4360,7 @@ agree, create a new GSCInlineString otherwise.
}
return rangeOfString_u((GSStr)self, aString, mask, aRange);
}
*/
- (NSStringEncoding) smallestEncoding
{
@ -5230,6 +5234,7 @@ NSAssert(_flags.owned == 1 && _zone != 0, NSInternalInconsistencyException);
return rangeOfCharacter_c((GSStr)self, aSet, mask, aRange);
}
/*
- (NSRange) rangeOfString: (NSString*)aString
options: (NSUInteger)mask
range: (NSRange)aRange
@ -5252,6 +5257,7 @@ NSAssert(_flags.owned == 1 && _zone != 0, NSInternalInconsistencyException);
else
return rangeOfString_c((GSStr)self, aString, mask, aRange);
}
*/
- (void) replaceCharactersInRange: (NSRange)aRange
withString: (NSString*)aString

View file

@ -149,7 +149,7 @@ static SEL cMemberSel = 0;
#define IS_BIT_SET(a,i) ((((a) & (1<<(i)))) > 0)
static NSCharacterSet *nonspace = nil;
static NSData *whitespaceBitmap;
static NSData *whitespaceBitmap;
static unsigned const char *whitespaceBitmapRep = NULL;
#define GS_IS_WHITESPACE(X) IS_BIT_SET(whitespaceBitmapRep[(X)/8], (X) % 8)
@ -623,8 +623,7 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
{
localeCString = [[locale localeIdentifier] UTF8String];
if (localeCString == NULL
|| strcmp("", localeCString) == 0)
if (localeCString == NULL || strcmp("", localeCString) == 0)
{
return NULL;
}
@ -2273,7 +2272,10 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
range: (NSRange)searchRange
locale: (NSLocale *)locale
{
GS_RANGE_CHECK(searchRange, [self length]);
NSUInteger length = [self length];
NSUInteger countOther;
GS_RANGE_CHECK(searchRange, length);
if (aString == nil)
[NSException raise: NSInvalidArgumentException format: @"range of nil"];
@ -2301,7 +2303,150 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
return r;
}
countOther = [aString length];
if (0 == countOther)
{
if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
{
searchRange.location += searchRange.length;
}
searchRange.length = 0;
return searchRange;
}
/* Fast path for literal searches: the characters of both strings are
 * copied into temporary buffers and compared unit by unit (memcmp or a
 * direct unichar comparison) and the result is returned from here, so
 * the code after this block is never reached for a literal search.
 * On entry searchRange has already been range checked against the length
 * of the receiver and countOther is known to be non-zero.
 * NOTE(review): NSCaseInsensitiveSearch is not consulted on this path, so
 * a literal search is always case sensitive here - confirm that this is
 * the intended behaviour.
 */
if ((mask & NSLiteralSearch) == NSLiteralSearch)
  {
    NSRange	result;
    BOOL	backwards;

    backwards = ((mask & NSBackwardsSearch) == NSBackwardsSearch) ? YES : NO;
    if (searchRange.length < countOther)
      {
	/* Range to search is smaller than string to look for.
	 */
	result = NSMakeRange(NSNotFound, 0);
      }
    else
      {
	GS_BEGINITEMBUF(charsOther, (countOther*sizeof(unichar)), unichar)

	[aString getCharacters: charsOther range: NSMakeRange(0, countOther)];
	if ((mask & NSAnchoredSearch) == NSAnchoredSearch
	  || searchRange.length == countOther)
	  {
	    /* Only one position can match: the start of the range (or,
	     * for a backwards search, the position which makes the match
	     * end where the range ends).  Narrow searchRange to exactly
	     * that candidate and compare it in one go.
	     */
	    GS_BEGINITEMBUF2(charsSelf, (countOther*sizeof(unichar)), unichar)

	    if (backwards)
	      {
		/* NSMaxRange() must be evaluated before the length
		 * of the range is changed below.
		 */
		searchRange.location = NSMaxRange(searchRange) - countOther;
	      }
	    searchRange.length = countOther;
	    [self getCharacters: charsSelf range: searchRange];
	    if (memcmp(&charsSelf[0], &charsOther[0],
	      countOther * sizeof(unichar)) == 0)
	      {
		result = searchRange;
	      }
	    else
	      {
		result = NSMakeRange(NSNotFound, 0);
	      }
	    GS_ENDITEMBUF2()
	  }
	else
	  {
	    NSUInteger	pos;
	    NSUInteger	end;

	    /* Range to search is bigger than string to look for.
	     * The candidate start positions are 0 ... end-1 (relative to
	     * searchRange.location).  A forward scan starts at zero; a
	     * backward scan starts at end and pre-decrements.
	     */
	    end = searchRange.length - countOther + 1;
	    pos = backwards ? end : 0;

	    GS_BEGINITEMBUF2(charsSelf, (searchRange.length*sizeof(unichar)),
	      unichar)

	    [self getCharacters: charsSelf range: searchRange];

	    if (1 == countOther)
	      {
		/* The character to look for is the one and only element
		 * of charsOther, so it must be read from index zero.
		 * Indexing by pos would read beyond the end of the buffer
		 * for a backwards search (where pos starts at end).
		 */
		unichar	u = charsOther[0];

		if (backwards)
		  {
		    while (pos-- > 0)
		      {
			if (charsSelf[pos] == u)
			  {
			    break;
			  }
		      }
		  }
		else
		  {
		    while (pos < end)
		      {
			if (charsSelf[pos] == u)
			  {
			    break;
			  }
			pos++;
		      }
		  }
	      }
	    else
	      {
		if (backwards)
		  {
		    while (pos-- > 0)
		      {
			if (memcmp(&charsSelf[pos], charsOther,
			  countOther * sizeof(unichar)) == 0)
			  {
			    break;
			  }
		      }
		  }
		else
		  {
		    while (pos < end)
		      {
			if (memcmp(&charsSelf[pos], charsOther,
			  countOther * sizeof(unichar)) == 0)
			  {
			    break;
			  }
			pos++;
		      }
		  }
	      }

	    /* A failed forward scan stops with pos == end, while a failed
	     * backward scan lets the unsigned pos wrap round below zero;
	     * in both cases pos >= end signals that nothing was found.
	     */
	    if (pos >= end)
	      {
		result = NSMakeRange(NSNotFound, 0);
	      }
	    else
	      {
		result = NSMakeRange(searchRange.location + pos, countOther);
	      }
	    GS_ENDITEMBUF2()
	  }
	GS_ENDITEMBUF()
      }
    return result;
  }
#if GS_USE_ICU == 1
if (nil != locale && ![locale isKindOfClass: [NSLocale class]])
{
locale = [NSLocale currentLocale];
}
{
UCollator *coll = GSICUCollatorOpen(mask, locale);
@ -2310,7 +2455,6 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
NSRange result = NSMakeRange(NSNotFound, 0);
UErrorCode status = U_ZERO_ERROR;
NSUInteger countSelf = searchRange.length;
NSUInteger countOther = [aString length];
UStringSearch *search = NULL;
GS_BEGINITEMBUF(charsSelf, (countSelf * sizeof(unichar)), unichar)
GS_BEGINITEMBUF2(charsOther, (countOther * sizeof(unichar)), unichar)
@ -2405,28 +2549,36 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
*/
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
{
static NSCharacterSet *nonbase = nil;
static SEL nbSel;
static BOOL (*nbImp)(id, SEL, unichar) = 0;
unsigned start;
unsigned end;
unsigned length = [self length];
unichar ch;
unichar (*caiImp)(NSString*, SEL, NSUInteger);
NSCharacterSet *nbSet = [NSCharacterSet nonBaseCharacterSet];
if (anIndex >= length)
[NSException raise: NSRangeException format:@"Invalid location."];
caiImp = (unichar (*)(NSString*,SEL,NSUInteger))
[self methodForSelector: caiSel];
if (nil == nonbase)
{
nonbase = [[NSCharacterSet nonBaseCharacterSet] retain];
nbSel = @selector(characterIsMember:);
nbImp = (BOOL(*)(id,SEL,unichar))[nonbase methodForSelector: nbSel];
}
for (start = anIndex; start > 0; start--)
{
ch = (*caiImp)(self, caiSel, start);
if ([nbSet characterIsMember: ch] == NO)
if ((*nbImp)(nonbase, nbSel, ch) == NO)
break;
}
for (end = start+1; end < length; end++)
{
ch = (*caiImp)(self, caiSel, end);
if ([nbSet characterIsMember: ch] == NO)
if ((*nbImp)(nonbase, nbSel, ch) == NO)
break;
}
@ -5247,12 +5399,10 @@ static NSFileManager *fm = nil;
[NSException raise: NSInvalidArgumentException format: @"compare with nil"];
#if GS_USE_ICU == 1
if (nil != locale
&& ![locale isKindOfClass: [NSLocale class]])
if (nil != locale && ![locale isKindOfClass: [NSLocale class]])
{
locale = [NSLocale currentLocale];
}
{
UCollator *coll = GSICUCollatorOpen(mask, locale);

View file

@ -967,9 +967,10 @@ static SEL foundIgnorableSel;
if (this->shouldProcessNamespaces)
{
NSRange r = [tag rangeOfString: @":"];
NSRange r;
NSString *p = @"";
r = [tag rangeOfString: @":" options: NSLiteralSearch];
if (r.length > 0)
{
p = [tag substringToIndex: r.location];
@ -1354,9 +1355,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
qualified = tag;
if (this->shouldProcessNamespaces)
{
NSRange r = [tag rangeOfString: @":"];
NSRange r;
NSString *p = @"";
r = [tag rangeOfString: @":" options: NSLiteralSearch];
if (r.length > 0)
{
p = [tag substringToIndex: r.location];