mirror of
https://github.com/gnustep/libs-base.git
synced 2025-06-01 09:02:01 +00:00
improve literal string optimisation
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@34101 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
84d3190d31
commit
27807e706c
2 changed files with 138 additions and 125 deletions
|
@ -1,3 +1,8 @@
|
||||||
|
2011-11-01 Richard Frith-Macdonald <rfm@gnu.org>
|
||||||
|
|
||||||
|
* Source/GSString.m: Partial restructuring suggested by Fred, plus
|
||||||
|
some more minor optimisation of equality checks for literal strings.
|
||||||
|
|
||||||
2011-10-28 Richard Frith-Macdonald <rfm@gnu.org>
|
2011-10-28 Richard Frith-Macdonald <rfm@gnu.org>
|
||||||
|
|
||||||
* Source/GSString.m: Fix error (spotted by Fred) in check of UTF-8
|
* Source/GSString.m: Fix error (spotted by Fred) in check of UTF-8
|
||||||
|
|
|
@ -59,6 +59,9 @@
|
||||||
|
|
||||||
#import "GNUstepBase/Unicode.h"
|
#import "GNUstepBase/Unicode.h"
|
||||||
|
|
||||||
|
static NSStringEncoding externalEncoding = 0;
|
||||||
|
static NSStringEncoding internalEncoding = NSISOLatin1StringEncoding;
|
||||||
|
|
||||||
static BOOL isByteEncoding(NSStringEncoding enc)
|
static BOOL isByteEncoding(NSStringEncoding enc)
|
||||||
{
|
{
|
||||||
return GSPrivateIsByteEncoding(enc);
|
return GSPrivateIsByteEncoding(enc);
|
||||||
|
@ -291,6 +294,129 @@ nextUTF8(const uint8_t *p, unsigned l, unsigned *o, unichar *n)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
literalIsEqualInternal(NXConstantString *s, GSStr o)
|
||||||
|
{
|
||||||
|
unsigned len = o->_count;
|
||||||
|
|
||||||
|
/* Since UTF-8 is a multibyte character set, it must have at least
|
||||||
|
* as many bytes as another string of the same length. So if the
|
||||||
|
* UTF-8 string is shorter, the two cannot be equal.
|
||||||
|
* A check for this can quickly give us a result in half the cases
|
||||||
|
* where the two strings have different lengths.
|
||||||
|
*/
|
||||||
|
if (len > s->nxcslen)
|
||||||
|
{
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
NSUInteger pos = 0;
|
||||||
|
unichar n = 0;
|
||||||
|
unsigned i = 0;
|
||||||
|
unichar u;
|
||||||
|
|
||||||
|
if (0 == o->_flags.wide)
|
||||||
|
{
|
||||||
|
/* A narrow internal characterset must have ASCII as a subset,
|
||||||
|
* and so does a UTF-8 string literal, so a bytewise comparison
|
||||||
|
* of strings that contain the same number of bytes will give
|
||||||
|
* a quick check for the common case where both are ASCII.
|
||||||
|
*/
|
||||||
|
if (len == s->nxcslen && 0 == memcmp(o->_contents.c, s->nxcsptr, len))
|
||||||
|
{
|
||||||
|
return YES;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the other string is a buffer containing ascii or latin1,
|
||||||
|
* we can compare buffer contents with unichar values directly.
|
||||||
|
*/
|
||||||
|
if (internalEncoding == NSISOLatin1StringEncoding
|
||||||
|
|| internalEncoding == NSASCIIStringEncoding)
|
||||||
|
{
|
||||||
|
while (i < s->nxcslen || n > 0)
|
||||||
|
{
|
||||||
|
u = nextUTF8((const uint8_t *)s->nxcsptr, s->nxcslen, &i, &n);
|
||||||
|
if (pos >= len || (unichar)o->_contents.c[pos] != u)
|
||||||
|
{
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
if (pos != len)
|
||||||
|
{
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
return YES;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For small strings, or ones where we already have an array of
|
||||||
|
* UTF-16 characters, we can do a UTF-16 comparison directly.
|
||||||
|
* For larger strings, we may do as well with a character by
|
||||||
|
* character comparison.
|
||||||
|
*/
|
||||||
|
if (1 == o->_flags.wide || len < 200)
|
||||||
|
{
|
||||||
|
unichar *ptr;
|
||||||
|
|
||||||
|
if (1 == o->_flags.wide)
|
||||||
|
{
|
||||||
|
ptr = o->_contents.u;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ptr = alloca(sizeof(unichar) * len);
|
||||||
|
if (NO == GSToUnicode(&ptr, &len, o->_contents.c,
|
||||||
|
len, internalEncoding, 0, 0))
|
||||||
|
{
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now we have a UTF-16 buffer, so we can do a UTF-16 comparison.
|
||||||
|
*/
|
||||||
|
while (i < s->nxcslen || n > 0)
|
||||||
|
{
|
||||||
|
u = nextUTF8((const uint8_t *)s->nxcsptr, s->nxcslen, &i, &n);
|
||||||
|
if (pos >= len || ptr[pos] != u)
|
||||||
|
{
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unichar (*imp)(id, SEL, NSUInteger);
|
||||||
|
|
||||||
|
/* Do a character by character comparison using characterAtIndex:
|
||||||
|
* This will be relatively slow, but how often will we actually
|
||||||
|
* need to do this for a literal string? Most string literals will
|
||||||
|
* either be short or will differ from any other string we are
|
||||||
|
* doing a comparison with within the first few tens of characters.
|
||||||
|
*/
|
||||||
|
imp = (unichar(*)(id,SEL,NSUInteger))[(id)o methodForSelector:
|
||||||
|
@selector(characterAtIndex:)];
|
||||||
|
while (i < s->nxcslen || n > 0)
|
||||||
|
{
|
||||||
|
u = nextUTF8((const uint8_t *)s->nxcsptr, s->nxcslen, &i, &n);
|
||||||
|
if (pos >= len
|
||||||
|
|| (*imp)((id)o, @selector(characterAtIndex:), pos) != u)
|
||||||
|
{
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (pos != len)
|
||||||
|
{
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
return YES;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GSPlaceholderString - placeholder class for objects awaiting intialisation.
|
* GSPlaceholderString - placeholder class for objects awaiting intialisation.
|
||||||
|
@ -460,9 +586,6 @@ static BOOL (*equalImp)(id, SEL, id);
|
||||||
static SEL hashSel;
|
static SEL hashSel;
|
||||||
static NSUInteger (*hashImp)(id, SEL);
|
static NSUInteger (*hashImp)(id, SEL);
|
||||||
|
|
||||||
static NSStringEncoding externalEncoding = 0;
|
|
||||||
static NSStringEncoding internalEncoding = NSISOLatin1StringEncoding;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The setup() function is called when any concrete string class is
|
* The setup() function is called when any concrete string class is
|
||||||
* initialized, and caches classes and some method implementations.
|
* initialized, and caches classes and some method implementations.
|
||||||
|
@ -2350,48 +2473,7 @@ isEqual_c(GSStr self, id anObject)
|
||||||
c = object_getClass(anObject);
|
c = object_getClass(anObject);
|
||||||
if (c == NSConstantStringClass)
|
if (c == NSConstantStringClass)
|
||||||
{
|
{
|
||||||
NXConstantString *other = (NXConstantString*)anObject;
|
return literalIsEqualInternal((NXConstantString*)anObject, (GSStr)self);
|
||||||
|
|
||||||
if (self->_count > other->nxcslen)
|
|
||||||
{
|
|
||||||
/* Since UTF-8 is a multibyte character set, it must have at least
|
|
||||||
* as many bytes as another string of the same length. So if the
|
|
||||||
* UTF-8 string is shorter, the two cannot be equal.
|
|
||||||
*/
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
if (internalEncoding == NSASCIIStringEncoding)
|
|
||||||
{
|
|
||||||
if (self->_count == other->nxcslen
|
|
||||||
&& 0 == memcmp(self->_contents.c, other->nxcsptr, other->nxcslen))
|
|
||||||
{
|
|
||||||
return YES;
|
|
||||||
}
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
if (internalEncoding == NSISOLatin1StringEncoding)
|
|
||||||
{
|
|
||||||
NSUInteger pos = 0;
|
|
||||||
unichar n = 0;
|
|
||||||
unsigned i = 0;
|
|
||||||
unichar u;
|
|
||||||
|
|
||||||
while (i < other->nxcslen || n > 0)
|
|
||||||
{
|
|
||||||
u = nextUTF8((const uint8_t *)other->nxcsptr, other->nxcslen,
|
|
||||||
&i, &n);
|
|
||||||
if (pos >= self->_count || (unichar)self->_contents.c[pos] != u)
|
|
||||||
{
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
if (pos != self->_count)
|
|
||||||
{
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
return YES;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (c == GSMutableStringClass || GSObjCIsKindOf(c, GSStringClass) == YES)
|
if (c == GSMutableStringClass || GSObjCIsKindOf(c, GSStringClass) == YES)
|
||||||
{
|
{
|
||||||
|
@ -2454,34 +2536,7 @@ isEqual_u(GSStr self, id anObject)
|
||||||
c = object_getClass(anObject);
|
c = object_getClass(anObject);
|
||||||
if (c == NSConstantStringClass)
|
if (c == NSConstantStringClass)
|
||||||
{
|
{
|
||||||
NXConstantString *other = (NXConstantString*)anObject;
|
return literalIsEqualInternal((NXConstantString*)anObject, (GSStr)self);
|
||||||
NSUInteger pos = 0;
|
|
||||||
unichar n = 0;
|
|
||||||
unsigned i = 0;
|
|
||||||
unichar u;
|
|
||||||
|
|
||||||
if (self->_count > other->nxcslen)
|
|
||||||
{
|
|
||||||
/* Since UTF-8 is a multibyte character set, it must have at least
|
|
||||||
* as many bytes as another string of the same character length.
|
|
||||||
* So if the UTF-8 string is shorter, the two cannot be equal.
|
|
||||||
*/
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
while (i < other->nxcslen || n > 0)
|
|
||||||
{
|
|
||||||
u = nextUTF8((const uint8_t *)other->nxcsptr, other->nxcslen, &i, &n);
|
|
||||||
if (pos >= self->_count || self->_contents.u[pos] != u)
|
|
||||||
{
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
if (pos != self->_count)
|
|
||||||
{
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
return YES;
|
|
||||||
}
|
}
|
||||||
if (c == GSMutableStringClass || GSObjCIsKindOf(c, GSStringClass) == YES)
|
if (c == GSMutableStringClass || GSObjCIsKindOf(c, GSStringClass) == YES)
|
||||||
{
|
{
|
||||||
|
@ -5053,6 +5108,10 @@ literalIsEqual(NXConstantString *self, id anObject)
|
||||||
}
|
}
|
||||||
return YES;
|
return YES;
|
||||||
}
|
}
|
||||||
|
else if (c == GSMutableStringClass || GSObjCIsKindOf(c, GSStringClass) == YES)
|
||||||
|
{
|
||||||
|
return literalIsEqualInternal(self, (GSStr)anObject);
|
||||||
|
}
|
||||||
else if (YES == [anObject isKindOfClass: NSStringClass]) // may be proxy
|
else if (YES == [anObject isKindOfClass: NSStringClass]) // may be proxy
|
||||||
{
|
{
|
||||||
unichar (*imp)(id, SEL, NSUInteger);
|
unichar (*imp)(id, SEL, NSUInteger);
|
||||||
|
@ -5071,58 +5130,7 @@ literalIsEqual(NXConstantString *self, id anObject)
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Try to do a fast comparison for well known scring classes.
|
/* Do a character by character comparison using characterAtIndex:
|
||||||
*/
|
|
||||||
if (c == GSMutableStringClass || GSObjCIsKindOf(c, GSStringClass) == YES)
|
|
||||||
{
|
|
||||||
GSStr o = (GSStr)anObject;
|
|
||||||
|
|
||||||
if (o->_flags.wide == 1)
|
|
||||||
{
|
|
||||||
/* Other string is a unichar buffer
|
|
||||||
*/
|
|
||||||
while (i < self->nxcslen || n > 0)
|
|
||||||
{
|
|
||||||
u = nextUTF8((const uint8_t *)self->nxcsptr,
|
|
||||||
self->nxcslen, &i, &n);
|
|
||||||
if (pos >= len || o->_contents.u[pos] != u)
|
|
||||||
{
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
if (pos != len)
|
|
||||||
{
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
return YES;
|
|
||||||
}
|
|
||||||
else if (internalEncoding == NSISOLatin1StringEncoding
|
|
||||||
|| internalEncoding == NSASCIIStringEncoding)
|
|
||||||
{
|
|
||||||
/* Other string is a buffer containing ascii or latin1 so
|
|
||||||
* we can compare buffer contents with unichar values directly.
|
|
||||||
*/
|
|
||||||
while (i < self->nxcslen || n > 0)
|
|
||||||
{
|
|
||||||
u = nextUTF8((const uint8_t *)self->nxcsptr,
|
|
||||||
self->nxcslen, &i, &n);
|
|
||||||
if (pos >= len || (unichar)o->_contents.c[pos] != u)
|
|
||||||
{
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
if (pos != len)
|
|
||||||
{
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
return YES;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Fall through to do a character by character comparison
|
|
||||||
* using characterAtIndex:
|
|
||||||
*/
|
*/
|
||||||
imp = (unichar(*)(id,SEL,NSUInteger))[anObject methodForSelector:
|
imp = (unichar(*)(id,SEL,NSUInteger))[anObject methodForSelector:
|
||||||
@selector(characterAtIndex:)];
|
@selector(characterAtIndex:)];
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue