Checkpoint new string ABI work.

ASCII and UTF-16 strings in the new representation now work, but some of
the string optimisations are disabled.
This commit is contained in:
David Chisnall 2018-04-07 20:06:16 +01:00
parent 7bf1179f60
commit 2bdcae390d
2 changed files with 162 additions and 1 deletions

View file

@ -881,6 +881,12 @@ typedef NSUInteger NSStringEncodingConversionOptions;
@end
/* Select the new constant-string layout only when the compiler advertises
 * a GNUstep runtime ABI version and that version is at least 20.
 */
#if defined(__OBJC_GNUSTEP_RUNTIME_ABI__) && __OBJC_GNUSTEP_RUNTIME_ABI__ >= 20
#  define GNUSTEP_NEW_STRING_ABI
#endif
/**
* <p>The NXConstantString class is used to hold constant 8-bit character
* string objects produced by the compiler where it sees @"..." in the
@ -908,8 +914,16 @@ typedef NSUInteger NSStringEncodingConversionOptions;
@interface NXConstantString : NSString
{
@public
#ifdef GNUSTEP_NEW_STRING_ABI
/* Layout used when GNUSTEP_NEW_STRING_ABI is defined.  The order and
 * width of these ivars are part of the layout and must not be changed.
 */
/* Bit field.  The implementation reads the two least significant bits as
 * the encoding tag of the data behind nxcsptr and uses bit 16 to record
 * that the hash ivar holds a valid value.
 */
uint32_t flags;
/* Length of the string; the implementation treats it as the number of
 * UTF-16 units and uses it as the bound for character indexes.
 */
uint32_t nxcslen;
/* NOTE(review): not read by any code visible alongside this declaration;
 * presumably the size of the character data - confirm against the ABI.
 */
uint32_t size;
/* Cached result of -hash; only meaningful once bit 16 of flags is set. */
uint32_t hash;
/* Pointer to the character data, interpreted according to the encoding
 * tag held in flags.
 */
const char * const nxcsptr;
#else
/* Old layout: a pointer to the 8-bit string data (returned unchanged by
 * -UTF8String) followed by its length.
 * NOTE(review): nxcslen is presumably a byte count here - confirm.
 */
const char * const nxcsptr;
const unsigned int nxcslen;
#endif
}
@end

View file

@ -298,6 +298,32 @@ nextUTF8(const uint8_t *p, unsigned l, unsigned *o, unichar *n)
/* Compares the constant string s with the string o and returns YES when
 * the comparison finds no difference, NO otherwise.
 *
 * With GNUSTEP_NEW_STRING_ABI both strings are compared as UTF-16 data
 * fetched through -getCharacters:range: in fixed-size chunks, so no heap
 * memory is allocated.  The old-ABI branch is only partly visible here.
 */
static BOOL
literalIsEqualInternal(NXConstantString *s, GSStr o)
{
#ifdef GNUSTEP_NEW_STRING_ABI
/* Strings of different length can never be equal. */
if (s->nxcslen != o->_count)
{
return NO;
}
size_t end = s->nxcslen;
/* Number of unichars compared per iteration (two stack buffers). */
static const int buffer_size = 64;
unichar buffer1[buffer_size];
unichar buffer2[buffer_size];
NSRange r = { 0, buffer_size };
/* The loop body runs at least once, so an empty string performs a single
 * zero-length fetch and comparison before returning YES.
 */
do
{
/* Clamp the final chunk so the range never extends past the string. */
if (r.location + r.length > end)
{
r.length = s->nxcslen - r.location;
}
[s getCharacters: buffer1 range: r];
[o getCharacters: buffer2 range: r];
if (memcmp(buffer1, buffer2, r.length * sizeof(unichar)) != 0)
{
return NO;
}
r.location += buffer_size;
} while (r.location < end);
return YES;
#else
unsigned len = o->_count;
/* Since UTF-8 is a multibyte character set, it must have at least
@ -450,6 +476,7 @@ literalIsEqualInternal(NXConstantString *s, GSStr o)
}
return YES;
}
#endif
}
@ -4484,10 +4511,12 @@ agree, create a new GSCInlineString otherwise.
return obj;
}
/* Compiled out with #if 0: this override, which forwards to
 * rangeOfSequence_u(), is currently not built.
 * NOTE(review): presumably one of the string optimisations disabled while
 * the new string ABI is being brought up - confirm before re-enabling.
 */
#if 0
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
{
return rangeOfSequence_u((GSStr)self, anIndex);
}
#endif
- (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet
options: (NSUInteger)mask
@ -5365,6 +5394,7 @@ NSAssert(_flags.owned == 1 && _zone != 0, NSInternalInconsistencyException);
return obj;
}
/* Compiled out with #if 0: this override, which dispatches on _flags.wide
 * and uses rangeOfSequence_c() for the non-wide case, is currently not
 * built.
 * NOTE(review): presumably one of the string optimisations disabled while
 * the new string ABI is being brought up - confirm before re-enabling.
 */
#if 0
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
{
if (_flags.wide == 1)
@ -5372,6 +5402,7 @@ NSAssert(_flags.owned == 1 && _zone != 0, NSInternalInconsistencyException);
else
return rangeOfSequence_c((GSStr)self, anIndex);
}
#endif
- (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet
options: (NSUInteger)mask
@ -5791,6 +5822,13 @@ literalIsEqual(NXConstantString *self, id anObject)
return NO;
}
#ifdef GNUSTEP_NEW_STRING_ABI
/* Helpers for the flags ivar of NXConstantString.  They expand to code
 * that names the ivar directly, so they may only be used inside instance
 * methods of that class.
 */

/* Encoding tag stored in the two least significant bits of flags:
 * 0 = ASCII, 1 = UTF-8, 2 = UTF-16, 3 = UTF-32.
 */
#  define CONSTANT_STRING_ENCODING() (flags & 0x3)

/* True (1) once bit 16 of flags marks the hash ivar as valid, else 0. */
#  define CONSTANT_STRING_HAS_HASH() ((flags & (1 << 16)) != 0)

/* Sets bit 16 of flags to record that the hash ivar has been filled in. */
#  define CONSTANT_STRING_SET_HAS_HASH() do { flags |= (1 << 16); } while (0)
#endif
/**
* <p>The NXConstantString class is used by the compiler for constant
* strings, as such its ivar layout is determined by the compiler
@ -5803,17 +5841,47 @@ literalIsEqual(NXConstantString *self, id anObject)
{
if (self == [NXConstantString class])
{
NSConstantStringClass = self;
NSConstantStringClass = nil;
}
}
/**
 * Returns the receiver's contents as a C string in UTF-8.
 * With the new string ABI, ASCII and UTF-8 literals hand back the stored
 * data pointer directly, while UTF-16 and UTF-32 literals are converted by
 * the superclass implementation.  With the old ABI the stored pointer is
 * always returned.
 */
- (const char*) UTF8String
{
#ifdef GNUSTEP_NEW_STRING_ABI
  switch (CONSTANT_STRING_ENCODING())
    {
      case 0: // ASCII
      case 1: // UTF-8
        /* NOTE(review): assumes the stored data is NUL terminated, as the
         * old ABI branch below also does - confirm against the compiler.
         */
        return nxcsptr;
      case 2: // UTF-16
      case 3: // UTF-32
        /* The encoding tag is (flags & 3), so UTF-32 is tag 3; a 'case 4'
         * label could never match and would leave tag 3 unhandled.
         */
        return [super UTF8String];
    }
  /* All four values of the two-bit tag are handled above. */
  __builtin_unreachable();
#else
  return nxcsptr;
#endif
}
/**
 * Returns the UTF-16 unit at index.
 * Raises NSInvalidArgumentException when index is not less than the
 * string length.  With the new string ABI, UTF-32 literals are not yet
 * supported and raise NSInternalInconsistencyException.
 */
- (unichar) characterAtIndex: (NSUInteger)index
{
#ifdef GNUSTEP_NEW_STRING_ABI
  if (index >= nxcslen)
    {
      [NSException raise: NSInvalidArgumentException
                  format: @"-characterAtIndex: index out of range"];
      /* Never read past the data even if the raise were to return. */
      return 0;
    }
  switch (CONSTANT_STRING_ENCODING())
    {
      case 0: // ASCII
      case 1: // UTF-8
        /* Read through unsigned char so a byte with the top bit set is
         * not sign-extended into 0xFFxx where plain char is signed.
         * NOTE(review): for UTF-8 data this is a per-byte lookup, which is
         * only correct while the literal contains ASCII characters only;
         * multibyte sequences are not decoded here.
         */
        return (unichar)((const unsigned char*)nxcsptr)[index];
      case 2: // UTF-16
        return ((unichar*)(void*)nxcsptr)[index];
      case 3: // UTF-32
        /* Report the unsupported encoding explicitly instead of running
         * into __builtin_unreachable(), which is undefined behaviour.
         */
        [NSException raise: NSInternalInconsistencyException
                    format: @"UTF-32 constant strings not yet supported"];
        return 0;
    }
  /* All four values of the two-bit encoding tag are handled above. */
  __builtin_unreachable();
#else
NSUInteger l = 0;
unichar u;
unichar n = 0;
@ -5831,8 +5899,11 @@ literalIsEqual(NXConstantString *self, id anObject)
[NSException raise: NSInvalidArgumentException
format: @"-characterAtIndex: index out of range"];
return 0;
#endif
}
#ifndef GNUSTEP_NEW_STRING_ABI
- (BOOL) canBeConvertedToEncoding: (NSStringEncoding)encoding
{
/* If the string contains bad (non-utf8) data, the lengthUTF8() function
@ -5936,6 +6007,8 @@ literalIsEqual(NXConstantString *self, id anObject)
return [super dataUsingEncoding: encoding allowLossyConversion: flag];
}
#endif
- (void) dealloc
{
GSNOSUPERDEALLOC;
@ -5944,6 +6017,27 @@ literalIsEqual(NXConstantString *self, id anObject)
/**
 * Copies the UTF-16 units covered by aRange into buffer, which must have
 * room for aRange.length unichars.
 * With the new string ABI the range is validated by GS_RANGE_CHECK against
 * the string length; ASCII and UTF-16 literals are supported, while UTF-8
 * and UTF-32 literals raise NSInternalInconsistencyException.
 */
- (void) getCharacters: (unichar*)buffer
                 range: (NSRange)aRange
{
#ifdef GNUSTEP_NEW_STRING_ABI
  GS_RANGE_CHECK(aRange, nxcslen);
  switch (CONSTANT_STRING_ENCODING())
    {
      case 0: // ASCII
        {
          /* Widen each byte to one unichar.  Reading through unsigned char
           * avoids sign extension where plain char is signed.
           */
          const unsigned char *src
            = (const unsigned char*)nxcsptr + aRange.location;
          NSUInteger i;

          for (i = 0; i < aRange.length; i++)
            {
              buffer[i] = (unichar)src[i];
            }
          return;
        }
      case 1: // UTF-8
        /* Raised unconditionally: an NSAssert() compiles to nothing when
         * assertions are disabled, which would let control fall through
         * into the UTF-16 copy below and read the wrong data.
         */
        [NSException raise: NSInternalInconsistencyException
                    format: @"UTF-8 constant strings not yet supported"];
        return;
      case 2: // UTF-16
        /* nxcsptr is a byte pointer, so the offset is scaled by hand. */
        memcpy(buffer, nxcsptr + (aRange.location * sizeof(unichar)),
          aRange.length * sizeof(unichar));
        return;
      case 3: // UTF-32
        /* Raised unconditionally so that this case can never reach
         * __builtin_unreachable(), which would be undefined behaviour.
         */
        [NSException raise: NSInternalInconsistencyException
                    format: @"UTF-32 constant strings not yet supported"];
        return;
    }
  /* All four values of the two-bit encoding tag are handled above. */
  __builtin_unreachable();
#else
unichar n = 0;
unsigned i = 0;
NSUInteger max = NSMaxRange(aRange);
@ -5973,8 +6067,10 @@ literalIsEqual(NXConstantString *self, id anObject)
@"in %s, range { %"PRIuPTR", %"PRIuPTR" } extends beyond string",
GSNameFromSelector(_cmd), aRange.location, aRange.length];
}
#endif
}
#ifndef GNUSTEP_NEW_STRING_ABI
- (BOOL) getCString: (char*)buffer
maxLength: (NSUInteger)maxLength
encoding: (NSStringEncoding)encoding
@ -6039,12 +6135,20 @@ literalIsEqual(NXConstantString *self, id anObject)
}
return [super getCString: buffer maxLength: maxLength encoding: encoding];
}
#endif
/* Returns the hash of the string.
 *
 * New string ABI: the value is obtained from [super hash] on first use,
 * stored in the hash ivar, and bit 16 of flags is set so that later calls
 * return the stored value without recomputing it.
 * NOTE(review): the store to hash and the flag update are two separate,
 * unsynchronised writes to the constant string object - confirm this is
 * acceptable for concurrent callers and for where literals are placed.
 *
 * Old ABI: must match the implementation in NSString.
 * To avoid allocating memory, we build the hash incrementally.
 */
- (NSUInteger) hash
{
#ifdef GNUSTEP_NEW_STRING_ABI
/* Fast path: a previously stored hash is still valid. */
if (CONSTANT_STRING_HAS_HASH())
return hash;
/* Slow path: compute once, then publish the flag after the value. */
hash = [super hash];
CONSTANT_STRING_SET_HAS_HASH();
return hash;
#else
if (nxcslen > 0)
{
uint32_t s0 = 0;
@ -6087,6 +6191,7 @@ literalIsEqual(NXConstantString *self, id anObject)
{
return 0x0ffffffe; /* Hash for an empty string. */
}
#endif
}
- (id) initWithBytes: (const void*)bytes
@ -6108,6 +6213,16 @@ literalIsEqual(NXConstantString *self, id anObject)
return nil;
}
#ifdef GNUSTEP_NEW_STRING_ABI
/**
 * Returns the length of the string.  In the new representation nxcslen
 * always holds the length in UTF-16 code units, whatever encoding the
 * stored data uses, so it can be returned directly.
 */
- (NSUInteger) length
{
  return (NSUInteger)nxcslen;
}
#else
- (BOOL) isEqual: (id)anObject
{
return literalIsEqual(self, anObject);
@ -6256,6 +6371,7 @@ literalIsEqual(NXConstantString *self, id anObject)
format: @"-rangeOfComposedCharacterSequenceAtIndex: index out of range"];
return NSMakeRange(NSNotFound, 0);
}
#endif // GNUSTEP_NEW_STRING_ABI
- (id) retain
{
@ -6284,12 +6400,43 @@ literalIsEqual(NXConstantString *self, id anObject)
/**
 * Returns the encoding in which the literal's data is stored: with the new
 * string ABI this follows the two-bit encoding tag, with the old ABI it is
 * always UTF-8.
 */
- (NSStringEncoding) fastestEncoding
{
#ifdef GNUSTEP_NEW_STRING_ABI
  /* Indexed by the encoding tag, which is always in the range 0..3. */
  static const NSStringEncoding encodingForTag[4] = {
    NSASCIIStringEncoding,      /* 0: ASCII  */
    NSUTF8StringEncoding,       /* 1: UTF-8  */
    NSUTF16StringEncoding,      /* 2: UTF-16 */
    NSUTF32StringEncoding       /* 3: UTF-32 */
  };

  return encodingForTag[CONSTANT_STRING_ENCODING()];
#else
  return NSUTF8StringEncoding;
#endif
}
/**
 * Returns the encoding reported as the most compact for the literal.
 * With the new string ABI, ASCII and UTF-8 literals report their own
 * encoding and both UTF-16 and UTF-32 literals report UTF-16.  UTF-16 is
 * not necessarily the smallest encoding for such strings, but for now it
 * is reported as if it were.  With the old ABI the answer is always UTF-8.
 */
- (NSStringEncoding) smallestEncoding
{
#ifdef GNUSTEP_NEW_STRING_ABI
  /* Indexed by the encoding tag, which is always in the range 0..3. */
  static const NSStringEncoding encodingForTag[4] = {
    NSASCIIStringEncoding,      /* 0: ASCII  */
    NSUTF8StringEncoding,       /* 1: UTF-8  */
    NSUTF16StringEncoding,      /* 2: UTF-16 */
    NSUTF16StringEncoding       /* 3: UTF-32 */
  };

  return encodingForTag[CONSTANT_STRING_ENCODING()];
#else
  return NSUTF8StringEncoding;
#endif
}
/* This method is provided to enable regression tests to ensure they are