Add NSConstantString implementation for v2 ABI.

This is largely the version from the newabi branch, but with a few
cleanups made possible by other bug fixes in the GSString
implementations.
This commit is contained in:
David Chisnall 2018-12-27 13:42:12 +00:00
parent 00472f14d0
commit 3847c54f3f
2 changed files with 196 additions and 0 deletions

View file

@ -881,6 +881,12 @@ typedef NSUInteger NSStringEncodingConversionOptions;
@end
#ifdef __OBJC_GNUSTEP_RUNTIME_ABI__
# if __OBJC_GNUSTEP_RUNTIME_ABI__ >= 20
# define GNUSTEP_NEW_STRING_ABI
# endif
#endif
/**
* <p>The NXConstantString class is used to hold constant 8-bit character
* string objects produced by the compiler where it sees @"..." in the
@ -908,8 +914,43 @@ typedef NSUInteger NSStringEncodingConversionOptions;
@interface NXConstantString : NSString
{
@public
#ifdef GNUSTEP_NEW_STRING_ABI
/**
* Flags. The low 16 bits are reserved for the compiler, the top 16 for use
* by the Foundation Framework. Currently only the low 2 bits are used, to
* indicate the encoding of the string, with the following values:
*
* 0. ASCII (UTF-8 using only 7-bit characters)
* 1. UTF-8
* 2. UTF-16
* 3. UTF-32
*
*/
uint32_t flags;
/**
* The number of characters (UTF-16 code units) in the string.
*/
uint32_t nxcslen;
/**
* The number of bytes in the string. For fixed-length encodings, this is a
* fixed multiple of nxcslen, but for UTF-8 it can be different.
*/
uint32_t size;
/**
* Hash value.
*/
uint32_t hash;
/**
* Pointer to the byte data of the string. Note that `char*` is the correct
* type only if the low two bits of the flags indicate that this is an ASCII
* or UTF-8 string, otherwise it is a pointer to 16- or 32-bit characters in
* native byte order.
*/
const char * const nxcsptr;
#else
const char * const nxcsptr;
const unsigned int nxcslen;
#endif
}
@end

View file

@ -298,6 +298,32 @@ nextUTF8(const uint8_t *p, unsigned l, unsigned *o, unichar *n)
static BOOL
literalIsEqualInternal(NXConstantString *s, GSStr o)
{
#ifdef GNUSTEP_NEW_STRING_ABI
if (s->nxcslen != o->_count)
{
return NO;
}
size_t end = s->nxcslen;
static const int buffer_size = 64;
unichar buffer1[buffer_size];
unichar buffer2[buffer_size];
NSRange r = { 0, buffer_size };
do
{
if (r.location + r.length > end)
{
r.length = s->nxcslen - r.location;
}
[s getCharacters: buffer1 range: r];
[o getCharacters: buffer2 range: r];
if (memcmp(buffer1, buffer2, r.length * sizeof(unichar)) != 0)
{
return NO;
}
r.location += buffer_size;
} while (r.location < end);
return YES;
#else
unsigned len = o->_count;
/* Since UTF-8 is a multibyte character set, it must have at least
@ -450,6 +476,7 @@ literalIsEqualInternal(NXConstantString *s, GSStr o)
}
return YES;
}
#endif
}
@ -5664,6 +5691,13 @@ literalIsEqual(NXConstantString *self, id anObject)
return NO;
}
#ifdef GNUSTEP_NEW_STRING_ABI
# define CONSTANT_STRING_ENCODING() (flags & 3)
# define CONSTANT_STRING_HAS_HASH() ((flags & (1<<16)) == (1<<16))
# define CONSTANT_STRING_SET_HAS_HASH() do { flags |= (1<<16); } while(0)
#endif
/**
* <p>The NXConstantString class is used by the compiler for constant
* strings, as such its ivar layout is determined by the compiler
@ -5682,11 +5716,52 @@ literalIsEqual(NXConstantString *self, id anObject)
- (const char*) UTF8String
{
#ifdef GNUSTEP_NEW_STRING_ABI
switch (CONSTANT_STRING_ENCODING())
{
case 0: // ASCII
case 1: // UTF-8
return nxcsptr;
case 2: // UTF-16
{
unsigned int l = 0;
unsigned char *r = 0;
if (GSFromUnicode(&r, &l, (const unichar*)(void*)nxcsptr, nxcslen, NSUTF8StringEncoding,
NSDefaultMallocZone(), GSUniTerminate|GSUniTemporary|GSUniStrict) == NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't get UTF8 from Unicode string."];
}
return (const char*)r;
}
case 4: // UTF-32
return [super UTF8String];
}
GS_UNREACHABLE();
#else
return nxcsptr;
#endif
}
- (unichar) characterAtIndex: (NSUInteger)index
{
#ifdef GNUSTEP_NEW_STRING_ABI
if (index >= nxcslen)
{
[NSException raise: NSInvalidArgumentException
format: @"-characterAtIndex: index out of range"];
}
switch (CONSTANT_STRING_ENCODING())
{
case 0: // ASCII
case 1: // UTF-8
return nxcsptr[index];
case 2: // UTF-16
return ((unichar*)(void*)nxcsptr)[index];
}
GS_UNREACHABLE();
#else
NSUInteger l = 0;
unichar u;
unichar n = 0;
@ -5704,8 +5779,11 @@ literalIsEqual(NXConstantString *self, id anObject)
[NSException raise: NSInvalidArgumentException
format: @"-characterAtIndex: index out of range"];
return 0;
#endif
}
#ifndef GNUSTEP_NEW_STRING_ABI
- (BOOL) canBeConvertedToEncoding: (NSStringEncoding)encoding
{
/* If the string contains bad (non-utf8) data, the lengthUTF8() function
@ -5809,6 +5887,8 @@ literalIsEqual(NXConstantString *self, id anObject)
return [super dataUsingEncoding: encoding allowLossyConversion: flag];
}
#endif
- (void) dealloc
{
GSNOSUPERDEALLOC;
@ -5817,6 +5897,26 @@ literalIsEqual(NXConstantString *self, id anObject)
- (void) getCharacters: (unichar*)buffer
range: (NSRange)aRange
{
#ifdef GNUSTEP_NEW_STRING_ABI
GS_RANGE_CHECK(aRange, nxcslen);
switch (CONSTANT_STRING_ENCODING())
{
case 0: // ASCII
for (int i=0 ; i<aRange.length ; i++)
{
buffer[i] = (unichar)nxcsptr[aRange.location + i];
}
return;
case 1: // UTF-8
NSAssert(0, @"UTF-8 constant strings not yet supported");
case 2: // UTF-16
memcpy(buffer, nxcsptr + (aRange.location * sizeof(unichar)), aRange.length * sizeof(unichar));
return;
case 3:
NSAssert(0, @"UTF-32 constant strings not yet supported");
}
GS_UNREACHABLE();
#else
unichar n = 0;
unsigned i = 0;
NSUInteger max = NSMaxRange(aRange);
@ -5846,8 +5946,12 @@ literalIsEqual(NXConstantString *self, id anObject)
@"in %s, range { %"PRIuPTR", %"PRIuPTR" } extends beyond string",
GSNameFromSelector(_cmd), aRange.location, aRange.length];
}
#endif
}
// This method was deprecated on Mac OS X 10.5, so if we provide an improved
// version here then we should do it using the newer version.
#ifndef GNUSTEP_NEW_STRING_ABI
- (BOOL) getCString: (char*)buffer
maxLength: (NSUInteger)maxLength
encoding: (NSStringEncoding)encoding
@ -5912,12 +6016,20 @@ literalIsEqual(NXConstantString *self, id anObject)
}
return [super getCString: buffer maxLength: maxLength encoding: encoding];
}
#endif
/* Must match the implementation in NSString
* To avoid allocating memory, we build the hash incrementally.
*/
- (NSUInteger) hash
{
#ifdef GNUSTEP_NEW_STRING_ABI
if (CONSTANT_STRING_HAS_HASH())
return hash;
hash = [super hash];
CONSTANT_STRING_SET_HAS_HASH();
return hash;
#else
if (nxcslen > 0)
{
uint32_t s0 = 0;
@ -5960,6 +6072,7 @@ literalIsEqual(NXConstantString *self, id anObject)
{
return 0x0ffffffe; /* Hash for an empty string. */
}
#endif
}
- (id) initWithBytes: (const void*)bytes
@ -5981,6 +6094,16 @@ literalIsEqual(NXConstantString *self, id anObject)
return nil;
}
#ifdef GNUSTEP_NEW_STRING_ABI
- (NSUInteger) length
{
// In the new encoding, nxcslen is always the length of the string in UTF-16
// codepoints
return nxcslen;
}
#else
- (BOOL) isEqual: (id)anObject
{
return literalIsEqual(self, anObject);
@ -6129,6 +6252,7 @@ literalIsEqual(NXConstantString *self, id anObject)
format: @"-rangeOfComposedCharacterSequenceAtIndex: index out of range"];
return NSMakeRange(NSNotFound, 0);
}
#endif // GNUSTEP_NEW_STRING_ABI
- (id) retain
{
@ -6157,12 +6281,43 @@ literalIsEqual(NXConstantString *self, id anObject)
- (NSStringEncoding) fastestEncoding
{
#ifdef GNUSTEP_NEW_STRING_ABI
switch (CONSTANT_STRING_ENCODING())
{
case 0: // ASCII
return NSASCIIStringEncoding;
case 1: // UTF-8
return NSUTF8StringEncoding;
case 2: // UTF-16
return NSUTF16StringEncoding;
case 3: // UTF-32
return NSUTF32StringEncoding;
}
GS_UNREACHABLE();
#else
return NSUTF8StringEncoding;
#endif
}
- (NSStringEncoding) smallestEncoding
{
#ifdef GNUSTEP_NEW_STRING_ABI
// UTF-16 might not be the smallest encoding for UTF-16 strings, but for now
// we'll pretend that it is.
switch (CONSTANT_STRING_ENCODING())
{
case 0: // ASCII
return NSASCIIStringEncoding;
case 1: // UTF-8
return NSUTF8StringEncoding;
case 2: // UTF-16
case 3: // UTF-32
return NSUTF16StringEncoding;
}
GS_UNREACHABLE();
#else
return NSUTF8StringEncoding;
#endif
}
@end