mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-23 09:04:13 +00:00
Checkpoint new string ABI work.
ASCII and UTF-16 strings in the new representation now work, but some of the string optimisations are disabled.
This commit is contained in:
parent
7bf1179f60
commit
2bdcae390d
2 changed files with 162 additions and 1 deletions
|
@ -881,6 +881,12 @@ typedef NSUInteger NSStringEncodingConversionOptions;
|
|||
|
||||
@end
|
||||
|
||||
/* Enable the new constant string layout only when the macro
 * __OBJC_GNUSTEP_RUNTIME_ABI__ is defined with a value of at least 20.
 * In every other case GNUSTEP_NEW_STRING_ABI is left undefined and the
 * legacy NXConstantString layout is compiled.
 */
#if defined(__OBJC_GNUSTEP_RUNTIME_ABI__) && (__OBJC_GNUSTEP_RUNTIME_ABI__ >= 20)
#  define GNUSTEP_NEW_STRING_ABI
#endif
|
||||
|
||||
/**
|
||||
* <p>The NXConstantString class is used to hold constant 8-bit character
|
||||
* string objects produced by the compiler where it sees @"..." in the
|
||||
|
@ -908,8 +914,16 @@ typedef NSUInteger NSStringEncodingConversionOptions;
|
|||
@interface NXConstantString : NSString
{
@public
#ifndef GNUSTEP_NEW_STRING_ABI
  /* Legacy layout: a pointer to the character data and a length. */
  const char * const nxcsptr;
  const unsigned int nxcslen;
#else
  /* New layout.  The order of these fields must not be changed. */

  /* Bit field.  The implementation reads the encoding tag from the low
   * two bits and uses bit 16 to record that 'hash' has been filled in.
   */
  uint32_t flags;
  /* Length of the string; -length returns this value directly. */
  uint32_t nxcslen;
  /* NOTE(review): not read by any code visible alongside this
   * declaration - presumably the size of the character data; confirm
   * against the layout emitted by the compiler.
   */
  uint32_t size;
  /* Cached result of -hash, meaningful once the has-hash flag is set. */
  uint32_t hash;
  /* Pointer to the character data, interpreted according to the
   * encoding tag stored in 'flags'.
   */
  const char * const nxcsptr;
#endif
}
@end
|
||||
|
||||
|
|
|
@ -298,6 +298,32 @@ nextUTF8(const uint8_t *p, unsigned l, unsigned *o, unichar *n)
|
|||
static BOOL
|
||||
literalIsEqualInternal(NXConstantString *s, GSStr o)
|
||||
{
|
||||
#ifdef GNUSTEP_NEW_STRING_ABI
|
||||
if (s->nxcslen != o->_count)
|
||||
{
|
||||
return NO;
|
||||
}
|
||||
size_t end = s->nxcslen;
|
||||
static const int buffer_size = 64;
|
||||
unichar buffer1[buffer_size];
|
||||
unichar buffer2[buffer_size];
|
||||
NSRange r = { 0, buffer_size };
|
||||
do
|
||||
{
|
||||
if (r.location + r.length > end)
|
||||
{
|
||||
r.length = s->nxcslen - r.location;
|
||||
}
|
||||
[s getCharacters: buffer1 range: r];
|
||||
[o getCharacters: buffer2 range: r];
|
||||
if (memcmp(buffer1, buffer2, r.length * sizeof(unichar)) != 0)
|
||||
{
|
||||
return NO;
|
||||
}
|
||||
r.location += buffer_size;
|
||||
} while (r.location < end);
|
||||
return YES;
|
||||
#else
|
||||
unsigned len = o->_count;
|
||||
|
||||
/* Since UTF-8 is a multibyte character set, it must have at least
|
||||
|
@ -450,6 +476,7 @@ literalIsEqualInternal(NXConstantString *s, GSStr o)
|
|||
}
|
||||
return YES;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -4484,10 +4511,12 @@ agree, create a new GSCInlineString otherwise.
|
|||
return obj;
|
||||
}
|
||||
|
||||
#if 0
/* Compiled out, so the inherited implementation is used instead.
 * NOTE(review): presumably one of the string optimisations switched off
 * while the new constant string ABI is being brought up - confirm before
 * re-enabling.
 */
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
{
  GSStr me = (GSStr)self;

  return rangeOfSequence_u(me, anIndex);
}
#endif
|
||||
|
||||
- (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet
|
||||
options: (NSUInteger)mask
|
||||
|
@ -5365,6 +5394,7 @@ NSAssert(_flags.owned == 1 && _zone != 0, NSInternalInconsistencyException);
|
|||
return obj;
|
||||
}
|
||||
|
||||
#if 0
|
||||
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
|
||||
{
|
||||
if (_flags.wide == 1)
|
||||
|
@ -5372,6 +5402,7 @@ NSAssert(_flags.owned == 1 && _zone != 0, NSInternalInconsistencyException);
|
|||
else
|
||||
return rangeOfSequence_c((GSStr)self, anIndex);
|
||||
}
|
||||
#endif
|
||||
|
||||
- (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet
|
||||
options: (NSUInteger)mask
|
||||
|
@ -5791,6 +5822,13 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
return NO;
|
||||
}
|
||||
|
||||
#ifdef GNUSTEP_NEW_STRING_ABI
/* Helpers for decoding the 'flags' field of a new-ABI constant string.
 * Each macro refers to a variable named 'flags', so it can only be
 * expanded where such a name is in scope.
 *
 * The low two bits are the encoding tag and bit 16 records that the
 * hash has been stored.
 */
#  define CONSTANT_STRING_ENCODING_MASK 3
#  define CONSTANT_STRING_HASH_BIT (1<<16)
#  define CONSTANT_STRING_ENCODING() \
  (flags & CONSTANT_STRING_ENCODING_MASK)
#  define CONSTANT_STRING_HAS_HASH() \
  ((flags & CONSTANT_STRING_HASH_BIT) == CONSTANT_STRING_HASH_BIT)
#  define CONSTANT_STRING_SET_HAS_HASH() \
  do { flags |= CONSTANT_STRING_HASH_BIT; } while(0)
#endif
|
||||
|
||||
|
||||
/**
|
||||
* <p>The NXConstantString class is used by the compiler for constant
|
||||
* strings, as such its ivar layout is determined by the compiler
|
||||
|
@ -5803,17 +5841,47 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
{
|
||||
if (self == [NXConstantString class])
|
||||
{
|
||||
NSConstantStringClass = self;
|
||||
NSConstantStringClass = nil;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns a pointer to the receiver's contents as UTF-8.<br />
 * With the new string ABI, ASCII and UTF-8 constants hand back the
 * character data pointer directly (ASCII is a subset of UTF-8, so no
 * conversion is needed), while UTF-16 and UTF-32 constants defer to the
 * superclass implementation.<br />
 * With the legacy ABI the character data pointer is always returned.
 */
- (const char*) UTF8String
{
#ifdef GNUSTEP_NEW_STRING_ABI
  switch (CONSTANT_STRING_ENCODING())
    {
      case 0: // ASCII
      case 1: // UTF-8
        return nxcsptr;
      case 2: // UTF-16
      /* UTF-32 is tag 3.  The encoding tag is only two bits wide, so a
       * label of 4 could never match and a UTF-32 constant would run
       * into the unreachable marker below.
       */
      case 3: // UTF-32
        return [super UTF8String];
    }
  /* All four possible tag values are handled above. */
  __builtin_unreachable();
#else
  return nxcsptr;
#endif
}
|
||||
|
||||
- (unichar) characterAtIndex: (NSUInteger)index
|
||||
{
|
||||
#ifdef GNUSTEP_NEW_STRING_ABI
|
||||
if (index >= nxcslen)
|
||||
{
|
||||
[NSException raise: NSInvalidArgumentException
|
||||
format: @"-characterAtIndex: index out of range"];
|
||||
}
|
||||
switch (CONSTANT_STRING_ENCODING())
|
||||
{
|
||||
case 0: // ASCII
|
||||
case 1: // UTF-8
|
||||
return nxcsptr[index];
|
||||
case 2: // UTF-16
|
||||
return ((unichar*)(void*)nxcsptr)[index];
|
||||
|
||||
}
|
||||
__builtin_unreachable();
|
||||
#else
|
||||
NSUInteger l = 0;
|
||||
unichar u;
|
||||
unichar n = 0;
|
||||
|
@ -5831,8 +5899,11 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
[NSException raise: NSInvalidArgumentException
|
||||
format: @"-characterAtIndex: index out of range"];
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef GNUSTEP_NEW_STRING_ABI
|
||||
|
||||
- (BOOL) canBeConvertedToEncoding: (NSStringEncoding)encoding
|
||||
{
|
||||
/* If the string contains bad (non-utf8) data, the lengthUTF8() function
|
||||
|
@ -5936,6 +6007,8 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
return [super dataUsingEncoding: encoding allowLossyConversion: flag];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
- (void) dealloc
|
||||
{
|
||||
GSNOSUPERDEALLOC;
|
||||
|
@ -5944,6 +6017,27 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
- (void) getCharacters: (unichar*)buffer
|
||||
range: (NSRange)aRange
|
||||
{
|
||||
#ifdef GNUSTEP_NEW_STRING_ABI
|
||||
GS_RANGE_CHECK(aRange, nxcslen);
|
||||
switch (CONSTANT_STRING_ENCODING())
|
||||
{
|
||||
case 0: // ASCII
|
||||
for (int i=0 ; i<aRange.length ; i++)
|
||||
{
|
||||
buffer[i] = (unichar)nxcsptr[aRange.location + i];
|
||||
}
|
||||
return;
|
||||
case 1: // UTF-8
|
||||
NSAssert(0, @"UTF-8 constant strings not yet supported");
|
||||
case 2: // UTF-16
|
||||
fprintf(stderr, "Copying %d bytes from %p\n", (int)(aRange.length * sizeof(unichar)), nxcsptr + (aRange.location * sizeof(unichar)));
|
||||
memcpy(buffer, nxcsptr + (aRange.location * sizeof(unichar)), aRange.length * sizeof(unichar));
|
||||
return;
|
||||
case 3:
|
||||
NSAssert(0, @"UTF-32 constant strings not yet supported");
|
||||
}
|
||||
__builtin_unreachable();
|
||||
#else
|
||||
unichar n = 0;
|
||||
unsigned i = 0;
|
||||
NSUInteger max = NSMaxRange(aRange);
|
||||
|
@ -5973,8 +6067,10 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
@"in %s, range { %"PRIuPTR", %"PRIuPTR" } extends beyond string",
|
||||
GSNameFromSelector(_cmd), aRange.location, aRange.length];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef GNUSTEP_NEW_STRING_ABI
|
||||
- (BOOL) getCString: (char*)buffer
|
||||
maxLength: (NSUInteger)maxLength
|
||||
encoding: (NSStringEncoding)encoding
|
||||
|
@ -6039,12 +6135,20 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
}
|
||||
return [super getCString: buffer maxLength: maxLength encoding: encoding];
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Must match the implementation in NSString
|
||||
* To avoid allocating memory, we build the hash incrementally.
|
||||
*/
|
||||
- (NSUInteger) hash
|
||||
{
|
||||
#ifdef GNUSTEP_NEW_STRING_ABI
|
||||
if (CONSTANT_STRING_HAS_HASH())
|
||||
return hash;
|
||||
hash = [super hash];
|
||||
CONSTANT_STRING_SET_HAS_HASH();
|
||||
return hash;
|
||||
#else
|
||||
if (nxcslen > 0)
|
||||
{
|
||||
uint32_t s0 = 0;
|
||||
|
@ -6087,6 +6191,7 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
{
|
||||
return 0x0ffffffe; /* Hash for an empty string. */
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
- (id) initWithBytes: (const void*)bytes
|
||||
|
@ -6108,6 +6213,16 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
return nil;
|
||||
}
|
||||
|
||||
#ifdef GNUSTEP_NEW_STRING_ABI
|
||||
/**
 * Returns the length of the receiver.<br />
 * In the new representation nxcslen always holds the length of the
 * string in UTF-16 code units, so it is returned as it stands.
 */
- (NSUInteger) length
{
  return (NSUInteger)nxcslen;
}
|
||||
|
||||
#else
|
||||
|
||||
- (BOOL) isEqual: (id)anObject
|
||||
{
|
||||
return literalIsEqual(self, anObject);
|
||||
|
@ -6256,6 +6371,7 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
format: @"-rangeOfComposedCharacterSequenceAtIndex: index out of range"];
|
||||
return NSMakeRange(NSNotFound, 0);
|
||||
}
|
||||
#endif // GNUSTEP_NEW_STRING_ABI
|
||||
|
||||
- (id) retain
|
||||
{
|
||||
|
@ -6284,12 +6400,43 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
|
||||
/**
 * Returns the encoding matching the way the constant's character data
 * is stored.  With the legacy ABI that is always UTF-8; with the new ABI
 * it is chosen by the encoding tag.
 */
- (NSStringEncoding) fastestEncoding
{
#ifndef GNUSTEP_NEW_STRING_ABI
  return NSUTF8StringEncoding;
#else
  /* Indexed by the two-bit encoding tag, so every possible value of
   * CONSTANT_STRING_ENCODING() has an entry.
   */
  const NSStringEncoding native[4] = {
    NSASCIIStringEncoding,      // 0: ASCII
    NSUTF8StringEncoding,       // 1: UTF-8
    NSUTF16StringEncoding,      // 2: UTF-16
    NSUTF32StringEncoding       // 3: UTF-32
  };

  return native[CONSTANT_STRING_ENCODING()];
#endif
}
|
||||
|
||||
/**
 * Returns the encoding reported as the most compact one for the
 * receiver.  With the legacy ABI that is always UTF-8.
 */
- (NSStringEncoding) smallestEncoding
{
#ifdef GNUSTEP_NEW_STRING_ABI
  unsigned tag = CONSTANT_STRING_ENCODING();

  if (tag == 0)         // ASCII
    {
      return NSASCIIStringEncoding;
    }
  if (tag == 1)         // UTF-8
    {
      return NSUTF8StringEncoding;
    }
  /* Tags 2 (UTF-16) and 3 (UTF-32).  UTF-16 may not really be the
   * smallest encoding for such a string, but for now it is reported as
   * if it were.
   */
  return NSUTF16StringEncoding;
#else
  return NSUTF8StringEncoding;
#endif
}
|
||||
|
||||
/* This method is provided to enable regression tests to ensure they are
|
||||
|
|
Loading…
Reference in a new issue