libs-base/Source/NSCharacterSet.m
David Chisnall d2ea054c26 Lots of little fixes to make -base compile with -Werror (now builds without warnings).
Richard: I'm unsure about three of these, which were fixes in memset() calls in:
- NSConcreteMapTable.m
- NSConcreteHashTable.m
- Additions/NSData+GNUstepBase.m

Please can you check them?  I think they are intended to zero the entire object
(rather than the first word), but the lack of comments makes me unsure.

Most changes were just tweaks to variable types.  I've also removed some dead code from NSInvocation.  This was a small group of things that were marked for internal use only, but not actually referenced in the code anywhere.

Other improvements:

- NSArray / NSDictionary fixed up to use the 10.7 (ARC-friendly) prototypes.
- getObjects:andKeys: implemented for NSDictionary (10.5 method)
- NSPointerArray and NSHashTable now properly support weak objects.
- Tests for weak objects in collections.



git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@33621 72102866-910b-0410-8b05-ffd578937521
2011-07-24 13:09:22 +00:00

1644 lines
34 KiB
Objective-C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/** NSCharacterSet - Character set holder
Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
Written by: Adam Fedor <fedor@boulder.colorado.edu>
Date: Apr 1995
Updates by: Richard Frith-Macdonald <rfm@gnu.org>
This file is part of the GNUstep Base Library.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02111 USA.
<title>NSCharacterSet class reference</title>
$Date$ $Revision$
*/
#import "common.h"
#import "GNUstepBase/GSLock.h"
#import "Foundation/NSArray.h"
#import "Foundation/NSCoder.h"
#import "Foundation/NSException.h"
#import "Foundation/NSData.h"
#import "Foundation/NSLock.h"
#import "Foundation/NSDictionary.h"
#import "Foundation/NSIndexSet.h"
#import "Foundation/NSThread.h"
#import "Foundation/NSNotification.h"
#import "Foundation/NSCharacterSet.h"
#import "Foundation/NSData.h"
#import "GNUstepBase/NSObject+GNUstepBase.h"
/* Using an index set to hold a character set is more space efficient, but
 * on the Intel Core 2 system I benchmarked on, it made my applications
 * about 20% slower.
 * It only makes sense to build base to use index character sets on machines
 * with very little memory (e.g. PDAs), and possibly not even there.
 */
//#define GNUSTEP_INDEX_CHARSET 1
#undef GNUSTEP_INDEX_CHARSET
#import "NSCharacterSetData.h"
/* Exclusive upper bound (0x110000) on character values accepted by the
 * range and long-character methods in this file.
 */
#define GSUNICODE_MAX 1114112
/* Bytes in the bitmap of one plane: 65536 characters at one bit each. */
#define GSBITMAP_SIZE 8192
/* Largest bitmap accepted: 17 planes of GSBITMAP_SIZE bytes. */
#define GSBITMAP_MAX 139264
#ifndef GSSETBIT
/* Set, clear and test bit i of a.  Callers in this file pass i in 0..7. */
#define GSSETBIT(a,i) ((a) |= 1<<(i))
#define GSCLRBIT(a,i) ((a) &= ~(1<<(i)))
#define GSISSET(a,i) ((a) & (1<<(i)))
#endif
/* Character set whose members are held in an index set instead of a bitmap.
 */
@interface _GSIndexCharSet : NSCharacterSet
{
/* Character values that are members of the set. */
NSMutableIndexSet *indexes;
}
/* Returns the index set backing the receiver. */
- (NSIndexSet*) _indexes;
/* Builds the index set from a bitmap holding one bit per character. */
- (id) initWithBitmap: (NSData*)d;
@end
/* Mutable counterpart of _GSIndexCharSet.  It declares the same single
 * instance variable; the implementation adds _GSIndexCharSet's methods
 * to this class with GSObjCAddClassBehavior().
 */
@interface _GSMutableIndexCharSet : NSMutableCharacterSet
{
/* Character values that are members of the set. */
NSMutableIndexSet *indexes;
}
@end
/* Declaration only: lets this file allocate NSDataStatic instances (used
 * in +_staticSet:length:number:); the class is implemented elsewhere.
 */
@interface NSDataStatic : NSData // Help the compiler
@end
/* Private class from NSIndexSet.m
*/
@interface _GSStaticIndexSet : NSIndexSet
/* The only initialiser this file uses; it is given the bytes and length
 * of a bitmap NSData object.
 */
- (id) _initWithBytes: (const void*)bytes length: (NSUInteger)length;
@end
/* Immutable character set stored as a bitmap with one bit per character. */
@interface NSBitmapCharSet : NSCharacterSet
{
/* Bytes of _obj, cached by -initWithBitmap:. */
const unsigned char *_data;
/* Number of bytes in the bitmap. */
unsigned _length;
/* Data object owning the bitmap bytes. */
NSData *_obj;
/* Bit p is set once plane p has been examined by -hasMemberInPlane:. */
unsigned _known;
/* Bit p is set when plane p was found to contain a member. */
unsigned _present;
}
/* Initialises from a bitmap whose length must be a multiple of
 * GSBITMAP_SIZE and no greater than GSBITMAP_MAX.
 */
- (id) initWithBitmap: (NSData*)bitmap;
@end
/* Mutable bitmap character set.  The instance variables are declared in
 * the same order as those of NSBitmapCharSet, whose methods are added to
 * this class with GSObjCAddClassBehavior().
 */
@interface NSMutableBitmapCharSet : NSMutableCharacterSet
{
/* Bytes of _obj; refreshed whenever _obj is resized. */
unsigned char *_data;
/* Number of bytes in the bitmap. */
unsigned _length;
/* Mutable data object owning the bitmap bytes. */
NSMutableData *_obj;
/* Bit p is set once plane p has been examined; zeroed on every mutation. */
unsigned _known;
/* Bit p is set when plane p was found to contain a member. */
unsigned _present;
}
/* Initialises from a bitmap whose length must be a multiple of
 * GSBITMAP_SIZE and no greater than GSBITMAP_MAX.
 */
- (id) initWithBitmap: (NSData*)bitmap;
@end
@implementation NSBitmapCharSet
/* Returns the bitmap trimmed after the highest plane that has a member.
 * The stored data object is returned when no trimming is needed.
 */
- (NSData*) bitmapRepresentation
{
  unsigned planes;
  unsigned bytes;

  /* Walk down from the top plane to the highest populated one. */
  for (planes = 17; planes > 0; planes--)
    {
      if ([self hasMemberInPlane: planes - 1] != NO)
        {
          break;
        }
    }
  bytes = planes * GSBITMAP_SIZE;
  if (bytes >= _length)
    {
      return _obj;
    }
  return [NSData dataWithBytes: _data length: bytes];
}
/* Tests the bit for aCharacter; characters beyond the stored bitmap are
 * not members.
 */
- (BOOL) characterIsMember: (unichar)aCharacter
{
  unsigned index = aCharacter / 8;

  if (index >= _length)
    {
      return NO;
    }
  return GSISSET(_data[index], aCharacter % 8) ? YES : NO;
}
/* Archives under the receiver's own class. */
- (Class) classForCoder
{
  Class own = [self class];

  return own;
}
/* Releases the data object holding the bitmap. */
- (void) dealloc
{
  DESTROY(_obj);

  [super dealloc];
}
/* Encodes the (trimmed) bitmap representation as a single object. */
- (void) encodeWithCoder: (NSCoder*)aCoder
{
  NSData *bitmap = [self bitmapRepresentation];

  [aCoder encodeObject: bitmap];
}
/* Reports whether any character of plane aPlane (0..16) is a member.
 * The answer for each plane is remembered in _known/_present so the
 * plane's bytes are scanned at most once until the cache is invalidated.
 */
- (BOOL) hasMemberInPlane: (uint8_t)aPlane
{
  unsigned mask;
  BOOL     found = NO;

  if (aPlane > 16)
    {
      return NO;
    }
  mask = (1 << aPlane);
  if (_known & mask)
    {
      /* Cached answer. */
      return (_present & mask) ? YES : NO;
    }
  if (aPlane * GSBITMAP_SIZE < _length)
    {
      unsigned pos = GSBITMAP_SIZE * aPlane;
      unsigned end = GSBITMAP_SIZE * (aPlane + 1);

      for (; pos < end; pos++)
        {
          if (_data[pos] != 0)
            {
              found = YES;
              break;
            }
        }
    }
  if (found == YES)
    {
      _present |= mask;
    }
  else
    {
      _present &= ~mask;
    }
  _known |= mask;
  return found;
}
/* Initialises an empty set by passing a nil bitmap to -initWithBitmap:. */
- (id) init
{
  self = [self initWithBitmap: nil];
  return self;
}
/* Initialises the receiver with a copy of bitmap (nil means empty).
 * The bitmap length must be a multiple of GSBITMAP_SIZE and no more than
 * GSBITMAP_MAX; otherwise a message is logged, the receiver is released
 * and nil is returned.
 */
- (id) initWithBitmap: (NSData*)bitmap
{
  /* Validate the full-width length before narrowing it for _length. */
  NSUInteger length = [bitmap length];

  if ((length % GSBITMAP_SIZE) != 0 || length > GSBITMAP_MAX)
    {
      NSLog(@"attempt to initialize character set with invalid bitmap");
      /* Release rather than calling -dealloc directly, as the other
       * initialisers in this file do.
       */
      DESTROY(self);
      return nil;
    }
  if (bitmap == nil)
    {
      bitmap = [NSData data];
    }
  ASSIGNCOPY(_obj, bitmap);
  _length = (unsigned)length;
  _data = [_obj bytes];
  _known = 0;	// No plane has been examined for this bitmap yet.
  return self;
}
/* Decodes the bitmap written by -encodeWithCoder: and initialises from it. */
- (id) initWithCoder: (NSCoder*)aCoder
{
  NSData *bitmap = [aCoder decodeObject];

  return [self initWithBitmap: bitmap];
}
/* Tests membership of a 32-bit character value.
 * Raises NSInvalidArgumentException when aCharacter is not below
 * GSUNICODE_MAX.
 */
- (BOOL) longCharacterIsMember: (UTF32Char)aCharacter
{
  unsigned index;

  if (aCharacter >= GSUNICODE_MAX)
    {
      [NSException raise: NSInvalidArgumentException
                  format: @"[%@-%@] argument (0x%08x) is too large",
        NSStringFromClass([self class]), NSStringFromSelector(_cmd),
        aCharacter];
    }
  index = aCharacter / 8;
  if (index >= _length)
    {
      return NO;
    }
  return GSISSET(_data[index], aCharacter % 8) ? YES : NO;
}
@end
@implementation NSMutableBitmapCharSet
/* For this exact class only: sets the class version and adds the methods
 * of NSBitmapCharSet as a behaviour.
 */
+ (void) initialize
{
  if (self != [NSMutableBitmapCharSet class])
    {
      return;
    }
  [self setVersion: 1];
  GSObjCAddClassBehavior(self, [NSBitmapCharSet class]);
}
/* Adds every character in aRange, growing the bitmap in steps of
 * GSBITMAP_SIZE when the range ends beyond it.
 * Raises NSInvalidArgumentException when the range ends above
 * GSUNICODE_MAX.
 */
- (void) addCharactersInRange: (NSRange)aRange
{
  NSUInteger end = NSMaxRange(aRange);
  NSUInteger pos;
  NSUInteger count;

  if (end > GSUNICODE_MAX)
    {
      [NSException raise:NSInvalidArgumentException
                  format:@"Specified range exceeds character set"];
      /* NOT REACHED */
    }
  else if (end < 1)
    {
      return;
    }

  /* Grow the bitmap until it contains the byte of the last character. */
  count = (end - 1) / 8;
  if (count >= _length)
    {
      do
        {
          _length += GSBITMAP_SIZE;
        }
      while (count >= _length);
      [_obj setLength: _length];
      _data = [_obj mutableBytes];
    }

  /* Leading bits, up to the next byte boundary. */
  for (pos = aRange.location; pos % 8 != 0 && pos < end; pos++)
    {
      GSSETBIT(_data[pos / 8], pos % 8);
    }

  /* Whole bytes. */
  count = (end - pos) / 8;
  if (count > 0)
    {
      memset(&_data[pos / 8], 0xff, count);
      pos += count * 8;
    }

  /* Trailing bits in the final byte. */
  for (; pos < end; pos++)
    {
      GSSETBIT(_data[pos / 8], pos % 8);
    }
  _known = 0;	// Invalidate cache
}
/* Adds every character of aString to the set.  A high surrogate followed
 * by a low surrogate is combined into the single character it encodes.
 * Raises NSInvalidArgumentException when aString is nil.
 */
- (void) addCharactersInString: (NSString*)aString
{
  NSUInteger length;

  if (!aString)
    {
      [NSException raise:NSInvalidArgumentException
                  format:@"Adding characters from nil string"];
      /* NOT REACHED */
    }
  length = [aString length];
  if (length > 0)
    {
      NSUInteger i;
      /* The index parameter is NSUInteger to match -characterAtIndex:. */
      unichar (*get)(id, SEL, NSUInteger);

      get = (unichar (*)(id, SEL, NSUInteger))
        [aString methodForSelector: @selector(characterAtIndex:)];
      for (i = 0; i < length; i++)
        {
          /* 32 bits wide: a combined surrogate pair is above 0xffff and
           * would be truncated in a unichar.
           */
          UTF32Char letter;
          unichar   second;
          unsigned  byte;

          letter = (*get)(aString, @selector(characterAtIndex:), i);
          // Convert a surrogate pair if necessary
          if (letter >= 0xd800 && letter <= 0xdbff && i < length-1
            && (second = (*get)(aString, @selector(characterAtIndex:), i+1))
            >= 0xdc00 && second <= 0xdfff)
            {
              i++;
              letter = ((letter - 0xd800) << 10)
                + (second - 0xdc00) + 0x0010000;
            }
          byte = letter/8;
          if (byte >= _length)
            {
              /* Grow in whole planes until the byte is inside the bitmap. */
              while (byte >= _length)
                {
                  _length += GSBITMAP_SIZE;
                }
              [_obj setLength: _length];
              _data = [_obj mutableBytes];
            }
          GSSETBIT(_data[byte], letter % 8);
        }
    }
  _known = 0;	// Invalidate cache
}
/* Returns a copy of the bitmap trimmed after the highest plane that has a
 * member.
 */
- (NSData*) bitmapRepresentation
{
  unsigned planes;

  for (planes = 17; planes > 0; planes--)
    {
      if ([self hasMemberInPlane: planes - 1] != NO)
        {
          break;
        }
    }
  return [NSData dataWithBytes: _data length: planes * GSBITMAP_SIZE];
}
/* Keeps only the characters that are also in otherSet.  The bitmap is
 * first shortened to the length of otherSet's bitmap representation.
 */
- (void) formIntersectionWithCharacterSet: (NSCharacterSet *)otherSet
{
  NSData              *source = [otherSet bitmapRepresentation];
  unsigned            sourceLength = [source length];
  const unsigned char *sourceBytes = [source bytes];
  unsigned            pos;

  if (sourceLength < _length)
    {
      [_obj setLength: sourceLength];
      _length = sourceLength;
      _data = [_obj mutableBytes];
    }
  for (pos = 0; pos < _length; pos++)
    {
      _data[pos] &= sourceBytes[pos];
    }
  _known = 0;	// Invalidate cache
}
/* Adds every character of otherSet.  The bitmap is first lengthened to
 * the length of otherSet's bitmap representation when that is longer.
 */
- (void) formUnionWithCharacterSet: (NSCharacterSet*)otherSet
{
  NSData              *source = [otherSet bitmapRepresentation];
  unsigned            sourceLength = [source length];
  const unsigned char *sourceBytes = [source bytes];
  unsigned            pos;

  if (_length < sourceLength)
    {
      [_obj setLength: sourceLength];
      _length = sourceLength;
      _data = [_obj mutableBytes];
    }
  for (pos = 0; pos < sourceLength; pos++)
    {
      _data[pos] |= sourceBytes[pos];
    }
  _known = 0;	// Invalidate cache
}
/* Initialises the receiver with a mutable copy of bitmap (nil means
 * empty).  The bitmap length must be a multiple of GSBITMAP_SIZE and no
 * more than GSBITMAP_MAX; otherwise a message is logged, the receiver is
 * released and nil is returned.
 */
- (id) initWithBitmap: (NSData*)bitmap
{
  /* Validate the full-width length before narrowing it for _length. */
  NSUInteger length = [bitmap length];
  id         tmp;

  if ((length % GSBITMAP_SIZE) != 0 || length > GSBITMAP_MAX)
    {
      NSLog(@"attempt to initialize character set with invalid bitmap");
      /* Release rather than calling -dealloc directly, as the other
       * initialisers in this file do.
       */
      DESTROY(self);
      return nil;
    }
  if (bitmap == nil)
    {
      tmp = [NSMutableData new];
    }
  else
    {
      tmp = [bitmap mutableCopy];
    }
  DESTROY(_obj);
  _obj = tmp;
  _length = (unsigned)length;
  _data = [_obj mutableBytes];
  _known = 0;	// Invalidate cache
  return self;
}
/* Inverts membership of every character.  The bitmap is first extended to
 * GSBITMAP_MAX bytes so that all 17 planes take part.
 */
- (void) invert
{
  unsigned char *p;
  unsigned char *end;

  if (_length < GSBITMAP_MAX)
    {
      [_obj setLength: GSBITMAP_MAX];
      _length = GSBITMAP_MAX;
      _data = [_obj mutableBytes];
    }
  end = _data + _length;
  for (p = _data; p < end; p++)
    {
      *p ^= 0xff;	// Flip all eight bits of the byte.
    }
  _known = 0;	// Invalidate cache
}
/* Removes every character in aRange that lies inside the stored bitmap.
 * Raises NSInvalidArgumentException when the range ends above
 * GSUNICODE_MAX.
 */
- (void) removeCharactersInRange: (NSRange)aRange
{
  unsigned end = NSMaxRange(aRange);
  unsigned pos;

  if (NSMaxRange(aRange) > GSUNICODE_MAX)
    {
      [NSException raise:NSInvalidArgumentException
                  format:@"Specified range exceeds character set"];
      /* NOT REACHED */
    }
  /* Bits beyond the bitmap are already clear. */
  if (end > _length * 8)
    {
      end = _length * 8;
    }
  for (pos = aRange.location; pos < end; pos++)
    {
      GSCLRBIT(_data[pos / 8], pos % 8);
    }
  _known = 0;	// Invalidate cache
}
/* Removes every character of aString from the set.  A high surrogate
 * followed by a low surrogate is combined into the single character it
 * encodes.
 * Raises NSInvalidArgumentException when aString is nil.
 */
- (void) removeCharactersInString: (NSString*)aString
{
  NSUInteger length;

  if (!aString)
    {
      [NSException raise:NSInvalidArgumentException
                  format:@"Removing characters from nil string"];
      /* NOT REACHED */
    }
  length = [aString length];
  if (length > 0)
    {
      NSUInteger i;
      /* The index parameter is NSUInteger to match -characterAtIndex:. */
      unichar (*get)(id, SEL, NSUInteger);

      get = (unichar (*)(id, SEL, NSUInteger))
        [aString methodForSelector: @selector(characterAtIndex:)];
      for (i = 0; i < length; i++)
        {
          /* 32 bits wide: a combined surrogate pair is above 0xffff and
           * would be truncated in a unichar.
           */
          UTF32Char letter;
          unichar   second;
          unsigned  byte;

          letter = (*get)(aString, @selector(characterAtIndex:), i);
          // Convert a surrogate pair if necessary
          if (letter >= 0xd800 && letter <= 0xdbff && i < length-1
            && (second = (*get)(aString, @selector(characterAtIndex:), i+1))
            >= 0xdc00 && second <= 0xdfff)
            {
              i++;
              letter = ((letter - 0xd800) << 10)
                + (second - 0xdc00) + 0x0010000;
            }
          byte = letter/8;
          /* Characters beyond the bitmap are not members; nothing to clear. */
          if (byte < _length)
            {
              GSCLRBIT(_data[byte], letter % 8);
            }
        }
    }
  _known = 0;	// Invalidate cache
}
@end
/* A simple array for caching standard bitmap sets */
/* Number of slots in cache_set; the standard-set accessors in this file
 * use slot numbers 0 to 14.
 */
#define MAX_STANDARD_SETS 15
/* Standard sets by slot number, filled by +_staticSet:length:number:. */
static NSCharacterSet *cache_set[MAX_STANDARD_SETS];
/* Lock taken around cache_set updates; created in +[NSCharacterSet initialize]. */
static NSLock *cache_lock = nil;
/* Class pointers assigned in +[NSCharacterSet initialize]. */
static Class abstractClass = nil;
static Class abstractMutableClass = nil;
static Class concreteClass = nil;
static Class concreteMutableClass = nil;
#if defined(GNUSTEP_INDEX_CHARSET)
/* Index-set flavour of the cached standard sets; compiled only when
 * GNUSTEP_INDEX_CHARSET is defined.
 */
@interface _GSStaticCharSet : _GSIndexCharSet
{
/* Slot number of this set in cache_set; it is what gets archived. */
int _index;
}
@end
@implementation _GSStaticCharSet

/* Archived as the abstract class, which decodes the slot number. */
- (Class) classForCoder
{
  Class coderClass = abstractClass;

  return coderClass;
}

/* Only the slot number is written. */
- (void) encodeWithCoder: (NSCoder*)aCoder
{
  [aCoder encodeValueOfObjCType: @encode(int)
                             at: &_index];
}

/* Plain -init is not supported: releases the receiver and returns nil. */
- (id) init
{
  DESTROY(self);
  return nil;
}

/* Records the slot number and wraps the bitmap bytes in a static index
 * set.
 */
- (id) initWithBitmap: (NSData*)bitmap number: (int)number
{
  _GSStaticIndexSet *set = [_GSStaticIndexSet alloc];

  _index = number;
  indexes = [set _initWithBytes: [bitmap bytes]
                         length: [bitmap length]];
  return self;
}

@end
#else /* GNUSTEP_INDEX_CHARSET */
/* Bitmap flavour of the cached standard sets.  The first five instance
 * variables repeat those of NSBitmapCharSet in the same order; the
 * implementation adds NSBitmapCharSet's methods as a behaviour.
 * NOTE(review): the layout presumably has to stay in step with
 * NSBitmapCharSet for those methods to work -- confirm before reordering.
 */
@interface _GSStaticCharSet : NSCharacterSet
{
const unsigned char *_data;
unsigned _length;
NSData *_obj;
unsigned _known;
unsigned _present;
/* Slot number of this set in cache_set; it is what gets archived. */
int _index;
}
@end
@implementation _GSStaticCharSet

/* Adds the methods of NSBitmapCharSet to this class as a behaviour. */
+ (void) initialize
{
  Class source = [NSBitmapCharSet class];

  GSObjCAddClassBehavior(self, source);
}

/* Archived as the abstract class, which decodes the slot number. */
- (Class) classForCoder
{
  Class coderClass = abstractClass;

  return coderClass;
}

/* Only the slot number is written. */
- (void) encodeWithCoder: (NSCoder*)aCoder
{
  [aCoder encodeValueOfObjCType: @encode(int)
                             at: &_index];
}

/* Plain -init is not supported: releases the receiver and returns nil. */
- (id) init
{
  DESTROY(self);
  return nil;
}

/* Initialises through NSBitmapCharSet's -initWithBitmap: and, when that
 * succeeds, records the slot number.
 */
- (id) initWithBitmap: (NSData*)bitmap number: (int)number
{
  self = (_GSStaticCharSet*)[(NSBitmapCharSet*)self initWithBitmap: bitmap];
  if (self == nil)
    {
      return nil;
    }
  _index = number;
  return self;
}

@end
#endif /* GNUSTEP_INDEX_CHARSET */
@implementation NSCharacterSet
/* One-time setup of the class pointers and the cache lock.
 * Subclasses that do not override +initialize run this method again when
 * they are first used, so the lock is created inside the one-time guard:
 * creating it on every call would leak the previous lock and could swap
 * it out while another thread holds it.
 */
+ (void) initialize
{
  static BOOL one_time = NO;

  if (one_time == NO)
    {
      abstractClass = [NSCharacterSet class];
      abstractMutableClass = [NSMutableCharacterSet class];
#if defined(GNUSTEP_INDEX_CHARSET)
      concreteClass = [_GSIndexCharSet class];
      concreteMutableClass = [_GSMutableIndexCharSet class];
#else
      concreteClass = [NSBitmapCharSet class];
      concreteMutableClass = [NSMutableBitmapCharSet class];
#endif
      cache_lock = [GSLazyLock new];
      one_time = YES;
    }
}
/**
 * Create and cache (or retrieve from cache) a character set
 * using static bitmap data.
 * Return nil if no data is supplied and the cache is empty.
 */
/* Returns the cached set in slot `number`, creating it from the `length`
 * bytes at `bytes` when the slot is empty and bytes were supplied.
 * Returns nil when the slot is empty and no bytes were given, or when
 * `number` is not a valid slot (it may come from an archive, so it is
 * checked before indexing cache_set).
 */
+ (NSCharacterSet*) _staticSet: (const void*)bytes
                        length: (unsigned)length
                        number: (int)number
{
  NSCharacterSet *set;

  if (number < 0 || number >= MAX_STANDARD_SETS)
    {
      return nil;
    }
  [cache_lock lock];
  if (cache_set[number] == nil && bytes != 0)
    {
      NSData *bitmap;

      /* The bytes are referenced, not copied or freed. */
      bitmap = [[NSDataStatic alloc] initWithBytesNoCopy: (void*)bytes
                                                  length: length
                                            freeWhenDone: NO];
      cache_set[number]
        = [[_GSStaticCharSet alloc] initWithBitmap: bitmap number: number];
      RELEASE(bitmap);
    }
  /* Read the slot while still holding the lock. */
  set = cache_set[number];
  [cache_lock unlock];
  return set;
}
/* Shared set built from the alphanumericCharSet table (cache slot 0). */
+ (NSCharacterSet*) alphanumericCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: alphanumericCharSet
                  length: sizeof(alphanumericCharSet)
                  number: 0];
  return set;
}
/* Shared set built from the titlecaseLetterCharSet table (cache slot 13). */
+ (NSCharacterSet*) capitalizedLetterCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: titlecaseLetterCharSet
                  length: sizeof(titlecaseLetterCharSet)
                  number: 13];
  return set;
}
/* Shared set built from the controlCharSet table (cache slot 1). */
+ (NSCharacterSet*) controlCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: controlCharSet
                  length: sizeof(controlCharSet)
                  number: 1];
  return set;
}
/* Shared set built from the decimalDigitCharSet table (cache slot 2). */
+ (NSCharacterSet*) decimalDigitCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: decimalDigitCharSet
                  length: sizeof(decimalDigitCharSet)
                  number: 2];
  return set;
}
/* Shared set built from the decomposableCharSet table (cache slot 3). */
+ (NSCharacterSet*) decomposableCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: decomposableCharSet
                  length: sizeof(decomposableCharSet)
                  number: 3];
  return set;
}
/* Shared set built from the illegalCharSet table (cache slot 4). */
+ (NSCharacterSet*) illegalCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: illegalCharSet
                  length: sizeof(illegalCharSet)
                  number: 4];
  return set;
}
/* Shared set built from the letterCharSet table (cache slot 5). */
+ (NSCharacterSet*) letterCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: letterCharSet
                  length: sizeof(letterCharSet)
                  number: 5];
  return set;
}
/* Shared set built from the lowercaseLetterCharSet table (cache slot 6). */
+ (NSCharacterSet*) lowercaseLetterCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: lowercaseLetterCharSet
                  length: sizeof(lowercaseLetterCharSet)
                  number: 6];
  return set;
}
/* Shared set built from the newlineCharSet table (cache slot 14). */
+ (NSCharacterSet*) newlineCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: newlineCharSet
                  length: sizeof(newlineCharSet)
                  number: 14];
  return set;
}
/* Shared set built from the nonBaseCharSet table (cache slot 7). */
+ (NSCharacterSet*) nonBaseCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: nonBaseCharSet
                  length: sizeof(nonBaseCharSet)
                  number: 7];
  return set;
}
/* Shared set built from the punctuationCharSet table (cache slot 8). */
+ (NSCharacterSet*) punctuationCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: punctuationCharSet
                  length: sizeof(punctuationCharSet)
                  number: 8];
  return set;
}
/* Shared set built from the symbolAndOperatorCharSet table (cache slot 9). */
+ (NSCharacterSet*) symbolCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: symbolAndOperatorCharSet
                  length: sizeof(symbolAndOperatorCharSet)
                  number: 9];
  return set;
}
// FIXME ... deprecated ... remove after next release.
/* Deprecated name for +symbolCharacterSet: logs a deprecation notice via
 * GSOnceMLog, then returns the same cached set (slot 9).
 */
+ (NSCharacterSet*) symbolAndOperatorCharacterSet
{
  NSCharacterSet *set;

  GSOnceMLog(@"symbolAndOperatorCharacterSet is deprecated ... use symbolCharacterSet");
  set = [self _staticSet: symbolAndOperatorCharSet
                  length: sizeof(symbolAndOperatorCharSet)
                  number: 9];
  return set;
}
/* Shared set built from the uppercaseLetterCharSet table (cache slot 10). */
+ (NSCharacterSet*) uppercaseLetterCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: uppercaseLetterCharSet
                  length: sizeof(uppercaseLetterCharSet)
                  number: 10];
  return set;
}
/* Shared set built from the whitespaceAndNlCharSet table (cache slot 11). */
+ (NSCharacterSet*) whitespaceAndNewlineCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: whitespaceAndNlCharSet
                  length: sizeof(whitespaceAndNlCharSet)
                  number: 11];
  return set;
}
/* Shared set built from the whitespaceCharSet table (cache slot 12). */
+ (NSCharacterSet*) whitespaceCharacterSet
{
  NSCharacterSet *set;

  set = [self _staticSet: whitespaceCharSet
                  length: sizeof(whitespaceCharSet)
                  number: 12];
  return set;
}
/* Returns an autoreleased concrete set initialised from data. */
+ (NSCharacterSet*) characterSetWithBitmapRepresentation: (NSData*)data
{
  id set = [[concreteClass alloc] initWithBitmap: data];

  return AUTORELEASE(set);
}
/* Builds the set in a temporary mutable set and returns an autoreleased
 * copy of it.
 */
+ (NSCharacterSet*) characterSetWithCharactersInString: (NSString*)aString
{
  NSMutableCharacterSet *builder = [NSMutableCharacterSet new];
  NSCharacterSet        *result;

  [builder addCharactersInString: aString];
  result = [builder copy];
  RELEASE(builder);
  return AUTORELEASE(result);
}
/* Builds the set in a temporary mutable set and returns an autoreleased
 * copy of it.
 */
+ (NSCharacterSet*) characterSetWithRange: (NSRange)aRange
{
  NSMutableCharacterSet *builder = [NSMutableCharacterSet new];
  NSCharacterSet        *result;

  [builder addCharactersInRange: aRange];
  result = [builder copy];
  RELEASE(builder);
  return AUTORELEASE(result);
}
/* Loads a set from a file whose path extension is "bitmap"; any other
 * extension yields nil.
 */
+ (NSCharacterSet*) characterSetWithContentsOfFile: (NSString*)aFile
{
  NSData *contents;

  if (![@"bitmap" isEqual: [aFile pathExtension]])
    {
      return nil;
    }
  contents = [NSData dataWithContentsOfFile: aFile];
  return [self characterSetWithBitmapRepresentation: contents];
}
/* Generic implementation: asks -characterIsMember: about each of the
 * 65536 16-bit characters and records the answers in an 8192 byte bitmap.
 */
- (NSData*) bitmapRepresentation
{
  NSMutableData *bitmap = [NSMutableData dataWithLength: 8192];
  unsigned char *bits = (unsigned char*)[bitmap mutableBytes];
  SEL           sel = @selector(characterIsMember:);
  BOOL          (*isMember)(id, SEL, unichar);
  unsigned      c;

  /* Look the method up once instead of dispatching 65536 times. */
  isMember = (BOOL (*)(id,SEL,unichar))[self methodForSelector: sel];
  for (c = 0; c <= 0xffff; c++)
    {
      if (isMember(self, sel, c) == YES)
        {
          GSSETBIT(bits[c / 8], c % 8);
        }
    }
  return bitmap;
}
/* Abstract: reports subclass responsibility; the return value is a
 * placeholder.
 */
- (BOOL) characterIsMember: (unichar)aCharacter
{
  [self subclassResponsibility: _cmd];

  return NO;
}
/* Retains the receiver when NSShouldRetainWithZone() allows it; otherwise
 * creates a concrete set in zone from the bitmap representation.
 */
- (id) copyWithZone: (NSZone*)zone
{
  id copy;

  if (NSShouldRetainWithZone(self, zone))
    {
      return RETAIN(self);
    }
  copy = [concreteClass allocWithZone: zone];
  return [copy initWithBitmap: [self bitmapRepresentation]];
}
/* Writes nothing to the coder; the subclasses in this file provide their
 * own implementations.
 */
- (void) encodeWithCoder: (NSCoder*)aCoder
{
}
/* Generic answer: YES for plane 0 and NO for every other plane. */
- (BOOL) hasMemberInPlane: (uint8_t)aPlane
{
  return (aPlane == 0) ? YES : NO;
}
/* When sent to an instance of the abstract class itself, replaces the
 * receiver with an empty instance of the concrete class; otherwise
 * returns the receiver unchanged.
 */
- (id) init
{
  id concrete;

  if (object_getClass(self) != abstractClass)
    {
      return self;
    }
  concrete = [concreteClass allocWithZone: [self zone]];
  concrete = [concrete initWithBitmap: nil];
  DESTROY(self);
  return concrete;
}
/* Decodes a set archived as the abstract class: the archive holds only
 * the slot number of a standard set.
 * The slot number is mapped to the matching class accessor, so the set is
 * created on demand instead of only being found when it already happens
 * to be in the cache.  An unknown slot number yields nil.
 * Subclasses reaching this implementation are returned unchanged.
 */
- (id) initWithCoder: (NSCoder*)aCoder
{
  if ([self class] == abstractClass)
    {
      int            index;
      NSCharacterSet *set;

      DESTROY(self);
      [aCoder decodeValueOfObjCType: @encode(int) at: &index];
      /* The numbers match those passed to +_staticSet:length:number: by
       * the accessors in this class.
       */
      switch (index)
        {
          case 0:  set = [abstractClass alphanumericCharacterSet]; break;
          case 1:  set = [abstractClass controlCharacterSet]; break;
          case 2:  set = [abstractClass decimalDigitCharacterSet]; break;
          case 3:  set = [abstractClass decomposableCharacterSet]; break;
          case 4:  set = [abstractClass illegalCharacterSet]; break;
          case 5:  set = [abstractClass letterCharacterSet]; break;
          case 6:  set = [abstractClass lowercaseLetterCharacterSet]; break;
          case 7:  set = [abstractClass nonBaseCharacterSet]; break;
          case 8:  set = [abstractClass punctuationCharacterSet]; break;
          case 9:  set = [abstractClass symbolCharacterSet]; break;
          case 10: set = [abstractClass uppercaseLetterCharacterSet]; break;
          case 11:
            set = [abstractClass whitespaceAndNewlineCharacterSet];
            break;
          case 12: set = [abstractClass whitespaceCharacterSet]; break;
          case 13: set = [abstractClass capitalizedLetterCharacterSet]; break;
          case 14: set = [abstractClass newlineCharacterSet]; break;
          default: set = nil; break;	// Not a slot this file knows.
        }
      /* The caller owns the result of an init method. */
      self = RETAIN(set);
    }
  return self;
}
/* Returns a set containing exactly the characters that are not in the
 * receiver.
 * The bitmap representation may cover only the planes that have members
 * (it is empty for an empty set), so it is padded with zero bytes to
 * GSBITMAP_MAX before the bits are flipped.  Without the padding the
 * planes beyond the representation would be missing from the result.
 */
- (NSCharacterSet*) invertedSet
{
  unsigned      i;
  unsigned      length;
  unsigned char *bytes;
  NSMutableData *bitmap;

  bitmap = AUTORELEASE([[self bitmapRepresentation] mutableCopy]);
  if ([bitmap length] < GSBITMAP_MAX)
    {
      /* -setLength: fills the added bytes with zeros. */
      [bitmap setLength: GSBITMAP_MAX];
    }
  length = [bitmap length];
  bytes = [bitmap mutableBytes];
  for (i = 0; i < length; i++)
    {
      bytes[i] = ~bytes[i];
    }
  return [[self class] characterSetWithBitmapRepresentation: bitmap];
}
/* Two character sets are equal when they have the same members.
 * Each of the 17 planes is compared with -longCharacterIsMember: using the
 * full character value (plane << 16 | offset), so planes above 0 really
 * are compared.
 * -hasMemberInPlane: is used only to skip planes both sets report as
 * empty: the generic implementation in this class answers YES for plane 0
 * without looking, so a YES from one side and a NO from the other does
 * not prove the sets differ.
 */
- (BOOL) isEqual: (id)anObject
{
  if (anObject == self)
    return YES;
  if ([anObject isKindOfClass: abstractClass])
    {
      unsigned i;
      unsigned p;
      BOOL     (*rImp)(id, SEL, UTF32Char);
      BOOL     (*oImp)(id, SEL, UTF32Char);

      rImp = (BOOL (*)(id,SEL,UTF32Char))
        [self methodForSelector: @selector(longCharacterIsMember:)];
      oImp = (BOOL (*)(id,SEL,UTF32Char))
        [anObject methodForSelector: @selector(longCharacterIsMember:)];
      for (p = 0; p <= 16; p++)
        {
          UTF32Char base = ((UTF32Char)p) << 16;

          if ([self hasMemberInPlane: p] == NO
            && [anObject hasMemberInPlane: p] == NO)
            {
              continue;	// Nothing in this plane on either side.
            }
          for (i = 0; i <= 0xffff; i++)
            {
              BOOL mine;
              BOOL theirs;

              mine = rImp(self, @selector(longCharacterIsMember:), base + i)
                ? YES : NO;
              theirs
                = oImp(anObject, @selector(longCharacterIsMember:), base + i)
                ? YES : NO;
              if (mine != theirs)
                {
                  return NO;
                }
            }
        }
      return YES;
    }
  return NO;
}
/* The receiver is a superset of aSet when adding aSet to a copy of the
 * receiver leaves that copy equal to the receiver.
 */
- (BOOL) isSupersetOfSet: (NSCharacterSet*)aSet
{
  NSMutableCharacterSet *combined = [self mutableCopy];
  BOOL                  result;

  [combined formUnionWithCharacterSet: aSet];
  result = [self isEqual: combined];
  RELEASE(combined);
  return result;
}
/* Generic implementation: characters of plane 0 are passed to
 * -characterIsMember:; characters of any other plane are not members.
 * Raises NSInvalidArgumentException when aCharacter is not below
 * GSUNICODE_MAX.
 */
- (BOOL) longCharacterIsMember: (UTF32Char)aCharacter
{
  if (aCharacter >= GSUNICODE_MAX)
    {
      [NSException raise: NSInvalidArgumentException
                  format: @"[%@-%@] argument (0x%08x) is too large",
        NSStringFromClass([self class]), NSStringFromSelector(_cmd),
        aCharacter];
    }
  if ((aCharacter >> 16) != 0)
    {
      return NO;
    }
  return [self characterIsMember: (unichar)(aCharacter & 0xffff)];
}
/* Returns a new concrete mutable set in zone holding the receiver's
 * bitmap representation.
 */
- (id) mutableCopyWithZone: (NSZone*)zone
{
  NSData *bitmap = [self bitmapRepresentation];
  id     copy = [concreteMutableClass allocWithZone: zone];

  return [copy initWithBitmap: bitmap];
}
@end
@implementation NSMutableCharacterSet

/* Shared body of the standard-set accessors below: sends sel to the
 * abstract immutable class and returns an autoreleased mutable copy of
 * the result.
 */
static id
_GSMutableStandardSet(SEL sel)
{
  id standard = [abstractClass performSelector: sel];

  return AUTORELEASE([standard mutableCopy]);
}

/* Override this from NSCharacterSet to create the correct class */
+ (NSCharacterSet*) characterSetWithBitmapRepresentation: (NSData*)data
{
  id set = [[concreteMutableClass alloc] initWithBitmap: data];

  return AUTORELEASE(set);
}

+ (NSCharacterSet*) alphanumericCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) capitalizedLetterCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) controlCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) decimalDigitCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) decomposableCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) illegalCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) letterCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) lowercaseLetterCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) newlineCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) nonBaseCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) punctuationCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) symbolCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

// FIXME ... deprecated ... remove after next release.
+ (NSCharacterSet*) symbolAndOperatorCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) uppercaseLetterCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) whitespaceAndNewlineCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

+ (NSCharacterSet*) whitespaceCharacterSet
{
  return _GSMutableStandardSet(_cmd);
}

/* Returns an autoreleased mutable set holding the characters of aString. */
+ (NSCharacterSet*) characterSetWithCharactersInString: (NSString*)aString
{
  NSMutableCharacterSet *set = [abstractMutableClass new];

  [set addCharactersInString: aString];
  return AUTORELEASE(set);
}

/* Returns an autoreleased mutable set holding the characters of aRange. */
+ (NSCharacterSet*) characterSetWithRange: (NSRange)aRange
{
  NSMutableCharacterSet *set = [abstractMutableClass new];

  [set addCharactersInRange: aRange];
  return AUTORELEASE(set);
}

/* Abstract: concrete subclasses must implement. */
- (void) addCharactersInRange: (NSRange)aRange
{
  [self subclassResponsibility: _cmd];
}

/* Abstract: concrete subclasses must implement. */
- (void) addCharactersInString: (NSString*)aString
{
  [self subclassResponsibility: _cmd];
}

/* Returns a new concrete immutable set in zone holding the receiver's
 * bitmap representation.
 */
- (id) copyWithZone: (NSZone*)zone
{
  NSData *bitmap = [self bitmapRepresentation];
  id     copy = [concreteClass allocWithZone: zone];

  return [copy initWithBitmap: bitmap];
}

/* Abstract: concrete subclasses must implement. */
- (void) formIntersectionWithCharacterSet: (NSCharacterSet*)otherSet
{
  [self subclassResponsibility: _cmd];
}

/* Abstract: concrete subclasses must implement. */
- (void) formUnionWithCharacterSet: (NSCharacterSet*)otherSet
{
  [self subclassResponsibility: _cmd];
}

/* When sent to an instance of the abstract mutable class itself, replaces
 * the receiver with an empty concrete mutable set; otherwise returns the
 * receiver unchanged.
 */
- (id) init
{
  id concrete;

  if (object_getClass(self) != abstractMutableClass)
    {
      return self;
    }
  concrete = [concreteMutableClass allocWithZone: [self zone]];
  concrete = [concrete initWithBitmap: nil];
  DESTROY(self);
  return concrete;
}

/* As -init, but the replacement is initialised from bitmap. */
- (id) initWithBitmap: (NSData*)bitmap
{
  id concrete;

  if (object_getClass(self) != abstractMutableClass)
    {
      return self;
    }
  concrete = [concreteMutableClass allocWithZone: [self zone]];
  concrete = [concrete initWithBitmap: bitmap];
  DESTROY(self);
  return concrete;
}

/* Abstract: concrete subclasses must implement. */
- (void) invert
{
  [self subclassResponsibility: _cmd];
}

/* Abstract: concrete subclasses must implement. */
- (void) removeCharactersInRange: (NSRange)aRange
{
  [self subclassResponsibility: _cmd];
}

/* Abstract: concrete subclasses must implement. */
- (void) removeCharactersInString: (NSString*)aString
{
  [self subclassResponsibility: _cmd];
}

@end
/* Below is an experimental implementation of a mutable character set
* implemented in terms of an NSMutableIndexSet. This should be much
* smaller than a bitmap representation for normal charactersets.
*/
/* Declares a method that is not implemented in this file so it can be
 * sent to index sets here.
 * NOTE(review): presumably implemented in NSIndexSet.m -- confirm.
 */
@interface NSIndexSet (NSCharacterSet)
- (NSUInteger) _gapGreaterThanIndex: (NSUInteger)anIndex;
@end
@implementation _GSIndexCharSet
/* Builds a bitmap by adding each run of consecutive indexes to a
 * temporary mutable bitmap set and returning that set's representation.
 * The running index is an NSUInteger so that it can actually equal
 * NSNotFound; an `unsigned` cannot hold that value where NSUInteger is
 * wider, which would stop the loop from ever terminating.
 */
- (NSData*) bitmapRepresentation
{
  NSMutableBitmapCharSet *tmp;
  NSData                 *result;
  NSUInteger             index = 0;

  tmp = [NSMutableBitmapCharSet new];
  while ((index = [indexes indexGreaterThanOrEqualToIndex: index])
    != NSNotFound)
    {
      NSRange r;

      r.location = index;
      index = [indexes _gapGreaterThanIndex: index];
      if (index == NSNotFound)
        {
          /* No gap reported: add just this one index. */
          r.length = 1;
        }
      else
        {
          r.length = index - r.location;
        }
      [tmp addCharactersInRange: r];
      index = NSMaxRange(r);
    }
  result = AUTORELEASE(RETAIN([tmp bitmapRepresentation]));
  RELEASE(tmp);
  return result;
}
/* A character is a member when its value is in the index set. */
- (BOOL) characterIsMember: (unichar)aCharacter
{
  int value = (int)aCharacter;

  return [indexes containsIndex: value];
}
/* Archived as NSBitmapCharSet; -encodeWithCoder: writes a bitmap. */
- (Class) classForCoder
{
  Class coderClass = [NSBitmapCharSet class];

  return coderClass;
}
/* Releases the index set. */
- (void) dealloc
{
  DESTROY(indexes);

  [super dealloc];
}
/* Encodes the bitmap representation as a single object. */
- (void) encodeWithCoder: (NSCoder*)aCoder
{
  NSData *bitmap = [self bitmapRepresentation];

  [aCoder encodeObject: bitmap];
}
/* Reports whether the index set has an index inside plane aPlane, i.e. in
 * 0x10000 * aPlane up to but not including 0x10000 * (aPlane + 1).
 * The result of the lookup is kept in an NSUInteger so that the
 * comparison with NSNotFound is meaningful on every platform.
 */
- (BOOL) hasMemberInPlane: (uint8_t)aPlane
{
  NSUInteger first = (NSUInteger)0x10000 * aPlane;
  NSUInteger limit = (NSUInteger)0x10000 * (aPlane + 1);
  NSUInteger found;

  found = [indexes indexGreaterThanOrEqualToIndex: first];
  if (found != NSNotFound && found < limit)
    {
      return YES;
    }
  return NO;
}
/* Returns the backing index set itself, not a copy. */
- (NSIndexSet*) _indexes
{
  NSIndexSet *backing = indexes;

  return backing;
}
/* Initialises an empty set by passing a nil bitmap to -initWithBitmap:. */
- (id) init
{
  self = [self initWithBitmap: nil];
  return self;
}
/* Converts a bitmap (one bit per character, lowest bit of each byte
 * first) into runs of consecutive indexes.  Bytes that are all zeros or
 * all ones are handled eight characters at a time; other bytes are
 * examined bit by bit.
 */
- (id) initWithBitmap: (NSData*)bitmap
{
  const unsigned char *src = [bitmap bytes];
  unsigned            count = [bitmap length];
  unsigned            position = 0;	// Character value of the next bit.
  unsigned            n;
  NSRange             run;
  BOOL                inRun = NO;	// YES while a run of set bits is open.

  run.location = 0;
  indexes = [NSMutableIndexSet new];
  for (n = 0; n < count; n++)
    {
      unsigned char value = src[n];

      if (value == 0)
        {
          /* Eight clear bits: close any open run at this position. */
          if (inRun == YES)
            {
              run.length = position - run.location;
              [indexes addIndexesInRange: run];
              inRun = NO;
            }
          position += 8;
        }
      else if (value == 0xff)
        {
          /* Eight set bits: open a run unless one is already open. */
          if (inRun == NO)
            {
              run.location = position;
              inRun = YES;
            }
          position += 8;
        }
      else
        {
          unsigned shift;

          for (shift = 0; shift < 8; shift++)
            {
              if ((value & (1 << shift)) == 0)
                {
                  if (inRun == YES)
                    {
                      run.length = position - run.location;
                      [indexes addIndexesInRange: run];
                      inRun = NO;
                    }
                }
              else if (inRun == NO)
                {
                  run.location = position;
                  inRun = YES;
                }
              position++;
            }
        }
    }
  /* A run still open at the end of the bitmap ends there. */
  if (inRun == YES)
    {
      run.length = position - run.location;
      [indexes addIndexesInRange: run];
    }
  return self;
}
/* Decodes a bitmap object and initialises from it. */
- (id) initWithCoder: (NSCoder*)aCoder
{
  NSData *bitmap = [aCoder decodeObject];

  return [self initWithBitmap: bitmap];
}
/* A character is a member when its value is in the index set. */
- (BOOL) longCharacterIsMember: (UTF32Char)aCharacter
{
  int value = (int)aCharacter;

  return [indexes containsIndex: value];
}
@end
@implementation _GSMutableIndexCharSet
/* For this exact class only: sets the class version and adds the methods
 * of _GSIndexCharSet as a behaviour.
 */
+ (void) initialize
{
  if (self != [_GSMutableIndexCharSet class])
    {
      return;
    }
  [self setVersion: 1];
  GSObjCAddClassBehavior(self, [_GSIndexCharSet class]);
}
/* Adds the indexes of aRange.
 * Raises NSInvalidArgumentException when the range ends above
 * GSUNICODE_MAX.
 */
- (void) addCharactersInRange: (NSRange)aRange
{
  NSUInteger end = NSMaxRange(aRange);

  if (end > GSUNICODE_MAX)
    {
      [NSException raise:NSInvalidArgumentException
                  format:@"Specified range exceeds character set"];
      /* NOT REACHED */
    }
  [indexes addIndexesInRange: aRange];
}
/* Adds every character of aString to the set.  A high surrogate followed
 * by a low surrogate is combined into the single character it encodes.
 * Raises NSInvalidArgumentException when aString is nil.
 */
- (void) addCharactersInString: (NSString*)aString
{
  NSUInteger length;

  if (!aString)
    {
      [NSException raise:NSInvalidArgumentException
                  format:@"Adding characters from nil string"];
      /* NOT REACHED */
    }
  length = [aString length];
  if (length > 0)
    {
      NSUInteger i;
      /* The index parameter is NSUInteger to match -characterAtIndex:. */
      unichar (*get)(id, SEL, NSUInteger);

      get = (unichar (*)(id, SEL, NSUInteger))
        [aString methodForSelector: @selector(characterAtIndex:)];
      for (i = 0; i < length; i++)
        {
          /* 32 bits wide: a combined surrogate pair is above 0xffff and
           * would be truncated in a unichar.
           */
          UTF32Char letter;
          unichar   second;

          letter = (*get)(aString, @selector(characterAtIndex:), i);
          // Convert a surrogate pair if necessary
          if (letter >= 0xd800 && letter <= 0xdbff && i < length-1
            && (second = (*get)(aString, @selector(characterAtIndex:), i+1))
            >= 0xdc00 && second <= 0xdfff)
            {
              i++;
              letter = ((letter - 0xd800) << 10)
                + (second - 0xdc00) + 0x0010000;
            }
          [indexes addIndexesInRange: NSMakeRange(letter, 1)];
        }
    }
}
/* Archived as NSMutableBitmapCharSet. */
- (Class) classForCoder
{
  Class coderClass = [NSMutableBitmapCharSet class];

  return coderClass;
}
/* Removes from the receiver every index that is not also in otherSet.
 * Other kinds of character set are first converted to an index set via
 * their bitmap representation.
 * All index variables are NSUInteger so that comparisons with NSNotFound
 * behave as intended where NSUInteger is wider than `unsigned`.
 */
- (void) formIntersectionWithCharacterSet: (NSCharacterSet *)otherSet
{
  NSIndexSet *otherIndexes;
  NSUInteger index = 0;
  NSUInteger i0;
  NSUInteger i1;

  if ([otherSet isKindOfClass: [_GSIndexCharSet class]] == YES)
    {
      otherIndexes = [(_GSIndexCharSet*)otherSet _indexes];
    }
  else
    {
      _GSIndexCharSet *tmp;

      tmp = [[_GSIndexCharSet alloc] initWithBitmap:
        [otherSet bitmapRepresentation]];
      /* Keep the index set alive after tmp is released. */
      otherIndexes = AUTORELEASE(RETAIN([tmp _indexes]));
      RELEASE(tmp);
    }

  /* Find first index in each set.
   */
  i0 = [indexes indexGreaterThanOrEqualToIndex: 0];
  i1 = [otherIndexes indexGreaterThanOrEqualToIndex: 0];

  /* Loop until there are no more indexes to process in the set and
   * the intersection operation has therefore completed.
   */
  while (i0 != NSNotFound)
    {
      if (i1 == NSNotFound)
        {
          /* No more indexes in other set ... remove everything from the
           * last gap onwards, and finish.
           */
          [indexes removeIndexesInRange: NSMakeRange(index, NSNotFound-index)];
          break;
        }
      if (i1 > i0)
        {
          /* Indexes in other set start after this set ... so remove any
           * from the last gap to the index in the other set.
           */
          [indexes removeIndexesInRange: NSMakeRange(index, i1 - index)];
          index = i1;
        }
      else
        {
          index = i0;
        }

      /* Find the next gap in each set, and set our gap index to the
       * lower of the two.
       */
      i0 = [indexes _gapGreaterThanIndex: index];
      i1 = [otherIndexes _gapGreaterThanIndex: index];
      index = i0;
      if (i1 < i0)
        {
          index = i1;
        }

      /* Find the next index in each set so we can loop round and
       * do it all again.
       */
      i0 = [indexes indexGreaterThanIndex: i0];
      i1 = [otherIndexes indexGreaterThanIndex: i1];
    }
}
/* Adds every index of otherSet to the receiver, one run of consecutive
 * indexes at a time.  Other kinds of character set are first converted to
 * an index set via their bitmap representation.
 * The running index is an NSUInteger so that the loop can see NSNotFound
 * and terminate where NSUInteger is wider than `unsigned`.
 */
- (void) formUnionWithCharacterSet: (NSCharacterSet*)otherSet
{
  NSIndexSet *otherIndexes;
  NSUInteger index;

  if ([otherSet isKindOfClass: [_GSIndexCharSet class]] == YES)
    {
      otherIndexes = [(_GSIndexCharSet*)otherSet _indexes];
    }
  else
    {
      _GSIndexCharSet *tmp;

      tmp = [[_GSIndexCharSet alloc] initWithBitmap:
        [otherSet bitmapRepresentation]];
      /* Keep the index set alive after tmp is released. */
      otherIndexes = AUTORELEASE(RETAIN([tmp _indexes]));
      RELEASE(tmp);
    }
  index = [otherIndexes indexGreaterThanOrEqualToIndex: 0];
  while (index != NSNotFound)
    {
      NSRange r;

      /* The run goes from this index up to the next gap. */
      r.location = index;
      index = [otherIndexes _gapGreaterThanIndex: index];
      r.length = index - r.location;
      [indexes addIndexesInRange: r];
      index = [otherIndexes indexGreaterThanOrEqualToIndex: index];
    }
}
/* Replaces the index set with its complement below GSUNICODE_MAX by
 * collecting every gap between existing runs into a new index set.
 * The running index is an NSUInteger so that the loop can see NSNotFound
 * and terminate where NSUInteger is wider than `unsigned`.
 */
- (void) invert
{
  NSMutableIndexSet *tmp;
  NSUInteger        index;

  tmp = [NSMutableIndexSet new];

  /* Locate the start of the first gap
   */
  if ([indexes containsIndex: 0] == YES)
    {
      index = [indexes _gapGreaterThanIndex: 0];
    }
  else
    {
      index = 0;
    }

  while (index != NSNotFound)
    {
      NSRange r;

      r.location = index;
      index = [indexes indexGreaterThanIndex: index];
      if (index == NSNotFound)
        {
          /* No more indexes, so we have a gap to the end of all
           * unicode characters which we can invert.
           */
          index = GSUNICODE_MAX;
        }
      r.length = index - r.location;
      [tmp addIndexesInRange: r];
      index = [indexes _gapGreaterThanIndex: NSMaxRange(r) - 1];
    }
  ASSIGN(indexes, tmp);
  RELEASE(tmp);
}
/* Removes the indexes of aRange.
 * Raises NSInvalidArgumentException when the range ends above
 * GSUNICODE_MAX.
 */
- (void) removeCharactersInRange: (NSRange)aRange
{
  NSUInteger end = NSMaxRange(aRange);

  if (end > GSUNICODE_MAX)
    {
      [NSException raise:NSInvalidArgumentException
                  format:@"Specified range exceeds character set"];
      /* NOT REACHED */
    }
  [indexes removeIndexesInRange: aRange];
}
/* Removes every character of aString from the set.  A high surrogate
 * followed by a low surrogate is combined into the single character it
 * encodes.
 * Raises NSInvalidArgumentException when aString is nil.
 */
- (void) removeCharactersInString: (NSString*)aString
{
  NSUInteger length;

  if (!aString)
    {
      [NSException raise:NSInvalidArgumentException
                  format:@"Removing characters from nil string"];
      /* NOT REACHED */
    }
  length = [aString length];
  if (length > 0)
    {
      NSUInteger i;
      /* The index parameter is NSUInteger to match -characterAtIndex:. */
      unichar (*get)(id, SEL, NSUInteger);

      get = (unichar (*)(id, SEL, NSUInteger))
        [aString methodForSelector: @selector(characterAtIndex:)];
      for (i = 0; i < length; i++)
        {
          /* 32 bits wide: a combined surrogate pair is above 0xffff and
           * would be truncated in a unichar.
           */
          UTF32Char letter;
          unichar   second;

          letter = (*get)(aString, @selector(characterAtIndex:), i);
          // Convert a surrogate pair if necessary
          if (letter >= 0xd800 && letter <= 0xdbff && i < length-1
            && (second = (*get)(aString, @selector(characterAtIndex:), i+1))
            >= 0xdc00 && second <= 0xdfff)
            {
              i++;
              letter = ((letter - 0xd800) << 10)
                + (second - 0xdc00) + 0x0010000;
            }
          [indexes removeIndexesInRange: NSMakeRange(letter, 1)];
        }
    }
}
@end