libs-base/Source/GSICUString.m
David Chisnall a5335c34fb Added implementation of NSRegularExpression, from iOS 4 Foundation.
This class is a thin wrapper around libicu regular expressions, so if we don't
have libicu we simply don't compile it at all.  This will give people a linker
failure, rather than a nonfunctional class if they try to use GNUstep without
ICU with code that requires it.

The Apple documentation says that this class has a primitive method that takes
a block as an argument and that this method is called by others, so subclasses
can replace that block method without touching the convenience methods.  We
mimic this behaviour when compiling with blocks, but when compiling without them
it's a problem.  The current code contains some ugly hacks that will work in
normal usage but break with subclassing when not compiling with blocks.

This commit also includes a partial implementation of NSTextCheckingResult,
implementing the subset of its functionality required for NSRegularExpression
to work.

It also includes numerous fixes to GSICUString.  This is heavily used by
NSRegularExpression, to avoid copying strings when mapping between UText for
libicu and NSString for GNUstep.


Note: I don't have a copy of iOS anywhere to test this against, so it's
entirely possible that there are significant discrepancies between this
implementation of NSRegularExpression and the iOS version.  This version should
function exactly as the iOS one is described as functioning, but I think we've
all seen that Apple documentation refers more to hopes than facts.  Any testing
that someone who does have an ip{hone,od,ad} can do is very welcome.



git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@31635 72102866-910b-0410-8b05-ffd578937521
2010-11-19 22:06:18 +00:00

381 lines
9.3 KiB
Objective-C

#import "config.h"
#if HAVE_ICU
#import "GSICUString.h"
#import "Foundation/NSException.h"
/**
 * The number of characters that we use per chunk when fetching a block of
 * characters at once for iteration.  Making this value larger will make UText
 * iteration faster, at the cost of more memory.  Making it larger than the
 * size of a typical string will make it no faster but will still cost memory.
 *
 * UTextNSStringAccess() copies up to this many unichars into the UText's
 * pExtra buffer, so every UText set up in this file must reserve at least
 * chunkSize * sizeof(unichar) bytes of extra storage.
 */
static const NSUInteger chunkSize = 32;
/**
 * Reports the native length of an NSString-backed UText, i.e. the value of
 * -length for the string object stored in ut->p.
 */
static int64_t UTextNSStringNativeLength(UText *ut)
{
  NSString *backing = (NSString*)ut->p;

  return [backing length];
}
/**
 * Loads a group of characters into the buffer that can be directly accessed by
 * users of the UText.  This is used for iteration by UText users.
 *
 * ut          - the NSString-backed UText.  ut->p is the string and
 *               ut->pExtra is the buffer the characters are copied into.
 * nativeIndex - the requested UTF-16 index.  Values outside the string are
 *               pinned to its start or end.
 * forward     - if true the chunk must contain the character at nativeIndex
 *               (start <= index < limit); if false it must contain the
 *               character before nativeIndex (start < index <= limit).
 *
 * Returns TRUE if the requested character is available in the chunk, and
 * FALSE if there is no such character (forward access at or past the end of
 * the string, backward access at or before its start).  In both cases the
 * iteration position (chunkNativeStart + chunkOffset) is left at the pinned
 * index.
 *
 * NOTE(review): chunk boundaries are not adjusted to avoid splitting a
 * surrogate pair across two chunks.
 */
UBool UTextNSStringAccess(UText *ut, int64_t nativeIndex, UBool forward)
{
  NSString *str = (NSString*)ut->p;
  // Index arithmetic is done in signed 64-bit integers so that subtracting
  // the chunk size near the start of the string cannot wrap around.
  const int64_t length = (int64_t)[str length];
  const int64_t cachedStart = ut->chunkNativeStart;
  // The limit is derived from chunkLength rather than chunkNativeLimit so
  // that setting chunkLength to 0 is enough to invalidate the cached chunk.
  const int64_t cachedLimit = cachedStart + ut->chunkLength;
  UBool inBounds = TRUE;
  UBool loadForward = forward;
  UBool cached;
  int64_t start;
  int64_t limit;

  if (forward)
    {
      if (nativeIndex < 0)
        {
          nativeIndex = 0;
        }
      if (nativeIndex >= length)
        {
          // Nothing at or after the index: park at the end of the string,
          // using a chunk that holds the text before it.
          nativeIndex = length;
          inBounds = FALSE;
          loadForward = FALSE;
        }
    }
  else
    {
      if (nativeIndex > length)
        {
          nativeIndex = length;
        }
      if (nativeIndex <= 0)
        {
          // Nothing before the index: park at the start of the string,
          // using a chunk that holds the text after it.
          nativeIndex = 0;
          inBounds = FALSE;
          loadForward = TRUE;
        }
    }

  if (loadForward)
    {
      cached = (nativeIndex >= cachedStart && nativeIndex < cachedLimit);
      start = nativeIndex;
      limit = nativeIndex + (int64_t)chunkSize;
      if (limit > length)
        {
          limit = length;
        }
    }
  else
    {
      cached = (nativeIndex > cachedStart && nativeIndex <= cachedLimit);
      limit = nativeIndex;
      start = nativeIndex - (int64_t)chunkSize;
      if (start < 0)
        {
          start = 0;
        }
    }

  if (!cached)
    {
      NSRange r = NSMakeRange((NSUInteger)start, (NSUInteger)(limit - start));

      [str getCharacters: ut->pExtra range: r];
      ut->chunkNativeStart = start;
      ut->chunkNativeLimit = limit;
      ut->chunkLength = (int32_t)r.length;
    }
  // The storage is UTF-16, so native and chunk indexes correspond for the
  // whole chunk, but only for the chunk: a larger value would let callers
  // index past the characters that were actually loaded.
  ut->nativeIndexingLimit = ut->chunkLength;
  ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);
  return inBounds;
}
/**
 * Replaces characters in an NSMutableString-backed UText.
 *
 * ut               - the UText; ut->p is the NSMutableString to modify.
 * nativeStart      - index of the first character to replace.
 * nativeLimit      - index just past the last character to replace.
 * replacementText  - UTF-16 replacement characters.
 * replacmentLength - number of characters in replacementText, or a negative
 *                    value if replacementText is NUL-terminated.
 * status           - optional ICU error code.  Nothing is done if it already
 *                    holds a failure on entry.
 *
 * Both indexes are pinned to the bounds of the string.  If the start is then
 * still past the limit, U_INDEX_OUTOFBOUNDS_ERROR is reported and the string
 * is left untouched.
 *
 * Returns the signed number of characters by which the string grew (positive)
 * or shrank (negative).  The iteration position is left just after the newly
 * inserted text.
 */
static int32_t UTextNSMutableStringReplace(UText *ut,
  int64_t nativeStart,
  int64_t nativeLimit,
  const UChar *replacementText,
  int32_t replacmentLength,
  UErrorCode *status)
{
  NSMutableString *str = (NSMutableString*)ut->p;
  const int64_t oldLength = (int64_t)[str length];
  int64_t position;
  NSRange r;

  if (NULL != status && U_FAILURE(*status))
    {
      return 0;
    }
  if (NULL == replacementText && 0 != replacmentLength)
    {
      if (NULL != status)
        {
          *status = U_ILLEGAL_ARGUMENT_ERROR;
        }
      return 0;
    }
  if (replacmentLength < 0)
    {
      // The text is terminated by a zero UChar.  It must be measured in
      // 16-bit units: a byte-oriented scan stops at the first zero byte,
      // which for ASCII-range characters is inside the first character.
      const UChar *end = replacementText;

      while (0 != *end)
        {
          end++;
        }
      replacmentLength = (int32_t)(end - replacementText);
    }

  if (nativeStart < 0) { nativeStart = 0; }
  if (nativeStart > oldLength) { nativeStart = oldLength; }
  if (nativeLimit < 0) { nativeLimit = 0; }
  if (nativeLimit > oldLength) { nativeLimit = oldLength; }
  if (nativeStart > nativeLimit)
    {
      if (NULL != status)
        {
          *status = U_INDEX_OUTOFBOUNDS_ERROR;
        }
      return 0;
    }

  r = NSMakeRange((NSUInteger)nativeStart,
    (NSUInteger)(nativeLimit - nativeStart));
  if (replacmentLength > 0)
    {
      // The wrapper only borrows the caller's buffer, and it is released
      // before this function returns.
      NSString *replacement = [[NSString alloc]
        initWithCharactersNoCopy: (unichar*)replacementText
                          length: replacmentLength
                    freeWhenDone: NO];

      [str replaceCharactersInRange: r withString: replacement];
      [replacement release];
    }
  else
    {
      [str deleteCharactersInRange: r];
    }

  // Setting the chunk length to 0 here forces UTextNSStringAccess to fetch
  // the data from the string object.
  position = nativeStart + replacmentLength;
  ut->chunkLength = 0;
  if (!UTextNSStringAccess(ut, position, TRUE) && 0 == ut->chunkLength)
    {
      // Nothing was loaded (the position is at the end of the string), so
      // describe an empty chunk there rather than leaving stale bounds.
      ut->chunkNativeStart = position;
      ut->chunkNativeLimit = position;
      ut->chunkOffset = 0;
    }

  if (NULL != status)
    {
      *status = U_ZERO_ERROR;
    }
  return (int32_t)((int64_t)replacmentLength - (nativeLimit - nativeStart));
}
/**
 * Reads some characters.  This is roughly analogous to NSString's
 * -getCharacters:range:.
 *
 * ut           - the UText; ut->p is the string to read from.
 * nativeStart  - index of the first character to extract.
 * nativeLimit  - index just past the last character to extract.
 * dest         - buffer receiving the characters.  May be NULL only when
 *                destCapacity is 0.
 * destCapacity - size of dest in UChars.  Pass 0 to ask only for the number
 *                of characters that would be extracted.
 * status       - optional ICU error code.  Nothing is done if it already
 *                holds a failure on entry.
 *
 * Both indexes are pinned to the bounds of the string.  As many characters as
 * fit are copied, and the output is NUL-terminated when there is room for the
 * terminator.
 *
 * Returns the number of characters in the requested range, which may exceed
 * the number copied.  In that case status is set to U_BUFFER_OVERFLOW_ERROR.
 * If the text fits exactly, leaving no room for the terminator, status is set
 * to U_STRING_NOT_TERMINATED_WARNING.
 *
 * The cached chunk and iteration position of the UText are not modified.
 */
static int32_t UTextNSStringExtract(UText *ut,
  int64_t nativeStart,
  int64_t nativeLimit,
  UChar *dest,
  int32_t destCapacity,
  UErrorCode *status)
{
  NSString *str = (NSString*)ut->p;
  const int64_t length = (int64_t)[str length];
  int64_t available;
  int64_t copied;

  if (NULL != status && U_FAILURE(*status))
    {
      return 0;
    }
  if (destCapacity < 0 || (NULL == dest && destCapacity > 0))
    {
      if (NULL != status)
        {
          *status = U_ILLEGAL_ARGUMENT_ERROR;
        }
      return 0;
    }

  if (nativeStart < 0) { nativeStart = 0; }
  if (nativeStart > length) { nativeStart = length; }
  if (nativeLimit < 0) { nativeLimit = 0; }
  if (nativeLimit > length) { nativeLimit = length; }
  if (nativeStart > nativeLimit)
    {
      if (NULL != status)
        {
          *status = U_INDEX_OUTOFBOUNDS_ERROR;
        }
      return 0;
    }

  available = nativeLimit - nativeStart;
  copied = (available < destCapacity) ? available : destCapacity;
  if (copied > 0)
    {
      [str getCharacters: dest
                   range: NSMakeRange((NSUInteger)nativeStart,
                            (NSUInteger)copied)];
    }

  if (destCapacity > copied)
    {
      dest[copied] = 0;
    }
  else if (NULL != status)
    {
      // No room for the terminator: either the text was truncated or it
      // filled the buffer exactly.
      *status = (available > destCapacity)
        ? U_BUFFER_OVERFLOW_ERROR : U_STRING_NOT_TERMINATED_WARNING;
    }
  return (int32_t)available;
}
/**
 * Copy or move some characters within a UText.
 *
 * ut          - the UText; ut->p must be an NSMutableString.
 * nativeStart - index of the first character to copy.
 * nativeLimit - index just past the last character to copy.
 * nativeDest  - index at which the characters are inserted.  Existing text is
 *               not overwritten.
 * move        - if true, the source characters are removed after insertion.
 * status      - optional ICU error code.  Nothing is done if it already
 *               holds a failure on entry.
 *
 * All three indexes are pinned to the bounds of the string.  If the start is
 * past the limit, or the destination lies strictly inside the source range,
 * U_INDEX_OUTOFBOUNDS_ERROR is reported and the string is left untouched.
 *
 * The iteration position is left just after the inserted text.
 */
void UTextNSStringCopy(UText *ut,
  int64_t nativeStart,
  int64_t nativeLimit,
  int64_t nativeDest,
  UBool move,
  UErrorCode *status)
{
  NSMutableString *str = (NSMutableString*)ut->p;
  const int64_t length = (int64_t)[str length];
  int64_t segment;
  int64_t position;
  NSString *substr;
  NSRange r;

  if (NULL != status && U_FAILURE(*status))
    {
      return;
    }

  if (nativeStart < 0) { nativeStart = 0; }
  if (nativeStart > length) { nativeStart = length; }
  if (nativeLimit < 0) { nativeLimit = 0; }
  if (nativeLimit > length) { nativeLimit = length; }
  if (nativeDest < 0) { nativeDest = 0; }
  if (nativeDest > length) { nativeDest = length; }
  if (nativeStart > nativeLimit
    || (nativeStart < nativeDest && nativeDest < nativeLimit))
    {
      if (NULL != status)
        {
          *status = U_INDEX_OUTOFBOUNDS_ERROR;
        }
      return;
    }

  segment = nativeLimit - nativeStart;
  r = NSMakeRange((NSUInteger)nativeStart, (NSUInteger)segment);
  substr = [str substringWithRange: r];
  [str insertString: substr atIndex: (NSUInteger)nativeDest];
  position = nativeDest + segment;
  if (move)
    {
      if (nativeDest < nativeStart)
        {
          // The insertion happened before the source, which has therefore
          // shifted right by the length of the inserted text.
          r.location += r.length;
        }
      [str deleteCharactersInRange: r];
      if (nativeDest > nativeStart)
        {
          // The source was removed from before the destination, so the
          // inserted text has shifted left and now ends at nativeDest.
          position = nativeDest;
        }
    }

  // The string has changed, so the cached chunk is stale: invalidate it and
  // reload at the new iteration position.
  ut->chunkLength = 0;
  if (!UTextNSStringAccess(ut, position, TRUE) && 0 == ut->chunkLength)
    {
      // Nothing was loaded (the position is at the end of the string), so
      // describe an empty chunk there rather than leaving stale bounds.
      ut->chunkNativeStart = position;
      ut->chunkNativeLimit = position;
      ut->chunkOffset = 0;
    }

  if (NULL != status)
    {
      *status = U_ZERO_ERROR;
    }
}
/**
 * Tears down the NSString-specific state of a UText: drops the reference to
 * the string held in ut->p and clears the pointers that referred to it.  The
 * UText structure itself is left alone, since it may live on the stack or be
 * reused with a different storage implementation.
 */
static void UTextNStringClose(UText *ut)
{
  NSString *owned = (NSString*)ut->p;

  ut->p = NULL;
  ut->chunkContents = NULL;
  [owned release];
}
/**
 * Clones an NSString-backed UText into dest.  A shallow clone shares the
 * source's string object; a deep clone is set up on the result of sending
 * -copy to it, which for an immutable string typically does not duplicate the
 * underlying storage.  The status argument is not used.
 */
UText* UTextNSStringClone(UText *dest,
  const UText *src,
  UBool deep,
  UErrorCode *status)
{
  NSString *source = (NSString*)src->p;

  if (!deep)
    {
      return UTextInitWithNSString(dest, source);
    }
  // The new UText retains the copy; the autorelease balances -copy.
  return UTextInitWithNSString(dest, [[source copy] autorelease]);
}
/**
 * Copies the UText object, optionally copying the NSMutableString.
 *
 * dest   - the UText to set up, passed through to
 *          UTextInitWithNSMutableString().
 * src    - the UText to clone; src->p is its NSMutableString.
 * deep   - if true the clone gets its own -mutableCopy of the string,
 *          otherwise it shares the source's string object.
 * status - not used.
 *
 * Returns the value returned by UTextInitWithNSMutableString().
 */
UText* UTextNSMutableStringClone(UText *dest,
  const UText *src,
  UBool deep,
  UErrorCode *status)
{
  NSMutableString *str = (NSMutableString*)src->p;
  UText *clone;

  if (deep)
    {
      NSMutableString *copy = [str mutableCopy];

      clone = UTextInitWithNSMutableString(dest, copy);
      // -mutableCopy returned an owned object and the new UText took its own
      // reference, so ours must be dropped or the copy is leaked.
      [copy release];
      return clone;
    }
  return UTextInitWithNSMutableString(dest, str);
}
/**
 * Converts the current position within the temporary buffer into a native
 * index, by adding the chunk offset to the native index at which the chunk
 * starts.
 */
int64_t UTextNSStringMapOffsetToNative(const UText *ut)
{
  int64_t chunkBase = ut->chunkNativeStart;

  return chunkBase + ut->chunkOffset;
}
/**
 * Function table for UTexts backed by an (immutable) NSString.  No replace
 * function is provided.
 */
static const UTextFuncs NSStringFuncs =
{
  .tableSize = sizeof(UTextFuncs),
  .reserved1 = 0,
  .reserved2 = 0,
  .reserved3 = 0,
  .clone = UTextNSStringClone,
  .nativeLength = UTextNSStringNativeLength,
  .access = UTextNSStringAccess,
  .extract = UTextNSStringExtract,
  .replace = 0,
  .copy = UTextNSStringCopy,
  .mapOffsetToNative = UTextNSStringMapOffsetToNative,
  .mapNativeIndexToUTF16 = 0,
  .close = UTextNStringClose,
  .spare1 = 0,
  .spare2 = 0,
  .spare3 = 0
};
/**
 * Function table for UTexts backed by an NSMutableString.  It differs from
 * the NSString table in its clone function and in providing a replace
 * function.
 */
static const UTextFuncs NSMutableStringFuncs =
{
  .tableSize = sizeof(UTextFuncs),
  .reserved1 = 0,
  .reserved2 = 0,
  .reserved3 = 0,
  .clone = UTextNSMutableStringClone,
  .nativeLength = UTextNSStringNativeLength,
  .access = UTextNSStringAccess,
  .extract = UTextNSStringExtract,
  .replace = UTextNSMutableStringReplace,
  .copy = UTextNSStringCopy,
  .mapOffsetToNative = UTextNSStringMapOffsetToNative,
  .mapNativeIndexToUTF16 = 0,
  .close = UTextNStringClose,
  .spare1 = 0,
  .spare2 = 0,
  .spare3 = 0
};
/**
 * Sets up txt as a writable UText backed by the mutable string str, which is
 * retained.  The extra storage requested from utext_setup() is used as the
 * chunk buffer.  Returns the UText produced by utext_setup(), or NULL if
 * utext_setup() reports a failure.
 */
UText* UTextInitWithNSMutableString(UText *txt, NSMutableString *str)
{
  UErrorCode err = 0;
  UText *ut = utext_setup(txt, chunkSize * sizeof(unichar), &err);

  if (U_FAILURE(err))
    {
      return NULL;
    }
  ut->pFuncs = &NSMutableStringFuncs;
  ut->providerProperties = 1<<UTEXT_PROVIDER_WRITABLE;
  ut->nativeIndexingLimit = INT32_MAX;
  ut->chunkContents = ut->pExtra;
  ut->p = [str retain];
  return ut;
}
/**
 * Sets up txt as a UText backed by the string str, which is retained.  The
 * extra storage requested from utext_setup() is used as the chunk buffer.
 * Returns the UText produced by utext_setup(), or NULL if utext_setup()
 * reports a failure.
 */
UText* UTextInitWithNSString(UText *txt, NSString *str)
{
  UErrorCode status = 0;

  // The buffer size is derived from chunkSize rather than hard-coded, so the
  // buffer keeps pace with the number of characters UTextNSStringAccess()
  // copies into it if chunkSize is ever changed.
  txt = utext_setup(txt, chunkSize * sizeof(unichar), &status);
  if (U_FAILURE(status))
    {
      return NULL;
    }
  txt->p = [str retain];
  txt->pFuncs = &NSStringFuncs;
  txt->chunkContents = txt->pExtra;
  txt->nativeIndexingLimit = INT32_MAX;
  return txt;
}
/**
 * String class whose characters are read from the UText held in the txt
 * instance variable.  The UText starts out as UTEXT_INITIALIZER and is closed
 * when the object is deallocated.
 */
@implementation GSUTextString
/** Initialises txt to an unopened UText. */
- init
{
  if (nil == (self = [super init])) { return nil; }
  UText t = UTEXT_INITIALIZER;
  memcpy(&txt, &t, sizeof(t));
  return self;
}
/** Returns the native length reported by the UText. */
- (NSUInteger)length
{
  return utext_nativeLength(&txt);
}
/**
 * Returns the character at idx, fetched through -getCharacters:range:, which
 * raises if idx is out of range.
 */
- (unichar)characterAtIndex: (NSUInteger)idx
{
  unichar c = 0;

  [self getCharacters: &c range: NSMakeRange(idx, 1)];
  return c;
}
/**
 * Copies the characters in range r into buffer.  Raises a range exception if
 * r is not entirely within the string, or if the extraction fails.
 */
- (void)getCharacters: (unichar*)buffer range: (NSRange)r
{
  NSUInteger len = [self length];
  UErrorCode status = 0;

  // Validate the range here rather than relying on the UText provider, so
  // that an out-of-range request raises instead of leaving the buffer
  // partly or wholly unwritten.  Written to avoid overflow in
  // location + length.
  if (r.location > len || r.length > len - r.location)
    {
      _NSRangeExceptionRaise();
      return;
    }
  utext_extract(&txt, r.location, r.location+r.length, buffer, r.length,
    &status);
  if (U_FAILURE(status))
    {
      _NSRangeExceptionRaise();
    }
}
/** Closes the UText before the object is destroyed. */
- (void)dealloc
{
  utext_close(&txt);
  [super dealloc];
}
@end
/**
 * Mutable string class whose characters are read from, and written to, the
 * UText held in the txt instance variable.  The UText starts out as
 * UTEXT_INITIALIZER and is closed when the object is deallocated.
 */
@implementation GSUTextMutableString
/** Initialises txt to an unopened UText. */
- init
{
  if (nil == (self = [super init])) { return nil; }
  UText t = UTEXT_INITIALIZER;
  memcpy(&txt, &t, sizeof(t));
  return self;
}
/** Returns the native length reported by the UText. */
- (NSUInteger)length
{
  return utext_nativeLength(&txt);
}
/**
 * Returns the character at idx, fetched through -getCharacters:range:, which
 * raises if idx is out of range.
 */
- (unichar)characterAtIndex: (NSUInteger)idx
{
  unichar c = 0;

  [self getCharacters: &c range: NSMakeRange(idx, 1)];
  return c;
}
/**
 * Copies the characters in range r into buffer.  Raises a range exception if
 * r is not entirely within the string, or if the extraction fails.
 */
- (void)getCharacters: (unichar*)buffer range: (NSRange)r
{
  NSUInteger len = [self length];
  UErrorCode status = 0;

  // Validate the range here rather than relying on the UText provider, so
  // that an out-of-range request raises instead of leaving the buffer
  // partly or wholly unwritten.  Written to avoid overflow in
  // location + length.
  if (r.location > len || r.length > len - r.location)
    {
      _NSRangeExceptionRaise();
      return;
    }
  utext_extract(&txt, r.location, r.location+r.length, buffer, r.length,
    &status);
  if (U_FAILURE(status))
    {
      _NSRangeExceptionRaise();
    }
}
/**
 * Replaces the characters in range r with the contents of aString.  Raises a
 * range exception if r is not entirely within the string, and an
 * NSInternalInconsistencyException if the UText reports that the replacement
 * failed.
 */
- (void)replaceCharactersInRange: (NSRange)r
                      withString: (NSString*)aString
{
  NSUInteger size = [aString length];
  NSUInteger len = [self length];
  UErrorCode status = 0;

  // Checked before the temporary buffer is set up, so that nothing has been
  // allocated or copied when the exception is raised.
  if (r.location > len || r.length > len - r.location)
    {
      _NSRangeExceptionRaise();
      return;
    }
  TEMP_BUFFER(buffer, size);
  [aString getCharacters: buffer range: NSMakeRange(0, size)];
  utext_replace(&txt, r.location, r.location + r.length, buffer, size,
    &status);
  if (U_FAILURE(status))
    {
      // A failed mutation must not pass silently: the caller would go on
      // believing that the string had been modified.
      [NSException raise: NSInternalInconsistencyException
                  format: @"utext_replace() failed: %s", u_errorName(status)];
    }
}
/** Closes the UText before the object is destroyed. */
- (void)dealloc
{
  utext_close(&txt);
  [super dealloc];
}
@end
#endif // HAVE_ICU