mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-25 01:31:08 +00:00
Added implementation of NSRegularExpression, from iOS 4 Foundation.
This class is a thin wrapper around libicu regular expressions, so if we don't have libicu we simply don't compile it at all. This will give people a linker failure, rather than a nonfunctional class if they try to use GNUstep without ICU with code that requires it. The Apple documentation says that this class has a primitive method that takes a block as an argument and that this method is called by others, so subclasses can replace that block method without touching the convenience methods. We mimic this behaviour when compiling with blocks, but when compiling without them it's a problem. The current code contains some ugly hacks that will work in normal usage but break with subclassing when not compiling with blocks. This commit also includes a partial implementation of NSTextCheckingResult, implementing the subset of its functionality required for NSRegularExpression to work. It also includes numerous fixes to GSICUString. This is heavily used by NSRegularExpression, to avoid copying strings when mapping between UText for libicu and NSString for GNUstep. Note: I don't have a copy of iOS anywhere to test this against, so it's entirely possible that there are significant discrepancies between this implementation of NSRegularExpression and the iOS version. This version should function exactly as the iOS one is described as functioning, but I think we've all seen that Apple documentation refers more to hopes than facts. Any testing that someone who does have an ip{hone,od,ad} can do is very welcome. git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@31635 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
1cccd4bebd
commit
ddbaf75490
8 changed files with 834 additions and 60 deletions
|
@ -44,6 +44,7 @@
|
|||
#import <Foundation/NSBundle.h>
|
||||
#import <Foundation/NSByteOrder.h>
|
||||
#import <Foundation/NSCache.h>
|
||||
#import <Foundation/NSCalendar.h>
|
||||
#import <Foundation/NSCalendarDate.h>
|
||||
#import <Foundation/NSCharacterSet.h>
|
||||
#import <Foundation/NSClassDescription.h>
|
||||
|
@ -78,8 +79,8 @@
|
|||
#import <Foundation/NSKeyedArchiver.h>
|
||||
#import <Foundation/NSKeyValueCoding.h>
|
||||
#import <Foundation/NSKeyValueObserving.h>
|
||||
#import <Foundation/NSLock.h>
|
||||
#import <Foundation/NSLocale.h>
|
||||
#import <Foundation/NSLock.h>
|
||||
#import <Foundation/NSMapTable.h>
|
||||
#import <Foundation/NSMethodSignature.h>
|
||||
#import <Foundation/NSNotification.h>
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#import <Foundation/NSBundle.h>
|
||||
#import <Foundation/NSByteOrder.h>
|
||||
#import <Foundation/NSCache.h>
|
||||
#import <Foundation/NSCalendar.h>
|
||||
#import <Foundation/NSCalendarDate.h>
|
||||
#import <Foundation/NSCharacterSet.h>
|
||||
#import <Foundation/NSClassDescription.h>
|
||||
|
@ -99,6 +100,7 @@
|
|||
#import <Foundation/NSProtocolChecker.h>
|
||||
#import <Foundation/NSProxy.h>
|
||||
#import <Foundation/NSRange.h>
|
||||
#import <Foundation/NSRegularExpression.h>
|
||||
#import <Foundation/NSRunLoop.h>
|
||||
#import <Foundation/NSScanner.h>
|
||||
#import <Foundation/NSSerialization.h>
|
||||
|
@ -108,6 +110,7 @@
|
|||
#import <Foundation/NSStream.h>
|
||||
#import <Foundation/NSString.h>
|
||||
#import <Foundation/NSTask.h>
|
||||
#import <Foundation/NSTextCheckingResult.h>
|
||||
#import <Foundation/NSThread.h>
|
||||
#import <Foundation/NSTimer.h>
|
||||
#import <Foundation/NSTimeZone.h>
|
||||
|
|
90
Headers/Foundation/NSRegularExpression.h
Normal file
90
Headers/Foundation/NSRegularExpression.h
Normal file
|
@ -0,0 +1,90 @@
|
|||
#import "NSObject.h"
|
||||
#import "GNUstepBase/GSBlocks.h"
|
||||
@class NSTextCheckingResult;
|
||||
|
||||
typedef NSUInteger NSRegularExpressionOptions;
|
||||
static const NSRegularExpressionOptions NSRegularExpressionCaseInsensitive = 1<<0;
|
||||
static const NSRegularExpressionOptions NSRegularExpressionAllowCommentsAndWhitespace = 1<<1;
|
||||
static const NSRegularExpressionOptions NSRegularExpressionIgnoreMetacharacters = 1<<2;
|
||||
static const NSRegularExpressionOptions NSRegularExpressionDotMatchesLineSeparators = 1<<3;
|
||||
static const NSRegularExpressionOptions NSRegularExpressionAnchorsMatchLines = 1<<4;
|
||||
static const NSRegularExpressionOptions NSRegularExpressionUseUnixLineSeparators = 1<<5;
|
||||
static const NSRegularExpressionOptions NSRegularExpressionUseUnicodeWordBoundaries = 1<<6;
|
||||
|
||||
typedef NSUInteger NSMatchingFlags;
|
||||
static const NSMatchingFlags NSMatchingProgress = 1<<0;
|
||||
static const NSMatchingFlags NSMatchingCompleted = 1<<1;
|
||||
static const NSMatchingFlags NSMatchingHitEnd = 1<<2;
|
||||
static const NSMatchingFlags NSMatchingRequiredEnd = 1<<3;
|
||||
static const NSMatchingFlags NSMatchingInternalError = 1<<4;
|
||||
|
||||
typedef NSUInteger NSMatchingOptions;
|
||||
static const NSMatchingOptions NSMatchingReportProgress = 1<<0;
|
||||
static const NSMatchingOptions NSMatchingReportCompletion = 1<<1;
|
||||
static const NSMatchingOptions NSMatchingAnchored = 1<<2;
|
||||
static const NSMatchingOptions NSMatchingWithTransparentBounds = 1<<3;
|
||||
static const NSMatchingOptions NSMatchingWithoutAnchoringBounds = 1<<4;
|
||||
|
||||
|
||||
DEFINE_BLOCK_TYPE(GSRegexBlock, void, NSTextCheckingResult*, NSMatchingFlags, BOOL*);
|
||||
|
||||
#ifndef GSREGEXTYPE
|
||||
#define GSREGEXTYPE void
|
||||
#endif
|
||||
|
||||
/**
 * NSRegularExpression represents a compiled regular expression that can be
 * applied to strings.  The compiled form is stored behind an opaque pointer
 * whose type is GSREGEXTYPE; this is void unless GSREGEXTYPE was defined
 * before this header was imported.
 */
@interface NSRegularExpression : NSObject <NSCoding, NSCopying>
{
  @private
  /** The compiled expression (opaque outside the implementation). */
  GSREGEXTYPE *regex;
  /** The options that were supplied when the expression was created. */
  NSRegularExpressionOptions options;
}

/**
 * Convenience constructor; see -initWithPattern:options:error:.
 */
+ (NSRegularExpression*)regularExpressionWithPattern: (NSString*)aPattern
                                             options: (NSRegularExpressionOptions)opts
                                               error: (NSError**)e;

/**
 * Initialises the receiver by compiling aPattern with the given options.
 */
- initWithPattern: (NSString*)aPattern
          options: (NSRegularExpressionOptions)opts
            error: (NSError**)e;

/**
 * Returns the pattern that the receiver was created with.
 */
- (NSString*)pattern;

/**
 * Primitive matching method: calls block for matches of the receiver in the
 * given range of string.
 */
- (void)enumerateMatchesInString: (NSString*)string
                         options: (NSMatchingOptions)options
                           range: (NSRange)range
                      usingBlock: (GSRegexBlock)block;

/**
 * Returns the number of matches in the given range of string.
 */
- (NSUInteger)numberOfMatchesInString: (NSString*)string
                              options: (NSMatchingOptions)options
                                range: (NSRange)range;

/**
 * Returns the first match in the given range of string.
 */
- (NSTextCheckingResult*)firstMatchInString: (NSString*)string
                                    options: (NSMatchingOptions)options
                                      range: (NSRange)range;

/**
 * Returns an array of NSTextCheckingResult objects, one for each match in
 * the given range of string.
 */
- (NSArray*)matchesInString: (NSString*)string
                    options: (NSMatchingOptions)options
                      range: (NSRange)range;

/**
 * Returns the range of the first match in the given range of string.
 */
- (NSRange)rangeOfFirstMatchInString: (NSString*)string
                             options: (NSMatchingOptions)options
                               range: (NSRange)range;

/**
 * Replaces matches in the mutable string with the expansion of template and
 * returns a match count.
 */
- (NSUInteger)replaceMatchesInString: (NSMutableString*)string
                             options: (NSMatchingOptions)options
                               range: (NSRange)range
                        withTemplate: (NSString*)template;

/**
 * Returns a new string in which matches have been replaced with the
 * expansion of template.
 */
- (NSString*)stringByReplacingMatchesInString: (NSString*)string
                                      options: (NSMatchingOptions)options
                                        range: (NSRange)range
                                 withTemplate: (NSString*)template;

/**
 * Returns the replacement text for a single match result.
 */
- (NSString*)replacementStringForResult: (NSTextCheckingResult*)result
                               inString: (NSString*)string
                                 offset: (NSInteger)offset
                               template: (NSString*)template;

#if GS_HAS_DECLARED_PROPERTIES
@property (readonly) NSRegularExpressionOptions options;
@property (readonly) NSUInteger numberOfCaptureGroups;
#else
- (NSRegularExpressionOptions)options;
- (NSUInteger)numberOfCaptureGroups;
#endif
@end
|
55
Headers/Foundation/NSTextCheckingResult.h
Normal file
55
Headers/Foundation/NSTextCheckingResult.h
Normal file
|
@ -0,0 +1,55 @@
|
|||
#import "NSObject.h"
|
||||
#import "NSGeometry.h"
|
||||
|
||||
@class NSArray;
|
||||
@class NSDate;
|
||||
@class NSDictionary;
|
||||
@class NSOrthography;
|
||||
@class NSRegularExpression;
|
||||
@class NSString;
|
||||
@class NSTimeZone;
|
||||
@class NSURL;
|
||||
|
||||
typedef uint64_t NSTextCheckingType;
|
||||
static const NSTextCheckingType NSTextCheckingTypeRegularExpression = 1ULL<<10;
|
||||
|
||||
/**
 * NSTextCheckingResult is an abstract class encapsulating the result of some
 * operation that checks text.  The only result type declared in this header
 * is NSTextCheckingTypeRegularExpression.
 */
@interface NSTextCheckingResult : NSObject

#if GS_HAS_DECLARED_PROPERTIES

@property(readonly) NSDictionary        *addressComponents;
@property(readonly) NSDictionary        *components;
@property(readonly) NSDate              *date;
@property(readonly) NSTimeInterval      duration;
@property(readonly) NSArray             *grammarDetails;
@property(readonly) NSUInteger          numberOfRanges;
@property(readonly) NSOrthography       *orthography;
@property(readonly) NSString            *phoneNumber;
@property(readonly) NSRange             range;
@property(readonly) NSRegularExpression *regularExpression;
@property(readonly) NSString            *replacementString;
@property(readonly) NSTextCheckingType  resultType;
@property(readonly) NSTimeZone          *timeZone;
@property(readonly) NSURL               *URL;

#else

// Plain accessor declarations for compilers without declared properties.
- (NSDictionary*)addressComponents;
- (NSDictionary*)components;
- (NSDate*)date;
- (NSTimeInterval) duration;
- (NSArray*)grammarDetails;
- (NSUInteger) numberOfRanges;
- (NSOrthography*)orthography;
- (NSString*)phoneNumber;
- (NSRange) range;
- (NSRegularExpression*)regularExpression;
- (NSString*)replacementString;
- (NSTextCheckingType) resultType;
- (NSTimeZone*)timeZone;
- (NSURL*)URL;

#endif

/**
 * Creates a result describing a regular expression match.  ranges points to
 * count ranges, and regularExpression is the expression that produced them.
 */
+ (NSTextCheckingResult*)
  regularExpressionCheckingResultWithRanges: (NSRangePointer)ranges
                                      count: (NSUInteger)count
                          regularExpression: (NSRegularExpression*)regularExpression;

@end
|
|
@ -161,6 +161,7 @@ GSSet.m \
|
|||
GSSocketStream.m \
|
||||
GSStream.m \
|
||||
GSString.m \
|
||||
GSICUString.m \
|
||||
GSValue.m \
|
||||
NSAffineTransform.m \
|
||||
NSArchiver.m \
|
||||
|
@ -241,6 +242,7 @@ NSPropertyList.m \
|
|||
NSProtocolChecker.m \
|
||||
NSProxy.m \
|
||||
NSRange.m \
|
||||
NSRegularExpression.m\
|
||||
NSRunLoop.m \
|
||||
NSScanner.m \
|
||||
NSSerializer.m \
|
||||
|
@ -267,6 +269,7 @@ NSURLProtectionSpace.m \
|
|||
NSURLProtocol.m \
|
||||
NSURLRequest.m \
|
||||
NSURLResponse.m \
|
||||
NSTextCheckingResult.m\
|
||||
NSURLHandle.m \
|
||||
NSUserDefaults.m \
|
||||
NSValue.m \
|
||||
|
@ -394,6 +397,7 @@ NSPropertyList.h \
|
|||
NSProtocolChecker.h \
|
||||
NSProxy.h \
|
||||
NSRange.h \
|
||||
NSRegularExpression.h\
|
||||
NSRunLoop.h \
|
||||
NSScanner.h \
|
||||
NSSerialization.h \
|
||||
|
@ -403,6 +407,7 @@ NSSpellServer.h \
|
|||
NSStream.h \
|
||||
NSString.h \
|
||||
NSTask.h \
|
||||
NSTextCheckingResult.h\
|
||||
NSThread.h \
|
||||
NSTimer.h \
|
||||
NSTimeZone.h \
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#import "config.h"
|
||||
#if HAVE_ICU
|
||||
#import "GSICUString.h"
|
||||
|
||||
/**
|
||||
|
@ -16,6 +18,53 @@ static int64_t UTextNSStringNativeLength(UText *ut)
|
|||
return [(NSString*)ut->p length];
|
||||
}
|
||||
|
||||
|
||||
/**
 * Loads a group of characters into the buffer that can be directly accessed by
 * users of the UText.  This is used for iteration by UText users.
 *
 * Returns FALSE when nativeIndex is at or beyond the end of the string and
 * TRUE once the chunk containing nativeIndex is available.
 */
UBool UTextNSStringAccess(UText *ut, int64_t nativeIndex, UBool forward)
{
  NSString *backing = ut->p;
  NSUInteger length = [backing length];
  NSRange wanted;

  if (nativeIndex >= length)
    {
      return FALSE;
    }

  // Fast path: the requested index is already inside the loaded chunk, so
  // only the offset within the chunk needs to move.
  if (nativeIndex >= ut->chunkNativeStart
    && nativeIndex < (ut->chunkNativeStart + ut->chunkLength))
    {
      ut->chunkOffset = nativeIndex - ut->chunkNativeStart;
      return TRUE;
    }

  // The direction hint is deliberately ignored: the chunk is always loaded
  // forwards, starting at the requested index.
  forward = TRUE;
  wanted.location = nativeIndex;
  wanted.length = chunkSize;
  if (nativeIndex + chunkSize > length)
    {
      // Clamp the final chunk to the end of the string.
      wanted.length = length - nativeIndex;
    }

  [backing getCharacters: ut->pExtra range: wanted];
  ut->chunkNativeStart = wanted.location;
  ut->chunkNativeLimit = wanted.location + wanted.length;
  ut->chunkLength = wanted.length;
  ut->chunkOffset = 0;
  return TRUE;
}
|
||||
|
||||
/**
|
||||
* Replaces characters in an NSString-backed UText.
|
||||
*/
|
||||
|
@ -41,10 +90,14 @@ static int32_t UTextNSMutableStringReplace(UText *ut,
|
|||
freeWhenDone: NO];
|
||||
}
|
||||
[str replaceCharactersInRange: r withString: replacement];
|
||||
|
||||
// Setting the chunk length to 0 here forces UTextNSStringAccess to fetch
|
||||
// the data from the string object.
|
||||
ut->chunkLength = 0;
|
||||
UTextNSStringAccess(ut, r.location + [replacement length] + 1, TRUE);
|
||||
ut->chunkOffset++;
|
||||
|
||||
[replacement release];
|
||||
// Update the chunk to reflect the internal changes.
|
||||
r = NSMakeRange(ut->chunkNativeStart, ut->chunkLength);
|
||||
[str getCharacters: ut->pExtra range: r];
|
||||
if (NULL != status)
|
||||
{
|
||||
*status = 0;
|
||||
|
@ -52,51 +105,6 @@ static int32_t UTextNSMutableStringReplace(UText *ut,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a group of characters into the buffer that can be directly accessed by
|
||||
* users of the UText. This is used for iteration but UText users.
|
||||
*/
|
||||
UBool UTextNSStringAccess(UText *ut, int64_t nativeIndex, UBool forward)
|
||||
{
|
||||
// Special case if the chunk already contains this index
|
||||
if (nativeIndex > ut->chunkNativeStart
|
||||
&& nativeIndex < (ut->chunkNativeStart + ut->chunkLength))
|
||||
{
|
||||
ut->chunkOffset = nativeIndex - ut->chunkNativeStart;
|
||||
return TRUE;
|
||||
}
|
||||
NSString *str = ut->p;
|
||||
NSUInteger length = [str length];
|
||||
if (nativeIndex > length) { return FALSE; }
|
||||
NSRange r = {nativeIndex, chunkSize};
|
||||
forward = TRUE;
|
||||
if (forward)
|
||||
{
|
||||
if (nativeIndex + chunkSize > length)
|
||||
{
|
||||
r.length = length - nativeIndex;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nativeIndex - chunkSize > 0)
|
||||
{
|
||||
r.location = nativeIndex - chunkSize;
|
||||
r.length = chunkSize;
|
||||
}
|
||||
else
|
||||
{
|
||||
r.location = 0;
|
||||
r.length = chunkSize - nativeIndex;
|
||||
}
|
||||
}
|
||||
[str getCharacters: ut->pExtra range: r];
|
||||
ut->chunkNativeStart = r.location;
|
||||
ut->chunkLength = r.length;
|
||||
ut->chunkOffset = 0;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads some characters. This is roughly analogous to NSString's
|
||||
* -getCharacters:range:.
|
||||
|
@ -216,7 +224,7 @@ UText* UTextNSMutableStringClone(UText *dest,
|
|||
*/
|
||||
int64_t UTextNSStringMapOffsetToNative(const UText *ut)
|
||||
{
|
||||
return ut->chunkNativeLimit + ut->chunkOffset;
|
||||
return ut->chunkNativeStart + ut->chunkOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -262,11 +270,12 @@ UText* UTextInitWithNSMutableString(UText *txt, NSMutableString *str)
|
|||
UErrorCode status = 0;
|
||||
txt = utext_setup(txt, chunkSize * sizeof(unichar), &status);
|
||||
|
||||
if (0 != status) { return NULL; }
|
||||
if (U_FAILURE(status)) { return NULL; }
|
||||
|
||||
txt->p = str;
|
||||
txt->p = [str retain];
|
||||
txt->pFuncs = &NSMutableStringFuncs;
|
||||
txt->chunkContents = txt->pExtra;
|
||||
txt->nativeIndexingLimit = INT32_MAX;
|
||||
|
||||
txt->providerProperties = 1<<UTEXT_PROVIDER_WRITABLE;
|
||||
|
||||
|
@ -278,17 +287,24 @@ UText* UTextInitWithNSString(UText *txt, NSString *str)
|
|||
UErrorCode status = 0;
|
||||
txt = utext_setup(txt, 64, &status);
|
||||
|
||||
if (0 != status) { return NULL; }
|
||||
if (U_FAILURE(status)) { return NULL; }
|
||||
|
||||
txt->p = str;
|
||||
txt->p = [str retain];
|
||||
txt->pFuncs = &NSStringFuncs;
|
||||
txt->chunkContents = txt->pExtra;
|
||||
txt->nativeIndexingLimit = INT32_MAX;
|
||||
|
||||
return txt;
|
||||
}
|
||||
|
||||
|
||||
@implementation GSUTextString
|
||||
/**
 * Initialises the receiver with its UText set to UTEXT_INITIALIZER.
 */
- init
{
  self = [super init];
  if (nil != self)
    {
      UText blank = UTEXT_INITIALIZER;

      txt = blank;
    }
  return self;
}
|
||||
- (NSUInteger)length
|
||||
{
|
||||
return utext_nativeLength(&txt);
|
||||
|
@ -301,10 +317,10 @@ UText* UTextInitWithNSString(UText *txt, NSString *str)
|
|||
}
|
||||
- (void)getCharacters: (unichar*)buffer range: (NSRange)r
|
||||
{
|
||||
UErrorCode status;
|
||||
UErrorCode status = 0;
|
||||
utext_extract(&txt, r.location, r.location+r.length, buffer, r.length,
|
||||
&status);
|
||||
if (0 != status)
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
_NSRangeExceptionRaise();
|
||||
}
|
||||
|
@ -317,6 +333,13 @@ UText* UTextInitWithNSString(UText *txt, NSString *str)
|
|||
@end
|
||||
|
||||
@implementation GSUTextMutableString
|
||||
/**
 * Initialises the receiver with its UText set to UTEXT_INITIALIZER.
 */
- init
{
  self = [super init];
  if (nil != self)
    {
      UText blank = UTEXT_INITIALIZER;

      txt = blank;
    }
  return self;
}
|
||||
- (NSUInteger)length
|
||||
{
|
||||
return utext_nativeLength(&txt);
|
||||
|
@ -329,10 +352,10 @@ UText* UTextInitWithNSString(UText *txt, NSString *str)
|
|||
}
|
||||
- (void)getCharacters: (unichar*)buffer range: (NSRange)r
|
||||
{
|
||||
UErrorCode status;
|
||||
UErrorCode status = 0;
|
||||
utext_extract(&txt, r.location, r.location+r.length, buffer, r.length,
|
||||
&status);
|
||||
if (0 != status)
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
_NSRangeExceptionRaise();
|
||||
}
|
||||
|
@ -341,7 +364,7 @@ UText* UTextInitWithNSString(UText *txt, NSString *str)
|
|||
withString: (NSString*)aString
|
||||
{
|
||||
NSUInteger size = [aString length];
|
||||
UErrorCode status;
|
||||
UErrorCode status = 0;
|
||||
TEMP_BUFFER(buffer, size);
|
||||
[aString getCharacters: buffer range: NSMakeRange(0, size)];
|
||||
|
||||
|
@ -355,3 +378,4 @@ UText* UTextInitWithNSString(UText *txt, NSString *str)
|
|||
[super dealloc];
|
||||
}
|
||||
@end
|
||||
#endif // HAVE_ICU
|
||||
|
|
517
Source/NSRegularExpression.m
Normal file
517
Source/NSRegularExpression.m
Normal file
|
@ -0,0 +1,517 @@
|
|||
#include "config.h"
|
||||
#if HAVE_ICU
|
||||
#include "unicode/uregex.h"
|
||||
#define GSREGEXTYPE URegularExpression
|
||||
#import "GSICUString.h"
|
||||
#import "Foundation/NSRegularExpression.h"
|
||||
#import "Foundation/NSTextCheckingResult.h"
|
||||
#import "Foundation/NSArray.h"
|
||||
#import "Foundation/NSCoder.h"
|
||||
|
||||
|
||||
/**
 * Translates NSRegularExpressionOptions into the equivalent libicu
 * URegexpFlag bits.  The two sets of flags have the same meanings but
 * different numeric values, so each option has to be mapped individually
 * rather than passed straight through.
 */
uint32_t NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
{
  // Automatic storage is used because the NS constants are const variables
  // rather than compile-time constant expressions in C.
  const struct
    {
      NSRegularExpressionOptions option;
      uint32_t                   icuFlag;
    } mapping[] =
    {
      { NSRegularExpressionCaseInsensitive,            UREGEX_CASE_INSENSITIVE },
      { NSRegularExpressionAllowCommentsAndWhitespace, UREGEX_COMMENTS },
      { NSRegularExpressionIgnoreMetacharacters,       UREGEX_LITERAL },
      { NSRegularExpressionDotMatchesLineSeparators,   UREGEX_DOTALL },
      { NSRegularExpressionAnchorsMatchLines,          UREGEX_MULTILINE },
      { NSRegularExpressionUseUnixLineSeparators,      UREGEX_UNIX_LINES },
      { NSRegularExpressionUseUnicodeWordBoundaries,   UREGEX_UWORD }
    };
  uint32_t result = 0;
  unsigned idx;

  for (idx = 0; idx < sizeof(mapping) / sizeof(mapping[0]); idx++)
    {
      if (opts & mapping[idx].option)
        {
          result |= mapping[idx].icuFlag;
        }
    }
  return result;
}
|
||||
|
||||
@implementation NSRegularExpression
|
||||
/**
 * Returns an autoreleased expression created with
 * -initWithPattern:options:error:.
 */
+ (NSRegularExpression*)regularExpressionWithPattern: (NSString*)aPattern
                                             options: (NSRegularExpressionOptions)opts
                                               error: (NSError**)e
{
  NSRegularExpression *expression = [self alloc];

  expression = [expression initWithPattern: aPattern options: opts error: e];
  return [expression autorelease];
}
|
||||
/**
 * Compiles aPattern with libicu using the flags corresponding to opts.
 * Releases the receiver and returns nil if the pattern text cannot be
 * wrapped in a UText or if libicu fails to compile it.
 */
- initWithPattern: (NSString*)aPattern
          options: (NSRegularExpressionOptions)opts
            error: (NSError**)e
{
  if (nil == (self = [super init])) { return nil; }

  uint32_t flags = NSRegularExpressionOptionsToURegexpFlags(opts);
  UText p = UTEXT_INITIALIZER;
  UParseError pe = {0};
  UErrorCode s = 0;

  // UTextInitWithNSString() returns NULL when the UText could not be set up.
  if (NULL == UTextInitWithNSString(&p, aPattern))
    {
      [self release];
      return nil;
    }
  regex = uregex_openUText(&p, flags, &pe, &s);
  utext_close(&p);
  if (U_FAILURE(s))
    {
      // FIXME: Do something sensible with the error parameter.
      [self release];
      return nil;
    }
  options = opts;
  return self;
}
|
||||
/**
 * Returns the pattern text of the receiver as an autoreleased string backed
 * by a deep clone of the regex's pattern UText, or nil if libicu reports a
 * failure.
 */
- (NSString*)pattern
{
  UErrorCode s = 0;
  UText *t = uregex_patternUText(regex, &s);

  if (U_FAILURE(s))
    {
      return nil;
    }

  GSUTextString *str = [GSUTextString new];

  // The UText returned by uregex_patternUText() is owned by the regular
  // expression, so it is cloned here and must not be closed by us.
  utext_clone(&str->txt, t, FALSE, TRUE, &s);
  if (U_FAILURE(s))
    {
      [str release];
      return nil;
    }
  return [str autorelease];
}
|
||||
|
||||
/**
 * Match progress callback installed with uregex_setMatchCallback().  The
 * context is the GSRegexBlock to notify; it is invoked with a nil result and
 * the NSMatchingProgress flag.
 *
 * libicu continues matching when the callback returns TRUE and terminates
 * the match when it returns FALSE, so the block's stop request has to be
 * inverted.  With no block there is nobody to request a stop, so matching
 * simply continues.
 */
static UBool callback(const void *context, int32_t steps)
{
  BOOL stop = NO;
  GSRegexBlock block = (GSRegexBlock)context;

  if (NULL == context)
    {
      return TRUE;
    }
  CALL_BLOCK(block, nil, NSMatchingProgress, &stop);
  return stop ? FALSE : TRUE;
}
|
||||
/**
 * Sets up a libicu regex object for use.  Note: the documentation states that
 * NSRegularExpression must be thread safe.  To accomplish this, we store a
 * prototype URegularExpression in the object, and then clone it in each
 * method.  This is required because URegularExpression, unlike
 * NSRegularExpression, is stateful, and sharing this state between threads
 * would break concurrent calls.
 *
 * On success the returned clone must be closed with uregex_close() and txt
 * with utext_close() by the caller.  On failure NULL is returned and both
 * the clone and txt have already been closed, so nothing is leaked.
 */
static URegularExpression *setupRegex(URegularExpression *regex,
                                      NSString *string,
                                      UText *txt,
                                      NSMatchingOptions options,
                                      NSRange range,
                                      GSRegexBlock block)
{
  UErrorCode s = 0;
  URegularExpression *r = uregex_clone(regex, &s);

  if (U_FAILURE(s))
    {
      uregex_close(r);
      return NULL;
    }
  // UTextInitWithNSString() retains the string, so from here on every
  // failure path has to close txt again.
  if (NULL == UTextInitWithNSString(txt, string))
    {
      uregex_close(r);
      return NULL;
    }
  if (options & NSMatchingReportProgress)
    {
      uregex_setMatchCallback(r, callback, block, &s);
    }
  uregex_setUText(r, txt, &s);
  uregex_setRegion(r, range.location, range.location+range.length, &s);
  if (options & NSMatchingWithoutAnchoringBounds)
    {
      uregex_useAnchoringBounds(r, FALSE, &s);
    }
  if (options & NSMatchingWithTransparentBounds)
    {
      uregex_useTransparentBounds(r, TRUE, &s);
    }
  if (U_FAILURE(s))
    {
      uregex_close(r);
      utext_close(txt);
      return NULL;
    }
  return r;
}
|
||||
/**
 * Collects the ranges of the current match of r into ranges, which must have
 * room for groups entries (entry 0 is the whole match), and returns the
 * NSMatchingFlags describing the state of the matcher.
 *
 * A capture group that did not take part in the match is reported by libicu
 * with a start of -1; such groups are stored as {NSNotFound, 0}.
 */
static uint32_t prepareResult(NSRegularExpression *regex,
                              URegularExpression *r,
                              NSRangePointer ranges,
                              NSUInteger groups,
                              UErrorCode *s)
{
  uint32_t flags = 0;

  for (NSUInteger i=0 ; i<groups ; i++)
    {
      int32_t start = uregex_start(r, i, s);
      int32_t end = uregex_end(r, i, s);

      if (start < 0 || end < start)
        {
          ranges[i] = NSMakeRange(NSNotFound, 0);
        }
      else
        {
          ranges[i] = NSMakeRange(start, end-start);
        }
    }
  if (uregex_hitEnd(r, s))
    {
      flags |= NSMatchingHitEnd;
    }
  if (uregex_requireEnd(r, s))
    {
      flags |= NSMatchingRequiredEnd;
    }
  // Only real failures count as internal errors; ICU warnings are not.
  if (U_FAILURE(*s))
    {
      flags |= NSMatchingInternalError;
    }
  return flags;
}
|
||||
|
||||
/**
 * Primitive matching method.  Clones the receiver's regex, applies it to the
 * given range of string and calls block once for every match with an
 * NSTextCheckingResult holding the range of the match and of each capture
 * group.  With NSMatchingAnchored only a match starting at the beginning of
 * the region is reported.  With NSMatchingReportCompletion the block is
 * called a final time with a nil result and the NSMatchingCompleted flag.
 * The block may set its BOOL argument to stop the enumeration.
 */
- (void)enumerateMatchesInString: (NSString*)string
                         options: (NSMatchingOptions)opts
                           range: (NSRange)range
                      usingBlock: (GSRegexBlock)block
{
  UErrorCode s = 0;
  UText txt = UTEXT_INITIALIZER;
  BOOL stop = NO;
  URegularExpression *r = setupRegex(regex, string, &txt, opts, range, block);

  // Should this throw some kind of exception?
  if (NULL == r)
    {
      // Closing is harmless if txt was never opened, and releases the
      // retained string if it was.
      utext_close(&txt);
      return;
    }

  NSUInteger groups = [self numberOfCaptureGroups] + 1;
  NSRange ranges[groups];

  if (opts & NSMatchingAnchored)
    {
      if (uregex_lookingAt(r, -1, &s) && U_SUCCESS(s))
        {
          // FIXME: Factor all of this out into prepareResult()
          uint32_t flags = prepareResult(self, r, ranges, groups, &s);
          NSTextCheckingResult *result =
            [NSTextCheckingResult regularExpressionCheckingResultWithRanges: ranges
                                                                      count: groups
                                                          regularExpression: self];
          CALL_BLOCK(block, result, flags, &stop);
        }
    }
  else
    {
      while (!stop && uregex_findNext(r, &s) && U_SUCCESS(s))
        {
          uint32_t flags = prepareResult(self, r, ranges, groups, &s);
          NSTextCheckingResult *result =
            [NSTextCheckingResult regularExpressionCheckingResultWithRanges: ranges
                                                                      count: groups
                                                          regularExpression: self];
          CALL_BLOCK(block, result, flags, &stop);
        }
    }
  // NSMatchingReportCompletion is the option requesting the final call;
  // NSMatchingCompleted is the flag passed to the block.
  if (opts & NSMatchingReportCompletion)
    {
      CALL_BLOCK(block, nil, NSMatchingCompleted, &stop);
    }
  utext_close(&txt);
  uregex_close(r);
}
|
||||
// The remaining methods are all meant to be wrappers around the primitive
|
||||
// method that takes a block argument. Unfortunately, this is not really
|
||||
// possible when compiling with a compiler that doesn't support blocks.
|
||||
#if __has_feature(blocks)
|
||||
/**
 * Counts the matches in the given range of string by enumerating them with
 * the primitive block method.  Progress and completion reporting are
 * switched off so that the block is only called for real matches.
 */
- (NSUInteger)numberOfMatchesInString: (NSString*)string
                              options: (NSMatchingOptions)opts
                                range: (NSRange)range
{
  __block NSUInteger matches = 0;
  NSMatchingOptions quiet = opts;

  quiet &= ~NSMatchingReportProgress;
  quiet &= ~NSMatchingReportCompletion;
  [self enumerateMatchesInString: string
                         options: quiet
                           range: range
                      usingBlock:
    ^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
      {
        matches++;
      }];
  return matches;
}
|
||||
/**
 * Returns the first match in the given range of string, or nil if the block
 * is never called.  The enumeration is stopped as soon as one result has
 * been seen.
 */
- (NSTextCheckingResult*)firstMatchInString: (NSString*)string
                                    options: (NSMatchingOptions)opts
                                      range: (NSRange)range
{
  __block NSTextCheckingResult *found = nil;
  NSMatchingOptions quiet = opts;

  quiet &= ~NSMatchingReportProgress;
  quiet &= ~NSMatchingReportCompletion;
  [self enumerateMatchesInString: string
                         options: quiet
                           range: range
                      usingBlock:
    ^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
      {
        found = result;
        *stop = YES;
      }];
  return found;
}
|
||||
/**
 * Returns an array holding one NSTextCheckingResult for every match in the
 * given range of string, in the order in which they were enumerated.
 */
- (NSArray*)matchesInString: (NSString*)string
                    options: (NSMatchingOptions)opts
                      range: (NSRange)range
{
  NSMutableArray *collected = [NSMutableArray array];
  NSMatchingOptions quiet = opts;

  // The block must only ever see real results, so the calls that pass a
  // nil result (progress and completion) are disabled.
  quiet &= ~NSMatchingReportProgress;
  quiet &= ~NSMatchingReportCompletion;
  [self enumerateMatchesInString: string
                         options: quiet
                           range: range
                      usingBlock:
    ^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
      {
        [collected addObject: result];
      }];
  return collected;
}
|
||||
/**
 * Returns the range of the first match in the given range of string, or
 * {NSNotFound, 0} when there is no match.
 */
- (NSRange)rangeOfFirstMatchInString: (NSString*)string
                             options: (NSMatchingOptions)opts
                               range: (NSRange)range
{
  // Must be initialised: the block is never called when nothing matches.
  __block NSRange r = NSMakeRange(NSNotFound, 0);

  opts &= ~NSMatchingReportProgress;
  opts &= ~NSMatchingReportCompletion;
  GSRegexBlock block =
    ^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
    {
      r = [result range];
      *stop = YES;
    };
  [self enumerateMatchesInString: string
                         options: opts
                           range: range
                      usingBlock: block];
  return r;
}
|
||||
#else
|
||||
# warning Your compiler does not support blocks. NSRegularExpression will deviate from the documented behaviour when subclassing and any code that subclasses NSRegularExpression may break in unexpected ways. It is strongly recommended that you use a compiler with blocks support.
|
||||
# ifdef __clang__
|
||||
# warning Your compiler would support blocks if you added -fblocks to your OBJCFLAGS
|
||||
# endif
|
||||
/*
 * Stand-in for the block based enumeration when blocks are unavailable.
 * Expands, inside a method with string, opts and range arguments, to the
 * matching loop with `code' as its body.  `code' may use r (the cloned
 * regex), s (the ICU status) and stop (set to YES to end the loop).
 * failRet is returned if the regex cannot be set up.
 *
 * Progress reporting is masked out because there is no block to report to,
 * and txt is closed on the early return so the string it retains is not
 * leaked.
 */
#define FAKE_BLOCK_HACK(failRet, code) \
  UErrorCode s = 0;\
  UText txt = UTEXT_INITIALIZER;\
  BOOL stop = NO;\
  URegularExpression *r = setupRegex(regex, string, &txt,\
    opts & ~NSMatchingReportProgress, range, 0);\
  if (NULL == r)\
    {\
      utext_close(&txt);\
      return failRet;\
    }\
  if (opts & NSMatchingAnchored)\
    {\
      if (uregex_lookingAt(r, -1, &s) && (0==s))\
        {\
          code\
        }\
    }\
  else\
    {\
      while (!stop && uregex_findNext(r, &s) && (s == 0))\
        {\
          code\
        }\
    }\
  utext_close(&txt);\
  uregex_close(r);
|
||||
/**
 * Counts the matches in the given range of string.  This variant is used
 * when blocks are unavailable and runs the matching loop directly rather
 * than going through the primitive enumeration method.
 */
- (NSUInteger)numberOfMatchesInString: (NSString*)string
                              options: (NSMatchingOptions)opts
                                range: (NSRange)range
{
  NSUInteger total = 0;

  FAKE_BLOCK_HACK(total,
    {
      total += 1;
    });
  return total;
}
|
||||
/**
 * Returns the first match in the given range of string, or nil if there is
 * none.  This variant is used when blocks are unavailable.
 */
- (NSTextCheckingResult*)firstMatchInString: (NSString*)string
                                    options: (NSMatchingOptions)opts
                                      range: (NSRange)range
{
  NSUInteger rangeCount = [self numberOfCaptureGroups] + 1;
  NSRange matchRanges[rangeCount];
  NSTextCheckingResult *first = nil;

  FAKE_BLOCK_HACK(first,
    {
      prepareResult(self, r, matchRanges, rangeCount, &s);
      first = [NSTextCheckingResult
        regularExpressionCheckingResultWithRanges: matchRanges
                                            count: rangeCount
                                regularExpression: self];
      // One result is enough; end the matching loop.
      stop = YES;
    });
  return first;
}
|
||||
/**
 * Returns an array holding one NSTextCheckingResult for every match in the
 * given range of string.  This variant is used when blocks are unavailable.
 */
- (NSArray*)matchesInString: (NSString*)string
                    options: (NSMatchingOptions)opts
                      range: (NSRange)range
{
  NSUInteger rangeCount = [self numberOfCaptureGroups] + 1;
  NSRange matchRanges[rangeCount];
  NSMutableArray *collected = [NSMutableArray array];

  FAKE_BLOCK_HACK(collected,
    {
      prepareResult(self, r, matchRanges, rangeCount, &s);
      [collected addObject: [NSTextCheckingResult
        regularExpressionCheckingResultWithRanges: matchRanges
                                            count: rangeCount
                                regularExpression: self]];
    });
  return collected;
}
|
||||
/**
 * Returns the range of the first match in the given range of string, or
 * {NSNotFound, 0} when there is no match.  This variant is used when blocks
 * are unavailable.
 */
- (NSRange)rangeOfFirstMatchInString: (NSString*)string
                             options: (NSMatchingOptions)opts
                               range: (NSRange)range
{
  // Start from the "no match" value so that it is what gets returned when
  // the matching loop body never runs.
  NSRange result = {NSNotFound, 0};

  FAKE_BLOCK_HACK(result,
    {
      // Only group 0 (the whole match) is wanted.
      prepareResult(self, r, &result, 1, &s);
      stop = YES;
    });
  return result;
}
|
||||
#endif
|
||||
/**
 * Replaces the matches of the receiver in string with the expansion of
 * template, using libicu's replace-all operation, and returns the number of
 * matches counted beforehand.  If the regex cannot be set up or libicu
 * reports a failure, string is left untouched and 0 is returned.
 */
- (NSUInteger)replaceMatchesInString: (NSMutableString*)string
                             options: (NSMatchingOptions)opts
                               range: (NSRange)range
                        withTemplate: (NSString*)template
{
  // FIXME: We're computing a value that is most likely ignored in an
  // expensive way.
  NSInteger results = [self numberOfMatchesInString: string
                                            options: opts
                                              range: range];
  UErrorCode s = 0;
  UText txt = UTEXT_INITIALIZER;
  UText replacement = UTEXT_INITIALIZER;
  // No block is supplied here, so progress reporting is masked out.
  URegularExpression *r = setupRegex(regex, string, &txt,
    opts & ~NSMatchingReportProgress, range, 0);

  if (NULL == r)
    {
      utext_close(&txt);
      return 0;
    }
  UTextInitWithNSString(&replacement, template);

  UText *output = uregex_replaceAllUText(r, &replacement, NULL, &s);
  GSUTextString *ret = [GSUTextString new];

  if (U_SUCCESS(s))
    {
      utext_clone(&ret->txt, output, TRUE, TRUE, &s);
    }
  if (U_SUCCESS(s))
    {
      [string setString: ret];
    }
  else
    {
      // Nothing was replaced, so do not clobber the caller's string.
      results = 0;
    }
  [ret release];
  uregex_close(r);

  utext_close(&txt);
  utext_close(output);
  utext_close(&replacement);
  return results;
}
|
||||
|
||||
/**
 * Returns a new, autoreleased string built by replacing the matches of the
 * receiver in string with the expansion of template.<br />
 * Returns nil if the regular expression could not be set up or if ICU
 * reports a failure while replacing or copying the result.
 */
- (NSString*) stringByReplacingMatchesInString: (NSString*)string
                                       options: (NSMatchingOptions)opts
                                         range: (NSRange)range
                                  withTemplate: (NSString*)template
{
  UErrorCode            s = U_ZERO_ERROR;
  UText                 txt = UTEXT_INITIALIZER;
  UText                 replacement = UTEXT_INITIALIZER;
  GSUTextString         *ret = nil;
  URegularExpression    *r;
  UText                 *output;

  r = setupRegex(regex, string, &txt, opts, range, 0);
  if (NULL == r)
    {
      utext_close(&txt);
      return nil;
    }
  UTextInitWithNSString(&replacement, template);

  output = uregex_replaceAllUText(r, &replacement, NULL, &s);
  if (!U_FAILURE(s) && NULL != output)
    {
      ret = [GSUTextString new];
      /* Deep clone, so ret does not depend on output once that is closed. */
      utext_clone(&ret->txt, output, TRUE, TRUE, &s);
      if (U_FAILURE(s))
        {
          [ret release];
          ret = nil;
        }
    }

  uregex_close(r);
  utext_close(&txt);
  utext_close(output);          // utext_close() tolerates NULL
  utext_close(&replacement);
  /* The method name does not imply ownership, so hand back an autoreleased
   * object instead of the +1 reference obtained from +new.
   */
  return [ret autorelease];
}
|
||||
|
||||
/**
 * Returns a new, autoreleased string produced by applying template to the
 * part of string covered by [result range].<br />
 * The matched substring is extracted and the receiver's expression is run
 * again over just that substring, replacing its first match.<br />
 * The offset argument is currently not used by this implementation.<br />
 * Returns nil if the regular expression could not be set up or if ICU
 * reports a failure while replacing or copying the result.
 */
- (NSString*) replacementStringForResult: (NSTextCheckingResult*)result
                                inString: (NSString*)string
                                  offset: (NSInteger)offset
                                template: (NSString*)template
{
  UErrorCode            s = U_ZERO_ERROR;
  UText                 txt = UTEXT_INITIALIZER;
  UText                 replacement = UTEXT_INITIALIZER;
  GSUTextString         *ret = nil;
  NSRange               range = [result range];
  URegularExpression    *r;
  UText                 *output;

  r = setupRegex(regex,
                 [string substringWithRange: range],
                 &txt,
                 0,
                 NSMakeRange(0, range.length),
                 0);
  if (NULL == r)
    {
      utext_close(&txt);
      return nil;
    }
  UTextInitWithNSString(&replacement, template);

  output = uregex_replaceFirstUText(r, &replacement, NULL, &s);
  if (!U_FAILURE(s) && NULL != output)
    {
      ret = [GSUTextString new];
      /* Deep clone, so ret does not depend on output once that is closed. */
      utext_clone(&ret->txt, output, TRUE, TRUE, &s);
      if (U_FAILURE(s))
        {
          [ret release];
          ret = nil;
        }
    }

  uregex_close(r);
  utext_close(&txt);
  utext_close(output);          // utext_close() tolerates NULL
  utext_close(&replacement);
  /* The method name does not imply ownership, so hand back an autoreleased
   * object instead of the +1 reference obtained from +new.
   */
  return [ret autorelease];
}
|
||||
/**
 * Returns the value held in the receiver's options instance variable.
 */
- (NSRegularExpressionOptions) options
{
  return options;
}
|
||||
/**
 * Returns the capture group count that ICU reports for the receiver's
 * compiled expression.  The ICU status code is not examined.
 */
- (NSUInteger) numberOfCaptureGroups
{
  UErrorCode    status = U_ZERO_ERROR;
  NSUInteger    groupTotal = uregex_groupCount(regex, &status);

  return groupTotal;
}
|
||||
/**
 * Closes the ICU regular expression held by the receiver, then lets the
 * superclass finish deallocation.
 */
- (void) dealloc
{
  uregex_close(regex);
  [super dealloc];
}
|
||||
/**
 * Archives the receiver as its option flags followed by its pattern.<br />
 * A keyed coder stores them under the keys "options" and "pattern";
 * any other coder receives the same two values in that order, unkeyed.
 */
- (void) encodeWithCoder: (NSCoder*)aCoder
{
  if (![aCoder allowsKeyedCoding])
    {
      [aCoder encodeValueOfObjCType: @encode(NSRegularExpressionOptions)
                                 at: &options];
      [aCoder encodeObject: [self pattern]];
      return;
    }

  [aCoder encodeInteger: options forKey: @"options"];
  [aCoder encodeObject: [self pattern] forKey: @"pattern"];
}
|
||||
/**
 * Restores the option flags and pattern written by -encodeWithCoder: and
 * initialises the receiver from them with
 * -initWithPattern:options:error:, passing NULL for the error.<br />
 * The decoded flags are stored straight into the options instance
 * variable before that initialiser is invoked.
 */
- initWithCoder: (NSCoder*)aCoder
{
  NSString      *pattern;

  if (![aCoder allowsKeyedCoding])
    {
      [aCoder decodeValueOfObjCType: @encode(NSRegularExpressionOptions)
                                 at: &options];
      pattern = [aCoder decodeObject];
    }
  else
    {
      options = [aCoder decodeIntegerForKey: @"options"];
      pattern = [aCoder decodeObjectForKey: @"pattern"];
    }
  return [self initWithPattern: pattern options: options error: NULL];
}
|
||||
/**
 * Returns a new instance (owned by the caller) of the receiver's class,
 * holding the same option flags and an ICU clone of the receiver's
 * compiled expression.<br />
 * Returns nil if ICU cannot clone the expression or if the new object
 * cannot be allocated; in the latter case the clone is closed so that it
 * is not leaked.<br />
 * The new object is filled in directly and no initialiser is invoked on
 * it, as the pattern does not need to be compiled again.
 */
- (id) copyWithZone: (NSZone*)aZone
{
  NSRegularExpression   *copy;
  UErrorCode            s = U_ZERO_ERROR;
  URegularExpression    *r = uregex_clone(regex, &s);

  if (U_FAILURE(s) || NULL == r)
    {
      uregex_close(r);          // uregex_close() tolerates NULL
      return nil;
    }

  copy = [[self class] allocWithZone: aZone];
  if (nil == copy)
    {
      uregex_close(r);
      return nil;
    }
  copy->options = options;
  copy->regex = r;
  return copy;
}
|
||||
@end
|
||||
#endif //HAV_ICU
|
79
Source/NSTextCheckingResult.m
Normal file
79
Source/NSTextCheckingResult.m
Normal file
|
@ -0,0 +1,79 @@
|
|||
#import "Foundation/NSTextCheckingResult.h"
|
||||
#import "Foundation/NSRegularExpression.h"
|
||||
|
||||
/**
|
||||
* Private class encapsulating a regular expression match.
|
||||
*/
|
||||
@interface GSRegularExpressionCheckingResult : NSTextCheckingResult
{
  // TODO: This could be made more efficient by adding a variant that only
  // contained a single range.
@public
  /** How many entries the ranges array holds. */
  NSUInteger            rangeCount;
  /** Heap-allocated array of matched ranges, freed in -dealloc. */
  NSRange               *ranges;
  /** The regular expression that produced this match (retained). */
  NSRegularExpression   *regularExpression;
}
@end
|
||||
|
||||
/**
 * Abstract base implementation.  Apart from the factory method, every
 * accessor here returns an "empty" value (nil, 0, or a not-found range).
 */
@implementation NSTextCheckingResult

/**
 * Returns an autoreleased result which holds a private copy of the first
 * count entries of ranges and retains regularExpression.<br />
 * Returns nil if memory for the copy of the ranges cannot be allocated.
 */
+ (NSTextCheckingResult*) regularExpressionCheckingResultWithRanges:
  (NSRangePointer)ranges
  count: (NSUInteger)count
  regularExpression: (NSRegularExpression*)regularExpression
{
  GSRegularExpressionCheckingResult     *result;

  result = [GSRegularExpressionCheckingResult new];
  if (nil == result)
    {
      return nil;
    }
  if (count > 0)
    {
      result->ranges = calloc(count, sizeof(NSRange));
      if (NULL == result->ranges)
        {
          /* Never copy into a failed allocation. */
          [result release];
          return nil;
        }
      memcpy(result->ranges, ranges, (sizeof(NSRange) * count));
    }
  result->rangeCount = count;
  ASSIGN(result->regularExpression, regularExpression);
  return [result autorelease];
}

- (NSDictionary*) addressComponents { return nil; }
- (NSDictionary*) components { return nil; }
- (NSDate*) date { return nil; }
- (NSTimeInterval) duration { return 0; }
- (NSArray*) grammarDetails { return nil; }
- (NSUInteger) numberOfRanges { return 0; }
- (NSOrthography*) orthography { return nil; }
- (NSString*) phoneNumber { return nil; }
/* A location of NSNotFound marks "no range"; {0, NSNotFound} would look
 * like a huge range starting at index 0.
 */
- (NSRange) range { return NSMakeRange(NSNotFound, 0); }
/* The base class holds no ranges, so every index is out of bounds. */
- (NSRange) rangeAtIndex: (NSUInteger)idx { return NSMakeRange(NSNotFound, 0); }
- (NSRegularExpression*) regularExpression { return nil; }
- (NSString*) replacementString { return nil; }
- (NSTextCheckingType) resultType { return -1; }
- (NSTimeZone*) timeZone { return nil; }
- (NSURL*) URL { return nil; }
@end
|
||||
|
||||
|
||||
|
||||
/**
 * Concrete result for a regular expression match: an array of ranges plus
 * the expression that produced them.
 */
@implementation GSRegularExpressionCheckingResult

/** Returns the number of stored ranges (kept for existing callers). */
- (NSUInteger) rangeCount
{
  return rangeCount;
}

/**
 * Overrides the base class, which always reports 0, so that the stored
 * ranges are discoverable through the public accessor.
 */
- (NSUInteger) numberOfRanges
{
  return rangeCount;
}

/**
 * Returns the first stored range, or {NSNotFound, 0} if the result holds
 * no ranges at all.
 */
- (NSRange) range
{
  if (0 == rangeCount || NULL == ranges)
    {
      return NSMakeRange(NSNotFound, 0);
    }
  return ranges[0];
}

/**
 * Returns the stored range at idx, or {NSNotFound, 0} if idx is not less
 * than the number of stored ranges.
 */
- (NSRange) rangeAtIndex: (NSUInteger)idx
{
  if (idx >= rangeCount || NULL == ranges)
    {
      return NSMakeRange(NSNotFound, 0);
    }
  return ranges[idx];
}

/**
 * Overrides the base class, which always returns nil, to expose the
 * expression retained when the result was created.
 */
- (NSRegularExpression*) regularExpression
{
  return regularExpression;
}

- (NSTextCheckingType) resultType
{
  return NSTextCheckingTypeRegularExpression;
}

/** Releases the retained expression and frees the copied ranges. */
- (void) dealloc
{
  [regularExpression release];
  free(ranges);                 // free(NULL) is a no-op
  [super dealloc];
}
@end
|
||||
|
Loading…
Reference in a new issue