mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-23 00:41:02 +00:00
Regular expression improvements for classic environment
This commit is contained in:
parent
90dfb08668
commit
c722328e5c
5 changed files with 6214 additions and 29 deletions
12
ChangeLog
12
ChangeLog
|
@ -1,3 +1,15 @@
|
|||
2025-01-04 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Headers/Foundation/NSRegularExpression.h:
|
||||
* Source/NSRegularExpression.m:
|
||||
* Tests/base/NSRegularExpression/bigSource.txt:
|
||||
* Tests/base/NSRegularExpression/callbacks.m:
|
||||
Restructure to allow enumeration of matches when using the classic
|
||||
compiler/runtime (ie avoiding blocks) using a new primitive method
|
||||
specifying a callback function and context. Also fix callback bug
|
||||
spotted by git user sensetalkdoug and add testcases based on his
|
||||
pull request (#476) - thanks.
|
||||
|
||||
2025-01-01 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Source/NSKeyValueMutableArray.m: Fix mutable array support so that
|
||||
|
|
|
@ -64,6 +64,15 @@ static const NSMatchingOptions NSMatchingAnchored = 1<<2;
|
|||
static const NSMatchingOptions NSMatchingWithTransparentBounds = 1<<3;
|
||||
static const NSMatchingOptions NSMatchingWithoutAnchoringBounds = 1<<4;
|
||||
|
||||
@class NSRegularExpression;
|
||||
|
||||
#if GS_API_VERSION(GS_API_NONE, GS_API_NONE)
|
||||
/** Prototype of the C function used as the callback when enumerating
 * matches without blocks.<br />
 * The function is passed the regular expression performing the
 * enumeration, the context pointer supplied by the caller, the current
 * match (nil when the call reports progress or completion rather than a
 * match), the flags describing the call, and a pointer to a flag which
 * the function may set to YES in order to stop the enumeration.
 */
typedef void (*GSRegexEnumerationCallback)(
  NSRegularExpression *regex,
  void *context,
  NSTextCheckingResult *match,
  NSMatchingFlags flags,
  BOOL *shouldStop);
|
||||
#endif
|
||||
|
||||
DEFINE_BLOCK_TYPE(GSRegexBlock, void, NSTextCheckingResult*,\
|
||||
NSMatchingFlags, BOOL*);
|
||||
|
@ -71,6 +80,7 @@ DEFINE_BLOCK_TYPE(GSRegexBlock, void, NSTextCheckingResult*,\
|
|||
#ifndef GSREGEXTYPE
|
||||
# define GSREGEXTYPE void
|
||||
#endif
|
||||
|
||||
/**
|
||||
* NSRegularExpression is used to inspect and manipulate strings using regular
|
||||
* expressions. The interface is thread safe: The same NSRegularExpression
|
||||
|
@ -118,7 +128,36 @@ GS_EXPORT_CLASS
|
|||
- (id) initWithPattern: (NSString*)aPattern
|
||||
options: (NSRegularExpressionOptions)opts
|
||||
error: (NSError**)e;
|
||||
- (NSString*) pattern;
|
||||
- (NSString*) pattern;
|
||||
#if GS_API_VERSION(GS_API_NONE, GS_API_NONE)
|
||||
/** In the GNUstep implementation this method is the fundamental primitive
|
||||
* (unlike OSX which depends on blocks) upon which other methods are based.
|
||||
* Its behavior is like that of the
|
||||
* -enumerateMatchesInString:options:range:usingBlock: method, except that
|
||||
* it uses a callback rather than a block, and the callback is supplied with
|
||||
* both the NSRegularExpression instance being used and the context value.
|
||||
* <br />
|
||||
* The operation of the method is basically to call the supplied callback
|
||||
* function for each match of the expression in the string.
|
||||
* The callback may set the flag pointed to by stop to YES to cancel the
|
||||
* enumeration at that point.
|
||||
* <br />
|
||||
* Setting the NSMatchingReportProgress option asks the underlying ICU code
|
||||
* to call the callback at additional points during long operations (passing
|
||||
* nil for the match information and NSMatchingProgress for the flags) so
|
||||
* that the callback can terminate the enumeration earlier.
|
||||
* <br />
|
||||
* Setting the NSMatchingReportCompletion option causes the callback to be
|
||||
* called once after the last match (with nil match information and the
|
||||
* NSMatchingCompleted matching flag as well as any additional flags from
|
||||
* NSMatchingHitEnd, NSMatchingRequiredEnd, or NSMatchingInternalError).
|
||||
*/
|
||||
- (void) enumerateMatchesInString: (NSString*)string
|
||||
options: (NSMatchingOptions)options
|
||||
range: (NSRange)range
|
||||
callback: (GSRegexEnumerationCallback)handler
|
||||
context: (void*)context;
|
||||
#endif
|
||||
- (void) enumerateMatchesInString: (NSString*)string
|
||||
options: (NSMatchingOptions)options
|
||||
range: (NSRange)range
|
||||
|
|
|
@ -64,6 +64,12 @@
|
|||
#import "Foundation/FoundationErrors.h"
|
||||
#import "Foundation/NSError.h"
|
||||
|
||||
/* Bundles the values which the ICU progress callback needs in order to
 * call the enumeration handler supplied by the user.
 */
typedef struct {
  /* The RE being used */
  NSRegularExpression           *e;
  /* The handler callback function */
  GSRegexEnumerationCallback    h;
  /* Context for this enumeration */
  void                          *c;
} GSRegexContext;
|
||||
|
||||
|
||||
/**
|
||||
* To be helpful, Apple decided to define a set of flags that mean exactly the
|
||||
|
@ -110,6 +116,22 @@ NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
|
|||
|
||||
@implementation NSRegularExpression
|
||||
|
||||
/* Adapter letting a block be driven through the callback based interface.
 * The context pointer is the block itself; when it is not nil it is
 * invoked with the match, the flags and the stop pointer.  The regex
 * argument is not used here.
 */
static void
blockCallback(NSRegularExpression *regex,
  void *context, NSTextCheckingResult *match,
  NSMatchingFlags flags, BOOL *shouldStop)
{
  GSRegexBlock  handler = (GSRegexBlock)context;

  if (!handler)
    {
      return;   // No block was supplied, so there is nothing to call.
    }
  CALL_BLOCK(handler, match, flags, shouldStop);
}
|
||||
|
||||
|
||||
+ (NSRegularExpression*) regularExpressionWithPattern: (NSString*)aPattern
|
||||
options: (NSRegularExpressionOptions)opts
|
||||
error: (NSError**)e
|
||||
|
@ -146,10 +168,11 @@ NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
|
|||
}
|
||||
|
||||
#if !__has_feature(blocks)
|
||||
if ([self class] != [NSRegularExpression class])
|
||||
{
|
||||
GSOnceMLog(@"Warning: NSRegularExpression was built by a compiler without blocks support. NSRegularExpression will deviate from the documented behaviour when subclassing and any code that subclasses NSRegularExpression may break in unexpected ways. If you must subclass NSRegularExpression, you are strongly recommended to use a compiler with blocks support.");
|
||||
}
|
||||
GSOnceMLog(@"Warning: this implementation of NSRegularExpression uses"
|
||||
@" -enumerateMatchesInString:options:range:callback:context: as a"
|
||||
@" primitive method rather than the blocks-dependtent method used"
|
||||
@" by Apple. If you must subclass NSRegularExpression, you must"
|
||||
@" bear that difference in mind");
|
||||
#endif
|
||||
|
||||
UTextInitWithNSString(&p, aPattern);
|
||||
|
@ -250,10 +273,11 @@ NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
|
|||
TEMP_BUFFER(buffer, length);
|
||||
|
||||
#if !__has_feature(blocks)
|
||||
if ([self class] != [NSRegularExpression class])
|
||||
{
|
||||
GSOnceMLog(@"Warning: NSRegularExpression was built by a compiler without blocks support. NSRegularExpression will deviate from the documented behaviour when subclassing and any code that subclasses NSRegularExpression may break in unexpected ways. If you must subclass NSRegularExpression, you are strongly recommended to use a compiler with blocks support.");
|
||||
}
|
||||
GSOnceMLog(@"Warning: this implementation of NSRegularExpression uses"
|
||||
@" -enumerateMatchesInString:options:range:callback:context: as a"
|
||||
@" primitive method rather than the blocks-dependtent method used"
|
||||
@" by Apple. If you must subclass NSRegularExpression, you must"
|
||||
@" bear that difference in mind");
|
||||
#endif
|
||||
|
||||
// Raise an NSInvalidArgumentException to match macOS behaviour.
|
||||
|
@ -370,15 +394,16 @@ NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
|
|||
/* Progress callback handed to uregex_setMatchCallback().
 * The context is the GSRegexContext describing the enumeration; the
 * handler stored in it is called with a nil match and the
 * NSMatchingProgress flag so that it can ask for the enumeration to stop.
 * ICU treats a TRUE return value as 'continue' and FALSE as 'abort', so
 * the stop flag set by the handler has to be inverted on return.
 * A NULL context returns FALSE without calling any handler.
 */
static UBool
callback(const void *context, int32_t steps)
{
  GSRegexContext        *c = (GSRegexContext*)context;
  BOOL                  stop = NO;

  if (NULL == c)
    {
      return FALSE;
    }
  (*c->h)(c->e, c->c, nil, NSMatchingProgress, &stop);

  return (stop ? FALSE : TRUE);
}
|
||||
|
||||
|
||||
|
@ -394,9 +419,10 @@ static int32_t _workLimit = DEFAULT_WORK_LIMIT;
|
|||
+ (void) _defaultsChanged: (NSNotification*)n
|
||||
{
|
||||
NSUserDefaults *defs = [NSUserDefaults standardUserDefaults];
|
||||
id value = [defs objectForKey: @"GSRegularExpressionWorkLimit"];
|
||||
id value;
|
||||
int32_t newLimit = DEFAULT_WORK_LIMIT;
|
||||
|
||||
value = [defs objectForKey: @"GSRegularExpressionWorkLimit"];
|
||||
if ([value respondsToSelector: @selector(intValue)])
|
||||
{
|
||||
int32_t v = [value intValue];
|
||||
|
@ -440,14 +466,15 @@ setupRegex(URegularExpression *regex,
|
|||
UText *txt,
|
||||
NSMatchingOptions options,
|
||||
NSRange range,
|
||||
GSRegexBlock block)
|
||||
GSRegexContext *ctx)
|
||||
{
|
||||
UErrorCode s = 0;
|
||||
URegularExpression *r = uregex_clone(regex, &s);
|
||||
|
||||
if (options & NSMatchingReportProgress)
|
||||
{
|
||||
uregex_setMatchCallback(r, callback, block, &s);
|
||||
uregex_setMatchCallback(r, callback, ctx, &s);
|
||||
if (U_FAILURE(s)) NSLog(@"uregex_setMatchCallback() failed");
|
||||
}
|
||||
UTextInitWithNSString(txt, string);
|
||||
uregex_setUText(r, txt, &s);
|
||||
|
@ -476,7 +503,7 @@ setupRegex(URegularExpression *regex,
|
|||
int32_t length,
|
||||
NSMatchingOptions options,
|
||||
NSRange range,
|
||||
GSRegexBlock block)
|
||||
GSRegexContext *ctx)
|
||||
{
|
||||
UErrorCode s = 0;
|
||||
URegularExpression *r = uregex_clone(regex, &s);
|
||||
|
@ -484,7 +511,8 @@ setupRegex(URegularExpression *regex,
|
|||
[string getCharacters: buffer range: NSMakeRange(0, length)];
|
||||
if (options & NSMatchingReportProgress)
|
||||
{
|
||||
uregex_setMatchCallback(r, callback, block, &s);
|
||||
uregex_setMatchCallback(r, callback, ctx, &s);
|
||||
if (U_FAILURE(s)) NSLog(@"uregex_setMatchCallback() failed");
|
||||
}
|
||||
uregex_setText(r, buffer, length, &s);
|
||||
uregex_setRegion(r, range.location, range.location+range.length, &s);
|
||||
|
@ -555,16 +583,19 @@ prepareResult(NSRegularExpression *regex,
|
|||
return flags;
|
||||
}
|
||||
|
||||
|
||||
#if HAVE_UREGEX_OPENUTEXT
|
||||
- (void) enumerateMatchesInString: (NSString*)string
|
||||
options: (NSMatchingOptions)opts
|
||||
range: (NSRange)range
|
||||
usingBlock: (GSRegexBlock)block
|
||||
callback: (GSRegexEnumerationCallback)handler
|
||||
context: (void*)context
|
||||
{
|
||||
UErrorCode s = 0;
|
||||
UText txt = UTEXT_INITIALIZER;
|
||||
BOOL stop = NO;
|
||||
URegularExpression *r = setupRegex(regex, string, &txt, opts, range, block);
|
||||
GSRegexContext ctx = { self, handler, context };
|
||||
URegularExpression *r = setupRegex(regex, string, &txt, opts, range, &ctx);
|
||||
NSUInteger groups = [self numberOfCaptureGroups] + 1;
|
||||
NSRange ranges[groups];
|
||||
|
||||
|
@ -587,7 +618,7 @@ prepareResult(NSRegularExpression *regex,
|
|||
regularExpressionCheckingResultWithRanges: ranges
|
||||
count: groups
|
||||
regularExpression: self];
|
||||
CALL_BLOCK(block, result, flags, &stop);
|
||||
(*handler)(self, context, result, flags, &stop);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -603,12 +634,12 @@ prepareResult(NSRegularExpression *regex,
|
|||
regularExpressionCheckingResultWithRanges: ranges
|
||||
count: groups
|
||||
regularExpression: self];
|
||||
CALL_BLOCK(block, result, flags, &stop);
|
||||
(*handler)(self, context, result, flags, &stop);
|
||||
}
|
||||
}
|
||||
if (opts & NSMatchingCompleted)
|
||||
{
|
||||
CALL_BLOCK(block, nil, NSMatchingCompleted, &stop);
|
||||
(*handler)(self, context, nil, NSMatchingCompleted, &stop);
|
||||
}
|
||||
utext_close(&txt);
|
||||
uregex_close(r);
|
||||
|
@ -617,7 +648,8 @@ prepareResult(NSRegularExpression *regex,
|
|||
- (void) enumerateMatchesInString: (NSString*)string
|
||||
options: (NSMatchingOptions)opts
|
||||
range: (NSRange)range
|
||||
usingBlock: (GSRegexBlock)block
|
||||
callback: (GSRegexEnumerationCallback)handler
|
||||
context: (void*)context
|
||||
{
|
||||
UErrorCode s = 0;
|
||||
BOOL stop = NO;
|
||||
|
@ -625,9 +657,10 @@ prepareResult(NSRegularExpression *regex,
|
|||
URegularExpression *r;
|
||||
NSUInteger groups = [self numberOfCaptureGroups] + 1;
|
||||
NSRange ranges[groups];
|
||||
GSRegexContext ctx = { self, handler, context };
|
||||
TEMP_BUFFER(buffer, length);
|
||||
|
||||
r = setupRegex(regex, string, buffer, length, opts, range, block);
|
||||
r = setupRegex(regex, string, buffer, length, opts, range, &ctx);
|
||||
|
||||
// Should this throw some kind of exception?
|
||||
if (NULL == r)
|
||||
|
@ -648,7 +681,7 @@ prepareResult(NSRegularExpression *regex,
|
|||
regularExpressionCheckingResultWithRanges: ranges
|
||||
count: groups
|
||||
regularExpression: self];
|
||||
CALL_BLOCK(block, result, flags, &stop);
|
||||
(*handler)(self, context, result, flags, &stop);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -664,17 +697,31 @@ prepareResult(NSRegularExpression *regex,
|
|||
regularExpressionCheckingResultWithRanges: ranges
|
||||
count: groups
|
||||
regularExpression: self];
|
||||
CALL_BLOCK(block, result, flags, &stop);
|
||||
(*handler)(self, context, result, flags, &stop);
|
||||
}
|
||||
}
|
||||
if (opts & NSMatchingCompleted)
|
||||
{
|
||||
CALL_BLOCK(block, nil, NSMatchingCompleted, &stop);
|
||||
(*handler)(self, context, nil, NSMatchingCompleted, &stop);
|
||||
}
|
||||
uregex_close(r);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Block based enumeration, implemented on top of the callback based
 * method: the block is passed along as the context pointer and the
 * blockCallback function is used as the handler which invokes it.
 */
- (void) enumerateMatchesInString: (NSString*)string
                          options: (NSMatchingOptions)opts
                            range: (NSRange)range
                       usingBlock: (GSRegexBlock)block
{
  void  *blockAsContext = (void*)block;

  [self enumerateMatchesInString: string
                         options: opts
                           range: range
                        callback: blockCallback
                         context: blockAsContext];
}
|
||||
|
||||
|
||||
/* The remaining methods are all meant to be wrappers around the primitive
|
||||
* method that takes a block argument. Unfortunately, this is not really
|
||||
* possible when compiling with a compiler that doesn't support blocks.
|
||||
|
|
6000
Tests/base/NSRegularExpression/bigSource.txt
Normal file
6000
Tests/base/NSRegularExpression/bigSource.txt
Normal file
File diff suppressed because it is too large
Load diff
87
Tests/base/NSRegularExpression/callbacks.m
Normal file
87
Tests/base/NSRegularExpression/callbacks.m
Normal file
|
@ -0,0 +1,87 @@
|
|||
#import <Foundation/Foundation.h>
|
||||
#import "ObjectTesting.h"
|
||||
|
||||
/* Enumeration callback used by the test.  The context points at an
 * integer counter which is incremented for every real match; calls made
 * without a match only log the flags they were given.
 */
static void callback(NSRegularExpression *re, void *context,
  NSTextCheckingResult *match, NSMatchingFlags flags, BOOL *stop)
{
  NSInteger     *counter = (NSInteger*)context;

  if (nil == match)
    {
      NSLog(@"FLAGS: %lu", (unsigned long)flags);
      return;
    }
  (*counter)++;
}
|
||||
|
||||
/* Checks that the array based, block based and C callback based ways of
 * finding matches all report the same number of matches for a simple
 * pattern applied to the contents of bigSource.txt.
 */
int main(void)
{
  START_SET("NSRegularExpression + callbacks")

#if !(__APPLE__ || GS_USE_ICU)
  SKIP("NSRegularExpression not built, please install libicu")
#else
  NSString              *sourceText;
  NSRegularExpression   *simpleRegex;
  NSRange               sourceRange;
  NSArray               *simpleMatches;
  /* Signed, because the callback increments it through an NSInteger*. */
  NSInteger             matchCount = 0;

  // load source file containing some text repeated 1000 times
  sourceText = [NSString stringWithContentsOfFile: @"bigSource.txt"];
  simpleRegex = [NSRegularExpression regularExpressionWithPattern: @"ABC"
                                                          options: 0
                                                            error: NULL];

  sourceRange = NSMakeRange(0, [sourceText length] - 1);
  // matchesInString:... uses enumerateMatchesInString:... without any callbacks
  simpleMatches = [simpleRegex matchesInString: sourceText
                                       options: 0
                                         range: sourceRange];

  // NSLog(@"Simple matches: %ld", [simpleMatches count]);
  PASS([simpleMatches count] == 1000, "1000 matches");

# ifndef __has_feature
# define __has_feature(x) 0
# endif
# if __has_feature(blocks)

  // call enumerateMatchesInString:... directly, with block
  __block NSInteger blockCount = 0;
  [simpleRegex enumerateMatchesInString: sourceText
                                options: NSMatchingReportProgress
                                  range: sourceRange
                             usingBlock:
    ^(NSTextCheckingResult * result, NSMatchingFlags flags, BOOL *stop)
      {
        if (result)
          {
            blockCount++;
          }
        else
          {
            NSLog(@"FLAGS: %lu", (unsigned long)flags);
          }
      }];

  // NSLog(@"Number of matches: %ld", blockCount);
  PASS(blockCount == 1000, "enumerate with block has same count");
# endif

  /* This section uses the variables declared above, so it must stay
   * inside the ICU guard: otherwise a GNUstep build without ICU would
   * refer to undeclared names and fail to compile.
   */
# if defined(GNUSTEP_BASE_LIBRARY)

  [simpleRegex enumerateMatchesInString: sourceText
                                options: NSMatchingReportProgress
                                  range: sourceRange
                               callback: callback
                                context: (void*)&matchCount];
  // NSLog(@"Number of matches: %ld", matchCount);
  PASS(matchCount == 1000, "enumerate with callback has same count");
# endif

#endif

  END_SET("NSRegularExpression + callbacks")
  return 0;
}
|
Loading…
Reference in a new issue