Regular expression improvements for classic environment

This commit is contained in:
rfm 2025-01-04 15:05:14 +00:00
parent 90dfb08668
commit c722328e5c
5 changed files with 6214 additions and 29 deletions

View file

@ -1,3 +1,15 @@
2025-01-04 Richard Frith-Macdonald <rfm@gnu.org>
* Headers/Foundation/NSRegularExpression.h:
* Source/NSRegularExpression.m:
* Tests/base/NSRegularExpression/bigSource.txt:
* Tests/base/NSRegularExpression/callbacks.m:
Restructure to allow enumeration of matches when using the classic
compiler/runtime (ie avoiding blocks) using a new primitive method
specifying a callback function and context. Also fix callback bug
spotted by git user sensetalkdoug and add testcases based on his
pull request (#476) - thanks.
2025-01-01 Richard Frith-Macdonald <rfm@gnu.org>
* Source/NSKeyValueMutableArray.m: Fix mutable array support so that

View file

@ -64,6 +64,15 @@ static const NSMatchingOptions NSMatchingAnchored = 1<<2;
static const NSMatchingOptions NSMatchingWithTransparentBounds = 1<<3;
static const NSMatchingOptions NSMatchingWithoutAnchoringBounds = 1<<4;
@class NSRegularExpression;
#if GS_API_VERSION(GS_API_NONE, GS_API_NONE)
/** Prototype of the C function called by
 * -enumerateMatchesInString:options:range:callback:context:
 * while enumerating matches.<br />
 * The regex argument is the NSRegularExpression instance performing the
 * enumeration and context is the pointer which was supplied to that
 * method.<br />
 * The match argument holds the current match, or is nil when the function
 * is called to report progress or completion (in which case flags contains
 * NSMatchingProgress or NSMatchingCompleted respectively).<br />
 * The function may set the flag pointed to by shouldStop to YES in order
 * to cancel the enumeration.
 */
typedef void (*GSRegexEnumerationCallback)(NSRegularExpression *regex,
void *context, NSTextCheckingResult *match,
NSMatchingFlags flags, BOOL *shouldStop);
#endif
DEFINE_BLOCK_TYPE(GSRegexBlock, void, NSTextCheckingResult*,\
NSMatchingFlags, BOOL*);
@ -71,6 +80,7 @@ DEFINE_BLOCK_TYPE(GSRegexBlock, void, NSTextCheckingResult*,\
#ifndef GSREGEXTYPE
# define GSREGEXTYPE void
#endif
/**
* NSRegularExpression is used to inspect and manipulate strings using regular
* expressions. The interface is thread safe: The same NSRegularExpression
@ -118,7 +128,36 @@ GS_EXPORT_CLASS
- (id) initWithPattern: (NSString*)aPattern
options: (NSRegularExpressionOptions)opts
error: (NSError**)e;
- (NSString*) pattern;
- (NSString*) pattern;
#if GS_API_VERSION(GS_API_NONE, GS_API_NONE)
/** In the GNUstep implementation this method is the fundamental primitive
* (unlike OSX which depends on blocks) upon which other methods are based.
* Its behavior is like that of the
* -enumerateMatchesInString:options:range:usingBlock: method, except that
* it uses a callback rather than a block, and the callback is supplied with
* both the NSRegularExpression instance being used and the context value.
* <br />
* The operation of the method is basically to call the supplied callback
* function for each match of the expression in the string.
* The callback may set the flag pointed to by stop to YES to cancel the
* enumeration at that point.
* <br />
* Setting the NSMatchingReportProgress option asks the underlying ICU code
* to call the callback at additional points during long operations (passing
* nil for the match information and NSMatchingProgress for the flags) so
* that the callback can terminate the enumeration earlier.
* <br />
* Setting the NSMatchingReportCompletion option causes the callback to be
* called once after the last match (with nil match information and the
* NSMatchingCompleted matching flag as well as any additional flags from
* NSMatchingHitEnd, NSMatchingRequiredEnd, or NSMatchingInternalError).
*/
- (void) enumerateMatchesInString: (NSString*)string
options: (NSMatchingOptions)options
range: (NSRange)range
callback: (GSRegexEnumerationCallback)handler
context: (void*)context;
#endif
- (void) enumerateMatchesInString: (NSString*)string
options: (NSMatchingOptions)options
range: (NSRange)range

View file

@ -64,6 +64,12 @@
#import "Foundation/FoundationErrors.h"
#import "Foundation/NSError.h"
/* Bundles the information needed to call the user supplied handler.
 * A pointer to one of these is registered with uregex_setMatchCallback()
 * so that the ICU progress callback can invoke the handler with the
 * regular expression and the caller's context.
 */
typedef struct {
NSRegularExpression *e; // The RE being used (passed to the handler)
GSRegexEnumerationCallback h; // The handler callback function
void *c; // Context for this enumeration (passed to the handler)
} GSRegexContext;
/**
* To be helpful, Apple decided to define a set of flags that mean exactly the
@ -110,6 +116,22 @@ NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
@implementation NSRegularExpression
/* Adapter used to implement block based enumeration on top of the
 * callback based primitive: the context pointer is the block to invoke.
 * The regex argument is not used, since a block does not receive it.
 * A nil block is ignored.
 */
static void
blockCallback(NSRegularExpression *regex,
  void *context, NSTextCheckingResult *match,
  NSMatchingFlags flags, BOOL *shouldStop)
{
  GSRegexBlock	handler = (GSRegexBlock)context;

  if (!handler)
    {
      return;
    }
  CALL_BLOCK(handler, match, flags, shouldStop);
}
+ (NSRegularExpression*) regularExpressionWithPattern: (NSString*)aPattern
options: (NSRegularExpressionOptions)opts
error: (NSError**)e
@ -146,10 +168,11 @@ NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
}
#if !__has_feature(blocks)
if ([self class] != [NSRegularExpression class])
{
GSOnceMLog(@"Warning: NSRegularExpression was built by a compiler without blocks support. NSRegularExpression will deviate from the documented behaviour when subclassing and any code that subclasses NSRegularExpression may break in unexpected ways. If you must subclass NSRegularExpression, you are strongly recommended to use a compiler with blocks support.");
}
/* Warn that the callback based enumeration method, rather than the
 * block based one, is the primitive which a subclass needs to consider.
 */
GSOnceMLog(@"Warning: this implementation of NSRegularExpression uses"
  @" -enumerateMatchesInString:options:range:callback:context: as a"
  @" primitive method rather than the blocks-dependent method used"
  @" by Apple. If you must subclass NSRegularExpression, you must"
  @" bear that difference in mind");
#endif
UTextInitWithNSString(&p, aPattern);
@ -250,10 +273,11 @@ NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
TEMP_BUFFER(buffer, length);
#if !__has_feature(blocks)
if ([self class] != [NSRegularExpression class])
{
GSOnceMLog(@"Warning: NSRegularExpression was built by a compiler without blocks support. NSRegularExpression will deviate from the documented behaviour when subclassing and any code that subclasses NSRegularExpression may break in unexpected ways. If you must subclass NSRegularExpression, you are strongly recommended to use a compiler with blocks support.");
}
/* Warn that the callback based enumeration method, rather than the
 * block based one, is the primitive which a subclass needs to consider.
 */
GSOnceMLog(@"Warning: this implementation of NSRegularExpression uses"
  @" -enumerateMatchesInString:options:range:callback:context: as a"
  @" primitive method rather than the blocks-dependent method used"
  @" by Apple. If you must subclass NSRegularExpression, you must"
  @" bear that difference in mind");
#endif
// Raise an NSInvalidArgumentException to match macOS behaviour.
@ -370,15 +394,16 @@ NSRegularExpressionOptionsToURegexpFlags(NSRegularExpressionOptions opts)
static UBool
callback(const void *context, int32_t steps)
{
BOOL stop = NO;
GSRegexBlock block = (GSRegexBlock)context;
BOOL stop = NO;
GSRegexContext *c = (GSRegexContext*)context;
if (NULL == context)
if (NULL == c)
{
return FALSE;
}
CALL_BLOCK(block, nil, NSMatchingProgress, &stop);
return stop;
(*c->h)(c->e, c->c, nil, NSMatchingProgress, &stop);
return (stop ? FALSE : TRUE);
}
@ -394,9 +419,10 @@ static int32_t _workLimit = DEFAULT_WORK_LIMIT;
+ (void) _defaultsChanged: (NSNotification*)n
{
NSUserDefaults *defs = [NSUserDefaults standardUserDefaults];
id value = [defs objectForKey: @"GSRegularExpressionWorkLimit"];
id value;
int32_t newLimit = DEFAULT_WORK_LIMIT;
value = [defs objectForKey: @"GSRegularExpressionWorkLimit"];
if ([value respondsToSelector: @selector(intValue)])
{
int32_t v = [value intValue];
@ -440,14 +466,15 @@ setupRegex(URegularExpression *regex,
UText *txt,
NSMatchingOptions options,
NSRange range,
GSRegexBlock block)
GSRegexContext *ctx)
{
UErrorCode s = 0;
URegularExpression *r = uregex_clone(regex, &s);
if (options & NSMatchingReportProgress)
{
uregex_setMatchCallback(r, callback, block, &s);
uregex_setMatchCallback(r, callback, ctx, &s);
if (U_FAILURE(s)) NSLog(@"uregex_setMatchCallback() failed");
}
UTextInitWithNSString(txt, string);
uregex_setUText(r, txt, &s);
@ -476,7 +503,7 @@ setupRegex(URegularExpression *regex,
int32_t length,
NSMatchingOptions options,
NSRange range,
GSRegexBlock block)
GSRegexContext *ctx)
{
UErrorCode s = 0;
URegularExpression *r = uregex_clone(regex, &s);
@ -484,7 +511,8 @@ setupRegex(URegularExpression *regex,
[string getCharacters: buffer range: NSMakeRange(0, length)];
if (options & NSMatchingReportProgress)
{
uregex_setMatchCallback(r, callback, block, &s);
uregex_setMatchCallback(r, callback, ctx, &s);
if (U_FAILURE(s)) NSLog(@"uregex_setMatchCallback() failed");
}
uregex_setText(r, buffer, length, &s);
uregex_setRegion(r, range.location, range.location+range.length, &s);
@ -555,16 +583,19 @@ prepareResult(NSRegularExpression *regex,
return flags;
}
#if HAVE_UREGEX_OPENUTEXT
- (void) enumerateMatchesInString: (NSString*)string
options: (NSMatchingOptions)opts
range: (NSRange)range
usingBlock: (GSRegexBlock)block
callback: (GSRegexEnumerationCallback)handler
context: (void*)context
{
UErrorCode s = 0;
UText txt = UTEXT_INITIALIZER;
BOOL stop = NO;
URegularExpression *r = setupRegex(regex, string, &txt, opts, range, block);
GSRegexContext ctx = { self, handler, context };
URegularExpression *r = setupRegex(regex, string, &txt, opts, range, &ctx);
NSUInteger groups = [self numberOfCaptureGroups] + 1;
NSRange ranges[groups];
@ -587,7 +618,7 @@ prepareResult(NSRegularExpression *regex,
regularExpressionCheckingResultWithRanges: ranges
count: groups
regularExpression: self];
CALL_BLOCK(block, result, flags, &stop);
(*handler)(self, context, result, flags, &stop);
}
}
else
@ -603,12 +634,12 @@ prepareResult(NSRegularExpression *regex,
regularExpressionCheckingResultWithRanges: ranges
count: groups
regularExpression: self];
CALL_BLOCK(block, result, flags, &stop);
(*handler)(self, context, result, flags, &stop);
}
}
if (opts & NSMatchingCompleted)
{
CALL_BLOCK(block, nil, NSMatchingCompleted, &stop);
(*handler)(self, context, nil, NSMatchingCompleted, &stop);
}
utext_close(&txt);
uregex_close(r);
@ -617,7 +648,8 @@ prepareResult(NSRegularExpression *regex,
- (void) enumerateMatchesInString: (NSString*)string
options: (NSMatchingOptions)opts
range: (NSRange)range
usingBlock: (GSRegexBlock)block
callback: (GSRegexEnumerationCallback)handler
context: (void*)context
{
UErrorCode s = 0;
BOOL stop = NO;
@ -625,9 +657,10 @@ prepareResult(NSRegularExpression *regex,
URegularExpression *r;
NSUInteger groups = [self numberOfCaptureGroups] + 1;
NSRange ranges[groups];
GSRegexContext ctx = { self, handler, context };
TEMP_BUFFER(buffer, length);
r = setupRegex(regex, string, buffer, length, opts, range, block);
r = setupRegex(regex, string, buffer, length, opts, range, &ctx);
// Should this throw some kind of exception?
if (NULL == r)
@ -648,7 +681,7 @@ prepareResult(NSRegularExpression *regex,
regularExpressionCheckingResultWithRanges: ranges
count: groups
regularExpression: self];
CALL_BLOCK(block, result, flags, &stop);
(*handler)(self, context, result, flags, &stop);
}
}
else
@ -664,17 +697,31 @@ prepareResult(NSRegularExpression *regex,
regularExpressionCheckingResultWithRanges: ranges
count: groups
regularExpression: self];
CALL_BLOCK(block, result, flags, &stop);
(*handler)(self, context, result, flags, &stop);
}
}
if (opts & NSMatchingCompleted)
{
CALL_BLOCK(block, nil, NSMatchingCompleted, &stop);
(*handler)(self, context, nil, NSMatchingCompleted, &stop);
}
uregex_close(r);
}
#endif
/* Block based enumeration, implemented as a thin wrapper around the
 * callback based primitive: the block is passed as the context and
 * blockCallback() invokes it for each callback.
 */
- (void) enumerateMatchesInString: (NSString*)string
                          options: (NSMatchingOptions)opts
                            range: (NSRange)range
                       usingBlock: (GSRegexBlock)block
{
  void	*blockContext = (void*)block;

  [self enumerateMatchesInString: string
                         options: opts
                           range: range
                        callback: blockCallback
                         context: blockContext];
}
/* The remaining methods are all meant to be wrappers around the primitive
* method that takes a block argument. Unfortunately, this is not really
* possible when compiling with a compiler that doesn't support blocks.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,87 @@
#import <Foundation/Foundation.h>
#import "ObjectTesting.h"
/* Enumeration callback used by the test: the context points to an
 * NSInteger counter which is incremented for every real match.  Calls
 * made without a match (progress/completion reports) just log the flags.
 */
static void callback(NSRegularExpression *re, void *context,
  NSTextCheckingResult *match, NSMatchingFlags flags, BOOL *stop)
{
  NSInteger	*counter = (NSInteger*)context;

  if (nil == match)
    {
      NSLog(@"FLAGS: %lu", (unsigned long)flags);
      return;
    }
  (*counter)++;
}
/* Checks that the three ways of enumerating matches (the array returning
 * convenience method, the block based method and the GNUstep callback
 * based method) all report the same number of matches in a large text.
 *
 * All the tests live inside the branch which is compiled only when
 * NSRegularExpression is available, so that a build without ICU skips
 * the set rather than failing to compile on undeclared variables.
 */
int main(void)
{
  START_SET("NSRegularExpression + callbacks")

#if !(__APPLE__ || GS_USE_ICU)
  SKIP("NSRegularExpression not built, please install libicu")
#else
  NSString              *sourceText;
  NSRegularExpression   *simpleRegex;
  NSRange               sourceRange;
  NSArray               *simpleMatches;
  NSUInteger            sourceLength;

  // load source file containing some text repeated 1000 times
  sourceText = [NSString stringWithContentsOfFile: @"bigSource.txt"];
  PASS(sourceText != nil, "loaded bigSource.txt");

  simpleRegex = [NSRegularExpression regularExpressionWithPattern: @"ABC"
                                                          options: 0
                                                            error: NULL];

  /* The range deliberately stops one character short of the end of the
   * text; guard against unsigned underflow if the text failed to load.
   */
  sourceLength = [sourceText length];
  sourceRange = NSMakeRange(0, sourceLength > 0 ? sourceLength - 1 : 0);

  // matchesInString:... uses enumerateMatchesInString:... without any callbacks
  simpleMatches = [simpleRegex matchesInString: sourceText
                                       options: 0
                                         range: sourceRange];
  // NSLog(@"Simple matches: %ld", [simpleMatches count]);
  PASS([simpleMatches count] == 1000, "1000 matches");

# ifndef __has_feature
# define __has_feature(x) 0
# endif
# if __has_feature(blocks)
  // call enumerateMatchesInString:... directly, with block
  __block NSInteger blockCount = 0;

  [simpleRegex enumerateMatchesInString: sourceText
                                options: NSMatchingReportProgress
                                  range: sourceRange
                             usingBlock:
    ^(NSTextCheckingResult * result, NSMatchingFlags flags, BOOL *stop)
      {
        if (result)
          {
            blockCount++;
          }
        else
          {
            NSLog(@"FLAGS: %lu", (unsigned long)flags);
          }
      }];
  // NSLog(@"Number of matches: %ld", blockCount);
  PASS(blockCount == 1000, "enumerate with block has same count");
# endif

# if defined(GNUSTEP_BASE_LIBRARY)
  /* The callback increments the counter through an NSInteger pointer,
   * so the counter must have that type.
   */
  NSInteger matchCount = 0;

  [simpleRegex enumerateMatchesInString: sourceText
                                options: NSMatchingReportProgress
                                  range: sourceRange
                               callback: callback
                                context: (void*)&matchCount];
  // NSLog(@"Number of matches: %ld", matchCount);
  PASS(matchCount == 1000, "enumerate with callback has same count");
# endif
#endif

  END_SET("NSRegularExpression + callbacks")
  return 0;
}