mirror of
https://github.com/gnustep/libs-base.git
synced 2025-05-31 00:30:53 +00:00
regular expression range search
This commit is contained in:
parent
1fdf6395bd
commit
35bb9f48ef
5 changed files with 112 additions and 57 deletions
|
@ -1,3 +1,12 @@
|
||||||
|
2024-05-12 ethanc8R (github user)
|
||||||
|
|
||||||
|
* Headers/Foundation/NSRegularExpression.h:
|
||||||
|
* Headers/Foundation/NSString.h:
|
||||||
|
* Source/NSRegularExpression.m:
|
||||||
|
* Source/NSString.m:
|
||||||
|
* Tests/base/NSString/enumerateSubstringsInRange.m:
|
||||||
|
Added regular expression search methods.
|
||||||
|
|
||||||
2024-05-08 Hugo Melder <hugo@algoriddim.com>
|
2024-05-08 Hugo Melder <hugo@algoriddim.com>
|
||||||
|
|
||||||
* Source/NSIndexSet.m:
|
* Source/NSIndexSet.m:
|
||||||
|
|
|
@ -150,7 +150,7 @@ GS_EXPORT_CLASS
|
||||||
offset: (NSInteger)offset
|
offset: (NSInteger)offset
|
||||||
template: (NSString*)templat;
|
template: (NSString*)templat;
|
||||||
#if OS_API_VERSION(MAC_OS_X_VERSION_10_7, GS_API_LATEST)
|
#if OS_API_VERSION(MAC_OS_X_VERSION_10_7, GS_API_LATEST)
|
||||||
+ (NSString *)escapedPatternForString:(NSString *)string;
|
+ (NSString *) escapedPatternForString: (NSString *)string;
|
||||||
#endif
|
#endif
|
||||||
#if GS_HAS_DECLARED_PROPERTIES
|
#if GS_HAS_DECLARED_PROPERTIES
|
||||||
@property (readonly) NSRegularExpressionOptions options;
|
@property (readonly) NSRegularExpressionOptions options;
|
||||||
|
|
|
@ -527,7 +527,7 @@ GS_EXPORT_CLASS
|
||||||
length: (NSUInteger)length;
|
length: (NSUInteger)length;
|
||||||
+ (instancetype) stringWithCString: (const char*)byteString;
|
+ (instancetype) stringWithCString: (const char*)byteString;
|
||||||
+ (instancetype) stringWithFormat: (NSString*)format, ... NS_FORMAT_FUNCTION(1,2);
|
+ (instancetype) stringWithFormat: (NSString*)format, ... NS_FORMAT_FUNCTION(1,2);
|
||||||
+ (instancetype) stringWithContentsOfFile:(NSString *)path;
|
+ (instancetype) stringWithContentsOfFile: (NSString *)path;
|
||||||
|
|
||||||
// Initializing Newly Allocated Strings
|
// Initializing Newly Allocated Strings
|
||||||
- (instancetype) init;
|
- (instancetype) init;
|
||||||
|
|
|
@ -640,11 +640,12 @@ prepareResult(NSRegularExpression *regex,
|
||||||
|
|
||||||
{
|
{
|
||||||
__block NSUInteger count = 0;
|
__block NSUInteger count = 0;
|
||||||
|
GSRegexBlock block;
|
||||||
|
|
||||||
opts &= ~NSMatchingReportProgress;
|
opts &= ~NSMatchingReportProgress;
|
||||||
opts &= ~NSMatchingReportCompletion;
|
opts &= ~NSMatchingReportCompletion;
|
||||||
|
|
||||||
GSRegexBlock block =
|
block =
|
||||||
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
||||||
{
|
{
|
||||||
count++;
|
count++;
|
||||||
|
@ -660,12 +661,13 @@ prepareResult(NSRegularExpression *regex,
|
||||||
options: (NSMatchingOptions)opts
|
options: (NSMatchingOptions)opts
|
||||||
range: (NSRange)range
|
range: (NSRange)range
|
||||||
{
|
{
|
||||||
__block NSTextCheckingResult *r = nil;
|
__block NSTextCheckingResult *r = nil;
|
||||||
|
GSRegexBlock block;
|
||||||
|
|
||||||
opts &= ~NSMatchingReportProgress;
|
opts &= ~NSMatchingReportProgress;
|
||||||
opts &= ~NSMatchingReportCompletion;
|
opts &= ~NSMatchingReportCompletion;
|
||||||
|
|
||||||
GSRegexBlock block =
|
block =
|
||||||
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
||||||
{
|
{
|
||||||
r = result;
|
r = result;
|
||||||
|
@ -683,11 +685,12 @@ prepareResult(NSRegularExpression *regex,
|
||||||
range:(NSRange)range
|
range:(NSRange)range
|
||||||
{
|
{
|
||||||
NSMutableArray *array = [NSMutableArray array];
|
NSMutableArray *array = [NSMutableArray array];
|
||||||
|
GSRegexBlock block;
|
||||||
|
|
||||||
opts &= ~NSMatchingReportProgress;
|
opts &= ~NSMatchingReportProgress;
|
||||||
opts &= ~NSMatchingReportCompletion;
|
opts &= ~NSMatchingReportCompletion;
|
||||||
|
|
||||||
GSRegexBlock block =
|
block =
|
||||||
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
||||||
{
|
{
|
||||||
[array addObject: result];
|
[array addObject: result];
|
||||||
|
@ -703,12 +706,13 @@ prepareResult(NSRegularExpression *regex,
|
||||||
options: (NSMatchingOptions)opts
|
options: (NSMatchingOptions)opts
|
||||||
range: (NSRange)range
|
range: (NSRange)range
|
||||||
{
|
{
|
||||||
__block NSRange r = {NSNotFound, 0};
|
__block NSRange r = {NSNotFound, 0};
|
||||||
|
GSRegexBlock block;
|
||||||
|
|
||||||
opts &= ~NSMatchingReportProgress;
|
opts &= ~NSMatchingReportProgress;
|
||||||
opts &= ~NSMatchingReportCompletion;
|
opts &= ~NSMatchingReportCompletion;
|
||||||
|
|
||||||
GSRegexBlock block =
|
block =
|
||||||
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
||||||
{
|
{
|
||||||
r = [result range];
|
r = [result range];
|
||||||
|
@ -1064,9 +1068,11 @@ prepareResult(NSRegularExpression *regex,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
+ (NSString *)escapedPatternForString:(NSString *)string {
|
+ (NSString*) escapedPatternForString: (NSString *)string
|
||||||
// https://unicode-org.github.io/icu/userguide/strings/regexp.html
|
{
|
||||||
// Need to escape * ? + [ ( ) { } ^ $ | \ .
|
/* https://unicode-org.github.io/icu/userguide/strings/regexp.html
|
||||||
|
* Need to escape * ? + [ ( ) { } ^ $ | \ .
|
||||||
|
*/
|
||||||
return [[NSRegularExpression
|
return [[NSRegularExpression
|
||||||
regularExpressionWithPattern: @"([*?+\\[(){}^$|\\\\.])"
|
regularExpressionWithPattern: @"([*?+\\[(){}^$|\\\\.])"
|
||||||
options: 0
|
options: 0
|
||||||
|
|
|
@ -6294,29 +6294,47 @@ static NSFileManager *fm = nil;
|
||||||
currentLocation = range.location;
|
currentLocation = range.location;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (substringType == NSStringEnumerationByLines || substringType == NSStringEnumerationByParagraphs)
|
if (substringType == NSStringEnumerationByLines
|
||||||
|
|| substringType == NSStringEnumerationByParagraphs)
|
||||||
{
|
{
|
||||||
BOOL isLineSep = substringType == NSStringEnumerationByLines;
|
BOOL isLineSep = substringType == NSStringEnumerationByLines;
|
||||||
|
|
||||||
while (YES)
|
while (YES)
|
||||||
{
|
{
|
||||||
// contains the index of the first character of the line containing the beginning of aRange.
|
/* Contains the index of the first character of the line
|
||||||
NSUInteger start;
|
* containing the beginning of aRange.
|
||||||
// contains the index of the first character past the terminator of the line containing the end of aRange.
|
*/
|
||||||
NSUInteger end;
|
NSUInteger start;
|
||||||
// contains the index of the first character of the terminator of the line containing the end of aRange.
|
|
||||||
NSUInteger contentsEnd;
|
/* Contains the index of the first character past the
|
||||||
NSRange currentLocationRange = NSMakeRange(currentLocation, 0);
|
* terminator of the line containing the end of aRange.
|
||||||
|
*/
|
||||||
|
NSUInteger end;
|
||||||
|
|
||||||
|
/* Contains the index of the first character of the terminator
|
||||||
|
* of the line containing the end of aRange.
|
||||||
|
*/
|
||||||
|
NSUInteger contentsEnd;
|
||||||
|
NSRange currentLocationRange = NSMakeRange(currentLocation, 0);
|
||||||
|
NSUInteger substringStart;
|
||||||
|
NSRange substringRange;
|
||||||
|
|
||||||
[self _getStart: &start
|
[self _getStart: &start
|
||||||
end: &end
|
end: &end
|
||||||
contentsEnd: &contentsEnd
|
contentsEnd: &contentsEnd
|
||||||
forRange: currentLocationRange
|
forRange: currentLocationRange
|
||||||
lineSep: isLineSep];
|
lineSep: isLineSep];
|
||||||
// If the enumerated range starts after the line/paragraph, we start at the beginning of the enumerated range
|
|
||||||
NSUInteger substringStart = start > range.location ? start : range.location;
|
/* If the enumerated range starts after the line/paragraph,
|
||||||
NSRange substringRange = NSMakeRange(substringStart, contentsEnd - substringStart);
|
* we start at the beginning of the enumerated range
|
||||||
|
*/
|
||||||
|
substringStart = start > range.location ? start : range.location;
|
||||||
|
substringRange
|
||||||
|
= NSMakeRange(substringStart, contentsEnd - substringStart);
|
||||||
CALL_BLOCK(block,
|
CALL_BLOCK(block,
|
||||||
substringNotRequired ? nil : [self substringWithRange: substringRange],
|
substringNotRequired
|
||||||
|
? nil
|
||||||
|
: [self substringWithRange: substringRange],
|
||||||
substringRange,
|
substringRange,
|
||||||
NSMakeRange(start, end - start),
|
NSMakeRange(start, end - start),
|
||||||
&stop);
|
&stop);
|
||||||
|
@ -6327,21 +6345,31 @@ static NSFileManager *fm = nil;
|
||||||
}
|
}
|
||||||
else if (substringType == NSStringEnumerationByComposedCharacterSequences)
|
else if (substringType == NSStringEnumerationByComposedCharacterSequences)
|
||||||
{
|
{
|
||||||
// We could also use rangeOfComposedCharacterSequenceAtIndex:, but then we would need different logic.
|
/* We could also use rangeOfComposedCharacterSequenceAtIndex:,
|
||||||
|
* but then we would need different logic.
|
||||||
|
*/
|
||||||
while (YES)
|
while (YES)
|
||||||
{
|
{
|
||||||
// Since all characters are in a composed character sequence, enclosingRange == substringRange
|
NSRange enclosingRange;
|
||||||
NSRange enclosingRange = [self rangeOfComposedCharacterSequenceAtIndex: currentLocation];
|
|
||||||
|
/* Since all characters are in a composed character sequence,
|
||||||
|
* enclosingRange == substringRange
|
||||||
|
*/
|
||||||
|
enclosingRange
|
||||||
|
= [self rangeOfComposedCharacterSequenceAtIndex: currentLocation];
|
||||||
CALL_BLOCK(block,
|
CALL_BLOCK(block,
|
||||||
substringNotRequired ? nil : [self substringWithRange: enclosingRange],
|
substringNotRequired
|
||||||
|
? nil
|
||||||
|
: [self substringWithRange: enclosingRange],
|
||||||
enclosingRange,
|
enclosingRange,
|
||||||
enclosingRange,
|
enclosingRange,
|
||||||
&stop);
|
&stop);
|
||||||
if(stop) break;
|
if (stop) break;
|
||||||
currentLocation = enclosingRange.location + enclosingRange.length;
|
currentLocation = enclosingRange.location + enclosingRange.length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (substringType == NSStringEnumerationByWords || substringType == NSStringEnumerationBySentences)
|
else if (substringType == NSStringEnumerationByWords
|
||||||
|
|| substringType == NSStringEnumerationBySentences)
|
||||||
{
|
{
|
||||||
#if GS_USE_ICU
|
#if GS_USE_ICU
|
||||||
// These macros may be useful elsewhere.
|
// These macros may be useful elsewhere.
|
||||||
|
@ -6355,63 +6383,75 @@ static NSFileManager *fm = nil;
|
||||||
errorCode = U_ZERO_ERROR; \
|
errorCode = U_ZERO_ERROR; \
|
||||||
} while (NO)
|
} while (NO)
|
||||||
|
|
||||||
BOOL byWords = substringType == NSStringEnumerationByWords;
|
BOOL byWords = substringType == NSStringEnumerationByWords;
|
||||||
NSUInteger length = range.length;
|
NSUInteger length = range.length;
|
||||||
UChar characters[length];
|
UChar characters[length];
|
||||||
|
UErrorCode errorCode = U_ZERO_ERROR;
|
||||||
|
const char *locale;
|
||||||
|
UBreakIterator *breakIterator;
|
||||||
|
|
||||||
[self getCharacters: characters range: range];
|
[self getCharacters: characters range: range];
|
||||||
UErrorCode errorCode = U_ZERO_ERROR;
|
/* @ss=standard will use lists of common abbreviations,
|
||||||
const char* locale = localized
|
* such as Mr., Mrs., etc.
|
||||||
? [[[[NSLocale currentLocale]
|
*/
|
||||||
localeIdentifier]
|
locale = localized
|
||||||
// @ss=standard will use lists of common abbreviations, such as Mr., Mrs., etc.
|
? [[[[NSLocale currentLocale] localeIdentifier]
|
||||||
stringByAppendingString: @"@ss=standard"]
|
stringByAppendingString: @"@ss=standard"] UTF8String]
|
||||||
UTF8String]
|
|
||||||
: "en_US_POSIX";
|
: "en_US_POSIX";
|
||||||
UBreakIterator* breakIterator = ubrk_open(byWords ? UBRK_WORD : UBRK_SENTENCE, // type
|
breakIterator = ubrk_open(
|
||||||
locale, // locale
|
byWords ? UBRK_WORD : UBRK_SENTENCE, // type
|
||||||
characters, // text
|
locale, // locale
|
||||||
length, // textLength
|
characters, // text
|
||||||
&errorCode);
|
length, // textLength
|
||||||
|
&errorCode);
|
||||||
GS_U_HANDLE_ERROR(errorCode, @"opening ICU break iterator");
|
GS_U_HANDLE_ERROR(errorCode, @"opening ICU break iterator");
|
||||||
ubrk_first(breakIterator);
|
ubrk_first(breakIterator);
|
||||||
while (YES)
|
while (YES)
|
||||||
{
|
{
|
||||||
// Make sure it's a valid substring.
|
// Make sure it's a valid substring.
|
||||||
BOOL isValidSubstring = YES;
|
BOOL isValidSubstring = YES;
|
||||||
|
int32_t nextPosition;
|
||||||
|
NSUInteger nextLocation;
|
||||||
|
NSRange enclosingRange;
|
||||||
|
|
||||||
if (byWords)
|
if (byWords)
|
||||||
{
|
{
|
||||||
int32_t ruleStatus = ubrk_getRuleStatus(breakIterator);
|
int32_t ruleStatus = ubrk_getRuleStatus(breakIterator);
|
||||||
// From ICU User Guide:
|
/* From ICU User Guide:
|
||||||
// A status value UBRK_WORD_NONE indicates that the boundary does
|
* A status value UBRK_WORD_NONE indicates that the boundary
|
||||||
// not start a word or number.
|
* does not start a word or number.
|
||||||
// However, valid words seem to be UBRK_WORD_NONE, and invalid words
|
* However, valid words seem to be UBRK_WORD_NONE, and invalid
|
||||||
// seem to be UBRK_WORD_NONE_LIMIT.
|
* words seem to be UBRK_WORD_NONE_LIMIT.
|
||||||
|
*/
|
||||||
isValidSubstring = ruleStatus != UBRK_WORD_NONE_LIMIT;
|
isValidSubstring = ruleStatus != UBRK_WORD_NONE_LIMIT;
|
||||||
NSLog(@"Status for position %d (%d): %d", (int)currentLocation, (int)ubrk_current(breakIterator), (int) ruleStatus);
|
// NSLog(@"Status for position %d (%d): %d", (int)currentLocation, (int)ubrk_current(breakIterator), (int) ruleStatus);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t nextPosition = ubrk_next(breakIterator);
|
nextPosition = ubrk_next(breakIterator);
|
||||||
if (nextPosition == UBRK_DONE) break;
|
if (nextPosition == UBRK_DONE) break;
|
||||||
|
|
||||||
NSUInteger nextLocation = range.location + nextPosition;
|
nextLocation = range.location + nextPosition;
|
||||||
// Same as substringRange
|
// Same as substringRange
|
||||||
NSRange enclosingRange = NSMakeRange(currentLocation, nextLocation - currentLocation);
|
enclosingRange
|
||||||
|
= NSMakeRange(currentLocation, nextLocation - currentLocation);
|
||||||
|
|
||||||
if (isValidSubstring)
|
if (isValidSubstring)
|
||||||
{
|
{
|
||||||
CALL_BLOCK(block,
|
CALL_BLOCK(block,
|
||||||
substringNotRequired ? nil : [self substringWithRange: enclosingRange],
|
substringNotRequired
|
||||||
|
? nil
|
||||||
|
: [self substringWithRange: enclosingRange],
|
||||||
enclosingRange,
|
enclosingRange,
|
||||||
enclosingRange,
|
enclosingRange,
|
||||||
&stop);
|
&stop);
|
||||||
if(stop) break;
|
if (stop) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
currentLocation = nextLocation;
|
currentLocation = nextLocation;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
NSWarnLog(@"NSStringEnumerationByWords and NSStringEnumerationBySentences are not supported when GNUstep-base is compiled without ICU.");
|
NSWarnLog(@"NSStringEnumerationByWords and NSStringEnumerationBySentences"
|
||||||
|
@" are not supported when GNUstep-base is compiled without ICU.");
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue