mirror of
https://github.com/gnustep/libs-base.git
synced 2025-05-30 00:11:26 +00:00
regular expression range search
This commit is contained in:
parent
1fdf6395bd
commit
35bb9f48ef
5 changed files with 112 additions and 57 deletions
|
@ -1,3 +1,12 @@
|
|||
2024-05-12 ethanc8R (github user)
|
||||
|
||||
* Headers/Foundation/NSRegularExpression.h:
|
||||
* Headers/Foundation/NSString.h:
|
||||
* Source/NSRegularExpression.m:
|
||||
* Source/NSString.m:
|
||||
* Tests/base/NSString/enumerateSubstringsInRange.m:
|
||||
Added regular expression search methods.
|
||||
|
||||
2024-05-08 Hugo Melder <hugo@algoriddim.com>
|
||||
|
||||
* Source/NSIndexSet.m:
|
||||
|
|
|
@ -150,7 +150,7 @@ GS_EXPORT_CLASS
|
|||
offset: (NSInteger)offset
|
||||
template: (NSString*)templat;
|
||||
#if OS_API_VERSION(MAC_OS_X_VERSION_10_7, GS_API_LATEST)
|
||||
+ (NSString *)escapedPatternForString:(NSString *)string;
|
||||
+ (NSString *) escapedPatternForString: (NSString *)string;
|
||||
#endif
|
||||
#if GS_HAS_DECLARED_PROPERTIES
|
||||
@property (readonly) NSRegularExpressionOptions options;
|
||||
|
|
|
@ -527,7 +527,7 @@ GS_EXPORT_CLASS
|
|||
length: (NSUInteger)length;
|
||||
+ (instancetype) stringWithCString: (const char*)byteString;
|
||||
+ (instancetype) stringWithFormat: (NSString*)format, ... NS_FORMAT_FUNCTION(1,2);
|
||||
+ (instancetype) stringWithContentsOfFile:(NSString *)path;
|
||||
+ (instancetype) stringWithContentsOfFile: (NSString *)path;
|
||||
|
||||
// Initializing Newly Allocated Strings
|
||||
- (instancetype) init;
|
||||
|
|
|
@ -640,11 +640,12 @@ prepareResult(NSRegularExpression *regex,
|
|||
|
||||
{
|
||||
__block NSUInteger count = 0;
|
||||
GSRegexBlock block;
|
||||
|
||||
opts &= ~NSMatchingReportProgress;
|
||||
opts &= ~NSMatchingReportCompletion;
|
||||
|
||||
GSRegexBlock block =
|
||||
block =
|
||||
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
||||
{
|
||||
count++;
|
||||
|
@ -660,12 +661,13 @@ prepareResult(NSRegularExpression *regex,
|
|||
options: (NSMatchingOptions)opts
|
||||
range: (NSRange)range
|
||||
{
|
||||
__block NSTextCheckingResult *r = nil;
|
||||
__block NSTextCheckingResult *r = nil;
|
||||
GSRegexBlock block;
|
||||
|
||||
opts &= ~NSMatchingReportProgress;
|
||||
opts &= ~NSMatchingReportCompletion;
|
||||
|
||||
GSRegexBlock block =
|
||||
block =
|
||||
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
||||
{
|
||||
r = result;
|
||||
|
@ -683,11 +685,12 @@ prepareResult(NSRegularExpression *regex,
|
|||
range:(NSRange)range
|
||||
{
|
||||
NSMutableArray *array = [NSMutableArray array];
|
||||
GSRegexBlock block;
|
||||
|
||||
opts &= ~NSMatchingReportProgress;
|
||||
opts &= ~NSMatchingReportCompletion;
|
||||
|
||||
GSRegexBlock block =
|
||||
block =
|
||||
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
||||
{
|
||||
[array addObject: result];
|
||||
|
@ -703,12 +706,13 @@ prepareResult(NSRegularExpression *regex,
|
|||
options: (NSMatchingOptions)opts
|
||||
range: (NSRange)range
|
||||
{
|
||||
__block NSRange r = {NSNotFound, 0};
|
||||
__block NSRange r = {NSNotFound, 0};
|
||||
GSRegexBlock block;
|
||||
|
||||
opts &= ~NSMatchingReportProgress;
|
||||
opts &= ~NSMatchingReportCompletion;
|
||||
|
||||
GSRegexBlock block =
|
||||
block =
|
||||
^(NSTextCheckingResult *result, NSMatchingFlags flags, BOOL *stop)
|
||||
{
|
||||
r = [result range];
|
||||
|
@ -1064,9 +1068,11 @@ prepareResult(NSRegularExpression *regex,
|
|||
}
|
||||
#endif
|
||||
|
||||
+ (NSString *)escapedPatternForString:(NSString *)string {
|
||||
// https://unicode-org.github.io/icu/userguide/strings/regexp.html
|
||||
// Need to escape * ? + [ ( ) { } ^ $ | \ .
|
||||
+ (NSString*) escapedPatternForString: (NSString *)string
|
||||
{
|
||||
/* https://unicode-org.github.io/icu/userguide/strings/regexp.html
|
||||
* Need to escape * ? + [ ( ) { } ^ $ | \ .
|
||||
*/
|
||||
return [[NSRegularExpression
|
||||
regularExpressionWithPattern: @"([*?+\\[(){}^$|\\\\.])"
|
||||
options: 0
|
||||
|
|
|
@ -6294,29 +6294,47 @@ static NSFileManager *fm = nil;
|
|||
currentLocation = range.location;
|
||||
}
|
||||
|
||||
if (substringType == NSStringEnumerationByLines || substringType == NSStringEnumerationByParagraphs)
|
||||
if (substringType == NSStringEnumerationByLines
|
||||
|| substringType == NSStringEnumerationByParagraphs)
|
||||
{
|
||||
BOOL isLineSep = substringType == NSStringEnumerationByLines;
|
||||
|
||||
while (YES)
|
||||
{
|
||||
// contains the index of the first character of the line containing the beginning of aRange.
|
||||
NSUInteger start;
|
||||
// contains the index of the first character past the terminator of the line containing the end of aRange.
|
||||
NSUInteger end;
|
||||
// contains the index of the first character of the terminator of the line containing the end of aRange.
|
||||
NSUInteger contentsEnd;
|
||||
NSRange currentLocationRange = NSMakeRange(currentLocation, 0);
|
||||
/* Contains the index of the first character of the line
|
||||
* containing the beginning of aRange.
|
||||
*/
|
||||
NSUInteger start;
|
||||
|
||||
/* Contains the index of the first character past the
|
||||
* terminator of the line containing the end of aRange.
|
||||
*/
|
||||
NSUInteger end;
|
||||
|
||||
/* Contains the index of the first character of the terminator
|
||||
* of the line containing the end of aRange.
|
||||
*/
|
||||
NSUInteger contentsEnd;
|
||||
NSRange currentLocationRange = NSMakeRange(currentLocation, 0);
|
||||
NSUInteger substringStart;
|
||||
NSRange substringRange;
|
||||
|
||||
[self _getStart: &start
|
||||
end: &end
|
||||
contentsEnd: &contentsEnd
|
||||
forRange: currentLocationRange
|
||||
lineSep: isLineSep];
|
||||
// If the enumerated range starts after the line/paragraph, we start at the beginning of the enumerated range
|
||||
NSUInteger substringStart = start > range.location ? start : range.location;
|
||||
NSRange substringRange = NSMakeRange(substringStart, contentsEnd - substringStart);
|
||||
|
||||
/* If the enumerated range starts after the line/paragraph,
|
||||
* we start at the beginning of the enumerated range
|
||||
*/
|
||||
substringStart = start > range.location ? start : range.location;
|
||||
substringRange
|
||||
= NSMakeRange(substringStart, contentsEnd - substringStart);
|
||||
CALL_BLOCK(block,
|
||||
substringNotRequired ? nil : [self substringWithRange: substringRange],
|
||||
substringNotRequired
|
||||
? nil
|
||||
: [self substringWithRange: substringRange],
|
||||
substringRange,
|
||||
NSMakeRange(start, end - start),
|
||||
&stop);
|
||||
|
@ -6327,21 +6345,31 @@ static NSFileManager *fm = nil;
|
|||
}
|
||||
else if (substringType == NSStringEnumerationByComposedCharacterSequences)
|
||||
{
|
||||
// We could also use rangeOfComposedCharacterSequenceAtIndex:, but then we would need different logic.
|
||||
/* We could also use rangeOfComposedCharacterSequenceAtIndex:,
|
||||
* but then we would need different logic.
|
||||
*/
|
||||
while (YES)
|
||||
{
|
||||
// Since all characters are in a composed character sequence, enclosingRange == substringRange
|
||||
NSRange enclosingRange = [self rangeOfComposedCharacterSequenceAtIndex: currentLocation];
|
||||
NSRange enclosingRange;
|
||||
|
||||
/* Since all characters are in a composed character sequence,
|
||||
* enclosingRange == substringRange
|
||||
*/
|
||||
enclosingRange
|
||||
= [self rangeOfComposedCharacterSequenceAtIndex: currentLocation];
|
||||
CALL_BLOCK(block,
|
||||
substringNotRequired ? nil : [self substringWithRange: enclosingRange],
|
||||
substringNotRequired
|
||||
? nil
|
||||
: [self substringWithRange: enclosingRange],
|
||||
enclosingRange,
|
||||
enclosingRange,
|
||||
&stop);
|
||||
if(stop) break;
|
||||
if (stop) break;
|
||||
currentLocation = enclosingRange.location + enclosingRange.length;
|
||||
}
|
||||
}
|
||||
else if (substringType == NSStringEnumerationByWords || substringType == NSStringEnumerationBySentences)
|
||||
else if (substringType == NSStringEnumerationByWords
|
||||
|| substringType == NSStringEnumerationBySentences)
|
||||
{
|
||||
#if GS_USE_ICU
|
||||
// These macros may be useful elsewhere.
|
||||
|
@ -6355,63 +6383,75 @@ static NSFileManager *fm = nil;
|
|||
errorCode = U_ZERO_ERROR; \
|
||||
} while (NO)
|
||||
|
||||
BOOL byWords = substringType == NSStringEnumerationByWords;
|
||||
NSUInteger length = range.length;
|
||||
UChar characters[length];
|
||||
BOOL byWords = substringType == NSStringEnumerationByWords;
|
||||
NSUInteger length = range.length;
|
||||
UChar characters[length];
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
const char *locale;
|
||||
UBreakIterator *breakIterator;
|
||||
|
||||
[self getCharacters: characters range: range];
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
const char* locale = localized
|
||||
? [[[[NSLocale currentLocale]
|
||||
localeIdentifier]
|
||||
// @ss=standard will use lists of common abbreviations, such as Mr., Mrs., etc.
|
||||
stringByAppendingString: @"@ss=standard"]
|
||||
UTF8String]
|
||||
/* @ss=standard will use lists of common abbreviations,
|
||||
* such as Mr., Mrs., etc.
|
||||
*/
|
||||
locale = localized
|
||||
? [[[[NSLocale currentLocale] localeIdentifier]
|
||||
stringByAppendingString: @"@ss=standard"] UTF8String]
|
||||
: "en_US_POSIX";
|
||||
UBreakIterator* breakIterator = ubrk_open(byWords ? UBRK_WORD : UBRK_SENTENCE, // type
|
||||
locale, // locale
|
||||
characters, // text
|
||||
length, // textLength
|
||||
&errorCode);
|
||||
breakIterator = ubrk_open(
|
||||
byWords ? UBRK_WORD : UBRK_SENTENCE, // type
|
||||
locale, // locale
|
||||
characters, // text
|
||||
length, // textLength
|
||||
&errorCode);
|
||||
GS_U_HANDLE_ERROR(errorCode, @"opening ICU break iterator");
|
||||
ubrk_first(breakIterator);
|
||||
while (YES)
|
||||
{
|
||||
// Make sure it's a valid substring.
|
||||
BOOL isValidSubstring = YES;
|
||||
BOOL isValidSubstring = YES;
|
||||
int32_t nextPosition;
|
||||
NSUInteger nextLocation;
|
||||
NSRange enclosingRange;
|
||||
|
||||
if (byWords)
|
||||
{
|
||||
int32_t ruleStatus = ubrk_getRuleStatus(breakIterator);
|
||||
// From ICU User Guide:
|
||||
// A status value UBRK_WORD_NONE indicates that the boundary does
|
||||
// not start a word or number.
|
||||
// However, valid words seem to be UBRK_WORD_NONE, and invalid words
|
||||
// seem to be UBRK_WORD_NONE_LIMIT.
|
||||
/* From ICU User Guide:
|
||||
* A status value UBRK_WORD_NONE indicates that the boundary
|
||||
* does not start a word or number.
|
||||
* However, valid words seem to be UBRK_WORD_NONE, and invalid
|
||||
* words seem to be UBRK_WORD_NONE_LIMIT.
|
||||
*/
|
||||
isValidSubstring = ruleStatus != UBRK_WORD_NONE_LIMIT;
|
||||
NSLog(@"Status for position %d (%d): %d", (int)currentLocation, (int)ubrk_current(breakIterator), (int) ruleStatus);
|
||||
// NSLog(@"Status for position %d (%d): %d", (int)currentLocation, (int)ubrk_current(breakIterator), (int) ruleStatus);
|
||||
}
|
||||
|
||||
int32_t nextPosition = ubrk_next(breakIterator);
|
||||
nextPosition = ubrk_next(breakIterator);
|
||||
if (nextPosition == UBRK_DONE) break;
|
||||
|
||||
NSUInteger nextLocation = range.location + nextPosition;
|
||||
nextLocation = range.location + nextPosition;
|
||||
// Same as substringRange
|
||||
NSRange enclosingRange = NSMakeRange(currentLocation, nextLocation - currentLocation);
|
||||
enclosingRange
|
||||
= NSMakeRange(currentLocation, nextLocation - currentLocation);
|
||||
|
||||
if (isValidSubstring)
|
||||
{
|
||||
CALL_BLOCK(block,
|
||||
substringNotRequired ? nil : [self substringWithRange: enclosingRange],
|
||||
substringNotRequired
|
||||
? nil
|
||||
: [self substringWithRange: enclosingRange],
|
||||
enclosingRange,
|
||||
enclosingRange,
|
||||
&stop);
|
||||
if(stop) break;
|
||||
if (stop) break;
|
||||
}
|
||||
|
||||
currentLocation = nextLocation;
|
||||
}
|
||||
#else
|
||||
NSWarnLog(@"NSStringEnumerationByWords and NSStringEnumerationBySentences are not supported when GNUstep-base is compiled without ICU.");
|
||||
NSWarnLog(@"NSStringEnumerationByWords and NSStringEnumerationBySentences"
|
||||
@" are not supported when GNUstep-base is compiled without ICU.");
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue