Implement -[NSString enumerateSubstringsInRange:options:usingBlock:]

This commit is contained in:
ethanc8 2023-02-12 11:00:27 -06:00 committed by ethanc8
parent 7862434d85
commit b8239f604f
3 changed files with 304 additions and 0 deletions

View file

@ -112,6 +112,14 @@
# include <icu.h>
#endif
#import "Foundation/NSException.h"
#import "Foundation/NSObjCRuntime.h"
#import "Foundation/NSZone.h"
#import "GNUstepBase/GSBlocks.h"
#if GS_USE_ICU
#include <unicode/ubrk.h>
#include <unicode/utypes.h>
#endif
/* Create local inline versions of key functions for case-insensitive operations
*/
#import "Additions/unicode/caseconv.h"
@ -6248,6 +6256,156 @@ static NSFileManager *fm = nil;
return [self rangeOfString: string].location != NSNotFound;
}
/**
 * Enumerates the substrings of the receiver that lie inside range, calling
 * block once for each of them.<br />
 * The low byte of opts selects the kind of substring (lines, paragraphs,
 * composed character sequences, words or sentences); the remaining bits
 * are modifier flags:
 * <list>
 *   <item>NSStringEnumerationReverse - start at the end of range and work
 *     towards its beginning.</item>
 *   <item>NSStringEnumerationSubstringNotRequired - pass nil to the block
 *     instead of creating a substring object.</item>
 *   <item>NSStringEnumerationLocalized - use the current locale (rather
 *     than en_US_POSIX) for word and sentence boundaries.</item>
 * </list>
 * The block receives the substring (or nil), the range of the substring,
 * the enclosing range (for lines and paragraphs this also covers the
 * terminator; for every other type it is identical to the substring range)
 * and a pointer to a flag which it may set to YES to end the enumeration.
 * <br />
 * Every range passed to the block is confined to range.<br />
 * Word and sentence enumeration need ICU; without it a warning is logged
 * and nothing is enumerated.  Caret positions and deletion clusters are
 * not supported.<br />
 * Raises NSRangeException if range is not within the receiver.
 */
- (void) enumerateSubstringsInRange: (NSRange)range
                            options: (NSStringEnumerationOptions)opts
                         usingBlock: (GSNSStringEnumerationBlock)block
{
  /* The substring type is the low byte of the options. */
  uint8_t       substringType = opts & 0xFF;
  /* Test the flags explicitly against zero: they sit above the low byte
   * that holds the substring type, so if BOOL is char sized a plain
   * assignment could truncate every one of them to NO.
   */
  BOOL          isReverse = (opts & NSStringEnumerationReverse) != 0;
  BOOL          substringNotRequired
    = (opts & NSStringEnumerationSubstringNotRequired) != 0;
  NSUInteger    stringLength = [self length];
  NSUInteger    rangeEnd;
  BOOL          stop = NO;

  if (range.location > stringLength
    || range.length > stringLength - range.location)
    {
      [NSException raise: NSRangeException
                  format: @"in %@, range { %lu, %lu } extends beyond "
        @"length (%lu)", NSStringFromSelector(_cmd),
        (unsigned long)range.location, (unsigned long)range.length,
        (unsigned long)stringLength];
    }
  if (block == NULL)
    {
      return;   // Nothing to call.
    }
  rangeEnd = range.location + range.length;

  if (substringType == NSStringEnumerationByLines
    || substringType == NSStringEnumerationByParagraphs)
    {
      BOOL              isLineSep
        = (substringType == NSStringEnumerationByLines);
      /* The cursor is the boundary between what has been enumerated and
       * what has not.
       */
      NSUInteger        cursor = isReverse ? rangeEnd : range.location;

      while (!stop
        && (isReverse ? cursor > range.location : cursor < rangeEnd))
        {
          /* Index of a character inside the line we want next. */
          NSUInteger    probe = isReverse ? cursor - 1 : cursor;
          /* First character of the line containing the probe. */
          NSUInteger    start;
          /* First character past the terminator of that line. */
          NSUInteger    end;
          /* First character of the terminator of that line. */
          NSUInteger    contentsEnd;
          NSRange       substringRange;
          NSRange       enclosingRange;

          /* When walking backwards do not probe in the middle of a CR LF
           * pair; step back onto the CR so the pair stays one terminator.
           */
          if (isReverse && probe > range.location
            && [self characterAtIndex: probe] == '\n'
            && [self characterAtIndex: probe - 1] == '\r')
            {
              probe--;
            }

          [self _getStart: &start
                      end: &end
              contentsEnd: &contentsEnd
                 forRange: NSMakeRange(probe, 0)
                  lineSep: isLineSep];

          /* Confine the line to the part of the range not yet seen. */
          if (isReverse)
            {
              end = cursor;
              if (start < range.location)
                {
                  start = range.location;
                }
            }
          else
            {
              start = cursor;
              if (end > rangeEnd)
                {
                  end = rangeEnd;
                }
            }
          if (end <= start)
            {
              break;    // No progress possible; never loop forever.
            }
          if (contentsEnd > end)
            {
              contentsEnd = end;
            }
          if (contentsEnd < start)
            {
              contentsEnd = start;
            }

          substringRange = NSMakeRange(start, contentsEnd - start);
          enclosingRange = NSMakeRange(start, end - start);
          CALL_BLOCK(block,
            substringNotRequired
              ? nil : [self substringWithRange: substringRange],
            substringRange,
            enclosingRange,
            &stop);
          cursor = isReverse ? start : end;
        }
    }
  else if (substringType == NSStringEnumerationByComposedCharacterSequences)
    {
      NSUInteger        cursor = isReverse ? rangeEnd : range.location;

      while (!stop
        && (isReverse ? cursor > range.location : cursor < rangeEnd))
        {
          NSRange       sequence = [self
            rangeOfComposedCharacterSequenceAtIndex:
              (isReverse ? cursor - 1 : cursor)];
          NSUInteger    sequenceStart = sequence.location;
          NSUInteger    sequenceEnd = NSMaxRange(sequence);

          /* Confine the sequence to the part of the range not yet seen. */
          if (isReverse)
            {
              sequenceEnd = cursor;
              if (sequenceStart < range.location)
                {
                  sequenceStart = range.location;
                }
            }
          else
            {
              sequenceStart = cursor;
              if (sequenceEnd > rangeEnd)
                {
                  sequenceEnd = rangeEnd;
                }
            }
          if (sequenceEnd <= sequenceStart)
            {
              break;    // No progress possible; never loop forever.
            }

          /* Every character belongs to a composed character sequence, so
           * the enclosing range is the same as the substring range.
           */
          sequence = NSMakeRange(sequenceStart, sequenceEnd - sequenceStart);
          CALL_BLOCK(block,
            substringNotRequired ? nil : [self substringWithRange: sequence],
            sequence,
            sequence,
            &stop);
          cursor = isReverse ? sequenceStart : sequenceEnd;
        }
    }
  else if (substringType == NSStringEnumerationByWords
    || substringType == NSStringEnumerationBySentences)
    {
#if GS_USE_ICU
      BOOL              byWords
        = (substringType == NSStringEnumerationByWords);
      BOOL              localized
        = (opts & NSStringEnumerationLocalized) != 0;
      NSZone            *zone = NSDefaultMallocZone();
      const char        *locale = "en_US_POSIX";
      UErrorCode        errorCode = U_ZERO_ERROR;
      UBreakIterator    *breakIterator;
      unichar           *characters;
      int32_t           boundary;

      if (range.length == 0)
        {
          return;       // Nothing to enumerate.
        }
      if (range.length > (NSUInteger)INT32_MAX)
        {
          /* ICU text lengths are 32 bit signed integers. */
          NSWarnMLog(@"Range of length %lu is too long for an ICU "
            @"break iterator", (unsigned long)range.length);
          return;
        }

      if (localized)
        {
          NSString      *identifier
            = [[NSLocale currentLocale] localeIdentifier];
          /* ICU keywords follow a single '@' and are separated by ';', so
           * only add the '@' if the identifier has no keywords yet.
           */
          NSString      *separator
            = ([identifier rangeOfString: @"@"].location == NSNotFound)
            ? @"@" : @";";

          /* ss=standard makes the sentence breaker use lists of common
           * abbreviations such as Mr., Mrs., etc.
           */
          locale = [[identifier stringByAppendingFormat: @"%@ss=standard",
            separator] UTF8String];
        }

      /* Use the heap rather than a variable length array so that long
       * strings cannot overflow the stack.
       */
      characters = NSZoneMalloc(zone, range.length * sizeof(unichar));
      if (characters == NULL)
        {
          NSWarnMLog(@"Unable to allocate buffer for %lu characters",
            (unsigned long)range.length);
          return;
        }
      [self getCharacters: characters range: range];

      breakIterator = ubrk_open(byWords ? UBRK_WORD : UBRK_SENTENCE,
        locale,
        (const UChar *)characters,
        (int32_t)range.length,
        &errorCode);
      if (U_FAILURE(errorCode) || breakIterator == NULL)
        {
          NSWarnMLog(@"Error opening ICU break iterator: %s",
            u_errorName(errorCode));
          if (breakIterator != NULL)
            {
              ubrk_close(breakIterator);
            }
          NSZoneFree(zone, characters);
          return;
        }
      if (errorCode < U_ZERO_ERROR)
        {
          NSWarnMLog(@"Warning opening ICU break iterator: %s",
            u_errorName(errorCode));
        }

      /* Boundaries are offsets into the characters buffer, i.e. they are
       * relative to range.location.
       */
      boundary = isReverse
        ? ubrk_last(breakIterator) : ubrk_first(breakIterator);
      while (!stop)
        {
          int32_t       segmentStart;
          int32_t       segmentEnd;
          int32_t       ruleStatus;
          NSRange       segment;

          /* The rule status of a boundary describes the text which
           * precedes it, so the status of a segment must be read while
           * the iterator is positioned at the END of that segment.
           */
          if (isReverse)
            {
              segmentEnd = boundary;
              ruleStatus = ubrk_getRuleStatus(breakIterator);
              segmentStart = ubrk_previous(breakIterator);
              if (segmentStart == UBRK_DONE)
                {
                  break;
                }
              boundary = segmentStart;
            }
          else
            {
              segmentStart = boundary;
              segmentEnd = ubrk_next(breakIterator);
              if (segmentEnd == UBRK_DONE)
                {
                  break;
                }
              ruleStatus = ubrk_getRuleStatus(breakIterator);
              boundary = segmentEnd;
            }

          /* A status in [UBRK_WORD_NONE, UBRK_WORD_NONE_LIMIT) marks
           * spaces, punctuation and the like, which are not words.
           */
          if (byWords && ruleStatus >= UBRK_WORD_NONE
            && ruleStatus < UBRK_WORD_NONE_LIMIT)
            {
              continue;
            }
          if (segmentEnd <= segmentStart)
            {
              continue;
            }

          /* The enclosing range is the same as the substring range. */
          segment = NSMakeRange(range.location + (NSUInteger)segmentStart,
            (NSUInteger)(segmentEnd - segmentStart));
          CALL_BLOCK(block,
            substringNotRequired ? nil : [self substringWithRange: segment],
            segment,
            segment,
            &stop);
        }

      ubrk_close(breakIterator);
      NSZoneFree(zone, characters);
#else
      NSWarnLog(@"NSStringEnumerationByWords and NSStringEnumerationBySentences are not supported when GNUstep-base is compiled without ICU.");
      return;
#endif
    }
  else if (substringType == NSStringEnumerationByCaretPositions)
    {
      // FIXME - Not documented by Apple.
      NSWarnLog(@"NSStringEnumerationByCaretPositions is not supported");
      return;
    }
  else if (substringType == NSStringEnumerationByDeletionClusters)
    {
      // FIXME - Not documented by Apple.
      NSWarnLog(@"NSStringEnumerationByDeletionClusters is not supported");
      return;
    }
}
@end
/**