Merge pull request #370 from ethanc8/ethanc8-3

Implement +[NSRegularExpression escapedPatternForString:] and -[NSString enumerateSubstringsInRange:options:usingBlock:]
This commit is contained in:
rfm 2024-05-12 09:32:34 +01:00 committed by GitHub
commit 1fdf6395bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 350 additions and 0 deletions

21
.gitignore vendored
View file

@ -84,3 +84,24 @@ DerivedData/
**/xcshareddata/WorkspaceSettings.xcsettings
# End of https://www.gitignore.io/api/xcode
.cache
compile_commands.json
**.kdev4
# Documentation
Documentation/Base*
Documentation/General
Documentation/manual
Documentation/ReleaseNotes
Documentation/ANNOUNCE
Documentation/*.pdf
Documentation/README
Documentation/version.texi
Documentation/*.aux
Documentation/*.toc
Documentation/INSTALL
Documentation/NEWS
**/dependencies
Source/Base.gsdoc
Source/BaseAdditions.gsdoc

View file

@ -149,6 +149,9 @@ GS_EXPORT_CLASS
inString: (NSString*)string
offset: (NSInteger)offset
template: (NSString*)templat;
#if OS_API_VERSION(MAC_OS_X_VERSION_10_7, GS_API_LATEST)
/**
 * Returns a copy of string in which each regular expression metacharacter
 * is preceded by a backslash, so that the result can be used as a pattern
 * matching the original text literally.<br />
 * The characters escaped are the asterisk, question mark, plus sign,
 * opening square bracket, parentheses, braces, caret, dollar sign,
 * vertical bar, backslash and full stop.
 */
+ (NSString *)escapedPatternForString:(NSString *)string;
#endif
#if GS_HAS_DECLARED_PROPERTIES
@property (readonly) NSRegularExpressionOptions options;
@property (readonly) NSUInteger numberOfCaptureGroups;

View file

@ -426,6 +426,48 @@ enum {
typedef NSUInteger NSStringEncodingConversionOptions;
#endif
#if OS_API_VERSION(MAC_OS_X_VERSION_10_6,GS_API_LATEST)
/** Options for enumerateSubstringsInRange:options:usingBlock:
You must include exactly one substring type (one of the
`NSStringEnumerationBy...` values), and may bitwise or (`|`) it with any
of the flag options listed after the substring types. */
enum {
/* Substring types: exactly one of these must be included.
The value must fit into 8 bits, because the implementation reads the
substring type from the low byte of the options. */
/** Enumerate by lines. Uses lineRangeForRange: */
NSStringEnumerationByLines = 0,
/** Enumerate by paragraph. Uses paragraphRangeForRange: */
NSStringEnumerationByParagraphs = 1,
/** Enumerate by composed character sequence. Uses rangeOfComposedCharacterSequencesForRange: */
NSStringEnumerationByComposedCharacterSequences = 2,
/** Enumerate by word, as specified in Unicode TR 29.
Only supported if GNUstep is compiled with ICU.
Uses UBRK_WORD, with current locale and standard abbreviation lists if
NSStringEnumerationLocalized is passed, otherwise the locale is "en_US_POSIX". */
NSStringEnumerationByWords = 3,
/** Enumerate by sentence, as specified in Unicode TR 29.
Only supported if GNUstep is compiled with ICU.
Uses UBRK_SENTENCE, with current locale and standard abbreviation lists if
NSStringEnumerationLocalized is passed, otherwise the locale is "en_US_POSIX". */
NSStringEnumerationBySentences = 4,
#if OS_API_VERSION(MAC_OS_X_VERSION_11,GS_API_LATEST)
/** Undocumented public API on macOS. Not supported by GNUstep. */
NSStringEnumerationByCaretPositions = 5,
/** Undocumented public API on macOS. Not supported by GNUstep. */
NSStringEnumerationByDeletionClusters = 6,
#endif
/* Flags: any of these may be combined with the substring type via
bitwise or. Each must be a single bit set at an offset >= 8 so that it
cannot collide with the substring type stored in the low byte. */
/** Requests enumeration from the end of the range towards its start. */
NSStringEnumerationReverse = 1UL << 8,
/** The block does not need the substring itself; nil is passed in its
place so that no substring object has to be created. */
NSStringEnumerationSubstringNotRequired = 1UL << 9,
/** For word and sentence enumeration, use the current locale rather than
"en_US_POSIX". */
NSStringEnumerationLocalized = 1UL << 10
};
/** Bitwise combination of one substring type and any number of flags. */
typedef NSUInteger NSStringEnumerationOptions;
/** Block invoked once per enumerated substring. It receives the substring,
the range of the substring, the enclosing range, and a pointer to a flag
which the block may set to YES in order to end the enumeration. */
DEFINE_BLOCK_TYPE(GSNSStringEnumerationBlock, void, NSString* substring, NSRange substringRange, NSRange enclosingRange, BOOL* stop);
#endif
/**
* <p>
* <code>NSString</code> objects represent an immutable string of Unicode 3.0
@ -1050,6 +1092,12 @@ GS_EXPORT_CLASS
+ (Class) constantStringClass;
#endif /* GS_API_NONE */
#if OS_API_VERSION(MAC_OS_X_VERSION_10_6,GS_API_LATEST)
/**
 * Enumerates the substrings of the receiver found in range, invoking
 * block once for each of them.<br />
 * The kind of substring (line, paragraph, composed character sequence,
 * word or sentence) is selected by the substring type held in opts.<br />
 * The block is given the substring, the range of the substring, the
 * enclosing range, and a pointer to a BOOL which it may set to YES in
 * order to end the enumeration early.
 */
- (void) enumerateSubstringsInRange: (NSRange)range
options: (NSStringEnumerationOptions)opts
usingBlock: (GSNSStringEnumerationBlock)block;
#endif
@end
GS_EXPORT_CLASS

View file

@ -1064,6 +1064,20 @@ prepareResult(NSRegularExpression *regex,
}
#endif
/**
 * Returns a copy of string in which every pattern metacharacter has been
 * quoted with a leading backslash, so that the returned text matches the
 * original string literally when used as a regular expression pattern.
 */
+ (NSString *)escapedPatternForString:(NSString *)string
{
  /* Metacharacters are listed in the ICU user guide:
   * https://unicode-org.github.io/icu/userguide/strings/regexp.html
   * Those needing a quote are: * ? + [ ( ) { } ^ $ | . and the backslash.
   */
  NSRegularExpression	*metacharacters;
  NSRange		wholeString;

  /* A single capture group holding a character class of all of them. */
  metacharacters = [NSRegularExpression
    regularExpressionWithPattern: @"([*?+\\[(){}^$|\\\\.])"
                         options: 0
                           error: NULL];
  wholeString = NSMakeRange(0, [string length]);

  /* The template is a literal backslash followed by the captured character. */
  return [metacharacters stringByReplacingMatchesInString: string
                                                  options: 0
                                                    range: wholeString
                                             withTemplate: @"\\\\$1"];
}
- (NSRegularExpressionOptions) options
{
return options;

View file

@ -112,6 +112,14 @@
# include <icu.h>
#endif
#import "Foundation/NSException.h"
#import "Foundation/NSObjCRuntime.h"
#import "Foundation/NSZone.h"
#import "GNUstepBase/GSBlocks.h"
#if GS_USE_ICU
#include <unicode/ubrk.h>
#include <unicode/utypes.h>
#endif
/* Create local inline versions of key functions for case-insensitive operations
*/
#import "Additions/unicode/caseconv.h"
@ -6263,6 +6271,164 @@ static NSFileManager *fm = nil;
return [self rangeOfString: string].location != NSNotFound;
}
/**
 * Enumerates the substrings of the receiver lying within range and invokes
 * block for each of them.<br />
 * The low byte of opts selects the kind of substring (line, paragraph,
 * composed character sequence, word or sentence). The remaining bits are
 * flags:
 * <list>
 *   <item>NSStringEnumerationReverse - enumerate from the end of the range
 *   towards its start.</item>
 *   <item>NSStringEnumerationSubstringNotRequired - pass nil to the block
 *   instead of creating each substring.</item>
 *   <item>NSStringEnumerationLocalized - use the current locale (rather
 *   than "en_US_POSIX") for word and sentence boundaries.</item>
 * </list>
 * The block receives the substring, the range of the substring, the
 * enclosing range and a pointer to a BOOL which it may set to YES in order
 * to end the enumeration.<br />
 * Word and sentence enumeration need ICU; without it a warning is logged
 * and nothing is enumerated. Caret position and deletion cluster
 * enumeration are not supported.<br />
 * Raises NSRangeException if range is not within the receiver.
 */
- (void) enumerateSubstringsInRange: (NSRange)range
                            options: (NSStringEnumerationOptions)opts
                         usingBlock: (GSNSStringEnumerationBlock)block
{
  /* The substring type is held in the low byte of the options. */
  uint8_t	substringType = opts & 0xFF;
  /* The flags are bits at offset 8 and above, so they must be compared
   * with zero rather than assigned directly: BOOL may be an 8 bit type,
   * in which case a direct assignment would truncate every flag to NO.
   */
  BOOL		isReverse = (opts & NSStringEnumerationReverse) != 0;
  BOOL		substringNotRequired
    = (opts & NSStringEnumerationSubstringNotRequired) != 0;
  NSUInteger	length = [self length];
  NSUInteger	rangeEnd;
  BOOL		stop = NO;

  /* Reject bad ranges up front; the loops below rely on the range being
   * inside the string in order to be sure of terminating.
   */
  if (range.location > length || range.length > length - range.location)
    {
      [NSException raise: NSRangeException
                  format: @"in %@, range { %lu, %lu } extends beyond size (%lu)",
        NSStringFromSelector(_cmd), (unsigned long)range.location,
        (unsigned long)range.length, (unsigned long)length];
    }
  rangeEnd = range.location + range.length;

  if (substringType == NSStringEnumerationByLines
    || substringType == NSStringEnumerationByParagraphs)
    {
      BOOL		isLineSep = (substringType == NSStringEnumerationByLines);
      /* Index of the edge of the part of the range not yet enumerated. */
      NSUInteger	location = isReverse ? rangeEnd : range.location;

      while (isReverse ? (location > range.location) : (location < rangeEnd))
        {
          /* Index of the first character of the line. */
          NSUInteger	start;
          /* Index of the first character past the line terminator. */
          NSUInteger	end;
          /* Index of the first character of the line terminator. */
          NSUInteger	contentsEnd;
          NSUInteger	substringStart;
          NSUInteger	substringEnd;
          NSRange	substringRange;
          /* A character known to be inside the line we want next: going
           * forwards it is the first one not yet seen, going backwards it
           * is the last one not yet seen.
           */
          NSUInteger	probe = isReverse ? location - 1 : location;

          [self _getStart: &start
                      end: &end
              contentsEnd: &contentsEnd
                 forRange: NSMakeRange(probe, 0)
                  lineSep: isLineSep];

          /* The substring is the line contents clipped to the enumerated
           * range. The clipped range may be empty (for instance when the
           * range begins inside a line terminator) but must never have
           * its end before its start.
           */
          substringStart = (start > range.location) ? start : range.location;
          substringEnd = (contentsEnd < rangeEnd) ? contentsEnd : rangeEnd;
          if (substringEnd < substringStart)
            {
              substringEnd = substringStart;
            }
          substringRange
            = NSMakeRange(substringStart, substringEnd - substringStart);

          CALL_BLOCK(block,
            substringNotRequired
              ? nil : [self substringWithRange: substringRange],
            substringRange,
            NSMakeRange(start, end - start),
            &stop);
          if (stop)
            {
              break;
            }
          /* start <= probe < end, so this always makes progress. */
          location = isReverse ? start : end;
        }
    }
  else if (substringType == NSStringEnumerationByComposedCharacterSequences)
    {
      NSUInteger	location = isReverse ? rangeEnd : range.location;

      while (isReverse ? (location > range.location) : (location < rangeEnd))
        {
          NSUInteger	probe = isReverse ? location - 1 : location;
          /* Every character belongs to a composed character sequence, so
           * the enclosing range is the same as the substring range.
           */
          NSRange	enclosingRange
            = [self rangeOfComposedCharacterSequenceAtIndex: probe];

          CALL_BLOCK(block,
            substringNotRequired
              ? nil : [self substringWithRange: enclosingRange],
            enclosingRange,
            enclosingRange,
            &stop);
          if (stop)
            {
              break;
            }
          location = isReverse
            ? enclosingRange.location
            : enclosingRange.location + enclosingRange.length;
        }
    }
  else if (substringType == NSStringEnumerationByWords
    || substringType == NSStringEnumerationBySentences)
    {
#if GS_USE_ICU
      BOOL		byWords = (substringType == NSStringEnumerationByWords);
      BOOL		localized = (opts & NSStringEnumerationLocalized) != 0;
      UErrorCode	errorCode = U_ZERO_ERROR;
      const char	*locale;
      UChar		*characters;
      UBreakIterator	*breakIterator;
      /* The boundary (as an offset into the buffer) the iterator is at. */
      int32_t		boundary;

      if (range.length == 0)
        {
          return;	// Nothing to enumerate.
        }
      if (range.length > INT32_MAX)
        {
          NSWarnMLog(@"Range too long for ICU break iterator");
          return;
        }

      /* ICU keeps a pointer to the text rather than copying it, so the
       * buffer must stay alive until the iterator has been closed. It is
       * allocated on the heap because the range may be far too large for
       * the stack.
       */
      characters = (UChar*)NSZoneMalloc(NSDefaultMallocZone(),
        range.length * sizeof(UChar));
      if (characters == NULL)
        {
          NSWarnMLog(@"Unable to allocate buffer for ICU break iterator");
          return;
        }
      [self getCharacters: (unichar*)characters range: range];

      locale = localized
        ? [[[[NSLocale currentLocale]
            localeIdentifier]
            /* @ss=standard makes ICU use its lists of common
             * abbreviations, such as Mr., Mrs., etc.
             */
            stringByAppendingString: @"@ss=standard"]
            UTF8String]
        : "en_US_POSIX";

      breakIterator = ubrk_open(byWords ? UBRK_WORD : UBRK_SENTENCE,
        locale,
        characters,
        (int32_t)range.length,
        &errorCode);
      if (U_FAILURE(errorCode))
        {
          NSWarnMLog(@"Error opening ICU break iterator: %s",
            u_errorName(errorCode));
          if (breakIterator != NULL)
            {
              ubrk_close(breakIterator);
            }
          NSZoneFree(NSDefaultMallocZone(), characters);
          return;
        }
      else if (errorCode < U_ZERO_ERROR)
        {
          NSWarnMLog(@"Warning opening ICU break iterator: %s",
            u_errorName(errorCode));
        }

      boundary = isReverse
        ? ubrk_last(breakIterator) : ubrk_first(breakIterator);
      while (NO == stop)
        {
          int32_t	segmentStart;
          int32_t	segmentEnd;
          int32_t	ruleStatus;
          NSRange	enclosingRange;

          /* The rule status of a boundary describes the text which ends
           * at that boundary, so it must be read while the iterator is
           * positioned at the END of the segment being examined.
           */
          if (isReverse)
            {
              segmentEnd = boundary;
              ruleStatus = ubrk_getRuleStatus(breakIterator);
              segmentStart = ubrk_previous(breakIterator);
              if (segmentStart == UBRK_DONE)
                {
                  break;
                }
              boundary = segmentStart;
            }
          else
            {
              segmentStart = boundary;
              segmentEnd = ubrk_next(breakIterator);
              if (segmentEnd == UBRK_DONE)
                {
                  break;
                }
              ruleStatus = ubrk_getRuleStatus(breakIterator);
              boundary = segmentEnd;
            }

          /* When enumerating words, segments whose status lies in the
           * UBRK_WORD_NONE category are spaces and punctuation between
           * the words, and are not reported.
           */
          if (byWords && ruleStatus >= UBRK_WORD_NONE
            && ruleStatus < UBRK_WORD_NONE_LIMIT)
            {
              continue;
            }

          /* Offsets are relative to the buffer, which starts at the
           * start of the enumerated range. The enclosing range is the
           * same as the substring range.
           */
          enclosingRange = NSMakeRange(range.location + segmentStart,
            segmentEnd - segmentStart);
          CALL_BLOCK(block,
            substringNotRequired
              ? nil : [self substringWithRange: enclosingRange],
            enclosingRange,
            enclosingRange,
            &stop);
        }

      ubrk_close(breakIterator);
      NSZoneFree(NSDefaultMallocZone(), characters);
#else
      NSWarnLog(@"NSStringEnumerationByWords and NSStringEnumerationBySentences are not supported when GNUstep-base is compiled without ICU.");
      return;
#endif
    }
  else if (substringType == NSStringEnumerationByCaretPositions)
    {
      // FIXME - Not documented by Apple.
      NSWarnLog(@"NSStringEnumerationByCaretPositions is not supported");
      return;
    }
  else if (substringType == NSStringEnumerationByDeletionClusters)
    {
      // FIXME - Not documented by Apple.
      NSWarnLog(@"NSStringEnumerationByDeletionClusters is not supported");
      return;
    }
}
@end
/**

View file

@ -0,0 +1,98 @@
#import "ObjectTesting.h"
#import <Foundation/NSAutoreleasePool.h>
#import <Foundation/NSString.h>
/* Word and sentence enumeration are only implemented when GNUstep-base is
 * built with ICU. Where GS_USE_ICU is not defined at all we are presumably
 * not building against GNUstep-base, and assume the feature is present.
 */
#if defined(GS_USE_ICU)
#define ENUMERATION_ICU_SUPPORTED GS_USE_ICU
#else
#define ENUMERATION_ICU_SUPPORTED 1
#endif

#if defined(__has_extension) && __has_extension(blocks)
/* Checks that enumerateSubstringsInRange:options:usingBlock: reports the
 * expected substrings, in order, for each supported substring type.
 */
int main (int argc, const char * argv[])
{
  NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];

  START_SET("Enumerate substrings by lines");

  NSString* s1 = @"Line 1\nLine 2";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange:(NSRange){
    .location = 0,
    .length = [s1 length]
  } options: NSStringEnumerationByLines
    usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    // NSUInteger is not necessarily the same size as long, so cast for the format.
    NSLog(@"Substring range: {.location=%lu, .length=%lu}", (unsigned long)substringRange.location, (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}", (unsigned long)enclosingRange.location, (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    // *stop = YES;
    if(currentIteration == 0) PASS([substring isEqual: @"Line 1"], "First line of \"Line 1\\nLine 2\" is \"Line 1\"");
    if(currentIteration == 1) PASS([substring isEqual: @"Line 2"], "Second line of \"Line 1\\nLine 2\" is \"Line 2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two lines in \"Line 1\\nLine 2\"");

  END_SET("Enumerate substrings by lines");

  START_SET("Enumerate substrings by paragraphs");

  NSString* s1 = @"Paragraph 1\nParagraph 2";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange:(NSRange){
    .location = 0,
    .length = [s1 length]
  } options: NSStringEnumerationByParagraphs
    usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}", (unsigned long)substringRange.location, (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}", (unsigned long)enclosingRange.location, (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    // *stop = YES;
    if(currentIteration == 0) PASS([substring isEqual: @"Paragraph 1"], "First paragraph of \"Paragraph 1\\nParagraph 2\" is \"Paragraph 1\"");
    if(currentIteration == 1) PASS([substring isEqual: @"Paragraph 2"], "Second paragraph of \"Paragraph 1\\nParagraph 2\" is \"Paragraph 2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two paragraphs in \"Paragraph 1\\nParagraph 2\"");

  END_SET("Enumerate substrings by paragraphs");

  START_SET("Enumerate substrings by words");

  // Without ICU the method only logs a warning, so skip rather than fail.
  if (!ENUMERATION_ICU_SUPPORTED)
    {
      SKIP("Enumeration by words not supported\nThe ICU library was not available when GNUstep-base was built");
    }

  NSString* s1 = @"Word1 word2.";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange:(NSRange){
    .location = 0,
    .length = [s1 length]
  } options: NSStringEnumerationByWords
    usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}", (unsigned long)substringRange.location, (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}", (unsigned long)enclosingRange.location, (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    // *stop = YES;
    if(currentIteration == 0) PASS([substring isEqual: @"Word1"], "First word of \"Word1 word2.\" is \"Word1\"");
    if(currentIteration == 1) PASS([substring isEqual: @"word2"], "Second word of \"Word1 word2.\" is \"word2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two words in \"Word1 word2.\"");

  END_SET("Enumerate substrings by words");

  START_SET("Enumerate substrings by sentences");

  // Without ICU the method only logs a warning, so skip rather than fail.
  if (!ENUMERATION_ICU_SUPPORTED)
    {
      SKIP("Enumeration by sentences not supported\nThe ICU library was not available when GNUstep-base was built");
    }

  NSString* s1 = @"Sentence 1. Sentence 2.";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange:(NSRange){
    .location = 0,
    .length = [s1 length]
  } options: NSStringEnumerationBySentences
    usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}", (unsigned long)substringRange.location, (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}", (unsigned long)enclosingRange.location, (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    // *stop = YES;
    if(currentIteration == 0) PASS([substring isEqual: @"Sentence 1. "], "First sentence of \"Sentence 1. Sentence 2.\" is \"Sentence 1. \"");
    if(currentIteration == 1) PASS([substring isEqual: @"Sentence 2."], "Second sentence of \"Sentence 1. Sentence 2.\" is \"Sentence 2.\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two sentences in \"Sentence 1. Sentence 2.\"");

  END_SET("Enumerate substrings by sentences");

  [pool drain];
  return 0;
}
#else
/* Blocks are not available, so there is nothing which can be tested. */
int main (int argc, const char * argv[])
{
  return 0;
}
#endif