mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-22 16:33:29 +00:00
Merge pull request #370 from ethanc8/ethanc8-3
Implement +[NSRegularExpression escapedPatternForString:] and -[NSString enumerateSubstringsInRange:options:usingBlock]
This commit is contained in:
commit
1fdf6395bd
6 changed files with 350 additions and 0 deletions
21
.gitignore
vendored
21
.gitignore
vendored
|
@ -84,3 +84,24 @@ DerivedData/
|
|||
**/xcshareddata/WorkspaceSettings.xcsettings
|
||||
|
||||
# End of https://www.gitignore.io/api/xcode
|
||||
|
||||
.cache
|
||||
compile_commands.json
|
||||
**.kdev4
|
||||
|
||||
# Documentation
|
||||
Documentation/Base*
|
||||
Documentation/General
|
||||
Documentation/manual
|
||||
Documentation/ReleaseNotes
|
||||
Documentation/ANNOUNCE
|
||||
Documentation/*.pdf
|
||||
Documentation/README
|
||||
Documentation/version.texi
|
||||
Documentation/*.aux
|
||||
Documentation/*.toc
|
||||
Documentation/INSTALL
|
||||
Documentation/NEWS
|
||||
**/dependencies
|
||||
Source/Base.gsdoc
|
||||
Source/BaseAdditions.gsdoc
|
|
@ -149,6 +149,9 @@ GS_EXPORT_CLASS
|
|||
inString: (NSString*)string
|
||||
offset: (NSInteger)offset
|
||||
template: (NSString*)templat;
|
||||
#if OS_API_VERSION(MAC_OS_X_VERSION_10_7, GS_API_LATEST)
|
||||
+ (NSString *)escapedPatternForString:(NSString *)string;
|
||||
#endif
|
||||
#if GS_HAS_DECLARED_PROPERTIES
|
||||
@property (readonly) NSRegularExpressionOptions options;
|
||||
@property (readonly) NSUInteger numberOfCaptureGroups;
|
||||
|
|
|
@ -426,6 +426,48 @@ enum {
|
|||
typedef NSUInteger NSStringEncodingConversionOptions;
|
||||
#endif
|
||||
|
||||
#if OS_API_VERSION(MAC_OS_X_VERSION_10_6,GS_API_LATEST)
/**
 * Options for -enumerateSubstringsInRange:options:usingBlock:<br />
 * A value consists of exactly one substring type (one of the
 * NSStringEnumerationBy... constants), optionally combined using
 * bitwise or (`|`) with any of the modifier flags.
 */
enum {
  /* Substring types.  Exactly one of these must be supplied, and each
   * value must fit into the low 8 bits of the options.
   */

  /** Enumerate by lines, as delimited by lineRangeForRange: */
  NSStringEnumerationByLines = 0,

  /** Enumerate by paragraphs, as delimited by paragraphRangeForRange: */
  NSStringEnumerationByParagraphs = 1,

  /** Enumerate by composed character sequence, as reported by
   * rangeOfComposedCharacterSequenceAtIndex:
   */
  NSStringEnumerationByComposedCharacterSequences = 2,

  /** Enumerate by word, as specified in Unicode TR 29.<br />
   * Only supported if GNUstep is compiled with ICU.<br />
   * Uses a UBRK_WORD break iterator.  When NSStringEnumerationLocalized
   * is passed the current locale is used together with the standard
   * abbreviation lists, otherwise the locale is "en_US_POSIX".
   */
  NSStringEnumerationByWords = 3,

  /** Enumerate by sentence, as specified in Unicode TR 29.<br />
   * Only supported if GNUstep is compiled with ICU.<br />
   * Uses a UBRK_SENTENCE break iterator.  When
   * NSStringEnumerationLocalized is passed the current locale is used
   * together with the standard abbreviation lists, otherwise the locale
   * is "en_US_POSIX".
   */
  NSStringEnumerationBySentences = 4,

#if OS_API_VERSION(MAC_OS_X_VERSION_11,GS_API_LATEST)
  /** Undocumented public API on macOS.  Not supported by GNUstep. */
  NSStringEnumerationByCaretPositions = 5,

  /** Undocumented public API on macOS.  Not supported by GNUstep. */
  NSStringEnumerationByDeletionClusters = 6,
#endif

  /* Modifier flags.  Any of these may be combined with the substring
   * type; each must be a single bit at an offset >= 8 so that it cannot
   * collide with a substring type.
   */

  /** Request enumeration starting from the end of the range. */
  NSStringEnumerationReverse = 1UL << 8,

  /** The block does not need the substring itself; nil is passed in
   * its place.
   */
  NSStringEnumerationSubstringNotRequired = 1UL << 9,

  /** Use the current locale when enumerating words or sentences. */
  NSStringEnumerationLocalized = 1UL << 10
};

typedef NSUInteger NSStringEnumerationOptions;

DEFINE_BLOCK_TYPE(GSNSStringEnumerationBlock, void, NSString* substring, NSRange substringRange, NSRange enclosingRange, BOOL* stop);
#endif
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* <code>NSString</code> objects represent an immutable string of Unicode 3.0
|
||||
|
@ -1050,6 +1092,12 @@ GS_EXPORT_CLASS
|
|||
+ (Class) constantStringClass;
|
||||
#endif /* GS_API_NONE */
|
||||
|
||||
#if OS_API_VERSION(MAC_OS_X_VERSION_10_6,GS_API_LATEST)
|
||||
- (void) enumerateSubstringsInRange: (NSRange)range
|
||||
options: (NSStringEnumerationOptions)opts
|
||||
usingBlock: (GSNSStringEnumerationBlock)block;
|
||||
#endif
|
||||
|
||||
@end
|
||||
|
||||
GS_EXPORT_CLASS
|
||||
|
|
|
@ -1064,6 +1064,20 @@ prepareResult(NSRegularExpression *regex,
|
|||
}
|
||||
#endif
|
||||
|
||||
/**
 * Returns a copy of string in which each of the characters
 * * ? + [ ( ) { } ^ $ | \ . is preceded by a backslash, so that the
 * result can be used as a pattern matching the original text literally.
 */
+ (NSString *) escapedPatternForString: (NSString *)string
{
  // The list of characters needing an escape is taken from
  // https://unicode-org.github.io/icu/userguide/strings/regexp.html
  NSRange               wholeString = NSMakeRange(0, [string length]);
  NSRegularExpression   *metacharacters;

  // One capture group holding a single metacharacter ...
  metacharacters = [NSRegularExpression
    regularExpressionWithPattern: @"([*?+\\[(){}^$|\\\\.])"
                         options: 0
                           error: NULL];

  // ... which is replaced by a literal backslash followed by itself.
  return [metacharacters stringByReplacingMatchesInString: string
                                                  options: 0
                                                    range: wholeString
                                             withTemplate: @"\\\\$1"];
}
|
||||
|
||||
- (NSRegularExpressionOptions) options
|
||||
{
|
||||
return options;
|
||||
|
|
|
@ -112,6 +112,14 @@
|
|||
# include <icu.h>
|
||||
#endif
|
||||
|
||||
#import "Foundation/NSObjCRuntime.h"
|
||||
#import "GNUstepBase/GSBlocks.h"
|
||||
#if GS_USE_ICU
|
||||
#include <unicode/ubrk.h>
|
||||
#include <unicode/utypes.h>
|
||||
#endif
|
||||
|
||||
|
||||
/* Create local inline versions of key functions for case-insensitive operations
|
||||
*/
|
||||
#import "Additions/unicode/caseconv.h"
|
||||
|
@ -6263,6 +6271,164 @@ static NSFileManager *fm = nil;
|
|||
return [self rangeOfString: string].location != NSNotFound;
|
||||
}
|
||||
|
||||
/**
 * Calls block once for each substring of the kind selected by the low
 * byte of opts that lies in range.<br />
 * The block receives the substring (or nil if
 * NSStringEnumerationSubstringNotRequired is set), the range of the
 * substring, the enclosing range (for lines and paragraphs the whole
 * line including its terminator, otherwise the same as the substring
 * range) and a pointer to a flag which it may set to YES in order to
 * stop the enumeration.<br />
 * With NSStringEnumerationReverse the substrings are delivered starting
 * from the end of range.<br />
 * An empty range produces no calls to the block.  A range which extends
 * beyond the end of the receiver raises NSRangeException.<br />
 * Enumeration by words or sentences requires ICU; without it (and for
 * the caret position / deletion cluster types) a warning is logged and
 * the block is never called.
 */
- (void) enumerateSubstringsInRange: (NSRange)range
                            options: (NSStringEnumerationOptions)opts
                         usingBlock: (GSNSStringEnumerationBlock)block
{
  // The low byte selects the kind of substring to enumerate.
  uint8_t       substringType = opts & 0xFF;
  /* The flag bits live above bit 7, so they must be turned into YES/NO
   * explicitly: assigning the masked value directly to a BOOL loses the
   * bit wherever BOOL is a char type.
   */
  BOOL          isReverse
    = (opts & NSStringEnumerationReverse) ? YES : NO;
  BOOL          substringNotRequired
    = (opts & NSStringEnumerationSubstringNotRequired) ? YES : NO;
  NSUInteger    stringLength = [self length];
  NSUInteger    rangeEnd;
  BOOL          stop = NO;

  // Validate once, before anything is allocated or the block is called.
  if (range.location > stringLength
    || range.length > stringLength - range.location)
    {
      [NSException raise: NSRangeException
                  format: @"-enumerateSubstringsInRange:options:usingBlock: "
        @"range {%lu, %lu} extends beyond string length %lu",
        (unsigned long)range.location, (unsigned long)range.length,
        (unsigned long)stringLength];
    }
  rangeEnd = range.location + range.length;

  if (substringType == NSStringEnumerationByLines
    || substringType == NSStringEnumerationByParagraphs)
    {
      BOOL              isLineSep
        = (substringType == NSStringEnumerationByLines) ? YES : NO;
      /* Forwards: index of the first character not yet enumerated.
       * Backwards: index just past the last character not yet enumerated.
       */
      NSUInteger        position = isReverse ? rangeEnd : range.location;

      while (isReverse ? (position > range.location) : (position < rangeEnd))
        {
          // First character of the line containing the probed index.
          NSUInteger    start;
          // First character past the terminator of that line.
          NSUInteger    end;
          // First character of the terminator of that line.
          NSUInteger    contentsEnd;
          NSUInteger    substringStart;
          NSUInteger    substringEnd;
          NSRange       substringRange;
          /* NOTE(review): going backwards the probe is the last
           * character of the preceding line, which is normally part of
           * its terminator; this assumes the helper assigns a terminator
           * to the line it ends - confirm.  The progress check below
           * guarantees termination either way.
           */
          NSUInteger    probe = isReverse ? position - 1 : position;

          [self _getStart: &start
                      end: &end
              contentsEnd: &contentsEnd
                 forRange: NSMakeRange(probe, 0)
                  lineSep: isLineSep];

          /* The substring is the part of the line contents which lies
           * inside the enumerated range; never let its length underflow
           * when the range starts or ends inside the line or terminator.
           */
          substringStart = (start > range.location) ? start : range.location;
          substringEnd = (contentsEnd < rangeEnd) ? contentsEnd : rangeEnd;
          if (substringEnd < substringStart)
            {
              substringEnd = substringStart;
            }
          substringRange
            = NSMakeRange(substringStart, substringEnd - substringStart);

          CALL_BLOCK(block,
            substringNotRequired ? nil
              : [self substringWithRange: substringRange],
            substringRange,
            NSMakeRange(start, end - start),
            &stop);
          if (stop)
            {
              break;
            }

          if (isReverse)
            {
              if (start >= position)
                {
                  break;        // No progress; avoid looping forever.
                }
              position = start;
            }
          else
            {
              if (end <= position)
                {
                  break;        // No progress; avoid looping forever.
                }
              position = end;
            }
        }
    }
  else if (substringType == NSStringEnumerationByComposedCharacterSequences)
    {
      NSUInteger        position = isReverse ? rangeEnd : range.location;

      while (isReverse ? (position > range.location) : (position < rangeEnd))
        {
          /* Every character belongs to a composed character sequence,
           * so the enclosing range is the same as the substring range.
           */
          NSRange       sequence
            = [self rangeOfComposedCharacterSequenceAtIndex:
                (isReverse ? position - 1 : position)];

          CALL_BLOCK(block,
            substringNotRequired ? nil : [self substringWithRange: sequence],
            sequence,
            sequence,
            &stop);
          if (stop)
            {
              break;
            }
          /* The sequence contains the probed index, so this always
           * moves position towards the far end of the range.
           */
          position = isReverse
            ? sequence.location : sequence.location + sequence.length;
        }
    }
  else if (substringType == NSStringEnumerationByWords
    || substringType == NSStringEnumerationBySentences)
    {
#if GS_USE_ICU
      BOOL              byWords
        = (substringType == NSStringEnumerationByWords) ? YES : NO;
      BOOL              localized
        = (opts & NSStringEnumerationLocalized) ? YES : NO;
      NSUInteger        length = range.length;
      UErrorCode        errorCode = U_ZERO_ERROR;
      UChar             *characters;
      const char        *locale;
      UBreakIterator    *breakIterator;
      int32_t           boundary;

      if (length == 0)
        {
          return;       // Nothing to enumerate.
        }
      if (length > (NSUInteger)0x7fffffff)
        {
          // ICU takes the text length as a signed 32 bit value.
          NSWarnMLog(@"Range too long for ICU break iterator");
          return;
        }

      /* Copy the text to the heap rather than a variable length array
       * on the stack, since the range may be arbitrarily long.
       */
      characters = NSZoneMalloc(NSDefaultMallocZone(),
        length * sizeof(UChar));
      [self getCharacters: (unichar*)characters range: range];

      locale = localized
        ? [[[[NSLocale currentLocale]
            localeIdentifier]
            // @ss=standard will use lists of common abbreviations,
            // such as Mr., Mrs., etc.
            stringByAppendingString: @"@ss=standard"]
            UTF8String]
        : "en_US_POSIX";

      breakIterator = ubrk_open(byWords ? UBRK_WORD : UBRK_SENTENCE,
        locale,
        characters,
        (int32_t)length,
        &errorCode);
      if (U_FAILURE(errorCode))
        {
          NSWarnMLog(@"Error opening ICU break iterator: %s",
            u_errorName(errorCode));
          if (breakIterator != NULL)
            {
              ubrk_close(breakIterator);
            }
          NSZoneFree(NSDefaultMallocZone(), characters);
          return;
        }
      else if (errorCode < U_ZERO_ERROR)
        {
          NSWarnMLog(@"Warning opening ICU break iterator: %s",
            u_errorName(errorCode));
        }

      /* Break positions are offsets into the copied text, so
       * range.location has to be added to turn them into indexes.
       */
      boundary = isReverse
        ? ubrk_last(breakIterator) : ubrk_first(breakIterator);
      while (YES)
        {
          int32_t       other;
          int32_t       ruleStatus = UBRK_WORD_NONE;
          // For words and sentences this is also the substring range.
          NSRange       enclosingRange;

          /* The rule status at a boundary describes the segment which
           * ends at that boundary.  Going forwards that is the boundary
           * just reached by ubrk_next(); going backwards it is the one
           * we are at before stepping to the previous boundary.
           */
          if (isReverse)
            {
              if (byWords)
                {
                  ruleStatus = ubrk_getRuleStatus(breakIterator);
                }
              other = ubrk_previous(breakIterator);
              if (other == UBRK_DONE)
                {
                  break;
                }
              enclosingRange
                = NSMakeRange(range.location + other, boundary - other);
            }
          else
            {
              other = ubrk_next(breakIterator);
              if (other == UBRK_DONE)
                {
                  break;
                }
              if (byWords)
                {
                  ruleStatus = ubrk_getRuleStatus(breakIterator);
                }
              enclosingRange
                = NSMakeRange(range.location + boundary, other - boundary);
            }

          /* Segments whose status lies in the UBRK_WORD_NONE category
           * (spaces, punctuation) are not words and are skipped.
           */
          if (!byWords
            || ruleStatus < UBRK_WORD_NONE
            || ruleStatus >= UBRK_WORD_NONE_LIMIT)
            {
              CALL_BLOCK(block,
                substringNotRequired ? nil
                  : [self substringWithRange: enclosingRange],
                enclosingRange,
                enclosingRange,
                &stop);
              if (stop)
                {
                  break;
                }
            }
          boundary = other;
        }

      ubrk_close(breakIterator);
      NSZoneFree(NSDefaultMallocZone(), characters);
#else
      NSWarnLog(@"NSStringEnumerationByWords and NSStringEnumerationBySentences are not supported when GNUstep-base is compiled without ICU.");
      return;
#endif
    }
  else if (substringType == NSStringEnumerationByCaretPositions)
    {
      // FIXME - Not documented by Apple.
      NSWarnLog(@"NSStringEnumerationByCaretPositions is not supported");
      return;
    }
  else if (substringType == NSStringEnumerationByDeletionClusters)
    {
      // FIXME - Not documented by Apple.
      NSWarnLog(@"NSStringEnumerationByDeletionClusters is not supported");
      return;
    }
}
|
||||
|
||||
@end
|
||||
|
||||
/**
|
||||
|
|
98
Tests/base/NSString/enumerateSubstringsInRange.m
Normal file
98
Tests/base/NSString/enumerateSubstringsInRange.m
Normal file
|
@ -0,0 +1,98 @@
|
|||
#import "ObjectTesting.h"
|
||||
#import <Foundation/NSAutoreleasePool.h>
|
||||
#import <Foundation/NSString.h>
|
||||
|
||||
/* A compiler which does not provide __has_extension cannot evaluate
 * '__has_extension(blocks)' even behind a 'defined()' test, because the
 * whole #if expression is parsed; supply a fallback instead.
 */
#ifndef __has_extension
#define __has_extension(x) 0
#endif

#if __has_extension(blocks)
/* Checks -enumerateSubstringsInRange:options:usingBlock: for each of the
 * line, paragraph, word and sentence substring types, verifying the
 * substrings delivered and the number of times the block is called.
 * The word and sentence sets are skipped when ICU is unavailable.
 */
int main (int argc, const char * argv[])
{
  NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];

  START_SET("Enumerate substrings by lines");

  NSString* s1 = @"Line 1\nLine 2";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange: NSMakeRange(0, [s1 length])
                         options: NSStringEnumerationByLines
                      usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}",
      (unsigned long)substringRange.location,
      (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}",
      (unsigned long)enclosingRange.location,
      (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    if (currentIteration == 0) PASS([substring isEqual: @"Line 1"], "First line of \"Line 1\\nLine 2\" is \"Line 1\"");
    if (currentIteration == 1) PASS([substring isEqual: @"Line 2"], "Second line of \"Line 1\\nLine 2\" is \"Line 2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two lines in \"Line 1\\nLine 2\"");

  END_SET("Enumerate substrings by lines");

  START_SET("Enumerate substrings by paragraphs");

  NSString* s1 = @"Paragraph 1\nParagraph 2";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange: NSMakeRange(0, [s1 length])
                         options: NSStringEnumerationByParagraphs
                      usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}",
      (unsigned long)substringRange.location,
      (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}",
      (unsigned long)enclosingRange.location,
      (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    if (currentIteration == 0) PASS([substring isEqual: @"Paragraph 1"], "First paragraph of \"Paragraph 1\\nParagraph 2\" is \"Paragraph 1\"");
    if (currentIteration == 1) PASS([substring isEqual: @"Paragraph 2"], "Second paragraph of \"Paragraph 1\\nParagraph 2\" is \"Paragraph 2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two paragraphs in \"Paragraph 1\\nParagraph 2\"");

  END_SET("Enumerate substrings by paragraphs");

  START_SET("Enumerate substrings by words");

#if !__APPLE__ && !GS_USE_ICU
  // Word enumeration is implemented with ICU break iterators.
  SKIP("NSStringEnumerationByWords is not supported without ICU");
#else
  NSString* s1 = @"Word1 word2.";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange: NSMakeRange(0, [s1 length])
                         options: NSStringEnumerationByWords
                      usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}",
      (unsigned long)substringRange.location,
      (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}",
      (unsigned long)enclosingRange.location,
      (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    if (currentIteration == 0) PASS([substring isEqual: @"Word1"], "First word of \"Word1 word2.\" is \"Word1\"");
    if (currentIteration == 1) PASS([substring isEqual: @"word2"], "Second word of \"Word1 word2.\" is \"word2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two words in \"Word1 word2.\"");
#endif

  END_SET("Enumerate substrings by words");

  START_SET("Enumerate substrings by sentences");

#if !__APPLE__ && !GS_USE_ICU
  // Sentence enumeration is implemented with ICU break iterators.
  SKIP("NSStringEnumerationBySentences is not supported without ICU");
#else
  NSString* s1 = @"Sentence 1. Sentence 2.";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange: NSMakeRange(0, [s1 length])
                         options: NSStringEnumerationBySentences
                      usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}",
      (unsigned long)substringRange.location,
      (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}",
      (unsigned long)enclosingRange.location,
      (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    if (currentIteration == 0) PASS([substring isEqual: @"Sentence 1. "], "First sentence of \"Sentence 1. Sentence 2.\" is \"Sentence 1. \"");
    if (currentIteration == 1) PASS([substring isEqual: @"Sentence 2."], "Second sentence of \"Sentence 1. Sentence 2.\" is \"Sentence 2.\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two sentences in \"Sentence 1. Sentence 2.\"");
#endif

  END_SET("Enumerate substrings by sentences");

  [pool drain];

  return 0;
}
#else
/* Blocks are not available to this compiler, so there is nothing to test. */
int main (int argc, const char * argv[])
{
  return 0;
}
#endif
|
Loading…
Reference in a new issue