From e22ff194a44ca7fcb1425c0285b3a9c39639f053 Mon Sep 17 00:00:00 2001 From: mccallum Date: Sat, 3 May 1997 18:05:21 +0000 Subject: [PATCH] Changes from Stevo Crvenkovski. See ChangeLog Mar 23 22:14:21 git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@2302 72102866-910b-0410-8b05-ffd578937521 --- ChangeLog | 52 ++ Headers/gnustep/base/NSString.h | 5 + Source/Makefile.in | 6 + Source/NSGCString.m | 22 +- Source/NSString.m | 1120 ++++++++++++++++++++++--------- 5 files changed, 875 insertions(+), 330 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7e33697f3..4246c6818 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ Sat May 3 12:28:48 1997 Andrew McCallum + * examples/Makefile.in (RCS_FILES): Added custom-zone.m. + * examples/custom-zone.m: New file. + * src/NSBundle.m ([NSBundle +pathForResource:ofType:inDirectory:]): Change it from an instance method to a class method, in order to conform to spec. @@ -15,6 +18,55 @@ Sat May 3 12:28:48 1997 Andrew McCallum Use NSBundle's pathForResource:ofType:inDirectory method properly. (Reported by Stevo Crvenkovski .) +Sun Mar 23 22:14:21 1997 Stevo Crvenkovski + + * src/NSGString.m: New file Initial implementation of Unicode + * src/include/NSGString.h : New file Interface for NSGString + * src/NSGSequence.m: New file - class that deals with Unicode + composite character sequences + * src/include/NSGSequence.h : New file Interface for NSGSequence + * src/Unicode.m: New file - functions for Unicode + * src/include/Unicode.h : New file + + * src/Makefile.in: Added NSGString.m, NSGString.h, NSGSequence.m, + NSGSequence.h, Unicode.m, Unicode.h + + * src/NSGCString.m ([NSGCString -length]): Method implemented. + ([NSGCString -characterAtIndex]): Changed to work + with encodings + ([NSGCString -_baseLength]): private method for + composite character sequences. + ([NSGCString -initWithString:]): CString implementation + moved here from NSString.m + + * src/NSString.m : Changed NSString_concrete_class to NSGString + and NSMutableString_concrete_class to NSGMutableCString + ([NSString +stringWithCharacters]): Method implemented. + ([NSString -initWithCharacters:length:]): Method implemented. + ([NSString -stringByAppendingString]): changed for Unicode. + ([NSString -rangeOfString]): split method into 8 private _search* + methods and implemented full Unicode + ([NSString -compare:options:range:]): changed for Unicode. + ([NSString -isEqualToString]): changed for Unicode + ([NSString -description]): changed. + ([NSString -commonPrefixWithString]): changed for Unicode + ([NSString -capitalizedString]): changed for Unicode + ([NSString -lowercaseString]): changed for Unicode + ([NSString -uppercaseString]): changed for Unicode + ([NSString -getCString]): changed for Unicode + ([NSString -defaultCStringEncoding]): method implemented + ([NSString -_baseLength]): private method for composite + sequences. + ([NSString -stringWithFormat:]): null terminate buffer. + + *src/unicode : New directory - temporary holds data for unicode + functions. + *src/unicode/caseconv.h : New file + *src/unicode/cop.h : New file + *src/unicode/cyrillic.h : New file + *src/unicode/decomp.h : New file + *src/unicode/nextstep.h : New file + Fri Feb 14 08:51:39 1997 Yoo C. Chung * src/NSZone.m: Use [NSString -cStringNoCopy]. diff --git a/Headers/gnustep/base/NSString.h b/Headers/gnustep/base/NSString.h index 090174782..9be20417e 100644 --- a/Headers/gnustep/base/NSString.h +++ b/Headers/gnustep/base/NSString.h @@ -55,6 +55,7 @@ typedef enum _NSStringEncoding NSUTFStringEncoding, NSISOLatin1StringEncoding, NSSymbolStringEncoding, + NSCyrillicStringEncoding, NSNonLossyASCIIStringEncoding } NSStringEncoding; @@ -194,6 +195,9 @@ typedef enum _NSStringEncoding - (NSString*) stringByResolvingSymlinksInPath; - (NSString*) stringByStandardizingPath; +// for methods working with decomposed strings +- (int) _baseLength; + #ifndef STRICT_OPENSTEP + (NSString*) localizedStringWithFormat: (NSString*) format, ...; + (NSString*) stringWithFormat: (NSString*)format @@ -254,6 +258,7 @@ compiler warning. @end /* Because the compiler thinks that @".." strings are NXConstantString's. */ +#include #include @interface NXConstantString : NSGCString @end diff --git a/Source/Makefile.in b/Source/Makefile.in index d97b7e9a2..fd014735c 100644 --- a/Source/Makefile.in +++ b/Source/Makefile.in @@ -172,6 +172,7 @@ TcpPort.m \ TextCStream.m \ Time.m \ UdpPort.m \ +Unicode.m \ behavior.m \ lex.pl.m \ lex.sf.m \ @@ -295,6 +296,7 @@ include/TcpPort.h \ include/TextCStream.h \ include/Time.h \ include/UdpPort.h \ +include/Unicode.h \ include/ValueHolding.h \ include/behavior.h \ include/fake-main.h \ @@ -378,6 +380,8 @@ NSGCString.m \ NSGData.m \ NSGDictionary.m \ NSGSet.m \ +NSGSequence.m \ +NSGString.m \ NSHashTable.m \ NSHost.m \ NSInvocation.m \ @@ -455,6 +459,8 @@ include/NSGCString.h \ include/NSGData.h \ include/NSGDictionary.h \ include/NSGSet.h \ +include/NSGSequence.h \ +include/NSGString.h \ include/NSHashTable.h \ include/NSHost.h \ include/NSInvocation.h \ diff --git a/Source/NSGCString.m b/Source/NSGCString.m index 429693be8..96534c423 100644 --- a/Source/NSGCString.m +++ b/Source/NSGCString.m @@ -31,6 +31,9 @@ #include /* memcpy(), strlen(), strcmp() are gcc builtin's */ +#include + + @implementation NSGCString /* This is the designated initializer for this class. */ @@ -111,13 +114,19 @@ return _count; } +- (unsigned int) length +{ + return _count; +} + - (unichar) characterAtIndex: (unsigned int)index { /* xxx This should raise an NSException. */ CHECK_INDEX_RANGE_ERROR(index, _count); - return (unichar) _contents_chars[index]; + return chartouni(_contents_chars[index]); } + - (NSString*) substringFromRange: (NSRange)aRange { if (aRange.location > _count) @@ -128,6 +137,7 @@ length: aRange.length]; } + // FOR IndexedCollection SUPPORT; - objectAtIndex: (unsigned)index @@ -136,6 +146,16 @@ return [NSNumber numberWithChar: _contents_chars[index]]; } +- (int) _baseLength +{ + return _count; +} + +- (id) initWithString: (NSString*)string +{ + return [self initWithCString:[string cStringNoCopy]]; +} + @end diff --git a/Source/NSString.m b/Source/NSString.m index c5d0d291d..54fd11c9e 100644 --- a/Source/NSString.m +++ b/Source/NSString.m @@ -3,7 +3,11 @@ Written by: Andrew Kachites McCallum Date: January 1995 - + + Unicode implementation by Stevo Crvenkovski + + Date: February 1997 + This file is part of the GNUstep Base Library. This library is free software; you can redistribute it and/or @@ -15,21 +19,24 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. - + You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ +*/ /* Caveats: - Only supports C Strings. Some implementations will need to be - changed when we get other string backing classes. - + Some implementations will need to be changed. Does not support all justification directives for `%@' in format strings on non-GNU-libc systems. */ +/* Initial implementation of Unicode. Version 0.0.0 :) + Locales and encoding methods not yet supported. + Limited choice of default encodings. +*/ + #include #include #include @@ -49,6 +56,20 @@ #include #include +#include +#include + +#include +#include + + +// Choose default encoding +// xxx Should be install time option, not compile time +#define DEFAULT_ENCODING NSNEXTSTEPStringEncoding +// #define DEFAULT_ENCODING NSASCIIStringEncoding +// #define DEFAULT_ENCODING NSISOLatin1StringEncoding +// #define DEFAULT_ENCODING NSCyrillicStringEncoding + /* xxx Temporarily set HAVE_REGISTER_PRINTF_FUNCTION function to 0 because I can't seem to figure out which versions of libc pass a `va_list' to the output handler, and which pass a `void**' to the @@ -60,7 +81,7 @@ @implementation NSString -/* For unichar strings. (Not implemented---using cStrings) */ +/* For unichar strings. */ static Class NSString_concrete_class; static Class NSMutableString_concrete_class; @@ -165,9 +186,9 @@ handle_printf_atsign (FILE *stream, { if (self == [NSString class]) { - NSString_concrete_class = [NSGCString class]; + NSString_concrete_class = [NSGString class]; NSString_c_concrete_class = [NSGCString class]; - NSMutableString_concrete_class = [NSGMutableCString class]; + NSMutableString_concrete_class = [NSGMutableString class]; NSMutableString_c_concrete_class = [NSGMutableCString class]; #if HAVE_REGISTER_PRINTF_FUNCTION @@ -204,6 +225,14 @@ handle_printf_atsign (FILE *stream, return [[[self alloc] init] autorelease]; } ++ (NSString*) stringWithCharacters: (const unichar*)chars + length: (unsigned int)length +{ + return [[[self alloc] + initWithCharacters:chars length:length] + autorelease]; +} + + (NSString*) stringWithCString: (const char*) byteString { return [[[self alloc] initWithCString:byteString] @@ -224,11 +253,10 @@ handle_printf_atsign (FILE *stream, initWithContentsOfFile: path] autorelease]; } -+ (NSString*) stringWithCharacters: (const unichar*)chars - length: (unsigned int)length ++ (NSString*) stringWithCString: (const char*) byteString { - [self notImplemented:_cmd]; - return self; + return [[[self alloc] initWithCString:byteString] + autorelease]; } + (NSString*) stringWithFormat: (NSString*)format,... @@ -251,28 +279,27 @@ handle_printf_atsign (FILE *stream, autorelease]; } + // Initializing Newly Allocated Strings -- (id) init +/* This is the designated initializer for Unicode Strings. */ +- (id) initWithCharactersNoCopy: (unichar*)chars + length: (unsigned int)length + freeWhenDone: (BOOL)flag { - return [self initWithCString:""]; + [self subclassResponsibility:_cmd]; + return self; } -- (id) initWithCString: (const char*)byteString -{ - return [self initWithCString:byteString - length:(byteString ? strlen(byteString) : 0)]; -} - -- (id) initWithCString: (const char*)byteString +- (id) initWithCharacters: (const unichar*)chars length: (unsigned int)length { - char *s; - OBJC_MALLOC(s, char, length+1); - if (byteString) - memcpy(s, byteString, length); - s[length] = '\0'; - return [self initWithCStringNoCopy:s length:length freeWhenDone:YES]; + unichar *s; + OBJC_MALLOC(s, unichar, length+1); + if (chars) + memcpy(s, chars,2*length); + s[length] = (unichar)0; + return [self initWithCharactersNoCopy:s length:length freeWhenDone:YES]; } /* This is the designated initializer for CStrings. */ @@ -284,71 +311,29 @@ handle_printf_atsign (FILE *stream, return self; } -- (id) initWithCharacters: (const unichar*)chars - length: (unsigned int)length +- (id) initWithCString: (const char*)byteString length: (unsigned int)length { - [self notImplemented:_cmd]; - return self; + char *s; + OBJC_MALLOC(s, char, length+1); + if (byteString) + memcpy(s, byteString, length); + s[length] = '\0'; + return [self initWithCStringNoCopy:s length:length freeWhenDone:YES]; } -/* This is the designated initializer for unichar Strings. */ -- (id) initWithCharactersNoCopy: (unichar*)chars - length: (unsigned int)length - freeWhenDone: (BOOL)flag +- (id) initWithCString: (const char*)byteString { - [self subclassResponsibility:_cmd]; - return self; + return [self initWithCString:byteString + length:(byteString ? strlen(byteString) : 0)]; } -- (id) initWithContentsOfFile: (NSString*)path - { - /* xxx Maybe this should use StdioStream? */ -#ifdef __WIN32__ - NSMutableString *s = [NSMutableString stringWithCString:""]; - DWORD dwread; - char bytes[1024]; - BOOL res, done = NO; - HANDLE fd = CreateFile([path cString], GENERIC_READ, FILE_SHARE_READ, - NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); - - while (!done) - { - res = ReadFile(fd, bytes, 1023, &dwread, NULL); - bytes[dwread] = '\0'; - if ((res) && (dwread == 0)) - done = YES; - else - [s appendString: [NSString stringWithCString: bytes]]; - } - CloseHandle(fd); - [self initWithString: s]; - [s release]; - return self; -#else - int fd = open([path cString], O_RDONLY); - struct stat fstat_buf; - char* bytes = NULL; - - if((fd == -1) || (fstat(fd, &fstat_buf) == -1)) - return nil; - - OBJC_MALLOC(bytes, char, fstat_buf.st_size + 1); - if (read(fd, bytes, fstat_buf.st_size) != fstat_buf.st_size) { - OBJC_FREE(bytes); - return nil; - } - close(fd); - bytes[fstat_buf.st_size] = '\0'; - return [self initWithCStringNoCopy:bytes length:fstat_buf.st_size - freeWhenDone:YES]; -#endif -} - -- (id) initWithData: (NSData*)data - encoding: (NSStringEncoding)encoding +- (id) initWithString: (NSString*)string { - [self notImplemented:_cmd]; - return self; + unichar *s; + OBJC_MALLOC(s, unichar, [string length]+1); + [string getCharacters:s]; + s[[string length]] = (unichar)0; + return [self initWithCharactersNoCopy:s length:[string length] freeWhenDone:YES]; } - (id) initWithFormat: (NSString*)format,... @@ -439,30 +424,77 @@ handle_printf_atsign (FILE *stream, return self; } -/* xxx Change this when we have non-CString classes */ -- (id) initWithString: (NSString*)string +- (id) initWithData: (NSData*)data + encoding: (NSStringEncoding)encoding { - return [self initWithCString:[string cStringNoCopy]]; + [self notImplemented:_cmd]; + return self; } +- (id) initWithContentsOfFile: (NSString*)path + { + /* xxx Maybe this should use StdioStream? */ +#ifdef __WIN32__ + NSMutableString *s = [NSMutableString stringWithCString:""]; + DWORD dwread; + char bytes[1024]; + BOOL res, done = NO; + HANDLE fd = CreateFile([path cString], GENERIC_READ, FILE_SHARE_READ, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); + + while (!done) + { + res = ReadFile(fd, bytes, 1023, &dwread, NULL); + bytes[dwread] = '\0'; + if ((res) && (dwread == 0)) + done = YES; + else + [s appendString: [NSString stringWithCString: bytes]]; + } + CloseHandle(fd); + [self initWithString: s]; + [s release]; + return self; +#else + int fd = open([path cString], O_RDONLY); + struct stat fstat_buf; + char* bytes = NULL; + + if((fd == -1) || (fstat(fd, &fstat_buf) == -1)) + return nil; + + OBJC_MALLOC(bytes, char, fstat_buf.st_size + 1); + if (read(fd, bytes, fstat_buf.st_size) != fstat_buf.st_size) { + OBJC_FREE(bytes); + return nil; + } + close(fd); + bytes[fstat_buf.st_size] = '\0'; + return [self initWithCStringNoCopy:bytes length:fstat_buf.st_size + freeWhenDone:YES]; +#endif +} + +// xxx check this +- (id) init +{ + return [self initWithCString:""]; +} // Getting a String's Length -/* xxx Change this when we have non-CString classes */ - (unsigned int) length { - return [self cStringLength]; + [self subclassResponsibility:_cmd]; + return 0; } - // Accessing Characters -/* xxx Change this when we have non-CString classes */ - (unichar) characterAtIndex: (unsigned int)index { - /* xxx raise NSException instead of assert. */ - assert(index < [self cStringLength]); - return (unichar) [self cStringNoCopy][index]; + [self subclassResponsibility:_cmd]; + return (unichar)0; } /* Inefficient. Should be overridden */ @@ -481,9 +513,9 @@ handle_printf_atsign (FILE *stream, { buffer[i] = [self characterAtIndex: aRange.location+i]; } + buffer[aRange.length] = (unichar)0; } - // Combining Strings - (NSString*) stringByAppendingFormat: (NSString*)format,... @@ -497,14 +529,15 @@ handle_printf_atsign (FILE *stream, return ret; } -/* xxx Change this when we have non-CString classes */ - (NSString*) stringByAppendingString: (NSString*)aString { - unsigned len = [self cStringLength]; - char *s = alloca(len + [aString cStringLength] + 1); - s = strcpy(s, [self cStringNoCopy]); - strcpy(s + len, [aString cStringNoCopy]); - return [NSString stringWithCString:s]; + unsigned len = [self length]; + unichar *s; + OBJC_MALLOC(s, unichar, len + [aString length]+1); + [self getCharacters:s]; + [aString getCharacters:s+len]; + s[len + [aString length]]=(unichar) 0; + return [[self class] stringWithCharacters:s length: len + [aString length]]; } @@ -544,19 +577,8 @@ handle_printf_atsign (FILE *stream, - (NSString*) substringFromRange: (NSRange)aRange { - char buffer[aRange.length]; - int count = [self length]; - - if (aRange.location > count) - [NSException raise: NSRangeException format: @"Invalid location."]; - if (aRange.length > (count - aRange.location)) - [NSException raise: NSRangeException format: @"Invalid location+length."]; - /* This will only DTRT for CString's... but that's all we have right now. */ - [self getCString: buffer - maxLength: aRange.length - range: aRange - remainingRange: NULL]; - return [[self class] stringWithCString: buffer length: aRange.length]; + [self subclassResponsibility:_cmd]; + return self; } - (NSString*) substringToIndex: (unsigned int)index @@ -564,7 +586,6 @@ handle_printf_atsign (FILE *stream, return [self substringFromRange:((NSRange){0,index})];; } - // Finding Ranges of Characters and Substrings - (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet @@ -584,8 +605,7 @@ handle_printf_atsign (FILE *stream, range:all]; } -/* FIXME: how do you do a case insensitive search? what's an anchored - search? what's a literal search? */ +/* xxx FIXME */ - (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet options: (unsigned int)mask range: (NSRange)aRange @@ -635,49 +655,24 @@ handle_printf_atsign (FILE *stream, range:all]; } -- (NSRange) rangeOfString:(NSString *) aString +- (NSRange) _searchForwardCaseInsensitiveLiteral:(NSString *) aString options:(unsigned int) mask range:(NSRange) aRange { - int stepDirection; - unsigned int myIndex, myLength, myEndIndex; + unsigned int myIndex, myEndIndex; unsigned int strLength; unichar strFirstCharacter; - /* Check that the search range is reasonable */ - myLength = [self length]; - if (aRange.location > myLength) - [NSException raise: NSRangeException format:@"Invalid location."]; - if (aRange.length > (myLength - aRange.location)) - [NSException raise: NSRangeException format:@"Invalid location+length."]; - - /* Ensure the string can be found */ strLength = [aString length]; - if (strLength > aRange.length) - return (NSRange){0, 0}; - /* Decide where to start and end the search */ - if (mask & NSBackwardsSearch) - { - stepDirection = -1; - myIndex = aRange.location + aRange.length - strLength; - myEndIndex = aRange.location; - } - else - { - stepDirection = 1; - myIndex = aRange.location; - myEndIndex = aRange.location + aRange.length - strLength; - } - /* FIXME: I am guessing that this is what NSAnchoredSearch does. */ + myIndex = aRange.location; + myEndIndex = aRange.location + aRange.length - strLength; + if (mask & NSAnchoredSearch) myEndIndex = myIndex; - /* Start searching. For efficiency there are separate loops for - case-sensitive and case-insensitive searches. */ strFirstCharacter = [aString characterAtIndex:0]; - if (mask & NSCaseInsensitiveSearch) - { + for (;;) { unsigned int i = 1; @@ -686,14 +681,8 @@ handle_printf_atsign (FILE *stream, for (;;) { - /* FIXME: I have no idea how to make case-insensitive - comparisons work over the full range of Unicode characters. */ if ((myCharacter != strCharacter) && - (!isascii (myCharacter) - || !isalpha (myCharacter) - || !isascii (strCharacter) - || !isalpha (strCharacter) - || (tolower (myCharacter) != tolower (strCharacter)))) + ((uni_tolower (myCharacter) != uni_tolower (strCharacter)))) break; if (i == strLength) return (NSRange){myIndex, strLength}; @@ -703,11 +692,72 @@ handle_printf_atsign (FILE *stream, } if (myIndex == myEndIndex) break; - myIndex += stepDirection; + myIndex ++; } - } - else - { + return (NSRange){0, 0}; +} + +- (NSRange) _searchBackwardCaseInsensitiveLiteral:(NSString *) aString + options:(unsigned int) mask + range:(NSRange) aRange +{ + unsigned int myIndex, myEndIndex; + unsigned int strLength; + unichar strFirstCharacter; + + strLength = [aString length]; + + myIndex = aRange.location + aRange.length - strLength; + myEndIndex = aRange.location; + + + if (mask & NSAnchoredSearch) + myEndIndex = myIndex; + + strFirstCharacter = [aString characterAtIndex:0]; + + for (;;) + { + unsigned int i = 1; + unichar myCharacter = [self characterAtIndex:myIndex]; + unichar strCharacter = strFirstCharacter; + + for (;;) + { + if ((myCharacter != strCharacter) && + ((uni_tolower (myCharacter) != uni_tolower (strCharacter)))) + break; + if (i == strLength) + return (NSRange){myIndex, strLength}; + myCharacter = [self characterAtIndex:myIndex + i]; + strCharacter = [aString characterAtIndex:i]; + i++; + } + if (myIndex == myEndIndex) + break; + myIndex --; + } + return (NSRange){0, 0}; +} + +- (NSRange) _searchForwardLiteral:(NSString *) aString + options:(unsigned int) mask + range:(NSRange) aRange +{ + unsigned int myIndex, myEndIndex; + unsigned int strLength; + unichar strFirstCharacter; + + strLength = [aString length]; + + myIndex = aRange.location; + myEndIndex = aRange.location + aRange.length - strLength; + + if (mask & NSAnchoredSearch) + myEndIndex = myIndex; + + strFirstCharacter = [aString characterAtIndex:0]; + for (;;) { unsigned int i = 1; @@ -726,29 +776,408 @@ handle_printf_atsign (FILE *stream, } if (myIndex == myEndIndex) break; - myIndex += stepDirection; + myIndex ++; } - } return (NSRange){0, 0}; } +- (NSRange) _searchBackwardLiteral:(NSString *) aString + options:(unsigned int) mask + range:(NSRange) aRange +{ + unsigned int myIndex, myEndIndex; + unsigned int strLength; + unichar strFirstCharacter; + + strLength = [aString length]; + + myIndex = aRange.location + aRange.length - strLength; + myEndIndex = aRange.location; + + + if (mask & NSAnchoredSearch) + myEndIndex = myIndex; + + strFirstCharacter = [aString characterAtIndex:0]; + + for (;;) + { + unsigned int i = 1; + unichar myCharacter = [self characterAtIndex:myIndex]; + unichar strCharacter = strFirstCharacter; + + for (;;) + { + if (myCharacter != strCharacter) + break; + if (i == strLength) + return (NSRange){myIndex, strLength}; + myCharacter = [self characterAtIndex:myIndex + i]; + strCharacter = [aString characterAtIndex:i]; + i++; + } + if (myIndex == myEndIndex) + break; + myIndex --; + } + return (NSRange){0, 0}; +} + + +- (NSRange) _searchForwardCaseInsensitive:(NSString *) aString + options:(unsigned int) mask + range:(NSRange) aRange +{ + unsigned int myIndex, myEndIndex; + unsigned int strLength, strBaseLength; + id strFirstCharacterSeq; + + strLength = [aString length]; + strBaseLength = [aString _baseLength]; + + myIndex = aRange.location; + myEndIndex = aRange.location + aRange.length - strBaseLength; + + if (mask & NSAnchoredSearch) + myEndIndex = myIndex; + + strFirstCharacterSeq = [NSGSequence sequenceWithString: aString + range: [aString rangeOfComposedCharacterSequenceAtIndex: 0]]; + + for (;;) + { + NSRange myRange; + NSRange mainRange; + NSRange strRange; + unsigned int myCount = 1; + unsigned int strCount = 1; + id myCharacter = [NSGSequence sequenceWithString: self + range: [self rangeOfComposedCharacterSequenceAtIndex: myIndex]]; + id strCharacter = strFirstCharacterSeq; + for (;;) + { + if (![[myCharacter normalize] isEqual: [strCharacter normalize]] + && ![[[myCharacter lowercase] normalize] isEqual: [[strCharacter lowercase] normalize]]) + + break; + if (strCount >= strLength) + return (NSRange){myIndex, myCount}; + myRange = [self rangeOfComposedCharacterSequenceAtIndex: myIndex + myCount]; + myCharacter = [NSGSequence sequenceWithString: self range: myRange]; + strRange = [aString rangeOfComposedCharacterSequenceAtIndex: strCount]; + strCharacter = [NSGSequence sequenceWithString: aString range: strRange]; + myCount += myRange.length; + strCount += strRange.length; + } /* for */ + if (myIndex >= myEndIndex) + break; + mainRange = [self rangeOfComposedCharacterSequenceAtIndex: myIndex]; + myIndex += mainRange.length; + } /* for */ + return (NSRange){0, 0}; +} + +- (NSRange) _searchBackwardCaseInsensitive:(NSString *) aString + options:(unsigned int) mask + range:(NSRange) aRange +{ + unsigned int myIndex, myEndIndex; + unsigned int strLength, strBaseLength; + id strFirstCharacterSeq; + + strLength = [aString length]; + strBaseLength = [aString _baseLength]; + + myIndex = aRange.location + aRange.length - strBaseLength; + myEndIndex = aRange.location; + + if (mask & NSAnchoredSearch) + myEndIndex = myIndex; + + strFirstCharacterSeq = [NSGSequence sequenceWithString: aString + range: [aString rangeOfComposedCharacterSequenceAtIndex: 0]]; + + for (;;) + { + NSRange myRange; + NSRange strRange; + unsigned int myCount = 1; + unsigned int strCount = 1; + id myCharacter = [NSGSequence sequenceWithString: self + range: [self rangeOfComposedCharacterSequenceAtIndex: myIndex]]; + id strCharacter = strFirstCharacterSeq; + for (;;) + { + if (![[myCharacter normalize] isEqual: [strCharacter normalize]] + && ![[[myCharacter lowercase] normalize] isEqual: [[strCharacter lowercase] normalize]]) + + break; + if (strCount >= strLength) + return (NSRange){myIndex, myCount}; + myCharacter = [NSGSequence sequenceWithString: self range: [self rangeOfComposedCharacterSequenceAtIndex: myIndex + myCount]]; + myRange = [self rangeOfComposedCharacterSequenceAtIndex: myIndex + myCount]; + strCharacter = [NSGSequence sequenceWithString: aString range: [aString rangeOfComposedCharacterSequenceAtIndex: strCount]]; + strRange = [aString rangeOfComposedCharacterSequenceAtIndex: strCount]; + myCount += myRange.length; + strCount += strRange.length; + } /* for */ + if (myIndex <= myEndIndex) + break; + myIndex--; + while(uni_isnonsp([self characterAtIndex: myIndex])&&(myIndex>0)) + myIndex--; + } /* for */ + return (NSRange){0, 0}; +} + + +- (NSRange) _searchForward:(NSString *) aString + options:(unsigned int) mask + range:(NSRange) aRange +{ + unsigned int myIndex, myEndIndex; + unsigned int strLength, strBaseLength; + id strFirstCharacterSeq; + + strLength = [aString length]; + strBaseLength = [aString _baseLength]; + + myIndex = aRange.location; + myEndIndex = aRange.location + aRange.length - strBaseLength; + + if (mask & NSAnchoredSearch) + myEndIndex = myIndex; + + strFirstCharacterSeq = [NSGSequence sequenceWithString: aString + range: [aString rangeOfComposedCharacterSequenceAtIndex: 0]]; + + for (;;) + { + NSRange myRange; + NSRange strRange; + NSRange mainRange; + unsigned int myCount = 1; + unsigned int strCount = 1; + id myCharacter = [NSGSequence sequenceWithString: self + range: [self rangeOfComposedCharacterSequenceAtIndex: myIndex]]; + id strCharacter = strFirstCharacterSeq; + for (;;) + { + if (![[myCharacter normalize] isEqual: [strCharacter normalize]]) + break; + if (strCount >= strLength) + return (NSRange){myIndex, myCount}; + myRange = [self rangeOfComposedCharacterSequenceAtIndex: myIndex + myCount]; + myCharacter = [NSGSequence sequenceWithString: self range: myRange]; + strRange = [aString rangeOfComposedCharacterSequenceAtIndex: strCount]; + strCharacter = [NSGSequence sequenceWithString: aString range: strRange]; + myCount += myRange.length; + strCount += strRange.length; + } /* for */ + if (myIndex >= myEndIndex) + break; + mainRange = [self rangeOfComposedCharacterSequenceAtIndex: myIndex]; + myIndex += mainRange.length; + } /* for */ + return (NSRange){0, 0}; +} + + +- (NSRange) _searchBackward:(NSString *) aString + options:(unsigned int) mask + range:(NSRange) aRange +{ + unsigned int myIndex, myEndIndex; + unsigned int strLength, strBaseLength; + id strFirstCharacterSeq; + + strLength = [aString length]; + strBaseLength = [aString _baseLength]; + + myIndex = aRange.location + aRange.length - strBaseLength; + myEndIndex = aRange.location; + + if (mask & NSAnchoredSearch) + myEndIndex = myIndex; + + strFirstCharacterSeq = [NSGSequence sequenceWithString: aString + range: [aString rangeOfComposedCharacterSequenceAtIndex: 0]]; + + for (;;) + { + NSRange myRange; + NSRange strRange; + unsigned int myCount = 1; + unsigned int strCount = 1; + id myCharacter = [NSGSequence sequenceWithString: self + range: [self rangeOfComposedCharacterSequenceAtIndex: myIndex]]; + id strCharacter = strFirstCharacterSeq; + for (;;) + { + if (![[myCharacter normalize] isEqual: [strCharacter normalize]]) + + break; + if (strCount >= strLength) + return (NSRange){myIndex, myCount}; + myCharacter = [NSGSequence sequenceWithString: self range: [self rangeOfComposedCharacterSequenceAtIndex: myIndex + myCount]]; + myRange = [self rangeOfComposedCharacterSequenceAtIndex: myIndex + myCount]; + strCharacter = [NSGSequence sequenceWithString: aString range: [aString rangeOfComposedCharacterSequenceAtIndex: strCount]]; + strRange = [aString rangeOfComposedCharacterSequenceAtIndex: strCount]; + myCount += myRange.length; + strCount += strRange.length; + } /* for */ + if (myIndex <= myEndIndex) + break; + myIndex--; + while(uni_isnonsp([self characterAtIndex: myIndex])&&(myIndex>0)) + myIndex--; + } /* for */ + return (NSRange){0, 0}; +} + +- (NSRange) rangeOfString:(NSString *) aString + options:(unsigned int) mask + range:(NSRange) aRange +{ + + #define FCLS 3 + #define BCLS 7 + #define FLS 2 + #define BLS 6 + #define FCS 1 + #define BCS 5 + #define FS 0 + #define BS 4 + #define FCLAS 11 + #define BCLAS 15 + #define FLAS 10 + #define BLAS 14 + #define FCAS 9 + #define BCAS 13 + #define FAS 8 + #define BAS 12 + + unsigned int myLength, strLength; + + /* Check that the search range is reasonable */ + myLength = [self length]; + if (aRange.location > myLength) + [NSException raise: NSRangeException format:@"Invalid location."]; + if (aRange.length > (myLength - aRange.location)) + [NSException raise: NSRangeException format:@"Invalid location+length."]; + + + /* Ensure the string can be found */ + strLength = [aString length]; + if (strLength > aRange.length) + return (NSRange){0, 0}; + + switch (mask) + { + case FCLS : + case FCLAS : + return [self _searchForwardCaseInsensitiveLiteral: aString + options: mask + range: aRange]; + break; + + case BCLS : + case BCLAS : + return [self _searchBackwardCaseInsensitiveLiteral: aString + options: mask + range: aRange]; + break; + + case FLS : + case FLAS : + return [self _searchForwardLiteral: aString + options: mask + range: aRange]; + break; + + case BLS : + case BLAS : + return [self _searchBackwardLiteral: aString + options: mask + range: aRange]; + break; + + case FCS : + case FCAS : + return [self _searchForwardCaseInsensitive: aString + options: mask + range: aRange]; + break; + + case BCS : + case BCAS : + return [self _searchBackwardCaseInsensitive: aString + options: mask + range: aRange]; + break; + + case BS : + case BAS : + return [self _searchBackward: aString + options: mask + range: aRange]; + break; + + case FS : + case FAS : + default : + return [self _searchForward: aString + options: mask + range: aRange]; + break; + } + return (NSRange){0, 0}; +} + // Determining Composed Character Sequences -- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (unsigned int)anIndex + - (NSRange) rangeOfComposedCharacterSequenceAtIndex: (unsigned int)anIndex { - [self notImplemented:_cmd]; - return ((NSRange){0,0}); + unsigned int start, end; + + start=anIndex; + while(uni_isnonsp([self characterAtIndex: start])) + start++; + end=start+1; + if(end < [self length]) + while(uni_isnonsp([self characterAtIndex: end])) + end++; + return NSMakeRange(start, end-start); } +// Converting String Contents into a Property List + +// xxx C strings only ??? +- (id)propertyList +{ + id obj; + void *bufstate; + bufstate = (void *)pl_scan_string([self cString]); + obj = (id)plparse(); + pl_delete_buffer(bufstate); + return obj; +} + +// xxx C strings only ??? +- (NSDictionary*) propertyListFromStringsFileFormat +{ + id dict = [[[NSMutableDictionary alloc] init] autorelease]; + void *bufstate; + + bufstate = (void *)sf_scan_string([self cString]); + sfSetDict(dict); + sfparse(dict); + sf_delete_buffer(bufstate); + return dict; +} // Identifying and Comparing Strings -- (NSComparisonResult) caseInsensitiveCompare: (NSString*)aString -{ - return [self compare:aString options:NSCaseInsensitiveSearch - range:((NSRange){0, [self length]})]; -} - - (NSComparisonResult) compare: (NSString*)aString { return [self compare:aString options:0]; @@ -761,36 +1190,31 @@ handle_printf_atsign (FILE *stream, range:((NSRange){0, MAX([self length], [aString length])})]; } +// xxx Should implement full POSIX.2 collate - (NSComparisonResult) compare: (NSString*)aString options: (unsigned int)mask range: (NSRange)aRange { - /* xxx ignores NSAnchoredSearch in mask. Fix this. */ - /* xxx only handles C-string encoding */ - +if (mask & NSLiteralSearch) +{ int i, start, end, increment; - const char *s1 = [self cStringNoCopy]; - const char *s2 = [aString cStringNoCopy]; + unichar *s1; + unichar *s2; + OBJC_MALLOC(s1, unichar,[self length] +1); + OBJC_MALLOC(s2, unichar,[aString length] +1); + [self getCharacters:s1]; + [aString getCharacters:s2]; - if (mask & NSBackwardsSearch) - { - start = aRange.location + aRange.length; - end = aRange.location; - increment = -1; - } - else - { start = aRange.location; end = aRange.location + aRange.length; increment = 1; - } if (mask & NSCaseInsensitiveSearch) { for (i = start; i < end; i += increment) { - int c1 = tolower(s1[i]); - int c2 = tolower(s2[i]); + int c1 = uni_tolower(s1[i]); + int c2 = uni_tolower(s2[i]); if (c1 < c2) return NSOrderedAscending; if (c1 > c2) return NSOrderedDescending; } @@ -804,6 +1228,38 @@ handle_printf_atsign (FILE *stream, } } return NSOrderedSame; +} /* if NSLiteralSearch */ +else +{ + int start, end, myCount, strCount; + NSRange myRange, strRange; + id mySeq, strSeq; + NSComparisonResult result; + + start = aRange.location; + end = aRange.location + aRange.length; + myCount = start; + strCount = start; + while(myCount < end) + { + if(strCount>=[aString length]) + return NSOrderedAscending; + myRange = [self rangeOfComposedCharacterSequenceAtIndex: myCount]; + myCount += myRange.length; + strRange = [aString rangeOfComposedCharacterSequenceAtIndex: strCount]; + strCount += strRange.length; + mySeq = [NSGSequence sequenceWithString: self range: myRange]; + strSeq = [NSGSequence sequenceWithString: aString range: strRange]; + if (mask & NSCaseInsensitiveSearch) + result = [[mySeq lowercase] compare: [strSeq lowercase]]; + else + result = [mySeq compare: strSeq]; + if(result != NSOrderedSame) + return result; + } /* while */ + return NSOrderedSame; + } /* else */ + return NSOrderedSame; } - (BOOL) hasPrefix: (NSString*)aString @@ -820,6 +1276,20 @@ handle_printf_atsign (FILE *stream, return (range.location == ([self length] - [aString length])) ? YES : NO; } +- (BOOL) isEqual: (id)anObject +{ + if ([anObject isKindOf:[NSString class]]) + return [self isEqualToString:anObject]; + return NO; +} + +- (BOOL) isEqualToString: (NSString*)aString +{ + return [self compare:aString]==NSOrderedSame; +} + +// xxx C string implementation +// xxx Should work on normalized strings - (unsigned int) hash { unsigned ret = 0; @@ -835,18 +1305,82 @@ handle_printf_atsign (FILE *stream, return ret; } -- (BOOL) isEqual: (id)anObject +// Getting a Shared Prefix + +// xxx Unicode level 1 only +- (NSString*) commonPrefixWithString: (NSString*)aString + options: (unsigned int)mask { - if ([anObject isKindOf:[NSString class]]) - return [self isEqualToString:anObject]; - return NO; + int prefix_len = 0; + unichar *s1; + unichar *s2; + unichar *u; + OBJC_MALLOC(s1, unichar,[self length] +1); + OBJC_MALLOC(s2, unichar,[aString length] +1); + u=s1; + [self getCharacters:s1]; + [aString getCharacters:s2]; + while (*s1 && *s2 + && ((*s1 == *s2) + || ((mask & NSCaseInsensitiveSearch) + && (uni_tolower (*s1) == uni_tolower (*s2))))) + + { + s1++; + s2++; + prefix_len++; + } + return [NSString stringWithCharacters: u length: prefix_len]; } -- (BOOL) isEqualToString: (NSString*)aString +// Changing Case + +// xxx There is more than this in word capitalization in Unicode, +// but this will work in most cases +// xxx fix me - consider tab, newline and friends +- (NSString*) capitalizedString { - return ! strcmp([self cStringNoCopy], [aString cStringNoCopy]); + unichar *s; + int count=0; + int len=[self length]; + OBJC_MALLOC(s, unichar,len +1); + s[0]=uni_toupper([self characterAtIndex:0]); + while(countlength = aRange.length - maxLength; } } - memcpy(buffer, [self cStringNoCopy] + aRange.location, len); + count=0; + while(count