More changes to build in standard bitmaps,

update the charset data from the latest unicode,
add 0x85 to the whitespace and newline set
add the capitalizedLetterCharacterSet


git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@20856 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
CaS 2005-03-06 09:15:08 +00:00
parent 41c07070c3
commit 563b6a4282
6 changed files with 114882 additions and 126 deletions

View file

@ -1,3 +1,17 @@
2005-03-06 Richard Frith-Macdonald <rfm@gnu.org>
* Headers/Foundation/NSCharacterSet.h: Add documentation and method
([-capitalizedLetterCharacterSet])
* Source/NSCharacterSet.m: Add new method and move documentation to
header. Include local NSCharacterSetData.h for bitmap data.
* Source/NSCharacterSetData.h: New file containing the bitmap data
for the standard charactersets. Generated by dev-apps/charsets
tools using the latest unicode characterset data today.
* Source/NSCharacterSets/README.CharSet: mark as obsolete.
* Source/GNUmakefile: No longer build/install NSCharacterSets
The standard bitmap data for charactersets is now built in to
NSCharacterSet.
2005-03-04 Richard Frith-Macdonald <rfm@gnu.org>
* Source/Additions/GSXML.m: GSXMLRPC integrated.

View file

@ -50,7 +50,7 @@ PACKAGE_NAME = gnustep-base
#
# The list of subproject directories
#
SUBPROJECTS = NSCharacterSets Source SSL Tools NSTimeZones Resources
SUBPROJECTS = Source SSL Tools NSTimeZones Resources
-include Makefile.preamble

View file

@ -28,45 +28,185 @@
@class NSData;
/**
* Represents a set of unicode characters. Used by [NSScanner] and [NSString]
* for parsing-related methods.
*/
@interface NSCharacterSet : NSObject <NSCoding, NSCopying, NSMutableCopying>
// Creating standard character sets
/**
* Returns a character set containing letters, numbers, and diacritical
* marks. Note that "letters" includes all alphabetic as well as Chinese
* characters, etc.
*/
+ (NSCharacterSet*) alphanumericCharacterSet;
#ifndef STRICT_OPENSTEP
/**
* Returns a character set containing letters in the unicode
* Titlecase category.
*/
+ (NSCharacterSet*) capitalizedLetterCharacterSet;
#endif
/**
* Returns a character set containing control and format characters.
*/
+ (NSCharacterSet*) controlCharacterSet;
/**
* Returns a character set containing characters that represent
* the decimal digits 0 through 9.
*/
+ (NSCharacterSet*) decimalDigitCharacterSet;
/**
* Returns a character set containing individual characters that
* can be represented also by a composed character sequence.
*/
+ (NSCharacterSet*) decomposableCharacterSet;
/**
* Returns a character set containing unassigned (illegal)
* character values.
*/
+ (NSCharacterSet*) illegalCharacterSet;
/**
* Returns a character set containing letters, including all alphabetic as
* well as Chinese characters, etc.
*/
+ (NSCharacterSet*) letterCharacterSet;
/**
* Returns a character set that contains the lowercase characters.
* This set does not include caseless characters, only those that
* have corresponding characters in uppercase and/or titlecase.
*/
+ (NSCharacterSet*) lowercaseLetterCharacterSet;
/**
* Returns a character set containing characters for diacritical marks, which
* are usually only rendered in conjunction with another character.
*/
+ (NSCharacterSet*) nonBaseCharacterSet;
/**
* Returns a character set containing punctuation marks.
*/
+ (NSCharacterSet*) punctuationCharacterSet;
/**
* Returns a character set containing mathematical symbols, etc.
*/
+ (NSCharacterSet*) symbolAndOperatorCharacterSet;
/**
* Returns a character set that contains the uppercase characters.
* This set does not include caseless characters, only those that
* have corresponding characters in lowercase and/or titlecase.
*/
+ (NSCharacterSet*) uppercaseLetterCharacterSet;
/**
* Returns a character set that contains the whitespace characters,
* plus the newline characters 0x000A and 0x000D and the next-line
* character 0x0085.
*/
+ (NSCharacterSet*) whitespaceAndNewlineCharacterSet;
/**
* Returns a character set that contains the whitespace characters.
*/
+ (NSCharacterSet*) whitespaceCharacterSet;
// Creating custom character sets
/**
* Returns a character set containing characters as encoded in the
* data object (8192 bytes).
*/
+ (NSCharacterSet*) characterSetWithBitmapRepresentation: (NSData*)data;
/**
* Returns set with characters in aString, or empty set for empty string.
* Raises an exception if given a nil string.
*/
+ (NSCharacterSet*) characterSetWithCharactersInString: (NSString*)aString;
/**
* Returns set containing unicode index range given by aRange.
*/
+ (NSCharacterSet*) characterSetWithRange: (NSRange)aRange;
/*
* NOTE(review): the three declarations below are repeated, with
* documentation, further down in this interface. Presumably they are
* leftovers of the undocumented originals - confirm before removing.
*/
- (NSData*) bitmapRepresentation;
- (BOOL) characterIsMember: (unichar)aCharacter;
- (NSCharacterSet*) invertedSet;
#ifndef STRICT_MACOS_X
/**
* Returns a character set read from a bitmap file.
* (See [NSBitmapCharSet].) The file must have the
* extension "<code>.bitmap</code>". (To get around this, load the file
* into data yourself and use
* [NSCharacterSet+characterSetWithBitmapRepresentation:].)
*/
+ (NSCharacterSet*) characterSetWithContentsOfFile: (NSString*)aFile;
#endif
/**
* Returns a bitmap representation of the receiver's character set
* suitable for archiving or writing to a file, in an NSData object.
*/
- (NSData*) bitmapRepresentation;
/**
* Returns YES if the receiver contains <em>aCharacter</em>, NO if
* it does not.
*/
- (BOOL) characterIsMember: (unichar)aCharacter;
/**
* Returns a character set containing only characters that the
* receiver does not contain.
*/
- (NSCharacterSet*) invertedSet;
@end
/**
* An [NSCharacterSet] that can be modified.
*/
@interface NSMutableCharacterSet : NSCharacterSet
/**
* Adds to the receiver the characters whose unicode indices lie
* in aRange.
*/
- (void) addCharactersInRange: (NSRange)aRange;
/**
* Adds to the receiver each character that occurs in aString.
*/
- (void) addCharactersInString: (NSString*)aString;
/**
* Forms the set union: adds to the receiver the characters
* of otherSet.
*/
- (void) formUnionWithCharacterSet: (NSCharacterSet*)otherSet;
/**
* Forms the set intersection of the receiver and otherSet.
*/
- (void) formIntersectionWithCharacterSet: (NSCharacterSet*)otherSet;
/**
* Removes the characters in aRange from the receiver. It is not an
* error if some of those characters are not currently in the set.
*/
- (void) removeCharactersInRange: (NSRange)aRange;
/**
* Removes the characters in aString from the receiver. It is not an
* error if some of those characters are not currently in the set.
*/
- (void) removeCharactersInString: (NSString*)aString;
/**
* Inverts the receiver: removes all characters currently in the set
* and adds all other characters.
*/
- (void) invert;
@end

View file

@ -1,3 +1,11 @@
#############################################################################
THIS DIRECTORY IS OBSOLETE AND IS NO LONGER USED
The NSCharacterSetData.h file (in CVS in core/base/Source) is generated
using the tools in dev-apps/charsets and the library in dev-libs/ucsdata
#############################################################################
The files in this directory are CharacterSet bitmaps which contain
a bitmap representation of the Unicode characters in the respective
@ -7,10 +15,6 @@
of the Unicode character set as of Feb. 27, 2001. The Unicode character
set can be obtained from http://www.unicode.org
The NSCharacterSetData.h file is generated from the binary bitmap
data in the individual .dat files, and is compiled in to the
NSCharacterSet class.
The binary data files are no longer used at runtime.
The current character sets are based on UnicodeData.txt version 3.0.1.

View file

@ -36,7 +36,7 @@
#include "Foundation/NSThread.h"
#include "Foundation/NSNotification.h"
#include "../NSCharacterSets/NSCharacterSetData.h"
#include "NSCharacterSetData.h"
/* A simple array for caching standard bitmap sets */
#define MAX_STANDARD_SETS 15
@ -88,10 +88,6 @@ static Class abstractClass = nil;
@end
/**
* Represents a set of unicode characters. Used by [NSScanner] and [NSString]
* for parsing-related methods.
*/
@implementation NSCharacterSet
+ (void) initialize
@ -123,137 +119,81 @@ static Class abstractClass = nil;
return cache_set[number];
}
/**
* Returns a character set containing letters, numbers, and diacritical
* marks. Note that "letters" includes all alphabetic as well as Chinese
* characters, etc..
*/
+ (NSCharacterSet*) alphanumericCharacterSet
{
  /* alphanumericCharSet supplies the data; 0 is the index passed for
   * this set (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: alphanumericCharSet number: 0];
  return standardSet;
}
/**
* Returns a character set containing letters in the unicode
* Titlecase category.
*/
+ (NSCharacterSet*) capitalizedLetterCharacterSet
{
  /* Uses the titlecaseLetterCharSet data with index 13
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: titlecaseLetterCharSet number: 13];
  return standardSet;
}
+ (NSCharacterSet*) controlCharacterSet
{
  /* Uses the controlCharSet data with index 1
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: controlCharSet number: 1];
  return standardSet;
}
/**
* Returns a character set containing characters that represent
* the decimal digits 0 through 9.
*/
+ (NSCharacterSet*) decimalDigitCharacterSet
{
  /* Uses the decimalDigitCharSet data with index 2
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: decimalDigitCharSet number: 2];
  return standardSet;
}
/**
* Returns a character set containing individual characters that
* can be represented also by a composed character sequence.
*/
+ (NSCharacterSet*) decomposableCharacterSet
{
  /* Uses the decomposableCharSet data with index 3
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: decomposableCharSet number: 3];
  return standardSet;
}
/**
* Returns a character set containing unassigned (illegal)
* character values.
*/
+ (NSCharacterSet*) illegalCharacterSet
{
  /* Uses the illegalCharSet data with index 4
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: illegalCharSet number: 4];
  return standardSet;
}
/**
* Returns a character set containing letters, including all alphabetic as
* well as Chinese characters, etc..
*/
+ (NSCharacterSet*) letterCharacterSet
{
  /* Uses the letterCharSet data with index 5
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: letterCharSet number: 5];
  return standardSet;
}
/**
* Returns a character set that contains the lowercase characters.
* This set does not include caseless characters, only those that
* have corresponding characters in uppercase and/or titlecase.
*/
+ (NSCharacterSet*) lowercaseLetterCharacterSet
{
  /* Uses the lowercaseLetterCharSet data with index 6
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: lowercaseLetterCharSet number: 6];
  return standardSet;
}
/**
* Returns a character set containing characters for diacritical marks, which
* are usually only rendered in conjunction with another character.
*/
+ (NSCharacterSet*) nonBaseCharacterSet
{
  /* Uses the nonBaseCharSet data with index 7
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: nonBaseCharSet number: 7];
  return standardSet;
}
/**
* Returns a character set containing punctuation marks.
*/
+ (NSCharacterSet*) punctuationCharacterSet
{
  /* Uses the punctuationCharSet data with index 8
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: punctuationCharSet number: 8];
  return standardSet;
}
/**
* Returns a character set containing mathematical symbols, etc..
*/
+ (NSCharacterSet*) symbolAndOperatorCharacterSet
{
  /* Uses the symbolAndOperatorCharSet data with index 9
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: symbolAndOperatorCharSet number: 9];
  return standardSet;
}
/**
* Returns a character set that contains the uppercase characters.
* This set does not include caseless characters, only those that
* have corresponding characters in lowercase and/or titlecase.
*/
+ (NSCharacterSet*) uppercaseLetterCharacterSet
{
  /* Uses the uppercaseLetterCharSet data with index 10
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: uppercaseLetterCharSet number: 10];
  return standardSet;
}
/**
* Returns a character set that contains the whitespace characters,
* plus the newline characters 0x000A and 0x000D and the next-line
* character 0x0085.
*/
+ (NSCharacterSet*) whitespaceAndNewlineCharacterSet
{
  /* Uses the whitespaceAndNlCharSet data with index 11
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: whitespaceAndNlCharSet number: 11];
  return standardSet;
}
/**
* Returns a character set that contains the whitespace characters.
*/
+ (NSCharacterSet*) whitespaceCharacterSet
{
  /* Uses the whitespaceCharSet data with index 12
   * (presumably its slot in the standard-set cache). */
  NSCharacterSet	*standardSet;

  standardSet = [self _staticSet: whitespaceCharSet number: 12];
  return standardSet;
}
// Creating custom character sets
/**
* Returns a character set containing characters as encoded in the
* data object.
*/
+ (NSCharacterSet*) characterSetWithBitmapRepresentation: (NSData*)data
{
  NSCharacterSet	*bitmapSet;

  /* Create an NSBitmapCharSet from the supplied bitmap data and
   * return it autoreleased. */
  bitmapSet = [[NSBitmapCharSet alloc] initWithBitmap: data];
  return AUTORELEASE(bitmapSet);
}
/**
* Returns set with characters in aString, or empty set for empty string.
* Raises an exception if given a nil string.
*/
+ (NSCharacterSet*) characterSetWithCharactersInString: (NSString*)aString
{
unsigned i;
@ -280,9 +220,6 @@ static Class abstractClass = nil;
return [self characterSetWithBitmapRepresentation: bitmap];
}
/**
* Returns set containing unicode index range given by aRange.
*/
+ (NSCharacterSet*)characterSetWithRange: (NSRange)aRange
{
unsigned i;
@ -304,12 +241,6 @@ static Class abstractClass = nil;
return [self characterSetWithBitmapRepresentation: bitmap];
}
/**
* Returns a character set read from a bitmap file.
* (See [NSBitmapCharSet].) The file must have the
* extension "<code>.bitmap</code>". (To get around this, load the file
* into data yourself and use
* [NSCharacterSet+characterSetWithBitmapRepresentation:].)
*/
+ (NSCharacterSet*) characterSetWithContentsOfFile: (NSString*)aFile
{
if ([@"bitmap" isEqual: [aFile pathExtension]])
@ -321,20 +252,12 @@ static Class abstractClass = nil;
return nil;
}
/**
* Returns a bitmap representation of the receiver's character set
* suitable for archiving or writing to a file, in an NSData object.
*/
- (NSData*) bitmapRepresentation
{
  /* No implementation at this level: report via
   * subclassResponsibility: and return no data. */
  [self subclassResponsibility: _cmd];
  return nil;
}
/**
* Returns YES if the receiver contains <em>aCharacter</em>, NO if
* it does not.
*/
- (BOOL) characterIsMember: (unichar)aCharacter
{
[self subclassResponsibility: _cmd];
@ -385,10 +308,6 @@ static Class abstractClass = nil;
return NO;
}
/**
* Returns a character set containing only characters that the
* receiver does not contain.
*/
- (NSCharacterSet*) invertedSet
{
unsigned i;
@ -425,9 +344,6 @@ static Class abstractClass = nil;
@end
/**
* An [NSCharacterSet] that can be modified.
*/
@implementation NSMutableCharacterSet
/* Provide a default object for allocation */
@ -447,6 +363,11 @@ static Class abstractClass = nil;
return AUTORELEASE([[abstractClass performSelector: _cmd] mutableCopy]);
}
+ (NSCharacterSet*) capitalizedLetterCharacterSet
{
  /* Ask abstractClass for the set of the same name (selector _cmd),
   * then return an autoreleased mutable copy of it. */
  id	standardSet = [abstractClass performSelector: _cmd];

  return AUTORELEASE([standardSet mutableCopy]);
}
+ (NSCharacterSet*) controlCharacterSet
{
return AUTORELEASE([[abstractClass performSelector: _cmd] mutableCopy]);
@ -507,61 +428,36 @@ static Class abstractClass = nil;
return AUTORELEASE([[abstractClass performSelector: _cmd] mutableCopy]);
}
/* Mutable subclasses must implement ALL of these methods. */
/**
* Adds characters specified by unicode indices in aRange to set.
*/
- (void) addCharactersInRange: (NSRange)aRange
{
/* No implementation at this level; mutable subclasses must override. */
[self subclassResponsibility: _cmd];
}
/**
* Adds characters in aString to set.
*/
- (void) addCharactersInString: (NSString*)aString
{
/* No implementation at this level; mutable subclasses must override. */
[self subclassResponsibility: _cmd];
}
/**
* Set union of character sets.
*/
- (void) formUnionWithCharacterSet: (NSCharacterSet*)otherSet
{
/* No implementation at this level; mutable subclasses must override. */
[self subclassResponsibility: _cmd];
}
/**
* Set intersection of character sets.
*/
- (void) formIntersectionWithCharacterSet: (NSCharacterSet*)otherSet
{
/* No implementation at this level; mutable subclasses must override. */
[self subclassResponsibility: _cmd];
}
/**
* Drop given range of characters. No error for characters not currently in
* set.
*/
- (void) removeCharactersInRange: (NSRange)aRange
{
/* No implementation at this level; mutable subclasses must override. */
[self subclassResponsibility: _cmd];
}
/**
* Drop characters in aString. No error for characters not currently in
* set.
*/
- (void) removeCharactersInString: (NSString*)aString
{
/* No implementation at this level; mutable subclasses must override. */
[self subclassResponsibility: _cmd];
}
/**
* Remove all characters currently in set and add all other characters.
*/
- (void) invert
{
[self subclassResponsibility: _cmd];

114702
Source/NSCharacterSetData.h Normal file

File diff suppressed because it is too large Load diff