Use UTF8 in XML parser

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@7659 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2000-09-30 04:54:43 +00:00
parent 401298ab15
commit 313b6cb7b3
5 changed files with 153 additions and 82 deletions

View file

@ -1,3 +1,10 @@
2000-09-30 Richard Frith-Macdonald <rfm@gnu.org>
* Source/NSString.m: ([-initWithUTF8String:]),
([-initWithData:encoding:]) create UTF8 strings as 8-bit strings if
they actually only contain ASCII characters.
* Source/GSXML.m: Create strings from UTF8 data throughout.
2000-09-29 Richard Frith-Macdonald <rfm@gnu.org>
* Source/GSCompatibility.m: Fix stupid error in last modification -

View file

@ -47,10 +47,27 @@ extern int xmlGetWarningsDefaultValue;
*
*/
static Class NSString_class;
static IMP csImp;
static IMP cslImp;
static SEL csSel = @selector(stringWithCString:);
static SEL cslSel = @selector(stringWithCString:length:);
static IMP usImp;
static SEL usSel = @selector(stringWithUTF8String:);
/*
 * Create an NSString from a NUL-terminated UTF-8 C string by calling the
 * cached implementation of +stringWithUTF8String: (usImp) directly on
 * NSString_class.
 *
 * bytes - NUL-terminated UTF-8 data, or NULL.
 *
 * Returns the string produced by +stringWithUTF8String:, or nil when
 * bytes is NULL.
 */
inline static NSString*
UTF8Str(const char *bytes)
{
  /*
   * +stringWithUTF8String: ends up in -initWithUTF8String:, which calls
   * strlen() on its argument, so a NULL pointer must never be passed on.
   * NOTE(review): the parser callbacks in this file forward pointers
   * supplied by libxml (public/system IDs etc.), which are presumably
   * NULL when the item is absent - confirm against the libxml SAX docs.
   */
  if (bytes == NULL)
    {
      return nil;
    }
  return (*usImp)(NSString_class, usSel, bytes);
}
/*
 * Create an NSString from a run of UTF-8 bytes that is not required to be
 * NUL-terminated.
 *
 * The bytes are copied into a temporary buffer taken from the default
 * malloc zone, a terminating NUL is appended, the copy is converted with
 * UTF8Str(), and the temporary buffer is released before returning.
 *
 * bytes  - start of the UTF-8 data; a NULL pointer is treated as an
 *          empty run.
 * length - number of bytes to use from bytes.
 *
 * Returns whatever UTF8Str() produces for the NUL-terminated copy.
 * Because that conversion works on a C string, any NUL byte embedded
 * within the run ends the resulting string early.
 */
inline static NSString*
UTF8StrLen(const char *bytes, unsigned length)
{
  NSZone	*zone = NSDefaultMallocZone();
  NSString	*str;
  char		*buf;

  /*
   * Never hand a NULL source pointer to memcpy() - that is undefined
   * behaviour in C even for a zero byte count.
   */
  if (bytes == NULL)
    {
      length = 0;
    }

  buf = NSZoneMalloc(zone, length+1);
  if (length > 0)
    {
      memcpy(buf, bytes, length);
    }
  buf[length] = '\0';

  str = UTF8Str(buf);
  NSZoneFree(zone, buf);
  return str;
}
static BOOL cacheDone = NO;
@ -61,8 +78,7 @@ setupCache()
{
cacheDone = YES;
NSString_class = [NSString class];
csImp = [NSString_class methodForSelector: csSel];
cslImp = [NSString_class methodForSelector: cslSel];
usImp = [NSString_class methodForSelector: usSel];
}
}
@ -311,13 +327,13 @@ setupCache()
/* return the namespace prefix */
- (NSString*) prefix
{
return (*csImp)(NSString_class, csSel, ((xmlNsPtr)(lib))->prefix);
return UTF8Str(((xmlNsPtr)(lib))->prefix);
}
/* the namespace reference */
- (NSString*) href
{
return (*csImp)(NSString_class, csSel, ((xmlNsPtr)(lib))->href);
return UTF8Str(((xmlNsPtr)(lib))->href);
}
/* type of namespace */
@ -484,7 +500,7 @@ static NSMapTable *nodeNames = 0;
{
if (((xmlNodePtr)lib)->content != NULL)
{
return (*csImp)(NSString_class, csSel, ((xmlNodePtr)lib)->content);
return UTF8Str(((xmlNodePtr)lib)->content);
}
else
{
@ -496,7 +512,7 @@ static NSMapTable *nodeNames = 0;
{
if (lib != NULL)
{
return (*csImp)(NSString_class, csSel, ((xmlNodePtr)lib)->name);
return UTF8Str(((xmlNodePtr)lib)->name);
}
else
{
@ -543,13 +559,11 @@ static NSMapTable *nodeNames = 0;
{
const void *content = prop->children->content;
[d setObject: (*csImp)(NSString_class, csSel, content)
forKey: (*csImp)(NSString_class, csSel, name)];
[d setObject: UTF8Str(content) forKey: UTF8Str(name)];
}
else
{
[d setObject: @""
forKey: (*csImp)(NSString_class, csSel, name)];
[d setObject: @"" forKey: UTF8Str(name)];
}
prop = prop->next;
}
@ -777,8 +791,7 @@ static NSMapTable *nodeNames = 0;
if (((xmlNodePtr)lib)->children != NULL
&& ((xmlNodePtr)lib)->children->content != NULL)
{
return (*csImp)(NSString_class, csSel,
((xmlNodePtr)(lib))->children->content);
return UTF8Str(((xmlNodePtr)(lib))->children->content);
}
return nil;
}
@ -1272,9 +1285,9 @@ internalSubsetFunction(void *ctx, const char *name,
const xmlChar *ExternalID, const xmlChar *SystemID)
{
NSCAssert(ctx,@"No Context");
[HANDLER internalSubset: (*csImp)(NSString_class, csSel, name)
externalID: (*csImp)(NSString_class, csSel, ExternalID)
systemID: (*csImp)(NSString_class, csSel, SystemID)];
[HANDLER internalSubset: UTF8Str(name)
externalID: UTF8Str(ExternalID)
systemID: UTF8Str(SystemID)];
}
static void
@ -1282,31 +1295,31 @@ externalSubsetFunction(void *ctx, const char *name,
const xmlChar *ExternalID, const xmlChar *SystemID)
{
NSCAssert(ctx,@"No Context");
[HANDLER externalSubset: (*csImp)(NSString_class, csSel, name)
externalID: (*csImp)(NSString_class, csSel, ExternalID)
systemID: (*csImp)(NSString_class, csSel, SystemID)];
[HANDLER externalSubset: UTF8Str(name)
externalID: UTF8Str(ExternalID)
systemID: UTF8Str(SystemID)];
}
static xmlParserInputPtr
resolveEntityFunction(void *ctx, const char *publicId, const char *systemId)
{
NSCAssert(ctx,@"No Context");
return [HANDLER resolveEntity: (*csImp)(NSString_class, csSel, publicId)
systemID: (*csImp)(NSString_class, csSel, systemId)];
return [HANDLER resolveEntity: UTF8Str(publicId)
systemID: UTF8Str(systemId)];
}
static xmlEntityPtr
getEntityFunction(void *ctx, const char *name)
{
NSCAssert(ctx,@"No Context");
return [HANDLER getEntity: (*csImp)(NSString_class, csSel, name)];
return [HANDLER getEntity: UTF8Str(name)];
}
static xmlEntityPtr
getParameterEntityFunction(void *ctx, const char *name)
{
NSCAssert(ctx,@"No Context");
return [HANDLER getParameterEntity: (*csImp)(NSString_class, csSel, name)];
return [HANDLER getParameterEntity: UTF8Str(name)];
}
static void
@ -1314,11 +1327,11 @@ entityDeclFunction(void *ctx, const char *name, int type,
const char *publicId, const char *systemId, char *content)
{
NSCAssert(ctx,@"No Context");
[HANDLER entityDecl: (*csImp)(NSString_class, csSel, name)
[HANDLER entityDecl: UTF8Str(name)
type: type
public: (*csImp)(NSString_class, csSel, publicId)
system: (*csImp)(NSString_class, csSel, systemId)
content: (*csImp)(NSString_class, csSel, content)];
public: UTF8Str(publicId)
system: UTF8Str(systemId)
content: UTF8Str(content)];
}
static void
@ -1326,11 +1339,11 @@ attributeDeclFunction(void *ctx, const char *elem, const char *name,
int type, int def, const char *defaultValue, xmlEnumerationPtr tree)
{
NSCAssert(ctx,@"No Context");
[HANDLER attributeDecl: (*csImp)(NSString_class, csSel, elem)
name: (*csImp)(NSString_class, csSel, name)
[HANDLER attributeDecl: UTF8Str(elem)
name: UTF8Str(name)
type: type
typeDefValue: def
defaultValue: (*csImp)(NSString_class, csSel, defaultValue)];
defaultValue: UTF8Str(defaultValue)];
}
static void
@ -1338,7 +1351,7 @@ elementDeclFunction(void *ctx, const char *name, int type,
xmlElementContentPtr content)
{
NSCAssert(ctx,@"No Context");
[HANDLER elementDecl: (*csImp)(NSString_class, csSel, name)
[HANDLER elementDecl: UTF8Str(name)
type: type];
}
@ -1348,9 +1361,9 @@ notationDeclFunction(void *ctx, const char *name,
const char *publicId, const char *systemId)
{
NSCAssert(ctx,@"No Context");
[HANDLER notationDecl: (*csImp)(NSString_class, csSel, name)
public: (*csImp)(NSString_class, csSel, publicId)
system: (*csImp)(NSString_class, csSel, systemId)];
[HANDLER notationDecl: UTF8Str(name)
public: UTF8Str(publicId)
system: UTF8Str(systemId)];
}
static void
@ -1358,10 +1371,10 @@ unparsedEntityDeclFunction(void *ctx, const char *name,
const char *publicId, const char *systemId, const char *notationName)
{
NSCAssert(ctx,@"No Context");
[HANDLER unparsedEntityDecl: (*csImp)(NSString_class, csSel, name)
public: (*csImp)(NSString_class, csSel, publicId)
system: (*csImp)(NSString_class, csSel, systemId)
notationName: (*csImp)(NSString_class, csSel, notationName)];
[HANDLER unparsedEntityDecl: UTF8Str(name)
public: UTF8Str(publicId)
system: UTF8Str(systemId)
notationName: UTF8Str(notationName)];
}
static void
@ -1381,7 +1394,7 @@ startElementFunction(void *ctx, const char *name, const char **atts)
[dict setObject: obj forKey: key];
}
}
[HANDLER startElement: (*csImp)(NSString_class, csSel, name)
[HANDLER startElement: UTF8Str(name)
attributes: dict];
}
@ -1389,50 +1402,50 @@ static void
endElementFunction(void *ctx, const char *name)
{
NSCAssert(ctx,@"No Context");
[HANDLER endElement: (*csImp)(NSString_class, csSel, name)];
[HANDLER endElement: UTF8Str(name)];
}
static void
charactersFunction(void *ctx, const char *ch, int len)
{
NSCAssert(ctx,@"No Context");
[HANDLER characters: (*cslImp)(NSString_class, cslSel, ch, len)];
[HANDLER characters: UTF8StrLen(ch, len)];
}
static void
referenceFunction(void *ctx, const char *name)
{
NSCAssert(ctx,@"No Context");
[HANDLER reference: (*csImp)(NSString_class, csSel, name)];
[HANDLER reference: UTF8Str(name)];
}
static void
ignorableWhitespaceFunction(void *ctx, const char *ch, int len)
{
NSCAssert(ctx,@"No Context");
[HANDLER ignoreWhitespace: (*cslImp)(NSString_class, cslSel, ch, len)];
[HANDLER ignoreWhitespace: UTF8StrLen(ch, len)];
}
static void
processInstructionFunction(void *ctx, const char *target, const char *data)
{
NSCAssert(ctx,@"No Context");
[HANDLER processInstruction: (*csImp)(NSString_class, csSel, target)
data: (*csImp)(NSString_class, csSel, data)];
[HANDLER processInstruction: UTF8Str(target)
data: UTF8Str(data)];
}
static void
cdataBlockFunction(void *ctx, const char *value, int len)
{
NSCAssert(ctx,@"No Context");
[HANDLER cdataBlock: (*cslImp)(NSString_class, cslSel, value, len)];
[HANDLER cdataBlock: UTF8StrLen(value, len)];
}
static void
commentFunction(void *ctx, const char *value)
{
NSCAssert(ctx,@"No Context");
[HANDLER comment: (*csImp)(NSString_class, csSel, value)];
[HANDLER comment: UTF8Str(value)];
}
static void
@ -1446,7 +1459,7 @@ warningFunction(void *ctx, const char *msg, ...)
va_end(args);
NSCAssert(ctx,@"No Context");
[HANDLER warning: (*csImp)(NSString_class, csSel, allMsg)];
[HANDLER warning: UTF8Str(allMsg)];
}
static void
@ -1459,7 +1472,7 @@ errorFunction(void *ctx, const char *msg, ...)
vsprintf(allMsg, msg, args);
va_end(args);
NSCAssert(ctx,@"No Context");
[HANDLER error: (*csImp)(NSString_class, csSel, allMsg)];
[HANDLER error: UTF8Str(allMsg)];
}
static void
@ -1471,8 +1484,8 @@ fatalErrorFunction(void *ctx, const char *msg, ...)
va_start(args, msg);
vsprintf(allMsg, msg, args);
va_end(args);
NSCAssert(ctx,@"No Context");
[HANDLER fatalError: (*csImp)(NSString_class, csSel, allMsg)];
NSCAssert(ctx, @"No Context");
[HANDLER fatalError: UTF8Str(allMsg)];
}
#undef HANDLER

View file

@ -125,17 +125,17 @@ static IMP msInitImp; /* designated initialiser for mutable */
{
_count = length;
_contents_chars = (unsigned char*)byteString;
if (flag == NO)
if (flag == YES && byteString != 0)
{
_zone = 0;
#if GS_WITH_GC
_zone = GSAtomicMallocZone();
#else
_zone = NSZoneFromPointer(byteString);
#endif
}
else
{
#if GS_WITH_GC
_zone = byteString ? GSAtomicMallocZone() : 0;
#else
_zone = byteString ? NSZoneFromPointer(byteString) : 0;
#endif
_zone = 0;
}
return self;
}
@ -992,12 +992,12 @@ stringDecrementCountAndFillHoleAt(NSGMutableCStringStruct *self,
_count = length;
_capacity = length;
_contents_chars = (unsigned char*)byteString;
if (flag == YES)
if (flag == YES && byteString != 0)
{
#if GS_WITH_GC
_zone = (byteString != 0) ? GSAtomicMallocZone() : 0;
_zone = GSAtomicMallocZone();
#else
_zone = (byteString != 0) ? NSZoneFromPointer(byteString) : 0;
_zone = NSZoneFromPointer(byteString);
#endif
}
else

View file

@ -184,7 +184,7 @@
{
_count = length;
_contents_chars = chars;
if (flag && chars)
if (flag == YES && chars != 0)
{
#if GS_WITH_GC
_zone = GSAtomicMallocZone();
@ -669,7 +669,7 @@ stringDecrementCountAndFillHoleAt(NSGMutableStringStruct *self,
_count = length;
_capacity = length;
_contents_chars = chars;
if (flag && chars)
if (flag == YES && chars != 0)
{
#if GS_WITH_GC
_zone = GSAtomicMallocZone();

View file

@ -309,7 +309,7 @@ handle_printf_atsign (FILE *stream,
NSDefaultMallocZone()] initWithCString: byteString length: length]);
}
+ (id)stringWithUTF8String:(const char *)bytes
+ (id) stringWithUTF8String: (const char *)bytes
{
return AUTORELEASE([[self allocWithZone: NSDefaultMallocZone()]
initWithUTF8String: bytes]);
@ -441,18 +441,38 @@ handle_printf_atsign (FILE *stream,
return self;
}
- (id) initWithUTF8String:(const char *)bytes
- (id) initWithUTF8String: (const char *)bytes
{
unsigned length = strlen(bytes);
if (length > 0)
{
unichar *s = NSZoneMalloc(fastZone(self), sizeof(unichar)*length);
unsigned i;
length = encode_strtoustr(s, bytes, length+1, NSUTF8StringEncoding);
self = [self initWithCharactersNoCopy: s
length: length
freeWhenDone: YES];
/*
* Check to see if we have in fact got an ascii string
*/
for (i = 0; i < length; i++)
{
if (((unsigned char*)bytes)[i] > 127)
{
break;
}
}
if (i == length)
{
self = [self initWithCString: bytes length: length];
}
else
{
unichar *s;
s = NSZoneMalloc(fastZone(self), sizeof(unichar)*length);
length = encode_strtoustr(s, bytes, length+1, NSUTF8StringEncoding);
self = [self initWithCharactersNoCopy: s
length: length
freeWhenDone: YES];
}
}
else
{
@ -798,6 +818,37 @@ handle_printf_atsign (FILE *stream,
self = [self initWithCStringNoCopy: 0 length: 0 freeWhenDone: NO];
}
return self;
}
else if (encoding == NSUTF8StringEncoding)
{
unsigned length = [data length];
const char *bytes = [data bytes];
unsigned i;
/*
* Check to see if we have in fact got an ascii string
*/
for (i = 0; i < length; i++)
{
if (((unsigned char*)bytes)[i] > 127)
{
break;
}
}
if (i == length)
{
self = [self initWithCString: bytes length: length];
}
else
{
unichar *u;
u = NSZoneMalloc(fastZone(self), sizeof(unichar)*length);
length = encode_strtoustr(u, bytes, length+1, NSUTF8StringEncoding);
self = [self initWithCharactersNoCopy: u
length: length
freeWhenDone: YES];
}
}
else
{
@ -1643,7 +1694,7 @@ handle_printf_atsign (FILE *stream,
return (const char*)[d bytes];
}
- (const char *)UTF8String
- (const char *) UTF8String
{
NSData *d;
@ -1777,7 +1828,7 @@ handle_printf_atsign (FILE *stream,
- (BOOL) canBeConvertedToEncoding: (NSStringEncoding)encoding
{
id d = [self dataUsingEncoding: encoding allowLossyConversion: NO];
id d = [self dataUsingEncoding: encoding allowLossyConversion: NO];
return d ? YES : NO;
}
@ -2533,10 +2584,10 @@ handle_printf_atsign (FILE *stream,
range: ((NSRange){0, [self length]})];
}
- (NSComparisonResult)compare:(NSString *)string
options:(unsigned)mask
range:(NSRange)compareRange
locale:(NSDictionary *)dict
- (NSComparisonResult) compare: (NSString *)string
options: (unsigned)mask
range: (NSRange)compareRange
locale: (NSDictionary *)dict
{
// FIXME: This does only a normal compare
return [self compare: string
@ -2544,7 +2595,7 @@ handle_printf_atsign (FILE *stream,
range: compareRange];
}
- (NSComparisonResult)localizedCompare:(NSString *)string
- (NSComparisonResult) localizedCompare: (NSString *)string
{
// FIXME: This does only a normal compare
return [self compare: string
@ -2552,7 +2603,7 @@ handle_printf_atsign (FILE *stream,
range: ((NSRange){0, [self length]})];
}
- (NSComparisonResult)localizedCaseInsensitiveCompare:(NSString *)string
- (NSComparisonResult) localizedCaseInsensitiveCompare: (NSString *)string
{
// FIXME: This does only a normal compare
return [self compare: string
@ -2569,7 +2620,7 @@ handle_printf_atsign (FILE *stream,
return [d writeToFile: filename atomically: useAuxiliaryFile];
}
- (BOOL)writeToURL:(NSURL *)anURL atomically:(BOOL)atomically
- (BOOL) writeToURL: (NSURL*)anURL atomically: (BOOL)atomically
{
id d;
if (!(d = [self dataUsingEncoding: [NSString defaultCStringEncoding]]))