Use UTF8 in XML parser

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@7659 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
richard 2000-09-30 04:54:43 +00:00
parent 8a67887721
commit 2c7fd523ac
5 changed files with 153 additions and 82 deletions

View file

@ -1,3 +1,10 @@
2000-09-30 Richard Frith-Macdonald <rfm@gnu.org>
* Source/NSString.m: ([-initWithUTF8String:]),
([-initWithData:encoding:]) create UTF8 strings as 8-bit strings if
they actually only contain ASCII characters.
* Source/GSXML.m: Create strings from UTF8 data throughout.
2000-09-29 Richard Frith-Macdonald <rfm@gnu.org> 2000-09-29 Richard Frith-Macdonald <rfm@gnu.org>
* Source/GSCompatibility.m: Fix stupid error in last modification - * Source/GSCompatibility.m: Fix stupid error in last modification -

View file

@ -47,10 +47,27 @@ extern int xmlGetWarningsDefaultValue;
* *
*/ */
static Class NSString_class; static Class NSString_class;
static IMP csImp; static IMP usImp;
static IMP cslImp; static SEL usSel = @selector(stringWithUTF8String:);
static SEL csSel = @selector(stringWithCString:);
static SEL cslSel = @selector(stringWithCString:length:); inline static NSString*
UTF8Str(const char *bytes)
{
return (*usImp)(NSString_class, usSel, bytes);
}
/*
 * Build an NSString from the first 'length' bytes found at 'bytes'.
 * The input need not be nul terminated: the bytes are copied into a
 * temporary buffer from the default malloc zone, a terminating nul is
 * appended, and the copy is handed to UTF8Str() for conversion.
 * The temporary buffer is released again before returning.
 * NB. as conversion goes through a nul terminated C string, any nul
 * byte inside the first 'length' bytes ends the copy seen by UTF8Str().
 */
inline static NSString*
UTF8StrLen(const char *bytes, unsigned length)
{
  NSZone	*zone = NSDefaultMallocZone();
  char		*copy = NSZoneMalloc(zone, length+1);
  NSString	*result;

  memcpy(copy, bytes, length);
  copy[length] = '\0';		/* UTF8Str() needs a terminator. */
  result = UTF8Str(copy);
  NSZoneFree(zone, copy);
  return result;
}
static BOOL cacheDone = NO; static BOOL cacheDone = NO;
@ -61,8 +78,7 @@ setupCache()
{ {
cacheDone = YES; cacheDone = YES;
NSString_class = [NSString class]; NSString_class = [NSString class];
csImp = [NSString_class methodForSelector: csSel]; usImp = [NSString_class methodForSelector: usSel];
cslImp = [NSString_class methodForSelector: cslSel];
} }
} }
@ -311,13 +327,13 @@ setupCache()
/* return the namespace prefix */ /* return the namespace prefix */
- (NSString*) prefix - (NSString*) prefix
{ {
return (*csImp)(NSString_class, csSel, ((xmlNsPtr)(lib))->prefix); return UTF8Str(((xmlNsPtr)(lib))->prefix);
} }
/* the namespace reference */ /* the namespace reference */
- (NSString*) href - (NSString*) href
{ {
return (*csImp)(NSString_class, csSel, ((xmlNsPtr)(lib))->href); return UTF8Str(((xmlNsPtr)(lib))->href);
} }
/* type of namespace */ /* type of namespace */
@ -484,7 +500,7 @@ static NSMapTable *nodeNames = 0;
{ {
if (((xmlNodePtr)lib)->content != NULL) if (((xmlNodePtr)lib)->content != NULL)
{ {
return (*csImp)(NSString_class, csSel, ((xmlNodePtr)lib)->content); return UTF8Str(((xmlNodePtr)lib)->content);
} }
else else
{ {
@ -496,7 +512,7 @@ static NSMapTable *nodeNames = 0;
{ {
if (lib != NULL) if (lib != NULL)
{ {
return (*csImp)(NSString_class, csSel, ((xmlNodePtr)lib)->name); return UTF8Str(((xmlNodePtr)lib)->name);
} }
else else
{ {
@ -543,13 +559,11 @@ static NSMapTable *nodeNames = 0;
{ {
const void *content = prop->children->content; const void *content = prop->children->content;
[d setObject: (*csImp)(NSString_class, csSel, content) [d setObject: UTF8Str(content) forKey: UTF8Str(name)];
forKey: (*csImp)(NSString_class, csSel, name)];
} }
else else
{ {
[d setObject: @"" [d setObject: @"" forKey: UTF8Str(name)];
forKey: (*csImp)(NSString_class, csSel, name)];
} }
prop = prop->next; prop = prop->next;
} }
@ -777,8 +791,7 @@ static NSMapTable *nodeNames = 0;
if (((xmlNodePtr)lib)->children != NULL if (((xmlNodePtr)lib)->children != NULL
&& ((xmlNodePtr)lib)->children->content != NULL) && ((xmlNodePtr)lib)->children->content != NULL)
{ {
return (*csImp)(NSString_class, csSel, return UTF8Str(((xmlNodePtr)(lib))->children->content);
((xmlNodePtr)(lib))->children->content);
} }
return nil; return nil;
} }
@ -1272,9 +1285,9 @@ internalSubsetFunction(void *ctx, const char *name,
const xmlChar *ExternalID, const xmlChar *SystemID) const xmlChar *ExternalID, const xmlChar *SystemID)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER internalSubset: (*csImp)(NSString_class, csSel, name) [HANDLER internalSubset: UTF8Str(name)
externalID: (*csImp)(NSString_class, csSel, ExternalID) externalID: UTF8Str(ExternalID)
systemID: (*csImp)(NSString_class, csSel, SystemID)]; systemID: UTF8Str(SystemID)];
} }
static void static void
@ -1282,31 +1295,31 @@ externalSubsetFunction(void *ctx, const char *name,
const xmlChar *ExternalID, const xmlChar *SystemID) const xmlChar *ExternalID, const xmlChar *SystemID)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER externalSubset: (*csImp)(NSString_class, csSel, name) [HANDLER externalSubset: UTF8Str(name)
externalID: (*csImp)(NSString_class, csSel, ExternalID) externalID: UTF8Str(ExternalID)
systemID: (*csImp)(NSString_class, csSel, SystemID)]; systemID: UTF8Str(SystemID)];
} }
static xmlParserInputPtr static xmlParserInputPtr
resolveEntityFunction(void *ctx, const char *publicId, const char *systemId) resolveEntityFunction(void *ctx, const char *publicId, const char *systemId)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
return [HANDLER resolveEntity: (*csImp)(NSString_class, csSel, publicId) return [HANDLER resolveEntity: UTF8Str(publicId)
systemID: (*csImp)(NSString_class, csSel, systemId)]; systemID: UTF8Str(systemId)];
} }
static xmlEntityPtr static xmlEntityPtr
getEntityFunction(void *ctx, const char *name) getEntityFunction(void *ctx, const char *name)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
return [HANDLER getEntity: (*csImp)(NSString_class, csSel, name)]; return [HANDLER getEntity: UTF8Str(name)];
} }
static xmlEntityPtr static xmlEntityPtr
getParameterEntityFunction(void *ctx, const char *name) getParameterEntityFunction(void *ctx, const char *name)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
return [HANDLER getParameterEntity: (*csImp)(NSString_class, csSel, name)]; return [HANDLER getParameterEntity: UTF8Str(name)];
} }
static void static void
@ -1314,11 +1327,11 @@ entityDeclFunction(void *ctx, const char *name, int type,
const char *publicId, const char *systemId, char *content) const char *publicId, const char *systemId, char *content)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER entityDecl: (*csImp)(NSString_class, csSel, name) [HANDLER entityDecl: UTF8Str(name)
type: type type: type
public: (*csImp)(NSString_class, csSel, publicId) public: UTF8Str(publicId)
system: (*csImp)(NSString_class, csSel, systemId) system: UTF8Str(systemId)
content: (*csImp)(NSString_class, csSel, content)]; content: UTF8Str(content)];
} }
static void static void
@ -1326,11 +1339,11 @@ attributeDeclFunction(void *ctx, const char *elem, const char *name,
int type, int def, const char *defaultValue, xmlEnumerationPtr tree) int type, int def, const char *defaultValue, xmlEnumerationPtr tree)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER attributeDecl: (*csImp)(NSString_class, csSel, elem) [HANDLER attributeDecl: UTF8Str(elem)
name: (*csImp)(NSString_class, csSel, name) name: UTF8Str(name)
type: type type: type
typeDefValue: def typeDefValue: def
defaultValue: (*csImp)(NSString_class, csSel, defaultValue)]; defaultValue: UTF8Str(defaultValue)];
} }
static void static void
@ -1338,7 +1351,7 @@ elementDeclFunction(void *ctx, const char *name, int type,
xmlElementContentPtr content) xmlElementContentPtr content)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER elementDecl: (*csImp)(NSString_class, csSel, name) [HANDLER elementDecl: UTF8Str(name)
type: type]; type: type];
} }
@ -1348,9 +1361,9 @@ notationDeclFunction(void *ctx, const char *name,
const char *publicId, const char *systemId) const char *publicId, const char *systemId)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER notationDecl: (*csImp)(NSString_class, csSel, name) [HANDLER notationDecl: UTF8Str(name)
public: (*csImp)(NSString_class, csSel, publicId) public: UTF8Str(publicId)
system: (*csImp)(NSString_class, csSel, systemId)]; system: UTF8Str(systemId)];
} }
static void static void
@ -1358,10 +1371,10 @@ unparsedEntityDeclFunction(void *ctx, const char *name,
const char *publicId, const char *systemId, const char *notationName) const char *publicId, const char *systemId, const char *notationName)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER unparsedEntityDecl: (*csImp)(NSString_class, csSel, name) [HANDLER unparsedEntityDecl: UTF8Str(name)
public: (*csImp)(NSString_class, csSel, publicId) public: UTF8Str(publicId)
system: (*csImp)(NSString_class, csSel, systemId) system: UTF8Str(systemId)
notationName: (*csImp)(NSString_class, csSel, notationName)]; notationName: UTF8Str(notationName)];
} }
static void static void
@ -1381,7 +1394,7 @@ startElementFunction(void *ctx, const char *name, const char **atts)
[dict setObject: obj forKey: key]; [dict setObject: obj forKey: key];
} }
} }
[HANDLER startElement: (*csImp)(NSString_class, csSel, name) [HANDLER startElement: UTF8Str(name)
attributes: dict]; attributes: dict];
} }
@ -1389,50 +1402,50 @@ static void
endElementFunction(void *ctx, const char *name) endElementFunction(void *ctx, const char *name)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER endElement: (*csImp)(NSString_class, csSel, name)]; [HANDLER endElement: UTF8Str(name)];
} }
static void static void
charactersFunction(void *ctx, const char *ch, int len) charactersFunction(void *ctx, const char *ch, int len)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER characters: (*cslImp)(NSString_class, cslSel, ch, len)]; [HANDLER characters: UTF8StrLen(ch, len)];
} }
static void static void
referenceFunction(void *ctx, const char *name) referenceFunction(void *ctx, const char *name)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER reference: (*csImp)(NSString_class, csSel, name)]; [HANDLER reference: UTF8Str(name)];
} }
static void static void
ignorableWhitespaceFunction(void *ctx, const char *ch, int len) ignorableWhitespaceFunction(void *ctx, const char *ch, int len)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER ignoreWhitespace: (*cslImp)(NSString_class, cslSel, ch, len)]; [HANDLER ignoreWhitespace: UTF8StrLen(ch, len)];
} }
static void static void
processInstructionFunction(void *ctx, const char *target, const char *data) processInstructionFunction(void *ctx, const char *target, const char *data)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER processInstruction: (*csImp)(NSString_class, csSel, target) [HANDLER processInstruction: UTF8Str(target)
data: (*csImp)(NSString_class, csSel, data)]; data: UTF8Str(data)];
} }
static void static void
cdataBlockFunction(void *ctx, const char *value, int len) cdataBlockFunction(void *ctx, const char *value, int len)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER cdataBlock: (*cslImp)(NSString_class, cslSel, value, len)]; [HANDLER cdataBlock: UTF8StrLen(value, len)];
} }
static void static void
commentFunction(void *ctx, const char *value) commentFunction(void *ctx, const char *value)
{ {
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER comment: (*csImp)(NSString_class, csSel, value)]; [HANDLER comment: UTF8Str(value)];
} }
static void static void
@ -1446,7 +1459,7 @@ warningFunction(void *ctx, const char *msg, ...)
va_end(args); va_end(args);
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER warning: (*csImp)(NSString_class, csSel, allMsg)]; [HANDLER warning: UTF8Str(allMsg)];
} }
static void static void
@ -1459,7 +1472,7 @@ errorFunction(void *ctx, const char *msg, ...)
vsprintf(allMsg, msg, args); vsprintf(allMsg, msg, args);
va_end(args); va_end(args);
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
[HANDLER error: (*csImp)(NSString_class, csSel, allMsg)]; [HANDLER error: UTF8Str(allMsg)];
} }
static void static void
@ -1471,8 +1484,8 @@ fatalErrorFunction(void *ctx, const char *msg, ...)
va_start(args, msg); va_start(args, msg);
vsprintf(allMsg, msg, args); vsprintf(allMsg, msg, args);
va_end(args); va_end(args);
NSCAssert(ctx,@"No Context"); NSCAssert(ctx, @"No Context");
[HANDLER fatalError: (*csImp)(NSString_class, csSel, allMsg)]; [HANDLER fatalError: UTF8Str(allMsg)];
} }
#undef HANDLER #undef HANDLER

View file

@ -125,17 +125,17 @@ static IMP msInitImp; /* designated initialiser for mutable */
{ {
_count = length; _count = length;
_contents_chars = (unsigned char*)byteString; _contents_chars = (unsigned char*)byteString;
if (flag == NO) if (flag == YES && byteString != 0)
{ {
_zone = 0; #if GS_WITH_GC
_zone = GSAtomicMallocZone();
#else
_zone = NSZoneFromPointer(byteString);
#endif
} }
else else
{ {
#if GS_WITH_GC _zone = 0;
_zone = byteString ? GSAtomicMallocZone() : 0;
#else
_zone = byteString ? NSZoneFromPointer(byteString) : 0;
#endif
} }
return self; return self;
} }
@ -992,12 +992,12 @@ stringDecrementCountAndFillHoleAt(NSGMutableCStringStruct *self,
_count = length; _count = length;
_capacity = length; _capacity = length;
_contents_chars = (unsigned char*)byteString; _contents_chars = (unsigned char*)byteString;
if (flag == YES) if (flag == YES && byteString != 0)
{ {
#if GS_WITH_GC #if GS_WITH_GC
_zone = (byteString != 0) ? GSAtomicMallocZone() : 0; _zone = GSAtomicMallocZone();
#else #else
_zone = (byteString != 0) ? NSZoneFromPointer(byteString) : 0; _zone = NSZoneFromPointer(byteString);
#endif #endif
} }
else else

View file

@ -184,7 +184,7 @@
{ {
_count = length; _count = length;
_contents_chars = chars; _contents_chars = chars;
if (flag && chars) if (flag == YES && chars != 0)
{ {
#if GS_WITH_GC #if GS_WITH_GC
_zone = GSAtomicMallocZone(); _zone = GSAtomicMallocZone();
@ -669,7 +669,7 @@ stringDecrementCountAndFillHoleAt(NSGMutableStringStruct *self,
_count = length; _count = length;
_capacity = length; _capacity = length;
_contents_chars = chars; _contents_chars = chars;
if (flag && chars) if (flag == YES && chars != 0)
{ {
#if GS_WITH_GC #if GS_WITH_GC
_zone = GSAtomicMallocZone(); _zone = GSAtomicMallocZone();

View file

@ -309,7 +309,7 @@ handle_printf_atsign (FILE *stream,
NSDefaultMallocZone()] initWithCString: byteString length: length]); NSDefaultMallocZone()] initWithCString: byteString length: length]);
} }
+ (id)stringWithUTF8String:(const char *)bytes + (id) stringWithUTF8String: (const char *)bytes
{ {
return AUTORELEASE([[self allocWithZone: NSDefaultMallocZone()] return AUTORELEASE([[self allocWithZone: NSDefaultMallocZone()]
initWithUTF8String: bytes]); initWithUTF8String: bytes]);
@ -441,18 +441,38 @@ handle_printf_atsign (FILE *stream,
return self; return self;
} }
- (id) initWithUTF8String:(const char *)bytes - (id) initWithUTF8String: (const char *)bytes
{ {
unsigned length = strlen(bytes); unsigned length = strlen(bytes);
if (length > 0) if (length > 0)
{ {
unichar *s = NSZoneMalloc(fastZone(self), sizeof(unichar)*length); unsigned i;
length = encode_strtoustr(s, bytes, length+1, NSUTF8StringEncoding); /*
self = [self initWithCharactersNoCopy: s * Check to see if we have in fact got an ascii string
length: length */
freeWhenDone: YES]; for (i = 0; i < length; i++)
{
if (((unsigned char*)bytes)[i] > 127)
{
break;
}
}
if (i == length)
{
self = [self initWithCString: bytes length: length];
}
else
{
unichar *s;
s = NSZoneMalloc(fastZone(self), sizeof(unichar)*length);
length = encode_strtoustr(s, bytes, length+1, NSUTF8StringEncoding);
self = [self initWithCharactersNoCopy: s
length: length
freeWhenDone: YES];
}
} }
else else
{ {
@ -798,6 +818,37 @@ handle_printf_atsign (FILE *stream,
self = [self initWithCStringNoCopy: 0 length: 0 freeWhenDone: NO]; self = [self initWithCStringNoCopy: 0 length: 0 freeWhenDone: NO];
} }
return self; return self;
}
else if (encoding == NSUTF8StringEncoding)
{
unsigned length = [data length];
const char *bytes = [data bytes];
unsigned i;
/*
* Check to see if we have in fact got an ascii string
*/
for (i = 0; i < length; i++)
{
if (((unsigned char*)bytes)[i] > 127)
{
break;
}
}
if (i == length)
{
self = [self initWithCString: bytes length: length];
}
else
{
unichar *u;
u = NSZoneMalloc(fastZone(self), sizeof(unichar)*length);
length = encode_strtoustr(u, bytes, length+1, NSUTF8StringEncoding);
self = [self initWithCharactersNoCopy: u
length: length
freeWhenDone: YES];
}
} }
else else
{ {
@ -1643,7 +1694,7 @@ handle_printf_atsign (FILE *stream,
return (const char*)[d bytes]; return (const char*)[d bytes];
} }
- (const char *)UTF8String - (const char *) UTF8String
{ {
NSData *d; NSData *d;
@ -1777,7 +1828,7 @@ handle_printf_atsign (FILE *stream,
- (BOOL) canBeConvertedToEncoding: (NSStringEncoding)encoding - (BOOL) canBeConvertedToEncoding: (NSStringEncoding)encoding
{ {
id d = [self dataUsingEncoding: encoding allowLossyConversion: NO]; id d = [self dataUsingEncoding: encoding allowLossyConversion: NO];
return d ? YES : NO; return d ? YES : NO;
} }
@ -2533,10 +2584,10 @@ handle_printf_atsign (FILE *stream,
range: ((NSRange){0, [self length]})]; range: ((NSRange){0, [self length]})];
} }
- (NSComparisonResult)compare:(NSString *)string - (NSComparisonResult) compare: (NSString *)string
options:(unsigned)mask options: (unsigned)mask
range:(NSRange)compareRange range: (NSRange)compareRange
locale:(NSDictionary *)dict locale: (NSDictionary *)dict
{ {
// FIXME: This does only a normal compare // FIXME: This does only a normal compare
return [self compare: string return [self compare: string
@ -2544,7 +2595,7 @@ handle_printf_atsign (FILE *stream,
range: compareRange]; range: compareRange];
} }
- (NSComparisonResult)localizedCompare:(NSString *)string - (NSComparisonResult) localizedCompare: (NSString *)string
{ {
// FIXME: This does only a normal compare // FIXME: This does only a normal compare
return [self compare: string return [self compare: string
@ -2552,7 +2603,7 @@ handle_printf_atsign (FILE *stream,
range: ((NSRange){0, [self length]})]; range: ((NSRange){0, [self length]})];
} }
- (NSComparisonResult)localizedCaseInsensitiveCompare:(NSString *)string - (NSComparisonResult) localizedCaseInsensitiveCompare: (NSString *)string
{ {
// FIXME: This does only a normal compare // FIXME: This does only a normal compare
return [self compare: string return [self compare: string
@ -2569,7 +2620,7 @@ handle_printf_atsign (FILE *stream,
return [d writeToFile: filename atomically: useAuxiliaryFile]; return [d writeToFile: filename atomically: useAuxiliaryFile];
} }
- (BOOL)writeToURL:(NSURL *)anURL atomically:(BOOL)atomically - (BOOL) writeToURL: (NSURL*)anURL atomically: (BOOL)atomically
{ {
id d; id d;
if (!(d = [self dataUsingEncoding: [NSString defaultCStringEncoding]])) if (!(d = [self dataUsingEncoding: [NSString defaultCStringEncoding]]))