Some xml namespace handling fixes

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@26002 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2008-01-26 08:34:58 +00:00
parent a6bfd106e1
commit 6425664347
4 changed files with 270 additions and 160 deletions

View file

@ -1,3 +1,11 @@
2008-01-26 Richard Frith-Macdonald <rfm@gnu.org>
* Source/Additions/NSXML.m: Fixup SAX interface to pass namespace
information separately (retain old method for binary backward
compatibility).
* Source/NSXMLParser.m: Implement handling of namespaces with libxml2
based parser.
2008-01-25 Richard Frith-Macdonald <rfm@gnu.org> 2008-01-25 Richard Frith-Macdonald <rfm@gnu.org>
* Source/Additions/NSXML.m: In SAX count namespace declarations * Source/Additions/NSXML.m: In SAX count namespace declarations

View file

@ -331,7 +331,8 @@ extern "C" {
- (void) startElement: (NSString*)elementName - (void) startElement: (NSString*)elementName
prefix: (NSString*)prefix prefix: (NSString*)prefix
href: (NSString*)href href: (NSString*)href
attributes: (NSMutableDictionary*)elementAttributes; attributes: (NSMutableDictionary*)elementAttributes
namespaces: (NSMutableDictionary*)elementNamespaces;
/** <override-dummy /> */ /** <override-dummy /> */
- (void) unparsedEntityDecl: (NSString*)name - (void) unparsedEntityDecl: (NSString*)name
public: (NSString*)publicId public: (NSString*)publicId

View file

@ -2794,23 +2794,24 @@ startElementNsFunction(void *ctx, const unsigned char *name,
int nb_attributes, int nb_defaulted, int nb_attributes, int nb_defaulted,
const unsigned char **atts) const unsigned char **atts)
{ {
NSMutableDictionary *dict; NSMutableDictionary *adict = nil;
NSMutableDictionary *ndict = nil;
NSString *elem; NSString *elem;
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
elem = UTF8Str(name); elem = UTF8Str(name);
dict = [NSMutableDictionary dictionary];
if (atts != NULL) if (atts != NULL)
{ {
int i; int i;
int j; int j;
adict = [NSMutableDictionary dictionaryWithCapacity: nb_attributes];
for (i = j = 0; i < nb_attributes; i++, j += 5) for (i = j = 0; i < nb_attributes; i++, j += 5)
{ {
NSString *key = UTF8Str(atts[j]); NSString *key = UTF8Str(atts[j]);
NSString *obj = UTF8StrLen(atts[j+3], atts[j+4]-atts[j+3]); NSString *obj = UTF8StrLen(atts[j+3], atts[j+4]-atts[j+3]);
[dict setObject: obj forKey: key]; [adict setObject: obj forKey: key];
} }
} }
if (nb_namespaces > 0) if (nb_namespaces > 0)
@ -2818,6 +2819,7 @@ startElementNsFunction(void *ctx, const unsigned char *name,
int i; int i;
int pos = 0; int pos = 0;
ndict = [NSMutableDictionary dictionaryWithCapacity: nb_namespaces];
for (i = 0; i < nb_namespaces; i++) for (i = 0; i < nb_namespaces; i++)
{ {
NSString *key; NSString *key;
@ -2842,13 +2844,14 @@ startElementNsFunction(void *ctx, const unsigned char *name,
obj = UTF8Str(namespaces[pos]); obj = UTF8Str(namespaces[pos]);
} }
pos++; pos++;
[dict setObject: obj forKey: key]; [ndict setObject: obj forKey: key];
} }
} }
[HANDLER startElement: elem [HANDLER startElement: elem
prefix: UTF8Str(prefix) prefix: UTF8Str(prefix)
href: UTF8Str(href) href: UTF8Str(href)
attributes: dict]; attributes: adict
namespaces: ndict];
} }
static void static void
@ -3055,6 +3058,18 @@ fatalErrorFunction(void *ctx, const unsigned char *msg, ...)
[self startElement: elementName attributes: elementAttributes]; [self startElement: elementName attributes: elementAttributes];
} }
/**
 * Start-of-element callback variant that additionally receives the
 * namespace declarations of the element in elementNamespaces, separately
 * from its attributes in elementAttributes.<br />
 * This implementation does not use elementNamespaces; it simply forwards
 * elementName, prefix, href and elementAttributes to
 * -startElement:prefix:href:attributes: so that code which only
 * implements that older method continues to be called.
 */
- (void) startElement: (NSString*)elementName
prefix: (NSString*)prefix
href: (NSString*)href
attributes: (NSMutableDictionary*)elementAttributes
namespaces: (NSMutableDictionary*)elementNamespaces
{
[self startElement: elementName
prefix: prefix
href: href
attributes: elementAttributes];
}
/** /**
* Called when a closing tag has been processed. * Called when a closing tag has been processed.
*/ */

View file

@ -75,18 +75,24 @@ NSString* const NSXMLParserErrorDomain = @"NSXMLParserErrorDomain";
href: (NSString*)href href: (NSString*)href
attributes: (NSMutableDictionary*)elementAttributes attributes: (NSMutableDictionary*)elementAttributes
{ {
NSString *qName = elementName;
if ([prefix length] > 0)
{
qName = [NSString stringWithFormat: @"%@:%@", prefix, qName];
}
if (_shouldProcessNamespaces) if (_shouldProcessNamespaces)
{ {
[_delegate parser: _owner [_delegate parser: _owner
didStartElement: elementName didStartElement: elementName
namespaceURI: href namespaceURI: href
qualifiedName: prefix qualifiedName: qName
attributes: elementAttributes]; attributes: elementAttributes];
} }
else else
{ {
[_delegate parser: _owner [_delegate parser: _owner
didStartElement: elementName didStartElement: qName
namespaceURI: nil namespaceURI: nil
qualifiedName: nil qualifiedName: nil
attributes: elementAttributes]; attributes: elementAttributes];
@ -97,17 +103,23 @@ NSString* const NSXMLParserErrorDomain = @"NSXMLParserErrorDomain";
prefix: (NSString*)prefix prefix: (NSString*)prefix
href: (NSString*)href href: (NSString*)href
{ {
NSString *qName = elementName;
if ([prefix length] > 0)
{
qName = [NSString stringWithFormat: @"%@:%@", prefix, qName];
}
if (_shouldProcessNamespaces) if (_shouldProcessNamespaces)
{ {
[_delegate parser: _owner [_delegate parser: _owner
didEndElement: elementName didEndElement: elementName
namespaceURI: href namespaceURI: href
qualifiedName: prefix]; qualifiedName: qName];
} }
else else
{ {
[_delegate parser: _owner [_delegate parser: _owner
didEndElement: elementName didEndElement: qName
namespaceURI: nil namespaceURI: nil
qualifiedName: nil]; qualifiedName: nil];
} }
@ -613,7 +625,9 @@ typedef struct { @defs(NSXMLParser) } *xp;
withAttributes: (NSDictionary *)attributes withAttributes: (NSDictionary *)attributes
{ {
if (this->acceptHTML) if (this->acceptHTML)
{
tag = [tag lowercaseString]; // not case sensitive tag = [tag lowercaseString]; // not case sensitive
}
if (!flag) if (!flag)
{ {
if ([tag isEqualToString: @"?xml"]) if ([tag isEqualToString: @"?xml"])
@ -622,7 +636,9 @@ typedef struct { @defs(NSXMLParser) } *xp;
NSLog(@"parserDidStartDocument: "); NSLog(@"parserDidStartDocument: ");
#endif #endif
if ([_del respondsToSelector: @selector(parserDidStartDocument:)]) if ([_del respondsToSelector: @selector(parserDidStartDocument:)])
{
[_del parserDidStartDocument: self]; [_del parserDidStartDocument: self];
}
return; return;
} }
if ([tag hasPrefix: @"?"]) if ([tag hasPrefix: @"?"])
@ -656,6 +672,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
#endif #endif
return; return;
} }
[this->tagPath addObject: tag]; // push on stack [this->tagPath addObject: tag]; // push on stack
if ([_del respondsToSelector: if ([_del respondsToSelector:
@selector(parser:didStartElement:namespaceURI:qualifiedName:attributes:)]) @selector(parser:didStartElement:namespaceURI:qualifiedName:attributes:)])
@ -683,8 +700,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
[this->tagPath removeLastObject]; // pop from stack [this->tagPath removeLastObject]; // pop from stack
} }
if ([this->tagPath count] == 0) if ([this->tagPath count] == 0)
{
return; // ignore closing tag without matching open... return; // ignore closing tag without matching open...
} }
}
else if (![[this->tagPath lastObject] isEqualToString: tag]) else if (![[this->tagPath lastObject] isEqualToString: tag])
{ {
[self _parseError: [NSString stringWithFormat: [self _parseError: [NSString stringWithFormat:
@ -804,7 +823,6 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
c = cget(); // get first character c = cget(); // get first character
while (!this->abort) while (!this->abort)
{ {
// parse next element
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' '); NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' ');
#endif #endif
@ -813,39 +831,59 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
case '\r': case '\r':
this->column = 0; this->column = 0;
break; break;
case '\n': case '\n':
this->line++; this->line++;
this->column = 0; this->column = 0;
case EOF: case EOF:
case '<': case '<':
case '&': case '&':
{ {
// push out any characters that have been collected so far /* push out any characters that have been collected so far
*/
if (this->cp - vp > 1) if (this->cp - vp > 1)
{ {
// check for whitespace only - might set/reset a flag to indicate so /* check for whitespace only - might set/reset
if ([_del respondsToSelector: @selector(parser: foundCharacters: )]) * a flag to indicate so
[_del parser: self foundCharacters: UTF8STR(vp, this->cp - vp - 1)]; */
if ([_del respondsToSelector:
@selector(parser:foundCharacters:)])
{
[_del parser: self foundCharacters:
UTF8STR(vp, this->cp - vp - 1)];
}
vp = this->cp; vp = this->cp;
} }
} }
} }
switch(c) switch(c)
{ {
default: default:
c = cget(); // just collect until we push out (again) c = cget(); // just collect until we push out (again)
continue; continue;
case EOF: // end of file
case EOF:
{ {
if ([this->tagPath count] != 0) if ([this->tagPath count] != 0)
{ {
if (!this->acceptHTML) if (!this->acceptHTML)
return [self _parseError: @"unexpected end of file"]; // strict XML nesting error {
/* strict XML nesting error
*/
return [self _parseError: @"unexpected end of file"];
}
while ([this->tagPath count] > 0) while ([this->tagPath count] > 0)
{ {
// lazily close all open tags // lazily close all open tags
if ([_del respondsToSelector: @selector(parser: didEndElement: namespaceURI: qualifiedName: )]) if ([_del respondsToSelector:
[_del parser: self didEndElement: [this->tagPath lastObject] namespaceURI: nil qualifiedName: nil]; @selector(parser:didEndElement:namespaceURI:qualifiedName:)])
{
[_del parser: self
didEndElement: [this->tagPath lastObject]
namespaceURI: nil qualifiedName: nil];
}
[this->tagPath removeLastObject]; // pop from stack [this->tagPath removeLastObject]; // pop from stack
} }
} }
@ -854,36 +892,50 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
#endif #endif
if ([_del respondsToSelector: @selector(parserDidEndDocument: )]) if ([_del respondsToSelector: @selector(parserDidEndDocument: )])
{
[_del parserDidEndDocument: self]; [_del parserDidEndDocument: self];
}
return YES; return YES;
} }
case '&': case '&':
{ {
// escape entity begins
NSString *entity = [self _entity]; NSString *entity = [self _entity];
if (!entity) if (!entity)
{
return [self _parseError: @"empty entity name"]; return [self _parseError: @"empty entity name"];
}
if ([_del respondsToSelector: @selector(parser:foundCharacters:)]) if ([_del respondsToSelector: @selector(parser:foundCharacters:)])
{
[_del parser: self foundCharacters: entity]; [_del parser: self foundCharacters: entity];
}
vp = this->cp; // next value sequence starts here vp = this->cp; // next value sequence starts here
c = cget(); // first character behind ; c = cget(); // first character behind ;
continue; continue;
} }
case '<': case '<':
{ {
// tag begins
NSString *tag; NSString *tag;
NSMutableDictionary *parameters; NSMutableDictionary *parameters;
NSString *arg; NSString *arg;
const unsigned char *tp = this->cp; // tag pointer const unsigned char *tp = this->cp; // tag pointer
if (this->cp < this->cend-3 && strncmp((char *)this->cp, "!--", 3) == 0)
if (this->cp < this->cend-3
&& strncmp((char *)this->cp, "!--", 3) == 0)
{ {
// start of comment skip all characters until "-->" /* start of comment skip all characters until "-->"
*/
this->cp += 3; this->cp += 3;
while (this->cp < this->cend-3 && strncmp((char *)this->cp, "-->", 3) != 0) while (this->cp < this->cend-3
&& strncmp((char *)this->cp, "-->", 3) != 0)
{
this->cp++; // search this->cp++; // search
// if _del responds to parser: foundComment: }
// convert to string (tp+4 ... cp) /* if _del responds to parser: foundComment:
* convert to string (tp+4 ... cp)
*/
this->cp+=3; // might go beyond cend but does not care this->cp+=3; // might go beyond cend but does not care
vp = this->cp; // value might continue vp = this->cp; // value might continue
c = cget(); // get first character behind comment c = cget(); // get first character behind comment
@ -891,61 +943,93 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
} }
c = cget(); // get first character of tag c = cget(); // get first character of tag
if (c == '/') if (c == '/')
{
c = cget(); // closing tag </tag begins c = cget(); // closing tag </tag begins
}
else if (c == '?') else if (c == '?')
{ {
// special tag <?tag begins /* special tag <?tag begins
*/
c = cget(); // include in tag string c = cget(); // include in tag string
// NSLog(@"special tag <? found"); // NSLog(@"special tag <? found");
// FIXME: this->should process this tag in a special way so that e.g. <?php any PHP script ?> is read as a single tag! /* FIXME: this->should process this tag in a special
// to do this properly, we need a notion of comments and quoted string constants... * way so that e.g. <?php any PHP script ?> is read
* as a single tag!
* to do this properly, we need a notion of comments
* and quoted string constants...
*/
} }
while (!isspace(c) && c != '>' && (c != '/') && (c != '?')) while (c != EOF && !isspace(c)
&& c != '>' && c != '/' && c != '?')
{
c = cget(); // scan tag until we find a delimiting character c = cget(); // scan tag until we find a delimiting character
}
if (*tp == '/') if (*tp == '/')
tag = UTF8STR(tp + 1, this->cp - tp - 2); // don't include / and delimiting character {
tag = UTF8STR(tp + 1, this->cp - tp - 2);
}
else else
tag = UTF8STR(tp, this->cp - tp - 1); // don't include delimiting character {
tag = UTF8STR(tp, this->cp - tp - 1);
}
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' '); NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' ');
#endif #endif
parameters = [NSMutableDictionary dictionaryWithCapacity: 5]; parameters = [NSMutableDictionary dictionaryWithCapacity: 5];
while (c != EOF) while (c != EOF)
{ {
// collect arguments
if (c == '/' && *tp != '/') if (c == '/' && *tp != '/')
{ {
// appears to be a /> // appears to be a />
c = cget(); c = cget();
if (c != '>') if (c != '>')
{
return [self _parseError: @"<tag/ is missing the >"]; return [self _parseError: @"<tag/ is missing the >"];
[self _processTag: tag isEnd: NO withAttributes: parameters]; // opening tag
[self _processTag: tag isEnd: YES withAttributes: nil]; // closing tag
break; // done
} }
[self _processTag: tag
isEnd: NO
withAttributes: parameters];
[self _processTag: tag isEnd: YES withAttributes: nil];
break;
}
if (c == '?' && *tp == '?') if (c == '?' && *tp == '?')
{ {
// appears to be a ?> // appears to be a ?>
c = cget(); c = cget();
if (c != '>') if (c != '>')
return [self _parseError: @"<?tag ...? is missing the >"]; {
return [self _parseError:
@"<?tag ...? is missing the >"];
}
// process // process
[self _processTag: tag isEnd: NO withAttributes: parameters]; // single <?tag ...?> [self _processTag: tag
isEnd: NO
withAttributes: parameters]; // single <?tag ...?>
break; // done break; // done
} }
while (isspace(c)) // this->should also allow for line break and tab // this should also allow for line break and tab
while (isspace(c))
{
c = cget(); c = cget();
}
if (c == '>') if (c == '>')
{ {
[self _processTag: tag isEnd: (*tp=='/') withAttributes: parameters]; // handle tag [self _processTag: tag
isEnd: (*tp == '/')
withAttributes: parameters];
break; break;
} }
arg=[self _qarg]; // get next argument (eats up to /, ?, >, =, space) /* get next argument (eats up to /, ?, >, =, space)
*/
arg = [self _qarg];
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"arg=%@", arg); NSLog(@"arg=%@", arg);
#endif #endif
if (!this->acceptHTML && [arg length] == 0) if (!this->acceptHTML && [arg length] == 0)
{
return [self _parseError: @"empty attribute name"]; return [self _parseError: @"empty attribute name"];
}
c = cget(); // get delimiting character c = cget(); // get delimiting character
if (c == '=') if (c == '=')
{ {
@ -955,8 +1039,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
c = cget(); // get character behind qarg value c = cget(); // get character behind qarg value
} }
else // implicit else // implicit
{
[parameters setObject: @"" forKey: arg]; [parameters setObject: @"" forKey: arg];
} }
}
vp = this->cp; // prepare for next value vp = this->cp; // prepare for next value
c = cget(); // skip > and fetch next character c = cget(); // skip > and fetch next character
} }