Some xml namespace handling fixes

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@26002 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2008-01-26 08:34:58 +00:00
parent a6bfd106e1
commit 6425664347
4 changed files with 270 additions and 160 deletions

View file

@ -1,3 +1,11 @@
2008-01-26 Richard Frith-Macdonald <rfm@gnu.org>
* Source/Additions/NSXML.m: Fixup SAX interface to pass namespace
information separately (retain old method for binary backward
compatibility).
* Source/NSXMLParser.m: Implement handling of namespaces with libxml2
based parser.
2008-01-25 Richard Frith-Macdonald <rfm@gnu.org> 2008-01-25 Richard Frith-Macdonald <rfm@gnu.org>
* Source/Additions/NSXML.m: In SAX count namespace declarations * Source/Additions/NSXML.m: In SAX count namespace declarations

View file

@ -331,7 +331,8 @@ extern "C" {
- (void) startElement: (NSString*)elementName - (void) startElement: (NSString*)elementName
prefix: (NSString*)prefix prefix: (NSString*)prefix
href: (NSString*)href href: (NSString*)href
attributes: (NSMutableDictionary*)elementAttributes; attributes: (NSMutableDictionary*)elementAttributes
namespaces: (NSMutableDictionary*)elementNamespaces;
/** <override-dummy /> */ /** <override-dummy /> */
- (void) unparsedEntityDecl: (NSString*)name - (void) unparsedEntityDecl: (NSString*)name
public: (NSString*)publicId public: (NSString*)publicId

View file

@ -2794,23 +2794,24 @@ startElementNsFunction(void *ctx, const unsigned char *name,
int nb_attributes, int nb_defaulted, int nb_attributes, int nb_defaulted,
const unsigned char **atts) const unsigned char **atts)
{ {
NSMutableDictionary *dict; NSMutableDictionary *adict = nil;
NSMutableDictionary *ndict = nil;
NSString *elem; NSString *elem;
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
elem = UTF8Str(name); elem = UTF8Str(name);
dict = [NSMutableDictionary dictionary];
if (atts != NULL) if (atts != NULL)
{ {
int i; int i;
int j; int j;
adict = [NSMutableDictionary dictionaryWithCapacity: nb_attributes];
for (i = j = 0; i < nb_attributes; i++, j += 5) for (i = j = 0; i < nb_attributes; i++, j += 5)
{ {
NSString *key = UTF8Str(atts[j]); NSString *key = UTF8Str(atts[j]);
NSString *obj = UTF8StrLen(atts[j+3], atts[j+4]-atts[j+3]); NSString *obj = UTF8StrLen(atts[j+3], atts[j+4]-atts[j+3]);
[dict setObject: obj forKey: key]; [adict setObject: obj forKey: key];
} }
} }
if (nb_namespaces > 0) if (nb_namespaces > 0)
@ -2818,6 +2819,7 @@ startElementNsFunction(void *ctx, const unsigned char *name,
int i; int i;
int pos = 0; int pos = 0;
ndict = [NSMutableDictionary dictionaryWithCapacity: nb_namespaces];
for (i = 0; i < nb_namespaces; i++) for (i = 0; i < nb_namespaces; i++)
{ {
NSString *key; NSString *key;
@ -2842,13 +2844,14 @@ startElementNsFunction(void *ctx, const unsigned char *name,
obj = UTF8Str(namespaces[pos]); obj = UTF8Str(namespaces[pos]);
} }
pos++; pos++;
[dict setObject: obj forKey: key]; [ndict setObject: obj forKey: key];
} }
} }
[HANDLER startElement: elem [HANDLER startElement: elem
prefix: UTF8Str(prefix) prefix: UTF8Str(prefix)
href: UTF8Str(href) href: UTF8Str(href)
attributes: dict]; attributes: adict
namespaces: ndict];
} }
static void static void
@ -3055,6 +3058,18 @@ fatalErrorFunction(void *ctx, const unsigned char *msg, ...)
[self startElement: elementName attributes: elementAttributes]; [self startElement: elementName attributes: elementAttributes];
} }
/**
 * Called when an opening tag has been processed, with the namespace
 * declarations made on the element supplied separately in
 * elementNamespaces rather than mixed in with elementAttributes.<br />
 * This default implementation discards elementNamespaces and forwards
 * the remaining arguments unchanged to
 * -startElement:prefix:href:attributes: so that a handler which only
 * overrides that older method is still called.
 */
- (void) startElement: (NSString*)elementName
               prefix: (NSString*)prefix
                 href: (NSString*)href
           attributes: (NSMutableDictionary*)elementAttributes
           namespaces: (NSMutableDictionary*)elementNamespaces
{
  /* elementNamespaces is deliberately not passed on; the older
   * method has no parameter to receive it.
   */
  [self startElement: elementName prefix: prefix
                href: href attributes: elementAttributes];
}
/** /**
* Called when a closing tag has been processed. * Called when a closing tag has been processed.
*/ */

View file

@ -75,39 +75,51 @@ NSString* const NSXMLParserErrorDomain = @"NSXMLParserErrorDomain";
href: (NSString*)href href: (NSString*)href
attributes: (NSMutableDictionary*)elementAttributes attributes: (NSMutableDictionary*)elementAttributes
{ {
NSString *qName = elementName;
if ([prefix length] > 0)
{
qName = [NSString stringWithFormat: @"%@:%@", prefix, qName];
}
if (_shouldProcessNamespaces) if (_shouldProcessNamespaces)
{ {
[_delegate parser: _owner [_delegate parser: _owner
didStartElement: elementName didStartElement: elementName
namespaceURI: href namespaceURI: href
qualifiedName: prefix qualifiedName: qName
attributes: elementAttributes]; attributes: elementAttributes];
} }
else else
{ {
[_delegate parser: _owner [_delegate parser: _owner
didStartElement: elementName didStartElement: qName
namespaceURI: nil namespaceURI: nil
qualifiedName: nil qualifiedName: nil
attributes: elementAttributes]; attributes: elementAttributes];
} }
} }
- (void) endElement: (NSString*) elementName - (void) endElement: (NSString*)elementName
prefix: (NSString*)prefix prefix: (NSString*)prefix
href: (NSString*)href href: (NSString*)href
{ {
NSString *qName = elementName;
if ([prefix length] > 0)
{
qName = [NSString stringWithFormat: @"%@:%@", prefix, qName];
}
if (_shouldProcessNamespaces) if (_shouldProcessNamespaces)
{ {
[_delegate parser: _owner [_delegate parser: _owner
didEndElement: elementName didEndElement: elementName
namespaceURI: href namespaceURI: href
qualifiedName: prefix]; qualifiedName: qName];
} }
else else
{ {
[_delegate parser: _owner [_delegate parser: _owner
didEndElement: elementName didEndElement: qName
namespaceURI: nil namespaceURI: nil
qualifiedName: nil]; qualifiedName: nil];
} }
@ -613,7 +625,9 @@ typedef struct { @defs(NSXMLParser) } *xp;
withAttributes: (NSDictionary *)attributes withAttributes: (NSDictionary *)attributes
{ {
if (this->acceptHTML) if (this->acceptHTML)
tag = [tag lowercaseString]; // not case sensitive {
tag = [tag lowercaseString]; // not case sensitive
}
if (!flag) if (!flag)
{ {
if ([tag isEqualToString: @"?xml"]) if ([tag isEqualToString: @"?xml"])
@ -622,7 +636,9 @@ typedef struct { @defs(NSXMLParser) } *xp;
NSLog(@"parserDidStartDocument: "); NSLog(@"parserDidStartDocument: ");
#endif #endif
if ([_del respondsToSelector: @selector(parserDidStartDocument:)]) if ([_del respondsToSelector: @selector(parserDidStartDocument:)])
[_del parserDidStartDocument: self]; {
[_del parserDidStartDocument: self];
}
return; return;
} }
if ([tag hasPrefix: @"?"]) if ([tag hasPrefix: @"?"])
@ -649,13 +665,14 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
} }
if ([tag isEqualToString: @"!CDATA"]) if ([tag isEqualToString: @"!CDATA"])
{ {
// pass through as NSData // pass through as NSData
// parser: foundCDATA: // parser: foundCDATA:
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes); NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
#endif #endif
return; return;
} }
[this->tagPath addObject: tag]; // push on stack [this->tagPath addObject: tag]; // push on stack
if ([_del respondsToSelector: if ([_del respondsToSelector:
@selector(parser:didStartElement:namespaceURI:qualifiedName:attributes:)]) @selector(parser:didStartElement:namespaceURI:qualifiedName:attributes:)])
@ -667,7 +684,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
} }
else else
{ {
// closing tag // closing tag
if (this->acceptHTML) if (this->acceptHTML)
{ {
// lazily close any missing tags on stack // lazily close any missing tags on stack
@ -683,7 +700,9 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
[this->tagPath removeLastObject]; // pop from stack [this->tagPath removeLastObject]; // pop from stack
} }
if ([this->tagPath count] == 0) if ([this->tagPath count] == 0)
return; // ignore closing tag without matching open... {
return; // ignore closing tag without matching open...
}
} }
else if (![[this->tagPath lastObject] isEqualToString: tag]) else if (![[this->tagPath lastObject] isEqualToString: tag])
{ {
@ -804,163 +823,230 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
c = cget(); // get first character c = cget(); // get first character
while (!this->abort) while (!this->abort)
{ {
// parse next element
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' '); NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' ');
#endif #endif
switch(c) switch(c)
{
case '\r':
this->column = 0;
break;
case '\n':
this->line++;
this->column = 0;
case EOF:
case '<':
case '&':
{ {
// push out any characters that have been collected so far case '\r':
if (this->cp - vp > 1) this->column = 0;
{ break;
// check for whitespace only - might set/reset a flag to indicate so
if ([_del respondsToSelector: @selector(parser: foundCharacters: )]) case '\n':
[_del parser: self foundCharacters: UTF8STR(vp, this->cp - vp - 1)]; this->line++;
vp = this->cp; this->column = 0;
}
} case EOF:
} case '<':
switch(c) case '&':
{
default:
c = cget(); // just collect until we push out (again)
continue;
case EOF: // end of file
{
if ([this->tagPath count] != 0)
{ {
if (!this->acceptHTML) /* push out any characters that have been collected so far
return [self _parseError: @"unexpected end of file"]; // strict XML nesting error */
while ([this->tagPath count] > 0) if (this->cp - vp > 1)
{ {
// lazily close all open tags /* check for whitespace only - might set/reset
if ([_del respondsToSelector: @selector(parser: didEndElement: namespaceURI: qualifiedName: )]) * a flag to indicate so
[_del parser: self didEndElement: [this->tagPath lastObject] namespaceURI: nil qualifiedName: nil]; */
[this->tagPath removeLastObject]; // pop from stack if ([_del respondsToSelector:
} @selector(parser:foundCharacters:)])
{
[_del parser: self foundCharacters:
UTF8STR(vp, this->cp - vp - 1)];
}
vp = this->cp;
}
} }
}
switch(c)
{
default:
c = cget(); // just collect until we push out (again)
continue;
case EOF:
{
if ([this->tagPath count] != 0)
{
if (!this->acceptHTML)
{
/* strict XML nesting error
*/
return [self _parseError: @"unexpected end of file"];
}
while ([this->tagPath count] > 0)
{
// lazily close all open tags
if ([_del respondsToSelector:
@selector(parser:didEndElement:namespaceURI:qualifiedName:)])
{
[_del parser: self
didEndElement: [this->tagPath lastObject]
namespaceURI: nil qualifiedName: nil];
}
[this->tagPath removeLastObject]; // pop from stack
}
}
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"parserDidEndDocument: "); NSLog(@"parserDidEndDocument: ");
#endif #endif
if ([_del respondsToSelector: @selector(parserDidEndDocument: )]) if ([_del respondsToSelector: @selector(parserDidEndDocument: )])
[_del parserDidEndDocument: self]; {
return YES; [_del parserDidEndDocument: self];
} }
case '&': return YES;
{
// escape entity begins
NSString *entity=[self _entity];
if (!entity)
return [self _parseError: @"empty entity name"];
if ([_del respondsToSelector: @selector(parser: foundCharacters: )])
[_del parser: self foundCharacters: entity];
vp = this->cp; // next value sequence starts here
c = cget(); // first character behind ;
continue;
}
case '<':
{
// tag begins
NSString *tag;
NSMutableDictionary *parameters;
NSString *arg;
const unsigned char *tp = this->cp; // tag pointer
if (this->cp < this->cend-3 && strncmp((char *)this->cp, "!--", 3) == 0)
{
// start of comment skip all characters until "-->"
this->cp+=3;
while (this->cp < this->cend-3 && strncmp((char *)this->cp, "-->", 3) != 0)
this->cp++; // search
// if _del responds to parser: foundComment:
// convert to string (tp+4 ... cp)
this->cp+=3; // might go beyond cend but does not care
vp = this->cp; // value might continue
c = cget(); // get first character behind comment
continue;
} }
c = cget(); // get first character of tag
if (c == '/') case '&':
c = cget(); // closing tag </tag begins
else if (c == '?')
{ {
// special tag <?tag begins NSString *entity = [self _entity];
c = cget(); // include in tag string
// NSLog(@"special tag <? found"); if (!entity)
// FIXME: this->should process this tag in a special way so that e.g. <?php any PHP script ?> is read as a single tag! {
// to do this properly, we need a notion of comments and quoted string constants... return [self _parseError: @"empty entity name"];
}
if ([_del respondsToSelector: @selector(parser:foundCharacters:)])
{
[_del parser: self foundCharacters: entity];
}
vp = this->cp; // next value sequence starts here
c = cget(); // first character behind ;
continue;
} }
while (!isspace(c) && c != '>' && (c != '/') && (c != '?'))
c = cget(); // scan tag until we find a delimiting character case '<':
if (*tp == '/') {
tag = UTF8STR(tp + 1, this->cp - tp - 2); // don't include / and delimiting character NSString *tag;
else NSMutableDictionary *parameters;
tag = UTF8STR(tp, this->cp - tp - 1); // don't include delimiting character NSString *arg;
const unsigned char *tp = this->cp; // tag pointer
if (this->cp < this->cend-3
&& strncmp((char *)this->cp, "!--", 3) == 0)
{
/* start of comment skip all characters until "-->"
*/
this->cp += 3;
while (this->cp < this->cend-3
&& strncmp((char *)this->cp, "-->", 3) != 0)
{
this->cp++; // search
}
/* if _del responds to parser: foundComment:
* convert to string (tp+4 ... cp)
*/
this->cp+=3; // might go beyond cend but does not care
vp = this->cp; // value might continue
c = cget(); // get first character behind comment
continue;
}
c = cget(); // get first character of tag
if (c == '/')
{
c = cget(); // closing tag </tag begins
}
else if (c == '?')
{
/* special tag <?tag begins
*/
c = cget(); // include in tag string
// NSLog(@"special tag <? found");
/* FIXME: this->should process this tag in a special
* way so that e.g. <?php any PHP script ?> is read
* as a single tag!
* to do this properly, we need a notion of comments
* and quoted string constants...
*/
}
while (c != EOF && !isspace(c)
&& c != '>' && c != '/' && c != '?')
{
c = cget(); // scan tag until we find a delimiting character
}
if (*tp == '/')
{
tag = UTF8STR(tp + 1, this->cp - tp - 2);
}
else
{
tag = UTF8STR(tp, this->cp - tp - 1);
}
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' '); NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' ');
#endif #endif
parameters = [NSMutableDictionary dictionaryWithCapacity: 5]; parameters = [NSMutableDictionary dictionaryWithCapacity: 5];
while (c != EOF) while (c != EOF)
{ {
// collect arguments if (c == '/' && *tp != '/')
if (c == '/' && *tp != '/') {
{ // appears to be a />
// appears to be a /> c = cget();
c = cget(); if (c != '>')
if (c != '>') {
return [self _parseError: @"<tag/ is missing the >"]; return [self _parseError: @"<tag/ is missing the >"];
[self _processTag: tag isEnd: NO withAttributes: parameters]; // opening tag }
[self _processTag: tag isEnd: YES withAttributes: nil]; // closing tag [self _processTag: tag
break; // done isEnd: NO
} withAttributes: parameters];
if (c == '?' && *tp == '?') [self _processTag: tag isEnd: YES withAttributes: nil];
{ break;
// appears to be a ?> }
c = cget();
if (c != '>') if (c == '?' && *tp == '?')
return [self _parseError: @"<?tag ...? is missing the >"]; {
// process // appears to be a ?>
[self _processTag: tag isEnd: NO withAttributes: parameters]; // single <?tag ...?> c = cget();
break; // done if (c != '>')
} {
while (isspace(c)) // this->should also allow for line break and tab return [self _parseError:
c = cget(); @"<?tag ...? is missing the >"];
if (c == '>') }
{ // process
[self _processTag: tag isEnd: (*tp=='/') withAttributes: parameters]; // handle tag [self _processTag: tag
break; isEnd: NO
} withAttributes: parameters]; // single <?tag ...?>
arg=[self _qarg]; // get next argument (eats up to /, ?, >, =, space) break; // done
}
// this should also allow for line break and tab
while (isspace(c))
{
c = cget();
}
if (c == '>')
{
[self _processTag: tag
isEnd: (*tp == '/')
withAttributes: parameters];
break;
}
/* get next argument (eats up to /, ?, >, =, space)
*/
arg = [self _qarg];
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"arg=%@", arg); NSLog(@"arg=%@", arg);
#endif #endif
if (!this->acceptHTML && [arg length] == 0) if (!this->acceptHTML && [arg length] == 0)
return [self _parseError: @"empty attribute name"]; {
c = cget(); // get delimiting character return [self _parseError: @"empty attribute name"];
if (c == '=') }
{ c = cget(); // get delimiting character
// explicit assignment if (c == '=')
c = cget(); // skip = {
[parameters setObject: [self _qarg] forKey: arg]; // explicit assignment
c = cget(); // get character behind qarg value c = cget(); // skip =
} [parameters setObject: [self _qarg] forKey: arg];
else // implicit c = cget(); // get character behind qarg value
[parameters setObject: @"" forKey: arg]; }
else // implicit
{
[parameters setObject: @"" forKey: arg];
}
}
vp = this->cp; // prepare for next value
c = cget(); // skip > and fetch next character
} }
vp = this->cp; // prepare for next value
c = cget(); // skip > and fetch next character
} }
}
} }
return [self _parseError: @"this->aborted"]; // this->aborted return [self _parseError: @"this->aborted"]; // this->aborted
} }