Some xml namespace handling fixes

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@26002 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2008-01-26 08:34:58 +00:00
parent a6bfd106e1
commit 6425664347
4 changed files with 270 additions and 160 deletions

View file

@ -1,3 +1,11 @@
2008-01-26 Richard Frith-Macdonald <rfm@gnu.org>
* Source/Additions/NSXML.m: Fixup SAX interface to pass namespace
information separately (retain old method for binary backward
compatibility).
* Source/NSXMLParser.m: Implement handling of namespaces with libxml2
based parser.
2008-01-25 Richard Frith-Macdonald <rfm@gnu.org> 2008-01-25 Richard Frith-Macdonald <rfm@gnu.org>
* Source/Additions/NSXML.m: In SAX count namespace declarations * Source/Additions/NSXML.m: In SAX count namespace declarations

View file

@ -331,7 +331,8 @@ extern "C" {
- (void) startElement: (NSString*)elementName - (void) startElement: (NSString*)elementName
prefix: (NSString*)prefix prefix: (NSString*)prefix
href: (NSString*)href href: (NSString*)href
attributes: (NSMutableDictionary*)elementAttributes; attributes: (NSMutableDictionary*)elementAttributes
namespaces: (NSMutableDictionary*)elementNamespaces;
/** <override-dummy /> */ /** <override-dummy /> */
- (void) unparsedEntityDecl: (NSString*)name - (void) unparsedEntityDecl: (NSString*)name
public: (NSString*)publicId public: (NSString*)publicId

View file

@ -2794,23 +2794,24 @@ startElementNsFunction(void *ctx, const unsigned char *name,
int nb_attributes, int nb_defaulted, int nb_attributes, int nb_defaulted,
const unsigned char **atts) const unsigned char **atts)
{ {
NSMutableDictionary *dict; NSMutableDictionary *adict = nil;
NSMutableDictionary *ndict = nil;
NSString *elem; NSString *elem;
NSCAssert(ctx,@"No Context"); NSCAssert(ctx,@"No Context");
elem = UTF8Str(name); elem = UTF8Str(name);
dict = [NSMutableDictionary dictionary];
if (atts != NULL) if (atts != NULL)
{ {
int i; int i;
int j; int j;
adict = [NSMutableDictionary dictionaryWithCapacity: nb_attributes];
for (i = j = 0; i < nb_attributes; i++, j += 5) for (i = j = 0; i < nb_attributes; i++, j += 5)
{ {
NSString *key = UTF8Str(atts[j]); NSString *key = UTF8Str(atts[j]);
NSString *obj = UTF8StrLen(atts[j+3], atts[j+4]-atts[j+3]); NSString *obj = UTF8StrLen(atts[j+3], atts[j+4]-atts[j+3]);
[dict setObject: obj forKey: key]; [adict setObject: obj forKey: key];
} }
} }
if (nb_namespaces > 0) if (nb_namespaces > 0)
@ -2818,6 +2819,7 @@ startElementNsFunction(void *ctx, const unsigned char *name,
int i; int i;
int pos = 0; int pos = 0;
ndict = [NSMutableDictionary dictionaryWithCapacity: nb_namespaces];
for (i = 0; i < nb_namespaces; i++) for (i = 0; i < nb_namespaces; i++)
{ {
NSString *key; NSString *key;
@ -2842,13 +2844,14 @@ startElementNsFunction(void *ctx, const unsigned char *name,
obj = UTF8Str(namespaces[pos]); obj = UTF8Str(namespaces[pos]);
} }
pos++; pos++;
[dict setObject: obj forKey: key]; [ndict setObject: obj forKey: key];
} }
} }
[HANDLER startElement: elem [HANDLER startElement: elem
prefix: UTF8Str(prefix) prefix: UTF8Str(prefix)
href: UTF8Str(href) href: UTF8Str(href)
attributes: dict]; attributes: adict
namespaces: ndict];
} }
static void static void
@ -3055,6 +3058,18 @@ fatalErrorFunction(void *ctx, const unsigned char *msg, ...)
[self startElement: elementName attributes: elementAttributes]; [self startElement: elementName attributes: elementAttributes];
} }
/**
 * Start-of-element callback variant that receives the namespace
 * declarations (elementNamespaces) in a dictionary separate from the
 * element's attributes (elementAttributes).
 * This implementation does not use elementNamespaces; it forwards
 * elementName, prefix, href and elementAttributes unchanged to
 * -startElement:prefix:href:attributes:
 * NOTE(review): presumably subclasses needing the namespace declarations
 * are expected to override this method - confirm against the header docs.
 */
- (void) startElement: (NSString*)elementName
prefix: (NSString*)prefix
href: (NSString*)href
attributes: (NSMutableDictionary*)elementAttributes
namespaces: (NSMutableDictionary*)elementNamespaces
{
/* The four-argument method has no namespaces parameter, so
 * elementNamespaces is dropped here.
 */
[self startElement: elementName
prefix: prefix
href: href
attributes: elementAttributes];
}
/** /**
* Called when a closing tag has been processed. * Called when a closing tag has been processed.
*/ */

View file

@ -75,39 +75,51 @@ NSString* const NSXMLParserErrorDomain = @"NSXMLParserErrorDomain";
href: (NSString*)href href: (NSString*)href
attributes: (NSMutableDictionary*)elementAttributes attributes: (NSMutableDictionary*)elementAttributes
{ {
NSString *qName = elementName;
if ([prefix length] > 0)
{
qName = [NSString stringWithFormat: @"%@:%@", prefix, qName];
}
if (_shouldProcessNamespaces) if (_shouldProcessNamespaces)
{ {
[_delegate parser: _owner [_delegate parser: _owner
didStartElement: elementName didStartElement: elementName
namespaceURI: href namespaceURI: href
qualifiedName: prefix qualifiedName: qName
attributes: elementAttributes]; attributes: elementAttributes];
} }
else else
{ {
[_delegate parser: _owner [_delegate parser: _owner
didStartElement: elementName didStartElement: qName
namespaceURI: nil namespaceURI: nil
qualifiedName: nil qualifiedName: nil
attributes: elementAttributes]; attributes: elementAttributes];
} }
} }
- (void) endElement: (NSString*) elementName - (void) endElement: (NSString*)elementName
prefix: (NSString*)prefix prefix: (NSString*)prefix
href: (NSString*)href href: (NSString*)href
{ {
NSString *qName = elementName;
if ([prefix length] > 0)
{
qName = [NSString stringWithFormat: @"%@:%@", prefix, qName];
}
if (_shouldProcessNamespaces) if (_shouldProcessNamespaces)
{ {
[_delegate parser: _owner [_delegate parser: _owner
didEndElement: elementName didEndElement: elementName
namespaceURI: href namespaceURI: href
qualifiedName: prefix]; qualifiedName: qName];
} }
else else
{ {
[_delegate parser: _owner [_delegate parser: _owner
didEndElement: elementName didEndElement: qName
namespaceURI: nil namespaceURI: nil
qualifiedName: nil]; qualifiedName: nil];
} }
@ -613,7 +625,9 @@ typedef struct { @defs(NSXMLParser) } *xp;
withAttributes: (NSDictionary *)attributes withAttributes: (NSDictionary *)attributes
{ {
if (this->acceptHTML) if (this->acceptHTML)
tag = [tag lowercaseString]; // not case sensitive {
tag = [tag lowercaseString]; // not case sensitive
}
if (!flag) if (!flag)
{ {
if ([tag isEqualToString: @"?xml"]) if ([tag isEqualToString: @"?xml"])
@ -622,7 +636,9 @@ typedef struct { @defs(NSXMLParser) } *xp;
NSLog(@"parserDidStartDocument: "); NSLog(@"parserDidStartDocument: ");
#endif #endif
if ([_del respondsToSelector: @selector(parserDidStartDocument:)]) if ([_del respondsToSelector: @selector(parserDidStartDocument:)])
[_del parserDidStartDocument: self]; {
[_del parserDidStartDocument: self];
}
return; return;
} }
if ([tag hasPrefix: @"?"]) if ([tag hasPrefix: @"?"])
@ -649,13 +665,14 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
} }
if ([tag isEqualToString: @"!CDATA"]) if ([tag isEqualToString: @"!CDATA"])
{ {
// pass through as NSData // pass through as NSData
// parser: foundCDATA: // parser: foundCDATA:
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes); NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
#endif #endif
return; return;
} }
[this->tagPath addObject: tag]; // push on stack [this->tagPath addObject: tag]; // push on stack
if ([_del respondsToSelector: if ([_del respondsToSelector:
@selector(parser:didStartElement:namespaceURI:qualifiedName:attributes:)]) @selector(parser:didStartElement:namespaceURI:qualifiedName:attributes:)])
@ -667,7 +684,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
} }
else else
{ {
// closing tag // closing tag
if (this->acceptHTML) if (this->acceptHTML)
{ {
// lazily close any missing tags on stack // lazily close any missing tags on stack
@ -683,7 +700,9 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
[this->tagPath removeLastObject]; // pop from stack [this->tagPath removeLastObject]; // pop from stack
} }
if ([this->tagPath count] == 0) if ([this->tagPath count] == 0)
return; // ignore closing tag without matching open... {
return; // ignore closing tag without matching open...
}
} }
else if (![[this->tagPath lastObject] isEqualToString: tag]) else if (![[this->tagPath lastObject] isEqualToString: tag])
{ {
@ -804,163 +823,230 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
c = cget(); // get first character c = cget(); // get first character
while (!this->abort) while (!this->abort)
{ {
// parse next element
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' '); NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' ');
#endif #endif
switch(c) switch(c)
{
case '\r':
this->column = 0;
break;
case '\n':
this->line++;
this->column = 0;
case EOF:
case '<':
case '&':
{ {
// push out any characters that have been collected so far case '\r':
if (this->cp - vp > 1) this->column = 0;
{ break;
// check for whitespace only - might set/reset a flag to indicate so
if ([_del respondsToSelector: @selector(parser: foundCharacters: )]) case '\n':
[_del parser: self foundCharacters: UTF8STR(vp, this->cp - vp - 1)]; this->line++;
vp = this->cp; this->column = 0;
}
} case EOF:
} case '<':
switch(c) case '&':
{
default:
c = cget(); // just collect until we push out (again)
continue;
case EOF: // end of file
{
if ([this->tagPath count] != 0)
{ {
if (!this->acceptHTML) /* push out any characters that have been collected so far
return [self _parseError: @"unexpected end of file"]; // strict XML nesting error */
while ([this->tagPath count] > 0) if (this->cp - vp > 1)
{ {
// lazily close all open tags /* check for whitespace only - might set/reset
if ([_del respondsToSelector: @selector(parser: didEndElement: namespaceURI: qualifiedName: )]) * a flag to indicate so
[_del parser: self didEndElement: [this->tagPath lastObject] namespaceURI: nil qualifiedName: nil]; */
[this->tagPath removeLastObject]; // pop from stack if ([_del respondsToSelector:
} @selector(parser:foundCharacters:)])
{
[_del parser: self foundCharacters:
UTF8STR(vp, this->cp - vp - 1)];
}
vp = this->cp;
}
} }
#if EXTRA_DEBUG
NSLog(@"parserDidEndDocument: ");
#endif
if ([_del respondsToSelector: @selector(parserDidEndDocument: )])
[_del parserDidEndDocument: self];
return YES;
} }
case '&':
switch(c)
{ {
// escape entity begins default:
NSString *entity=[self _entity]; c = cget(); // just collect until we push out (again)
if (!entity)
return [self _parseError: @"empty entity name"];
if ([_del respondsToSelector: @selector(parser: foundCharacters: )])
[_del parser: self foundCharacters: entity];
vp = this->cp; // next value sequence starts here
c = cget(); // first character behind ;
continue;
}
case '<':
{
// tag begins
NSString *tag;
NSMutableDictionary *parameters;
NSString *arg;
const unsigned char *tp = this->cp; // tag pointer
if (this->cp < this->cend-3 && strncmp((char *)this->cp, "!--", 3) == 0)
{
// start of comment skip all characters until "-->"
this->cp+=3;
while (this->cp < this->cend-3 && strncmp((char *)this->cp, "-->", 3) != 0)
this->cp++; // search
// if _del responds to parser: foundComment:
// convert to string (tp+4 ... cp)
this->cp+=3; // might go beyond cend but does not care
vp = this->cp; // value might continue
c = cget(); // get first character behind comment
continue; continue;
}
c = cget(); // get first character of tag case EOF:
if (c == '/')
c = cget(); // closing tag </tag begins
else if (c == '?')
{ {
// special tag <?tag begins if ([this->tagPath count] != 0)
c = cget(); // include in tag string {
// NSLog(@"special tag <? found"); if (!this->acceptHTML)
// FIXME: this->should process this tag in a special way so that e.g. <?php any PHP script ?> is read as a single tag! {
// to do this properly, we need a notion of comments and quoted string constants... /* strict XML nesting error
} */
while (!isspace(c) && c != '>' && (c != '/') && (c != '?')) return [self _parseError: @"unexpected end of file"];
c = cget(); // scan tag until we find a delimiting character }
if (*tp == '/') while ([this->tagPath count] > 0)
tag = UTF8STR(tp + 1, this->cp - tp - 2); // don't include / and delimiting character {
else // lazily close all open tags
tag = UTF8STR(tp, this->cp - tp - 1); // don't include delimiting character if ([_del respondsToSelector:
@selector(parser:didEndElement:namespaceURI:qualifiedName:)])
{
[_del parser: self
didEndElement: [this->tagPath lastObject]
namespaceURI: nil qualifiedName: nil];
}
[this->tagPath removeLastObject]; // pop from stack
}
}
#if EXTRA_DEBUG #if EXTRA_DEBUG
NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' '); NSLog(@"parserDidEndDocument: ");
#endif #endif
parameters = [NSMutableDictionary dictionaryWithCapacity: 5];
while (c != EOF) if ([_del respondsToSelector: @selector(parserDidEndDocument: )])
{
[_del parserDidEndDocument: self];
}
return YES;
}
case '&':
{ {
// collect arguments NSString *entity = [self _entity];
if (c == '/' && *tp != '/')
{ if (!entity)
// appears to be a /> {
c = cget(); return [self _parseError: @"empty entity name"];
if (c != '>') }
return [self _parseError: @"<tag/ is missing the >"]; if ([_del respondsToSelector: @selector(parser:foundCharacters:)])
[self _processTag: tag isEnd: NO withAttributes: parameters]; // opening tag {
[self _processTag: tag isEnd: YES withAttributes: nil]; // closing tag [_del parser: self foundCharacters: entity];
break; // done }
} vp = this->cp; // next value sequence starts here
if (c == '?' && *tp == '?') c = cget(); // first character behind ;
{ continue;
// appears to be a ?> }
c = cget();
if (c != '>') case '<':
return [self _parseError: @"<?tag ...? is missing the >"]; {
// process NSString *tag;
[self _processTag: tag isEnd: NO withAttributes: parameters]; // single <?tag ...?> NSMutableDictionary *parameters;
break; // done NSString *arg;
} const unsigned char *tp = this->cp; // tag pointer
while (isspace(c)) // this->should also allow for line break and tab
c = cget(); if (this->cp < this->cend-3
if (c == '>') && strncmp((char *)this->cp, "!--", 3) == 0)
{ {
[self _processTag: tag isEnd: (*tp=='/') withAttributes: parameters]; // handle tag /* start of comment skip all characters until "-->"
break; */
} this->cp += 3;
arg=[self _qarg]; // get next argument (eats up to /, ?, >, =, space) while (this->cp < this->cend-3
#if EXTRA_DEBUG && strncmp((char *)this->cp, "-->", 3) != 0)
NSLog(@"arg=%@", arg); {
#endif this->cp++; // search
if (!this->acceptHTML && [arg length] == 0) }
return [self _parseError: @"empty attribute name"]; /* if _del responds to parser: foundComment:
c = cget(); // get delimiting character * convert to string (tp+4 ... cp)
if (c == '=') */
{ this->cp+=3; // might go beyond cend but does not care
// explicit assignment vp = this->cp; // value might continue
c = cget(); // skip = c = cget(); // get first character behind comment
[parameters setObject: [self _qarg] forKey: arg]; continue;
c = cget(); // get character behind qarg value }
} c = cget(); // get first character of tag
else // implicit if (c == '/')
[parameters setObject: @"" forKey: arg]; {
c = cget(); // closing tag </tag begins
}
else if (c == '?')
{
/* special tag <?tag begins
*/
c = cget(); // include in tag string
// NSLog(@"special tag <? found");
/* FIXME: this->should process this tag in a special
* way so that e.g. <?php any PHP script ?> is read
* as a single tag!
* to do this properly, we need a notion of comments
* and quoted string constants...
*/
}
while (c != EOF && !isspace(c)
&& c != '>' && c != '/' && c != '?')
{
c = cget(); // scan tag until we find a delimiting character
}
if (*tp == '/')
{
tag = UTF8STR(tp + 1, this->cp - tp - 2);
}
else
{
tag = UTF8STR(tp, this->cp - tp - 1);
}
#if EXTRA_DEBUG
NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' ');
#endif
parameters = [NSMutableDictionary dictionaryWithCapacity: 5];
while (c != EOF)
{
if (c == '/' && *tp != '/')
{
// appears to be a />
c = cget();
if (c != '>')
{
return [self _parseError: @"<tag/ is missing the >"];
}
[self _processTag: tag
isEnd: NO
withAttributes: parameters];
[self _processTag: tag isEnd: YES withAttributes: nil];
break;
}
if (c == '?' && *tp == '?')
{
// appears to be a ?>
c = cget();
if (c != '>')
{
return [self _parseError:
@"<?tag ...? is missing the >"];
}
// process
[self _processTag: tag
isEnd: NO
withAttributes: parameters]; // single <?tag ...?>
break; // done
}
// this should also allow for line break and tab
while (isspace(c))
{
c = cget();
}
if (c == '>')
{
[self _processTag: tag
isEnd: (*tp == '/')
withAttributes: parameters];
break;
}
/* get next argument (eats up to /, ?, >, =, space)
*/
arg = [self _qarg];
#if EXTRA_DEBUG
NSLog(@"arg=%@", arg);
#endif
if (!this->acceptHTML && [arg length] == 0)
{
return [self _parseError: @"empty attribute name"];
}
c = cget(); // get delimiting character
if (c == '=')
{
// explicit assignment
c = cget(); // skip =
[parameters setObject: [self _qarg] forKey: arg];
c = cget(); // get character behind qarg value
}
else // implicit
{
[parameters setObject: @"" forKey: arg];
}
}
vp = this->cp; // prepare for next value
c = cget(); // skip > and fetch next character
} }
vp = this->cp; // prepare for next value
c = cget(); // skip > and fetch next character
} }
}
} }
return [self _parseError: @"this->aborted"]; // this->aborted return [self _parseError: @"this->aborted"]; // this->aborted
} }