mirror of
https://github.com/gnustep/libs-base.git
synced 2025-05-29 16:01:38 +00:00
Some xml namespace handling fixes
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@26002 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
a6bfd106e1
commit
6425664347
4 changed files with 270 additions and 160 deletions
|
@ -1,3 +1,11 @@
|
|||
2008-01-26 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Source/Additions/NSXML.m: Fixup SAX interface to pass namespace
|
||||
information separately (retain old method for binary backward
|
||||
compatibility).
|
||||
* Source/NSXMLParser.m: Implement handling of namespaces with libxml2
|
||||
based parser.
|
||||
|
||||
2008-01-25 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Source/Additions/NSXML.m: In SAX count namespace declarations
|
||||
|
|
|
@ -331,7 +331,8 @@ extern "C" {
|
|||
- (void) startElement: (NSString*)elementName
|
||||
prefix: (NSString*)prefix
|
||||
href: (NSString*)href
|
||||
attributes: (NSMutableDictionary*)elementAttributes;
|
||||
attributes: (NSMutableDictionary*)elementAttributes
|
||||
namespaces: (NSMutableDictionary*)elementNamespaces;
|
||||
/** <override-dummy /> */
|
||||
- (void) unparsedEntityDecl: (NSString*)name
|
||||
public: (NSString*)publicId
|
||||
|
|
|
@ -2794,23 +2794,24 @@ startElementNsFunction(void *ctx, const unsigned char *name,
|
|||
int nb_attributes, int nb_defaulted,
|
||||
const unsigned char **atts)
|
||||
{
|
||||
NSMutableDictionary *dict;
|
||||
NSMutableDictionary *adict = nil;
|
||||
NSMutableDictionary *ndict = nil;
|
||||
NSString *elem;
|
||||
|
||||
NSCAssert(ctx,@"No Context");
|
||||
elem = UTF8Str(name);
|
||||
dict = [NSMutableDictionary dictionary];
|
||||
if (atts != NULL)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
|
||||
adict = [NSMutableDictionary dictionaryWithCapacity: nb_attributes];
|
||||
for (i = j = 0; i < nb_attributes; i++, j += 5)
|
||||
{
|
||||
NSString *key = UTF8Str(atts[j]);
|
||||
NSString *obj = UTF8StrLen(atts[j+3], atts[j+4]-atts[j+3]);
|
||||
|
||||
[dict setObject: obj forKey: key];
|
||||
[adict setObject: obj forKey: key];
|
||||
}
|
||||
}
|
||||
if (nb_namespaces > 0)
|
||||
|
@ -2818,6 +2819,7 @@ startElementNsFunction(void *ctx, const unsigned char *name,
|
|||
int i;
|
||||
int pos = 0;
|
||||
|
||||
ndict = [NSMutableDictionary dictionaryWithCapacity: nb_namespaces];
|
||||
for (i = 0; i < nb_namespaces; i++)
|
||||
{
|
||||
NSString *key;
|
||||
|
@ -2842,13 +2844,14 @@ startElementNsFunction(void *ctx, const unsigned char *name,
|
|||
obj = UTF8Str(namespaces[pos]);
|
||||
}
|
||||
pos++;
|
||||
[dict setObject: obj forKey: key];
|
||||
[ndict setObject: obj forKey: key];
|
||||
}
|
||||
}
|
||||
[HANDLER startElement: elem
|
||||
prefix: UTF8Str(prefix)
|
||||
href: UTF8Str(href)
|
||||
attributes: dict];
|
||||
attributes: adict
|
||||
namespaces: ndict];
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -3055,6 +3058,18 @@ fatalErrorFunction(void *ctx, const unsigned char *msg, ...)
|
|||
[self startElement: elementName attributes: elementAttributes];
|
||||
}
|
||||
|
||||
/**
 * Variant of the start-element callback which additionally receives the
 * namespace declarations of the element in a separate dictionary.<br />
 * This implementation makes no use of elementNamespaces: it passes
 * elementName, prefix, href and elementAttributes on unchanged to
 * -startElement:prefix:href:attributes: so that code overriding that
 * method continues to be called.
 */
- (void) startElement: (NSString*)elementName
|
||||
prefix: (NSString*)prefix
|
||||
href: (NSString*)href
|
||||
attributes: (NSMutableDictionary*)elementAttributes
|
||||
namespaces: (NSMutableDictionary*)elementNamespaces
|
||||
{
|
||||
[self startElement: elementName
|
||||
prefix: prefix
|
||||
href: href
|
||||
attributes: elementAttributes];
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a closing tag has been processed.
|
||||
*/
|
||||
|
|
|
@ -75,39 +75,51 @@ NSString* const NSXMLParserErrorDomain = @"NSXMLParserErrorDomain";
|
|||
href: (NSString*)href
|
||||
attributes: (NSMutableDictionary*)elementAttributes
|
||||
{
|
||||
NSString *qName = elementName;
|
||||
|
||||
if ([prefix length] > 0)
|
||||
{
|
||||
qName = [NSString stringWithFormat: @"%@:%@", prefix, qName];
|
||||
}
|
||||
if (_shouldProcessNamespaces)
|
||||
{
|
||||
[_delegate parser: _owner
|
||||
didStartElement: elementName
|
||||
namespaceURI: href
|
||||
qualifiedName: prefix
|
||||
qualifiedName: qName
|
||||
attributes: elementAttributes];
|
||||
}
|
||||
else
|
||||
{
|
||||
[_delegate parser: _owner
|
||||
didStartElement: elementName
|
||||
didStartElement: qName
|
||||
namespaceURI: nil
|
||||
qualifiedName: nil
|
||||
attributes: elementAttributes];
|
||||
}
|
||||
}
|
||||
|
||||
- (void) endElement: (NSString*) elementName
|
||||
- (void) endElement: (NSString*)elementName
|
||||
prefix: (NSString*)prefix
|
||||
href: (NSString*)href
|
||||
{
|
||||
NSString *qName = elementName;
|
||||
|
||||
if ([prefix length] > 0)
|
||||
{
|
||||
qName = [NSString stringWithFormat: @"%@:%@", prefix, qName];
|
||||
}
|
||||
if (_shouldProcessNamespaces)
|
||||
{
|
||||
[_delegate parser: _owner
|
||||
didEndElement: elementName
|
||||
namespaceURI: href
|
||||
qualifiedName: prefix];
|
||||
qualifiedName: qName];
|
||||
}
|
||||
else
|
||||
{
|
||||
[_delegate parser: _owner
|
||||
didEndElement: elementName
|
||||
didEndElement: qName
|
||||
namespaceURI: nil
|
||||
qualifiedName: nil];
|
||||
}
|
||||
|
@ -613,7 +625,9 @@ typedef struct { @defs(NSXMLParser) } *xp;
|
|||
withAttributes: (NSDictionary *)attributes
|
||||
{
|
||||
if (this->acceptHTML)
|
||||
tag = [tag lowercaseString]; // not case sensitive
|
||||
{
|
||||
tag = [tag lowercaseString]; // not case sensitive
|
||||
}
|
||||
if (!flag)
|
||||
{
|
||||
if ([tag isEqualToString: @"?xml"])
|
||||
|
@ -622,7 +636,9 @@ typedef struct { @defs(NSXMLParser) } *xp;
|
|||
NSLog(@"parserDidStartDocument: ");
|
||||
#endif
|
||||
if ([_del respondsToSelector: @selector(parserDidStartDocument:)])
|
||||
[_del parserDidStartDocument: self];
|
||||
{
|
||||
[_del parserDidStartDocument: self];
|
||||
}
|
||||
return;
|
||||
}
|
||||
if ([tag hasPrefix: @"?"])
|
||||
|
@ -649,13 +665,14 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
}
|
||||
if ([tag isEqualToString: @"!CDATA"])
|
||||
{
|
||||
// pass through as NSData
|
||||
// parser: foundCDATA:
|
||||
// pass through as NSData
|
||||
// parser: foundCDATA:
|
||||
#if EXTRA_DEBUG
|
||||
NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
||||
#endif
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
[this->tagPath addObject: tag]; // push on stack
|
||||
if ([_del respondsToSelector:
|
||||
@selector(parser:didStartElement:namespaceURI:qualifiedName:attributes:)])
|
||||
|
@ -667,7 +684,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
}
|
||||
else
|
||||
{
|
||||
// closing tag
|
||||
// closing tag
|
||||
if (this->acceptHTML)
|
||||
{
|
||||
// lazily close any missing tags on stack
|
||||
|
@ -683,7 +700,9 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
[this->tagPath removeLastObject]; // pop from stack
|
||||
}
|
||||
if ([this->tagPath count] == 0)
|
||||
return; // ignore closing tag without matching open...
|
||||
{
|
||||
return; // ignore closing tag without matching open...
|
||||
}
|
||||
}
|
||||
else if (![[this->tagPath lastObject] isEqualToString: tag])
|
||||
{
|
||||
|
@ -804,163 +823,230 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
c = cget(); // get first character
|
||||
while (!this->abort)
|
||||
{
|
||||
// parse next element
|
||||
#if EXTRA_DEBUG
|
||||
NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' ');
|
||||
#endif
|
||||
switch(c)
|
||||
{
|
||||
case '\r':
|
||||
this->column = 0;
|
||||
break;
|
||||
case '\n':
|
||||
this->line++;
|
||||
this->column = 0;
|
||||
case EOF:
|
||||
case '<':
|
||||
case '&':
|
||||
switch(c)
|
||||
{
|
||||
// push out any characters that have been collected so far
|
||||
if (this->cp - vp > 1)
|
||||
{
|
||||
// check for whitespace only - might set/reset a flag to indicate so
|
||||
if ([_del respondsToSelector: @selector(parser: foundCharacters: )])
|
||||
[_del parser: self foundCharacters: UTF8STR(vp, this->cp - vp - 1)];
|
||||
vp = this->cp;
|
||||
}
|
||||
}
|
||||
}
|
||||
switch(c)
|
||||
{
|
||||
default:
|
||||
c = cget(); // just collect until we push out (again)
|
||||
continue;
|
||||
case EOF: // end of file
|
||||
{
|
||||
if ([this->tagPath count] != 0)
|
||||
case '\r':
|
||||
this->column = 0;
|
||||
break;
|
||||
|
||||
case '\n':
|
||||
this->line++;
|
||||
this->column = 0;
|
||||
|
||||
case EOF:
|
||||
case '<':
|
||||
case '&':
|
||||
{
|
||||
if (!this->acceptHTML)
|
||||
return [self _parseError: @"unexpected end of file"]; // strict XML nesting error
|
||||
while ([this->tagPath count] > 0)
|
||||
{
|
||||
// lazily close all open tags
|
||||
if ([_del respondsToSelector: @selector(parser: didEndElement: namespaceURI: qualifiedName: )])
|
||||
[_del parser: self didEndElement: [this->tagPath lastObject] namespaceURI: nil qualifiedName: nil];
|
||||
[this->tagPath removeLastObject]; // pop from stack
|
||||
}
|
||||
/* push out any characters that have been collected so far
|
||||
*/
|
||||
if (this->cp - vp > 1)
|
||||
{
|
||||
/* check for whitespace only - might set/reset
|
||||
* a flag to indicate so
|
||||
*/
|
||||
if ([_del respondsToSelector:
|
||||
@selector(parser:foundCharacters:)])
|
||||
{
|
||||
[_del parser: self foundCharacters:
|
||||
UTF8STR(vp, this->cp - vp - 1)];
|
||||
}
|
||||
vp = this->cp;
|
||||
}
|
||||
}
|
||||
#if EXTRA_DEBUG
|
||||
NSLog(@"parserDidEndDocument: ");
|
||||
#endif
|
||||
|
||||
if ([_del respondsToSelector: @selector(parserDidEndDocument: )])
|
||||
[_del parserDidEndDocument: self];
|
||||
return YES;
|
||||
}
|
||||
case '&':
|
||||
|
||||
switch(c)
|
||||
{
|
||||
// escape entity begins
|
||||
NSString *entity=[self _entity];
|
||||
if (!entity)
|
||||
return [self _parseError: @"empty entity name"];
|
||||
if ([_del respondsToSelector: @selector(parser: foundCharacters: )])
|
||||
[_del parser: self foundCharacters: entity];
|
||||
vp = this->cp; // next value sequence starts here
|
||||
c = cget(); // first character behind ;
|
||||
continue;
|
||||
}
|
||||
case '<':
|
||||
{
|
||||
// tag begins
|
||||
NSString *tag;
|
||||
NSMutableDictionary *parameters;
|
||||
NSString *arg;
|
||||
const unsigned char *tp = this->cp; // tag pointer
|
||||
if (this->cp < this->cend-3 && strncmp((char *)this->cp, "!--", 3) == 0)
|
||||
{
|
||||
// start of comment skip all characters until "-->"
|
||||
this->cp+=3;
|
||||
while (this->cp < this->cend-3 && strncmp((char *)this->cp, "-->", 3) != 0)
|
||||
this->cp++; // search
|
||||
// if _del responds to parser: foundComment:
|
||||
// convert to string (tp+4 ... cp)
|
||||
this->cp+=3; // might go beyond cend but does not care
|
||||
vp = this->cp; // value might continue
|
||||
c = cget(); // get first character behind comment
|
||||
default:
|
||||
c = cget(); // just collect until we push out (again)
|
||||
continue;
|
||||
}
|
||||
c = cget(); // get first character of tag
|
||||
if (c == '/')
|
||||
c = cget(); // closing tag </tag begins
|
||||
else if (c == '?')
|
||||
|
||||
case EOF:
|
||||
{
|
||||
// special tag <?tag begins
|
||||
c = cget(); // include in tag string
|
||||
// NSLog(@"special tag <? found");
|
||||
// FIXME: this->should process this tag in a special way so that e.g. <?php any PHP script ?> is read as a single tag!
|
||||
// to do this properly, we need a notion of comments and quoted string constants...
|
||||
}
|
||||
while (!isspace(c) && c != '>' && (c != '/') && (c != '?'))
|
||||
c = cget(); // scan tag until we find a delimiting character
|
||||
if (*tp == '/')
|
||||
tag = UTF8STR(tp + 1, this->cp - tp - 2); // don't include / and delimiting character
|
||||
else
|
||||
tag = UTF8STR(tp, this->cp - tp - 1); // don't include delimiting character
|
||||
if ([this->tagPath count] != 0)
|
||||
{
|
||||
if (!this->acceptHTML)
|
||||
{
|
||||
/* strict XML nesting error
|
||||
*/
|
||||
return [self _parseError: @"unexpected end of file"];
|
||||
}
|
||||
while ([this->tagPath count] > 0)
|
||||
{
|
||||
// lazily close all open tags
|
||||
if ([_del respondsToSelector:
|
||||
@selector(parser:didEndElement:namespaceURI:qualifiedName:)])
|
||||
{
|
||||
[_del parser: self
|
||||
didEndElement: [this->tagPath lastObject]
|
||||
namespaceURI: nil qualifiedName: nil];
|
||||
}
|
||||
[this->tagPath removeLastObject]; // pop from stack
|
||||
}
|
||||
}
|
||||
#if EXTRA_DEBUG
|
||||
NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' ');
|
||||
NSLog(@"parserDidEndDocument: ");
|
||||
#endif
|
||||
parameters = [NSMutableDictionary dictionaryWithCapacity: 5];
|
||||
while (c != EOF)
|
||||
|
||||
if ([_del respondsToSelector: @selector(parserDidEndDocument: )])
|
||||
{
|
||||
[_del parserDidEndDocument: self];
|
||||
}
|
||||
return YES;
|
||||
}
|
||||
|
||||
case '&':
|
||||
{
|
||||
// collect arguments
|
||||
if (c == '/' && *tp != '/')
|
||||
{
|
||||
// appears to be a />
|
||||
c = cget();
|
||||
if (c != '>')
|
||||
return [self _parseError: @"<tag/ is missing the >"];
|
||||
[self _processTag: tag isEnd: NO withAttributes: parameters]; // opening tag
|
||||
[self _processTag: tag isEnd: YES withAttributes: nil]; // closing tag
|
||||
break; // done
|
||||
}
|
||||
if (c == '?' && *tp == '?')
|
||||
{
|
||||
// appears to be a ?>
|
||||
c = cget();
|
||||
if (c != '>')
|
||||
return [self _parseError: @"<?tag ...? is missing the >"];
|
||||
// process
|
||||
[self _processTag: tag isEnd: NO withAttributes: parameters]; // single <?tag ...?>
|
||||
break; // done
|
||||
}
|
||||
while (isspace(c)) // this->should also allow for line break and tab
|
||||
c = cget();
|
||||
if (c == '>')
|
||||
{
|
||||
[self _processTag: tag isEnd: (*tp=='/') withAttributes: parameters]; // handle tag
|
||||
break;
|
||||
}
|
||||
arg=[self _qarg]; // get next argument (eats up to /, ?, >, =, space)
|
||||
#if EXTRA_DEBUG
|
||||
NSLog(@"arg=%@", arg);
|
||||
#endif
|
||||
if (!this->acceptHTML && [arg length] == 0)
|
||||
return [self _parseError: @"empty attribute name"];
|
||||
c = cget(); // get delimiting character
|
||||
if (c == '=')
|
||||
{
|
||||
// explicit assignment
|
||||
c = cget(); // skip =
|
||||
[parameters setObject: [self _qarg] forKey: arg];
|
||||
c = cget(); // get character behind qarg value
|
||||
}
|
||||
else // implicit
|
||||
[parameters setObject: @"" forKey: arg];
|
||||
NSString *entity = [self _entity];
|
||||
|
||||
if (!entity)
|
||||
{
|
||||
return [self _parseError: @"empty entity name"];
|
||||
}
|
||||
if ([_del respondsToSelector: @selector(parser:foundCharacters:)])
|
||||
{
|
||||
[_del parser: self foundCharacters: entity];
|
||||
}
|
||||
vp = this->cp; // next value sequence starts here
|
||||
c = cget(); // first character behind ;
|
||||
continue;
|
||||
}
|
||||
|
||||
case '<':
|
||||
{
|
||||
NSString *tag;
|
||||
NSMutableDictionary *parameters;
|
||||
NSString *arg;
|
||||
const unsigned char *tp = this->cp; // tag pointer
|
||||
|
||||
if (this->cp < this->cend-3
|
||||
&& strncmp((char *)this->cp, "!--", 3) == 0)
|
||||
{
|
||||
/* start of comment skip all characters until "-->"
|
||||
*/
|
||||
this->cp += 3;
|
||||
while (this->cp < this->cend-3
|
||||
&& strncmp((char *)this->cp, "-->", 3) != 0)
|
||||
{
|
||||
this->cp++; // search
|
||||
}
|
||||
/* if _del responds to parser: foundComment:
|
||||
* convert to string (tp+4 ... cp)
|
||||
*/
|
||||
this->cp+=3; // might go beyond cend but does not care
|
||||
vp = this->cp; // value might continue
|
||||
c = cget(); // get first character behind comment
|
||||
continue;
|
||||
}
|
||||
c = cget(); // get first character of tag
|
||||
if (c == '/')
|
||||
{
|
||||
c = cget(); // closing tag </tag begins
|
||||
}
|
||||
else if (c == '?')
|
||||
{
|
||||
/* special tag <?tag begins
|
||||
*/
|
||||
c = cget(); // include in tag string
|
||||
// NSLog(@"special tag <? found");
|
||||
/* FIXME: this->should process this tag in a special
|
||||
* way so that e.g. <?php any PHP script ?> is read
|
||||
* as a single tag!
|
||||
* to do this properly, we need a notion of comments
|
||||
* and quoted string constants...
|
||||
*/
|
||||
}
|
||||
while (c != EOF && !isspace(c)
|
||||
&& c != '>' && c != '/' && c != '?')
|
||||
{
|
||||
c = cget(); // scan tag until we find a delimiting character
|
||||
}
|
||||
if (*tp == '/')
|
||||
{
|
||||
tag = UTF8STR(tp + 1, this->cp - tp - 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
tag = UTF8STR(tp, this->cp - tp - 1);
|
||||
}
|
||||
#if EXTRA_DEBUG
|
||||
NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' ');
|
||||
#endif
|
||||
parameters = [NSMutableDictionary dictionaryWithCapacity: 5];
|
||||
while (c != EOF)
|
||||
{
|
||||
if (c == '/' && *tp != '/')
|
||||
{
|
||||
// appears to be a />
|
||||
c = cget();
|
||||
if (c != '>')
|
||||
{
|
||||
return [self _parseError: @"<tag/ is missing the >"];
|
||||
}
|
||||
[self _processTag: tag
|
||||
isEnd: NO
|
||||
withAttributes: parameters];
|
||||
[self _processTag: tag isEnd: YES withAttributes: nil];
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == '?' && *tp == '?')
|
||||
{
|
||||
// appears to be a ?>
|
||||
c = cget();
|
||||
if (c != '>')
|
||||
{
|
||||
return [self _parseError:
|
||||
@"<?tag ...? is missing the >"];
|
||||
}
|
||||
// process
|
||||
[self _processTag: tag
|
||||
isEnd: NO
|
||||
withAttributes: parameters]; // single <?tag ...?>
|
||||
break; // done
|
||||
}
|
||||
// this should also allow for line break and tab
|
||||
while (isspace(c))
|
||||
{
|
||||
c = cget();
|
||||
}
|
||||
if (c == '>')
|
||||
{
|
||||
[self _processTag: tag
|
||||
isEnd: (*tp == '/')
|
||||
withAttributes: parameters];
|
||||
break;
|
||||
}
|
||||
/* get next argument (eats up to /, ?, >, =, space)
|
||||
*/
|
||||
arg = [self _qarg];
|
||||
#if EXTRA_DEBUG
|
||||
NSLog(@"arg=%@", arg);
|
||||
#endif
|
||||
if (!this->acceptHTML && [arg length] == 0)
|
||||
{
|
||||
return [self _parseError: @"empty attribute name"];
|
||||
}
|
||||
c = cget(); // get delimiting character
|
||||
if (c == '=')
|
||||
{
|
||||
// explicit assignment
|
||||
c = cget(); // skip =
|
||||
[parameters setObject: [self _qarg] forKey: arg];
|
||||
c = cget(); // get character behind qarg value
|
||||
}
|
||||
else // implicit
|
||||
{
|
||||
[parameters setObject: @"" forKey: arg];
|
||||
}
|
||||
}
|
||||
vp = this->cp; // prepare for next value
|
||||
c = cget(); // skip > and fetch next character
|
||||
}
|
||||
vp = this->cp; // prepare for next value
|
||||
c = cget(); // skip > and fetch next character
|
||||
}
|
||||
}
|
||||
}
|
||||
return [self _parseError: @"this->aborted"]; // this->aborted
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue