mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-23 00:41:02 +00:00
Add rudimentary parser implementation for when libxml2 is not available.
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@24240 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
73f83c5c59
commit
06e90f42d7
2 changed files with 552 additions and 1 deletions
|
@ -1,3 +1,8 @@
|
|||
2006-12-26 Dr. H. Nikolaus Schaller, Richard Frith-Macdonald
|
||||
|
||||
* Source/NSXMLParser.m: Implement reduced functionality parser if
|
||||
LIBXML2 is not available.
|
||||
|
||||
2006-12-26 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Headers/Foundation/NSValueTransformer.h:
|
||||
|
|
|
@ -23,7 +23,8 @@
|
|||
|
||||
*/
|
||||
|
||||
#include <Additions/GNUstepBase/GSXML.h>
|
||||
#include "config.h"
|
||||
#include <Foundation/NSArray.h>
|
||||
#include <Foundation/NSError.h>
|
||||
#include <Foundation/NSException.h>
|
||||
#include <Foundation/NSXMLParser.h>
|
||||
|
@ -32,6 +33,10 @@
|
|||
|
||||
NSString* const NSXMLParserErrorDomain = @"NSXMLParserErrorDomain";
|
||||
|
||||
#ifdef HAVE_LIBXML
|
||||
|
||||
#include <Additions/GNUstepBase/GSXML.h>
|
||||
|
||||
@interface NSXMLSAXHandler : GSSAXHandler
|
||||
{
|
||||
@public
|
||||
|
@ -435,6 +440,547 @@ NSString* const NSXMLParserErrorDomain = @"NSXMLParserErrorDomain";
|
|||
|
||||
@end
|
||||
|
||||
#else
|
||||
|
||||
/**
 * Private NSString helper used by the fallback (non-libxml2) parser code.
 */
@implementation NSString (NSXMLParser)

/**
 * Returns an autoreleased copy of the receiver in which the five characters
 * that have predefined XML entities are replaced by the corresponding
 * entity reference:
 *   &  ->  &amp;     <  ->  &lt;     >  ->  &gt;
 *   "  ->  &quot;    '  ->  &apos;
 */
- (NSString *) _stringByExpandingXMLEntities
{
  NSMutableString *t = [NSMutableString stringWithString: self];

  /* The ampersand has to be handled first: every later replacement
   * introduces a new '&' which must not be escaped a second time.
   */
  [t replaceOccurrencesOfString: @"&"
                     withString: @"&amp;"
                        options: 0
                          range: NSMakeRange(0, [t length])];
  [t replaceOccurrencesOfString: @"<"
                     withString: @"&lt;"
                        options: 0
                          range: NSMakeRange(0, [t length])];
  [t replaceOccurrencesOfString: @">"
                     withString: @"&gt;"
                        options: 0
                          range: NSMakeRange(0, [t length])];
  [t replaceOccurrencesOfString: @"\""
                     withString: @"&quot;"
                        options: 0
                          range: NSMakeRange(0, [t length])];
  [t replaceOccurrencesOfString: @"'"
                     withString: @"&apos;"
                        options: 0
                          range: NSMakeRange(0, [t length])];
  return t;
}

@end
|
||||
|
||||
/*
 * Builds an autoreleased NSString from len bytes of UTF-8 text at ptr.
 *
 * ptr - start of the bytes (not required to be NUL terminated).
 * len - number of bytes; a negative value (which callers can produce from
 *       pointer arithmetic on truncated input) is treated as zero rather
 *       than being converted to a huge unsigned length.
 *
 * Returns nil, after logging a message, if the bytes could not be
 * converted to a string.
 */
static NSString *UTF8STR(const void *ptr, int len)
{
  NSString *s;

  if (len < 0)
    {
      len = 0;
    }
  s = [[NSString alloc] initWithBytes: ptr
                               length: (unsigned)len
                             encoding: NSUTF8StringEncoding];
  if (s == nil)
    {
      /* %p is the correct conversion for a pointer (was %08x). */
      NSLog(@"could not convert to UTF8 string! bytes=%p len=%d", ptr, len);
    }
  return AUTORELEASE(s);
}
|
||||
|
||||
/*
 * Private state of the fallback parser.  One instance is allocated (and
 * zero filled) by -initWithData: and stored in the _parser instance
 * variable.
 */
typedef struct NSXMLParserIvarsType
{
  /* Names of the currently open tags, innermost last. */
  NSMutableArray *tagPath;
  /* The document being parsed. */
  NSData *data;
  /* The error stored by -_parseError: and returned by -parserError. */
  NSError *error;
  /* Read position within the bytes of data. */
  const unsigned char *cp;
  /* One past the last byte of data. */
  const unsigned char *cend;
  /* Current line, counted from 0. */
  int line;
  /* Current column, counted from 0. */
  int column;
  /* Set to make the parse loop stop. */
  BOOL abort;
  BOOL shouldProcessNamespaces;
  BOOL shouldReportNamespacePrefixes;
  BOOL shouldResolveExternalEntities;
  /* When set, tag names are lowercased and bad nesting is tolerated. */
  BOOL acceptHTML;
} NSXMLParserIvars;
|
||||
|
||||
@implementation NSXMLParser
|
||||
|
||||
/* The opaque _parser instance variable viewed as the NSXMLParserIvars
 * structure which holds the state of this implementation.
 */
#define this ((NSXMLParserIvars*)_parser)
|
||||
/* The object stored by -setDelegate: (in _handler), typed as id so that
 * delegate messages can be sent to it.
 */
#define _del ((id)_handler)
|
||||
|
||||
/**
 * Sets the abort flag, which the loop in -parse tests before it handles
 * each further character.
 */
- (void) abortParsing
{
  NSXMLParserIvars *ivars = this;

  ivars->abort = YES;
}
|
||||
|
||||
/**
 * Returns the current column counter (counted from 0).
 */
- (int) columnNumber
{
  NSXMLParserIvars *ivars = this;

  return ivars->column;
}
|
||||
|
||||
/**
 * Releases the objects held in the private state structure (if one was
 * ever allocated), frees the structure itself and then deallocates the
 * receiver.
 */
- (void) dealloc
{
  NSXMLParserIvars *ivars = this;

  if (ivars != 0)
    {
      RELEASE(ivars->data);
      RELEASE(ivars->error);
      RELEASE(ivars->tagPath);
      /* The structure came from NSZoneMalloc() on the receiver's zone. */
      NSZoneFree([self zone], ivars);
    }
  [super dealloc];
}
|
||||
|
||||
/**
 * Returns the object previously stored by -setDelegate:.
 */
- (id) delegate
{
  id current = (id)_handler;

  return current;
}
|
||||
|
||||
/**
 * Convenience initialiser: loads the contents of url into an NSData object
 * and passes it to -initWithData:.
 */
- (id) initWithContentsOfURL: (NSURL *)url
{
  NSData *contents = [NSData dataWithContentsOfURL: url];

  return [self initWithData: contents];
}
|
||||
|
||||
/**
 * Initialises the receiver to parse the supplied data.<br />
 * If data is nil the receiver is destroyed and nil is returned.<br />
 * Otherwise a zero filled state structure is allocated in the receiver's
 * zone, a copy of data is kept in it, an empty tag stack is created and
 * the read pointers are set to the start and end of the copied bytes.
 */
- (id) initWithData: (NSData *)data
{
  NSXMLParserIvars *ivars;

  if (data == nil)
    {
      DESTROY(self);
      return self;
    }

  self = [super init];
  if (!self)
    {
      return self;
    }

  _parser = NSZoneMalloc([self zone], sizeof(NSXMLParserIvars));
  memset(_parser, '\0', sizeof(NSXMLParserIvars));
  ivars = this;
  ivars->data = [data copy];
  ivars->tagPath = [[NSMutableArray alloc] init];
  ivars->cp = [ivars->data bytes];
  ivars->cend = ivars->cp + [ivars->data length];
  return self;
}
|
||||
|
||||
/**
 * Returns the current line counter (counted from 0).
 */
- (int) lineNumber
{
  NSXMLParserIvars *ivars = this;

  return ivars->line;
}
|
||||
|
||||
/**
 * Stores del as the object which receives the parser callbacks.<br />
 * The pointer is simply assigned; this method does not retain it.
 */
- (void) setDelegate: (id)del
{
  id replacement = del;

  _handler = replacement;
}
|
||||
|
||||
/**
 * Returns the error object most recently stored by -_parseError:.
 */
- (NSError *) parserError
{
  NSXMLParserIvars *ivars = this;

  return ivars->error;
}
|
||||
|
||||
/**
 * Returns the internal (mutable) stack of currently open tag names; the
 * array itself is returned, not a copy.
 */
- (NSArray *) _tagPath
{
  NSXMLParserIvars *ivars = this;

  return ivars->tagPath;
}
|
||||
|
||||
/* Fetch the next input byte: while the read pointer is before the end of
 * the data this increments the column counter, yields the byte at the read
 * pointer and advances the pointer.  At the end of the data it yields -1
 * and changes nothing.
 */
#define cget() ((this->cp < this->cend)?(this->column++, *this->cp++): -1)
|
||||
|
||||
/**
 * Records a parse error and stops the parse loop.<br />
 * An NSError in NSXMLParserErrorDomain is created with message (when it is
 * not nil) stored under NSLocalizedDescriptionKey, kept so that
 * -parserError can return it, and passed to the delegate if the delegate
 * implements -parser:parseErrorOccurred:.<br />
 * Previously the message was discarded and a nil error was stored and
 * handed to the delegate.<br />
 * Always returns NO so that callers can write
 * <code>return [self _parseError: ...];</code>
 */
- (BOOL) _parseError: (NSString *)message
{
  NSDictionary *info = nil;
  NSError *err;

#if 0
  NSLog(@"XML parseError: %@", message);
#endif
  if (message != nil)
    {
      info = [NSDictionary dictionaryWithObject: message
                                         forKey: NSLocalizedDescriptionKey];
    }
  /* This parser does not classify its errors, so the generic internal
   * error code is used for all of them.
   */
  err = [NSError errorWithDomain: NSXMLParserErrorDomain
                            code: NSXMLParserInternalError
                        userInfo: info];
  ASSIGN(this->error, err);
  this->abort = YES;    // break the parse loop
  if ([_del respondsToSelector: @selector(parser:parseErrorOccurred:)])
    {
      [_del parser: self parseErrorOccurred: this->error];
    }
  return NO;
}
|
||||
|
||||
/**
 * Handles one complete tag found by -parse.<br />
 * tag is the tag name (including a leading '?' or '!' for special tags),
 * flag is YES for a closing tag, and attributes holds the attributes of an
 * opening tag.<br />
 * Opening tags: "?xml" triggers -parserDidStartDocument:, other "?..."
 * tags and !DOCTYPE, !ENTITY and !CDATA are only logged, and anything else
 * is pushed on the tag stack and reported as the start of an element.<br />
 * Closing tags: in HTML mode any open tags above the matching one are
 * closed first and an unmatched closing tag is ignored; otherwise a
 * mismatch with the innermost open tag is a parse error.  The element end
 * is then reported and the tag popped from the stack.
 */
- (void) _processTag: (NSString *)tag
               isEnd: (BOOL)flag
      withAttributes: (NSDictionary *)attributes
{
  NSMutableArray *stack = this->tagPath;
  SEL endSel;

#if 0
  NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
#endif
  if (this->acceptHTML)
    {
      tag = [tag lowercaseString];      // HTML tags are not case sensitive
    }

  if (flag == NO)
    {
      if ([tag isEqualToString: @"?xml"])
        {
          // the attributes (encoding etc) are not examined
#if 0
          NSLog(@"parserDidStartDocument: ");
#endif
          if ([_del respondsToSelector: @selector(parserDidStartDocument:)])
            {
              [_del parserDidStartDocument: self];
            }
          return;
        }

      /* Processing instructions, document type and entity declarations
       * and CDATA are recognised but not passed to the delegate.
       */
      if ([tag hasPrefix: @"?"]
        || [tag isEqualToString: @"!DOCTYPE"]
        || [tag isEqualToString: @"!ENTITY"]
        || [tag isEqualToString: @"!CDATA"])
        {
#if 1
          NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
#endif
          return;
        }

      [stack addObject: tag];           // push on stack
      if ([_del respondsToSelector:
        @selector(parser:didStartElement:namespaceURI:qualifiedName:attributes:)])
        {
          [_del parser: self
            didStartElement: tag
            namespaceURI: nil
            qualifiedName: nil
            attributes: attributes];
        }
      return;
    }

  /* From here on we are dealing with a closing tag. */
  endSel = @selector(parser:didEndElement:namespaceURI:qualifiedName:);
  if (this->acceptHTML)
    {
      /* Lazily close everything which was left open above the tag
       * being closed; names must be literally equal to match.
       */
      while ([stack count] > 0
        && ![[stack lastObject] isEqualToString: tag])
        {
          if ([_del respondsToSelector: endSel])
            {
              [_del parser: self
                didEndElement: [stack lastObject]
                namespaceURI: nil
                qualifiedName: nil];
            }
          [stack removeLastObject];     // pop from stack
        }
      if ([stack count] == 0)
        {
          return;       // closing tag without a matching open one
        }
    }
  else if (![[stack lastObject] isEqualToString: tag])
    {
      [self _parseError: [NSString stringWithFormat: @"tag nesting error (</%@> expected, </%@> found)", [stack lastObject], tag]];
      return;
    }

  if ([_del respondsToSelector: endSel])
    {
      [_del parser: self
        didEndElement: tag
        namespaceURI: nil
        qualifiedName: nil];
    }
  [stack removeLastObject];             // pop from stack
}
|
||||
|
||||
/**
 * Parses an entity reference.  On entry the read pointer must be just
 * behind the '&amp;'; input is consumed up to and including the
 * terminating ';'.<br />
 * Returns nil if the end of the data or a '&lt;' is met before a ';'.<br />
 * Numeric references (&amp;#ddd; and &amp;#xhh;) yield a one character
 * string and the five predefined entities yield their character.  Any
 * other name is logged as unrecognised and the name itself is returned
 * (or "&amp;??;" if the name is not valid UTF-8).
 */
- (NSString *) _entity
{
  const unsigned char *ep = this->cp;   // first byte behind the '&'
  NSString *entity;
  unsigned int val;
  int len;
  int c;

  do
    {
      c = cget();
    }
  while (c != EOF && c != '<' && c != ';');

  if (c != ';')
    {
      return nil;       // end of data or missing ';' before the next tag
    }
  len = this->cp - ep - 1;      // length of the name without the ';'

  if (*ep == '#')
    {
      /* The text at ep+1 is not NUL terminated, but both conversions
       * stop at the ';' which we know is present.  The result must be
       * compared with 1: sscanf() returns EOF (non-zero) when it reaches
       * the end of the input before converting anything, and val would
       * then be used uninitialised.
       */
      if (sscanf((char *)ep + 1, "x%x;", &val) == 1)
        {
          return [NSString stringWithFormat: @"%C", (unichar)val]; // &#xhh;
        }
      else if (sscanf((char *)ep + 1, "%u;", &val) == 1)
        {
          return [NSString stringWithFormat: @"%C", (unichar)val]; // &#ddd;
        }
    }
  else
    {
      // the five predefined entities
      if (len == 3 && strncmp((char *)ep, "amp", len) == 0)
        return @"&";
      if (len == 2 && strncmp((char *)ep, "lt", len) == 0)
        return @"<";
      if (len == 2 && strncmp((char *)ep, "gt", len) == 0)
        return @">";
      if (len == 4 && strncmp((char *)ep, "quot", len) == 0)
        return @"\"";
      if (len == 4 && strncmp((char *)ep, "apos", len) == 0)
        return @"'";
    }

  entity = UTF8STR(ep, len);
#if 1
  NSLog(@"NSXMLParser: unrecognized entity: &%@;", entity);
#endif
  // there is no entity translation table (yet) in which to look it up
  if (!entity)
    {
      entity = @"&??;";         // unknown entity
    }
  return entity;
}
|
||||
|
||||
/**
 * Reads one attribute name or value, which may be quoted with " or '.<br />
 * On entry the caller must already have fetched the first character of
 * the argument with cget(); that character is re-read here.<br />
 * For a quoted argument the text between the quotes is returned and the
 * read pointer is left behind the closing quote; nil is returned if the
 * data ends before the closing quote.<br />
 * An unquoted argument extends up to (not including) the next whitespace,
 * '&gt;', '/', '?' or '=' character, or to the end of the data.
 */
- (NSString *) _qarg
{
  const unsigned char *ap = --this->cp;         // start of the argument
  int c = cget();                               // refetch first character

#if 0
  NSLog(@"_qarg: %02x %c", c, isprint(c)?c: ' ');
#endif
  if (c == '\"' || c == '\'')
    {
      int quote = c;

      do
        {
          c = cget();
          if (c == EOF)
            {
              return nil;       // unterminated!
            }
        }
      while (c != quote);
      // strip the opening and the closing quote
      return UTF8STR(ap + 1, this->cp - ap - 2);
    }

  while (!isspace(c) && c != '>' && c != '/' && c != '?' && c != '='
    && c != EOF)
    {
      c = cget();
    }
  /* Go back to the terminating character so that the caller can fetch it.
   * At the end of the data cget() did not advance, so stepping back would
   * cut off the last character of the argument and make the caller read
   * that character again (and again).
   */
  if (c != EOF)
    {
      this->cp--;
    }
  return UTF8STR(ap, this->cp - ap);
}
|
||||
|
||||
/**
 * Parses the data supplied at initialisation, sending the delegate
 * parserDidStartDocument:, parser:didStartElement:..., 
 * parser:foundCharacters:, parser:didEndElement:... and
 * parserDidEndDocument: as the corresponding items are found.<br />
 * Unless HTML mode is on the data must begin with "&lt;?xml ".<br />
 * Returns YES when the end of the data is reached without error, NO after
 * a parse error (see -parserError) or after -abortParsing.<br />
 * Comments are skipped; line breaks inside comments and tags are not
 * counted by the line counter.
 *
 * Changes from the previous version of this method:
 * <list>
 *  <item>the end of the data inside a tag is detected (it used to loop
 *   forever or read beyond the buffer); it is an error in strict mode and
 *   the partial tag is dropped in HTML mode</item>
 *  <item>a newline no longer flushes (and loses itself from) the text
 *   being collected</item>
 *  <item>text directly before the end of the data keeps its last
 *   character</item>
 *  <item>whitespace is skipped before looking for "/&gt;" and "?&gt;" so
 *   that e.g. &lt;a b="1" /&gt; is accepted</item>
 *  <item>an unterminated quoted attribute name or value is a parse error
 *   rather than an attempt to store nil in the attribute dictionary</item>
 *  <item>an error already reported while processing a tag is not replaced
 *   by a second "aborted" error</item>
 * </list>
 */
- (BOOL) parse
{
  const unsigned char *vp = this->cp;   // start of text not yet reported
  int c;

  if (!this->acceptHTML
    && (this->cend - this->cp < 6
      || strncmp((char *)this->cp, "<?xml ", 6) != 0))
    {
      // not a valid XML document start
      return [self _parseError: @"missing <?xml > preamble"];
    }

  c = cget();   // get first character
  while (!this->abort)
    {
#if 0
      NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' ');
#endif
      /* First step: position bookkeeping, and reporting of collected
       * text when the character ends a run of text.
       */
      switch (c)
        {
          case '\r':
            this->column = 0;
            break;

          case '\n':
            this->line++;
            this->column = 0;
            break;

          case EOF:
          case '<':
          case '&':
            {
              /* The text ends before the delimiter just fetched.  At
               * the end of the data nothing was fetched, so the text
               * extends right up to the read pointer.
               */
              const unsigned char *end;

              end = (c == EOF) ? this->cp : this->cp - 1;
              if (end > vp)
                {
                  if ([_del respondsToSelector:
                    @selector(parser:foundCharacters:)])
                    {
                      [_del parser: self
                        foundCharacters: UTF8STR(vp, end - vp)];
                    }
                  vp = this->cp;
                }
            }
            break;
        }

      /* Second step: act on the character. */
      switch (c)
        {
          default:
            c = cget();         // just collect until we push out (again)
            continue;

          case EOF:             // end of data
            {
              if ([this->tagPath count] != 0)
                {
                  if (!this->acceptHTML)
                    {
                      // strict XML nesting error
                      return [self _parseError: @"unexpected end of file"];
                    }
                  while ([this->tagPath count] > 0)
                    {
                      // lazily close all open tags
                      if ([_del respondsToSelector:
                        @selector(parser:didEndElement:namespaceURI:qualifiedName:)])
                        {
                          [_del parser: self
                            didEndElement: [this->tagPath lastObject]
                            namespaceURI: nil
                            qualifiedName: nil];
                        }
                      [this->tagPath removeLastObject];
                    }
                }
#if 0
              NSLog(@"parserDidEndDocument: ");
#endif
              if ([_del respondsToSelector: @selector(parserDidEndDocument:)])
                {
                  [_del parserDidEndDocument: self];
                }
              return YES;
            }

          case '&':             // entity reference
            {
              NSString *entity = [self _entity];

              if (!entity)
                {
                  return [self _parseError: @"empty entity name"];
                }
              if ([_del respondsToSelector:
                @selector(parser:foundCharacters:)])
                {
                  [_del parser: self foundCharacters: entity];
                }
              vp = this->cp;    // next run of text starts here
              c = cget();       // first character behind ;
              continue;
            }

          case '<':             // tag
            {
              const unsigned char *tp = this->cp; // first byte behind '<'
              NSMutableDictionary *parameters;
              NSString *tag;
              NSString *arg;

              if (this->cend - this->cp >= 3
                && strncmp((char *)this->cp, "!--", 3) == 0)
                {
                  // comment: skip everything up to and including "-->"
                  this->cp += 3;
                  while (this->cend - this->cp >= 3
                    && strncmp((char *)this->cp, "-->", 3) != 0)
                    {
                      this->cp++;
                    }
                  if (this->cend - this->cp >= 3)
                    {
                      this->cp += 3;            // step over "-->"
                    }
                  else
                    {
                      this->cp = this->cend;    // unterminated comment
                    }
                  vp = this->cp;        // text might continue
                  c = cget();           // first character behind comment
                  continue;
                }

              c = cget();       // get first character of tag
              if (c == '/' || c == '?')
                {
                  /* closing tag </tag or special tag <?tag; the '/' is
                   * stripped below, the '?' stays part of the tag name.
                   * FIXME: something like <?php any PHP script ?> ought
                   * to be read as a single tag, which needs a notion of
                   * comments and quoted strings.
                   */
                  c = cget();
                }
              while (c != EOF && !isspace(c) && c != '>'
                && c != '/' && c != '?')
                {
                  c = cget();   // scan to the character ending the name
                }
              if (c == EOF)
                {
                  /* The data ends inside the tag name (tp may even be
                   * the end of the buffer, so it must not be read).
                   */
                  if (!this->acceptHTML)
                    {
                      return [self _parseError: @"unexpected end of file"];
                    }
                  vp = this->cp;        // drop the partial tag
                  continue;             // and let the EOF case finish
                }
              if (*tp == '/')
                {
                  // don't include / and delimiting character
                  tag = UTF8STR(tp + 1, this->cp - tp - 2);
                }
              else
                {
                  // don't include delimiting character
                  tag = UTF8STR(tp, this->cp - tp - 1);
                }
              if (tag == nil)
                {
                  return [self _parseError: @"tag name is not valid UTF-8"];
                }
#if 0
              NSLog(@"tag=%@ - %02x %c", tag, c, isprint(c)?c: ' ');
#endif
              parameters = [NSMutableDictionary dictionaryWithCapacity: 5];
              while (c != EOF)
                {
                  // collect attributes until the tag ends
                  while (isspace(c))
                    {
                      c = cget();
                    }
                  if (c == EOF)
                    {
                      break;    // truncated tag, dealt with below
                    }
                  if (c == '>')
                    {
                      [self _processTag: tag
                                  isEnd: (*tp == '/')
                         withAttributes: parameters];
                      break;
                    }
                  if (c == '/' && *tp != '/')
                    {
                      // appears to be a />
                      c = cget();
                      if (c != '>')
                        {
                          return [self _parseError:
                            @"<tag/ is missing the >"];
                        }
                      [self _processTag: tag
                                  isEnd: NO
                         withAttributes: parameters];
                      [self _processTag: tag
                                  isEnd: YES
                         withAttributes: nil];
                      break;
                    }
                  if (c == '?' && *tp == '?')
                    {
                      // appears to be a ?>
                      c = cget();
                      if (c != '>')
                        {
                          return [self _parseError:
                            @"<?tag ...? is missing the >"];
                        }
                      [self _processTag: tag
                                  isEnd: NO
                         withAttributes: parameters];
                      break;
                    }
                  if (c == '/' || c == '?')
                    {
                      /* A '/' or '?' which can not end this kind of tag.
                       * -_qarg would return an empty name without
                       * consuming it, so it has to be dealt with here.
                       */
                      if (!this->acceptHTML)
                        {
                          return [self _parseError:
                            @"empty attribute name"];
                        }
                      c = cget();       // lenient: ignore the character
                      continue;
                    }

                  arg = [self _qarg];   // stops at /, ?, >, =, space
#if 0
                  NSLog(@"arg=%@", arg);
#endif
                  if (arg == nil)
                    {
                      return [self _parseError:
                        @"unterminated quoted attribute name"];
                    }
                  if (!this->acceptHTML && [arg length] == 0)
                    {
                      return [self _parseError: @"empty attribute name"];
                    }
                  c = cget();           // get delimiting character
                  if (c == '=')
                    {
                      NSString *value;

                      // explicit assignment
                      c = cget();       // skip =
                      if (c == EOF)
                        {
                          break;        // truncated tag, dealt with below
                        }
                      value = [self _qarg];
                      if (value == nil)
                        {
                          return [self _parseError:
                            @"unterminated attribute value"];
                        }
                      [parameters setObject: value forKey: arg];
                      c = cget();       // character behind the value
                    }
                  else
                    {
                      // attribute without a value
                      [parameters setObject: @"" forKey: arg];
                    }
                }
              if (c == EOF && !this->acceptHTML)
                {
                  // the data ended before the tag was complete
                  return [self _parseError: @"unexpected end of file"];
                }
              vp = this->cp;    // prepare for next run of text
              c = cget();       // skip > and fetch next character
            }
        }
    }

  /* The loop was left because the abort flag is set.  If that was done by
   * an error reported from within the loop (e.g. a tag nesting error) the
   * delegate has already been told, so don't report a second error.
   */
  if (this->error != nil)
    {
      return NO;
    }
  return [self _parseError: @"aborted"];
}
|
||||
|
||||
/**
 * Returns the flag set by -_setAcceptHTML:.
 */
- (BOOL) acceptsHTML
{
  NSXMLParserIvars *ivars = this;

  return ivars->acceptHTML;
}
|
||||
|
||||
/**
 * Returns the stored value of the shouldProcessNamespaces flag.
 */
- (BOOL) shouldProcessNamespaces
{
  NSXMLParserIvars *ivars = this;

  return ivars->shouldProcessNamespaces;
}
|
||||
|
||||
/**
 * Returns the stored value of the shouldReportNamespacePrefixes flag.
 */
- (BOOL) shouldReportNamespacePrefixes
{
  NSXMLParserIvars *ivars = this;

  return ivars->shouldReportNamespacePrefixes;
}
|
||||
|
||||
/**
 * Returns the stored value of the shouldResolveExternalEntities flag.
 */
- (BOOL) shouldResolveExternalEntities
{
  NSXMLParserIvars *ivars = this;

  return ivars->shouldResolveExternalEntities;
}
|
||||
|
||||
/**
 * Stores flag as the value returned by -shouldProcessNamespaces.
 */
- (void) setShouldProcessNamespaces: (BOOL)flag
{
  NSXMLParserIvars *ivars = this;

  ivars->shouldProcessNamespaces = flag;
}
|
||||
|
||||
/**
 * Stores flag as the value returned by -shouldReportNamespacePrefixes.
 */
- (void) setShouldReportNamespacePrefixes: (BOOL)flag
{
  NSXMLParserIvars *ivars = this;

  ivars->shouldReportNamespacePrefixes = flag;
}
|
||||
|
||||
/**
 * Stores flag as the value returned by -shouldResolveExternalEntities.<br />
 * This used to overwrite the shouldProcessNamespaces flag instead (a
 * copy and paste slip), leaving the external entities flag unchanged.
 */
- (void) setShouldResolveExternalEntities: (BOOL)flag
{
  this->shouldResolveExternalEntities = flag;
}
|
||||
|
||||
/**
 * Turns HTML mode on or off.  In HTML mode -parse does not insist on the
 * "&lt;?xml " preamble, and tag handling lowercases tag names and
 * tolerates badly nested tags.
 */
- (void) _setAcceptHTML: (BOOL) flag
{
  NSXMLParserIvars *ivars = this;

  ivars->acceptHTML = flag;
}
|
||||
|
||||
/**
 * Not supported by this implementation: the call is passed to
 * -notImplemented: and whatever that returns is returned.
 */
- (NSString *) publicID
{
  id result = [self notImplemented: _cmd];

  return result;
}
|
||||
|
||||
/**
 * Not supported by this implementation: the call is passed to
 * -notImplemented: and whatever that returns is returned.
 */
- (NSString *) systemID
{
  id result = [self notImplemented: _cmd];

  return result;
}
|
||||
|
||||
@end
|
||||
|
||||
#endif
|
||||
|
||||
@implementation NSObject (NSXMLParserDelegateEventAdditions)
|
||||
- (NSData*) parser: (NSXMLParser*)aParser
|
||||
resolveExternalEntityName: (NSString*)aName
|
||||
|
|
Loading…
Reference in a new issue