From 6f89020483f79e2954b15fa64ecd51322222e214 Mon Sep 17 00:00:00 2001 From: rfm Date: Tue, 25 May 2010 08:25:19 +0000 Subject: [PATCH] Fix ignorable whitespace in sloppy parser git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@30452 72102866-910b-0410-8b05-ffd578937521 --- ChangeLog | 8 ++++ Source/NSPropertyList.m | 8 +--- Source/NSXMLParser.m | 101 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 103 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2ebad8a89..04bbc9428 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2010-05-25 Richard Frith-Macdonald + + * Source/NSPropertyList.m: ([-parser:foundCharacters:]) don't trim + the supplied string ... we need to retain whitespace around entities + in value text. + * Source/NSXMLParser.m: Add support for ignorable whitespace so that + it doesn't get handed to the delegate as normal characters. + 2010-05-22 Richard Frith-Macdonald * Headers/Foundation/NSObjCRuntime.h: diff --git a/Source/NSPropertyList.m b/Source/NSPropertyList.m index 399ce8eea..9270a4699 100644 --- a/Source/NSPropertyList.m +++ b/Source/NSPropertyList.m @@ -139,13 +139,9 @@ extern BOOL GSScanDouble(unichar*, unsigned, double*); } - (void) parser: (NSXMLParser *)parser - foundCharacters: (NSString *)string +foundCharacters: (NSString *)string { - string = [string stringByTrimmingSpaces]; - if ([string length] > 0) - { - [value appendString: string]; - } + [value appendString: string]; } - (void) parser: (NSXMLParser *)parser diff --git a/Source/NSXMLParser.m b/Source/NSXMLParser.m index 6ab0f1778..242f1b526 100644 --- a/Source/NSXMLParser.m +++ b/Source/NSXMLParser.m @@ -622,6 +622,8 @@ typedef struct NSXMLParserIvarsType int line; // current line (counts from 0) int column; // current column (counts from 0) BOOL abort; // abort parse loop + BOOL ignorable; // whitespace is ignorable + BOOL whitespace; // had only whitespace in current data BOOL shouldProcessNamespaces; BOOL 
shouldReportNamespacePrefixes; BOOL shouldResolveExternalEntities; @@ -633,6 +635,7 @@ typedef struct NSXMLParserIvarsType IMP foundCDATA; IMP foundCharacters; IMP foundComment; + IMP foundIgnorable; } NSXMLParserIvars; @@ -643,6 +646,7 @@ static SEL didStartMappingPrefixSel; static SEL foundCDATASel; static SEL foundCharactersSel; static SEL foundCommentSel; +static SEL foundIgnorableSel; @implementation SloppyXMLParser @@ -675,6 +679,8 @@ static SEL foundCommentSel; = @selector(parser:foundCharacters:); foundCommentSel = @selector(parser:foundComment:); + foundIgnorableSel + = @selector(parser:foundIgnorableWhitespace:); } } @@ -838,6 +844,16 @@ static SEL foundCommentSel; { this->foundComment = 0; } + + if ([_del respondsToSelector: foundIgnorableSel]) + { + this->foundIgnorable + = [_del methodForSelector: foundIgnorableSel]; + } + else + { + this->foundIgnorable = 0; + } } } @@ -1247,6 +1263,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes); return [self _parseError: @"missing preamble" code: NSXMLParserDocumentStartError]; } + /* Start by accumulating ignorable whitespace. + */ + this->ignorable = YES; + this->whitespace = YES; c = cget(); // get first character while (!this->abort) { @@ -1264,22 +1284,51 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes); this->column = 0; break; + case '<': + /* Whitespace immediately before an element is always ignorable. 
+ */ + this->ignorable = YES; /* Fall through to push out data */ case EOF: - case '<': case '&': { /* push out any characters that have been collected so far */ if (this->cp - vp > 1) { - /* check for whitespace only - might set/reset - * a flag to indicate so - */ - if (this->foundCharacters != 0) - { - NSString *s; + const unsigned char *p; + NSString *s; - s = NewUTF8STR(vp, this->cp - vp - 1); + p = this->cp - 1; + if (YES == this->ignorable) + { + if (YES == this->whitespace) + { + p = vp; // all whitespace + } + else + { + /* step through trailing whitespace (if any) + */ + while (p > vp && isspace(p[-1])) + { + p--; + } + } + if (p < this->cp - 1 && this->foundIgnorable != 0) + { + /* Process data as ignorable whitespace + */ + s = NewUTF8STR(p, this->cp - p - 1); + (*this->foundIgnorable)(_del, + foundIgnorableSel, self, s); + [s release]; + } + } + if (p - vp > 0 && this->foundCharacters != 0) + { + /* Process remaining data as characters + */ + s = NewUTF8STR(vp, p - vp); (*this->foundCharacters)(_del, foundCharactersSel, self, s); [s release]; @@ -1292,6 +1341,30 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes); switch(c) { default: + if (YES == this->whitespace && !isspace(c)) + { + if (YES == this->ignorable && this->cp - vp > 1) + { + /* We have accumulated ignorable whitespace ... + * push it out. + */ + if (this->foundIgnorable != 0) + { + NSString *s; + + s = NewUTF8STR(vp, this->cp - vp - 1); + (*this->foundIgnorable)(_del, + foundIgnorableSel, self, s); + [s release]; + } + vp = this->cp; + } + /* We have read non-space data, so whitespace is no longer + * ignorable, and the buffer no longer contains only space. 
+ */ + this->ignorable = NO; + this->whitespace = NO; + } c = cget(); // just collect until we push out (again) continue; @@ -1333,6 +1406,12 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes); { NSString *entity; + /* After any entity, whitespace is no longer ignorable, but + * we will have an empty buffer to accumulate it. + */ + this->ignorable = NO; + this->whitespace = YES; + if ([self _parseEntity: &entity] == NO) { return [self _parseError: @"empty entity name" @@ -1356,6 +1435,12 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes); NSString *arg; const unsigned char *tp = this->cp; // tag pointer + /* After processing a tag, whitespace will be ignorable and + * we can start accumulating it in our buffer. + */ + this->ignorable = YES; + this->whitespace = YES; + if (this->cp < this->cend-3 && strncmp((char *)this->cp, "!--", 3) == 0) {