Fix ignorable whitespace in sloppy parser

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@30452 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2010-05-25 08:25:19 +00:00
parent 20e7daac62
commit fbfc255123
3 changed files with 103 additions and 14 deletions

View file

@ -1,3 +1,11 @@
2010-05-25 Richard Frith-Macdonald <rfm@gnu.org>
* Source/NSPropertyList.m: ([-parser:foundCharacters:]) don't trim
the supplied string ... we need to retain whitespace around entities
in value text.
* Source/NSXMLParser.m: Add support for ignorable whitespace so that
it doesn't get handed to the delegate as normal characters.
2010-05-22 Richard Frith-Macdonald <rfm@gnu.org>
* Headers/Foundation/NSObjCRuntime.h:

View file

@ -139,13 +139,9 @@ extern BOOL GSScanDouble(unichar*, unsigned, double*);
}
- (void) parser: (NSXMLParser *)parser
foundCharacters: (NSString *)string
foundCharacters: (NSString *)string
{
string = [string stringByTrimmingSpaces];
if ([string length] > 0)
{
[value appendString: string];
}
[value appendString: string];
}
- (void) parser: (NSXMLParser *)parser

View file

@ -622,6 +622,8 @@ typedef struct NSXMLParserIvarsType
int line; // current line (counts from 0)
int column; // current column (counts from 0)
BOOL abort; // abort parse loop
BOOL ignorable; // whitespace is ignorable
BOOL whitespace; // had only whitespace in current data
BOOL shouldProcessNamespaces;
BOOL shouldReportNamespacePrefixes;
BOOL shouldResolveExternalEntities;
@ -633,6 +635,7 @@ typedef struct NSXMLParserIvarsType
IMP foundCDATA;
IMP foundCharacters;
IMP foundComment;
IMP foundIgnorable;
} NSXMLParserIvars;
@ -643,6 +646,7 @@ static SEL didStartMappingPrefixSel;
static SEL foundCDATASel;
static SEL foundCharactersSel;
static SEL foundCommentSel;
static SEL foundIgnorableSel;
@implementation SloppyXMLParser
@ -675,6 +679,8 @@ static SEL foundCommentSel;
= @selector(parser:foundCharacters:);
foundCommentSel
= @selector(parser:foundComment:);
foundIgnorableSel
= @selector(parser:foundIgnorableWhitespace:);
}
}
@ -838,6 +844,16 @@ static SEL foundCommentSel;
{
this->foundComment = 0;
}
if ([_del respondsToSelector: foundIgnorableSel])
{
this->foundIgnorable
= [_del methodForSelector: foundIgnorableSel];
}
else
{
this->foundIgnorable = 0;
}
}
}
@ -1247,6 +1263,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
return [self _parseError: @"missing <?xml > preamble"
code: NSXMLParserDocumentStartError];
}
/* Start by accumulating ignorable whitespace.
*/
this->ignorable = YES;
this->whitespace = YES;
c = cget(); // get first character
while (!this->abort)
{
@ -1264,22 +1284,51 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
this->column = 0;
break;
case '<':
/* Whitespace immediately before an element is always ignorable.
*/
this->ignorable = YES; /* Fall through to push out data */
case EOF:
case '<':
case '&':
{
/* push out any characters that have been collected so far
*/
if (this->cp - vp > 1)
{
/* check for whitespace only - might set/reset
* a flag to indicate so
*/
if (this->foundCharacters != 0)
{
NSString *s;
const unsigned char *p;
NSString *s;
s = NewUTF8STR(vp, this->cp - vp - 1);
p = this->cp - 1;
if (YES == this->ignorable)
{
if (YES == this->whitespace)
{
p = vp; // all whitespace
}
else
{
/* step through trailing whitespace (if any)
*/
while (p > vp && isspace(p[-1]))
{
p--;
}
}
if (p < this->cp - 1 && this->foundIgnorable != 0)
{
/* Process data as ignorable whitespace
*/
s = NewUTF8STR(p, this->cp - p - 1);
(*this->foundIgnorable)(_del,
foundIgnorableSel, self, s);
[s release];
}
}
if (p - vp > 0 && this->foundCharacters != 0)
{
/* Process remaining data as characters
*/
s = NewUTF8STR(vp, p - vp);
(*this->foundCharacters)(_del,
foundCharactersSel, self, s);
[s release];
@ -1292,6 +1341,30 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
switch(c)
{
default:
if (YES == this->whitespace && !isspace(c))
{
if (YES == this->ignorable && this->cp - vp > 1)
{
/* We have accumulated ignorable whitespace ...
* push it out.
*/
if (this->foundIgnorable != 0)
{
NSString *s;
s = NewUTF8STR(vp, this->cp - vp - 1);
(*this->foundIgnorable)(_del,
foundIgnorableSel, self, s);
[s release];
}
vp = this->cp;
}
/* We have read non-space data, so whitespace is no longer
* ignorable, and the buffer no longer contains only space.
*/
this->ignorable = NO;
this->whitespace = NO;
}
c = cget(); // just collect until we push out (again)
continue;
@ -1333,6 +1406,12 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
NSString *entity;
/* After any entity, whitespace is no longer ignorable, but
* we will have an empty buffer to accumulate it.
*/
this->ignorable = NO;
this->whitespace = YES;
if ([self _parseEntity: &entity] == NO)
{
return [self _parseError: @"empty entity name"
@ -1356,6 +1435,12 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
NSString *arg;
const unsigned char *tp = this->cp; // tag pointer
/* After processing a tag, whitespace will be ignorable and
* we can start accumulating it in our buffer.
*/
this->ignorable = YES;
this->whitespace = YES;
if (this->cp < this->cend-3
&& strncmp((char *)this->cp, "!--", 3) == 0)
{