mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-23 00:41:02 +00:00
Fix ignorable whitespace in sloppy parser
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@30452 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
20e7daac62
commit
fbfc255123
3 changed files with 103 additions and 14 deletions
|
@ -1,3 +1,11 @@
|
|||
2010-05-25 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Source/NSPropertyList.m: ([-parser:foundCharacters:]) don't trim
|
||||
the supplied string ... we need to retain whitespace around entities
|
||||
in value text.
|
||||
* Source/NSXMLParser.m: Add support for ignorable whitespace so that
|
||||
it doesn't get handed to the delegate as normal characters.
|
||||
|
||||
2010-05-22 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Headers/Foundation/NSObjCRuntime.h:
|
||||
|
|
|
@ -139,13 +139,9 @@ extern BOOL GSScanDouble(unichar*, unsigned, double*);
|
|||
}
|
||||
|
||||
- (void) parser: (NSXMLParser *)parser
|
||||
foundCharacters: (NSString *)string
|
||||
foundCharacters: (NSString *)string
|
||||
{
|
||||
string = [string stringByTrimmingSpaces];
|
||||
if ([string length] > 0)
|
||||
{
|
||||
[value appendString: string];
|
||||
}
|
||||
[value appendString: string];
|
||||
}
|
||||
|
||||
- (void) parser: (NSXMLParser *)parser
|
||||
|
|
|
@ -622,6 +622,8 @@ typedef struct NSXMLParserIvarsType
|
|||
int line; // current line (counts from 0)
|
||||
int column; // current column (counts from 0)
|
||||
BOOL abort; // abort parse loop
|
||||
BOOL ignorable; // whitespace is ignorable
|
||||
BOOL whitespace; // had only whitespace in current data
|
||||
BOOL shouldProcessNamespaces;
|
||||
BOOL shouldReportNamespacePrefixes;
|
||||
BOOL shouldResolveExternalEntities;
|
||||
|
@ -633,6 +635,7 @@ typedef struct NSXMLParserIvarsType
|
|||
IMP foundCDATA;
|
||||
IMP foundCharacters;
|
||||
IMP foundComment;
|
||||
IMP foundIgnorable;
|
||||
|
||||
} NSXMLParserIvars;
|
||||
|
||||
|
@ -643,6 +646,7 @@ static SEL didStartMappingPrefixSel;
|
|||
static SEL foundCDATASel;
|
||||
static SEL foundCharactersSel;
|
||||
static SEL foundCommentSel;
|
||||
static SEL foundIgnorableSel;
|
||||
|
||||
@implementation SloppyXMLParser
|
||||
|
||||
|
@ -675,6 +679,8 @@ static SEL foundCommentSel;
|
|||
= @selector(parser:foundCharacters:);
|
||||
foundCommentSel
|
||||
= @selector(parser:foundComment:);
|
||||
foundIgnorableSel
|
||||
= @selector(parser:foundIgnorableWhitespace:);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -838,6 +844,16 @@ static SEL foundCommentSel;
|
|||
{
|
||||
this->foundComment = 0;
|
||||
}
|
||||
|
||||
if ([_del respondsToSelector: foundIgnorableSel])
|
||||
{
|
||||
this->foundIgnorable
|
||||
= [_del methodForSelector: foundIgnorableSel];
|
||||
}
|
||||
else
|
||||
{
|
||||
this->foundIgnorable = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1247,6 +1263,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
return [self _parseError: @"missing <?xml > preamble"
|
||||
code: NSXMLParserDocumentStartError];
|
||||
}
|
||||
/* Start by accumulating ignorable whitespace.
|
||||
*/
|
||||
this->ignorable = YES;
|
||||
this->whitespace = YES;
|
||||
c = cget(); // get first character
|
||||
while (!this->abort)
|
||||
{
|
||||
|
@ -1264,22 +1284,51 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
this->column = 0;
|
||||
break;
|
||||
|
||||
case '<':
|
||||
/* Whitespace immediately before an element is always ignorable.
|
||||
*/
|
||||
this->ignorable = YES; /* Fall through to push out data */
|
||||
case EOF:
|
||||
case '<':
|
||||
case '&':
|
||||
{
|
||||
/* push out any characters that have been collected so far
|
||||
*/
|
||||
if (this->cp - vp > 1)
|
||||
{
|
||||
/* check for whitespace only - might set/reset
|
||||
* a flag to indicate so
|
||||
*/
|
||||
if (this->foundCharacters != 0)
|
||||
{
|
||||
NSString *s;
|
||||
const unsigned char *p;
|
||||
NSString *s;
|
||||
|
||||
s = NewUTF8STR(vp, this->cp - vp - 1);
|
||||
p = this->cp - 1;
|
||||
if (YES == this->ignorable)
|
||||
{
|
||||
if (YES == this->whitespace)
|
||||
{
|
||||
p = vp; // all whitespace
|
||||
}
|
||||
else
|
||||
{
|
||||
/* step through trailing whitespace (if any)
|
||||
*/
|
||||
while (p > vp && isspace(p[-1]))
|
||||
{
|
||||
p--;
|
||||
}
|
||||
}
|
||||
if (p < this->cp - 1 && this->foundIgnorable != 0)
|
||||
{
|
||||
/* Process data as ignorable whitespace
|
||||
*/
|
||||
s = NewUTF8STR(p, this->cp - p - 1);
|
||||
(*this->foundIgnorable)(_del,
|
||||
foundIgnorableSel, self, s);
|
||||
[s release];
|
||||
}
|
||||
}
|
||||
if (p - vp > 0 && this->foundCharacters != 0)
|
||||
{
|
||||
/* Process remaining data as characters
|
||||
*/
|
||||
s = NewUTF8STR(vp, p - vp);
|
||||
(*this->foundCharacters)(_del,
|
||||
foundCharactersSel, self, s);
|
||||
[s release];
|
||||
|
@ -1292,6 +1341,30 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
switch(c)
|
||||
{
|
||||
default:
|
||||
if (YES == this->whitespace && !isspace(c))
|
||||
{
|
||||
if (YES == this->ignorable && this->cp - vp > 1)
|
||||
{
|
||||
/* We have accumulated ignorable whitespace ...
|
||||
* push it out.
|
||||
*/
|
||||
if (this->foundIgnorable != 0)
|
||||
{
|
||||
NSString *s;
|
||||
|
||||
s = NewUTF8STR(vp, this->cp - vp - 1);
|
||||
(*this->foundIgnorable)(_del,
|
||||
foundIgnorableSel, self, s);
|
||||
[s release];
|
||||
}
|
||||
vp = this->cp;
|
||||
}
|
||||
/* We have read non-space data, so whitespace is no longer
|
||||
* ignorable, and the buffer no longer contains only space.
|
||||
*/
|
||||
this->ignorable = NO;
|
||||
this->whitespace = NO;
|
||||
}
|
||||
c = cget(); // just collect until we push out (again)
|
||||
continue;
|
||||
|
||||
|
@ -1333,6 +1406,12 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
{
|
||||
NSString *entity;
|
||||
|
||||
/* After any entity, whitespace is no longer ignorable, but
|
||||
* we will have an empty buffer to accumulate it.
|
||||
*/
|
||||
this->ignorable = NO;
|
||||
this->whitespace = YES;
|
||||
|
||||
if ([self _parseEntity: &entity] == NO)
|
||||
{
|
||||
return [self _parseError: @"empty entity name"
|
||||
|
@ -1356,6 +1435,12 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
|||
NSString *arg;
|
||||
const unsigned char *tp = this->cp; // tag pointer
|
||||
|
||||
/* After processing a tag, whitespace will be ignorable and
|
||||
* we can start accumulating it in our buffer.
|
||||
*/
|
||||
this->ignorable = YES;
|
||||
this->whitespace = YES;
|
||||
|
||||
if (this->cp < this->cend-3
|
||||
&& strncmp((char *)this->cp, "!--", 3) == 0)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue