mirror of
https://github.com/gnustep/libs-base.git
synced 2025-05-31 00:30:53 +00:00
Fix ignorable whitespace in sloppy parser
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@30452 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
79083336bb
commit
6f89020483
3 changed files with 103 additions and 14 deletions
|
@ -1,3 +1,11 @@
|
||||||
|
2010-05-25 Richard Frith-Macdonald <rfm@gnu.org>
|
||||||
|
|
||||||
|
* Source/NSPropertyList.m: ([-parser:foundCharacters:]) don't trim
|
||||||
|
the supplied string ... we need to retain whitespace around entities
|
||||||
|
in value text.
|
||||||
|
* Source/NSXMLParser.m: Add support for ignorable whitespace so that
|
||||||
|
it doesn't get handed to the delegate as normal characters.
|
||||||
|
|
||||||
2010-05-22 Richard Frith-Macdonald <rfm@gnu.org>
|
2010-05-22 Richard Frith-Macdonald <rfm@gnu.org>
|
||||||
|
|
||||||
* Headers/Foundation/NSObjCRuntime.h:
|
* Headers/Foundation/NSObjCRuntime.h:
|
||||||
|
|
|
@ -139,13 +139,9 @@ extern BOOL GSScanDouble(unichar*, unsigned, double*);
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void) parser: (NSXMLParser *)parser
|
- (void) parser: (NSXMLParser *)parser
|
||||||
foundCharacters: (NSString *)string
|
foundCharacters: (NSString *)string
|
||||||
{
|
{
|
||||||
string = [string stringByTrimmingSpaces];
|
[value appendString: string];
|
||||||
if ([string length] > 0)
|
|
||||||
{
|
|
||||||
[value appendString: string];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void) parser: (NSXMLParser *)parser
|
- (void) parser: (NSXMLParser *)parser
|
||||||
|
|
|
@ -622,6 +622,8 @@ typedef struct NSXMLParserIvarsType
|
||||||
int line; // current line (counts from 0)
|
int line; // current line (counts from 0)
|
||||||
int column; // current column (counts from 0)
|
int column; // current column (counts from 0)
|
||||||
BOOL abort; // abort parse loop
|
BOOL abort; // abort parse loop
|
||||||
|
BOOL ignorable; // whitespace is ignorable
|
||||||
|
BOOL whitespace; // had only whitespace in current data
|
||||||
BOOL shouldProcessNamespaces;
|
BOOL shouldProcessNamespaces;
|
||||||
BOOL shouldReportNamespacePrefixes;
|
BOOL shouldReportNamespacePrefixes;
|
||||||
BOOL shouldResolveExternalEntities;
|
BOOL shouldResolveExternalEntities;
|
||||||
|
@ -633,6 +635,7 @@ typedef struct NSXMLParserIvarsType
|
||||||
IMP foundCDATA;
|
IMP foundCDATA;
|
||||||
IMP foundCharacters;
|
IMP foundCharacters;
|
||||||
IMP foundComment;
|
IMP foundComment;
|
||||||
|
IMP foundIgnorable;
|
||||||
|
|
||||||
} NSXMLParserIvars;
|
} NSXMLParserIvars;
|
||||||
|
|
||||||
|
@ -643,6 +646,7 @@ static SEL didStartMappingPrefixSel;
|
||||||
static SEL foundCDATASel;
|
static SEL foundCDATASel;
|
||||||
static SEL foundCharactersSel;
|
static SEL foundCharactersSel;
|
||||||
static SEL foundCommentSel;
|
static SEL foundCommentSel;
|
||||||
|
static SEL foundIgnorableSel;
|
||||||
|
|
||||||
@implementation SloppyXMLParser
|
@implementation SloppyXMLParser
|
||||||
|
|
||||||
|
@ -675,6 +679,8 @@ static SEL foundCommentSel;
|
||||||
= @selector(parser:foundCharacters:);
|
= @selector(parser:foundCharacters:);
|
||||||
foundCommentSel
|
foundCommentSel
|
||||||
= @selector(parser:foundComment:);
|
= @selector(parser:foundComment:);
|
||||||
|
foundIgnorableSel
|
||||||
|
= @selector(parser:foundIgnorableWhitespace:);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -838,6 +844,16 @@ static SEL foundCommentSel;
|
||||||
{
|
{
|
||||||
this->foundComment = 0;
|
this->foundComment = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ([_del respondsToSelector: foundIgnorableSel])
|
||||||
|
{
|
||||||
|
this->foundIgnorable
|
||||||
|
= [_del methodForSelector: foundIgnorableSel];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
this->foundIgnorable = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1247,6 +1263,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
||||||
return [self _parseError: @"missing <?xml > preamble"
|
return [self _parseError: @"missing <?xml > preamble"
|
||||||
code: NSXMLParserDocumentStartError];
|
code: NSXMLParserDocumentStartError];
|
||||||
}
|
}
|
||||||
|
/* Start by accumulating ignorable whitespace.
|
||||||
|
*/
|
||||||
|
this->ignorable = YES;
|
||||||
|
this->whitespace = YES;
|
||||||
c = cget(); // get first character
|
c = cget(); // get first character
|
||||||
while (!this->abort)
|
while (!this->abort)
|
||||||
{
|
{
|
||||||
|
@ -1264,22 +1284,51 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
||||||
this->column = 0;
|
this->column = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case '<':
|
||||||
|
/* Whitespace immediately before an element is always ignorable.
|
||||||
|
*/
|
||||||
|
this->ignorable = YES; /* Fall through to push out data */
|
||||||
case EOF:
|
case EOF:
|
||||||
case '<':
|
|
||||||
case '&':
|
case '&':
|
||||||
{
|
{
|
||||||
/* push out any characters that have been collected so far
|
/* push out any characters that have been collected so far
|
||||||
*/
|
*/
|
||||||
if (this->cp - vp > 1)
|
if (this->cp - vp > 1)
|
||||||
{
|
{
|
||||||
/* check for whitespace only - might set/reset
|
const unsigned char *p;
|
||||||
* a flag to indicate so
|
NSString *s;
|
||||||
*/
|
|
||||||
if (this->foundCharacters != 0)
|
|
||||||
{
|
|
||||||
NSString *s;
|
|
||||||
|
|
||||||
s = NewUTF8STR(vp, this->cp - vp - 1);
|
p = this->cp - 1;
|
||||||
|
if (YES == this->ignorable)
|
||||||
|
{
|
||||||
|
if (YES == this->whitespace)
|
||||||
|
{
|
||||||
|
p = vp; // all whitespace
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* step through trailing whitespace (if any)
|
||||||
|
*/
|
||||||
|
while (p > vp && isspace(p[-1]))
|
||||||
|
{
|
||||||
|
p--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (p < this->cp - 1 && this->foundIgnorable != 0)
|
||||||
|
{
|
||||||
|
/* Process data as ignorable whitespace
|
||||||
|
*/
|
||||||
|
s = NewUTF8STR(p, this->cp - p - 1);
|
||||||
|
(*this->foundIgnorable)(_del,
|
||||||
|
foundIgnorableSel, self, s);
|
||||||
|
[s release];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (p - vp > 0 && this->foundCharacters != 0)
|
||||||
|
{
|
||||||
|
/* Process remaining data as characters
|
||||||
|
*/
|
||||||
|
s = NewUTF8STR(vp, p - vp);
|
||||||
(*this->foundCharacters)(_del,
|
(*this->foundCharacters)(_del,
|
||||||
foundCharactersSel, self, s);
|
foundCharactersSel, self, s);
|
||||||
[s release];
|
[s release];
|
||||||
|
@ -1292,6 +1341,30 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
||||||
switch(c)
|
switch(c)
|
||||||
{
|
{
|
||||||
default:
|
default:
|
||||||
|
if (YES == this->whitespace && !isspace(c))
|
||||||
|
{
|
||||||
|
if (YES == this->ignorable && this->cp - vp > 1)
|
||||||
|
{
|
||||||
|
/* We have accumulated ignorable whitespace ...
|
||||||
|
* push it out.
|
||||||
|
*/
|
||||||
|
if (this->foundIgnorable != 0)
|
||||||
|
{
|
||||||
|
NSString *s;
|
||||||
|
|
||||||
|
s = NewUTF8STR(vp, this->cp - vp - 1);
|
||||||
|
(*this->foundIgnorable)(_del,
|
||||||
|
foundIgnorableSel, self, s);
|
||||||
|
[s release];
|
||||||
|
}
|
||||||
|
vp = this->cp;
|
||||||
|
}
|
||||||
|
/* We have read non-space data, so whitespace is no longer
|
||||||
|
* ignorable, and the buffer no longer contains only space.
|
||||||
|
*/
|
||||||
|
this->ignorable = NO;
|
||||||
|
this->whitespace = NO;
|
||||||
|
}
|
||||||
c = cget(); // just collect until we push out (again)
|
c = cget(); // just collect until we push out (again)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -1333,6 +1406,12 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
||||||
{
|
{
|
||||||
NSString *entity;
|
NSString *entity;
|
||||||
|
|
||||||
|
/* After any entity, whitespace is no longer ignorable, but
|
||||||
|
* we will have an empty buffer to accumulate it.
|
||||||
|
*/
|
||||||
|
this->ignorable = NO;
|
||||||
|
this->whitespace = YES;
|
||||||
|
|
||||||
if ([self _parseEntity: &entity] == NO)
|
if ([self _parseEntity: &entity] == NO)
|
||||||
{
|
{
|
||||||
return [self _parseError: @"empty entity name"
|
return [self _parseError: @"empty entity name"
|
||||||
|
@ -1356,6 +1435,12 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
|
||||||
NSString *arg;
|
NSString *arg;
|
||||||
const unsigned char *tp = this->cp; // tag pointer
|
const unsigned char *tp = this->cp; // tag pointer
|
||||||
|
|
||||||
|
/* After processing a tag, whitespace will be ignorable and
|
||||||
|
* we can start accumulating it in our buffer.
|
||||||
|
*/
|
||||||
|
this->ignorable = YES;
|
||||||
|
this->whitespace = YES;
|
||||||
|
|
||||||
if (this->cp < this->cend-3
|
if (this->cp < this->cend-3
|
||||||
&& strncmp((char *)this->cp, "!--", 3) == 0)
|
&& strncmp((char *)this->cp, "!--", 3) == 0)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue