fix entity parsing in quoted strings

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@36458 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2013-04-03 15:28:28 +00:00
parent a2d444617c
commit dbf2cd01a0
5 changed files with 191 additions and 101 deletions

View file

@ -1,3 +1,9 @@
2013-04-03 Richard Frith-Macdonald <rfm@gnu.org>
* Source/NSSocketPort.m: Improve/simplify invalidation.
* Source/NSXMLParser.m:
Fix (implement) parsing of entities inside quoted strings.
2013-03-28 Richard Frith-Macdonald <rfm@gnu.org>
Make release 1.24.4

View file

@ -1218,37 +1218,23 @@ static Class runLoopClass;
{
#if defined(__MINGW__)
WSANETWORKEVENTS ocurredEvents;
#else
#endif
/*
* If we have been invalidated (desc < 0) then we should ignore this
/* If we have been invalidated then we should ignore this
* event and remove ourself from the runloop.
*/
if (desc == INVALID_SOCKET)
if (NO == valid || desc == INVALID_SOCKET)
{
NSRunLoop *l = [runLoopClass currentRunLoop];
#if defined(__MINGW__)
[l removeEvent: data
type: ET_HANDLE
forMode: mode
all: YES];
#else
[l removeEvent: data
type: ET_WDESC
forMode: mode
all: YES];
[l removeEvent: data
type: ET_EDESC
forMode: mode
all: YES];
#endif
return;
}
M_LOCK(myLock);
#if defined(__MINGW__)
if (WSAEnumNetworkEvents(desc, event, &ocurredEvents)==SOCKET_ERROR)
{
NSLog(@"Error getting event type %d", WSAGetLastError());
@ -1315,7 +1301,31 @@ static Class runLoopClass;
NSLog(@"Event not get %d", ocurredEvents.lNetworkEvents);
abort();
}
M_UNLOCK(myLock);
#else
/* If we have been invalidated then we should ignore this
* event and remove ourself from the runloop.
*/
if (NO == valid || desc < 0)
{
NSRunLoop *l = [runLoopClass currentRunLoop];
[l removeEvent: data
type: ET_WDESC
forMode: mode
all: YES];
[l removeEvent: data
type: ET_EDESC
forMode: mode
all: YES];
return;
}
M_LOCK(myLock);
if (type != ET_WDESC)
{
[self receivedEventRead];
@ -1324,9 +1334,9 @@ static Class runLoopClass;
{
[self receivedEventWrite];
}
#endif
M_UNLOCK(myLock);
#endif
}
- (BOOL) sendMessage: (NSArray*)components beforeDate: (NSDate*)when

View file

@ -1362,14 +1362,78 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
}
/**
 * Translates the entity whose name is the len bytes starting at ep
 * (the text between the leading '&' and the trailing ';', neither of
 * which is included) into its replacement string.<br />
 * Handles numeric character references (#ddd and #xhh) and the five
 * predefined XML entities (amp, lt, gt, quot, apos).<br />
 * Anything else is logged as unrecognized and its name is returned as
 * the replacement text.<br />
 * The result is treated as owned by the caller (callers release it);
 * the constant strings returned for predefined entities are literals.
 */
- (NSString*) _newEntity: (const unsigned char *)ep length: (int)len
{
  NSString	*entity;

  /* Guard on len so that an empty name never reaches the memcpy below
   * with a negative (huge when converted to size_t) byte count.
   */
  if (len > 0 && *ep == '#')
    {
      if (len < 8)
        {
          unsigned int	val = 0;
          char		buf[8];
          BOOL		parsed = NO;

          /* Copy the digits following the '#' (at most six characters
           * since len < 8) so that sscanf sees a terminated string.
           */
          memcpy(buf, ep + 1, len - 1);
          buf[len - 1] = '\0';

          /* sscanf returns EOF (non-zero) when the input runs out before
           * the first conversion, eg for "&#;" or "&#x;", so we must test
           * for exactly one successful conversion rather than non-zero.
           */
          if (sscanf(buf, "x%x", &val) == 1)
            {
              // &#xhh; hex value
              parsed = YES;
            }
          else if (sscanf(buf, "%u", &val) == 1)
            {
              // &#ddd; decimal value
              parsed = YES;
            }

          if (YES == parsed)
            {
              if (val <= 0xFFFF)
                {
                  return [[NSString alloc] initWithFormat: @"%C",
                    (unichar)val];
                }
              if (val <= 0x10FFFF)
                {
                  unichar	pair[2];

                  /* Outside the basic multilingual plane a single
                   * unichar can't hold the value; encode as a UTF-16
                   * surrogate pair rather than truncating.
                   */
                  val -= 0x10000;
                  pair[0] = (unichar)(0xD800 + (val >> 10));
                  pair[1] = (unichar)(0xDC00 + (val & 0x3FF));
                  return [[NSString alloc] initWithCharacters: pair
                                                       length: 2];
                }
              // Beyond the Unicode range ... fall through as unrecognized.
            }
        }
    }
  else
    {
      // the five predefined entities
      if (len == 3 && strncmp((char *)ep, "amp", len) == 0)
        {
          return @"&";
        }
      else if (len == 2 && strncmp((char *)ep, "lt", len) == 0)
        {
          return @"<";
        }
      else if (len == 2 && strncmp((char *)ep, "gt", len) == 0)
        {
          return @">";
        }
      else if (len == 4 && strncmp((char *)ep, "quot", len) == 0)
        {
          return @"\"";
        }
      else if (len == 4 && strncmp((char *)ep, "apos", len) == 0)
        {
          return @"'";
        }
    }

  /* Not something we can translate: report it and hand back the raw
   * entity name (presumably an owned string from NewUTF8STR - confirm
   * against its definition).
   */
  entity = NewUTF8STR(ep, len);
#if 1
  NSLog(@"NSXMLParser: unrecognized entity: &%@;", entity);
#endif
//  entity=[entitiesTable objectForKey: entity];  // look up string in entity translation table

  if (nil == entity)
    {
      entity = @"&??;";				// unknown entity
    }
  return entity;
}
- (BOOL) _parseEntity: (NSString**)result
{
int c;
const unsigned char *ep = this->cp; // should be position behind &
int len;
unsigned int val;
NSString *entity;
if (0 == result) result = &entity;
do {
c = cget();
} while (c != EOF && c != '<' && c != ';');
@ -1381,78 +1445,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
len = this->cp - ep - 1;
if (*ep == '#')
*result = [self _newEntity: ep length: len];
if (entity == *result)
{
if (len < 8)
{
char buf[8];
memcpy(buf, ep + 1, len - 1);
buf[len - 1] = '\0';
// &#ddd; or &#xhh;
if (sscanf(buf, "x%x;", &val))
{
// &#xhh; hex value
if (result != 0)
{
*result = [[NSString alloc] initWithFormat: @"%C", val];
}
return YES;
}
else if (sscanf(buf, "%d;", &val))
{
// &ddd; decimal value
if (result != 0)
{
*result = [[NSString alloc] initWithFormat: @"%C", val];
}
return YES;
}
}
}
else
{
// the five predefined entities
if (len == 3 && strncmp((char *)ep, "amp", len) == 0)
{
if (result != 0) *result = @"&";
return YES;
}
else if (len == 2 && strncmp((char *)ep, "lt", len) == 0)
{
if (result != 0) *result = @"<";
return YES;
}
else if (len == 2 && strncmp((char *)ep, "gt", len) == 0)
{
if (result != 0) *result = @">";
return YES;
}
else if (len == 4 && strncmp((char *)ep, "quot", len) == 0)
{
if (result != 0) *result = @"\"";
return YES;
}
else if (len == 4 && strncmp((char *)ep, "apos", len) == 0)
{
if (result != 0) *result = @"'";
return YES;
}
}
entity = NewUTF8STR(ep, len);
#if 1
NSLog(@"NSXMLParser: unrecognized entity: &%@;", entity);
#endif
// entity=[entitiesTable objectForKey: entity]; // look up string in entity translation table
if (entity == nil)
{
entity = @"&??;"; // unknown entity
}
if (result != 0)
{
*result = entity;
[entity release]; // Won't be used
}
return YES;
}
@ -1462,6 +1458,8 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
// get argument (might be quoted)
const unsigned char *ap = --this->cp; // argument start pointer
int c = cget(); // refetch first character
int len;
BOOL containsEntity = NO;
#if EXTRA_DEBUG
NSLog(@"_newQarg: %02x %c", c, isprint(c)?c: ' ');
@ -1475,11 +1473,16 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
return nil; // unterminated!
}
if ('&' == c)
{
containsEntity = YES;
}
}
while (c != '\"');
return NewUTF8STR(ap + 1, this->cp - ap - 2);
len = this->cp - ap - 2;
ap++;
}
if (c == '\'')
else if (c == '\'')
{
do
{
@ -1488,18 +1491,74 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
return nil; // unterminated!
}
if ('&' == c)
{
containsEntity = YES;
}
}
while (c != '\'');
return NewUTF8STR(ap + 1, this->cp - ap - 2);
len = this->cp - ap - 2;
ap++;
}
/* strict XML requires quoting (?)
if (!this->acceptHTML)
;
*/
while (!isspace(c) && c != '>' && c != '/' && c != '?' && c != '=' &&c != EOF)
c = cget();
this->cp--; // go back to terminating character
return NewUTF8STR(ap, this->cp - ap);
else
{
/* strict XML requires quoting (?)
if (!this->acceptHTML)
;
*/
while (!isspace(c)
&& c != '>' && c != '/' && c != '?' && c != '=' && c != EOF)
{
if ('&' == c)
{
containsEntity = YES;
}
c = cget();
}
this->cp--; // go back to terminating character
len = this->cp - ap;
}
if (YES == containsEntity)
{
NSString *seg;
NSMutableString *m;
const unsigned char *start = ap;
const unsigned char *end = start + len;
const unsigned char *ptr = start;
m = [[NSMutableString alloc] initWithCapacity: len];
while (ptr < end)
{
while (ptr < end && *ptr != '&')
{
ptr++;
}
if (ptr > start)
{
seg = NewUTF8STR(start, ptr - start);
[m appendString: seg];
RELEASE(seg);
start = ptr;
}
else
{
while (ptr < end && *ptr != ';')
{
ptr++;
}
seg = [self _newEntity: start + 1 length: ptr - start - 1];
[m appendString: seg];
RELEASE(seg);
if (ptr < end)
{
ptr++; // Step past trailing semicolon
}
start = ptr;
}
}
return m;
}
return NewUTF8STR(ap, len);
}
- (BOOL) parse

View file

@ -0,0 +1,11 @@
parserDidStartDocument:
parser:didStartElement:namespaceURI:qualifiedName:attributes: node node {
attr = "-\"+";
}
parser:foundCharacters:
parser:foundCharacters: content
parser:foundCharacters:
parser:didEndElement:namespaceURI:qualifiedName: node node
parserDidEndDocument:

View file

@ -0,0 +1,4 @@
<?xml version='1.0'?>
<node attr = "-&quot;+">
content
</node>