Convert to using indexes into a buffer

This commit is contained in:
Richard Frith-Macdonald 2020-05-08 15:05:17 +01:00
parent 2a31a0d551
commit b26a2f49ea

View file

@ -110,8 +110,9 @@ NewUTF8STR(const void *ptr, int len)
NSMutableDictionary *defaults;
NSData *data;
NSError *error;
const unsigned char *cp; // character pointer
const unsigned char *cend; // end of data
const unsigned char *bytes;
NSUInteger cp; // character position
NSUInteger cend; // end of data
int line; // current line (counts from 0)
int column; // current column (counts from 0)
BOOL abort; // abort parse loop
@ -256,6 +257,8 @@ static Class strict = Nil;
return [self initWithData: [NSData dataWithContentsOfURL: anURL]];
}
#define addr(x) (this->bytes + (x))
- (id) initWithData: (NSData *)data
{
if (data == nil)
@ -291,12 +294,13 @@ static Class strict = Nil;
}
this->tagPath = [[NSMutableArray alloc] init];
this->namespaces = [[NSMutableArray alloc] init];
this->cp = [this->data bytes];
this->cend = this->cp + [this->data length];
this->bytes = [this->data bytes];
this->cp = 0;
this->cend = [this->data length];
/* If the data contained utf-8 with a BOM, we must skip it.
*/
if ((this->cend - this->cp) > 2 && this->cp[0] == 0xef
&& this->cp[1] == 0xbb && this->cp[2] == 0xbf)
if ((this->cend - this->cp) > 2 && addr(this->cp)[0] == 0xef
&& addr(this->cp)[1] == 0xbb && addr(this->cp)[2] == 0xbf)
{
this->cp += 3; // Skip BOM
}
@ -419,7 +423,10 @@ static Class strict = Nil;
return this->tagPath;
}
#define cget() ((this->cp < this->cend)?(this->column++, *this->cp++): -1)
#define cget() (\
(this->cp < this->cend) \
? (this->column++, *addr(this->cp++)) \
: -1)
- (BOOL) _parseError: (NSString *)message code: (NSInteger)code
{
@ -519,7 +526,7 @@ static Class strict = Nil;
- (void) _processDeclaration
{
const unsigned char *tp;
NSUInteger tp;
NSString *decl;
NSString *name;
int c;
@ -546,7 +553,7 @@ NSLog(@"parserDidStartDocument: ");
{
c = cget(); // scan name to delimiting character
}
decl = [NewUTF8STR(tp, this->cp - tp - 1) autorelease];
decl = [NewUTF8STR(addr(tp), this->cp - tp - 1) autorelease];
if (nil == decl)
{
[self _parseError: @"invalid character in declaraction"
@ -566,7 +573,7 @@ NSLog(@"parserDidStartDocument: ");
{
c = cget(); // scan name to delimiting character
}
name = [NewUTF8STR(tp, this->cp - tp - 1) autorelease];
name = [NewUTF8STR(addr(tp), this->cp - tp - 1) autorelease];
if (nil == name)
{
[self _parseError: @"invalid character in declaraction name"
@ -613,7 +620,7 @@ NSLog(@"_processDeclaration <%@%@ %@>", flag?@"/": @"", decl, name);
{
c = cget(); // scan name to delimiting character
}
name = NewUTF8STR(tp, this->cp - tp - 1);
name = NewUTF8STR(addr(tp), this->cp - tp - 1);
if (nil == name)
{
[self _parseError: @"invalid character in declaration attr"
@ -633,11 +640,12 @@ NSLog(@"name=%@ - %02x %c", name, c, isprint(c)?c: ' ');
{
c = cget(); // scan name to delimiting character
}
type = NewUTF8STR(tp, this->cp - tp - 1);
type = NewUTF8STR(addr(tp), this->cp - tp - 1);
if (nil == type)
{
[self _parseError: @"invalid character in declaration type"
code: NSXMLParserInvalidCharacterError];
RELEASE(name);
return;
}
#if 1 || EXTRA_DEBUG
@ -647,7 +655,7 @@ NSLog(@"type=%@ - %02x %c", type, c, isprint(c)?c: ' ');
*/
if ([type isEqualToString: @"CDATA"])
{
[type release];
RELEASE(type);
type = @"";
}
@ -690,9 +698,9 @@ NSLog(@"type=%@ - %02x %c", type, c, isprint(c)?c: ' ');
type: type
defaultValue: def];
}
[name release];
[type release];
[def release];
RELEASE(name);
RELEASE(type);
RELEASE(def);
}
return;
}
@ -1010,10 +1018,10 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
- (BOOL) _parseEntity: (NSString**)result
{
int c;
const unsigned char *ep = this->cp; // should be position behind &
int len;
NSString *entity;
int c;
NSUInteger ep = this->cp; // should be position behind &
int len;
NSString *entity;
if (0 == result) result = &entity;
do {
@ -1027,7 +1035,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
len = this->cp - ep - 1;
*result = [self _newEntity: ep length: len];
*result = [self _newEntity: addr(ep) length: len];
if (&entity == result)
{
[*result release]; // Won't be used
@ -1038,11 +1046,11 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
- (NSString *) _newQarg
{
// get argument (might be quoted)
const unsigned char *ap = --this->cp; // argument start pointer
int c = cget(); // refetch first character
int len;
BOOL containsEntity = NO;
NSString *qs;
NSUInteger ap = --this->cp; // argument start pointer
int c = cget(); // refetch first character
int len;
BOOL containsEntity = NO;
NSString *qs;
#if EXTRA_DEBUG
NSLog(@"_newQarg: %02x %c", c, isprint(c)?c: ' ');
@ -1105,7 +1113,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
NSString *seg;
NSMutableString *m;
const unsigned char *start = ap;
const unsigned char *start = addr(ap);
const unsigned char *end = start + len;
const unsigned char *ptr = start;
@ -1148,7 +1156,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
return m;
}
qs = NewUTF8STR(ap, len);
qs = NewUTF8STR(addr(ap), len);
if (nil == qs)
{
[self _parseError: @"invalid character in quoted string"
@ -1161,8 +1169,8 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
- (BOOL) parse
{
// read XML (or HTML) file
const unsigned char *vp = this->cp; // value pointer
int c;
NSUInteger vp = this->cp; // value position
int c;
/* Start by accumulating ignorable whitespace.
*/
@ -1174,7 +1182,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
#if EXTRA_DEBUG
NSLog(@"_nextelement %02x %c", c, isprint(c)?c: ' ');
#endif
switch(c)
switch (c)
{
case '\r':
this->column = 0;
@ -1196,8 +1204,8 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
*/
if (this->cp - vp > 1)
{
const unsigned char *p;
NSString *s;
NSUInteger p;
NSString *s;
p = this->cp - 1;
if (YES == this->ignorable)
@ -1210,7 +1218,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
/* step through trailing whitespace (if any)
*/
while (p > vp && isspace(p[-1]))
while (p > vp && isspace(addr(p)[-1]))
{
p--;
}
@ -1222,7 +1230,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
if (this->foundCharacters != 0)
{
s = NewUTF8STR(vp, p - vp);
s = NewUTF8STR(addr(vp), p - vp);
if (nil == s)
{
[self _parseError: @"invalid character data"
@ -1243,7 +1251,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
if (this->foundIgnorable != 0)
{
s = NewUTF8STR(p, this->cp - p - 1);
s = NewUTF8STR(addr(p), this->cp - p - 1);
if (nil == s)
{
[self _parseError: @"invalid whitespace data"
@ -1260,7 +1268,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
else if (this->foundCharacters != 0)
{
s = NewUTF8STR(p, this->cp - p - 1);
s = NewUTF8STR(addr(p), this->cp - p - 1);
if (nil == s)
{
[self _parseError: @"invalid character data"
@ -1282,7 +1290,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
}
switch(c)
switch (c)
{
default:
if (YES == this->whitespace && !isspace(c))
@ -1296,7 +1304,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
*/
if (this->foundIgnorable != 0)
{
s = NewUTF8STR(vp, this->cp - vp - 1);
s = NewUTF8STR(addr(vp), this->cp - vp - 1);
if (nil == s)
{
[self _parseError: @"invalid whitespace data"
@ -1311,7 +1319,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
else if (this->foundCharacters != 0)
{
s = NewUTF8STR(vp, this->cp - vp - 1);
s = NewUTF8STR(addr(vp), this->cp - vp - 1);
if (nil == s)
{
[self _parseError: @"invalid character data"
@ -1400,8 +1408,8 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
NSString *tag;
NSMutableDictionary *attributes;
NSString *arg;
const unsigned char *tp = this->cp; // tag pointer
const unsigned char *sp = tp - 1; // Open angle bracket
NSUInteger tp = this->cp; // tag position
NSUInteger sp = tp - 1; // Open angle bracket
/* After processing a tag, whitespace will be ignorable and
* we can start accumulating it in our buffer.
@ -1410,7 +1418,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
this->whitespace = YES;
if (this->cp < this->cend-3
&& strncmp((char *)this->cp, "!--", 3) == 0)
&& strncmp((char *)addr(this->cp), "!--", 3) == 0)
{
/* start of comment skip all characters until "-->"
*/
@ -1423,7 +1431,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
if (this->foundComment != 0)
{
NSString *c = NewUTF8STR(tp, this->cp - tp);
NSString *c = NewUTF8STR(addr(tp), this->cp - tp);
if (nil == c)
{
@ -1443,14 +1451,14 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
continue;
}
if (this->cp < this->cend-8
&& strncmp((char *)this->cp, "![CDATA[", 8) == 0)
&& strncmp((char *)addr(this->cp), "![CDATA[", 8) == 0)
{
/* start of CDATA skip all characters until "]>"
*/
this->cp += 8;
tp = this->cp;
while (this->cp < this->cend-3
&& strncmp((char *)this->cp, "]]>", 3) != 0)
&& strncmp((char *)addr(this->cp), "]]>", 3) != 0)
{
this->cp++; // search
}
@ -1458,7 +1466,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
NSData *d;
d = [[NSData alloc] initWithBytes: tp
d = [[NSData alloc] initWithBytes: addr(tp)
length: this->cp - tp];
(*this->foundCDATA)(_del,
foundCDATASel, self, d);
@ -1503,13 +1511,13 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
{
c = cget(); // scan tag until we find a delimiting character
}
if (*tp == '/')
if (*addr(tp) == '/')
{
tag = NewUTF8STR(tp + 1, this->cp - tp - 2);
tag = NewUTF8STR(addr(tp + 1), this->cp - tp - 2);
}
else
{
tag = NewUTF8STR(tp, this->cp - tp - 1);
tag = NewUTF8STR(addr(tp), this->cp - tp - 1);
}
if (nil == tag)
{
@ -1536,7 +1544,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
}
while (c != EOF)
{
if (c == '/' && *tp != '/')
if (c == '/' && *addr(tp) != '/')
{
// appears to be a />
c = cget();
@ -1554,7 +1562,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
break;
}
if (c == '?' && *tp == '?')
if (c == '?' && *addr(tp) == '?')
{
// appears to be a ?>
c = cget();
@ -1569,8 +1577,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
/* If this is the <?xml header, the opening angle
* bracket MUST be at the start of the data.
*/
if ([tag isEqualToString: @"?xml"]
&& sp != [this->data bytes])
if ([tag isEqualToString: @"?xml"] && sp != 0)
{
[attributes release];
[tag release];
@ -1590,7 +1597,7 @@ NSLog(@"_processTag <%@%@ %@>", flag?@"/": @"", tag, attributes);
if (c == '>')
{
[self _processTag: tag
isEnd: (*tp == '/')
isEnd: (*addr(tp) == '/')
withAttributes: attributes];
break;
}