Implement faster textmap parser

This commit is contained in:
MascaraSnake 2022-01-08 13:17:15 +01:00
parent b7f9626fda
commit 1269fe6c0e
3 changed files with 194 additions and 50 deletions

View file

@ -483,8 +483,11 @@ extern void *(*M_Memcpy)(void* dest, const void* src, size_t n) FUNCNONNULL;
char *va(const char *format, ...) FUNCPRINTF;
char *M_GetToken(const char *inputString);
void M_UnGetToken(void);
UINT32 M_GetTokenPos(void);
void M_SetTokenPos(UINT32 newPos);
void M_TokenizerOpen(const char *inputString);
void M_TokenizerClose(void);
const char *M_TokenizerRead(UINT32 i);
UINT32 M_TokenizerGetEndPos(void);
void M_TokenizerSetEndPos(UINT32 newPos);
char *sizeu1(size_t num);
char *sizeu2(size_t num);
char *sizeu3(size_t num);

View file

@ -1970,18 +1970,168 @@ void M_UnGetToken(void)
endPos = oldendPos;
}
/** Returns the current token's position.
*/
UINT32 M_GetTokenPos(void)
// Number of token buffers. M_TokenizerRead(i) writes into buffer i, so tokens read with different indices can be held at the same time.
#define NUMTOKENS 2
// Text being tokenized. Set by M_TokenizerOpen; NULL while no tokenizer session is open.
static const char *tokenizerInput = NULL;
// Allocated size, in bytes, of each token buffer.
static UINT32 tokenCapacity[NUMTOKENS] = {0};
// Token buffers whose addresses are returned by M_TokenizerRead.
static char *tokenizerToken[NUMTOKENS] = {NULL};
// Offset into tokenizerInput where the token currently being read starts.
static UINT32 tokenizerStartPos = 0;
// Offset into tokenizerInput where the next M_TokenizerRead resumes scanning.
static UINT32 tokenizerEndPos = 0;
// Length of tokenizerInput, cached by M_TokenizerOpen.
static UINT32 tokenizerInputLength = 0;
static UINT8 tokenizerInComment = 0; // 0 = not in comment, 1 = // Single-line, 2 = /* Multi-line */
void M_TokenizerOpen(const char *inputString)
{
return endPos;
size_t i;
tokenizerInput = inputString;
for (i = 0; i < NUMTOKENS; i++)
{
tokenCapacity[i] = 1024;
tokenizerToken[i] = (char*)Z_Malloc(tokenCapacity[i] * sizeof(char), PU_STATIC, NULL);
}
tokenizerInputLength = strlen(tokenizerInput);
}
/** Sets the current token's position.
*/
void M_SetTokenPos(UINT32 newPos)
void M_TokenizerClose(void)
{
endPos = newPos;
size_t i;
tokenizerInput = NULL;
for (i = 0; i < NUMTOKENS; i++)
Z_Free(tokenizerToken[i]);
tokenizerStartPos = 0;
tokenizerEndPos = 0;
tokenizerInComment = 0;
}
/** Checks whether a comment opener sits at the given position and, if so,
  * records its kind in tokenizerInComment (1 for "//", 2 for a multi-line opener).
  * Does nothing while already inside a comment. The position itself is never modified.
  *
  * \param pos Position in tokenizerInput to inspect.
  */
static void M_DetectComment(UINT32 *pos)
{
	if (tokenizerInComment)
		return;

	// Both characters of the opener have to lie inside the input.
	// Written without subtracting from the length, which would wrap around for an empty input.
	if (*pos >= tokenizerInputLength || tokenizerInputLength - *pos < 2)
		return;

	if (tokenizerInput[*pos] != '/')
		return;

	switch (tokenizerInput[*pos + 1])
	{
		case '/': // Single-line comment start
			tokenizerInComment = 1;
			break;
		case '*': // Multi-line comment start
			tokenizerInComment = 2;
			break;
		default: // A lone slash is ordinary token text
			break;
	}
}
/** Copies the input span [tokenizerStartPos, tokenizerEndPos) into token buffer i
  * as a NUL-terminated string, replacing the buffer with a larger one if it does not fit.
  * Replacing the buffer invalidates any pointer to the previous contents of buffer i.
  *
  * \param i Index of the token buffer to fill.
  */
static void M_ReadTokenString(UINT32 i)
{
	UINT32 tokenLength = tokenizerEndPos - tokenizerStartPos;

	if (tokenLength + 1 > tokenCapacity[i])
	{
		// The old contents are overwritten below anyway, so release the old buffer instead of leaking it.
		if (tokenizerToken[i])
			Z_Free(tokenizerToken[i]);

		tokenCapacity[i] = tokenLength + 1;
		// Assign the memory. Don't forget an extra byte for the end of the string!
		tokenizerToken[i] = (char *)Z_Malloc(tokenCapacity[i] * sizeof(char), PU_STATIC, NULL);
	}

	// Copy the string.
	M_Memcpy(tokenizerToken[i], tokenizerInput + tokenizerStartPos, (size_t)tokenLength);
	// Make the final character NUL.
	tokenizerToken[i][tokenLength] = '\0';
}
/** Reads the next token from the input passed to M_TokenizerOpen.
  *
  * Whitespace, '=' and ';' separate tokens and are skipped, as are single-line
  * and multi-line comments. ',', '{' and '}' are always returned as one-character
  * tokens. Text inside double quotes is returned as one token without the quotes.
  *
  * \param i Index of the token buffer to use (below NUMTOKENS).
  * \return The token, stored in buffer i and valid until the next read with the
  *         same index or M_TokenizerClose; NULL when no session is open, the
  *         index is out of range, or the end of the input has been reached.
  */
const char *M_TokenizerRead(UINT32 i)
{
	if (!tokenizerInput)
		return NULL;

	// Only NUMTOKENS buffers exist.
	if (i >= NUMTOKENS)
		return NULL;

	// An end position past the input (e.g. set through M_TokenizerSetEndPos) would send every read below out of bounds.
	if (tokenizerEndPos > tokenizerInputLength)
		tokenizerEndPos = tokenizerInputLength;

	tokenizerStartPos = tokenizerEndPos;

	// Try to detect comments now, in case we're pointing right at one
	M_DetectComment(&tokenizerStartPos);

	// Find the first character that is neither a separator nor inside a comment, or else reach the end of the input
	while (tokenizerStartPos < tokenizerInputLength)
	{
		const char c = tokenizerInput[tokenizerStartPos];

		if (!(c == ' '
			|| c == '\t'
			|| c == '\r'
			|| c == '\n'
			|| c == '\0'
			|| c == '=' || c == ';' // UDMF TEXTMAP.
			|| tokenizerInComment != 0))
			break;

		// Try to detect comment endings now
		if (tokenizerInComment == 1 && c == '\n')
			tokenizerInComment = 0; // End of line for a single-line comment
		else if (tokenizerInComment == 2
			&& tokenizerStartPos < tokenizerInputLength - 1
			&& c == '*'
			&& tokenizerInput[tokenizerStartPos+1] == '/')
		{
			// End of multi-line comment
			tokenizerInComment = 0;
			tokenizerStartPos++; // Step over the '*' here; the increment below steps over the '/'
		}

		tokenizerStartPos++;
		M_DetectComment(&tokenizerStartPos);
	}

	// If the end of the string is reached, no token is to be read
	if (tokenizerStartPos >= tokenizerInputLength)
	{
		tokenizerEndPos = tokenizerInputLength;
		return NULL;
	}

	// If it's one of these three symbols, capture only this one character
	if (tokenizerInput[tokenizerStartPos] == ','
		|| tokenizerInput[tokenizerStartPos] == '{'
		|| tokenizerInput[tokenizerStartPos] == '}')
	{
		tokenizerEndPos = tokenizerStartPos + 1;
		tokenizerToken[i][0] = tokenizerInput[tokenizerStartPos];
		tokenizerToken[i][1] = '\0';
		return tokenizerToken[i];
	}

	// Return entire string within quotes, except without the quotes.
	if (tokenizerInput[tokenizerStartPos] == '"')
	{
		tokenizerEndPos = ++tokenizerStartPos;
		while (tokenizerEndPos < tokenizerInputLength && tokenizerInput[tokenizerEndPos] != '"')
			tokenizerEndPos++;

		M_ReadTokenString(i);

		// Step over the closing quote, but only if there is one: an unterminated string
		// ends at the end of the input, and the end position must not move past it.
		if (tokenizerEndPos < tokenizerInputLength)
			tokenizerEndPos++;

		return tokenizerToken[i];
	}

	// Now find the end of the token. Separators and the one-character symbols end it, and so does the start of a comment.
	tokenizerEndPos = tokenizerStartPos + 1;
	// A comment may start right after the first character (e.g. "1//note"); check before scanning on.
	M_DetectComment(&tokenizerEndPos);

	while (tokenizerEndPos < tokenizerInputLength && tokenizerInComment == 0)
	{
		const char c = tokenizerInput[tokenizerEndPos];

		if (c == ' '
			|| c == '\t'
			|| c == '\r'
			|| c == '\n'
			|| c == ','
			|| c == '{'
			|| c == '}'
			|| c == '=' || c == ';') // UDMF TEXTMAP.
			break;

		tokenizerEndPos++;
		// Try to detect comment starts now; if it's in a comment, we don't want it in this token
		M_DetectComment(&tokenizerEndPos);
	}

	M_ReadTokenString(i);
	return tokenizerToken[i];
}
/** Returns the position in the tokenizer input where the next M_TokenizerRead call resumes scanning.
  * The value can be handed back to M_TokenizerSetEndPos later to continue reading from the same spot.
  */
UINT32 M_TokenizerGetEndPos(void)
{
return tokenizerEndPos;
}
/** Moves the tokenizer so that the next M_TokenizerRead call resumes at the given position.
  *
  * \param newPos Position in the tokenizer input, as returned by M_TokenizerGetEndPos.
  */
void M_TokenizerSetEndPos(UINT32 newPos)
{
	tokenizerEndPos = newPos;
	// Comment state belongs to the old position (a scan may have stopped inside a comment, e.g. at the end of the input).
	// Clear it; M_TokenizerRead checks for a comment opener at the new position itself.
	tokenizerInComment = 0;
}
/** Count bits in a number.

View file

@ -1428,9 +1428,9 @@ UINT32 vertexesPos[UINT16_MAX];
UINT32 sectorsPos[UINT16_MAX];
// Determine total amount of map data in TEXTMAP.
static boolean TextmapCount(UINT8 *data, size_t size)
static boolean TextmapCount(size_t size)
{
char *tkn = M_GetToken((char *)data);
const char *tkn = M_TokenizerRead(0);
UINT8 brackets = 0;
nummapthings = 0;
@ -1442,20 +1442,16 @@ static boolean TextmapCount(UINT8 *data, size_t size)
// Look for namespace at the beginning.
if (!fastcmp(tkn, "namespace"))
{
Z_Free(tkn);
CONS_Alert(CONS_ERROR, "No namespace at beginning of lump!\n");
return false;
}
Z_Free(tkn);
// Check if namespace is valid.
tkn = M_GetToken(NULL);
tkn = M_TokenizerRead(0);
if (!fastcmp(tkn, "srb2"))
CONS_Alert(CONS_WARNING, "Invalid namespace '%s', only 'srb2' is supported.\n", tkn);
Z_Free(tkn);
tkn = M_GetToken(NULL);
while (tkn && M_GetTokenPos() < size)
while ((tkn = M_TokenizerRead(0)) && M_TokenizerGetEndPos() < size)
{
// Avoid anything inside bracketed stuff, only look for external keywords.
if (brackets)
@ -1467,24 +1463,19 @@ static boolean TextmapCount(UINT8 *data, size_t size)
brackets++;
// Check for valid fields.
else if (fastcmp(tkn, "thing"))
mapthingsPos[nummapthings++] = M_GetTokenPos();
mapthingsPos[nummapthings++] = M_TokenizerGetEndPos();
else if (fastcmp(tkn, "linedef"))
linesPos[numlines++] = M_GetTokenPos();
linesPos[numlines++] = M_TokenizerGetEndPos();
else if (fastcmp(tkn, "sidedef"))
sidesPos[numsides++] = M_GetTokenPos();
sidesPos[numsides++] = M_TokenizerGetEndPos();
else if (fastcmp(tkn, "vertex"))
vertexesPos[numvertexes++] = M_GetTokenPos();
vertexesPos[numvertexes++] = M_TokenizerGetEndPos();
else if (fastcmp(tkn, "sector"))
sectorsPos[numsectors++] = M_GetTokenPos();
sectorsPos[numsectors++] = M_TokenizerGetEndPos();
else
CONS_Alert(CONS_NOTICE, "Unknown field '%s'.\n", tkn);
Z_Free(tkn);
tkn = M_GetToken(NULL);
}
Z_Free(tkn);
if (brackets)
{
CONS_Alert(CONS_ERROR, "Unclosed brackets detected in textmap lump.\n");
@ -1494,7 +1485,7 @@ static boolean TextmapCount(UINT8 *data, size_t size)
return true;
}
static void ParseTextmapVertexParameter(UINT32 i, char *param, char *val)
static void ParseTextmapVertexParameter(UINT32 i, const char *param, const char *val)
{
if (fastcmp(param, "x"))
vertexes[i].x = FLOAT_TO_FIXED(atof(val));
@ -1541,7 +1532,7 @@ typedef struct textmap_plane_s {
textmap_plane_t textmap_planefloor = {0, 0, 0, 0, 0};
textmap_plane_t textmap_planeceiling = {0, 0, 0, 0, 0};
static void ParseTextmapSectorParameter(UINT32 i, char *param, char *val)
static void ParseTextmapSectorParameter(UINT32 i, const char *param, const char *val)
{
if (fastcmp(param, "heightfloor"))
sectors[i].floorheight = atol(val) << FRACBITS;
@ -1565,7 +1556,7 @@ static void ParseTextmapSectorParameter(UINT32 i, char *param, char *val)
Tag_FSet(&sectors[i].tags, atol(val));
else if (fastcmp(param, "moreids"))
{
char* id = val;
const char* id = val;
while (id)
{
Tag_Add(&sectors[i].tags, atol(id));
@ -1754,7 +1745,7 @@ static void ParseTextmapSectorParameter(UINT32 i, char *param, char *val)
sectors[i].triggerer = atol(val);
}
static void ParseTextmapSidedefParameter(UINT32 i, char *param, char *val)
static void ParseTextmapSidedefParameter(UINT32 i, const char *param, const char *val)
{
if (fastcmp(param, "offsetx"))
sides[i].textureoffset = atol(val)<<FRACBITS;
@ -1772,13 +1763,13 @@ static void ParseTextmapSidedefParameter(UINT32 i, char *param, char *val)
sides[i].repeatcnt = atol(val);
}
static void ParseTextmapLinedefParameter(UINT32 i, char *param, char *val)
static void ParseTextmapLinedefParameter(UINT32 i, const char *param, const char *val)
{
if (fastcmp(param, "id"))
Tag_FSet(&lines[i].tags, atol(val));
else if (fastcmp(param, "moreids"))
{
char* id = val;
const char* id = val;
while (id)
{
Tag_Add(&lines[i].tags, atol(id));
@ -1866,13 +1857,13 @@ static void ParseTextmapLinedefParameter(UINT32 i, char *param, char *val)
lines[i].flags |= ML_TFERLINE;
}
static void ParseTextmapThingParameter(UINT32 i, char *param, char *val)
static void ParseTextmapThingParameter(UINT32 i, const char *param, const char *val)
{
if (fastcmp(param, "id"))
Tag_FSet(&mapthings[i].tags, atol(val));
else if (fastcmp(param, "moreids"))
{
char* id = val;
const char* id = val;
while (id)
{
Tag_Add(&mapthings[i].tags, atol(id));
@ -1923,32 +1914,25 @@ static void ParseTextmapThingParameter(UINT32 i, char *param, char *val)
* \param Structure number (mapthings, sectors, ...).
* \param Parser function pointer.
*/
static void TextmapParse(UINT32 dataPos, size_t num, void (*parser)(UINT32, char *, char *))
static void TextmapParse(UINT32 dataPos, size_t num, void (*parser)(UINT32, const char *, const char *))
{
char *param, *val;
const char *param, *val;
M_SetTokenPos(dataPos);
param = M_GetToken(NULL);
M_TokenizerSetEndPos(dataPos);
param = M_TokenizerRead(0);
if (!fastcmp(param, "{"))
{
Z_Free(param);
CONS_Alert(CONS_WARNING, "Invalid UDMF data capsule!\n");
return;
}
Z_Free(param);
while (true)
{
param = M_GetToken(NULL);
param = M_TokenizerRead(0);
if (fastcmp(param, "}"))
{
Z_Free(param);
break;
}
val = M_GetToken(NULL);
val = M_TokenizerRead(1);
parser(num, param, val);
Z_Free(param);
Z_Free(val);
}
}
@ -2649,8 +2633,12 @@ static boolean P_LoadMapData(const virtres_t *virt)
if (udmf) // Count how many entries for each type we got in textmap.
{
virtlump_t *textmap = vres_Find(virt, "TEXTMAP");
if (!TextmapCount(textmap->data, textmap->size))
M_TokenizerOpen((char *)textmap->data);
if (!TextmapCount(textmap->size))
{
M_TokenizerClose();
return false;
}
}
else
{
@ -2704,7 +2692,10 @@ static boolean P_LoadMapData(const virtres_t *virt)
// Load map data.
if (udmf)
{
P_LoadTextmap();
M_TokenizerClose();
}
else
{
P_LoadVertices(virtvertexes->data);