// etqw-sdk/source/idlib/text/WLexer.cpp
// (repository viewer metadata: 959 lines, 21 KiB, C++; history entry dated 2008-05-29 00:00:00 +00:00)
// Copyright (C) 2007 Id Software, Inc.
//
#include "../precompiled.h"
#pragma hdrstop
// When PUNCTABLE is defined, punctuation lookup in this file goes through a
// first-character table (punctuationtable/nextpunctuation) instead of a linear
// scan over the whole punctuation list.
#define PUNCTABLE
// Default punctuation set used when SetPunctuations() is given NULL.
// Each entry pairs the wide punctuation string with its P_* id; the list is
// terminated by a {NULL, 0} entry.
//longer punctuations first
static wpunctuation_t default_punctuations[] = {
// binary operators
{L">>=",P_RSHIFT_ASSIGN},
{L"<<=",P_LSHIFT_ASSIGN},
//
{L"...",P_PARMS},
{L"->*",P_POINTER_TO_MEMBER_POINTER},
// define merge operator
{L"##",P_PRECOMPMERGE}, // pre-compiler
// logic operators
{L"&&",P_LOGIC_AND}, // pre-compiler
{L"||",P_LOGIC_OR}, // pre-compiler
{L">=",P_LOGIC_GEQ}, // pre-compiler
{L"<=",P_LOGIC_LEQ}, // pre-compiler
{L"==",P_LOGIC_EQ}, // pre-compiler
{L"!=",P_LOGIC_UNEQ}, // pre-compiler
// arithmetic operators
{L"*=",P_MUL_ASSIGN},
{L"/=",P_DIV_ASSIGN},
{L"%=",P_MOD_ASSIGN},
{L"+=",P_ADD_ASSIGN},
{L"-=",P_SUB_ASSIGN},
{L"++",P_INC},
{L"--",P_DEC},
// binary operators
{L"&=",P_BIN_AND_ASSIGN},
{L"|=",P_BIN_OR_ASSIGN},
{L"^=",P_BIN_XOR_ASSIGN},
{L">>",P_RSHIFT}, // pre-compiler
{L"<<",P_LSHIFT}, // pre-compiler
// member selection
{L"->",P_MEMBER_SELECTION_POINTER},
{L"::",P_SCOPE_RESOLUTION},
{L".*",P_POINTER_TO_MEMBER_OBJECT},
// arithmetic operators
{L"*",P_MUL}, // pre-compiler
{L"/",P_DIV}, // pre-compiler
{L"%",P_MOD}, // pre-compiler
{L"+",P_ADD}, // pre-compiler
{L"-",P_SUB}, // pre-compiler
{L"=",P_ASSIGN},
// binary operators
{L"&",P_BIN_AND}, // pre-compiler
{L"|",P_BIN_OR}, // pre-compiler
{L"^",P_BIN_XOR}, // pre-compiler
{L"~",P_BIN_NOT}, // pre-compiler
// logic operators
{L"!",P_LOGIC_NOT}, // pre-compiler
{L">",P_LOGIC_GREATER}, // pre-compiler
{L"<",P_LOGIC_LESS}, // pre-compiler
// member selection
{L".",P_MEMBER_SELECTION_OBJECT},
// separators
{L",",P_COMMA}, // pre-compiler
{L";",P_SEMICOLON},
// label indication
{L":",P_COLON}, // pre-compiler
// if statement
{L"?",P_QUESTIONMARK}, // pre-compiler
// embracements
{L"(",P_PARENTHESESOPEN}, // pre-compiler
{L")",P_PARENTHESESCLOSE}, // pre-compiler
{L"{",P_BRACEOPEN}, // pre-compiler
{L"}",P_BRACECLOSE}, // pre-compiler
{L"[",P_SQBRACKETOPEN},
{L"]",P_SQBRACKETCLOSE},
//
{L"\\",P_BACKSLASH},
// precompiler operator
{L"#",P_PRECOMP}, // pre-compiler
{L"$",P_DOLLAR},
{NULL, 0}
};
// Lookup tables for the default punctuation set, shared by every lexer that
// uses it and filled once by CreatePunctuationTable():
//   default_punctuationtable[ch] - index of the first punctuation starting with ch, or -1
//   default_nextpunctuation[i]   - index of the next punctuation with the same first character, or -1
static int default_punctuationtable[ 256 ];
static int default_nextpunctuation[ sizeof( default_punctuations ) / sizeof( wpunctuation_t ) ];
// set once the two default tables above have been built
static bool default_setup;
/*
================
idWLexer::CreatePunctuationTable
================
*/
void idWLexer::CreatePunctuationTable( const wpunctuation_t* punctuations ) {
int i, n, lastp;
const wpunctuation_t *p, *newp;
//get memory for the table
if ( punctuations == default_punctuations ) {
punctuationtable = default_punctuationtable;
nextpunctuation = default_nextpunctuation;
if ( default_setup ) {
return;
}
default_setup = true;
i = sizeof( default_punctuations ) / sizeof( punctuation_t );
}
else {
if ( !punctuationtable || punctuationtable == default_punctuationtable ) {
punctuationtable = (int *) Mem_Alloc( 256 * sizeof( int ) );
}
if ( nextpunctuation && nextpunctuation != default_nextpunctuation ) {
Mem_Free( nextpunctuation );
}
for (i = 0; punctuations[i].p; i++) {
}
nextpunctuation = (int *) Mem_Alloc(i * sizeof(int));
}
memset(punctuationtable, 0xFF, 256 * sizeof(int));
memset(nextpunctuation, 0xFF, i * sizeof(int));
//add the punctuations in the list to the punctuation table
for (i = 0; punctuations[i].p; i++) {
newp = &punctuations[i];
lastp = -1;
//sort the punctuations in this table entry on length (longer punctuations first)
for (n = punctuationtable[(unsigned int) newp->p[0]]; n >= 0; n = nextpunctuation[n] ) {
p = &punctuations[n];
if ( idWStr::Length( p->p ) < idWStr::Length( newp->p ) ) {
nextpunctuation[i] = n;
if (lastp >= 0) {
nextpunctuation[lastp] = i;
}
else {
punctuationtable[(unsigned int) newp->p[0]] = i;
}
break;
}
lastp = n;
}
if (n < 0) {
nextpunctuation[i] = -1;
if (lastp >= 0) {
nextpunctuation[lastp] = i;
}
else {
punctuationtable[(unsigned int) newp->p[0]] = i;
}
}
}
}
/*
================
idWLexer::Error

Formats a printf style message, marks the lexer as having had an error and
reports the message together with the current file name and line number
through idLib::common->Warning.

The message is truncated to fit the local buffer instead of overflowing it.
================
*/
void idWLexer::Error( const char *str, ... ) {
	char text[MAX_STRING_CHARS];
	va_list ap;

	hadError = true;

	va_start( ap, str );
	// bounded formatting: a long message must not write past the stack buffer
	vsnprintf( text, sizeof( text ), str, ap );
	va_end( ap );
	// guarantee termination even with runtimes that do not terminate on truncation
	text[ sizeof( text ) - 1 ] = '\0';

	idLib::common->Warning( "file %s, line %d: %s", filename.c_str(), line, text );
}
/*
================
idWLexer::Warning

Formats a printf style message, marks the lexer as having had a warning and
reports the message together with the current file name and line number
through idLib::common->Warning.

The message is truncated to fit the local buffer instead of overflowing it.
================
*/
void idWLexer::Warning( const char *str, ... ) {
	char text[4096];
	va_list ap;

	hadWarning = true;

	va_start( ap, str );
	// bounded formatting: a long message must not write past the stack buffer
	vsnprintf( text, sizeof( text ), str, ap );
	va_end( ap );
	// guarantee termination even with runtimes that do not terminate on truncation
	text[ sizeof( text ) - 1 ] = '\0';

	idLib::common->Warning( "file %s, line %d: %s", filename.c_str(), line, text );
}
/*
================
idWLexer::SetPunctuations
================
*/
void idWLexer::SetPunctuations( const wpunctuation_t* p ) {
#ifdef PUNCTABLE
if ( p != NULL ) {
CreatePunctuationTable( p );
} else {
CreatePunctuationTable( default_punctuations );
}
#endif //PUNCTABLE
if ( p != NULL ) {
punctuations = p;
} else {
punctuations = default_punctuations;
}
}
/*
================
idWToken::SkipRestOfLine
================
*/
bool idWLexer::SkipRestOfLine( void ) {
idWToken token;
while( ReadToken( &token ) ) {
if ( token.linesCrossed ) {
script_p = lastScript_p;
line = lastline;
return true;
}
}
return false;
}
/*
=================
idWLexer::SkipBracedSection
Skips until a matching close brace is found.
Internal brace depths are properly skipped.
=================
*/
bool idWLexer::SkipBracedSection( bool parseFirstBrace ) {
idWToken token;
int depth;
depth = parseFirstBrace ? 0 : 1;
do {
if ( !ReadToken( &token ) ) {
return false;
}
if ( token.type == TT_PUNCTUATION ) {
if ( token == L"{" ) {
depth++;
} else if ( token == L"}" ) {
depth--;
}
}
} while( depth );
return true;
}
/*
============
idWLexer::SkipBracedSectionExact

Skips a braced section by scanning raw characters rather than tokens, so
braces inside strings and comments are counted as well.

If parseFirstBrace is true the opening brace is first read as a token with
ExpectTokenString; otherwise it is assumed to have been consumed already.
Returns false if the opening brace is missing or the end of the script is
reached before the matching closing brace.

NOTE: the line counter is not advanced for newlines skipped here.
============
*/
bool idWLexer::SkipBracedSectionExact( bool parseFirstBrace ) {
	int depth;

	if ( parseFirstBrace ) {
		if ( !idWLexer::ExpectTokenString( L"{" ) ) {
			return false;
		}
	}

	depth = 1;
	while( depth ) {
		if ( !*idWLexer::script_p ) {
			return false;
		}

		// keep the full wide character: narrowing it to char would make
		// characters such as U+017B compare equal to '{'
		const wchar_t c = *(idWLexer::script_p++);

		switch ( c ) {
			case L'{':
				depth++;
				break;
			case L'}':
				depth--;
				break;
		}
	}
	return true;
}
/*
================
idWLexer::SkipWhiteSpace
Reads spaces, tabs, C-like comments etc.
When a newline character is found the scripts line counter is increased.
Returns 0 if there is no token left to be read.

Every character <= L' ' counts as white space. Both "//" line comments and
C style block comments are skipped.

If currentLine is true the function returns true as soon as a newline has been
consumed, either as plain white space or at the end of a "//" comment.
Newlines inside a block comment do not trigger this early return.
================
*/
bool idWLexer::SkipWhiteSpace( bool currentLine ) {
while ( true ) {
// skip white space
while ( *script_p <= L' ' ) {
// the terminating zero is also <= L' ', so the end of the script is detected here
if ( !*script_p ) {
return 0;
}
if ( *script_p == L'\n' ) {
line++;
if ( currentLine ) {
// step over the newline and stop at the start of the next line
script_p++;
return 1;
}
}
script_p++;
}
// skip comments
if ( *script_p == L'/' ) {
// comments //
if ( *( script_p + 1 ) == L'/' ) {
// step onto the second '/', the loop below then advances past it
script_p++;
do {
script_p++;
if ( *script_p == L'\0' ) {
return false;
}
}
while ( *script_p != L'\n' );
// count and consume the newline that ended the comment
line++;
script_p++;
if ( currentLine ) {
return true;
}
if ( *script_p == L'\0' ) {
return false;
}
// there may be more white space or comments on the following lines
continue;
}
// comments /* */
else if ( *( script_p + 1 ) == L'*' ) {
// step onto the '*', the loop below then advances past it
script_p++;
while ( true ) {
script_p++;
if ( *script_p == L'\0' ) {
return 0;
}
if ( *script_p == L'\n' ) {
line++;
}
else if ( *script_p == L'/' ) {
// NOTE: the preceding character may be the '*' of the opening
// delimiter, so the sequence "/*/" closes the comment right away
if ( *( script_p - 1 ) == L'*' ) {
break;
}
// a '/' followed by '*' inside a comment looks like a nested comment start
if ( *( script_p + 1 ) == L'*' ) {
Warning( "nested comment" );
}
}
}
// step over the closing '/'
script_p++;
if ( *script_p == L'\0' ) {
return false;
}
continue;
}
}
// neither white space nor the start of a comment: a token starts here
break;
}
return true;
}
/*
================
idWLexer::ReadEscapeCharacter

Decodes the escape sequence at the script pointer, which must point at the
leading backslash. On success the decoded character is stored in *ch, the
script pointer is left after the sequence and true is returned.

Only the single character escapes listed below are supported; numeric escapes
(hexadecimal or decimal character codes) are not. For anything else an error
is reported and false is returned, with the script pointer left on the
offending character and *ch untouched.
================
*/
bool idWLexer::ReadEscapeCharacter( wchar_t* ch ) {
	wchar_t c;

	// step over the leading '\\'
	script_p++;

	// determine the escape character
	switch( *script_p ) {
		case L'\\': c = L'\\'; break;
		case L'n': c = L'\n'; break;
		case L'r': c = L'\r'; break;
		case L't': c = L'\t'; break;
		case L'v': c = L'\v'; break;
		case L'b': c = L'\b'; break;
		case L'f': c = L'\f'; break;
		case L'a': c = L'\a'; break;
		case L'\'': c = L'\''; break;
		case L'\"': c = L'\"'; break;
		case L'\?': c = L'\?'; break;
		default:
			Error( "unsupported escape character" );
			return false;
	}

	// step over the escape character
	script_p++;
	// store the escape character
	*ch = c;
	// successfully read escape character
	return true;
}
/*
================
idWLexer::ReadString

Reads a quoted token starting at the opening quote. A double quote yields a
TT_STRING token, any other quote character a TT_LITERAL token.
Escape characters are interpreted through ReadEscapeCharacter.
Consecutive strings are NOT concatenated: reading stops at the closing quote.

Fails with an error if the end of the script or a newline is reached before
the closing quote, or if an escape sequence is not supported.
For literals the sub type is the first character, for strings the length.
================
*/
bool idWLexer::ReadString( idWToken *token, wchar_t quote ) {
	token->type = ( quote == L'\"' ) ? TT_STRING : TT_LITERAL;

	// step over the opening quote
	script_p++;

	for ( ;; ) {
		const wchar_t current = *script_p;

		// escape sequence
		if ( current == L'\\' ) {
			wchar_t decoded;
			if ( !ReadEscapeCharacter( &decoded ) ) {
				return false;
			}
			token->AppendDirty( decoded );
			continue;
		}

		// closing quote: step over it and stop
		if ( current == quote ) {
			script_p++;
			break;
		}

		if ( current == L'\0' ) {
			Error( "missing trailing quote" );
			return false;
		}
		if ( current == L'\n' ) {
			Error( "newline inside string" );
			return false;
		}

		// ordinary character
		token->AppendDirty( current );
		script_p++;
	}

	token->data[token->len] = L'\0';

	if ( token->type == TT_LITERAL ) {
		token->subtype = (*token)[0];
	} else {
		// the sub type is the length of the string
		token->subtype = token->Length();
	}
	return true;
}
/*
================
idWLexer::ReadName

Reads a TT_NAME token. The character at the script pointer is always taken as
the first character of the name (the caller decides whether it may start a
name); after that ASCII letters, digits and underscores are consumed.
The sub type is set to the length of the name. Always returns true.
================
*/
bool idWLexer::ReadName( idWToken *token ) {
	token->type = TT_NAME;

	// first character is accepted unconditionally
	token->AppendDirty( *script_p++ );

	for ( ;; ) {
		const wchar_t next = *script_p;
		const bool isLower = ( next >= L'a' && next <= L'z' );
		const bool isUpper = ( next >= L'A' && next <= L'Z' );
		const bool isDigit = ( next >= L'0' && next <= L'9' );
		if ( !( isLower || isUpper || isDigit || next == L'_' ) ) {
			break;
		}
		token->AppendDirty( *script_p++ );
	}

	token->data[token->len] = L'\0';
	//the sub type is the length of the name
	token->subtype = token->Length();
	return true;
}
/*
================
idWLexer::CheckString

Returns true if the script at the current position starts with the given
zero terminated string. The script pointer is not moved.
================
*/
ID_INLINE bool idWLexer::CheckString( const wchar_t *str ) const {
	const wchar_t *cursor = script_p;

	while ( *str ) {
		if ( *cursor != *str ) {
			return false;
		}
		++cursor;
		++str;
	}
	return true;
}
/*
================
idWLexer::ReadNumber

Reads a TT_NUMBER token starting at the script pointer and sets the sub type
flags describing it:
  - "0x"/"0X" prefix: hexadecimal integer
  - "0b"/"0B" prefix: binary integer
  - other leading '0' not followed by '.': octal integer
  - otherwise a decimal integer, or a float when the number contains exactly
    one dot or is followed by an 'e' exponent
Floats may carry a "#INF", "#IND", "#NAN", "#QNAN" or "#SNAN" marker; an error
message with the parsed text is reported for those.
A trailing f/F or l/L on floats and up to two l/L/u/U suffixes on integers are
consumed and recorded in the sub type, but not stored in the token text.

More than one dot is reported as an error; the function still returns true.
================
*/
bool idWLexer::ReadNumber( idWToken *token ) {
	int i;
	int dot;
	int suffixLen;
	wchar_t c, c2;

	token->type = TT_NUMBER;
	token->subtype = 0;
	token->intvalue = 0;
	token->floatvalue = 0;

	c = *script_p;
	c2 = *(script_p + 1);

	if ( c == L'0' && c2 != L'.' ) {
		// check for a hexadecimal number
		if ( c2 == L'x' || c2 == L'X' ) {
			token->AppendDirty( *script_p++ );
			token->AppendDirty( *script_p++ );
			c = *script_p;
			while((c >= L'0' && c <= L'9') ||
						(c >= L'a' && c <= L'f') ||
						(c >= L'A' && c <= L'F')) {
				token->AppendDirty( c );
				c = *(++script_p);
			}
			token->subtype = TT_HEX | TT_INTEGER;
		}
		// check for a binary number
		else if ( c2 == L'b' || c2 == L'B' ) {
			token->AppendDirty( *script_p++ );
			token->AppendDirty( *script_p++ );
			c = *script_p;
			while( c == L'0' || c == L'1' ) {
				token->AppendDirty( c );
				c = *(++script_p);
			}
			token->subtype = TT_BINARY | TT_INTEGER;
		}
		// its an octal number
		else {
			token->AppendDirty( *script_p++ );
			c = *script_p;
			while( c >= L'0' && c <= L'7' ) {
				token->AppendDirty( c );
				c = *(++script_p);
			}
			token->subtype = TT_OCTAL | TT_INTEGER;
		}
	}
	else {
		// decimal integer or floating point number or ip address
		dot = 0;
		while( 1 ) {
			if ( c >= L'0' && c <= L'9' ) {
			} else if ( c == L'.' ) {
				dot++;
			} else {
				break;
			}
			token->AppendDirty( c );
			c = *(++script_p);
		}
		// if a floating point number
		if ( dot == 1 || c == L'e' ) {
			token->subtype = TT_DECIMAL | TT_FLOAT;
			// check for floating point exponent
			if ( c == L'e' ) {
				// keep the exponent marker in the token text, otherwise "1e5"
				// would be stored as "15"
				token->AppendDirty( c );
				c = *(++script_p);
				if ( c == L'-' ) {
					token->AppendDirty( c );
					c = *(++script_p);
				} else if ( c == L'+' ) {
					token->AppendDirty( c );
					c = *(++script_p);
				}
				while( c >= L'0' && c <= L'9' ) {
					token->AppendDirty( c );
					c = *(++script_p);
				}
			}
			// check for floating point exception infinite 1.#INF or indefinite 1.#IND or NaN
			else if ( c == L'#' ) {
				token->AppendDirty( c );
				c = *(++script_p);

				// number of marker characters to copy; stays 0 when nothing matches
				suffixLen = 0;
				if ( CheckString( L"INF" ) ) {
					token->subtype |= TT_INFINITE;
					suffixLen = 3;
				} else if ( CheckString( L"IND" ) ) {
					token->subtype |= TT_INDEFINITE;
					suffixLen = 3;
				} else if ( CheckString( L"NAN" ) ) {
					token->subtype |= TT_NAN;
					suffixLen = 3;
				} else if ( CheckString( L"QNAN" ) ) {
					token->subtype |= TT_NAN;
					suffixLen = 4;
				} else if ( CheckString( L"SNAN" ) ) {
					token->subtype |= TT_NAN;
					suffixLen = 4;
				}
				for ( i = 0; i < suffixLen; i++ ) {
					token->AppendDirty( c );
					c = *(++script_p);
				}
				while( c >= L'0' && c <= L'9' ) {
					token->AppendDirty( c );
					c = *(++script_p);
				}
				// zero terminate for c_str without making the terminator part of the token
				token->data[token->len] = L'\0';
				// c_str() is a wide string, so it needs %ls
				Error( "parsed %ls", token->c_str() );
			}
		}
		else if ( dot > 1 ) {
			Error( "more than one dot in number" );
		}
		else {
			token->subtype = TT_DECIMAL | TT_INTEGER;
		}
	}

	if ( token->subtype & TT_FLOAT ) {
		if ( c > L' ' ) {
			// single-precision: float
			if ( c == L'f' || c == L'F' ) {
				token->subtype |= TT_SINGLE_PRECISION;
				script_p++;
			}
			// extended-precision: long double
			else if ( c == L'l' || c == L'L' ) {
				token->subtype |= TT_EXTENDED_PRECISION;
				script_p++;
			}
			// default is double-precision: double
			else {
				token->subtype |= TT_DOUBLE_PRECISION;
			}
		}
		else {
			token->subtype |= TT_DOUBLE_PRECISION;
		}
	}
	else if ( token->subtype & TT_INTEGER ) {
		if ( c > L' ' ) {
			// default: signed long
			for ( i = 0; i < 2; i++ ) {
				// long integer
				if ( c == L'l' || c == L'L' ) {
					token->subtype |= TT_LONG;
				}
				// unsigned integer
				else if ( c == L'u' || c == L'U' ) {
					token->subtype |= TT_UNSIGNED;
				}
				else {
					break;
				}
				c = *(++script_p);
			}
		}
	}

	token->data[token->len] = L'\0';
	return true;
}
/*
================
idWLexer::ReadPunctuation

Tries to match one of the configured punctuations at the script pointer.
With PUNCTABLE defined only the candidates chained under the current first
character are tried (longest first), otherwise the whole list is scanned in
order. On a match the punctuation text is copied into the token, the script
pointer is advanced past it, the token type is set to TT_PUNCTUATION and the
sub type to the punctuation id.
Returns false if nothing matches or the character is outside the table range.
================
*/
bool idWLexer::ReadPunctuation( idWToken *token ) {
	int index;

	// such characters are never looked up in the table
	if ( (unsigned int)*(script_p) >= 0xFF ) {
		return false;
	}

#ifdef PUNCTABLE
	for ( index = punctuationtable[(unsigned int)*(script_p)]; index >= 0; index = nextpunctuation[index] )
#else
	for ( index = 0; punctuations[index].p; index++ )
#endif
	{
		const wpunctuation_t *candidate = &punctuations[index];
		const wchar_t *text = candidate->p;

		// count how many leading characters of the candidate match the script
		int matched = 0;
		while ( text[matched] && script_p[matched] && script_p[matched] == text[matched] ) {
			matched++;
		}

		// the whole candidate must have been matched
		if ( text[matched] ) {
			continue;
		}

		// copy the punctuation including its terminating zero
		token->EnsureAlloced( matched + 1, false );
		for ( int k = 0; k <= matched; k++ ) {
			token->data[k] = text[k];
		}
		token->len = matched;

		script_p += matched;
		token->type = TT_PUNCTUATION;
		// sub type is the punctuation id
		token->subtype = candidate->n;
		return true;
	}
	return false;
}
/*
================
idWLexer::ReadToken

Reads the next token from the loaded script into *token.

Before reading, the script pointer and line counter are saved so the token
can be unread, and the white space preceding the token is recorded both in
the lexer and in the token. The token is then dispatched on its first
character: number, quoted string/literal, name, or punctuation.

Returns false if no script is loaded, if there is no token left, or if the
token could not be read. An unknown character is reported as an error.

NOTE: the "only strings" mode of the narrow lexer (LEXFL_ONLYSTRINGS) is
commented out in the original of this function and is not available here.
================
*/
bool idWLexer::ReadToken( idWToken* token ) {
	if ( !loaded ) {
		idLib::common->Error( "idWLexer::ReadToken: no file loaded" );
		return false;
	}

	// remember position and line so the token can be unread
	lastScript_p = script_p;
	lastline = line;

	// reset the token text
	token->data[0] = L'\0';
	token->len = 0;

	// white space in front of the token: start ...
	whiteSpaceStart_p = script_p;
	token->whiteSpaceStart_p = script_p;

	if ( !SkipWhiteSpace( false ) ) {
		return false;
	}

	// ... and end
	whiteSpaceEnd_p = script_p;
	token->whiteSpaceEnd_p = script_p;

	// line bookkeeping and flags
	token->line = line;
	token->linesCrossed = line - lastline;
	token->flags = 0;

	const wchar_t first = *script_p;

	// a number starts with a digit, or with a dot directly followed by a digit
	bool startsNumber = ( first >= L'0' && first <= L'9' );
	if ( !startsNumber && first == L'.' ) {
		const wchar_t second = *( script_p + 1 );
		startsNumber = ( second >= L'0' && second <= L'9' );
	}
	if ( startsNumber ) {
		return ReadNumber( token );
	}

	// a leading quote starts a string or literal
	if ( first == L'\"' || first == L'\'' ) {
		return ReadString( token, first );
	}

	// a letter or underscore starts a name
	const bool isLetter = ( first >= L'a' && first <= L'z' ) || ( first >= L'A' && first <= L'Z' );
	if ( isLetter || first == L'_' ) {
		return ReadName( token );
	}

	// everything else has to be a punctuation
	if ( ReadPunctuation( token ) ) {
		return true;
	}

	Error( "unknown punctuation %lc", first );
	return false;
}
/*
================
idWLexer::ExpectTokenString
================
*/
bool idWLexer::ExpectTokenString( const wchar_t* string ) {
idWToken token;
if ( !ReadToken( &token ) ) {
Error( "couldn't find expected '%ls'", string );
return false;
}
if ( token != string ) {
Error( "expected '%ls' but found '%ls'", string, token.c_str() );
return false;
}
return true;
}
/*
================
idWLexer::ExpectAnyToken

Reads the next token into *token. Reports an error and returns false if no
token could be read.
================
*/
bool idWLexer::ExpectAnyToken( idWToken* token ) {
	if ( ReadToken( token ) ) {
		return true;
	}

	Error( "couldn't read expected token" );
	return false;
}
/*
================
idWLexer::LoadMemory
================
*/
bool idWLexer::LoadMemory( const wchar_t* ptr, int length, const char* name, int startLine ) {
if ( loaded ) {
idLib::common->Error( "idWLexer::LoadMemory: another script already loaded" );
return false;
}
filename = name;
filename.CollapsePath();
buffer = ptr;
length = length;
// pointer in script buffer
script_p = buffer;
// pointer in script buffer before reading token
lastScript_p = buffer;
// pointer to end of script buffer
end_p = &(buffer[length]);
line = startLine;
lastline = startLine;
allocated = false;
loaded = true;
return true;
}
/*
================
idWLexer::FreeSource

Marks the lexer as having no script loaded. The script buffer is released
only when it is flagged as allocated by the lexer.
================
*/
void idWLexer::FreeSource( void ) {
	loaded = false;

	if ( !allocated ) {
		// the buffer belongs to the caller, nothing to release
		return;
	}

	Mem_Free( (void*)buffer );
	buffer = NULL;
	allocated = false;
}
/*
================
idWLexer::idWLexer

Creates a lexer without a script: nothing loaded, empty file name, default
punctuations, line counters at zero and no error or warning recorded.
================
*/
idWLexer::idWLexer( void ) {
	// no script attached yet
	loaded = false;
	allocated = false;
	filename = "";

	// position bookkeeping
	line = 0;
	lastline = 0;

	// diagnostics state
	hadError = false;
	hadWarning = false;

	// NULL selects the default punctuation list
	SetPunctuations( NULL );
}
/*
================
idWLexer::idWLexer

Creates a lexer with default punctuations and immediately attaches it to the
given in-memory script through LoadMemory, which also sets the file name and
the line counters.
================
*/
idWLexer::idWLexer( const wchar_t* ptr, int length, const char* name, int startLine ) {
	// start out unloaded so LoadMemory accepts the script
	loaded = false;
	allocated = false;

	hadError = false;
	hadWarning = false;

	// NULL selects the default punctuation list
	SetPunctuations( NULL );

	LoadMemory( ptr, length, name, startLine );
}
/*
================
idWLexer::~idWLexer

Releases the punctuation lookup tables if they were allocated for a custom
punctuation list (the shared default tables are static and are left alone),
then frees the script source.

Both constructors in this file call SetPunctuations, so the table pointers are
always initialized by the time the destructor runs.
NOTE(review): this assumes lexer objects are not copied bitwise while a custom
punctuation list is set; confirm against the class declaration.
================
*/
idWLexer::~idWLexer( void ) {
#ifdef PUNCTABLE
	// CreatePunctuationTable allocates these with Mem_Alloc for custom lists
	if ( punctuationtable != NULL && punctuationtable != default_punctuationtable ) {
		Mem_Free( punctuationtable );
		punctuationtable = NULL;
	}
	if ( nextpunctuation != NULL && nextpunctuation != default_nextpunctuation ) {
		Mem_Free( nextpunctuation );
		nextpunctuation = NULL;
	}
#endif //PUNCTABLE

	FreeSource();
}