// Copyright (C) 2007 Id Software, Inc. // #include "../precompiled.h" #pragma hdrstop #define PUNCTABLE //longer punctuations first static wpunctuation_t default_punctuations[] = { // binary operators {L">>=",P_RSHIFT_ASSIGN}, {L"<<=",P_LSHIFT_ASSIGN}, // {L"...",P_PARMS}, {L"->*",P_POINTER_TO_MEMBER_POINTER}, // define merge operator {L"##",P_PRECOMPMERGE}, // pre-compiler // logic operators {L"&&",P_LOGIC_AND}, // pre-compiler {L"||",P_LOGIC_OR}, // pre-compiler {L">=",P_LOGIC_GEQ}, // pre-compiler {L"<=",P_LOGIC_LEQ}, // pre-compiler {L"==",P_LOGIC_EQ}, // pre-compiler {L"!=",P_LOGIC_UNEQ}, // pre-compiler // arithmetic operators {L"*=",P_MUL_ASSIGN}, {L"/=",P_DIV_ASSIGN}, {L"%=",P_MOD_ASSIGN}, {L"+=",P_ADD_ASSIGN}, {L"-=",P_SUB_ASSIGN}, {L"++",P_INC}, {L"--",P_DEC}, // binary operators {L"&=",P_BIN_AND_ASSIGN}, {L"|=",P_BIN_OR_ASSIGN}, {L"^=",P_BIN_XOR_ASSIGN}, {L">>",P_RSHIFT}, // pre-compiler {L"<<",P_LSHIFT}, // pre-compiler // member selection {L"->",P_MEMBER_SELECTION_POINTER}, {L"::",P_SCOPE_RESOLUTION}, {L".*",P_POINTER_TO_MEMBER_OBJECT}, // arithmetic operators {L"*",P_MUL}, // pre-compiler {L"/",P_DIV}, // pre-compiler {L"%",P_MOD}, // pre-compiler {L"+",P_ADD}, // pre-compiler {L"-",P_SUB}, // pre-compiler {L"=",P_ASSIGN}, // binary operators {L"&",P_BIN_AND}, // pre-compiler {L"|",P_BIN_OR}, // pre-compiler {L"^",P_BIN_XOR}, // pre-compiler {L"~",P_BIN_NOT}, // pre-compiler // logic operators {L"!",P_LOGIC_NOT}, // pre-compiler {L">",P_LOGIC_GREATER}, // pre-compiler {L"<",P_LOGIC_LESS}, // pre-compiler // member selection {L".",P_MEMBER_SELECTION_OBJECT}, // separators {L",",P_COMMA}, // pre-compiler {L";",P_SEMICOLON}, // label indication {L":",P_COLON}, // pre-compiler // if statement {L"?",P_QUESTIONMARK}, // pre-compiler // embracements {L"(",P_PARENTHESESOPEN}, // pre-compiler {L")",P_PARENTHESESCLOSE}, // pre-compiler {L"{",P_BRACEOPEN}, // pre-compiler {L"}",P_BRACECLOSE}, // pre-compiler {L"[",P_SQBRACKETOPEN}, {L"]",P_SQBRACKETCLOSE}, // {L"\\",P_BACKSLASH}, // precompiler operator {L"#",P_PRECOMP}, // pre-compiler {L"$",P_DOLLAR}, {NULL, 0} }; static int default_punctuationtable[ 256 ]; static int default_nextpunctuation[ sizeof( default_punctuations ) / sizeof( wpunctuation_t ) ]; static bool default_setup; /* ================ idWLexer::CreatePunctuationTable ================ */ void idWLexer::CreatePunctuationTable( const wpunctuation_t* punctuations ) { int i, n, lastp; const wpunctuation_t *p, *newp; //get memory for the table if ( punctuations == default_punctuations ) { punctuationtable = default_punctuationtable; nextpunctuation = default_nextpunctuation; if ( default_setup ) { return; } default_setup = true; i = sizeof( default_punctuations ) / sizeof( punctuation_t ); } else { if ( !punctuationtable || punctuationtable == default_punctuationtable ) { punctuationtable = (int *) Mem_Alloc( 256 * sizeof( int ) ); } if ( nextpunctuation && nextpunctuation != default_nextpunctuation ) { Mem_Free( nextpunctuation ); } for (i = 0; punctuations[i].p; i++) { } nextpunctuation = (int *) Mem_Alloc(i * sizeof(int)); } memset(punctuationtable, 0xFF, 256 * sizeof(int)); memset(nextpunctuation, 0xFF, i * sizeof(int)); //add the punctuations in the list to the punctuation table for (i = 0; punctuations[i].p; i++) { newp = &punctuations[i]; lastp = -1; //sort the punctuations in this table entry on length (longer punctuations first) for (n = punctuationtable[(unsigned int) newp->p[0]]; n >= 0; n = nextpunctuation[n] ) { p = &punctuations[n]; if ( idWStr::Length( p->p ) < idWStr::Length( newp->p ) ) { nextpunctuation[i] = n; if (lastp >= 0) { nextpunctuation[lastp] = i; } else { punctuationtable[(unsigned int) newp->p[0]] = i; } break; } lastp = n; } if (n < 0) { nextpunctuation[i] = -1; if (lastp >= 0) { nextpunctuation[lastp] = i; } else { punctuationtable[(unsigned int) newp->p[0]] = i; } } } } /* ================ idWLexer::Error ================ */ void idWLexer::Error( const char *str, ... ) { char text[MAX_STRING_CHARS]; va_list ap; hadError = true; va_start( ap, str ); vsprintf( text, str, ap ); va_end( ap ); idLib::common->Warning( "file %s, line %d: %s", filename.c_str(), line, text ); } /* ================ idWLexer::Warning ================ */ void idWLexer::Warning( const char *str, ... ) { char text[4096]; va_list ap; hadWarning = true; va_start( ap, str ); vsprintf( text, str, ap ); va_end( ap ); idLib::common->Warning( "file %s, line %d: %s", filename.c_str(), line, text ); } /* ================ idWLexer::SetPunctuations ================ */ void idWLexer::SetPunctuations( const wpunctuation_t* p ) { #ifdef PUNCTABLE if ( p != NULL ) { CreatePunctuationTable( p ); } else { CreatePunctuationTable( default_punctuations ); } #endif //PUNCTABLE if ( p != NULL ) { punctuations = p; } else { punctuations = default_punctuations; } } /* ================ idWToken::SkipRestOfLine ================ */ bool idWLexer::SkipRestOfLine( void ) { idWToken token; while( ReadToken( &token ) ) { if ( token.linesCrossed ) { script_p = lastScript_p; line = lastline; return true; } } return false; } /* ================= idWLexer::SkipBracedSection Skips until a matching close brace is found. Internal brace depths are properly skipped. ================= */ bool idWLexer::SkipBracedSection( bool parseFirstBrace ) { idWToken token; int depth; depth = parseFirstBrace ? 0 : 1; do { if ( !ReadToken( &token ) ) { return false; } if ( token.type == TT_PUNCTUATION ) { if ( token == L"{" ) { depth++; } else if ( token == L"}" ) { depth--; } } } while( depth ); return true; } /* ============ idWLexer::SkipBracedSectionExact ============ */ bool idWLexer::SkipBracedSectionExact( bool parseFirstBrace ) { int depth; if ( parseFirstBrace ) { if ( !idWLexer::ExpectTokenString( L"{" ) ) { return false; } } depth = 1; while( depth ) { if ( !*idWLexer::script_p ) { return false; } char c = *(idWLexer::script_p++); switch ( c ) { case L'{': depth++; break; case L'}': depth--; break; } } return true; } /* ================ idWLexer::SkipWhiteSpace Reads spaces, tabs, C-like comments etc. When a newline character is found the scripts line counter is increased. Returns 0 if there is no token left to be read. ================ */ bool idWLexer::SkipWhiteSpace( bool currentLine ) { while ( true ) { // skip white space while ( *script_p <= L' ' ) { if ( !*script_p ) { return 0; } if ( *script_p == L'\n' ) { line++; if ( currentLine ) { script_p++; return 1; } } script_p++; } // skip comments if ( *script_p == L'/' ) { // comments // if ( *( script_p + 1 ) == L'/' ) { script_p++; do { script_p++; if ( *script_p == L'\0' ) { return false; } } while ( *script_p != L'\n' ); line++; script_p++; if ( currentLine ) { return true; } if ( *script_p == L'\0' ) { return false; } continue; } // comments /* */ else if ( *( script_p + 1 ) == L'*' ) { script_p++; while ( true ) { script_p++; if ( *script_p == L'\0' ) { return 0; } if ( *script_p == L'\n' ) { line++; } else if ( *script_p == L'/' ) { if ( *( script_p - 1 ) == L'*' ) { break; } if ( *( script_p + 1 ) == L'*' ) { Warning( "nested comment" ); } } } script_p++; if ( *script_p == L'\0' ) { return false; } continue; } } break; } return true; } /* ================ idWLexer::ReadEscapeCharacter ================ */ bool idWLexer::ReadEscapeCharacter( wchar_t* ch ) { wchar_t c; //int val, i; // step over the leading '\\' script_p++; // determine the escape character switch( *script_p ) { case L'\\': c = L'\\'; break; case L'n': c = L'\n'; break; case L'r': c = L'\r'; break; case L't': c = L'\t'; break; case L'v': c = L'\v'; break; case L'b': c = L'\b'; break; case L'f': c = L'\f'; break; case L'a': c = L'\a'; break; case L'\'': c = L'\''; break; case L'\"': c = L'\"'; break; case L'\?': c = L'\?'; break; default: Error( "unsupported escape character" ); return false; break; #if 0 case L'x': { script_p++; for ( i = 0, val = 0; ; i++, script_p++ ) { c = *script_p; if ( c >= L'0' && c <= L'9' ) c = c - L'0'; else if (c >= L'A' && c <= L'Z') c = c - L'A' + 10; else if (c >= L'a' && c <= L'z') c = c - L'a' + 10; else break; val = ( val << 4 ) + c; } script_p--; if ( val > 0xFF ) { Warning( "too large value in escape character" ); val = 0xFF; } c = val; break; } default: //NOTE: decimal ASCII code, NOT octal { if ( *script_p < L'0' || *script_p > L'9' ) { idLexer::Error("unknown escape char"); } for ( i = 0, val = 0; ; i++, script_p++ ) { c = *script_p; if ( c >= L'0' && c <= L'9' ) c = c - L'0'; else break; val = val * 10 + c; } script_p--; if ( val > 0xFF ) { Warning( "too large value in escape character" ); val = 0xFF; } c = val; break; } } #endif } // step over the escape character or the last digit of the number script_p++; // store the escape character *ch = c; // succesfully read escape character return true; } /* ================ idWLexer::ReadString Escape characters are interpretted. Reads two strings with only a white space between them as one string. ================ */ bool idWLexer::ReadString( idWToken *token, wchar_t quote ) { wchar_t ch; if ( quote == L'\"' ) { token->type = TT_STRING; } else { token->type = TT_LITERAL; } // leading quote script_p++; while ( true ) { // if there is an escape character and escape characters are allowed if ( *script_p == L'\\' ) { if ( !ReadEscapeCharacter( &ch ) ) { return false; } token->AppendDirty( ch ); } // if a trailing quote else if ( *script_p == quote ) { // step over the quote script_p++; // consecutive strings should not be concatenated break; } else { if ( *script_p == L'\0' ) { Error( "missing trailing quote" ); return false; } if ( *script_p == L'\n' ) { Error( "newline inside string" ); return false; } token->AppendDirty( *script_p++ ); } } token->data[token->len] = L'\0'; if ( token->type == TT_LITERAL ) { token->subtype = (*token)[0]; } else { // the sub type is the length of the string token->subtype = token->Length(); } return true; } /* ================ idWLexer::ReadName ================ */ bool idWLexer::ReadName( idWToken *token ) { wchar_t c; token->type = TT_NAME; do { token->AppendDirty( *script_p++ ); c = *script_p; } while ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') || (c >= L'0' && c <= L'9') || c == L'_' ); token->data[token->len] = L'\0'; //the sub type is the length of the name token->subtype = token->Length(); return true; } /* ================ idWLexer::CheckString ================ */ ID_INLINE bool idWLexer::CheckString( const wchar_t *str ) const { int i; for ( i = 0; str[i]; i++ ) { if ( script_p[i] != str[i] ) { return false; } } return true; } /* ================ idWLexer::ReadNumber ================ */ bool idWLexer::ReadNumber( idWToken *token ) { int i; int dot; wchar_t c, c2; token->type = TT_NUMBER; token->subtype = 0; token->intvalue = 0; token->floatvalue = 0; c = *script_p; c2 = *(script_p + 1); if ( c == L'0' && c2 != L'.' ) { // check for a hexadecimal number if ( c2 == L'x' || c2 == L'X' ) { token->AppendDirty( *script_p++ ); token->AppendDirty( *script_p++ ); c = *script_p; while((c >= L'0' && c <= L'9') || (c >= L'a' && c <= L'f') || (c >= L'A' && c <= L'F')) { token->AppendDirty( c ); c = *(++script_p); } token->subtype = TT_HEX | TT_INTEGER; } // check for a binary number else if ( c2 == L'b' || c2 == L'B' ) { token->AppendDirty( *script_p++ ); token->AppendDirty( *script_p++ ); c = *script_p; while( c == L'0' || c == L'1' ) { token->AppendDirty( c ); c = *(++script_p); } token->subtype = TT_BINARY | TT_INTEGER; } // its an octal number else { token->AppendDirty( *script_p++ ); c = *script_p; while( c >= L'0' && c <= L'7' ) { token->AppendDirty( c ); c = *(++script_p); } token->subtype = TT_OCTAL | TT_INTEGER; } } else { // decimal integer or floating point number or ip address dot = 0; while( 1 ) { if ( c >= L'0' && c <= L'9' ) { } else if ( c == L'.' ) { dot++; } else { break; } token->AppendDirty( c ); c = *(++script_p); } // if a floating point number if ( dot == 1 || c == L'e' ) { token->subtype = TT_DECIMAL | TT_FLOAT; // check for floating point exponent if ( c == L'e' ) { c = *(++script_p); if ( c == L'-' ) { token->AppendDirty( c ); c = *(++script_p); } else if ( c == L'+' ) { token->AppendDirty( c ); c = *(++script_p); } while( c >= L'0' && c <= L'9' ) { token->AppendDirty( c ); c = *(++script_p); } } // check for floating point exception infinite 1.#INF or indefinite 1.#IND or NaN else if ( c == L'#' ) { token->AppendDirty( c ); c = *(++script_p); if ( CheckString( L"INF" ) ) { token->subtype |= TT_INFINITE; c2 = 3; } else if ( CheckString( L"IND" ) ) { token->subtype |= TT_INDEFINITE; c2 = 3; } else if ( CheckString( L"NAN" ) ) { token->subtype |= TT_NAN; c2 = 3; } else if ( CheckString( L"QNAN" ) ) { token->subtype |= TT_NAN; c2 = 4; } else if ( CheckString( L"SNAN" ) ) { token->subtype |= TT_NAN; c2 = 4; } for ( i = 0; i < c2; i++ ) { token->AppendDirty( c ); c = *(++script_p); } while( c >= L'0' && c <= L'9' ) { token->AppendDirty( c ); c = *(++script_p); } token->AppendDirty( 0 ); // zero terminate for c_str Error( "parsed %s", token->c_str() ); } } else if ( dot > 1 ) { Error( "more than one dot in number" ); } else { token->subtype = TT_DECIMAL | TT_INTEGER; } } if ( token->subtype & TT_FLOAT ) { if ( c > L' ' ) { // single-precision: float if ( c == L'f' || c == L'F' ) { token->subtype |= TT_SINGLE_PRECISION; script_p++; } // extended-precision: long double else if ( c == L'l' || c == L'L' ) { token->subtype |= TT_EXTENDED_PRECISION; script_p++; } // default is double-precision: double else { token->subtype |= TT_DOUBLE_PRECISION; } } else { token->subtype |= TT_DOUBLE_PRECISION; } } else if ( token->subtype & TT_INTEGER ) { if ( c > L' ' ) { // default: signed long for ( i = 0; i < 2; i++ ) { // long integer if ( c == L'l' || c == L'L' ) { token->subtype |= TT_LONG; } // unsigned integer else if ( c == L'u' || c == L'U' ) { token->subtype |= TT_UNSIGNED; } else { break; } c = *(++script_p); } } } token->data[token->len] = L'\0'; return true; } /* ================ idWLexer::ReadPunctuation ================ */ bool idWLexer::ReadPunctuation( idWToken *token ) { int l, n, i; wchar_t* p; const wpunctuation_t *punc; if ( (unsigned int)*(script_p) >= 0xFF ) { return false; } #ifdef PUNCTABLE for (n = punctuationtable[(unsigned int)*(script_p)]; n >= 0; n = nextpunctuation[n]) { punc = &(punctuations[n]); #else for (n = 0; punctuations[n].p; n++) { punc = &punctuations[n]; #endif p = punc->p; // check for this punctuation in the script for ( l = 0; p[l] && script_p[l]; l++ ) { if ( script_p[l] != p[l] ) { break; } } if ( !p[l] ) { // token->EnsureAlloced( l+1, false ); for ( i = 0; i <= l; i++ ) { token->data[i] = p[i]; } token->len = l; // script_p += l; token->type = TT_PUNCTUATION; // sub type is the punctuation id token->subtype = punc->n; return true; } } return false; } /* ================ idWLexer::ReadToken ================ */ bool idWLexer::ReadToken( idWToken* token ) { wchar_t c; if ( !loaded ) { idLib::common->Error( "idWLexer::ReadToken: no file loaded" ); return false; } // save script pointer lastScript_p = script_p; // save line counter lastline = line; // clear the token stuff token->data[0] = L'\0'; token->len = 0; // start of the white space whiteSpaceStart_p = script_p; token->whiteSpaceStart_p = script_p; // read white space before token if ( !SkipWhiteSpace( false ) ) { return false; } // end of the white space whiteSpaceEnd_p = script_p; token->whiteSpaceEnd_p = script_p; // line the token is on token->line = line; // number of lines crossed before token token->linesCrossed = line - lastline; // clear token flags token->flags = 0; c = *script_p; // if we're keeping everything as whitespace delimited strings /*if ( flags & LEXFL_ONLYSTRINGS ) { // if there is a leading quote if ( c == '\"' || c == '\'' ) { if (!idLexer::ReadString( token, c )) { return 0; } } else if ( !idLexer::ReadName( token ) ) { return 0; } }*/ // if there is a number if ( ( c >= L'0' && c <= L'9' ) || ( c == L'.' && ( *(script_p + 1) >= L'0' && *(script_p + 1) <= L'9' ) ) ) { if ( !ReadNumber( token ) ) { return false; } } // if there is a leading quote else if ( c == L'\"' || c == L'\'' ) { if ( !ReadString( token, c ) ) { return false; } } // if there is a name else if ( (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' ) { if ( !ReadName( token ) ) { return 0; } } // check for punctuations else if ( !ReadPunctuation( token ) ) { Error( "unknown punctuation %lc", c ); return 0; } // successfully read a token return true; } /* ================ idWLexer::ExpectTokenString ================ */ bool idWLexer::ExpectTokenString( const wchar_t* string ) { idWToken token; if ( !ReadToken( &token ) ) { Error( "couldn't find expected '%ls'", string ); return false; } if ( token != string ) { Error( "expected '%ls' but found '%ls'", string, token.c_str() ); return false; } return true; } /* ================ idWLexer::ExpectAnyToken ================ */ bool idWLexer::ExpectAnyToken( idWToken* token ) { if ( !ReadToken( token ) ) { Error( "couldn't read expected token" ); return false; } else { return true; } } /* ================ idWLexer::LoadMemory ================ */ bool idWLexer::LoadMemory( const wchar_t* ptr, int length, const char* name, int startLine ) { if ( loaded ) { idLib::common->Error( "idWLexer::LoadMemory: another script already loaded" ); return false; } filename = name; filename.CollapsePath(); buffer = ptr; length = length; // pointer in script buffer script_p = buffer; // pointer in script buffer before reading token lastScript_p = buffer; // pointer to end of script buffer end_p = &(buffer[length]); line = startLine; lastline = startLine; allocated = false; loaded = true; return true; } /* ================ idWLexer::FreeSource ================ */ void idWLexer::FreeSource( void ) { if ( allocated ) { Mem_Free( (void*)buffer ); buffer = NULL; allocated = false; } loaded = false; } /* ================ idWLexer::idWLexer ================ */ idWLexer::idWLexer( void ) { loaded = false; filename = ""; SetPunctuations( NULL ); allocated = false; line = 0; lastline = 0; hadError = false; hadWarning = false; } /* ================ idWLexer::idWLexer ================ */ idWLexer::idWLexer( const wchar_t* ptr, int length, const char* name, int startLine ) { loaded = false; SetPunctuations( NULL ); allocated = false; hadError = false; hadWarning = false; LoadMemory( ptr, length, name, startLine ); } /* ================ idWLexer::~idWLexer ================ */ idWLexer::~idWLexer( void ) { FreeSource(); }